diff options
Diffstat (limited to 'tools/calmbench/ab.py')
-rw-r--r-- | tools/calmbench/ab.py | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/tools/calmbench/ab.py b/tools/calmbench/ab.py new file mode 100644 index 0000000000..2ffb9e3ec3 --- /dev/null +++ b/tools/calmbench/ab.py @@ -0,0 +1,326 @@ +#!/usr/bin/python +# encoding: utf-8 + +# Copyright 2017 Google Inc. +# +# Use of this source code is governed by a BSD-style license that can be found +# in the LICENSE file. +# +# This is an A/B test utility script used by calmbench.py +# +# For each bench, we get a distribution of min_ms measurements from nanobench. +# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution. +# If range (1/3 quantile, 2/3 quantile) is completely disjoint between A and B, +# we report that as a regression. +# +# The more measurements we have for a bench, the more accurate our quantiles +# are. However, taking more measurements is time consuming. Hence we'll prune +# out benches and only take more measurements for benches whose current quantile +# ranges are disjoint. +# +# P.S. The current script is brute forcely translated from a ruby script. So it +# may be ugly... + +import re +import os +import sys +import time +import json +import subprocess +import shlex +from argparse import ArgumentParser +from multiprocessing import Process +from threading import Thread +from threading import Lock +from pdb import set_trace + + +HELP = """ +\033[31mPlease call calmbench.py to drive this script if you're not doing so. +This script is not supposed to be used by itself. (At least, it's not easy to +use by itself.) +\033[0m +""" + +FACTOR = 3 # lower/upper quantile factor +DIFF_T = 0.99 # different enough threshold +TERM = 10 # terminate after this no. of iterations without suspect changes +MAXTRY = 30 # max number of nanobench tries to narrow down suspects + +UNITS = "ns µs ms s".split() + + +timesLock = Lock() +timesA = {} +timesB = {} + + +def parse_args(): + parser = ArgumentParser(description=HELP) + + parser.add_argument('skiadir', type=str, help="skia directory") + parser.add_argument('outdir', type=str, help="output directory") + parser.add_argument('a', type=str, help="name of A") + parser.add_argument('b', type=str, help="name of B") + parser.add_argument('nano_a', type=str, help="path to A's nanobench binary") + parser.add_argument('nano_b', type=str, help="path to B's nanobench binary") + parser.add_argument('arg_a', type=str, help="args for A's nanobench run") + parser.add_argument('arg_b', type=str, help="args for B's nanobench run") + parser.add_argument('repeat', type=int, help="number of initial runs") + parser.add_argument('skip_b', type=str, help=("whether to skip running B" + " ('true' or 'false')")) + parser.add_argument('config', type=str, help="nanobenh config") + parser.add_argument('threads', type=int, help="number of threads to run") + parser.add_argument('noinit', type=str, help=("whether to skip running B" + " ('true' or 'false')")) + + args = parser.parse_args() + args.skip_b = args.skip_b == "true" + args.noinit = args.noinit == "true" + + return args + +def append_dict_sorted_array(dict_array, key, value): + if key not in dict_array: + dict_array[key] = [] + dict_array[key].append(value) + dict_array[key].sort() + + +def add_time(args, name, bench, t, unit): + normalized_t = t * 1000 ** UNITS.index(unit); + if name.startswith(args.a): + append_dict_sorted_array(timesA, bench, normalized_t) + else: + append_dict_sorted_array(timesB, bench, normalized_t) + + +def append_times_from_file(args, name, filename): + with open(filename) as f: + lines = f.readlines() + for line in lines: + items = line.split() + if len(items) > 10: + bench = items[10] + matches = re.search("([+-]?\d*.?\d+)(s|ms|µs|ns)", items[3]) + if (not matches or items[9] != args.config): + continue + time_num = matches.group(1) + time_unit = matches.group(2) + add_time(args, name, bench, float(time_num), time_unit) + + +class ThreadRunner: + """Simplest and stupidiest threaded executer.""" + def __init__(self): + self.threads = [] + + def add(self, args, fn): + if len(self.threads) >= args.threads: + self.wait() + t = Thread(target = fn) + t.daemon = True + self.threads.append(t) + t.start() + + def wait(self): + def spin(): + i = 0 + spinners = [". ", ".. ", "..."] + while len(self.threads) > 0: + timesLock.acquire() + sys.stderr.write( + "\r" + spinners[i % len(spinners)] + + " (%d threads running)" % len(self.threads) + + " \r" # spaces for erasing characters + ) + timesLock.release() + time.sleep(0.5) + i += 1 + + ts = Thread(target = spin); + ts.start() + for t in self.threads: + t.join() + self.threads = [] + ts.join() + + +def split_arg(arg): + raw = shlex.split(arg) + result = [] + for r in raw: + if '~' in r: + result.append(os.path.expanduser(r)) + else: + result.append(r) + return result + + +def run(args, threadRunner, name, nano, arg, i): + def task(): + file_i = "%s/%s.out%d" % (args.outdir, name, i) + + should_run = not args.noinit and not (name == args.b and args.skip_b) + if i <= 0: + should_run = True # always run for suspects + + if should_run: + if i > 0: + timesLock.acquire() + print "Init run %d for %s..." % (i, name) + timesLock.release() + subprocess.check_call(["touch", file_i]) + with open(file_i, 'w') as f: + subprocess.check_call([nano] + split_arg(arg) + + ["--config", args.config], stderr=f, stdout=f) + + timesLock.acquire() + append_times_from_file(args, name, file_i) + timesLock.release() + + threadRunner.add(args, task) + + +def init_run(args): + threadRunner = ThreadRunner() + for i in range(1, max(args.repeat, args.threads / 2) + 1): + run(args, threadRunner, args.a, args.nano_a, args.arg_a, i) + run(args, threadRunner, args.b, args.nano_b, args.arg_b, i) + threadRunner.wait() + + +def get_lower_upper(values): + i = max(0, (len(values) - 1) / FACTOR) + return values[i], values[-i - 1] + + +def different_enough(lower1, upper2): + return upper2 < DIFF_T * lower1 + + +def get_suspects(): + suspects = [] + for bench in timesA.keys(): + if bench not in timesB: + continue + lowerA, upperA = get_lower_upper(timesA[bench]) + lowerB, upperB = get_lower_upper(timesB[bench]) + if different_enough(lowerA, upperB) or different_enough(lowerB, upperA): + suspects.append(bench) + return suspects + + +def process_bench_pattern(s): + if ".skp" in s: # skp bench won't match their exact names... + return "^\"" + s[0:(s.index(".skp") + 3)] + "\"" + else: + return "^\"" + s + "\"$" + + +def suspects_arg(suspects): + patterns = map(process_bench_pattern, suspects) + return " --match " + (" ".join(patterns)) + + +def median(array): + return array[len(array) / 2] + + +def regression(bench): + a = median(timesA[bench]) + b = median(timesB[bench]) + if (a == 0): # bad bench, just return no regression + return 1 + return b / a + + +def percentage(x): + return (x - 1) * 100 + + +def format_r(r): + return ('%6.2f' % percentage(r)) + "%" + + +def test(): + args = parse_args() + + init_run(args) + last_unchanged_iter = 0 + last_suspect_number = -1 + tryCnt = 0 + it = 0 + while tryCnt < MAXTRY: + it += 1 + suspects = get_suspects() + if len(suspects) != last_suspect_number: + last_suspect_number = len(suspects) + last_unchanged_iter = it + if (len(suspects) == 0 or it - last_unchanged_iter >= TERM): + break + + print "Number of suspects at iteration %d: %d" % (it, len(suspects)) + threadRunner = ThreadRunner() + for j in range(1, max(1, args.threads / 2) + 1): + run(args, threadRunner, args.a, args.nano_a, + args.arg_a + suspects_arg(suspects), -j) + run(args, threadRunner, args.b, args.nano_b, + args.arg_b + suspects_arg(suspects), -j) + tryCnt += 1 + threadRunner.wait() + + suspects = get_suspects() + if len(suspects) == 0: + print ("%s and %s does not seem to have significant " + \ + "performance differences.") % (args.a, args.b) + else: + suspects.sort(key = regression) + print "%s (compared to %s) is likely" % (args.a, args.b) + for suspect in suspects: + r = regression(suspect) + if r < 1: + print "\033[31m %s slower in %s\033[0m" % \ + (format_r(1/r), suspect) + else: + print "\033[32m %s faster in %s\033[0m" % \ + (format_r(r), suspect) + + with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f: + f.write(json.dumps(map( + lambda bench: {bench: regression(bench)}, + suspects + ))) + print ("\033[36mJSON results available in %s\033[0m" % f.name) + + with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out: + out.write(("bench, significant?, raw regresion, " + + "%(A)s quantile (ns), %(B)s quantile (ns), " + + "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b}) + for bench in suspects + timesA.keys(): + if (bench not in timesA or bench not in timesB): + continue + ta = timesA[bench] + tb = timesB[bench] + out.write( + "%s, %s, %f, " % (bench, bench in suspects, regression(bench)) + + ' '.join(map(str, get_lower_upper(ta))) + ", " + + ' '.join(map(str, get_lower_upper(tb))) + ", " + + ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb)))) + ) + print (("\033[36m" + + "Compared %d benches. " + + "%d of them seem to be significantly differrent." + + "\033[0m") % + (len([x for x in timesA if x in timesB]), len(suspects))) + print ("\033[36mPlease see detailed bench results in %s\033[0m" % + out.name) + + +if __name__ == "__main__": + try: + test() + except Exception as e: + print e + print HELP + raise |