aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools/calmbench/ab.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/calmbench/ab.py')
-rw-r--r--tools/calmbench/ab.py326
1 files changed, 326 insertions, 0 deletions
diff --git a/tools/calmbench/ab.py b/tools/calmbench/ab.py
new file mode 100644
index 0000000000..2ffb9e3ec3
--- /dev/null
+++ b/tools/calmbench/ab.py
@@ -0,0 +1,326 @@
+#!/usr/bin/python
+# encoding: utf-8
+
+# Copyright 2017 Google Inc.
+#
+# Use of this source code is governed by a BSD-style license that can be found
+# in the LICENSE file.
+#
+# This is an A/B test utility script used by calmbench.py
+#
+# For each bench, we get a distribution of min_ms measurements from nanobench.
+# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution.
+# If range (1/3 quantile, 2/3 quantile) is completely disjoint between A and B,
+# we report that as a regression.
+#
+# The more measurements we have for a bench, the more accurate our quantiles
+# are. However, taking more measurements is time consuming. Hence we'll prune
+# out benches and only take more measurements for benches whose current quantile
+# ranges are disjoint.
+#
+# P.S. The current script is brute forcely translated from a ruby script. So it
+# may be ugly...
+
+import re
+import os
+import sys
+import time
+import json
+import subprocess
+import shlex
+from argparse import ArgumentParser
+from multiprocessing import Process
+from threading import Thread
+from threading import Lock
+from pdb import set_trace
+
+
+HELP = """
+\033[31mPlease call calmbench.py to drive this script if you're not doing so.
+This script is not supposed to be used by itself. (At least, it's not easy to
+use by itself.)
+\033[0m
+"""
+
+FACTOR = 3 # lower/upper quantile factor
+DIFF_T = 0.99 # different enough threshold
+TERM = 10 # terminate after this no. of iterations without suspect changes
+MAXTRY = 30 # max number of nanobench tries to narrow down suspects
+
+UNITS = "ns µs ms s".split()
+
+
+timesLock = Lock()
+timesA = {}
+timesB = {}
+
+
+def parse_args():
+ parser = ArgumentParser(description=HELP)
+
+ parser.add_argument('skiadir', type=str, help="skia directory")
+ parser.add_argument('outdir', type=str, help="output directory")
+ parser.add_argument('a', type=str, help="name of A")
+ parser.add_argument('b', type=str, help="name of B")
+ parser.add_argument('nano_a', type=str, help="path to A's nanobench binary")
+ parser.add_argument('nano_b', type=str, help="path to B's nanobench binary")
+ parser.add_argument('arg_a', type=str, help="args for A's nanobench run")
+ parser.add_argument('arg_b', type=str, help="args for B's nanobench run")
+ parser.add_argument('repeat', type=int, help="number of initial runs")
+ parser.add_argument('skip_b', type=str, help=("whether to skip running B"
+ " ('true' or 'false')"))
+ parser.add_argument('config', type=str, help="nanobenh config")
+ parser.add_argument('threads', type=int, help="number of threads to run")
+ parser.add_argument('noinit', type=str, help=("whether to skip running B"
+ " ('true' or 'false')"))
+
+ args = parser.parse_args()
+ args.skip_b = args.skip_b == "true"
+ args.noinit = args.noinit == "true"
+
+ return args
+
+def append_dict_sorted_array(dict_array, key, value):
+ if key not in dict_array:
+ dict_array[key] = []
+ dict_array[key].append(value)
+ dict_array[key].sort()
+
+
+def add_time(args, name, bench, t, unit):
+ normalized_t = t * 1000 ** UNITS.index(unit);
+ if name.startswith(args.a):
+ append_dict_sorted_array(timesA, bench, normalized_t)
+ else:
+ append_dict_sorted_array(timesB, bench, normalized_t)
+
+
+def append_times_from_file(args, name, filename):
+ with open(filename) as f:
+ lines = f.readlines()
+ for line in lines:
+ items = line.split()
+ if len(items) > 10:
+ bench = items[10]
+ matches = re.search("([+-]?\d*.?\d+)(s|ms|µs|ns)", items[3])
+ if (not matches or items[9] != args.config):
+ continue
+ time_num = matches.group(1)
+ time_unit = matches.group(2)
+ add_time(args, name, bench, float(time_num), time_unit)
+
+
+class ThreadRunner:
+ """Simplest and stupidiest threaded executer."""
+ def __init__(self):
+ self.threads = []
+
+ def add(self, args, fn):
+ if len(self.threads) >= args.threads:
+ self.wait()
+ t = Thread(target = fn)
+ t.daemon = True
+ self.threads.append(t)
+ t.start()
+
+ def wait(self):
+ def spin():
+ i = 0
+ spinners = [". ", ".. ", "..."]
+ while len(self.threads) > 0:
+ timesLock.acquire()
+ sys.stderr.write(
+ "\r" + spinners[i % len(spinners)] +
+ " (%d threads running)" % len(self.threads) +
+ " \r" # spaces for erasing characters
+ )
+ timesLock.release()
+ time.sleep(0.5)
+ i += 1
+
+ ts = Thread(target = spin);
+ ts.start()
+ for t in self.threads:
+ t.join()
+ self.threads = []
+ ts.join()
+
+
+def split_arg(arg):
+ raw = shlex.split(arg)
+ result = []
+ for r in raw:
+ if '~' in r:
+ result.append(os.path.expanduser(r))
+ else:
+ result.append(r)
+ return result
+
+
+def run(args, threadRunner, name, nano, arg, i):
+ def task():
+ file_i = "%s/%s.out%d" % (args.outdir, name, i)
+
+ should_run = not args.noinit and not (name == args.b and args.skip_b)
+ if i <= 0:
+ should_run = True # always run for suspects
+
+ if should_run:
+ if i > 0:
+ timesLock.acquire()
+ print "Init run %d for %s..." % (i, name)
+ timesLock.release()
+ subprocess.check_call(["touch", file_i])
+ with open(file_i, 'w') as f:
+ subprocess.check_call([nano] + split_arg(arg) +
+ ["--config", args.config], stderr=f, stdout=f)
+
+ timesLock.acquire()
+ append_times_from_file(args, name, file_i)
+ timesLock.release()
+
+ threadRunner.add(args, task)
+
+
+def init_run(args):
+ threadRunner = ThreadRunner()
+ for i in range(1, max(args.repeat, args.threads / 2) + 1):
+ run(args, threadRunner, args.a, args.nano_a, args.arg_a, i)
+ run(args, threadRunner, args.b, args.nano_b, args.arg_b, i)
+ threadRunner.wait()
+
+
+def get_lower_upper(values):
+ i = max(0, (len(values) - 1) / FACTOR)
+ return values[i], values[-i - 1]
+
+
+def different_enough(lower1, upper2):
+ return upper2 < DIFF_T * lower1
+
+
+def get_suspects():
+ suspects = []
+ for bench in timesA.keys():
+ if bench not in timesB:
+ continue
+ lowerA, upperA = get_lower_upper(timesA[bench])
+ lowerB, upperB = get_lower_upper(timesB[bench])
+ if different_enough(lowerA, upperB) or different_enough(lowerB, upperA):
+ suspects.append(bench)
+ return suspects
+
+
+def process_bench_pattern(s):
+ if ".skp" in s: # skp bench won't match their exact names...
+ return "^\"" + s[0:(s.index(".skp") + 3)] + "\""
+ else:
+ return "^\"" + s + "\"$"
+
+
+def suspects_arg(suspects):
+ patterns = map(process_bench_pattern, suspects)
+ return " --match " + (" ".join(patterns))
+
+
+def median(array):
+ return array[len(array) / 2]
+
+
+def regression(bench):
+ a = median(timesA[bench])
+ b = median(timesB[bench])
+ if (a == 0): # bad bench, just return no regression
+ return 1
+ return b / a
+
+
+def percentage(x):
+ return (x - 1) * 100
+
+
+def format_r(r):
+ return ('%6.2f' % percentage(r)) + "%"
+
+
+def test():
+ args = parse_args()
+
+ init_run(args)
+ last_unchanged_iter = 0
+ last_suspect_number = -1
+ tryCnt = 0
+ it = 0
+ while tryCnt < MAXTRY:
+ it += 1
+ suspects = get_suspects()
+ if len(suspects) != last_suspect_number:
+ last_suspect_number = len(suspects)
+ last_unchanged_iter = it
+ if (len(suspects) == 0 or it - last_unchanged_iter >= TERM):
+ break
+
+ print "Number of suspects at iteration %d: %d" % (it, len(suspects))
+ threadRunner = ThreadRunner()
+ for j in range(1, max(1, args.threads / 2) + 1):
+ run(args, threadRunner, args.a, args.nano_a,
+ args.arg_a + suspects_arg(suspects), -j)
+ run(args, threadRunner, args.b, args.nano_b,
+ args.arg_b + suspects_arg(suspects), -j)
+ tryCnt += 1
+ threadRunner.wait()
+
+ suspects = get_suspects()
+ if len(suspects) == 0:
+ print ("%s and %s does not seem to have significant " + \
+ "performance differences.") % (args.a, args.b)
+ else:
+ suspects.sort(key = regression)
+ print "%s (compared to %s) is likely" % (args.a, args.b)
+ for suspect in suspects:
+ r = regression(suspect)
+ if r < 1:
+ print "\033[31m %s slower in %s\033[0m" % \
+ (format_r(1/r), suspect)
+ else:
+ print "\033[32m %s faster in %s\033[0m" % \
+ (format_r(r), suspect)
+
+ with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f:
+ f.write(json.dumps(map(
+ lambda bench: {bench: regression(bench)},
+ suspects
+ )))
+ print ("\033[36mJSON results available in %s\033[0m" % f.name)
+
+ with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out:
+ out.write(("bench, significant?, raw regresion, " +
+ "%(A)s quantile (ns), %(B)s quantile (ns), " +
+ "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b})
+ for bench in suspects + timesA.keys():
+ if (bench not in timesA or bench not in timesB):
+ continue
+ ta = timesA[bench]
+ tb = timesB[bench]
+ out.write(
+ "%s, %s, %f, " % (bench, bench in suspects, regression(bench)) +
+ ' '.join(map(str, get_lower_upper(ta))) + ", " +
+ ' '.join(map(str, get_lower_upper(tb))) + ", " +
+ ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb))))
+ )
+ print (("\033[36m" +
+ "Compared %d benches. " +
+ "%d of them seem to be significantly differrent." +
+ "\033[0m") %
+ (len([x for x in timesA if x in timesB]), len(suspects)))
+ print ("\033[36mPlease see detailed bench results in %s\033[0m" %
+ out.name)
+
+
+if __name__ == "__main__":
+ try:
+ test()
+ except Exception as e:
+ print e
+ print HELP
+ raise