author    Yuqian Li <liyuqian@google.com>  2017-09-29 11:20:01 -0400
committer Skia Commit-Bot <skia-commit-bot@chromium.org>  2017-09-29 16:00:34 +0000
commit 980379d47589f06719c4f3545c412789bf4d43f4 (patch)
tree   aa61a87db339ab9f7aa7f032adb76772cae40dcb /tools/calmbench
parent 45a6f147b0dc47630e75dcfc8f9c4ce816392553 (diff)
Add a new calmbench tool for noiseless nanobench
This tool can quickly check all nanobench tests, including svgs and skps
(<5 minutes for CPU, ~30 minutes for GPU), and find significant performance
regressions without much noise. It is not only faster (lower latency for
regression alerts) but also more sensitive than our k-means and step-fitting
bots, especially for changes that affect only a few benches. It may still miss
some regressions, but the regressions it does report should be valid with very
high probability.

Bug: skia:
Change-Id: I02115e6c5ab630e4c56b2087ffeb5cae1d4a618e
Reviewed-on: https://skia-review.googlesource.com/50060
Commit-Queue: Yuqian Li <liyuqian@google.com>
Reviewed-by: Eric Boren <borenet@google.com>
Diffstat (limited to 'tools/calmbench')
-rw-r--r--  tools/calmbench/ab.py         326
-rw-r--r--  tools/calmbench/calmbench.py  174
2 files changed, 500 insertions, 0 deletions
diff --git a/tools/calmbench/ab.py b/tools/calmbench/ab.py
new file mode 100644
index 0000000000..2ffb9e3ec3
--- /dev/null
+++ b/tools/calmbench/ab.py
@@ -0,0 +1,326 @@
+#!/usr/bin/python
+# encoding: utf-8
+
+# Copyright 2017 Google Inc.
+#
+# Use of this source code is governed by a BSD-style license that can be found
+# in the LICENSE file.
+#
+# This is an A/B test utility script used by calmbench.py
+#
+# For each bench, we get a distribution of min_ms measurements from nanobench.
+# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution.
+# If range (1/3 quantile, 2/3 quantile) is completely disjoint between A and B,
+# we report that as a regression.
+#
+# The more measurements we have for a bench, the more accurate our quantiles
+# are. However, taking more measurements is time consuming. Hence we'll prune
+# out benches and only take more measurements for benches whose current quantile
+# ranges are disjoint.
+#
+# P.S. This script was crudely translated from a Ruby script, so it may be
+# a little rough in places.
+
+import re
+import os
+import sys
+import time
+import json
+import subprocess
+import shlex
+from argparse import ArgumentParser
+from multiprocessing import Process
+from threading import Thread
+from threading import Lock
+from pdb import set_trace
+
+
+HELP = """
+\033[31mPlease call calmbench.py to drive this script if you're not doing so.
+This script is not supposed to be used by itself. (At least, it's not easy to
+use by itself.)
+\033[0m
+"""
+
+FACTOR = 3 # lower/upper quantile factor
+DIFF_T = 0.99 # different enough threshold
+TERM = 10 # terminate after this no. of iterations without suspect changes
+MAXTRY = 30 # max number of nanobench tries to narrow down suspects
+
+UNITS = "ns µs ms s".split()
+
+
+timesLock = Lock()
+timesA = {}
+timesB = {}
+
+
+def parse_args():
+  parser = ArgumentParser(description=HELP)
+
+  parser.add_argument('skiadir', type=str, help="skia directory")
+  parser.add_argument('outdir', type=str, help="output directory")
+  parser.add_argument('a', type=str, help="name of A")
+  parser.add_argument('b', type=str, help="name of B")
+  parser.add_argument('nano_a', type=str, help="path to A's nanobench binary")
+  parser.add_argument('nano_b', type=str, help="path to B's nanobench binary")
+  parser.add_argument('arg_a', type=str, help="args for A's nanobench run")
+  parser.add_argument('arg_b', type=str, help="args for B's nanobench run")
+  parser.add_argument('repeat', type=int, help="number of initial runs")
+  parser.add_argument('skip_b', type=str, help=("whether to skip running B"
+                                                " ('true' or 'false')"))
+  parser.add_argument('config', type=str, help="nanobench config")
+  parser.add_argument('threads', type=int, help="number of threads to run")
+  parser.add_argument('noinit', type=str, help=("whether to skip the initial"
+                                                " runs ('true' or 'false')"))
+
+  args = parser.parse_args()
+  args.skip_b = args.skip_b == "true"
+  args.noinit = args.noinit == "true"
+
+  return args
+
+def append_dict_sorted_array(dict_array, key, value):
+  if key not in dict_array:
+    dict_array[key] = []
+  dict_array[key].append(value)
+  dict_array[key].sort()
+
+
+def add_time(args, name, bench, t, unit):
+  normalized_t = t * 1000 ** UNITS.index(unit)
+  if name.startswith(args.a):
+    append_dict_sorted_array(timesA, bench, normalized_t)
+  else:
+    append_dict_sorted_array(timesB, bench, normalized_t)
+
+
+def append_times_from_file(args, name, filename):
+  with open(filename) as f:
+    lines = f.readlines()
+  for line in lines:
+    items = line.split()
+    if len(items) > 10:
+      bench = items[10]
+      matches = re.search("([+-]?\d*.?\d+)(s|ms|µs|ns)", items[3])
+      if (not matches or items[9] != args.config):
+        continue
+      time_num = matches.group(1)
+      time_unit = matches.group(2)
+      add_time(args, name, bench, float(time_num), time_unit)
+
+
+class ThreadRunner:
+ """Simplest and stupidiest threaded executer."""
+ def __init__(self):
+ self.threads = []
+
+ def add(self, args, fn):
+ if len(self.threads) >= args.threads:
+ self.wait()
+ t = Thread(target = fn)
+ t.daemon = True
+ self.threads.append(t)
+ t.start()
+
+ def wait(self):
+ def spin():
+ i = 0
+ spinners = [". ", ".. ", "..."]
+ while len(self.threads) > 0:
+ timesLock.acquire()
+ sys.stderr.write(
+ "\r" + spinners[i % len(spinners)] +
+ " (%d threads running)" % len(self.threads) +
+ " \r" # spaces for erasing characters
+ )
+ timesLock.release()
+ time.sleep(0.5)
+ i += 1
+
+ ts = Thread(target = spin);
+ ts.start()
+ for t in self.threads:
+ t.join()
+ self.threads = []
+ ts.join()
+
+
+def split_arg(arg):
+  raw = shlex.split(arg)
+  result = []
+  for r in raw:
+    if '~' in r:
+      result.append(os.path.expanduser(r))
+    else:
+      result.append(r)
+  return result
+
+
+def run(args, threadRunner, name, nano, arg, i):
+  def task():
+    file_i = "%s/%s.out%d" % (args.outdir, name, i)
+
+    should_run = not args.noinit and not (name == args.b and args.skip_b)
+    if i <= 0:
+      should_run = True  # always run for suspects
+
+    if should_run:
+      if i > 0:
+        timesLock.acquire()
+        print "Init run %d for %s..." % (i, name)
+        timesLock.release()
+      subprocess.check_call(["touch", file_i])
+      with open(file_i, 'w') as f:
+        subprocess.check_call([nano] + split_arg(arg) +
+            ["--config", args.config], stderr=f, stdout=f)
+
+    timesLock.acquire()
+    append_times_from_file(args, name, file_i)
+    timesLock.release()
+
+  threadRunner.add(args, task)
+
+
+def init_run(args):
+  threadRunner = ThreadRunner()
+  for i in range(1, max(args.repeat, args.threads / 2) + 1):
+    run(args, threadRunner, args.a, args.nano_a, args.arg_a, i)
+    run(args, threadRunner, args.b, args.nano_b, args.arg_b, i)
+  threadRunner.wait()
+
+
+def get_lower_upper(values):
+  i = max(0, (len(values) - 1) / FACTOR)
+  return values[i], values[-i - 1]
+
+
+def different_enough(lower1, upper2):
+  return upper2 < DIFF_T * lower1
+
+
+def get_suspects():
+  suspects = []
+  for bench in timesA.keys():
+    if bench not in timesB:
+      continue
+    lowerA, upperA = get_lower_upper(timesA[bench])
+    lowerB, upperB = get_lower_upper(timesB[bench])
+    if different_enough(lowerA, upperB) or different_enough(lowerB, upperA):
+      suspects.append(bench)
+  return suspects
+
+
+def process_bench_pattern(s):
+ if ".skp" in s: # skp bench won't match their exact names...
+ return "^\"" + s[0:(s.index(".skp") + 3)] + "\""
+ else:
+ return "^\"" + s + "\"$"
+
+
+def suspects_arg(suspects):
+  patterns = map(process_bench_pattern, suspects)
+  return " --match " + (" ".join(patterns))
+
+
+def median(array):
+  return array[len(array) / 2]
+
+
+def regression(bench):
+  a = median(timesA[bench])
+  b = median(timesB[bench])
+  if (a == 0):  # bad bench, just return no regression
+    return 1
+  return b / a
+
+
+def percentage(x):
+  return (x - 1) * 100
+
+
+def format_r(r):
+  return ('%6.2f' % percentage(r)) + "%"
+
+
+def test():
+  args = parse_args()
+
+  init_run(args)
+  last_unchanged_iter = 0
+  last_suspect_number = -1
+  tryCnt = 0
+  it = 0
+  while tryCnt < MAXTRY:
+    it += 1
+    suspects = get_suspects()
+    if len(suspects) != last_suspect_number:
+      last_suspect_number = len(suspects)
+      last_unchanged_iter = it
+    if (len(suspects) == 0 or it - last_unchanged_iter >= TERM):
+      break
+
+    print "Number of suspects at iteration %d: %d" % (it, len(suspects))
+    threadRunner = ThreadRunner()
+    for j in range(1, max(1, args.threads / 2) + 1):
+      run(args, threadRunner, args.a, args.nano_a,
+          args.arg_a + suspects_arg(suspects), -j)
+      run(args, threadRunner, args.b, args.nano_b,
+          args.arg_b + suspects_arg(suspects), -j)
+      tryCnt += 1
+    threadRunner.wait()
+
+  suspects = get_suspects()
+  if len(suspects) == 0:
+    print ("%s and %s do not seem to have significant " + \
+           "performance differences.") % (args.a, args.b)
+  else:
+    suspects.sort(key = regression)
+    print "%s (compared to %s) is likely" % (args.a, args.b)
+    for suspect in suspects:
+      r = regression(suspect)
+      if r < 1:
+        print "\033[31m %s slower in %s\033[0m" % \
+              (format_r(1/r), suspect)
+      else:
+        print "\033[32m %s faster in %s\033[0m" % \
+              (format_r(r), suspect)
+
+  with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f:
+    f.write(json.dumps(map(
+        lambda bench: {bench: regression(bench)},
+        suspects
+    )))
+    print ("\033[36mJSON results available in %s\033[0m" % f.name)
+
+  with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out:
+    out.write(("bench, significant?, raw regression, " +
+               "%(A)s quantile (ns), %(B)s quantile (ns), " +
+               "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b})
+    for bench in suspects + timesA.keys():
+      if (bench not in timesA or bench not in timesB):
+        continue
+      ta = timesA[bench]
+      tb = timesB[bench]
+      out.write(
+          "%s, %s, %f, " % (bench, bench in suspects, regression(bench)) +
+          ' '.join(map(str, get_lower_upper(ta))) + ", " +
+          ' '.join(map(str, get_lower_upper(tb))) + ", " +
+          ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb))))
+      )
+    print (("\033[36m" +
+            "Compared %d benches. " +
+            "%d of them seem to be significantly different." +
+            "\033[0m") %
+           (len([x for x in timesA if x in timesB]), len(suspects)))
+    print ("\033[36mPlease see detailed bench results in %s\033[0m" %
+           out.name)
+
+
+if __name__ == "__main__":
+  try:
+    test()
+  except Exception as e:
+    print e
+    print HELP
+    raise
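
The header comment in ab.py above describes the core idea: a bench is flagged only when the (1/3, 2/3) quantile ranges of A's and B's timings are disjoint by more than the DIFF_T margin. The following is a self-contained, illustrative sketch of that check, not part of this commit; the sample timings are made up, and integer division is written as // so the sketch also runs under Python 3.

    # Illustrative sketch only: the disjoint-quantile check from ab.py,
    # applied to made-up measurements for a single bench.

    FACTOR = 3     # lower/upper quantile factor, as in ab.py
    DIFF_T = 0.99  # "different enough" threshold, as in ab.py

    def get_lower_upper(values):
      # values are sorted; return roughly the 1/3 and 2/3 quantiles
      i = max(0, (len(values) - 1) // FACTOR)
      return values[i], values[-i - 1]

    def different_enough(lower1, upper2):
      # True when upper2 is more than ~1% below lower1
      return upper2 < DIFF_T * lower1

    # Hypothetical timing samples for one bench (already normalized to ns,
    # the way add_time() does) from branches A and B:
    times_a = sorted([103.0, 105.0, 104.0, 106.0, 105.5, 104.5])
    times_b = sorted([98.0, 99.0, 97.5, 98.5, 99.5, 98.2])

    lower_a, upper_a = get_lower_upper(times_a)
    lower_b, upper_b = get_lower_upper(times_b)

    # A bench becomes a "suspect" when the quantile ranges are disjoint
    # (in either direction) by more than the 1% threshold.
    suspect = different_enough(lower_a, upper_b) or different_enough(lower_b, upper_a)
    print("suspect: %s" % suspect)  # -> suspect: True (B is ~6% faster here)

In ab.py itself, suspect benches are then re-run with narrowed --match patterns (the negative -j indices in test()) until the suspect set stops changing for TERM iterations or MAXTRY extra runs have been used.
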
diff --git a/tools/calmbench/calmbench.py b/tools/calmbench/calmbench.py
new file mode 100644
index 0000000000..4d1319de1a
--- /dev/null
+++ b/tools/calmbench/calmbench.py
@@ -0,0 +1,174 @@
+#!/usr/bin/python
+
+# Copyright 2017 Google Inc.
+#
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+import subprocess
+import multiprocessing
+
+from argparse import ArgumentParser
+
+
+README = """
+Simply run
+\033[36m
+  python {0} TEST_GIT_BRANCH
+\033[0m
+to see if TEST_GIT_BRANCH has performance regressions against master in 8888.
+
+To compare a specific config with svg and skp resources included, add the
+--config and --extraarg options. For example,
+\033[36m
+  python {0} TEST_GIT_BRANCH --config gl \\
+      --extraarg "--svgs ~/Desktop/bots/svgs --skps ~/Desktop/bots/skps"
+\033[0m
+For more options, please see
+
+  python {0} --help
+""".format(__file__)
+
+
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+AB_SCRIPT = "ab.py"
+
+
+def parse_args():
+  if len(sys.argv) <= 1 or sys.argv[1] == '-h' or sys.argv[1] == '--help':
+    print README
+
+  parser = ArgumentParser(
+      description='Noiselessly (hence calm) benchmark a git branch against ' +
+                  'another baseline branch (e.g., master) using multiple ' +
+                  'nanobench runs.'
+  )
+
+  default_threads = max(1, multiprocessing.cpu_count() / 2)
+  default_skiadir = os.path.normpath(CURRENT_DIR + "/../../")
+
+  config_help = (
+      'nanobench config; we currently support only one config '
+      'at a time (default: %(default)s)')
+  reps_help = (
+      'initial repetitions of the nanobench run; this may be '
+      'overridden when we have many threads (default: %(default)s)')
+  extraarg_help = (
+      'nanobench args (example: --svgs ~/Desktop/bots/svgs --skps '
+      '~/Desktop/bots/skps)')
+  baseline_help = (
+      'baseline branch to compare against (default: %(default)s)')
+  basearg_help = (
+      'nanobench arg for the baseline branch; if not given, we use '
+      'the same arg for both the test branch and the baseline branch')
+  threads_help = (
+      'number of threads to be used (default: %(default)s); '
+      'for GPU config, this will always be 1')
+  no_compile_help = (
+      'whether NOT to compile nanobench and copy it to WRITEDIR '
+      '(i.e., reuse the previously compiled nanobench)')
+  skip_base_help = (
+      'whether NOT to run nanobench on the baseline branch '
+      '(i.e., reuse previous baseline measurements)')
+  noinit_help = (
+      'whether to skip initial nanobench runs (default: %(default)s)')
+
+  definitions = [
+    # argname, type, default value, help
+    ['--config', str, '8888', config_help],
+    ['--skiadir', str, default_skiadir, 'default: %(default)s'],
+    ['--ninjadir', str, 'out/Release', 'default: %(default)s'],
+    ['--writedir', str, '/var/tmp', 'default: %(default)s'],
+    ['--extraarg', str, '', extraarg_help],
+    ['--baseline', str, 'master', baseline_help],
+    ['--basearg', str, '', basearg_help],
+    ['--reps', int, 2, reps_help],
+    ['--threads', int, default_threads, threads_help]
+  ]
+
+  for d in definitions:
+    parser.add_argument(d[0], type=d[1], default=d[2], help=d[3])
+
+  parser.add_argument('branch', type=str, help="the test branch to benchmark")
+  parser.add_argument('--no-compile', dest='no_compile', action="store_true",
+                      help=no_compile_help)
+  parser.add_argument('--skip-base', dest='skipbase', action="store_true",
+                      help=skip_base_help)
+  parser.add_argument('--noinit', dest='noinit', action="store_true",
+                      help=noinit_help)
+  parser.set_defaults(no_compile=False)
+  parser.set_defaults(skipbase=False)
+  parser.set_defaults(noinit=False)
+
+  args = parser.parse_args()
+  if not args.basearg:
+    args.basearg = args.extraarg
+
+  return args
+
+
+def nano_path(args, branch):
+  return args.writedir + '/nanobench_' + branch
+
+
+def compile_branch(args, branch):
+ print "Compiling branch %s" % args.branch
+
+ os.chdir(args.skiadir)
+ commands = [
+ ['git', 'checkout', branch],
+ ['ninja', '-C', args.ninjadir, 'nanobench'],
+ ['cp', args.ninjadir + '/nanobench', nano_path(args, branch)]
+ ]
+ for command in commands:
+ subprocess.check_call(command, cwd=args.skiadir)
+
+
+def compile_nanobench(args):
+  compile_branch(args, args.branch)
+  compile_branch(args, args.baseline)
+
+
+def main():
+  args = parse_args()
+
+  # copy ab.py to writedir in case it disappears after switching git branches
+  orig_ab_name = CURRENT_DIR + "/" + AB_SCRIPT
+  temp_ab_name = args.writedir + "/" + AB_SCRIPT
+  subprocess.check_call(['cp', orig_ab_name, temp_ab_name])
+
+  if not args.no_compile:
+    compile_nanobench(args)
+
+  command = [
+    'python',
+    temp_ab_name,
+    args.skiadir,
+    args.writedir,
+    args.branch + ("_A" if args.branch == args.baseline else ""),
+    args.baseline + ("_B" if args.branch == args.baseline else ""),
+    nano_path(args, args.branch),
+    nano_path(args, args.baseline),
+    args.extraarg,
+    args.basearg,
+    str(args.reps),
+    "true" if args.skipbase else "false",
+    args.config,
+    str(args.threads if args.config in ["8888", "565"] else 1),
+    "true" if args.noinit else "false"
+  ]
+
+  p = subprocess.Popen(command, cwd=args.skiadir)
+  try:
+    p.wait()
+  except KeyboardInterrupt:
+    try:
+      p.terminate()
+    except OSError as e:
+      print e
+
+
+if __name__ == "__main__":
+  main()
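
For reference, here is an illustrative sketch, not part of this commit, of the positional argv that main() above assembles for ab.py on a default run of calmbench.py against master with the 8888 config; the skia path, branch name, and thread count are hypothetical placeholders.

    # Illustrative only: roughly what subprocess.Popen receives for
    # "python calmbench.py my_branch" with default options, assuming
    # default_threads works out to 4 on this machine.
    command = [
      'python',
      '/var/tmp/ab.py',                # temp_ab_name: AB_SCRIPT copied into writedir
      '/path/to/skia',                 # args.skiadir (defaults to the repo root)
      '/var/tmp',                      # args.writedir
      'my_branch',                     # name of A (suffix only if branch == baseline)
      'master',                        # name of B (the default baseline)
      '/var/tmp/nanobench_my_branch',  # nano_path(args, args.branch)
      '/var/tmp/nanobench_master',     # nano_path(args, args.baseline)
      '',                              # args.extraarg
      '',                              # args.basearg (falls back to extraarg)
      '2',                             # str(args.reps)
      'false',                         # skip_b: run the baseline too
      '8888',                          # args.config
      '4',                             # threads (forced to 1 for non-8888/565 configs)
      'false',                         # noinit: do the initial runs
    ]

The order matches the positional arguments declared in ab.py's parse_args().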