From 980379d47589f06719c4f3545c412789bf4d43f4 Mon Sep 17 00:00:00 2001
From: Yuqian Li
Date: Fri, 29 Sep 2017 11:20:01 -0400
Subject: Add a new calmbench tool for noiseless nanobench

This tool can quickly check all nanobench tests, including svgs and skps
(<5 minutes for CPU, ~30 minutes for GPU), and find significant performance
regressions without much noise.

This tool is not only faster (lower latency for regression alerts) but also
more sensitive than our k-means and step-fitting bots (especially for changes
that affect only a few benches). It may still miss some regressions, but the
regressions it does report should be valid with very high probability.

Bug: skia:
Change-Id: I02115e6c5ab630e4c56b2087ffeb5cae1d4a618e
Reviewed-on: https://skia-review.googlesource.com/50060
Commit-Queue: Yuqian Li
Reviewed-by: Eric Boren
---
 tools/calmbench/ab.py        | 326 +++++++++++++++++++++++++++++++++++++++++++
 tools/calmbench/calmbench.py | 174 +++++++++++++++++++++++
 2 files changed, 500 insertions(+)
 create mode 100644 tools/calmbench/ab.py
 create mode 100644 tools/calmbench/calmbench.py

diff --git a/tools/calmbench/ab.py b/tools/calmbench/ab.py
new file mode 100644
index 0000000000..2ffb9e3ec3
--- /dev/null
+++ b/tools/calmbench/ab.py
@@ -0,0 +1,326 @@
#!/usr/bin/python
# encoding: utf-8

# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be found
# in the LICENSE file.
#
# This is an A/B test utility script used by calmbench.py
#
# For each bench, we get a distribution of min_ms measurements from nanobench.
# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution.
# If the range (1/3 quantile, 2/3 quantile) is completely disjoint between A
# and B, we report that as a regression.
#
# The more measurements we have for a bench, the more accurate our quantiles
# are. However, taking more measurements is time consuming. Hence we prune out
# benches and only take more measurements for benches whose current quantile
# ranges are disjoint.
#
# P.S. This script was translated fairly mechanically from a Ruby script, so
# it may be ugly...

import re
import os
import sys
import time
import json
import subprocess
import shlex
from argparse import ArgumentParser
from multiprocessing import Process
from threading import Thread
from threading import Lock
from pdb import set_trace


HELP = """
\033[31mPlease call calmbench.py to drive this script if you're not doing so.
This script is not supposed to be used by itself. (At least, it's not easy to
use by itself.)
\033[0m
"""

FACTOR = 3    # lower/upper quantile factor
DIFF_T = 0.99 # different enough threshold
TERM = 10     # terminate after this no. of iterations without suspect changes
MAXTRY = 30   # max number of nanobench tries to narrow down suspects

UNITS = "ns µs ms s".split()


timesLock = Lock()
timesA = {}
timesB = {}


def parse_args():
  parser = ArgumentParser(description=HELP)

  parser.add_argument('skiadir', type=str, help="skia directory")
  parser.add_argument('outdir', type=str, help="output directory")
  parser.add_argument('a', type=str, help="name of A")
  parser.add_argument('b', type=str, help="name of B")
  parser.add_argument('nano_a', type=str, help="path to A's nanobench binary")
  parser.add_argument('nano_b', type=str, help="path to B's nanobench binary")
  parser.add_argument('arg_a', type=str, help="args for A's nanobench run")
  parser.add_argument('arg_b', type=str, help="args for B's nanobench run")
  parser.add_argument('repeat', type=int, help="number of initial runs")
  parser.add_argument('skip_b', type=str, help=("whether to skip running B"
                                                " ('true' or 'false')"))
  parser.add_argument('config', type=str, help="nanobench config")
  parser.add_argument('threads', type=int, help="number of threads to run")
  parser.add_argument('noinit', type=str, help=("whether to skip the initial"
                                                " runs ('true' or 'false')"))

  args = parser.parse_args()
  args.skip_b = args.skip_b == "true"
  args.noinit = args.noinit == "true"

  return args


def append_dict_sorted_array(dict_array, key, value):
  if key not in dict_array:
    dict_array[key] = []
  dict_array[key].append(value)
  dict_array[key].sort()


def add_time(args, name, bench, t, unit):
  normalized_t = t * 1000 ** UNITS.index(unit);
  if name.startswith(args.a):
    append_dict_sorted_array(timesA, bench, normalized_t)
  else:
    append_dict_sorted_array(timesB, bench, normalized_t)


def append_times_from_file(args, name, filename):
  with open(filename) as f:
    lines = f.readlines()
  for line in lines:
    items = line.split()
    if len(items) > 10:
      bench = items[10]
      matches = re.search("([+-]?\d*.?\d+)(s|ms|µs|ns)", items[3])
      if (not matches or items[9] != args.config):
        continue
      time_num = matches.group(1)
      time_unit = matches.group(2)
      add_time(args, name, bench, float(time_num), time_unit)


class ThreadRunner:
  """Simplest possible threaded executor."""
  def __init__(self):
    self.threads = []

  def add(self, args, fn):
    if len(self.threads) >= args.threads:
      self.wait()
    t = Thread(target = fn)
    t.daemon = True
    self.threads.append(t)
    t.start()

  def wait(self):
    def spin():
      i = 0
      spinners = [". ", ".. ", "..."]
      while len(self.threads) > 0:
        timesLock.acquire()
        sys.stderr.write(
            "\r" + spinners[i % len(spinners)] +
            " (%d threads running)" % len(self.threads) +
            "     \r"  # spaces for erasing characters
        )
        timesLock.release()
        time.sleep(0.5)
        i += 1

    ts = Thread(target = spin);
    ts.start()
    for t in self.threads:
      t.join()
    self.threads = []
    ts.join()


def split_arg(arg):
  raw = shlex.split(arg)
  result = []
  for r in raw:
    if '~' in r:
      result.append(os.path.expanduser(r))
    else:
      result.append(r)
  return result


def run(args, threadRunner, name, nano, arg, i):
  def task():
    file_i = "%s/%s.out%d" % (args.outdir, name, i)

    should_run = not args.noinit and not (name == args.b and args.skip_b)
    if i <= 0:
      should_run = True  # always run for suspects

    if should_run:
      if i > 0:
        timesLock.acquire()
        print "Init run %d for %s..." % (i, name)
        timesLock.release()
      subprocess.check_call(["touch", file_i])
      with open(file_i, 'w') as f:
        subprocess.check_call([nano] + split_arg(arg) +
                              ["--config", args.config], stderr=f, stdout=f)

    timesLock.acquire()
    append_times_from_file(args, name, file_i)
    timesLock.release()

  threadRunner.add(args, task)


def init_run(args):
  threadRunner = ThreadRunner()
  for i in range(1, max(args.repeat, args.threads / 2) + 1):
    run(args, threadRunner, args.a, args.nano_a, args.arg_a, i)
    run(args, threadRunner, args.b, args.nano_b, args.arg_b, i)
  threadRunner.wait()


def get_lower_upper(values):
  i = max(0, (len(values) - 1) / FACTOR)
  return values[i], values[-i - 1]


def different_enough(lower1, upper2):
  return upper2 < DIFF_T * lower1


def get_suspects():
  suspects = []
  for bench in timesA.keys():
    if bench not in timesB:
      continue
    lowerA, upperA = get_lower_upper(timesA[bench])
    lowerB, upperB = get_lower_upper(timesB[bench])
    if different_enough(lowerA, upperB) or different_enough(lowerB, upperA):
      suspects.append(bench)
  return suspects


def process_bench_pattern(s):
  if ".skp" in s:  # skp benches won't match their exact names...
    return "^\"" + s[0:(s.index(".skp") + 3)] + "\""
  else:
    return "^\"" + s + "\"$"


def suspects_arg(suspects):
  patterns = map(process_bench_pattern, suspects)
  return " --match " + (" ".join(patterns))


def median(array):
  return array[len(array) / 2]


def regression(bench):
  a = median(timesA[bench])
  b = median(timesB[bench])
  if (a == 0):  # bad bench, just return no regression
    return 1
  return b / a


def percentage(x):
  return (x - 1) * 100


def format_r(r):
  return ('%6.2f' % percentage(r)) + "%"


def test():
  args = parse_args()

  init_run(args)
  last_unchanged_iter = 0
  last_suspect_number = -1
  tryCnt = 0
  it = 0
  while tryCnt < MAXTRY:
    it += 1
    suspects = get_suspects()
    if len(suspects) != last_suspect_number:
      last_suspect_number = len(suspects)
      last_unchanged_iter = it
    if (len(suspects) == 0 or it - last_unchanged_iter >= TERM):
      break

    print "Number of suspects at iteration %d: %d" % (it, len(suspects))
    threadRunner = ThreadRunner()
    for j in range(1, max(1, args.threads / 2) + 1):
      run(args, threadRunner, args.a, args.nano_a,
          args.arg_a + suspects_arg(suspects), -j)
      run(args, threadRunner, args.b, args.nano_b,
          args.arg_b + suspects_arg(suspects), -j)
      tryCnt += 1
    threadRunner.wait()

  suspects = get_suspects()
  if len(suspects) == 0:
    print ("%s and %s do not seem to have significant " + \
           "performance differences.") % (args.a, args.b)
  else:
    suspects.sort(key = regression)
    print "%s (compared to %s) is likely" % (args.a, args.b)
    for suspect in suspects:
      r = regression(suspect)
      if r < 1:
        print "\033[31m  %s slower in %s\033[0m" % \
            (format_r(1/r), suspect)
      else:
        print "\033[32m  %s faster in %s\033[0m" % \
            (format_r(r), suspect)

  with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f:
    f.write(json.dumps(map(
        lambda bench: {bench: regression(bench)},
        suspects
    )))
    print ("\033[36mJSON results available in %s\033[0m" % f.name)

  with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out:
    out.write(("bench, significant?, raw regression, " +
               "%(A)s quantile (ns), %(B)s quantile (ns), " +
               "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b})
    for bench in suspects + timesA.keys():
      if (bench not in timesA or bench not in timesB):
        continue
      ta = timesA[bench]
      tb = timesB[bench]
      out.write(
          "%s, %s, %f, " % (bench, bench in suspects, regression(bench)) +
          ' '.join(map(str, get_lower_upper(ta))) + ", " +
          ' '.join(map(str, get_lower_upper(tb))) + ", " +
          ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb))))
      )
    print (("\033[36m" +
            "Compared %d benches. " +
            "%d of them seem to be significantly different." +
            "\033[0m") %
           (len([x for x in timesA if x in timesB]), len(suspects)))
    print ("\033[36mPlease see detailed bench results in %s\033[0m" %
           out.name)


if __name__ == "__main__":
  try:
    test()
  except Exception as e:
    print e
    print HELP
    raise
diff --git a/tools/calmbench/calmbench.py b/tools/calmbench/calmbench.py
new file mode 100644
index 0000000000..4d1319de1a
--- /dev/null
+++ b/tools/calmbench/calmbench.py
@@ -0,0 +1,174 @@
#!/usr/bin/python

# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import sys
import subprocess
import multiprocessing

from argparse import ArgumentParser


README = """
Simply run
\033[36m
    python {0} TEST_GIT_BRANCH
\033[0m
to see if TEST_GIT_BRANCH has performance regressions against master in the
8888 config.

To compare a specific config with svg and skp resources included, add the
--config and --extraarg options. For example,
\033[36m
    python {0} TEST_GIT_BRANCH --config gl \\
        --extraarg "--svgs ~/Desktop/bots/svgs --skps ~/Desktop/bots/skps"
\033[0m
For more options, please see

    python {0} --help
""".format(__file__)


CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
AB_SCRIPT = "ab.py"


def parse_args():
  if len(sys.argv) <= 1 or sys.argv[1] == '-h' or sys.argv[1] == '--help':
    print README

  parser = ArgumentParser(
      description='Noiselessly (hence calm) benchmark a git branch against ' +
                  'a baseline branch (e.g., master) using multiple ' +
                  'nanobench runs.'
  )

  default_threads = max(1, multiprocessing.cpu_count() / 2);
  default_skiadir = os.path.normpath(CURRENT_DIR + "/../../")

  config_help = (
      'nanobench config; we currently support only one config '
      'at a time (default: %(default)s)')
  reps_help = (
      'initial repetitions of the nanobench run; this may be '
      'overridden when we have many threads (default: %(default)s)')
  extraarg_help = (
      'nanobench args (example: --svgs ~/Desktop/bots/svgs --skps '
      '~/Desktop/bots/skps)')
  baseline_help = (
      'baseline branch to compare against (default: %(default)s)')
  basearg_help = (
      'nanobench arg for the baseline branch; if not given, we use '
      'the same arg for both the test branch and the baseline branch')
  threads_help = (
      'number of threads to be used (default: %(default)s); '
      'for GPU configs, this will always be 1')
  no_compile_help = (
      'whether NOT to compile nanobench and copy it to WRITEDIR '
      '(i.e., reuse the previously compiled nanobench)')
  skip_base_help = (
      'whether NOT to run nanobench on the baseline branch '
      '(i.e., reuse previous baseline measurements)')
  noinit_help = (
      'whether to skip the initial nanobench runs (default: %(default)s)')

  definitions = [
    # argname, type, default value, help
    ['--config',   str, '8888', config_help],
    ['--skiadir',  str, default_skiadir, 'default: %(default)s'],
    ['--ninjadir', str, 'out/Release', 'default: %(default)s'],
    ['--writedir', str, '/var/tmp', 'default: %(default)s'],
    ['--extraarg', str, '', extraarg_help],
    ['--baseline', str, 'master', baseline_help],
    ['--basearg',  str, '', basearg_help],
    ['--reps',     int, 2, reps_help],
    ['--threads',  int, default_threads, threads_help]
  ]

  for d in definitions:
    parser.add_argument(d[0], type=d[1], default=d[2], help=d[3])

  parser.add_argument('branch', type=str, help="the test branch to benchmark")
  parser.add_argument('--no-compile', dest='no_compile', action="store_true",
                      help=no_compile_help)
  parser.add_argument('--skip-base', dest='skipbase', action="store_true",
                      help=skip_base_help)
  parser.add_argument('--noinit', dest='noinit', action="store_true",
                      help=noinit_help)
  parser.set_defaults(no_compile=False);
  parser.set_defaults(skipbase=False);
  parser.set_defaults(noinit=False);

  args = parser.parse_args()
  if not args.basearg:
    args.basearg = args.extraarg

  return args


def nano_path(args, branch):
  return args.writedir + '/nanobench_' + branch


def compile_branch(args, branch):
  print "Compiling branch %s" % branch

  os.chdir(args.skiadir)
  commands = [
    ['git', 'checkout', branch],
    ['ninja', '-C', args.ninjadir, 'nanobench'],
    ['cp', args.ninjadir + '/nanobench', nano_path(args, branch)]
  ]
  for command in commands:
    subprocess.check_call(command, cwd=args.skiadir)


def compile_nanobench(args):
  compile_branch(args, args.branch)
  compile_branch(args, args.baseline)


def main():
  args = parse_args()

  # copy ab.py to writedir in case it disappears after a git branch switch
  orig_ab_name = CURRENT_DIR + "/" + AB_SCRIPT
  temp_ab_name = args.writedir + "/" + AB_SCRIPT
  subprocess.check_call(['cp', orig_ab_name, temp_ab_name])

  if not args.no_compile:
    compile_nanobench(args)

  command = [
    'python',
    temp_ab_name,
    args.skiadir,
    args.writedir,
    args.branch + ("_A" if args.branch == args.baseline else ""),
    args.baseline + ("_B" if args.branch == args.baseline else ""),
    nano_path(args, args.branch),
    nano_path(args, args.baseline),
    args.extraarg,
    args.basearg,
    str(args.reps),
    "true" if args.skipbase else "false",
    args.config,
    str(args.threads if args.config in ["8888", "565"] else 1),
    "true" if args.noinit else "false"
  ]

  p = subprocess.Popen(command, cwd=args.skiadir)
  try:
    p.wait()
  except KeyboardInterrupt:
    try:
      p.terminate()
    except OSError as e:
      print e


if __name__ == "__main__":
  main()
--
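The core check ab.py performs, as described in its header comment, boils down
to: collect the sorted min_ms samples for one bench from branches A and B,
take the rough (1/3, 2/3) quantile range of each, and flag the bench as a
suspect only when one range lies entirely below the other after scaling by
DIFF_T. The sketch below is a standalone illustration of that check on
made-up sample values; the is_suspect helper and the numbers are hypothetical,
while FACTOR, DIFF_T, get_lower_upper, and different_enough mirror the
definitions in ab.py above.

    # Standalone illustration of the quantile-disjointness test (hypothetical
    # data, not part of the patch).
    FACTOR = 3     # lower/upper quantile factor, same constant as ab.py
    DIFF_T = 0.99  # "different enough" threshold, same constant as ab.py

    def get_lower_upper(values):
      # values are one bench's min_ms samples; return ~1/3 and ~2/3 quantiles.
      values = sorted(values)
      i = max(0, (len(values) - 1) // FACTOR)
      return values[i], values[-i - 1]

    def different_enough(lower1, upper2):
      # True when the second range's upper bound sits below DIFF_T times the
      # first range's lower bound.
      return upper2 < DIFF_T * lower1

    def is_suspect(times_a, times_b):
      # Hypothetical helper: flag a bench when the two quantile ranges are
      # disjoint in either direction.
      lower_a, upper_a = get_lower_upper(times_a)
      lower_b, upper_b = get_lower_upper(times_b)
      return (different_enough(lower_a, upper_b) or
              different_enough(lower_b, upper_a))

    if __name__ == "__main__":
      # Made-up samples (ns) for one bench measured on branches A and B.
      a = [103.0, 104.0, 105.0, 106.0, 107.0, 108.0]
      b = [95.0, 96.0, 96.5, 97.0, 97.5, 98.0]
      print(is_suspect(a, b))  # True: B's (1/3, 2/3) range lies entirely below A's

Requiring the two quantile ranges to be fully disjoint means a single noisy
sample cannot trigger a report, which is what keeps the tool's output
low-noise even with only a handful of nanobench runs per branch.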