From 980379d47589f06719c4f3545c412789bf4d43f4 Mon Sep 17 00:00:00 2001
From: Yuqian Li
Date: Fri, 29 Sep 2017 11:20:01 -0400
Subject: Add a new calmbench tool for noiseless nanobench

This tool can quickly check all nanobench tests, including svgs and skps
(<5 minutes for CPU, ~30 minutes for GPU), and find significant performance
regressions without much noise.

This tool is not only faster (lower latency for regression alerts) but also
more sensitive than our k-means and step-fitting bots (especially for changes
that affect only a few benches). It may still miss some regressions, but the
regressions it does report should be valid with very high probability.

Bug: skia:
Change-Id: I02115e6c5ab630e4c56b2087ffeb5cae1d4a618e
Reviewed-on: https://skia-review.googlesource.com/50060
Commit-Queue: Yuqian Li
Reviewed-by: Eric Boren
---
 tools/calmbench/ab.py        | 326 +++++++++++++++++++++++++++++++++++++++++++
 tools/calmbench/calmbench.py | 174 +++++++++++++++++++++++
 2 files changed, 500 insertions(+)
 create mode 100644 tools/calmbench/ab.py
 create mode 100644 tools/calmbench/calmbench.py

diff --git a/tools/calmbench/ab.py b/tools/calmbench/ab.py
new file mode 100644
index 0000000000..2ffb9e3ec3
--- /dev/null
+++ b/tools/calmbench/ab.py
@@ -0,0 +1,326 @@
#!/usr/bin/python
# encoding: utf-8

# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be found
# in the LICENSE file.
#
# This is an A/B test utility script used by calmbench.py
#
# For each bench, we get a distribution of min_ms measurements from nanobench.
# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution.
# If the range (1/3 quantile, 2/3 quantile) is completely disjoint between A
# and B, we report that as a regression.
#
# The more measurements we have for a bench, the more accurate our quantiles
# are. However, taking more measurements is time consuming. Hence we prune out
# benches and only take more measurements for benches whose current quantile
# ranges are disjoint.
#
# P.S. This script was translated fairly mechanically from a Ruby script, so
# it may be ugly...

import re
import os
import sys
import time
import json
import subprocess
import shlex
from argparse import ArgumentParser
from multiprocessing import Process
from threading import Thread
from threading import Lock
from pdb import set_trace


HELP = """
\033[31mPlease call calmbench.py to drive this script if you're not doing so.
This script is not supposed to be used by itself. (At least, it's not easy to
use by itself.)
\033[0m
"""

FACTOR = 3    # lower/upper quantile factor
DIFF_T = 0.99 # different enough threshold
TERM = 10     # terminate after this no. of iterations without suspect changes
MAXTRY = 30   # max number of nanobench tries to narrow down suspects

UNITS = "ns µs ms s".split()


timesLock = Lock()
timesA = {}
timesB = {}


def parse_args():
  parser = ArgumentParser(description=HELP)

  parser.add_argument('skiadir', type=str, help="skia directory")
  parser.add_argument('outdir', type=str, help="output directory")
  parser.add_argument('a', type=str, help="name of A")
  parser.add_argument('b', type=str, help="name of B")
  parser.add_argument('nano_a', type=str, help="path to A's nanobench binary")
  parser.add_argument('nano_b', type=str, help="path to B's nanobench binary")
  parser.add_argument('arg_a', type=str, help="args for A's nanobench run")
  parser.add_argument('arg_b', type=str, help="args for B's nanobench run")
  parser.add_argument('repeat', type=int, help="number of initial runs")
  parser.add_argument('skip_b', type=str, help=("whether to skip running B"
                                                " ('true' or 'false')"))
  parser.add_argument('config', type=str, help="nanobench config")
  parser.add_argument('threads', type=int, help="number of threads to run")
  parser.add_argument('noinit', type=str, help=("whether to skip the initial"
                                                " runs ('true' or 'false')"))

  args = parser.parse_args()
  args.skip_b = args.skip_b == "true"
  args.noinit = args.noinit == "true"

  return args


def append_dict_sorted_array(dict_array, key, value):
  if key not in dict_array:
    dict_array[key] = []
  dict_array[key].append(value)
  dict_array[key].sort()


def add_time(args, name, bench, t, unit):
  normalized_t = t * 1000 ** UNITS.index(unit);
  if name.startswith(args.a):
    append_dict_sorted_array(timesA, bench, normalized_t)
  else:
    append_dict_sorted_array(timesB, bench, normalized_t)


def append_times_from_file(args, name, filename):
  with open(filename) as f:
    lines = f.readlines()
  for line in lines:
    items = line.split()
    if len(items) > 10:
      bench = items[10]
      matches = re.search("([+-]?\d*.?\d+)(s|ms|µs|ns)", items[3])
      if (not matches or items[9] != args.config):
        continue
      time_num = matches.group(1)
      time_unit = matches.group(2)
      add_time(args, name, bench, float(time_num), time_unit)


class ThreadRunner:
  """Simplest possible threaded executor."""
  def __init__(self):
    self.threads = []

  def add(self, args, fn):
    if len(self.threads) >= args.threads:
      self.wait()
    t = Thread(target = fn)
    t.daemon = True
    self.threads.append(t)
    t.start()

  def wait(self):
    def spin():
      i = 0
      spinners = [". ", ".. ", "..."]
      while len(self.threads) > 0:
        timesLock.acquire()
        sys.stderr.write(
            "\r" + spinners[i % len(spinners)] +
            " (%d threads running)" % len(self.threads) +
            "     \r"  # spaces for erasing characters
        )
        timesLock.release()
        time.sleep(0.5)
        i += 1

    ts = Thread(target = spin);
    ts.start()
    for t in self.threads:
      t.join()
    self.threads = []
    ts.join()


def split_arg(arg):
  raw = shlex.split(arg)
  result = []
  for r in raw:
    if '~' in r:
      result.append(os.path.expanduser(r))
    else:
      result.append(r)
  return result


def run(args, threadRunner, name, nano, arg, i):
  def task():
    file_i = "%s/%s.out%d" % (args.outdir, name, i)

    should_run = not args.noinit and not (name == args.b and args.skip_b)
    if i <= 0:
      should_run = True  # always run for suspects

    if should_run:
      if i > 0:
        timesLock.acquire()
        print "Init run %d for %s..." % (i, name)
        timesLock.release()
      subprocess.check_call(["touch", file_i])
      with open(file_i, 'w') as f:
        subprocess.check_call([nano] + split_arg(arg) +
                              ["--config", args.config], stderr=f, stdout=f)

    timesLock.acquire()
    append_times_from_file(args, name, file_i)
    timesLock.release()

  threadRunner.add(args, task)


def init_run(args):
  threadRunner = ThreadRunner()
  for i in range(1, max(args.repeat, args.threads / 2) + 1):
    run(args, threadRunner, args.a, args.nano_a, args.arg_a, i)
    run(args, threadRunner, args.b, args.nano_b, args.arg_b, i)
  threadRunner.wait()


def get_lower_upper(values):
  i = max(0, (len(values) - 1) / FACTOR)
  return values[i], values[-i - 1]


def different_enough(lower1, upper2):
  return upper2 < DIFF_T * lower1


def get_suspects():
  suspects = []
  for bench in timesA.keys():
    if bench not in timesB:
      continue
    lowerA, upperA = get_lower_upper(timesA[bench])
    lowerB, upperB = get_lower_upper(timesB[bench])
    if different_enough(lowerA, upperB) or different_enough(lowerB, upperA):
      suspects.append(bench)
  return suspects


def process_bench_pattern(s):
  if ".skp" in s:  # skp benches won't match their exact names...
    return "^\"" + s[0:(s.index(".skp") + 3)] + "\""
  else:
    return "^\"" + s + "\"$"


def suspects_arg(suspects):
  patterns = map(process_bench_pattern, suspects)
  return " --match " + (" ".join(patterns))


def median(array):
  return array[len(array) / 2]


def regression(bench):
  a = median(timesA[bench])
  b = median(timesB[bench])
  if (a == 0):  # bad bench, just return no regression
    return 1
  return b / a


def percentage(x):
  return (x - 1) * 100


def format_r(r):
  return ('%6.2f' % percentage(r)) + "%"


def test():
  args = parse_args()

  init_run(args)
  last_unchanged_iter = 0
  last_suspect_number = -1
  tryCnt = 0
  it = 0
  while tryCnt < MAXTRY:
    it += 1
    suspects = get_suspects()
    if len(suspects) != last_suspect_number:
      last_suspect_number = len(suspects)
      last_unchanged_iter = it
    if (len(suspects) == 0 or it - last_unchanged_iter >= TERM):
      break

    print "Number of suspects at iteration %d: %d" % (it, len(suspects))
    threadRunner = ThreadRunner()
    for j in range(1, max(1, args.threads / 2) + 1):
      run(args, threadRunner, args.a, args.nano_a,
          args.arg_a + suspects_arg(suspects), -j)
      run(args, threadRunner, args.b, args.nano_b,
          args.arg_b + suspects_arg(suspects), -j)
      tryCnt += 1
    threadRunner.wait()

  suspects = get_suspects()
  if len(suspects) == 0:
    print ("%s and %s do not seem to have significant " + \
           "performance differences.") % (args.a, args.b)
  else:
    suspects.sort(key = regression)
    print "%s (compared to %s) is likely" % (args.a, args.b)
    for suspect in suspects:
      r = regression(suspect)
      if r < 1:
        print "\033[31m  %s slower in %s\033[0m" % \
            (format_r(1/r), suspect)
      else:
        print "\033[32m  %s faster in %s\033[0m" % \
            (format_r(r), suspect)

  with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f:
    f.write(json.dumps(map(
        lambda bench: {bench: regression(bench)},
        suspects
    )))
    print ("\033[36mJSON results available in %s\033[0m" % f.name)

  with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out:
    out.write(("bench, significant?, raw regression, " +
               "%(A)s quantile (ns), %(B)s quantile (ns), " +
               "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b})
    for bench in suspects + timesA.keys():
      if (bench not in timesA or bench not in timesB):
        continue
      ta = timesA[bench]
      tb = timesB[bench]
      out.write(
          "%s, %s, %f, " % (bench, bench in suspects, regression(bench)) +
          ' '.join(map(str, get_lower_upper(ta))) + ", " +
          ' '.join(map(str, get_lower_upper(tb))) + ", " +
          ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb))))
      )
    print (("\033[36m" +
            "Compared %d benches. " +
            "%d of them seem to be significantly different." +
            "\033[0m") %
           (len([x for x in timesA if x in timesB]), len(suspects)))
    print ("\033[36mPlease see detailed bench results in %s\033[0m" %
           out.name)


if __name__ == "__main__":
  try:
    test()
  except Exception as e:
    print e
    print HELP
    raise
diff --git a/tools/calmbench/calmbench.py b/tools/calmbench/calmbench.py
new file mode 100644
index 0000000000..4d1319de1a
--- /dev/null
+++ b/tools/calmbench/calmbench.py
@@ -0,0 +1,174 @@
#!/usr/bin/python

# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import sys
import subprocess
import multiprocessing

from argparse import ArgumentParser


README = """
Simply run
\033[36m
    python {0} TEST_GIT_BRANCH
\033[0m
to see if TEST_GIT_BRANCH has performance regressions against master in the
8888 config.

To compare a specific config with svg and skp resources included, add the
--config and --extraarg options. For example,
\033[36m
    python {0} TEST_GIT_BRANCH --config gl \\
        --extraarg "--svgs ~/Desktop/bots/svgs --skps ~/Desktop/bots/skps"
\033[0m
For more options, please see

    python {0} --help
""".format(__file__)


CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
AB_SCRIPT = "ab.py"


def parse_args():
  if len(sys.argv) <= 1 or sys.argv[1] == '-h' or sys.argv[1] == '--help':
    print README

  parser = ArgumentParser(
      description='Noiselessly (hence calm) benchmark a git branch against ' +
                  'a baseline branch (e.g., master) using multiple ' +
                  'nanobench runs.'
  )

  default_threads = max(1, multiprocessing.cpu_count() / 2);
  default_skiadir = os.path.normpath(CURRENT_DIR + "/../../")

  config_help = (
      'nanobench config; we currently support only one config '
      'at a time (default: %(default)s)')
  reps_help = (
      'initial repetitions of the nanobench run; this may be '
      'overridden when we have many threads (default: %(default)s)')
  extraarg_help = (
      'nanobench args (example: --svgs ~/Desktop/bots/svgs --skps '
      '~/Desktop/bots/skps)')
  baseline_help = (
      'baseline branch to compare against (default: %(default)s)')
  basearg_help = (
      'nanobench arg for the baseline branch; if not given, we use '
      'the same arg for both the test branch and the baseline branch')
  threads_help = (
      'number of threads to be used (default: %(default)s); '
      'for GPU configs, this will always be 1')
  no_compile_help = (
      'whether NOT to compile nanobench and copy it to WRITEDIR '
      '(i.e., reuse the previously compiled nanobench)')
  skip_base_help = (
      'whether NOT to run nanobench on the baseline branch '
      '(i.e., reuse previous baseline measurements)')
  noinit_help = (
      'whether to skip the initial nanobench runs (default: %(default)s)')

  definitions = [
    # argname, type, default value, help
    ['--config',   str, '8888', config_help],
    ['--skiadir',  str, default_skiadir, 'default: %(default)s'],
    ['--ninjadir', str, 'out/Release', 'default: %(default)s'],
    ['--writedir', str, '/var/tmp', 'default: %(default)s'],
    ['--extraarg', str, '', extraarg_help],
    ['--baseline', str, 'master', baseline_help],
    ['--basearg',  str, '', basearg_help],
    ['--reps',     int, 2, reps_help],
    ['--threads',  int, default_threads, threads_help]
  ]

  for d in definitions:
    parser.add_argument(d[0], type=d[1], default=d[2], help=d[3])

  parser.add_argument('branch', type=str, help="the test branch to benchmark")
  parser.add_argument('--no-compile', dest='no_compile', action="store_true",
                      help=no_compile_help)
  parser.add_argument('--skip-base', dest='skipbase', action="store_true",
                      help=skip_base_help)
  parser.add_argument('--noinit', dest='noinit', action="store_true",
                      help=noinit_help)
  parser.set_defaults(no_compile=False);
  parser.set_defaults(skipbase=False);
  parser.set_defaults(noinit=False);

  args = parser.parse_args()
  if not args.basearg:
    args.basearg = args.extraarg

  return args


def nano_path(args, branch):
  return args.writedir + '/nanobench_' + branch


def compile_branch(args, branch):
  print "Compiling branch %s" % branch

  os.chdir(args.skiadir)
  commands = [
    ['git', 'checkout', branch],
    ['ninja', '-C', args.ninjadir, 'nanobench'],
    ['cp', args.ninjadir + '/nanobench', nano_path(args, branch)]
  ]
  for command in commands:
    subprocess.check_call(command, cwd=args.skiadir)


def compile_nanobench(args):
  compile_branch(args, args.branch)
  compile_branch(args, args.baseline)


def main():
  args = parse_args()

  # copy ab.py to writedir in case it disappears after a git branch switch
  orig_ab_name = CURRENT_DIR + "/" + AB_SCRIPT
  temp_ab_name = args.writedir + "/" + AB_SCRIPT
  subprocess.check_call(['cp', orig_ab_name, temp_ab_name])

  if not args.no_compile:
    compile_nanobench(args)

  command = [
    'python',
    temp_ab_name,
    args.skiadir,
    args.writedir,
    args.branch + ("_A" if args.branch == args.baseline else ""),
    args.baseline + ("_B" if args.branch == args.baseline else ""),
    nano_path(args, args.branch),
    nano_path(args, args.baseline),
    args.extraarg,
    args.basearg,
    str(args.reps),
    "true" if args.skipbase else "false",
    args.config,
    str(args.threads if args.config in ["8888", "565"] else 1),
    "true" if args.noinit else "false"
  ]

  p = subprocess.Popen(command, cwd=args.skiadir)
  try:
    p.wait()
  except KeyboardInterrupt:
    try:
      p.terminate()
    except OSError as e:
      print e


if __name__ == "__main__":
  main()
--
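The core check ab.py performs, as described in its header comment, boils down
to: collect the sorted min_ms samples for one bench from branches A and B,
take the rough (1/3, 2/3) quantile range of each, and flag the bench as a
suspect only when one range lies entirely below the other after scaling by
DIFF_T. The sketch below is a standalone illustration of that check on
made-up sample values; the is_suspect helper and the numbers are hypothetical,
while FACTOR, DIFF_T, get_lower_upper, and different_enough mirror the
definitions in ab.py above.

    # Standalone illustration of the quantile-disjointness test (hypothetical
    # data, not part of the patch).
    FACTOR = 3     # lower/upper quantile factor, same constant as ab.py
    DIFF_T = 0.99  # "different enough" threshold, same constant as ab.py

    def get_lower_upper(values):
      # values are one bench's min_ms samples; return ~1/3 and ~2/3 quantiles.
      values = sorted(values)
      i = max(0, (len(values) - 1) // FACTOR)
      return values[i], values[-i - 1]

    def different_enough(lower1, upper2):
      # True when the second range's upper bound sits below DIFF_T times the
      # first range's lower bound.
      return upper2 < DIFF_T * lower1

    def is_suspect(times_a, times_b):
      # Hypothetical helper: flag a bench when the two quantile ranges are
      # disjoint in either direction.
      lower_a, upper_a = get_lower_upper(times_a)
      lower_b, upper_b = get_lower_upper(times_b)
      return (different_enough(lower_a, upper_b) or
              different_enough(lower_b, upper_a))

    if __name__ == "__main__":
      # Made-up samples (ns) for one bench measured on branches A and B.
      a = [103.0, 104.0, 105.0, 106.0, 107.0, 108.0]
      b = [95.0, 96.0, 96.5, 97.0, 97.5, 98.0]
      print(is_suspect(a, b))  # True: B's (1/3, 2/3) range lies entirely below A's

Requiring the two quantile ranges to be fully disjoint means a single noisy
sample cannot trigger a report, which is what keeps the tool's output
low-noise even with only a handful of nanobench runs per branch.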