diff options
3 files changed, 75 insertions, 24 deletions
diff --git a/bench/check_bench_regressions.py b/bench/check_bench_regressions.py index b26eb56ae0..48ce180a39 100644 --- a/bench/check_bench_regressions.py +++ b/bench/check_bench_regressions.py @@ -18,6 +18,16 @@ import xml.sax.saxutils # Maximum expected number of characters we expect in an svn revision. MAX_SVN_REV_LENGTH = 5 +# Indices for getting elements from bench expectation files. +# See bench_expectations_<builder>.txt for details. +EXPECTED_IDX = -3 +LB_IDX = -2 +UB_IDX = -1 + +# Indices of the tuple of dictionaries containing slower and faster alerts. +SLOWER = 0 +FASTER = 1 + def usage(): """Prints simple usage information.""" @@ -134,16 +144,35 @@ def read_expectations(expectations, filename): if bench_entry in expectations: raise Exception("Dup entries for bench expectation %s" % bench_entry) - # [<Bench_BmpConfig_TimeType>,<Platform-Alg>] -> (LB, UB) - expectations[bench_entry] = (float(elements[-2]), - float(elements[-1])) + # [<Bench_BmpConfig_TimeType>,<Platform-Alg>] -> (LB, UB, EXPECTED) + expectations[bench_entry] = (float(elements[LB_IDX]), + float(elements[UB_IDX]), + float(elements[EXPECTED_IDX])) + +def check_expectations(lines, expectations, key_suffix): + """Check if any bench results are outside of expected range. + + For each input line in lines, checks the expectations dictionary to see if + the bench is out of the given range. + + Args: + lines: dictionary mapping Label objects to the bench values. + expectations: dictionary returned by read_expectations(). + key_suffix: string of <Platform>-<Alg> containing the bot platform and the + bench representation algorithm. + + Returns: + No return value. -def check_expectations(lines, expectations, revision, key_suffix): - """Check if there are benches in the given revising out of range. + Raises: + Exception containing bench data that are out of range, if any. """ # The platform for this bot, to pass to the dashboard plot. 
platform = key_suffix[ : key_suffix.rfind('-')] - exceptions = [] + # Tuple of dictionaries recording exceptions that are slower and faster, + # respectively. Each dictionary maps off_ratio (ratio of actual to expected) + # to a list of corresponding exception messages. + exceptions = ({}, {}) for line in lines: line_str = str(line) line_str = line_str[ : line_str.find('_{')] @@ -151,14 +180,31 @@ def check_expectations(lines, expectations, revision, key_suffix): if bench_platform_key not in expectations: continue this_bench_value = lines[line] - this_min, this_max = expectations[bench_platform_key] + this_min, this_max, this_expected = expectations[bench_platform_key] if this_bench_value < this_min or this_bench_value > this_max: - exception = 'Bench %s value %s out of range [%s, %s].' % ( - bench_platform_key, this_bench_value, this_min, this_max) - exceptions.append(exception) - if exceptions: - raise Exception('Bench values out of range:\n' + - '\n'.join(exceptions)) + off_ratio = this_bench_value / this_expected + exception = 'Bench %s out of range [%s, %s] (%s vs %s, %s%%).' % ( + bench_platform_key, this_min, this_max, this_bench_value, + this_expected, (off_ratio - 1) * 100) + if off_ratio > 1: # Bench is slower. 
+ exceptions[SLOWER].setdefault(off_ratio, []).append(exception) + else: + exceptions[FASTER].setdefault(off_ratio, []).append(exception) + outputs = [] + for i in [SLOWER, FASTER]: + if exceptions[i]: + ratios = exceptions[i].keys() + ratios.sort(reverse=True) + li = [] + for ratio in ratios: + li.extend(exceptions[i][ratio]) + header = '%s benches got slower (sorted by %% difference):' % len(li) + if i == FASTER: + header = header.replace('slower', 'faster') + outputs.extend(['', header] + li) + + if outputs: + raise Exception('\n'.join(outputs)) def main(): """Parses command line and checks bench expectations.""" @@ -210,8 +256,7 @@ def main(): bench_dict = create_bench_dict(data_points) if bench_expectations: - check_expectations(bench_dict, bench_expectations, rev, - platform_and_alg) + check_expectations(bench_dict, bench_expectations, platform_and_alg) if __name__ == "__main__": diff --git a/tools/tests/benchalerts/Perf-Android-Nexus7-Tegra3-Arm7-Release/expectations.txt b/tools/tests/benchalerts/Perf-Android-Nexus7-Tegra3-Arm7-Release/expectations.txt index e0cdf9d59c..8872fce571 100644 --- a/tools/tests/benchalerts/Perf-Android-Nexus7-Tegra3-Arm7-Release/expectations.txt +++ b/tools/tests/benchalerts/Perf-Android-Nexus7-Tegra3-Arm7-Release/expectations.txt @@ -1,3 +1,4 @@ # Bench expectation entries for testing check_bench_regressions.py. 
-desk_amazon.skp_record_,Perf-Android-Nexus7-Tegra3-Arm7-Release-25th,1.213,-8.908,11.334 -desk_baidu.skp_record_,Perf-Android-Nexus7-Tegra3-Arm7-Release-25th,0.939,5.155,11.033 +desk_amazon.skp_record_,Perf-Android-Nexus7-Tegra3-Arm7-Release-25th,1.1,-1,1.2 +desk_baidu.skp_record_,Perf-Android-Nexus7-Tegra3-Arm7-Release-25th,0.939,0.9,1 +desk_blogger.skp_record_,Perf-Android-Nexus7-Tegra3-Arm7-Release-25th,0.5,0.4,0.6 diff --git a/tools/tests/benchalerts/Perf-Android-Nexus7-Tegra3-Arm7-Release/output-expected/stderr b/tools/tests/benchalerts/Perf-Android-Nexus7-Tegra3-Arm7-Release/output-expected/stderr index 751d5ab396..7bb933d451 100644 --- a/tools/tests/benchalerts/Perf-Android-Nexus7-Tegra3-Arm7-Release/output-expected/stderr +++ b/tools/tests/benchalerts/Perf-Android-Nexus7-Tegra3-Arm7-Release/output-expected/stderr @@ -1,9 +1,14 @@ Traceback (most recent call last): - File "bench/check_bench_regressions.py", line 218, in <module> + File "bench/check_bench_regressions.py", line 259, in <module> main() - File "bench/check_bench_regressions.py", line 214, in main - platform_and_alg) - File "bench/check_bench_regressions.py", line 161, in check_expectations - '\n'.join(exceptions)) -Exception: Bench values out of range: -Bench desk_baidu.skp_record_,Perf-Android-Nexus7-Tegra3-Arm7-Release-25th value 0.83 out of range [5.155, 11.033]. + File "bench/check_bench_regressions.py", line 255, in main + check_expectations(bench_dict, bench_expectations, platform_and_alg) + File "bench/check_bench_regressions.py", line 203, in check_expectations + raise Exception('\n'.join(outputs)) +Exception: +2 benches got slower (sorted by % difference): +Bench desk_blogger.skp_record_,Perf-Android-Nexus7-Tegra3-Arm7-Release-25th out of range [0.4, 0.6] (1.794 vs 0.5, 258.8%). +Bench desk_amazon.skp_record_,Perf-Android-Nexus7-Tegra3-Arm7-Release-25th out of range [-1.0, 1.2] (1.213 vs 1.1, 10.2727272727%). 
+ +1 benches got faster (sorted by % difference): +Bench desk_baidu.skp_record_,Perf-Android-Nexus7-Tegra3-Arm7-Release-25th out of range [0.9, 1.0] (0.83 vs 0.939, -11.6080937167%).