bench/bench_analyze.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247

#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be found
# in the LICENSE file.

""" Analyze recent bench data from graphs, and output suggested ranges.

This script reads and parses Skia benchmark values from the xhtml files
generated by bench_graph_svg.py, and outputs an html file containing suggested
bench ranges to use in bench_expectations.txt, with analytical plots.
"""

__author__ = 'bensong@google.com (Ben Chen)'

import getopt
import math
import re
import sys
import urllib
from datetime import datetime


# Constants for calculating suggested bench ranges.
WINDOW = 5  # Moving average sliding window size.
# We use moving average as expected bench value, and calculate average Variance
# of bench from the moving average. Set range to be [X_UB * Variance above
# moving average, X_LB * Variance below moving average] of latest revision.
X_UB = 4.0
X_LB = 5.0

# List of platforms.
PLATFORMS = ['GalaxyNexus_4-1_Float_Release',
             'Mac_Float_NoDebug_32',
             'Mac_Float_NoDebug_64',
             'MacMiniLion_Float_NoDebug_32',
             'MacMiniLion_Float_NoDebug_64',
             'Nexus7_4-1_Float_Release',
             'Shuttle_Ubuntu12_ATI5770_Float_Release_64',
             'Shuttle_Win7_Intel_Float_Release_32',
             'Shuttle_Win7_Intel_Float_Release_64',
             'Xoom_4-1_Float_Release'
            ]

# List of bench representation algorithms. Flag "-a" is chosen from the list.
ALGS = ['25th', 'avg', 'med', 'min']

# Regular expressions for parsing bench/revision values.
HEIGHT_RE = 'height (\d+\.\d+) corresponds to bench value (\d+\.\d+).-->'
REV_RE = '<rect id="(\d+)" x="(\d+\.\d+)" y="'  # Revision corresponding x.
LINE_RE = '<polyline id="(.*)".*points="(.*)"/>'  # Bench value lines.

# Bench graph url pattern.
INPUT_URL_TEMPLATE = ('http://chromium-skia-gm.commondatastorage.googleapis.com'
                      '/graph-Skia_%s-2.xhtml')

# Output HTML elements and templates.
HTML_HEAD = ('<html><head><title>Skia Bench Expected Ranges</title>'
             '<script type="text/javascript" src="https://raw.github.com/google'
             '/skia-buildbot/master/dygraph-combined.js"></script></head><body>'
             'Please adjust values as appropriate and update benches to monitor'
             ' in bench/bench_expectations.txt.<br><br>')
HTML_SUFFIX = '</body></html>'
GRAPH_PREFIX = ('<br>%s<br><div id="%s" style="width:400px;height:200px"></div>'
                '<script type="text/javascript">g%s=new Dygraph('
                'document.getElementById("%s"),"rev,bench,alert\\n')
GRAPH_SUFFIX = ('",{customBars: true,"alert":{strokeWidth:0.0,drawPoints:true,'
                'pointSize:4,highlightCircleSize:6}});</script>')


def Usage():
  """Prints flag usage information."""
  print '-a <representation-algorithm>: defaults to "25th".'
  print '  If set, must be one of the list element in ALGS defined above.'
  print '-b <bench-prefix>: prefix of matching bench names to analyze.'
  print '  Only include benchmarks whose names start with this string.'
  print '  Cannot be empty, because there are too many benches overall.'
  print '-o <file>: html output filename. Output to STDOUT if not set.'
  print '-p <platform-prefix>: prefix of platform names to analyze.'
  print '  PLATFORMS has list of matching candidates. Matches all if not set.'

def GetBenchValues(page, bench_prefix):
  """Returns a dict of matching bench values from the given xhtml page.
  Args:
    page: substring used to construct the specific bench graph URL to fetch.
    bench_prefix: only benches starting with this string will be included.

  Returns:
    a dict mapping benchmark name and revision combinations to bench values.
  """
  height = None
  max_bench = None
  height_scale = None
  revisions = []
  x_axes = []  # For calculating corresponding revisions.
  val_dic = {}  # dict[bench_name][revision] -> bench_value

  lines = urllib.urlopen(INPUT_URL_TEMPLATE % page).readlines()
  for line in lines:
    height_match = re.search(HEIGHT_RE, line)
    if height_match:
      height = float(height_match.group(1))
      max_bench = float(height_match.group(2))
      height_scale = max_bench / height

    rev_match = re.search(REV_RE, line)
    if rev_match:
      revisions.append(int(rev_match.group(1)))
      x_axes.append(float(rev_match.group(2)))

    line_match = re.search(LINE_RE, line)
    if not line_match:
      continue
    bench = line_match.group(1)
    bench = bench[:bench.find('_{')]
    if not bench.startswith(bench_prefix):
      continue
    if bench not in val_dic:
      val_dic[bench] = {}

    vals = line_match.group(2).strip().split(' ')
    if len(vals) < WINDOW:  # Too few bench data points; skip.
      continue
    for val in vals:
      x, y = [float(i) for i in val.split(',')]
      for i in range(len(x_axes)):
        if x <= x_axes[i]:  # Found corresponding bench revision.
          break
      val_dic[bench][revisions[i]] = float(
          '%.3f' % ((height - y) * height_scale))

  return val_dic

def CreateBenchOutput(page, bench, val_dic):
  """Returns output for the given page and bench data in dict.
  Args:
    page: substring of bench graph webpage, to indicate the bench platform.
    bench: name of the benchmark to process.
    val_dic: dict[bench_name][revision] -> bench_value.

  Returns:
    string of html/javascript as part of the whole script output for the bench.
  """
  revs = val_dic[bench].keys()
  revs.sort()
  # Uses moving average to calculate expected bench variance, then sets
  # expectations and ranges accordingly.
  variances = []
  moving_avgs = []
  points = []
  for rev in revs:
    points.append(val_dic[bench][rev])
    if len(points) >= WINDOW:
      moving_avgs.append(sum(points[-WINDOW:]) / WINDOW)
      variances.append(abs(points[-1] - moving_avgs[-1]))
    else:  # For the first WINDOW-1 points, cannot calculate moving average.
      moving_avgs.append(points[-1])  # Uses actual value as estimates.
      variances.append(0)
  if len(variances) >= WINDOW:
    for i in range(WINDOW - 1):
      # Backfills estimated variances for the first WINDOW-1 points.
      variances[i] = variances[WINDOW - 1]

  avg_var = sum(variances) / len(variances)
  for val in variances:  # Removes outlier variances. Only does one iter.
    if val > min(X_LB, X_UB) * avg_var:
      variances.remove(val)
  avg_var = sum(variances) / len(variances)

  graph_id = '%s_%s' % (bench, page.replace('-', '_'))
  expectations = '%s,%s,%.2f,%.2f,%.2f' % (bench, page, moving_avgs[-1],
                                           moving_avgs[-1] - X_LB * avg_var,
                                           moving_avgs[-1] + X_UB * avg_var)
  out = GRAPH_PREFIX % (expectations, graph_id, graph_id, graph_id)
  for i in range(len(revs)):
    out += '%s,%.2f;%.2f;%.2f,' % (revs[i], moving_avgs[i] - X_LB * avg_var,
                                   points[i], moving_avgs[i] + X_UB * avg_var)
    if (points[i] > moving_avgs[i] + X_UB * avg_var or
        points[i] < moving_avgs[i] - X_LB * avg_var):  # Mark as alert point.
      out += '%.2f;%.2f;%.2f\\n' % (points[i], points[i], points[i])
    else:
      out += 'NaN;NaN;NaN\\n'

  return out

def main():
  """Parses flags and outputs analysis results."""
  try:
    opts, _ = getopt.getopt(sys.argv[1:], 'a:b:o:p:')
  except getopt.GetoptError, err:
    Usage()
    sys.exit(2)

  alg = '25th'
  bench_prefix = None
  out_file = None
  platform_prefix = ''
  for option, value in opts:
    if option == '-a':
      if value not in ALGS:
        raise Exception('Invalid flag -a (%s): must be set to one of %s.' %
                        (value, str(ALGS)))
      alg = value
    elif option == '-b':
      bench_prefix = value
    elif option == '-o':
      out_file = value
    elif option == '-p':
      platform_prefix = value
    else:
      Usage()
      raise Exception('Error handling flags.')

  if not bench_prefix:
    raise Exception('Must provide nonempty Flag -b (bench name prefix).')

  pages = []
  for platform in PLATFORMS:
    if not platform.startswith(platform_prefix):
      continue
    pages.append('%s-%s' % (platform, alg))

  if not pages:  # No matching platform found.
    raise Exception('Flag -p (platform prefix: %s) does not match any of %s.' %
                    (platform_prefix, str(PLATFORMS)))

  body = ''
  # Iterates through bench graph xhtml pages for oututting matching benches.
  for page in pages:
    bench_value_dict = GetBenchValues(page, bench_prefix)
    for bench in bench_value_dict:
      body += CreateBenchOutput(page, bench, bench_value_dict) + GRAPH_SUFFIX

  if not body:
    raise Exception('No bench outputs. Most likely there are no matching bench'
                    ' prefix (%s) in Flags -b for platforms %s.\nPlease also '
                    'check if the bench graph URLs are valid at %s.' % (
                        bench_prefix, str(PLATFORMS), INPUT_URL_TEMPLATE))
  if out_file:
    f = open(out_file, 'w+')
    f.write(HTML_HEAD + body + HTML_SUFFIX)
    f.close()
  else:
    print HTML_HEAD + body + HTML_SUFFIX


if '__main__' == __name__:
  main()