aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools/skpbench/parseskpbench.py
blob: 2481e1d7e17844864ee1644cf9c23bc9055c7387 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/env python

# Copyright 2016 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import print_function
from _benchresult import BenchResult
from argparse import ArgumentParser
from datetime import datetime
import collections
import operator
import os
import sys
import tempfile
import urllib
import urlparse
import webbrowser

# Module-level CLI definition. Note that parse_args() runs at import time, so
# FLAGS is available to everything defined below (Parser reads it directly).
__argparse = ArgumentParser(description='''

Parses output files from skpbench.py into csv.

This script can also be used to generate a Google sheet:

(1) Install the "Office Editing for Docs, Sheets & Slides" Chrome extension:
    https://chrome.google.com/webstore/detail/office-editing-for-docs-s/gbkeegbaiigmenfmjfclcdgdpimamgkj

(2) Designate Chrome os-wide as the default application for opening .csv files.

(3) Run parseskpbench.py with the --open flag.

''')

# Which per-bench statistic becomes the cell value in the csv.
__argparse.add_argument('-r', '--result',
    choices=['median', 'accum', 'max', 'min'], default='median',
    help="result to use for cell values")
__argparse.add_argument('-f', '--force',
    action='store_true', help='silently ignore warnings')
__argparse.add_argument('-o', '--open',
    action='store_true',
    help="generate a temp file and open it (theoretically in a web browser)")
# Default temp-file name is timestamped at import time, e.g.
# skpbench_2016-01-01_12.00.00.csv.
__argparse.add_argument('-n', '--name',
    default='skpbench_%s' % datetime.now().strftime('%Y-%m-%d_%H.%M.%S.csv'),
    help="if using --open, a name for the temp file")
__argparse.add_argument('sources',
    nargs='+', help="source files with skpbench results ('-' for stdin)")

FLAGS = __argparse.parse_args()


def _product(values):
  """Returns the product of an iterable of numbers (1.0 for empty input)."""
  result = 1.0
  for value in values:
    result *= value
  return result


class Parser:
  """Accumulates skpbench results and renders them as a csv table.

  Rows are benches, columns are configs, and each cell holds the statistic
  selected by --result. All parsed results must agree on the metric; sample
  counts and sampling times must also agree unless --force is given.
  """
  def __init__(self):
    self.configs = list() # use list to preserve the order configs appear in.
    self.rows = collections.defaultdict(dict)  # bench -> {config: value string}
    self.cols = collections.defaultdict(dict)  # config -> {bench: numeric value}
    self.metric = None     # e.g. 'ms'; taken from the first result seen.
    self.samples = None    # sample count of the first result seen.
    self.sample_ms = None  # sampling time of the first result seen.

  def parse_file(self, infile):
    """Parses skpbench result lines from infile into rows/cols.

    Non-matching lines are skipped. Raises ValueError on mismatched metrics,
    or (without --force) mismatched sample counts or sampling times.
    """
    for line in infile:
      match = BenchResult.match(line)
      if not match:
        continue
      if self.metric is None:
        self.metric = match.metric
      elif match.metric != self.metric:
        raise ValueError("results have mismatched metrics (%s and %s)" %
                         (self.metric, match.metric))
      if self.samples is None:
        self.samples = match.samples
      elif not FLAGS.force and match.samples != self.samples:
        raise ValueError("results have mismatched number of samples. "
                         "(use --force to ignore)")
      if self.sample_ms is None:
        self.sample_ms = match.sample_ms
      elif not FLAGS.force and match.sample_ms != self.sample_ms:
        raise ValueError("results have mismatched sampling times. "
                         "(use --force to ignore)")
      if match.config not in self.configs:
        self.configs.append(match.config)
      self.rows[match.bench][match.config] = match.get_string(FLAGS.result)
      self.cols[match.config][match.bench] = getattr(match, FLAGS.result)

  def print_csv(self, outfile=sys.stdout):
    """Writes the csv table to outfile, plus MEAN/GEOMEAN summary rows when
    there is more than one bench.

    Raises ValueError if a bench is missing a value for some config
    (unless --force, which leaves the cell empty).
    """
    print('%s_%s' % (FLAGS.result, self.metric), file=outfile)

    # Write the header.
    outfile.write('bench,')
    for config in self.configs:
      outfile.write('%s,' % config)
    outfile.write('\n')

    # Write the rows.
    for bench, row in self.rows.items():
      outfile.write('%s,' % bench)
      for config in self.configs:
        if config in row:
          outfile.write('%s,' % row[config])
        elif FLAGS.force:
          outfile.write(',')
        else:
          raise ValueError("%s: missing value for %s. (use --force to ignore)" %
                           (bench, config))
      outfile.write('\n')

    # Add simple, literal averages. Use sum() and an explicit product instead
    # of the builtin reduce(), which no longer exists in Python 3; the results
    # are bit-identical for float inputs.
    if len(self.rows) > 1:
      outfile.write('\n')
      self.__print_computed_row('MEAN',
        lambda col: sum(col.values()) / len(col),
        outfile=outfile)
      self.__print_computed_row('GEOMEAN',
        lambda col: _product(col.values()) ** (1.0 / len(col)),
        outfile=outfile)

  def __print_computed_row(self, name, func, outfile=sys.stdout):
    """Writes one summary row whose cells are func(column values) per config."""
    outfile.write('%s,' % name)
    for config in self.configs:
      # Summary rows only make sense when every column is fully populated.
      assert(len(self.cols[config]) == len(self.rows))
      outfile.write('%.4g,' % func(self.cols[config]))
    outfile.write('\n')


def main():
  """Entry point: parse every source of skpbench results, then emit csv
  (to stdout, or to a temp file opened in the default .csv handler when
  --open is given)."""
  parser = Parser()

  # Feed each input into the shared parser; '-' designates stdin.
  for source in FLAGS.sources:
    if source == '-':
      parser.parse_file(sys.stdin)
      continue
    with open(source, mode='r') as infile:
      parser.parse_file(infile)

  if not FLAGS.open:
    parser.print_csv()
    return

  # --open: write the csv into a fresh temp directory (so the name from
  # --name can be used verbatim) and hand the file uri to the browser.
  basename = FLAGS.name
  if os.path.splitext(basename)[1] != '.csv':
    basename += '.csv'
  pathname = os.path.join(tempfile.mkdtemp(), basename)
  with open(pathname, mode='w') as outfile:
    parser.print_csv(outfile=outfile)
  fileuri = urlparse.urljoin('file:', urllib.pathname2url(pathname))
  print('opening %s' % fileuri)
  webbrowser.open(fileuri)


# Standard entry guard: run only when executed as a script, not on import.
if __name__ == '__main__':
  main()