aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools/parse_llvm_coverage.py
blob: 5569fadac98754e0c30100b6ac3506f37f077791 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#!/usr/bin/env python
# Copyright (c) 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


"""Parse an LLVM coverage report to generate useable results."""


import argparse
import json
import os
import re
import subprocess
import sys


def _fix_filename(filename):
  """Return a filename which we can use to identify the file.

  The file paths printed by llvm-cov take the form:

      /path/to/repo/out/dir/../../src/filename.cpp

  And then they're truncated to 22 characters with leading ellipses:

      ...../../src/filename.cpp

  This makes it really tough to determine whether the file actually belongs in
  the Skia repo.  This function strips out the leading junk so that, if the file
  exists in the repo, the returned string matches the end of some relative path
  in the repo. This doesn't guarantee correctness, but it's about as close as
  we can get.
  """
  return filename.split('..')[-1].lstrip('./')


def _file_in_repo(filename, all_files):
  """Return the name of the checked-in file matching the given filename.

  Use suffix matching to determine which checked-in files the given filename
  matches. If there are no matches or multiple matches, return None.

  Args:
      filename: string; a path as printed by llvm-cov (possibly truncated).
      all_files: list of strings; repo-relative paths of checked-in files.

  Returns:
      The single matching repo-relative path, or None.
  """
  new_file = _fix_filename(filename)
  matched = [f for f in all_files if f.endswith(new_file)]
  if len(matched) == 1:
    return matched[0]
  if len(matched) > 1:
    # Ambiguous match: warn and skip rather than guessing.  sys.stderr.write
    # works on both Python 2 and 3; the original "print >> sys.stderr"
    # statement is a syntax error on Python 3.
    sys.stderr.write('WARNING: multiple matches for %s; skipping:\n\t%s\n'
                     % (new_file, '\n\t'.join(matched)))
  return None


def _get_per_file_per_line_coverage(report):
  """Return a dict whose keys are file names and values are coverage data.

  Values are lists which take the form (lineno, coverage, code), where
  coverage is an int count or None for lines whose coverage is not tracked.

  Args:
      report: string; the full text of an llvm-cov report.

  Returns:
      dict mapping repo-relative file path to its list of line tuples.
  """
  # Collect the repo-relative paths of all checked-in files, skipping
  # hidden files/dirs, .pyc files, and third_party/externals roots.
  cwd = os.getcwd()
  all_files = []
  for root, dirs, walk_files in os.walk(cwd):
    if 'third_party/externals' in root:
      continue
    walk_files = [f for f in walk_files
                  if not (f[0] == '.' or f.endswith('.pyc'))]
    dirs[:] = [d for d in dirs if not d[0] == '.']
    for name in walk_files:
      all_files.append(os.path.join(root[len(cwd) + 1:], name))
  all_files.sort()

  # Compile the patterns once; the original recompiled them on every line.
  # Raw strings avoid the invalid '\.'/'\s' escapes of plain literals.
  filename_re = re.compile(r'([a-zA-Z0-9\./_-]+):')
  # Separator rows ("  ----") and continuation rows ("  |...") carry no data.
  skip_re = re.compile(r'^\s{2}-+$|^\s{2}\|.+$')

  current_file = None
  file_lines = []
  coverage_by_file = {}
  not_checked_in = '%'  # Sentinel "filename" for not-checked-in files.
  for line in report.splitlines():
    m = filename_re.match(line)
    if m:
      # A new file section begins; flush the previous file's lines.
      if current_file and current_file != not_checked_in:
        coverage_by_file[current_file] = file_lines
      match_filename = _file_in_repo(m.groups()[0], all_files)
      current_file = match_filename or not_checked_in
      file_lines = []
    elif current_file != not_checked_in:
      if line and not skip_re.match(line):
        # Data rows look like "  cov|  lineno|code...".
        cov, linenum, code = line.split('|', 2)
        cov = cov.strip()
        cov = int(cov) if cov else None  # None: coverage not tracked here.
        linenum = int(linenum.strip())
        # llvm-cov emits lines in order; sanity-check that assumption.
        assert linenum == len(file_lines) + 1
        if isinstance(code, bytes):  # Python 2 str is bytes; decode there.
          code = code.decode('utf-8', 'replace')
        file_lines.append((linenum, cov, code))
  # Flush the final file's section; the original dropped it entirely.
  if current_file and current_file != not_checked_in:
    coverage_by_file[current_file] = file_lines
  return coverage_by_file



def _testname(filename):
  """Transform the file name into an ingestible test name."""
  return re.sub(r'[^a-zA-Z0-9]', '_', filename)


def _nanobench_json(results, properties, key):
  """Return the results in JSON format like that produced by nanobench.

  Args:
      results: list of (percent, lines_not_covered, filepath) tuples.
      properties: dict of build properties to copy into the output.
      key: dict of key/value pairs identifying this bot.

  Returns:
      dict shaped like nanobench output JSON.
  """
  # Copy the properties first, then overwrite 'key' and 'results', so a
  # caller-supplied properties dict containing those keys cannot produce
  # badly-formatted output.
  formatted = {}
  formatted.update(properties)
  formatted['key'] = key
  per_test = {}
  for percent, not_covered_lines, f in results:
    per_test[_testname(f)] = {
      'coverage': {
        'percent': percent,
        'lines_not_covered': not_covered_lines,
        'options': {
          'fullname': f,
          'dir': os.path.dirname(f),
          'source_type': 'coverage',
        },
      },
    }
  formatted['results'] = per_test
  return formatted


def _parse_key_value(kv_list):
  """Return a dict whose key/value pairs are derived from the given list.

  For example:

      ['k1', 'v1', 'k2', 'v2']
  becomes:

      {'k1': 'v1',
       'k2': 'v2'}
  """
  if len(kv_list) % 2 != 0:
    raise Exception('Invalid key/value pairs: %s' % kv_list)

  rv = {}
  for i in xrange(len(kv_list) / 2):
    rv[kv_list[i*2]] = kv_list[i*2+1]
  return rv


def _get_per_file_summaries(line_by_line):
  """Summarize the full line-by-line coverage report by file."""
  per_file = []
  for filepath, lines in line_by_line.iteritems():
    total_lines = 0
    covered_lines = 0
    for _, cov, _ in lines:
      if cov is not None:
        total_lines += 1
        if cov > 0:
          covered_lines += 1
    if total_lines > 0:
      per_file.append((float(covered_lines)/float(total_lines)*100.0,
                       total_lines - covered_lines,
                       filepath))
  return per_file


def main():
  """Generate useful data from a coverage report."""
  # Command-line interface.
  parser = argparse.ArgumentParser()
  parser.add_argument('--report', required=True,
                      help='input file; an llvm coverage report.')
  parser.add_argument('--nanobench', help='output file for nanobench data.')
  parser.add_argument(
      '--key', metavar='key_or_value', nargs='+',
      help='key/value pairs identifying this bot.')
  parser.add_argument(
      '--properties', metavar='key_or_value', nargs='+',
      help='key/value pairs representing properties of this build.')
  parser.add_argument('--linebyline',
                      help='output file for line-by-line JSON data.')
  args = parser.parse_args()

  # nanobench output needs both the bot key and the build properties.
  if args.nanobench and not (args.key and args.properties):
    raise Exception('--key and --properties are required with --nanobench')

  with open(args.report) as report_file:
    report_contents = report_file.read()

  line_by_line = _get_per_file_per_line_coverage(report_contents)

  if args.linebyline:
    with open(args.linebyline, 'w') as out:
      json.dump(line_by_line, out)

  if args.nanobench:
    # Parse the key and properties for use in the nanobench JSON output.
    bot_key = _parse_key_value(args.key)
    build_properties = _parse_key_value(args.properties)

    # Summarize the coverage per file.
    summaries = _get_per_file_summaries(line_by_line)

    # Format and write the results.
    nanobench_data = _nanobench_json(summaries, build_properties, bot_key)
    with open(args.nanobench, 'w') as out:
      json.dump(nanobench_data, out)


# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()