aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools/parse_llvm_coverage.py
diff options
context:
space:
mode:
authorGravatar borenet <borenet@google.com>2015-07-20 09:43:36 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-07-20 09:43:36 -0700
commita6ae14e2232a46fbc3baed74e2581dde119e7f1e (patch)
tree171443b6fb7c2b5e4a22cdea600ca76f0645fa60 /tools/parse_llvm_coverage.py
parent54f313ccb8eba45954fe0a45092433cbf739b053 (diff)
Split llvm_coverage_run into two scripts
... One for running to generate the coverage data, another to parse the data into various formats. NOTRY=true BUG=skia:2430 Review URL: https://codereview.chromium.org/1239963002
Diffstat (limited to 'tools/parse_llvm_coverage.py')
-rwxr-xr-xtools/parse_llvm_coverage.py201
1 file changed, 201 insertions, 0 deletions
diff --git a/tools/parse_llvm_coverage.py b/tools/parse_llvm_coverage.py
new file mode 100755
index 0000000000..2eadddd3cb
--- /dev/null
+++ b/tools/parse_llvm_coverage.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+# Copyright (c) 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+"""Parse an LLVM coverage report to generate useable results."""
+
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+
+
+def _fix_filename(filename):
+ """Return a filename which we can use to identify the file.
+
+ The file paths printed by llvm-cov take the form:
+
+ /path/to/repo/out/dir/../../src/filename.cpp
+
+ And then they're truncated to 22 characters with leading ellipses:
+
+ ...../../src/filename.cpp
+
+ This makes it really tough to determine whether the file actually belongs in
+ the Skia repo. This function strips out the leading junk so that, if the file
+ exists in the repo, the returned string matches the end of some relative path
+ in the repo. This doesn't guarantee correctness, but it's about as close as
+ we can get.
+ """
+ return filename.split('..')[-1].lstrip('./')
+
+
def _file_in_repo(filename, all_files):
  """Return the name of the checked-in file matching the given filename.

  Use suffix matching to determine which checked-in files the given filename
  matches. If there are no matches or multiple matches, return None.

  Args:
    filename: string; a (possibly truncated) file path as printed by
        llvm-cov.
    all_files: list of strings; relative paths of all checked-in files.

  Returns:
    The matching checked-in file path, or None.
  """
  new_file = _fix_filename(filename)
  matched = [f for f in all_files if f.endswith(new_file)]
  if len(matched) == 1:
    return matched[0]
  if len(matched) > 1:
    # sys.stderr.write works under both Python 2 and Python 3; the original
    # `print >> sys.stderr` statement is a SyntaxError under Python 3.
    sys.stderr.write('WARNING: multiple matches for %s; skipping:\n\t%s\n'
                     % (new_file, '\n\t'.join(matched)))
  return None
+
+
def _get_per_file_per_line_coverage(report):
  """Return a dict whose keys are file names and values are coverage data.

  Values are lists which take the form (lineno, coverage, code).

  Args:
    report: string; the full text of an llvm-cov report.

  Returns:
    dict mapping checked-in file path -> list of (lineno, cov, code) tuples,
    where cov is an int execution count, or None for lines whose coverage
    we don't care about (non-executable lines).
  """
  all_files = subprocess.check_output(['git', 'ls-files']).splitlines()
  lines = report.splitlines()
  current_file = None
  file_lines = []
  files = {}
  not_checked_in = '%'  # Use this as the file name for not-checked-in files.
  for line in lines:
    # A line of the form "path/to/file.cpp:" starts a new per-file section.
    m = re.match('([a-zA-Z0-9\./_-]+):', line)
    if m:
      if current_file and current_file != not_checked_in:
        files[current_file] = file_lines
      match_filename = _file_in_repo(m.groups()[0], all_files)
      current_file = match_filename or not_checked_in
      file_lines = []
    else:
      if current_file != not_checked_in:
        # Skip separator rows ("  ----") and continuation rows ("  |...").
        skip = re.match('^\s{2}-+$|^\s{2}\|.+$', line)
        if line and not skip:
          # Data rows look like "<count>|<lineno>|<source code>".
          cov, linenum, code = line.split('|', 2)
          cov = cov.strip()
          if cov:
            cov = int(cov)
          else:
            cov = None  # We don't care about coverage for this line.
          linenum = int(linenum.strip())
          assert linenum == len(file_lines) + 1
          # NOTE(review): .decode assumes Python 2 byte strings; under
          # Python 3 this would fail on str -- confirm intended interpreter.
          file_lines.append((linenum, cov, code.decode('utf-8', 'replace')))
  # BUG FIX: flush the final file's lines. The loop above only stores a
  # file's data when the NEXT file header is encountered, so without this
  # the last file in the report was silently dropped.
  if current_file and current_file != not_checked_in:
    files[current_file] = file_lines
  return files
+
+
+
+def _testname(filename):
+ """Transform the file name into an ingestible test name."""
+ return re.sub(r'[^a-zA-Z0-9]', '_', filename)
+
+
def _nanobench_json(results, properties, key):
  """Return the results in JSON format like that produced by nanobench.

  Args:
    results: list of (percent, filename) pairs.
    properties: dict of build properties to copy into the output.
    key: dict identifying this bot.

  Returns:
    dict in nanobench JSON shape.
  """
  formatted = {}
  # Copy over the properties first, then set the 'key' and 'results' keys,
  # in order to avoid bad formatting in case the user passes in a properties
  # dict containing those keys.
  formatted.update(properties)
  formatted['key'] = key
  all_results = {}
  for percent, f in results:
    all_results[_testname(f)] = {
        'coverage': {
            'percent': percent,
            'options': {
                'fullname': f,
                'dir': os.path.dirname(f),
            },
        },
    }
  formatted['results'] = all_results
  return formatted
+
+
+def _parse_key_value(kv_list):
+ """Return a dict whose key/value pairs are derived from the given list.
+
+ For example:
+
+ ['k1', 'v1', 'k2', 'v2']
+ becomes:
+
+ {'k1': 'v1',
+ 'k2': 'v2'}
+ """
+ if len(kv_list) % 2 != 0:
+ raise Exception('Invalid key/value pairs: %s' % kv_list)
+
+ rv = {}
+ for i in xrange(len(kv_list) / 2):
+ rv[kv_list[i*2]] = kv_list[i*2+1]
+ return rv
+
+
+def _get_per_file_summaries(line_by_line):
+ """Summarize the full line-by-line coverage report by file."""
+ per_file = []
+ for filepath, lines in line_by_line.iteritems():
+ total_lines = 0
+ covered_lines = 0
+ for _, cov, _ in lines:
+ if cov is not None:
+ total_lines += 1
+ if cov > 0:
+ covered_lines += 1
+ if total_lines > 0:
+ per_file.append((float(covered_lines)/float(total_lines)*100.0,
+ filepath))
+ return per_file
+
+
def main():
  """Generate useful data from a coverage report.

  Reads the llvm-cov report named by --report, optionally dumps the
  line-by-line coverage as JSON (--linebyline) and/or a nanobench-style
  summary (--nanobench, which requires --key and --properties).
  """
  # Parse args.
  parser = argparse.ArgumentParser()
  parser.add_argument('--report', help='input file; an llvm coverage report.',
                      required=True)
  parser.add_argument('--nanobench', help='output file for nanobench data.')
  parser.add_argument(
      '--key', metavar='key_or_value', nargs='+',
      help='key/value pairs identifying this bot.')
  parser.add_argument(
      '--properties', metavar='key_or_value', nargs='+',
      help='key/value pairs representing properties of this build.')
  parser.add_argument('--linebyline',
                      help='output file for line-by-line JSON data.')
  args = parser.parse_args()

  # The nanobench output embeds the bot key and build properties, so both
  # must be supplied whenever --nanobench is requested.
  if args.nanobench and not (args.key and args.properties):
    raise Exception('--key and --properties are required with --nanobench')

  with open(args.report) as f:
    report = f.read()

  line_by_line = _get_per_file_per_line_coverage(report)

  if args.linebyline:
    with open(args.linebyline, 'w') as f:
      json.dump(line_by_line, f)

  if args.nanobench:
    # Parse the key and properties for use in the nanobench JSON output.
    key = _parse_key_value(args.key)
    properties = _parse_key_value(args.properties)

    # Get per-file summaries.
    per_file = _get_per_file_summaries(line_by_line)

    # Write results.
    format_results = _nanobench_json(per_file, properties, key)
    with open(args.nanobench, 'w') as f:
      json.dump(format_results, f)


if __name__ == '__main__':
  main()