#!/usr/bin/env python
# Copyright (c) 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


"""Parse an LLVM coverage report to generate usable results."""


import argparse
import json
import os
import re
import subprocess
import sys


# Explicitly declare the module API. The helpers are private by convention but
# exported here so they can be exercised directly by tests.
__all__ = [
    '_fix_filename', '_file_in_repo', '_get_per_file_per_line_coverage',
    '_testname', '_nanobench_json', '_parse_key_value',
    '_get_per_file_summaries', 'main',
]


def _fix_filename(filename):
  """Return a filename which we can use to identify the file.

  The file paths printed by llvm-cov take the form:

      /path/to/repo/out/dir/../../src/filename.cpp

  And then they're truncated to 22 characters with leading ellipses:

      ...../../src/filename.cpp

  This makes it really tough to determine whether the file actually belongs
  in the Skia repo. This function strips out the leading junk so that, if the
  file exists in the repo, the returned string matches the end of some
  relative path in the repo. This doesn't guarantee correctness, but it's
  about as close as we can get.
  """
  # Everything before the last '..' is out-dir junk or ellipses; what remains
  # (minus leading '.' / '/' characters) is a repo-relative path suffix.
  return filename.split('..')[-1].lstrip('./')


def _file_in_repo(filename, all_files):
  """Return the name of the checked-in file matching the given filename.

  Use suffix matching to determine which checked-in files the given filename
  matches. If there are no matches or multiple matches, return None.
  """
  new_file = _fix_filename(filename)
  matched = [f for f in all_files if f.endswith(new_file)]
  if len(matched) == 1:
    return matched[0]
  elif len(matched) > 1:
    # Ambiguous suffix: warn and treat as not found rather than guessing.
    # (Was `print >> sys.stderr, ...`; rewritten to run on Python 2 and 3.)
    sys.stderr.write('WARNING: multiple matches for %s; skipping:\n\t%s\n'
                     % (new_file, '\n\t'.join(matched)))
  return None


def _get_per_file_per_line_coverage(report):
  """Return a dict whose keys are file names and values are coverage data.

  Values are lists which take the form (lineno, coverage, code).
  """
  # check_output returns bytes on Python 3; decode so suffix matching against
  # the str paths from llvm-cov works on both 2 and 3.
  all_files = subprocess.check_output(
      ['git', 'ls-files']).decode('utf-8').splitlines()
  lines = report.splitlines()
  current_file = None
  file_lines = []
  files = {}
  not_checked_in = '%'  # Use this as the file name for not-checked-in files.
  for line in lines:
    m = re.match(r'([a-zA-Z0-9\./_-]+):', line)
    if m:
      # New file header: flush the previous file's lines first.
      if current_file and current_file != not_checked_in:
        files[current_file] = file_lines
      match_filename = _file_in_repo(m.groups()[0], all_files)
      current_file = match_filename or not_checked_in
      file_lines = []
    else:
      if current_file != not_checked_in:
        # Skip separator rows ('  ----') and continuation rows ('  |...').
        skip = re.match(r'^\s{2}-+$|^\s{2}\|.+$', line)
        if line and not skip:
          cov, linenum, code = line.split('|', 2)
          cov = cov.strip()
          if cov:
            cov = int(cov)
          else:
            cov = None  # We don't care about coverage for this line.
          linenum = int(linenum.strip())
          assert linenum == len(file_lines) + 1
          # On Python 2 `code` is a byte string and needs decoding; on
          # Python 3 it is already text.
          if isinstance(code, bytes):
            code = code.decode('utf-8', 'replace')
          file_lines.append((linenum, cov, code))
  # BUG FIX: the original never flushed the final file, so the last
  # checked-in file in the report lost all of its coverage data. The guard
  # makes this a no-op if a trailing header already rotated current_file.
  if current_file and current_file != not_checked_in:
    files[current_file] = file_lines
  return files


def _testname(filename):
  """Transform the file name into an ingestible test name."""
  return re.sub(r'[^a-zA-Z0-9]', '_', filename)


def _nanobench_json(results, properties, key):
  """Return the results in JSON format like that produced by nanobench.

  Args:
    results: list of (percent_covered, filepath) pairs.
    properties: dict of build properties to merge into the top level.
    key: dict identifying the bot, stored under 'key'.
  """
  rv = {}
  # Copy over the properties first, then set the 'key' and 'results' keys,
  # in order to avoid bad formatting in case the user passes in a properties
  # dict containing those keys.
  rv.update(properties)
  rv['key'] = key
  rv['results'] = {
      _testname(f): {
          'coverage': {
              'percent': percent,
              'options': {
                  'fullname': f,
                  'dir': os.path.dirname(f),
              },
          },
      } for percent, f in results
  }
  return rv


def _parse_key_value(kv_list):
  """Return a dict whose key/value pairs are derived from the given list.

  For example:

      ['k1', 'v1', 'k2', 'v2']

  becomes:

      {'k1': 'v1',
       'k2': 'v2'}

  Raises:
    Exception: if the list has an odd number of elements.
  """
  if len(kv_list) % 2 != 0:
    raise Exception('Invalid key/value pairs: %s' % kv_list)

  rv = {}
  # range + floor division: works identically on Python 2 (where the original
  # used xrange and true division happened to be integral) and Python 3.
  for i in range(len(kv_list) // 2):
    rv[kv_list[i*2]] = kv_list[i*2+1]
  return rv


def _get_per_file_summaries(line_by_line):
  """Summarize the full line-by-line coverage report by file.

  Returns a list of (percent_covered, filepath) pairs; files where no line
  carries coverage information are omitted.
  """
  per_file = []
  # .items() instead of .iteritems(): valid on both Python 2 and 3.
  for filepath, lines in line_by_line.items():
    total_lines = 0
    covered_lines = 0
    for _, cov, _ in lines:
      if cov is not None:
        total_lines += 1
        if cov > 0:
          covered_lines += 1
    if total_lines > 0:
      per_file.append((float(covered_lines)/float(total_lines)*100.0,
                       filepath))
  return per_file


def main():
  """Generate useful data from a coverage report."""
  # Parse args.
  parser = argparse.ArgumentParser()
  parser.add_argument('--report', help='input file; an llvm coverage report.',
                      required=True)
  parser.add_argument('--nanobench', help='output file for nanobench data.')
  parser.add_argument(
      '--key', metavar='key_or_value', nargs='+',
      help='key/value pairs identifying this bot.')
  parser.add_argument(
      '--properties', metavar='key_or_value', nargs='+',
      help='key/value pairs representing properties of this build.')
  parser.add_argument('--linebyline',
                      help='output file for line-by-line JSON data.')
  args = parser.parse_args()

  if args.nanobench and not (args.key and args.properties):
    raise Exception('--key and --properties are required with --nanobench')

  with open(args.report) as f:
    report = f.read()

  line_by_line = _get_per_file_per_line_coverage(report)

  if args.linebyline:
    with open(args.linebyline, 'w') as f:
      json.dump(line_by_line, f)

  if args.nanobench:
    # Parse the key and properties for use in the nanobench JSON output.
    key = _parse_key_value(args.key)
    properties = _parse_key_value(args.properties)

    # Get per-file summaries.
    per_file = _get_per_file_summaries(line_by_line)

    # Write results.
    format_results = _nanobench_json(per_file, properties, key)
    with open(args.nanobench, 'w') as f:
      json.dump(format_results, f)


if __name__ == '__main__':
  main()