#!/usr/bin/env python
# Copyright (c) 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


"""Parse an LLVM coverage report to generate usable results."""


import argparse
import json
import os
import re
import subprocess
import sys


# Explicitly declare the module API. The helpers are private by convention but
# exported here so they can be exercised directly by tests.
__all__ = [
    '_fix_filename', '_file_in_repo', '_get_per_file_per_line_coverage',
    '_testname', '_nanobench_json', '_parse_key_value',
    '_get_per_file_summaries', 'main',
]


def _fix_filename(filename):
  """Return a filename which we can use to identify the file.

  The file paths printed by llvm-cov take the form:

      /path/to/repo/out/dir/../../src/filename.cpp

  And then they're truncated to 22 characters with leading ellipses:

      ...../../src/filename.cpp

  This makes it really tough to determine whether the file actually belongs
  in the Skia repo. This function strips out the leading junk so that, if the
  file exists in the repo, the returned string matches the end of some
  relative path in the repo. This doesn't guarantee correctness, but it's
  about as close as we can get.
  """
  # Everything before the last '..' is out-dir junk or ellipses; what remains
  # (minus leading '.' / '/' characters) is a repo-relative path suffix.
  return filename.split('..')[-1].lstrip('./')


def _file_in_repo(filename, all_files):
  """Return the name of the checked-in file matching the given filename.

  Use suffix matching to determine which checked-in files the given filename
  matches. If there are no matches or multiple matches, return None.
  """
  new_file = _fix_filename(filename)
  matched = [f for f in all_files if f.endswith(new_file)]
  if len(matched) == 1:
    return matched[0]
  elif len(matched) > 1:
    # Ambiguous suffix: warn and treat as not found rather than guessing.
    # (Was `print >> sys.stderr, ...`; rewritten to run on Python 2 and 3.)
    sys.stderr.write('WARNING: multiple matches for %s; skipping:\n\t%s\n'
                     % (new_file, '\n\t'.join(matched)))
  return None


def _get_per_file_per_line_coverage(report):
  """Return a dict whose keys are file names and values are coverage data.

  Values are lists which take the form (lineno, coverage, code).
  """
  # check_output returns bytes on Python 3; decode so suffix matching against
  # the str paths from llvm-cov works on both 2 and 3.
  all_files = subprocess.check_output(
      ['git', 'ls-files']).decode('utf-8').splitlines()
  lines = report.splitlines()
  current_file = None
  file_lines = []
  files = {}
  not_checked_in = '%'  # Use this as the file name for not-checked-in files.
  for line in lines:
    m = re.match(r'([a-zA-Z0-9\./_-]+):', line)
    if m:
      # New file header: flush the previous file's lines first.
      if current_file and current_file != not_checked_in:
        files[current_file] = file_lines
      match_filename = _file_in_repo(m.groups()[0], all_files)
      current_file = match_filename or not_checked_in
      file_lines = []
    else:
      if current_file != not_checked_in:
        # Skip separator rows ('  ----') and continuation rows ('  |...').
        skip = re.match(r'^\s{2}-+$|^\s{2}\|.+$', line)
        if line and not skip:
          cov, linenum, code = line.split('|', 2)
          cov = cov.strip()
          if cov:
            cov = int(cov)
          else:
            cov = None  # We don't care about coverage for this line.
          linenum = int(linenum.strip())
          assert linenum == len(file_lines) + 1
          # On Python 2 `code` is a byte string and needs decoding; on
          # Python 3 it is already text.
          if isinstance(code, bytes):
            code = code.decode('utf-8', 'replace')
          file_lines.append((linenum, cov, code))
  # BUG FIX: the original never flushed the final file, so the last
  # checked-in file in the report lost all of its coverage data. The guard
  # makes this a no-op if a trailing header already rotated current_file.
  if current_file and current_file != not_checked_in:
    files[current_file] = file_lines
  return files


def _testname(filename):
  """Transform the file name into an ingestible test name."""
  return re.sub(r'[^a-zA-Z0-9]', '_', filename)


def _nanobench_json(results, properties, key):
  """Return the results in JSON format like that produced by nanobench.

  Args:
    results: list of (percent_covered, filepath) pairs.
    properties: dict of build properties to merge into the top level.
    key: dict identifying the bot, stored under 'key'.
  """
  rv = {}
  # Copy over the properties first, then set the 'key' and 'results' keys,
  # in order to avoid bad formatting in case the user passes in a properties
  # dict containing those keys.
  rv.update(properties)
  rv['key'] = key
  rv['results'] = {
      _testname(f): {
          'coverage': {
              'percent': percent,
              'options': {
                  'fullname': f,
                  'dir': os.path.dirname(f),
              },
          },
      } for percent, f in results
  }
  return rv


def _parse_key_value(kv_list):
  """Return a dict whose key/value pairs are derived from the given list.

  For example:

      ['k1', 'v1', 'k2', 'v2']

  becomes:

      {'k1': 'v1',
       'k2': 'v2'}

  Raises:
    Exception: if the list has an odd number of elements.
  """
  if len(kv_list) % 2 != 0:
    raise Exception('Invalid key/value pairs: %s' % kv_list)

  rv = {}
  # range + floor division: works identically on Python 2 (where the original
  # used xrange and true division happened to be integral) and Python 3.
  for i in range(len(kv_list) // 2):
    rv[kv_list[i*2]] = kv_list[i*2+1]
  return rv


def _get_per_file_summaries(line_by_line):
  """Summarize the full line-by-line coverage report by file.

  Returns a list of (percent_covered, filepath) pairs; files where no line
  carries coverage information are omitted.
  """
  per_file = []
  # .items() instead of .iteritems(): valid on both Python 2 and 3.
  for filepath, lines in line_by_line.items():
    total_lines = 0
    covered_lines = 0
    for _, cov, _ in lines:
      if cov is not None:
        total_lines += 1
        if cov > 0:
          covered_lines += 1
    if total_lines > 0:
      per_file.append((float(covered_lines)/float(total_lines)*100.0,
                       filepath))
  return per_file


def main():
  """Generate useful data from a coverage report."""
  # Parse args.
  parser = argparse.ArgumentParser()
  parser.add_argument('--report', help='input file; an llvm coverage report.',
                      required=True)
  parser.add_argument('--nanobench', help='output file for nanobench data.')
  parser.add_argument(
      '--key', metavar='key_or_value', nargs='+',
      help='key/value pairs identifying this bot.')
  parser.add_argument(
      '--properties', metavar='key_or_value', nargs='+',
      help='key/value pairs representing properties of this build.')
  parser.add_argument('--linebyline',
                      help='output file for line-by-line JSON data.')
  args = parser.parse_args()

  if args.nanobench and not (args.key and args.properties):
    raise Exception('--key and --properties are required with --nanobench')

  with open(args.report) as f:
    report = f.read()

  line_by_line = _get_per_file_per_line_coverage(report)

  if args.linebyline:
    with open(args.linebyline, 'w') as f:
      json.dump(line_by_line, f)

  if args.nanobench:
    # Parse the key and properties for use in the nanobench JSON output.
    key = _parse_key_value(args.key)
    properties = _parse_key_value(args.properties)

    # Get per-file summaries.
    per_file = _get_per_file_summaries(line_by_line)

    # Write results.
    format_results = _nanobench_json(per_file, properties, key)
    with open(args.nanobench, 'w') as f:
      json.dump(format_results, f)


if __name__ == '__main__':
  main()