about summary refs log tree commit diff homepage
path: root/tools/run_tests/run_build_statistics.py
diff options
context:
space:
mode:
author Adele Zhou <adelez@adelez2.mtv.corp.google.com> 2016-08-11 18:45:18 -0700
committer Adele Zhou <adelez@adelez2.mtv.corp.google.com> 2016-08-12 17:22:44 -0700
commit bf3b769bd263047cc31ee9546ecc85396fca04a4 (patch)
tree 04926a8d79c075a1e3590feeba50cd7f0c91e403 /tools/run_tests/run_build_statistics.py
parent b6528c59a9a87ae5926ac9e4a11226a4ce490b36 (diff)
Tool for collecting build statistics.
Diffstat (limited to 'tools/run_tests/run_build_statistics.py')
-rwxr-xr-x tools/run_tests/run_build_statistics.py 204
1 files changed, 204 insertions, 0 deletions
diff --git a/tools/run_tests/run_build_statistics.py b/tools/run_tests/run_build_statistics.py
new file mode 100755
index 0000000000..92c53782a8
--- /dev/null
+++ b/tools/run_tests/run_build_statistics.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python2.7
+# Copyright 2016, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Tool to get build statistics from Jenkins and upload to BigQuery."""
+
+import argparse
+import jenkinsapi
+from jenkinsapi.custom_exceptions import JenkinsAPIException
+from jenkinsapi.jenkins import Jenkins
+import json
+import os
+import re
+import sys
+import urllib
+
+
+gcp_utils_dir = os.path.abspath(os.path.join(
+ os.path.dirname(__file__), '../gcp/utils'))
+sys.path.append(gcp_utils_dir)
+import big_query_utils
+
+
+# Readable flag for the values of _BUILDS: the job is a matrix build.
+_HAS_MATRIX = True
+# Project used for the BigQuery queries and uploads.
+_PROJECT_ID = 'grpc-testing'
+# Jenkins job name -> whether the job is a matrix build.
+_BUILDS = {
+    'gRPC_master': _HAS_MATRIX,
+    'gRPC_interop_master': not _HAS_MATRIX,
+    'gRPC_pull_requests': _HAS_MATRIX,
+    'gRPC_interop_pull_requests': not _HAS_MATRIX,
+}
+# Prefix of every per-job URL; job name and build number are appended to it.
+_URL_BASE = 'https://grpc-testing.appspot.com/job'
+# Literal messages that are searched for in test reports and console logs.
+_KNOWN_ERRORS = [
+    'Failed to build workspace Tests with scheme AllTests',
+    'Build timed out',
+    'FATAL: Unable to produce a script file',
+    'FAILED: Failed to build interop docker images',
+    'LLVM ERROR: IO failure on output stream.',
+    'MSBUILD : error MSB1009: Project file does not exist.',
+]
+# Description used for failures that match none of _KNOWN_ERRORS.
+_UNKNOWN_ERROR = 'Unknown error'
+# BigQuery dataset holding one table per Jenkins job.
+_DATASET_ID = 'build_statistics'
+
+
+def _scrape_for_known_errors(html):
+  """Count how often each known error message occurs in a page.
+
+  Args:
+    html: Text of the page (test report or console log) to search.
+
+  Returns:
+    A tuple (error_list, known_error_count). error_list contains one
+    {'description': ..., 'count': ...} dict for every entry of _KNOWN_ERRORS
+    that occurs at least once; known_error_count is the sum of those counts.
+  """
+  error_list = []
+  known_error_count = 0
+  for known_error in _KNOWN_ERRORS:
+    # The known errors are literal messages, not patterns. Escape them so that
+    # regex metacharacters such as '.' only match themselves.
+    this_error_count = len(re.findall(re.escape(known_error), html))
+    if this_error_count > 0:
+      known_error_count += this_error_count
+      error_list.append({'description': known_error,
+                         'count': this_error_count})
+      print('====> %d failures due to %s' % (this_error_count, known_error))
+  return error_list, known_error_count
+
+
+def _get_last_processed_buildnumber(build_name):
+  """Return the largest build number stored in the table for build_name.
+
+  Args:
+    build_name: Name of the Jenkins job; also used as the BigQuery table name.
+
+  Returns:
+    The maximum build_number found in the table as an int, or 0 when the
+    query yields an empty value.
+  """
+  table_ref = (_PROJECT_ID, _DATASET_ID, build_name)
+  query = 'SELECT max(build_number) FROM [%s:%s.%s];' % table_ref
+  query_job = big_query_utils.sync_query_job(bq, _PROJECT_ID, query)
+  request = bq.jobs().getQueryResults(pageToken=None,
+                                      **query_job['jobReference'])
+  page = request.execute(num_retries=3)
+  # Single aggregate result: first field of the first row.
+  max_build_number = page['rows'][0]['f'][0]['v']
+  return int(max_build_number) if max_build_number else 0
+
+
+def _process_matrix(build, url_base):
+  """Collect the results of every matrix run of a build.
+
+  Args:
+    build: Build object providing get_matrix_runs().
+    url_base: URL of the build; the matrix configuration path is appended.
+
+  Returns:
+    A list with one dict per matrix run, holding its 'name', its 'duration'
+    in seconds and the entries returned by _process_build().
+  """
+  matrix_list = []
+  for matrix in build.get_matrix_runs():
+    # \xc2\xbb is the UTF-8 encoding of the character U+00BB; the
+    # comma-separated text between it and ' #' is captured.
+    name_match = re.match('.*\\xc2\\xbb ((?:[^,]+,?)+) #.*', matrix.name)
+    matrix_str = name_match.groups()[0]
+    fields = matrix_str.split(',')
+    # The first three fields are used as config, language and platform.
+    url_args = (url_base, fields[0], fields[1], fields[2])
+    json_url = (
+        '%s/config=%s,language=%s,platform=%s/testReport/api/json' % url_args)
+    console_url = (
+        '%s/config=%s,language=%s,platform=%s/consoleFull' % url_args)
+    matrix_dict = {'name': matrix_str,
+                   'duration': matrix.get_duration().total_seconds()}
+    matrix_dict.update(_process_build(json_url, console_url))
+    matrix_list.append(matrix_dict)
+  return matrix_list
+
+
+def _read_url(url):
+  """Return the content served at url and close the connection afterwards."""
+  response = urllib.urlopen(url)
+  try:
+    return response.read()
+  finally:
+    response.close()
+
+
+def _process_build(json_url, console_url):
+  """Summarize the test results of one build.
+
+  The JSON test report is tried first. If it cannot be fetched or parsed, the
+  build is counted as a single failure and the console log is searched for
+  known errors instead.
+
+  Args:
+    json_url: URL of the JSON test report of the build.
+    console_url: URL of the full console log of the build.
+
+  Returns:
+    A dict with 'pass_count' and 'failure_count', plus 'error' (a list of
+    {'description': ..., 'count': ...} dicts) when any error was recorded.
+  """
+  build_result = {}
+  error_list = []
+  try:
+    html = _read_url(json_url)
+    test_result = json.loads(html)
+    print('====> Parsing result from %s' % json_url)
+    failure_count = test_result['failCount']
+    build_result['pass_count'] = test_result['passCount']
+    build_result['failure_count'] = failure_count
+    if failure_count > 0:
+      error_list, known_error_count = _scrape_for_known_errors(html)
+      unknown_error_count = failure_count - known_error_count
+      # This can happen if the same error occurs multiple times in one test.
+      if failure_count < known_error_count:
+        print('====> Some errors are duplicates.')
+        unknown_error_count = 0
+      error_list.append({'description': _UNKNOWN_ERROR,
+                         'count': unknown_error_count})
+  except Exception as e:
+    # Deliberately broad: any problem with the test report falls back to
+    # scraping the console log.
+    print('====> Got exception for %s: %s.' % (json_url, str(e)))
+    print('====> Parsing errors from %s.' % console_url)
+    html = _read_url(console_url)
+    build_result['pass_count'] = 0
+    build_result['failure_count'] = 1
+    error_list, _ = _scrape_for_known_errors(html)
+    # The single failure is unknown only if no known error explains it.
+    unknown_error_count = 0 if error_list else 1
+    error_list.append({'description': _UNKNOWN_ERROR,
+                       'count': unknown_error_count})
+
+  if error_list:
+    build_result['error'] = error_list
+
+  return build_result
+
+
+# parse command line
+argp = argparse.ArgumentParser(description='Get build statistics.')
+argp.add_argument('-u', '--username', default='jenkins')
+argp.add_argument('-b', '--builds',
+                  choices=['all'] + sorted(_BUILDS.keys()),
+                  nargs='+',
+                  default=['all'])
+args = argp.parse_args()
+
+# NOTE(review): 'apiToken' looks like a placeholder credential -- confirm.
+J = Jenkins('https://grpc-testing.appspot.com', args.username, 'apiToken')
+bq = big_query_utils.create_big_query()
+
+for build_name in _BUILDS.keys() if 'all' in args.builds else args.builds:
+  print('====> Build: %s' % build_name)
+  try:
+    job = J[build_name]
+  except Exception as e:
+    print('====> Failed to get build %s: %s.' % (build_name, str(e)))
+    continue
+  last_processed_build_number = _get_last_processed_buildnumber(build_name)
+  last_complete_build_number = job.get_last_completed_buildnumber()
+  # To avoid processing all builds for a project never looked at. In this case,
+  # only examine 10 latest builds.
+  starting_build_number = max(last_processed_build_number+1,
+                              last_complete_build_number-9)
+  for build_number in xrange(starting_build_number,
+                             last_complete_build_number+1):
+    print('====> Processing %s build %d.' % (build_name, build_number))
+    # Since get_last_completed_build() always fails due to malformatted string
+    # error, we use get_build_metadata() instead.
+    try:
+      build = job.get_build_metadata(build_number)
+    except KeyError:
+      print('====> Build %s is missing. Skip.' % build_number)
+      continue
+    build_result = {'build_number': build_number,
+                    'timestamp': str(build.get_timestamp())}
+    url_base = '%s/%s/%d' % (_URL_BASE, build_name, build_number)
+    if _BUILDS[build_name]:  # The build has matrix, such as gRPC_master.
+      build_result['matrix'] = _process_matrix(build, url_base)
+    else:
+      json_url = '%s/testReport/api/json' % url_base
+      console_url = '%s/consoleFull' % url_base
+      build_result['duration'] = build.get_duration().total_seconds()
+      build_result.update(_process_build(json_url, console_url))
+    rows = [big_query_utils.make_row(build_number, build_result)]
+    if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET_ID, build_name,
+                                       rows):
+      # Stop at the first failed upload and signal it through the exit status.
+      print('====> Error uploading result to bigquery.')
+      sys.exit(1)
+