-rwxr-xr-x | tools/debug/core/chttp2_ref_leak.py   |  22
-rw-r--r-- | tools/debug/core/error_ref_leak.py    |  34
-rwxr-xr-x | tools/distrib/yapf_code.sh            |   7
-rw-r--r-- | tools/flakes/detect_flakes.py         |  88
-rwxr-xr-x | tools/gcp/utils/big_query_utils.py    | 276
-rw-r--r-- | tools/github/pr_latency.py            | 211
-rwxr-xr-x | tools/line_count/collect-history.py   |  25
-rwxr-xr-x | tools/line_count/summarize-history.py |  17
-rwxr-xr-x | tools/line_count/yaml2csv.py          |  25
-rwxr-xr-x | tools/mkowners/mkowners.py            | 312
10 files changed, 548 insertions, 469 deletions
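Most of the churn in the diff below is a mechanical re-indent of these Python tools from two-space to four-space indentation, which lines up with the yapf_code.sh change that collapses DIRS to just 'tools'. The sketch below shows roughly how such a pass could be reproduced; the exact yapf flags, version, and style configuration are assumptions rather than something recorded in this diff.

    # Rough sketch (not taken from this diff): run yapf recursively over the
    # tools tree, mirroring the new DIRS and EXCLUSIONS values in yapf_code.sh.
    # The real script likely pins a specific yapf version and style config.
    import subprocess

    DIRS = ['tools']                   # new DIRS value in yapf_code.sh
    EXCLUSIONS = ['grpcio/grpc_*.py']  # EXCLUSIONS list in yapf_code.sh

    cmd = ['yapf', '--in-place', '--recursive'] + DIRS
    for pattern in EXCLUSIONS:
        cmd += ['--exclude', pattern]  # yapf accepts repeated --exclude patterns
    subprocess.check_call(cmd)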
diff --git a/tools/debug/core/chttp2_ref_leak.py b/tools/debug/core/chttp2_ref_leak.py index d693dd9e54..a6a5448775 100755 --- a/tools/debug/core/chttp2_ref_leak.py +++ b/tools/debug/core/chttp2_ref_leak.py @@ -20,8 +20,10 @@ import collections import sys import re + def new_obj(): - return ['destroy'] + return ['destroy'] + outstanding = collections.defaultdict(new_obj) @@ -29,14 +31,14 @@ outstanding = collections.defaultdict(new_obj) # chttp2:unref:0x629000005200 2->1 destroy [src/core/ext/transport/chttp2/transport/chttp2_transport.c:599] for line in sys.stdin: - m = re.search(r'chttp2:( ref|unref):0x([a-fA-F0-9]+) [^ ]+ ([^[]+) \[(.*)\]', line) - if m: - if m.group(1) == ' ref': - outstanding[m.group(2)].append(m.group(3)) - else: - outstanding[m.group(2)].remove(m.group(3)) + m = re.search( + r'chttp2:( ref|unref):0x([a-fA-F0-9]+) [^ ]+ ([^[]+) \[(.*)\]', line) + if m: + if m.group(1) == ' ref': + outstanding[m.group(2)].append(m.group(3)) + else: + outstanding[m.group(2)].remove(m.group(3)) for obj, remaining in outstanding.items(): - if remaining: - print 'LEAKED: %s %r' % (obj, remaining) - + if remaining: + print 'LEAKED: %s %r' % (obj, remaining) diff --git a/tools/debug/core/error_ref_leak.py b/tools/debug/core/error_ref_leak.py index 6582328a5b..7806338683 100644 --- a/tools/debug/core/error_ref_leak.py +++ b/tools/debug/core/error_ref_leak.py @@ -26,22 +26,22 @@ data = sys.stdin.readlines() errs = [] for line in data: - # if we care about the line - if re.search(r'error.cc', line): - # str manip to cut off left part of log line - line = line.partition('error.cc:')[-1] - line = re.sub(r'\d+] ', r'', line) - line = line.strip().split() - err = line[0].strip(":") - if line[1] == "create": - assert(err not in errs) - errs.append(err) - elif line[0] == "realloc": - errs.remove(line[1]) - errs.append(line[3]) - # explicitly look for the last dereference - elif line[1] == "1" and line[3] == "0": - assert(err in errs) - errs.remove(err) + # if we care about the line + if re.search(r'error.cc', line): + # str manip to cut off left part of log line + line = line.partition('error.cc:')[-1] + line = re.sub(r'\d+] ', r'', line) + line = line.strip().split() + err = line[0].strip(":") + if line[1] == "create": + assert (err not in errs) + errs.append(err) + elif line[0] == "realloc": + errs.remove(line[1]) + errs.append(line[3]) + # explicitly look for the last dereference + elif line[1] == "1" and line[3] == "0": + assert (err in errs) + errs.remove(err) print "leaked:", errs diff --git a/tools/distrib/yapf_code.sh b/tools/distrib/yapf_code.sh index 698c341d88..85a45b6a11 100755 --- a/tools/distrib/yapf_code.sh +++ b/tools/distrib/yapf_code.sh @@ -20,12 +20,7 @@ cd "$(dirname "${0}")/../.." DIRS=( 'src/python' - 'tools/buildgen' - 'tools/codegen' - 'tools/distrib' - 'tools/interop_matrix' - 'tools/profiling' - 'tools/run_tests' + 'tools' ) EXCLUSIONS=( 'grpcio/grpc_*.py' diff --git a/tools/flakes/detect_flakes.py b/tools/flakes/detect_flakes.py index c5c7f61771..b066ee6139 100644 --- a/tools/flakes/detect_flakes.py +++ b/tools/flakes/detect_flakes.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Detect new flakes introduced in the last 24h hours with respect to the previous six days""" @@ -32,26 +31,29 @@ sys.path.append(gcp_utils_dir) import big_query_utils + def print_table(table): kokoro_base_url = 'https://kokoro.corp.google.com/job/' for k, v in table.items(): - job_name = v[0] - build_id = v[1] - ts = int(float(v[2])) - # TODO(dgq): timezone handling is wrong. We need to determine the timezone - # of the computer running this script. - human_ts = datetime.datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S PDT') - job_path = '{}/{}'.format('/job/'.join(job_name.split('/')), build_id) - full_kokoro_url = kokoro_base_url + job_path - print("Test: {}, Timestamp: {}, url: {}\n".format(k, human_ts, full_kokoro_url)) + job_name = v[0] + build_id = v[1] + ts = int(float(v[2])) + # TODO(dgq): timezone handling is wrong. We need to determine the timezone + # of the computer running this script. + human_ts = datetime.datetime.utcfromtimestamp(ts).strftime( + '%Y-%m-%d %H:%M:%S PDT') + job_path = '{}/{}'.format('/job/'.join(job_name.split('/')), build_id) + full_kokoro_url = kokoro_base_url + job_path + print("Test: {}, Timestamp: {}, url: {}\n".format(k, human_ts, + full_kokoro_url)) def get_flaky_tests(days_lower_bound, days_upper_bound, limit=None): - """ period is one of "WEEK", "DAY", etc. + """ period is one of "WEEK", "DAY", etc. (see https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#date_add). """ - bq = big_query_utils.create_big_query() - query = """ + bq = big_query_utils.create_big_query() + query = """ SELECT REGEXP_REPLACE(test_name, r'/\d+', '') AS filtered_test_name, job_name, @@ -65,41 +67,45 @@ WHERE AND NOT REGEXP_MATCH(job_name, '.*portability.*') AND result != 'PASSED' AND result != 'SKIPPED' ORDER BY timestamp desc -""".format(days_lower_bound=days_lower_bound, days_upper_bound=days_upper_bound) - if limit: - query += '\n LIMIT {}'.format(limit) - query_job = big_query_utils.sync_query_job(bq, 'grpc-testing', query) - page = bq.jobs().getQueryResults( - pageToken=None, **query_job['jobReference']).execute(num_retries=3) - rows = page.get('rows') - if rows: - return {row['f'][0]['v']: +""".format( + days_lower_bound=days_lower_bound, days_upper_bound=days_upper_bound) + if limit: + query += '\n LIMIT {}'.format(limit) + query_job = big_query_utils.sync_query_job(bq, 'grpc-testing', query) + page = bq.jobs().getQueryResults( + pageToken=None, **query_job['jobReference']).execute(num_retries=3) + rows = page.get('rows') + if rows: + return { + row['f'][0]['v']: (row['f'][1]['v'], row['f'][2]['v'], row['f'][3]['v']) - for row in rows} - else: - return {} + for row in rows + } + else: + return {} def get_new_flakes(): - last_week_sans_yesterday = get_flaky_tests(-14, -1) - last_24 = get_flaky_tests(0, +1) - last_week_sans_yesterday_names = set(last_week_sans_yesterday.keys()) - last_24_names = set(last_24.keys()) - logging.debug('|last_week_sans_yesterday| =', len(last_week_sans_yesterday_names)) - logging.debug('|last_24_names| =', len(last_24_names)) - new_flakes = last_24_names - last_week_sans_yesterday_names - logging.debug('|new_flakes| = ', len(new_flakes)) - return {k: last_24[k] for k in new_flakes} + last_week_sans_yesterday = get_flaky_tests(-14, -1) + last_24 = get_flaky_tests(0, +1) + last_week_sans_yesterday_names = set(last_week_sans_yesterday.keys()) + last_24_names = set(last_24.keys()) + logging.debug('|last_week_sans_yesterday| =', + len(last_week_sans_yesterday_names)) + logging.debug('|last_24_names| 
=', len(last_24_names)) + new_flakes = last_24_names - last_week_sans_yesterday_names + logging.debug('|new_flakes| = ', len(new_flakes)) + return {k: last_24[k] for k in new_flakes} def main(): - new_flakes = get_new_flakes() - if new_flakes: - print("Found {} new flakes:".format(len(new_flakes))) - print_table(new_flakes) - else: - print("No new flakes found!") + new_flakes = get_new_flakes() + if new_flakes: + print("Found {} new flakes:".format(len(new_flakes))) + print_table(new_flakes) + else: + print("No new flakes found!") if __name__ == '__main__': - main() + main() diff --git a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py index 77a5f5691e..3e811ca2bf 100755 --- a/tools/gcp/utils/big_query_utils.py +++ b/tools/gcp/utils/big_query_utils.py @@ -28,154 +28,174 @@ NUM_RETRIES = 3 def create_big_query(): - """Authenticates with cloud platform and gets a BiqQuery service object + """Authenticates with cloud platform and gets a BiqQuery service object """ - creds = GoogleCredentials.get_application_default() - return discovery.build('bigquery', 'v2', credentials=creds, cache_discovery=False) + creds = GoogleCredentials.get_application_default() + return discovery.build( + 'bigquery', 'v2', credentials=creds, cache_discovery=False) def create_dataset(biq_query, project_id, dataset_id): - is_success = True - body = { - 'datasetReference': { - 'projectId': project_id, - 'datasetId': dataset_id - } - } - - try: - dataset_req = biq_query.datasets().insert(projectId=project_id, body=body) - dataset_req.execute(num_retries=NUM_RETRIES) - except HttpError as http_error: - if http_error.resp.status == 409: - print 'Warning: The dataset %s already exists' % dataset_id - else: - # Note: For more debugging info, print "http_error.content" - print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error) - is_success = False - return is_success + is_success = True + body = { + 'datasetReference': { + 'projectId': project_id, + 'datasetId': dataset_id + } + } + + try: + dataset_req = biq_query.datasets().insert( + projectId=project_id, body=body) + dataset_req.execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: The dataset %s already exists' % dataset_id + else: + # Note: For more debugging info, print "http_error.content" + print 'Error in creating dataset: %s. Err: %s' % (dataset_id, + http_error) + is_success = False + return is_success def create_table(big_query, project_id, dataset_id, table_id, table_schema, description): - fields = [{'name': field_name, - 'type': field_type, - 'description': field_description - } for (field_name, field_type, field_description) in table_schema] - return create_table2(big_query, project_id, dataset_id, table_id, - fields, description) - - -def create_partitioned_table(big_query, project_id, dataset_id, table_id, table_schema, - description, partition_type='DAY', expiration_ms=_EXPIRATION_MS): - """Creates a partitioned table. By default, a date-paritioned table is created with + fields = [{ + 'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + return create_table2(big_query, project_id, dataset_id, table_id, fields, + description) + + +def create_partitioned_table(big_query, + project_id, + dataset_id, + table_id, + table_schema, + description, + partition_type='DAY', + expiration_ms=_EXPIRATION_MS): + """Creates a partitioned table. 
By default, a date-paritioned table is created with each partition lasting 30 days after it was last modified. """ - fields = [{'name': field_name, - 'type': field_type, - 'description': field_description - } for (field_name, field_type, field_description) in table_schema] - return create_table2(big_query, project_id, dataset_id, table_id, - fields, description, partition_type, expiration_ms) - - -def create_table2(big_query, project_id, dataset_id, table_id, fields_schema, - description, partition_type=None, expiration_ms=None): - is_success = True - - body = { - 'description': description, - 'schema': { - 'fields': fields_schema - }, - 'tableReference': { - 'datasetId': dataset_id, - 'projectId': project_id, - 'tableId': table_id - } - } - - if partition_type and expiration_ms: - body["timePartitioning"] = { - "type": partition_type, - "expirationMs": expiration_ms + fields = [{ + 'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + return create_table2(big_query, project_id, dataset_id, table_id, fields, + description, partition_type, expiration_ms) + + +def create_table2(big_query, + project_id, + dataset_id, + table_id, + fields_schema, + description, + partition_type=None, + expiration_ms=None): + is_success = True + + body = { + 'description': description, + 'schema': { + 'fields': fields_schema + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } } - try: - table_req = big_query.tables().insert(projectId=project_id, - datasetId=dataset_id, - body=body) - res = table_req.execute(num_retries=NUM_RETRIES) - print 'Successfully created %s "%s"' % (res['kind'], res['id']) - except HttpError as http_error: - if http_error.resp.status == 409: - print 'Warning: Table %s already exists' % table_id - else: - print 'Error in creating table: %s. Err: %s' % (table_id, http_error) - is_success = False - return is_success + if partition_type and expiration_ms: + body["timePartitioning"] = { + "type": partition_type, + "expirationMs": expiration_ms + } + + try: + table_req = big_query.tables().insert( + projectId=project_id, datasetId=dataset_id, body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print 'Successfully created %s "%s"' % (res['kind'], res['id']) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: Table %s already exists' % table_id + else: + print 'Error in creating table: %s. Err: %s' % (table_id, + http_error) + is_success = False + return is_success def patch_table(big_query, project_id, dataset_id, table_id, fields_schema): - is_success = True - - body = { - 'schema': { - 'fields': fields_schema - }, - 'tableReference': { - 'datasetId': dataset_id, - 'projectId': project_id, - 'tableId': table_id - } - } - - try: - table_req = big_query.tables().patch(projectId=project_id, - datasetId=dataset_id, - tableId=table_id, - body=body) - res = table_req.execute(num_retries=NUM_RETRIES) - print 'Successfully patched %s "%s"' % (res['kind'], res['id']) - except HttpError as http_error: - print 'Error in creating table: %s. 
Err: %s' % (table_id, http_error) - is_success = False - return is_success + is_success = True + + body = { + 'schema': { + 'fields': fields_schema + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } + } + + try: + table_req = big_query.tables().patch( + projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print 'Successfully patched %s "%s"' % (res['kind'], res['id']) + except HttpError as http_error: + print 'Error in creating table: %s. Err: %s' % (table_id, http_error) + is_success = False + return is_success def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): - is_success = True - body = {'rows': rows_list} - try: - insert_req = big_query.tabledata().insertAll(projectId=project_id, - datasetId=dataset_id, - tableId=table_id, - body=body) - res = insert_req.execute(num_retries=NUM_RETRIES) - if res.get('insertErrors', None): - print 'Error inserting rows! Response: %s' % res - is_success = False - except HttpError as http_error: - print 'Error inserting rows to the table %s' % table_id - is_success = False - - return is_success + is_success = True + body = {'rows': rows_list} + try: + insert_req = big_query.tabledata().insertAll( + projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + res = insert_req.execute(num_retries=NUM_RETRIES) + if res.get('insertErrors', None): + print 'Error inserting rows! Response: %s' % res + is_success = False + except HttpError as http_error: + print 'Error inserting rows to the table %s' % table_id + is_success = False + + return is_success def sync_query_job(big_query, project_id, query, timeout=5000): - query_data = {'query': query, 'timeoutMs': timeout} - query_job = None - try: - query_job = big_query.jobs().query( - projectId=project_id, - body=query_data).execute(num_retries=NUM_RETRIES) - except HttpError as http_error: - print 'Query execute job failed with error: %s' % http_error - print http_error.content - return query_job - - # List of (column name, column type, description) tuples + query_data = {'query': query, 'timeoutMs': timeout} + query_job = None + try: + query_job = big_query.jobs().query( + projectId=project_id, + body=query_data).execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + print 'Query execute job failed with error: %s' % http_error + print http_error.content + return query_job + + + # List of (column name, column type, description) tuples def make_row(unique_row_id, row_values_dict): - """row_values_dict is a dictionary of column name and column value. + """row_values_dict is a dictionary of column name and column value. """ - return {'insertId': unique_row_id, 'json': row_values_dict} + return {'insertId': unique_row_id, 'json': row_values_dict} diff --git a/tools/github/pr_latency.py b/tools/github/pr_latency.py index 5d635835e5..0131e60bbc 100644 --- a/tools/github/pr_latency.py +++ b/tools/github/pr_latency.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Measure the time between PR creation and completion of all tests. You'll need a github API token to avoid being rate-limited. 
See @@ -46,118 +45,156 @@ COMMITS = 'https://api.github.com/repos/grpc/grpc/pulls/{pr_number}/commits' def gh(url): - request = urllib2.Request(url) - if TOKEN: - request.add_header('Authorization', 'token {}'.format(TOKEN)) - response = urllib2.urlopen(request) - return response.read() + request = urllib2.Request(url) + if TOKEN: + request.add_header('Authorization', 'token {}'.format(TOKEN)) + response = urllib2.urlopen(request) + return response.read() def print_csv_header(): - print('pr,base_time,test_time,latency_seconds,successes,failures,errors') - - -def output(pr, base_time, test_time, diff_time, successes, failures, errors, mode='human'): - if mode == 'human': - print("PR #{} base time: {} UTC, Tests completed at: {} UTC. Latency: {}." - "\n\tSuccesses: {}, Failures: {}, Errors: {}".format( - pr, base_time, test_time, diff_time, successes, failures, errors)) - elif mode == 'csv': - print(','.join([str(pr), str(base_time), - str(test_time), str(int((test_time-base_time).total_seconds())), - str(successes), str(failures), str(errors)])) + print('pr,base_time,test_time,latency_seconds,successes,failures,errors') + + +def output(pr, + base_time, + test_time, + diff_time, + successes, + failures, + errors, + mode='human'): + if mode == 'human': + print( + "PR #{} base time: {} UTC, Tests completed at: {} UTC. Latency: {}." + "\n\tSuccesses: {}, Failures: {}, Errors: {}".format( + pr, base_time, test_time, diff_time, successes, failures, + errors)) + elif mode == 'csv': + print(','.join([ + str(pr), str(base_time), str(test_time), str( + int((test_time - base_time).total_seconds())), str(successes), + str(failures), str(errors) + ])) def parse_timestamp(datetime_str): - return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%SZ') + return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%SZ') def to_posix_timestamp(dt): - return str((dt - datetime(1970, 1, 1)).total_seconds()) + return str((dt - datetime(1970, 1, 1)).total_seconds()) def get_pr_data(): - latest_prs = json.loads(gh(PRS)) - res = [{'number': pr['number'], - 'created_at': parse_timestamp(pr['created_at']), - 'updated_at': parse_timestamp(pr['updated_at']), - 'statuses_url': pr['statuses_url']} - for pr in latest_prs] - return res + latest_prs = json.loads(gh(PRS)) + res = [{ + 'number': pr['number'], + 'created_at': parse_timestamp(pr['created_at']), + 'updated_at': parse_timestamp(pr['updated_at']), + 'statuses_url': pr['statuses_url'] + } for pr in latest_prs] + return res def get_commits_data(pr_number): - commits = json.loads(gh(COMMITS.format(pr_number=pr_number))) - return {'num_commits': len(commits), - 'most_recent_date': parse_timestamp(commits[-1]['commit']['author']['date'])} + commits = json.loads(gh(COMMITS.format(pr_number=pr_number))) + return { + 'num_commits': len(commits), + 'most_recent_date': + parse_timestamp(commits[-1]['commit']['author']['date']) + } def get_status_data(statuses_url, system): - status_url = statuses_url.replace('statuses', 'status') - statuses = json.loads(gh(status_url + '?per_page=100')) - successes = 0 - failures = 0 - errors = 0 - latest_datetime = None - if not statuses: return None - if system == 'kokoro': string_in_target_url = 'kokoro' - elif system == 'jenkins': string_in_target_url = 'grpc-testing' - for status in statuses['statuses']: - if not status['target_url'] or string_in_target_url not in status['target_url']: continue # Ignore jenkins - if status['state'] == 'pending': return None - elif status['state'] == 'success': successes += 1 - elif status['state'] == 
'failure': failures += 1 - elif status['state'] == 'error': errors += 1 - if not latest_datetime: - latest_datetime = parse_timestamp(status['updated_at']) - else: - latest_datetime = max(latest_datetime, parse_timestamp(status['updated_at'])) - # First status is the most recent one. - if any([successes, failures, errors]) and sum([successes, failures, errors]) > 15: - return {'latest_datetime': latest_datetime, + status_url = statuses_url.replace('statuses', 'status') + statuses = json.loads(gh(status_url + '?per_page=100')) + successes = 0 + failures = 0 + errors = 0 + latest_datetime = None + if not statuses: return None + if system == 'kokoro': string_in_target_url = 'kokoro' + elif system == 'jenkins': string_in_target_url = 'grpc-testing' + for status in statuses['statuses']: + if not status['target_url'] or string_in_target_url not in status[ + 'target_url']: + continue # Ignore jenkins + if status['state'] == 'pending': return None + elif status['state'] == 'success': successes += 1 + elif status['state'] == 'failure': failures += 1 + elif status['state'] == 'error': errors += 1 + if not latest_datetime: + latest_datetime = parse_timestamp(status['updated_at']) + else: + latest_datetime = max(latest_datetime, + parse_timestamp(status['updated_at'])) + # First status is the most recent one. + if any([successes, failures, errors]) and sum( + [successes, failures, errors]) > 15: + return { + 'latest_datetime': latest_datetime, 'successes': successes, 'failures': failures, - 'errors': errors} - else: return None + 'errors': errors + } + else: + return None def build_args_parser(): - import argparse - parser = argparse.ArgumentParser() - parser.add_argument('--format', type=str, choices=['human', 'csv'], - default='human', - help='Output format: are you a human or a machine?') - parser.add_argument('--system', type=str, choices=['jenkins', 'kokoro'], - required=True, help='Consider only the given CI system') - parser.add_argument('--token', type=str, default='', - help='GitHub token to use its API with a higher rate limit') - return parser + import argparse + parser = argparse.ArgumentParser() + parser.add_argument( + '--format', + type=str, + choices=['human', 'csv'], + default='human', + help='Output format: are you a human or a machine?') + parser.add_argument( + '--system', + type=str, + choices=['jenkins', 'kokoro'], + required=True, + help='Consider only the given CI system') + parser.add_argument( + '--token', + type=str, + default='', + help='GitHub token to use its API with a higher rate limit') + return parser def main(): - import sys - global TOKEN - args_parser = build_args_parser() - args = args_parser.parse_args() - TOKEN = args.token - if args.format == 'csv': print_csv_header() - for pr_data in get_pr_data(): - commit_data = get_commits_data(pr_data['number']) - # PR with a single commit -> use the PRs creation time. - # else -> use the latest commit's date. 
- base_timestamp = pr_data['updated_at'] - if commit_data['num_commits'] > 1: - base_timestamp = commit_data['most_recent_date'] - else: - base_timestamp = pr_data['created_at'] - last_status = get_status_data(pr_data['statuses_url'], args.system) - if last_status: - diff = last_status['latest_datetime'] - base_timestamp - if diff < timedelta(hours=5): - output(pr_data['number'], base_timestamp, last_status['latest_datetime'], - diff, last_status['successes'], last_status['failures'], - last_status['errors'], mode=args.format) + import sys + global TOKEN + args_parser = build_args_parser() + args = args_parser.parse_args() + TOKEN = args.token + if args.format == 'csv': print_csv_header() + for pr_data in get_pr_data(): + commit_data = get_commits_data(pr_data['number']) + # PR with a single commit -> use the PRs creation time. + # else -> use the latest commit's date. + base_timestamp = pr_data['updated_at'] + if commit_data['num_commits'] > 1: + base_timestamp = commit_data['most_recent_date'] + else: + base_timestamp = pr_data['created_at'] + last_status = get_status_data(pr_data['statuses_url'], args.system) + if last_status: + diff = last_status['latest_datetime'] - base_timestamp + if diff < timedelta(hours=5): + output( + pr_data['number'], + base_timestamp, + last_status['latest_datetime'], + diff, + last_status['successes'], + last_status['failures'], + last_status['errors'], + mode=args.format) if __name__ == '__main__': - main() + main() diff --git a/tools/line_count/collect-history.py b/tools/line_count/collect-history.py index 3f030fbb8f..d2d5c95705 100755 --- a/tools/line_count/collect-history.py +++ b/tools/line_count/collect-history.py @@ -19,20 +19,23 @@ import datetime # this script is only of historical interest: it's the script that was used to # bootstrap the dataset + def daterange(start, end): - for n in range(int((end - start).days)): - yield start + datetime.timedelta(n) + for n in range(int((end - start).days)): + yield start + datetime.timedelta(n) + start_date = datetime.date(2017, 3, 26) end_date = datetime.date(2017, 3, 29) for dt in daterange(start_date, end_date): - dmy = dt.strftime('%Y-%m-%d') - sha1 = subprocess.check_output(['git', 'rev-list', '-n', '1', - '--before=%s' % dmy, - 'master']).strip() - subprocess.check_call(['git', 'checkout', sha1]) - subprocess.check_call(['git', 'submodule', 'update']) - subprocess.check_call(['git', 'clean', '-f', '-x', '-d']) - subprocess.check_call(['cloc', '--vcs=git', '--by-file', '--yaml', '--out=../count/%s.yaml' % dmy, '.']) - + dmy = dt.strftime('%Y-%m-%d') + sha1 = subprocess.check_output( + ['git', 'rev-list', '-n', '1', '--before=%s' % dmy, 'master']).strip() + subprocess.check_call(['git', 'checkout', sha1]) + subprocess.check_call(['git', 'submodule', 'update']) + subprocess.check_call(['git', 'clean', '-f', '-x', '-d']) + subprocess.check_call([ + 'cloc', '--vcs=git', '--by-file', '--yaml', + '--out=../count/%s.yaml' % dmy, '.' + ]) diff --git a/tools/line_count/summarize-history.py b/tools/line_count/summarize-history.py index d2ef7ec324..80b0ed7a7e 100755 --- a/tools/line_count/summarize-history.py +++ b/tools/line_count/summarize-history.py @@ -13,22 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- import subprocess import datetime # this script is only of historical interest: it's the script that was used to # bootstrap the dataset + def daterange(start, end): - for n in range(int((end - start).days)): - yield start + datetime.timedelta(n) + for n in range(int((end - start).days)): + yield start + datetime.timedelta(n) + start_date = datetime.date(2017, 3, 26) end_date = datetime.date(2017, 3, 29) for dt in daterange(start_date, end_date): - dmy = dt.strftime('%Y-%m-%d') - print dmy - subprocess.check_call(['tools/line_count/yaml2csv.py', '-i', '../count/%s.yaml' % dmy, '-d', dmy, '-o', '../count/%s.csv' % dmy]) - + dmy = dt.strftime('%Y-%m-%d') + print dmy + subprocess.check_call([ + 'tools/line_count/yaml2csv.py', '-i', '../count/%s.yaml' % dmy, '-d', + dmy, '-o', '../count/%s.csv' % dmy + ]) diff --git a/tools/line_count/yaml2csv.py b/tools/line_count/yaml2csv.py index 2a38a12c80..dd2e92b360 100755 --- a/tools/line_count/yaml2csv.py +++ b/tools/line_count/yaml2csv.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - import yaml import argparse import datetime @@ -21,18 +20,22 @@ import csv argp = argparse.ArgumentParser(description='Convert cloc yaml to bigquery csv') argp.add_argument('-i', '--input', type=str) -argp.add_argument('-d', '--date', type=str, default=datetime.date.today().strftime('%Y-%m-%d')) +argp.add_argument( + '-d', + '--date', + type=str, + default=datetime.date.today().strftime('%Y-%m-%d')) argp.add_argument('-o', '--output', type=str, default='out.csv') args = argp.parse_args() data = yaml.load(open(args.input).read()) with open(args.output, 'w') as outf: - writer = csv.DictWriter(outf, ['date', 'name', 'language', 'code', 'comment', 'blank']) - for key, value in data.iteritems(): - if key == 'header': continue - if key == 'SUM': continue - if key.startswith('third_party/'): continue - row = {'name': key, 'date': args.date} - row.update(value) - writer.writerow(row) - + writer = csv.DictWriter( + outf, ['date', 'name', 'language', 'code', 'comment', 'blank']) + for key, value in data.iteritems(): + if key == 'header': continue + if key == 'SUM': continue + if key.startswith('third_party/'): continue + row = {'name': key, 'date': args.date} + row.update(value) + writer.writerow(row) diff --git a/tools/mkowners/mkowners.py b/tools/mkowners/mkowners.py index e0ad998bdc..d8b3d3c332 100755 --- a/tools/mkowners/mkowners.py +++ b/tools/mkowners/mkowners.py @@ -24,10 +24,8 @@ import subprocess # Find the root of the git tree # -git_root = (subprocess - .check_output(['git', 'rev-parse', '--show-toplevel']) - .decode('utf-8') - .strip()) +git_root = (subprocess.check_output(['git', 'rev-parse', '--show-toplevel']) + .decode('utf-8').strip()) # # Parse command line arguments @@ -36,19 +34,22 @@ git_root = (subprocess default_out = os.path.join(git_root, '.github', 'CODEOWNERS') argp = argparse.ArgumentParser('Generate .github/CODEOWNERS file') -argp.add_argument('--out', '-o', - type=str, - default=default_out, - help='Output file (default %s)' % default_out) +argp.add_argument( + '--out', + '-o', + type=str, + default=default_out, + help='Output file (default %s)' % default_out) args = argp.parse_args() # # Walk git tree to locate all OWNERS files # -owners_files = [os.path.join(root, 'OWNERS') - for root, dirs, files in os.walk(git_root) - if 'OWNERS' in files] +owners_files = [ + os.path.join(root, 'OWNERS') for root, dirs, files in os.walk(git_root) + if 'OWNERS' in files +] # # Parse 
owners files @@ -57,39 +58,40 @@ owners_files = [os.path.join(root, 'OWNERS') Owners = collections.namedtuple('Owners', 'parent directives dir') Directive = collections.namedtuple('Directive', 'who globs') + def parse_owners(filename): - with open(filename) as f: - src = f.read().splitlines() - parent = True - directives = [] - for line in src: - line = line.strip() - # line := directive | comment - if not line: continue - if line[0] == '#': continue - # it's a directive - directive = None - if line == 'set noparent': - parent = False - elif line == '*': - directive = Directive(who='*', globs=[]) - elif ' ' in line: - (who, globs) = line.split(' ', 1) - globs_list = [glob - for glob in globs.split(' ') - if glob] - directive = Directive(who=who, globs=globs_list) - else: - directive = Directive(who=line, globs=[]) - if directive: - directives.append(directive) - return Owners(parent=parent, - directives=directives, - dir=os.path.relpath(os.path.dirname(filename), git_root)) - -owners_data = sorted([parse_owners(filename) - for filename in owners_files], - key=operator.attrgetter('dir')) + with open(filename) as f: + src = f.read().splitlines() + parent = True + directives = [] + for line in src: + line = line.strip() + # line := directive | comment + if not line: continue + if line[0] == '#': continue + # it's a directive + directive = None + if line == 'set noparent': + parent = False + elif line == '*': + directive = Directive(who='*', globs=[]) + elif ' ' in line: + (who, globs) = line.split(' ', 1) + globs_list = [glob for glob in globs.split(' ') if glob] + directive = Directive(who=who, globs=globs_list) + else: + directive = Directive(who=line, globs=[]) + if directive: + directives.append(directive) + return Owners( + parent=parent, + directives=directives, + dir=os.path.relpath(os.path.dirname(filename), git_root)) + + +owners_data = sorted( + [parse_owners(filename) for filename in owners_files], + key=operator.attrgetter('dir')) # # Modify owners so that parented OWNERS files point to the actual @@ -98,24 +100,24 @@ owners_data = sorted([parse_owners(filename) new_owners_data = [] for owners in owners_data: - if owners.parent == True: - best_parent = None - best_parent_score = None - for possible_parent in owners_data: - if possible_parent is owners: continue - rel = os.path.relpath(owners.dir, possible_parent.dir) - # '..' ==> we had to walk up from possible_parent to get to owners - # ==> not a parent - if '..' in rel: continue - depth = len(rel.split(os.sep)) - if not best_parent or depth < best_parent_score: - best_parent = possible_parent - best_parent_score = depth - if best_parent: - owners = owners._replace(parent = best_parent.dir) - else: - owners = owners._replace(parent = None) - new_owners_data.append(owners) + if owners.parent == True: + best_parent = None + best_parent_score = None + for possible_parent in owners_data: + if possible_parent is owners: continue + rel = os.path.relpath(owners.dir, possible_parent.dir) + # '..' ==> we had to walk up from possible_parent to get to owners + # ==> not a parent + if '..' 
in rel: continue + depth = len(rel.split(os.sep)) + if not best_parent or depth < best_parent_score: + best_parent = possible_parent + best_parent_score = depth + if best_parent: + owners = owners._replace(parent=best_parent.dir) + else: + owners = owners._replace(parent=None) + new_owners_data.append(owners) owners_data = new_owners_data # @@ -123,106 +125,114 @@ owners_data = new_owners_data # a CODEOWNERS file for GitHub # + def full_dir(rules_dir, sub_path): - return os.path.join(rules_dir, sub_path) if rules_dir != '.' else sub_path + return os.path.join(rules_dir, sub_path) if rules_dir != '.' else sub_path + # glob using git gg_cache = {} + + def git_glob(glob): - global gg_cache - if glob in gg_cache: return gg_cache[glob] - r = set(subprocess - .check_output(['git', 'ls-files', os.path.join(git_root, glob)]) - .decode('utf-8') - .strip() - .splitlines()) - gg_cache[glob] = r - return r + global gg_cache + if glob in gg_cache: return gg_cache[glob] + r = set( + subprocess.check_output( + ['git', 'ls-files', os.path.join(git_root, glob)]).decode('utf-8') + .strip().splitlines()) + gg_cache[glob] = r + return r + def expand_directives(root, directives): - globs = collections.OrderedDict() - # build a table of glob --> owners - for directive in directives: - for glob in directive.globs or ['**']: - if glob not in globs: - globs[glob] = [] - if directive.who not in globs[glob]: - globs[glob].append(directive.who) - # expand owners for intersecting globs - sorted_globs = sorted(globs.keys(), - key=lambda g: len(git_glob(full_dir(root, g))), - reverse=True) - out_globs = collections.OrderedDict() - for glob_add in sorted_globs: - who_add = globs[glob_add] - pre_items = [i for i in out_globs.items()] - out_globs[glob_add] = who_add.copy() - for glob_have, who_have in pre_items: - files_add = git_glob(full_dir(root, glob_add)) - files_have = git_glob(full_dir(root, glob_have)) - intersect = files_have.intersection(files_add) - if intersect: - for f in sorted(files_add): # sorted to ensure merge stability - if f not in intersect: - out_globs[os.path.relpath(f, start=root)] = who_add - for who in who_have: - if who not in out_globs[glob_add]: - out_globs[glob_add].append(who) - return out_globs + globs = collections.OrderedDict() + # build a table of glob --> owners + for directive in directives: + for glob in directive.globs or ['**']: + if glob not in globs: + globs[glob] = [] + if directive.who not in globs[glob]: + globs[glob].append(directive.who) + # expand owners for intersecting globs + sorted_globs = sorted( + globs.keys(), + key=lambda g: len(git_glob(full_dir(root, g))), + reverse=True) + out_globs = collections.OrderedDict() + for glob_add in sorted_globs: + who_add = globs[glob_add] + pre_items = [i for i in out_globs.items()] + out_globs[glob_add] = who_add.copy() + for glob_have, who_have in pre_items: + files_add = git_glob(full_dir(root, glob_add)) + files_have = git_glob(full_dir(root, glob_have)) + intersect = files_have.intersection(files_add) + if intersect: + for f in sorted(files_add): # sorted to ensure merge stability + if f not in intersect: + out_globs[os.path.relpath(f, start=root)] = who_add + for who in who_have: + if who not in out_globs[glob_add]: + out_globs[glob_add].append(who) + return out_globs + def add_parent_to_globs(parent, globs, globs_dir): - if not parent: return - for owners in owners_data: - if owners.dir == parent: - owners_globs = expand_directives(owners.dir, owners.directives) - for oglob, oglob_who in owners_globs.items(): - for gglob, 
gglob_who in globs.items(): - files_parent = git_glob(full_dir(owners.dir, oglob)) - files_child = git_glob(full_dir(globs_dir, gglob)) - intersect = files_parent.intersection(files_child) - gglob_who_orig = gglob_who.copy() - if intersect: - for f in sorted(files_child): # sorted to ensure merge stability - if f not in intersect: - who = gglob_who_orig.copy() - globs[os.path.relpath(f, start=globs_dir)] = who - for who in oglob_who: - if who not in gglob_who: - gglob_who.append(who) - add_parent_to_globs(owners.parent, globs, globs_dir) - return - assert(False) + if not parent: return + for owners in owners_data: + if owners.dir == parent: + owners_globs = expand_directives(owners.dir, owners.directives) + for oglob, oglob_who in owners_globs.items(): + for gglob, gglob_who in globs.items(): + files_parent = git_glob(full_dir(owners.dir, oglob)) + files_child = git_glob(full_dir(globs_dir, gglob)) + intersect = files_parent.intersection(files_child) + gglob_who_orig = gglob_who.copy() + if intersect: + for f in sorted(files_child + ): # sorted to ensure merge stability + if f not in intersect: + who = gglob_who_orig.copy() + globs[os.path.relpath(f, start=globs_dir)] = who + for who in oglob_who: + if who not in gglob_who: + gglob_who.append(who) + add_parent_to_globs(owners.parent, globs, globs_dir) + return + assert (False) + todo = owners_data.copy() done = set() with open(args.out, 'w') as out: - out.write('# Auto-generated by the tools/mkowners/mkowners.py tool\n') - out.write('# Uses OWNERS files in different modules throughout the\n') - out.write('# repository as the source of truth for module ownership.\n') - written_globs = [] - while todo: - head, *todo = todo - if head.parent and not head.parent in done: - todo.append(head) - continue - globs = expand_directives(head.dir, head.directives) - add_parent_to_globs(head.parent, globs, head.dir) - for glob, owners in globs.items(): - skip = False - for glob1, owners1, dir1 in reversed(written_globs): - files = git_glob(full_dir(head.dir, glob)) - files1 = git_glob(full_dir(dir1, glob1)) - intersect = files.intersection(files1) - if files == intersect: - if sorted(owners) == sorted(owners1): - skip = True # nothing new in this rule - break - elif intersect: - # continuing would cause a semantic change since some files are - # affected differently by this rule and CODEOWNERS is order dependent - break - if not skip: - out.write('/%s %s\n' % ( - full_dir(head.dir, glob), ' '.join(owners))) - written_globs.append((glob, owners, head.dir)) - done.add(head.dir) + out.write('# Auto-generated by the tools/mkowners/mkowners.py tool\n') + out.write('# Uses OWNERS files in different modules throughout the\n') + out.write('# repository as the source of truth for module ownership.\n') + written_globs = [] + while todo: + head, *todo = todo + if head.parent and not head.parent in done: + todo.append(head) + continue + globs = expand_directives(head.dir, head.directives) + add_parent_to_globs(head.parent, globs, head.dir) + for glob, owners in globs.items(): + skip = False + for glob1, owners1, dir1 in reversed(written_globs): + files = git_glob(full_dir(head.dir, glob)) + files1 = git_glob(full_dir(dir1, glob1)) + intersect = files.intersection(files1) + if files == intersect: + if sorted(owners) == sorted(owners1): + skip = True # nothing new in this rule + break + elif intersect: + # continuing would cause a semantic change since some files are + # affected differently by this rule and CODEOWNERS is order dependent + break + if not skip: + 
out.write('/%s %s\n' % (full_dir(head.dir, glob), + ' '.join(owners))) + written_globs.append((glob, owners, head.dir)) + done.add(head.dir)
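As a closing note on the mkowners.py rewrite above: the script turns per-directory OWNERS files into a single .github/CODEOWNERS file. The sketch below illustrates only the core mapping it performs; the directory name and GitHub handles are hypothetical, and the glob-intersection and parent-directory merging done by expand_directives and add_parent_to_globs are omitted for brevity.

    # Minimal sketch of the OWNERS -> CODEOWNERS mapping performed by mkowners.py.
    # The owners_dir value and the GitHub handles are hypothetical examples.
    import collections
    import os

    Directive = collections.namedtuple('Directive', 'who globs')

    owners_dir = 'tools/foo'                   # hypothetical OWNERS location
    directives = [
        Directive(who='@alice', globs=[]),     # an OWNERS line containing only a name
        Directive(who='@bob', globs=['*.py']), # an OWNERS line of the form "name glob"
    ]

    for d in directives:
        for glob in d.globs or ['**']:         # bare names default to the '**' glob
            # mkowners.py emits one CODEOWNERS rule per (glob, owners) pair
            print('/%s %s' % (os.path.join(owners_dir, glob), d.who))
    # Expected output (approximately):
    #   /tools/foo/** @alice
    #   /tools/foo/*.py @bob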