-rwxr-xr-x  tools/debug/core/chttp2_ref_leak.py     |  22
-rw-r--r--  tools/debug/core/error_ref_leak.py      |  34
-rwxr-xr-x  tools/distrib/yapf_code.sh              |   7
-rw-r--r--  tools/flakes/detect_flakes.py           |  88
-rwxr-xr-x  tools/gcp/utils/big_query_utils.py      | 276
-rw-r--r--  tools/github/pr_latency.py              | 211
-rwxr-xr-x  tools/line_count/collect-history.py     |  25
-rwxr-xr-x  tools/line_count/summarize-history.py   |  17
-rwxr-xr-x  tools/line_count/yaml2csv.py            |  25
-rwxr-xr-x  tools/mkowners/mkowners.py              | 312
10 files changed, 548 insertions, 469 deletions
diff --git a/tools/debug/core/chttp2_ref_leak.py b/tools/debug/core/chttp2_ref_leak.py
index d693dd9e54..a6a5448775 100755
--- a/tools/debug/core/chttp2_ref_leak.py
+++ b/tools/debug/core/chttp2_ref_leak.py
@@ -20,8 +20,10 @@ import collections
import sys
import re
+
def new_obj():
- return ['destroy']
+ return ['destroy']
+
outstanding = collections.defaultdict(new_obj)
@@ -29,14 +31,14 @@ outstanding = collections.defaultdict(new_obj)
# chttp2:unref:0x629000005200 2->1 destroy [src/core/ext/transport/chttp2/transport/chttp2_transport.c:599]
for line in sys.stdin:
- m = re.search(r'chttp2:( ref|unref):0x([a-fA-F0-9]+) [^ ]+ ([^[]+) \[(.*)\]', line)
- if m:
- if m.group(1) == ' ref':
- outstanding[m.group(2)].append(m.group(3))
- else:
- outstanding[m.group(2)].remove(m.group(3))
+ m = re.search(
+ r'chttp2:( ref|unref):0x([a-fA-F0-9]+) [^ ]+ ([^[]+) \[(.*)\]', line)
+ if m:
+ if m.group(1) == ' ref':
+ outstanding[m.group(2)].append(m.group(3))
+ else:
+ outstanding[m.group(2)].remove(m.group(3))
for obj, remaining in outstanding.items():
- if remaining:
- print 'LEAKED: %s %r' % (obj, remaining)
-
+ if remaining:
+ print 'LEAKED: %s %r' % (obj, remaining)
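For context: chttp2_ref_leak.py reads chttp2 ref/unref trace lines from stdin, keyed by object address, and reports any object whose event list is non-empty at EOF. A minimal sketch of the parsing step, using the sample line quoted in the script's own comment (the harness around it is illustrative):

    import re

    # Sample trace line, taken from the comment inside chttp2_ref_leak.py.
    line = ('chttp2:unref:0x629000005200 2->1 destroy '
            '[src/core/ext/transport/chttp2/transport/chttp2_transport.c:599]')
    m = re.search(
        r'chttp2:( ref|unref):0x([a-fA-F0-9]+) [^ ]+ ([^[]+) \[(.*)\]', line)
    print(m.group(1))          # 'unref' -> a reference is being dropped
    print(m.group(2))          # '629000005200' -> the address used as the key
    print(m.group(3).strip())  # 'destroy' -> the annotated reason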
diff --git a/tools/debug/core/error_ref_leak.py b/tools/debug/core/error_ref_leak.py
index 6582328a5b..7806338683 100644
--- a/tools/debug/core/error_ref_leak.py
+++ b/tools/debug/core/error_ref_leak.py
@@ -26,22 +26,22 @@ data = sys.stdin.readlines()
errs = []
for line in data:
- # if we care about the line
- if re.search(r'error.cc', line):
- # str manip to cut off left part of log line
- line = line.partition('error.cc:')[-1]
- line = re.sub(r'\d+] ', r'', line)
- line = line.strip().split()
- err = line[0].strip(":")
- if line[1] == "create":
- assert(err not in errs)
- errs.append(err)
- elif line[0] == "realloc":
- errs.remove(line[1])
- errs.append(line[3])
- # explicitly look for the last dereference
- elif line[1] == "1" and line[3] == "0":
- assert(err in errs)
- errs.remove(err)
+ # if we care about the line
+ if re.search(r'error.cc', line):
+ # str manip to cut off left part of log line
+ line = line.partition('error.cc:')[-1]
+ line = re.sub(r'\d+] ', r'', line)
+ line = line.strip().split()
+ err = line[0].strip(":")
+ if line[1] == "create":
+ assert (err not in errs)
+ errs.append(err)
+ elif line[0] == "realloc":
+ errs.remove(line[1])
+ errs.append(line[3])
+ # explicitly look for the last dereference
+ elif line[1] == "1" and line[3] == "0":
+ assert (err in errs)
+ errs.remove(err)
print "leaked:", errs
diff --git a/tools/distrib/yapf_code.sh b/tools/distrib/yapf_code.sh
index 698c341d88..85a45b6a11 100755
--- a/tools/distrib/yapf_code.sh
+++ b/tools/distrib/yapf_code.sh
@@ -20,12 +20,7 @@ cd "$(dirname "${0}")/../.."
DIRS=(
'src/python'
- 'tools/buildgen'
- 'tools/codegen'
- 'tools/distrib'
- 'tools/interop_matrix'
- 'tools/profiling'
- 'tools/run_tests'
+ 'tools'
)
EXCLUSIONS=(
'grpcio/grpc_*.py'
diff --git a/tools/flakes/detect_flakes.py b/tools/flakes/detect_flakes.py
index c5c7f61771..b066ee6139 100644
--- a/tools/flakes/detect_flakes.py
+++ b/tools/flakes/detect_flakes.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""Detect new flakes introduced in the last 24h hours with respect to the
previous six days"""
@@ -32,26 +31,29 @@ sys.path.append(gcp_utils_dir)
import big_query_utils
+
def print_table(table):
kokoro_base_url = 'https://kokoro.corp.google.com/job/'
for k, v in table.items():
- job_name = v[0]
- build_id = v[1]
- ts = int(float(v[2]))
- # TODO(dgq): timezone handling is wrong. We need to determine the timezone
- # of the computer running this script.
- human_ts = datetime.datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S PDT')
- job_path = '{}/{}'.format('/job/'.join(job_name.split('/')), build_id)
- full_kokoro_url = kokoro_base_url + job_path
- print("Test: {}, Timestamp: {}, url: {}\n".format(k, human_ts, full_kokoro_url))
+ job_name = v[0]
+ build_id = v[1]
+ ts = int(float(v[2]))
+ # TODO(dgq): timezone handling is wrong. We need to determine the timezone
+ # of the computer running this script.
+ human_ts = datetime.datetime.utcfromtimestamp(ts).strftime(
+ '%Y-%m-%d %H:%M:%S PDT')
+ job_path = '{}/{}'.format('/job/'.join(job_name.split('/')), build_id)
+ full_kokoro_url = kokoro_base_url + job_path
+ print("Test: {}, Timestamp: {}, url: {}\n".format(k, human_ts,
+ full_kokoro_url))
def get_flaky_tests(days_lower_bound, days_upper_bound, limit=None):
- """ period is one of "WEEK", "DAY", etc.
+ """ period is one of "WEEK", "DAY", etc.
(see https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#date_add). """
- bq = big_query_utils.create_big_query()
- query = """
+ bq = big_query_utils.create_big_query()
+ query = """
SELECT
REGEXP_REPLACE(test_name, r'/\d+', '') AS filtered_test_name,
job_name,
@@ -65,41 +67,45 @@ WHERE
AND NOT REGEXP_MATCH(job_name, '.*portability.*')
AND result != 'PASSED' AND result != 'SKIPPED'
ORDER BY timestamp desc
-""".format(days_lower_bound=days_lower_bound, days_upper_bound=days_upper_bound)
- if limit:
- query += '\n LIMIT {}'.format(limit)
- query_job = big_query_utils.sync_query_job(bq, 'grpc-testing', query)
- page = bq.jobs().getQueryResults(
- pageToken=None, **query_job['jobReference']).execute(num_retries=3)
- rows = page.get('rows')
- if rows:
- return {row['f'][0]['v']:
+""".format(
+ days_lower_bound=days_lower_bound, days_upper_bound=days_upper_bound)
+ if limit:
+ query += '\n LIMIT {}'.format(limit)
+ query_job = big_query_utils.sync_query_job(bq, 'grpc-testing', query)
+ page = bq.jobs().getQueryResults(
+ pageToken=None, **query_job['jobReference']).execute(num_retries=3)
+ rows = page.get('rows')
+ if rows:
+ return {
+ row['f'][0]['v']:
(row['f'][1]['v'], row['f'][2]['v'], row['f'][3]['v'])
- for row in rows}
- else:
- return {}
+ for row in rows
+ }
+ else:
+ return {}
def get_new_flakes():
- last_week_sans_yesterday = get_flaky_tests(-14, -1)
- last_24 = get_flaky_tests(0, +1)
- last_week_sans_yesterday_names = set(last_week_sans_yesterday.keys())
- last_24_names = set(last_24.keys())
- logging.debug('|last_week_sans_yesterday| =', len(last_week_sans_yesterday_names))
- logging.debug('|last_24_names| =', len(last_24_names))
- new_flakes = last_24_names - last_week_sans_yesterday_names
- logging.debug('|new_flakes| = ', len(new_flakes))
- return {k: last_24[k] for k in new_flakes}
+ last_week_sans_yesterday = get_flaky_tests(-14, -1)
+ last_24 = get_flaky_tests(0, +1)
+ last_week_sans_yesterday_names = set(last_week_sans_yesterday.keys())
+ last_24_names = set(last_24.keys())
+ logging.debug('|last_week_sans_yesterday| =',
+ len(last_week_sans_yesterday_names))
+ logging.debug('|last_24_names| =', len(last_24_names))
+ new_flakes = last_24_names - last_week_sans_yesterday_names
+ logging.debug('|new_flakes| = ', len(new_flakes))
+ return {k: last_24[k] for k in new_flakes}
def main():
- new_flakes = get_new_flakes()
- if new_flakes:
- print("Found {} new flakes:".format(len(new_flakes)))
- print_table(new_flakes)
- else:
- print("No new flakes found!")
+ new_flakes = get_new_flakes()
+ if new_flakes:
+ print("Found {} new flakes:".format(len(new_flakes)))
+ print_table(new_flakes)
+ else:
+ print("No new flakes found!")
if __name__ == '__main__':
- main()
+ main()
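The flake detection itself is a set difference over test names: anything failing in the last 24 hours that did not also fail during the preceding window counts as new. A self-contained sketch of that step with made-up data, the BigQuery plumbing omitted (get_flaky_tests() maps test name to a (job_name, build_id, timestamp) tuple):

    # Illustrative results in the shape get_flaky_tests() returns.
    last_week_sans_yesterday = {
        'core.bad_server': ('grpc/core/master', '101', '1500000000'),
    }
    last_24 = {
        'core.bad_server': ('grpc/core/master', '140', '1500500000'),
        'python.interop': ('grpc/python/master', '141', '1500500100'),
    }
    new_flakes = set(last_24) - set(last_week_sans_yesterday)
    print({k: last_24[k] for k in new_flakes})
    # {'python.interop': ('grpc/python/master', '141', '1500500100')}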
diff --git a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py
index 77a5f5691e..3e811ca2bf 100755
--- a/tools/gcp/utils/big_query_utils.py
+++ b/tools/gcp/utils/big_query_utils.py
@@ -28,154 +28,174 @@ NUM_RETRIES = 3
def create_big_query():
- """Authenticates with cloud platform and gets a BiqQuery service object
+ """Authenticates with cloud platform and gets a BiqQuery service object
"""
- creds = GoogleCredentials.get_application_default()
- return discovery.build('bigquery', 'v2', credentials=creds, cache_discovery=False)
+ creds = GoogleCredentials.get_application_default()
+ return discovery.build(
+ 'bigquery', 'v2', credentials=creds, cache_discovery=False)
def create_dataset(biq_query, project_id, dataset_id):
- is_success = True
- body = {
- 'datasetReference': {
- 'projectId': project_id,
- 'datasetId': dataset_id
- }
- }
-
- try:
- dataset_req = biq_query.datasets().insert(projectId=project_id, body=body)
- dataset_req.execute(num_retries=NUM_RETRIES)
- except HttpError as http_error:
- if http_error.resp.status == 409:
- print 'Warning: The dataset %s already exists' % dataset_id
- else:
- # Note: For more debugging info, print "http_error.content"
- print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error)
- is_success = False
- return is_success
+ is_success = True
+ body = {
+ 'datasetReference': {
+ 'projectId': project_id,
+ 'datasetId': dataset_id
+ }
+ }
+
+ try:
+ dataset_req = biq_query.datasets().insert(
+ projectId=project_id, body=body)
+ dataset_req.execute(num_retries=NUM_RETRIES)
+ except HttpError as http_error:
+ if http_error.resp.status == 409:
+ print 'Warning: The dataset %s already exists' % dataset_id
+ else:
+ # Note: For more debugging info, print "http_error.content"
+ print 'Error in creating dataset: %s. Err: %s' % (dataset_id,
+ http_error)
+ is_success = False
+ return is_success
def create_table(big_query, project_id, dataset_id, table_id, table_schema,
description):
- fields = [{'name': field_name,
- 'type': field_type,
- 'description': field_description
- } for (field_name, field_type, field_description) in table_schema]
- return create_table2(big_query, project_id, dataset_id, table_id,
- fields, description)
-
-
-def create_partitioned_table(big_query, project_id, dataset_id, table_id, table_schema,
- description, partition_type='DAY', expiration_ms=_EXPIRATION_MS):
- """Creates a partitioned table. By default, a date-paritioned table is created with
+ fields = [{
+ 'name': field_name,
+ 'type': field_type,
+ 'description': field_description
+ } for (field_name, field_type, field_description) in table_schema]
+ return create_table2(big_query, project_id, dataset_id, table_id, fields,
+ description)
+
+
+def create_partitioned_table(big_query,
+ project_id,
+ dataset_id,
+ table_id,
+ table_schema,
+ description,
+ partition_type='DAY',
+ expiration_ms=_EXPIRATION_MS):
+ """Creates a partitioned table. By default, a date-paritioned table is created with
each partition lasting 30 days after it was last modified.
"""
- fields = [{'name': field_name,
- 'type': field_type,
- 'description': field_description
- } for (field_name, field_type, field_description) in table_schema]
- return create_table2(big_query, project_id, dataset_id, table_id,
- fields, description, partition_type, expiration_ms)
-
-
-def create_table2(big_query, project_id, dataset_id, table_id, fields_schema,
- description, partition_type=None, expiration_ms=None):
- is_success = True
-
- body = {
- 'description': description,
- 'schema': {
- 'fields': fields_schema
- },
- 'tableReference': {
- 'datasetId': dataset_id,
- 'projectId': project_id,
- 'tableId': table_id
- }
- }
-
- if partition_type and expiration_ms:
- body["timePartitioning"] = {
- "type": partition_type,
- "expirationMs": expiration_ms
+ fields = [{
+ 'name': field_name,
+ 'type': field_type,
+ 'description': field_description
+ } for (field_name, field_type, field_description) in table_schema]
+ return create_table2(big_query, project_id, dataset_id, table_id, fields,
+ description, partition_type, expiration_ms)
+
+
+def create_table2(big_query,
+ project_id,
+ dataset_id,
+ table_id,
+ fields_schema,
+ description,
+ partition_type=None,
+ expiration_ms=None):
+ is_success = True
+
+ body = {
+ 'description': description,
+ 'schema': {
+ 'fields': fields_schema
+ },
+ 'tableReference': {
+ 'datasetId': dataset_id,
+ 'projectId': project_id,
+ 'tableId': table_id
+ }
}
- try:
- table_req = big_query.tables().insert(projectId=project_id,
- datasetId=dataset_id,
- body=body)
- res = table_req.execute(num_retries=NUM_RETRIES)
- print 'Successfully created %s "%s"' % (res['kind'], res['id'])
- except HttpError as http_error:
- if http_error.resp.status == 409:
- print 'Warning: Table %s already exists' % table_id
- else:
- print 'Error in creating table: %s. Err: %s' % (table_id, http_error)
- is_success = False
- return is_success
+ if partition_type and expiration_ms:
+ body["timePartitioning"] = {
+ "type": partition_type,
+ "expirationMs": expiration_ms
+ }
+
+ try:
+ table_req = big_query.tables().insert(
+ projectId=project_id, datasetId=dataset_id, body=body)
+ res = table_req.execute(num_retries=NUM_RETRIES)
+ print 'Successfully created %s "%s"' % (res['kind'], res['id'])
+ except HttpError as http_error:
+ if http_error.resp.status == 409:
+ print 'Warning: Table %s already exists' % table_id
+ else:
+ print 'Error in creating table: %s. Err: %s' % (table_id,
+ http_error)
+ is_success = False
+ return is_success
def patch_table(big_query, project_id, dataset_id, table_id, fields_schema):
- is_success = True
-
- body = {
- 'schema': {
- 'fields': fields_schema
- },
- 'tableReference': {
- 'datasetId': dataset_id,
- 'projectId': project_id,
- 'tableId': table_id
- }
- }
-
- try:
- table_req = big_query.tables().patch(projectId=project_id,
- datasetId=dataset_id,
- tableId=table_id,
- body=body)
- res = table_req.execute(num_retries=NUM_RETRIES)
- print 'Successfully patched %s "%s"' % (res['kind'], res['id'])
- except HttpError as http_error:
- print 'Error in creating table: %s. Err: %s' % (table_id, http_error)
- is_success = False
- return is_success
+ is_success = True
+
+ body = {
+ 'schema': {
+ 'fields': fields_schema
+ },
+ 'tableReference': {
+ 'datasetId': dataset_id,
+ 'projectId': project_id,
+ 'tableId': table_id
+ }
+ }
+
+ try:
+ table_req = big_query.tables().patch(
+ projectId=project_id,
+ datasetId=dataset_id,
+ tableId=table_id,
+ body=body)
+ res = table_req.execute(num_retries=NUM_RETRIES)
+ print 'Successfully patched %s "%s"' % (res['kind'], res['id'])
+ except HttpError as http_error:
+ print 'Error in creating table: %s. Err: %s' % (table_id, http_error)
+ is_success = False
+ return is_success
def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
- is_success = True
- body = {'rows': rows_list}
- try:
- insert_req = big_query.tabledata().insertAll(projectId=project_id,
- datasetId=dataset_id,
- tableId=table_id,
- body=body)
- res = insert_req.execute(num_retries=NUM_RETRIES)
- if res.get('insertErrors', None):
- print 'Error inserting rows! Response: %s' % res
- is_success = False
- except HttpError as http_error:
- print 'Error inserting rows to the table %s' % table_id
- is_success = False
-
- return is_success
+ is_success = True
+ body = {'rows': rows_list}
+ try:
+ insert_req = big_query.tabledata().insertAll(
+ projectId=project_id,
+ datasetId=dataset_id,
+ tableId=table_id,
+ body=body)
+ res = insert_req.execute(num_retries=NUM_RETRIES)
+ if res.get('insertErrors', None):
+ print 'Error inserting rows! Response: %s' % res
+ is_success = False
+ except HttpError as http_error:
+ print 'Error inserting rows to the table %s' % table_id
+ is_success = False
+
+ return is_success
def sync_query_job(big_query, project_id, query, timeout=5000):
- query_data = {'query': query, 'timeoutMs': timeout}
- query_job = None
- try:
- query_job = big_query.jobs().query(
- projectId=project_id,
- body=query_data).execute(num_retries=NUM_RETRIES)
- except HttpError as http_error:
- print 'Query execute job failed with error: %s' % http_error
- print http_error.content
- return query_job
-
- # List of (column name, column type, description) tuples
+ query_data = {'query': query, 'timeoutMs': timeout}
+ query_job = None
+ try:
+ query_job = big_query.jobs().query(
+ projectId=project_id,
+ body=query_data).execute(num_retries=NUM_RETRIES)
+ except HttpError as http_error:
+ print 'Query execute job failed with error: %s' % http_error
+ print http_error.content
+ return query_job
+
+
+ # List of (column name, column type, description) tuples
def make_row(unique_row_id, row_values_dict):
- """row_values_dict is a dictionary of column name and column value.
+ """row_values_dict is a dictionary of column name and column value.
"""
- return {'insertId': unique_row_id, 'json': row_values_dict}
+ return {'insertId': unique_row_id, 'json': row_values_dict}
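A typical call pattern for these helpers, with placeholder project, dataset, table and schema names (all illustrative, not from the diff); the signatures match the functions above, and the import assumes the caller has put tools/gcp/utils on sys.path as detect_flakes.py does:

    import uuid

    import big_query_utils

    bq = big_query_utils.create_big_query()
    # (column name, column type, description) tuples, per the comment above.
    schema = [('test_name', 'STRING', 'fully qualified test name'),
              ('result', 'STRING', 'e.g. PASSED or FAILED')]
    big_query_utils.create_table(bq, 'my-project', 'my_dataset', 'results',
                                 schema, 'hypothetical results table')
    rows = [
        big_query_utils.make_row(
            str(uuid.uuid4()), {'test_name': 'core.foo',
                                'result': 'PASSED'})
    ]
    big_query_utils.insert_rows(bq, 'my-project', 'my_dataset', 'results', rows)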
diff --git a/tools/github/pr_latency.py b/tools/github/pr_latency.py
index 5d635835e5..0131e60bbc 100644
--- a/tools/github/pr_latency.py
+++ b/tools/github/pr_latency.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""Measure the time between PR creation and completion of all tests.
You'll need a github API token to avoid being rate-limited. See
@@ -46,118 +45,156 @@ COMMITS = 'https://api.github.com/repos/grpc/grpc/pulls/{pr_number}/commits'
def gh(url):
- request = urllib2.Request(url)
- if TOKEN:
- request.add_header('Authorization', 'token {}'.format(TOKEN))
- response = urllib2.urlopen(request)
- return response.read()
+ request = urllib2.Request(url)
+ if TOKEN:
+ request.add_header('Authorization', 'token {}'.format(TOKEN))
+ response = urllib2.urlopen(request)
+ return response.read()
def print_csv_header():
- print('pr,base_time,test_time,latency_seconds,successes,failures,errors')
-
-
-def output(pr, base_time, test_time, diff_time, successes, failures, errors, mode='human'):
- if mode == 'human':
- print("PR #{} base time: {} UTC, Tests completed at: {} UTC. Latency: {}."
- "\n\tSuccesses: {}, Failures: {}, Errors: {}".format(
- pr, base_time, test_time, diff_time, successes, failures, errors))
- elif mode == 'csv':
- print(','.join([str(pr), str(base_time),
- str(test_time), str(int((test_time-base_time).total_seconds())),
- str(successes), str(failures), str(errors)]))
+ print('pr,base_time,test_time,latency_seconds,successes,failures,errors')
+
+
+def output(pr,
+ base_time,
+ test_time,
+ diff_time,
+ successes,
+ failures,
+ errors,
+ mode='human'):
+ if mode == 'human':
+ print(
+ "PR #{} base time: {} UTC, Tests completed at: {} UTC. Latency: {}."
+ "\n\tSuccesses: {}, Failures: {}, Errors: {}".format(
+ pr, base_time, test_time, diff_time, successes, failures,
+ errors))
+ elif mode == 'csv':
+ print(','.join([
+ str(pr), str(base_time), str(test_time), str(
+ int((test_time - base_time).total_seconds())), str(successes),
+ str(failures), str(errors)
+ ]))
def parse_timestamp(datetime_str):
- return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%SZ')
+ return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%SZ')
def to_posix_timestamp(dt):
- return str((dt - datetime(1970, 1, 1)).total_seconds())
+ return str((dt - datetime(1970, 1, 1)).total_seconds())
def get_pr_data():
- latest_prs = json.loads(gh(PRS))
- res = [{'number': pr['number'],
- 'created_at': parse_timestamp(pr['created_at']),
- 'updated_at': parse_timestamp(pr['updated_at']),
- 'statuses_url': pr['statuses_url']}
- for pr in latest_prs]
- return res
+ latest_prs = json.loads(gh(PRS))
+ res = [{
+ 'number': pr['number'],
+ 'created_at': parse_timestamp(pr['created_at']),
+ 'updated_at': parse_timestamp(pr['updated_at']),
+ 'statuses_url': pr['statuses_url']
+ } for pr in latest_prs]
+ return res
def get_commits_data(pr_number):
- commits = json.loads(gh(COMMITS.format(pr_number=pr_number)))
- return {'num_commits': len(commits),
- 'most_recent_date': parse_timestamp(commits[-1]['commit']['author']['date'])}
+ commits = json.loads(gh(COMMITS.format(pr_number=pr_number)))
+ return {
+ 'num_commits': len(commits),
+ 'most_recent_date':
+ parse_timestamp(commits[-1]['commit']['author']['date'])
+ }
def get_status_data(statuses_url, system):
- status_url = statuses_url.replace('statuses', 'status')
- statuses = json.loads(gh(status_url + '?per_page=100'))
- successes = 0
- failures = 0
- errors = 0
- latest_datetime = None
- if not statuses: return None
- if system == 'kokoro': string_in_target_url = 'kokoro'
- elif system == 'jenkins': string_in_target_url = 'grpc-testing'
- for status in statuses['statuses']:
- if not status['target_url'] or string_in_target_url not in status['target_url']: continue # Ignore jenkins
- if status['state'] == 'pending': return None
- elif status['state'] == 'success': successes += 1
- elif status['state'] == 'failure': failures += 1
- elif status['state'] == 'error': errors += 1
- if not latest_datetime:
- latest_datetime = parse_timestamp(status['updated_at'])
- else:
- latest_datetime = max(latest_datetime, parse_timestamp(status['updated_at']))
- # First status is the most recent one.
- if any([successes, failures, errors]) and sum([successes, failures, errors]) > 15:
- return {'latest_datetime': latest_datetime,
+ status_url = statuses_url.replace('statuses', 'status')
+ statuses = json.loads(gh(status_url + '?per_page=100'))
+ successes = 0
+ failures = 0
+ errors = 0
+ latest_datetime = None
+ if not statuses: return None
+ if system == 'kokoro': string_in_target_url = 'kokoro'
+ elif system == 'jenkins': string_in_target_url = 'grpc-testing'
+ for status in statuses['statuses']:
+ if not status['target_url'] or string_in_target_url not in status[
+ 'target_url']:
+ continue # Ignore jenkins
+ if status['state'] == 'pending': return None
+ elif status['state'] == 'success': successes += 1
+ elif status['state'] == 'failure': failures += 1
+ elif status['state'] == 'error': errors += 1
+ if not latest_datetime:
+ latest_datetime = parse_timestamp(status['updated_at'])
+ else:
+ latest_datetime = max(latest_datetime,
+ parse_timestamp(status['updated_at']))
+ # First status is the most recent one.
+ if any([successes, failures, errors]) and sum(
+ [successes, failures, errors]) > 15:
+ return {
+ 'latest_datetime': latest_datetime,
'successes': successes,
'failures': failures,
- 'errors': errors}
- else: return None
+ 'errors': errors
+ }
+ else:
+ return None
def build_args_parser():
- import argparse
- parser = argparse.ArgumentParser()
- parser.add_argument('--format', type=str, choices=['human', 'csv'],
- default='human',
- help='Output format: are you a human or a machine?')
- parser.add_argument('--system', type=str, choices=['jenkins', 'kokoro'],
- required=True, help='Consider only the given CI system')
- parser.add_argument('--token', type=str, default='',
- help='GitHub token to use its API with a higher rate limit')
- return parser
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--format',
+ type=str,
+ choices=['human', 'csv'],
+ default='human',
+ help='Output format: are you a human or a machine?')
+ parser.add_argument(
+ '--system',
+ type=str,
+ choices=['jenkins', 'kokoro'],
+ required=True,
+ help='Consider only the given CI system')
+ parser.add_argument(
+ '--token',
+ type=str,
+ default='',
+ help='GitHub token to use its API with a higher rate limit')
+ return parser
def main():
- import sys
- global TOKEN
- args_parser = build_args_parser()
- args = args_parser.parse_args()
- TOKEN = args.token
- if args.format == 'csv': print_csv_header()
- for pr_data in get_pr_data():
- commit_data = get_commits_data(pr_data['number'])
- # PR with a single commit -> use the PR's creation time.
- # else -> use the latest commit's date.
- base_timestamp = pr_data['updated_at']
- if commit_data['num_commits'] > 1:
- base_timestamp = commit_data['most_recent_date']
- else:
- base_timestamp = pr_data['created_at']
- last_status = get_status_data(pr_data['statuses_url'], args.system)
- if last_status:
- diff = last_status['latest_datetime'] - base_timestamp
- if diff < timedelta(hours=5):
- output(pr_data['number'], base_timestamp, last_status['latest_datetime'],
- diff, last_status['successes'], last_status['failures'],
- last_status['errors'], mode=args.format)
+ import sys
+ global TOKEN
+ args_parser = build_args_parser()
+ args = args_parser.parse_args()
+ TOKEN = args.token
+ if args.format == 'csv': print_csv_header()
+ for pr_data in get_pr_data():
+ commit_data = get_commits_data(pr_data['number'])
+ # PR with a single commit -> use the PR's creation time.
+ # else -> use the latest commit's date.
+ base_timestamp = pr_data['updated_at']
+ if commit_data['num_commits'] > 1:
+ base_timestamp = commit_data['most_recent_date']
+ else:
+ base_timestamp = pr_data['created_at']
+ last_status = get_status_data(pr_data['statuses_url'], args.system)
+ if last_status:
+ diff = last_status['latest_datetime'] - base_timestamp
+ if diff < timedelta(hours=5):
+ output(
+ pr_data['number'],
+ base_timestamp,
+ last_status['latest_datetime'],
+ diff,
+ last_status['successes'],
+ last_status['failures'],
+ last_status['errors'],
+ mode=args.format)
if __name__ == '__main__':
- main()
+ main()
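The latency computation reduces to subtracting the base timestamp (PR creation time, or the newest commit date for multi-commit PRs) from the newest CI status update, discarding anything over five hours. A sketch with fixed datetimes (values illustrative):

    from datetime import datetime, timedelta

    base_time = datetime(2017, 8, 1, 12, 0, 0)   # PR created / last commit pushed
    test_time = datetime(2017, 8, 1, 13, 30, 0)  # newest CI status update
    diff = test_time - base_time
    if diff < timedelta(hours=5):  # same sanity filter as main()
        print(int(diff.total_seconds()))  # 5400, the csv latency_seconds column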
diff --git a/tools/line_count/collect-history.py b/tools/line_count/collect-history.py
index 3f030fbb8f..d2d5c95705 100755
--- a/tools/line_count/collect-history.py
+++ b/tools/line_count/collect-history.py
@@ -19,20 +19,23 @@ import datetime
# this script is only of historical interest: it's the script that was used to
# bootstrap the dataset
+
def daterange(start, end):
- for n in range(int((end - start).days)):
- yield start + datetime.timedelta(n)
+ for n in range(int((end - start).days)):
+ yield start + datetime.timedelta(n)
+
start_date = datetime.date(2017, 3, 26)
end_date = datetime.date(2017, 3, 29)
for dt in daterange(start_date, end_date):
- dmy = dt.strftime('%Y-%m-%d')
- sha1 = subprocess.check_output(['git', 'rev-list', '-n', '1',
- '--before=%s' % dmy,
- 'master']).strip()
- subprocess.check_call(['git', 'checkout', sha1])
- subprocess.check_call(['git', 'submodule', 'update'])
- subprocess.check_call(['git', 'clean', '-f', '-x', '-d'])
- subprocess.check_call(['cloc', '--vcs=git', '--by-file', '--yaml', '--out=../count/%s.yaml' % dmy, '.'])
-
+ dmy = dt.strftime('%Y-%m-%d')
+ sha1 = subprocess.check_output(
+ ['git', 'rev-list', '-n', '1', '--before=%s' % dmy, 'master']).strip()
+ subprocess.check_call(['git', 'checkout', sha1])
+ subprocess.check_call(['git', 'submodule', 'update'])
+ subprocess.check_call(['git', 'clean', '-f', '-x', '-d'])
+ subprocess.check_call([
+ 'cloc', '--vcs=git', '--by-file', '--yaml',
+ '--out=../count/%s.yaml' % dmy, '.'
+ ])
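daterange() is half-open: it yields each day from start up to but not including end, so the loop above checks out the last commit before each of 2017-03-26, -27 and -28 and runs cloc on it. A quick demonstration:

    import datetime

    def daterange(start, end):
        for n in range(int((end - start).days)):
            yield start + datetime.timedelta(n)

    for dt in daterange(datetime.date(2017, 3, 26), datetime.date(2017, 3, 29)):
        print(dt.strftime('%Y-%m-%d'))
    # 2017-03-26
    # 2017-03-27
    # 2017-03-28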
diff --git a/tools/line_count/summarize-history.py b/tools/line_count/summarize-history.py
index d2ef7ec324..80b0ed7a7e 100755
--- a/tools/line_count/summarize-history.py
+++ b/tools/line_count/summarize-history.py
@@ -13,22 +13,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
import subprocess
import datetime
# this script is only of historical interest: it's the script that was used to
# bootstrap the dataset
+
def daterange(start, end):
- for n in range(int((end - start).days)):
- yield start + datetime.timedelta(n)
+ for n in range(int((end - start).days)):
+ yield start + datetime.timedelta(n)
+
start_date = datetime.date(2017, 3, 26)
end_date = datetime.date(2017, 3, 29)
for dt in daterange(start_date, end_date):
- dmy = dt.strftime('%Y-%m-%d')
- print dmy
- subprocess.check_call(['tools/line_count/yaml2csv.py', '-i', '../count/%s.yaml' % dmy, '-d', dmy, '-o', '../count/%s.csv' % dmy])
-
+ dmy = dt.strftime('%Y-%m-%d')
+ print dmy
+ subprocess.check_call([
+ 'tools/line_count/yaml2csv.py', '-i', '../count/%s.yaml' % dmy, '-d',
+ dmy, '-o', '../count/%s.csv' % dmy
+ ])
diff --git a/tools/line_count/yaml2csv.py b/tools/line_count/yaml2csv.py
index 2a38a12c80..dd2e92b360 100755
--- a/tools/line_count/yaml2csv.py
+++ b/tools/line_count/yaml2csv.py
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
import yaml
import argparse
import datetime
@@ -21,18 +20,22 @@ import csv
argp = argparse.ArgumentParser(description='Convert cloc yaml to bigquery csv')
argp.add_argument('-i', '--input', type=str)
-argp.add_argument('-d', '--date', type=str, default=datetime.date.today().strftime('%Y-%m-%d'))
+argp.add_argument(
+ '-d',
+ '--date',
+ type=str,
+ default=datetime.date.today().strftime('%Y-%m-%d'))
argp.add_argument('-o', '--output', type=str, default='out.csv')
args = argp.parse_args()
data = yaml.load(open(args.input).read())
with open(args.output, 'w') as outf:
- writer = csv.DictWriter(outf, ['date', 'name', 'language', 'code', 'comment', 'blank'])
- for key, value in data.iteritems():
- if key == 'header': continue
- if key == 'SUM': continue
- if key.startswith('third_party/'): continue
- row = {'name': key, 'date': args.date}
- row.update(value)
- writer.writerow(row)
-
+ writer = csv.DictWriter(
+ outf, ['date', 'name', 'language', 'code', 'comment', 'blank'])
+ for key, value in data.iteritems():
+ if key == 'header': continue
+ if key == 'SUM': continue
+ if key.startswith('third_party/'): continue
+ row = {'name': key, 'date': args.date}
+ row.update(value)
+ writer.writerow(row)
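The conversion emits one csv row per source file from cloc's per-file YAML, skipping the header and SUM entries and anything under third_party/. A self-contained sketch with an inline stand-in for cloc --by-file --yaml output (the sample data is illustrative of its shape, not taken from a real run):

    import csv
    import sys

    import yaml

    # Illustrative stand-in for what 'cloc --by-file --yaml' produces.
    data = yaml.safe_load(
        'header: {cloc_version: 1.74}\n'
        'src/core/lib/surface/call.c: '
        '{blank: 10, comment: 5, code: 90, language: C}\n'
        'SUM: {blank: 10, comment: 5, code: 90, language: C}\n')
    writer = csv.DictWriter(
        sys.stdout, ['date', 'name', 'language', 'code', 'comment', 'blank'])
    for key, value in data.items():
        if key in ('header', 'SUM') or key.startswith('third_party/'):
            continue
        row = {'name': key, 'date': '2017-03-26'}
        row.update(value)
        writer.writerow(row)
    # 2017-03-26,src/core/lib/surface/call.c,C,90,5,10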
diff --git a/tools/mkowners/mkowners.py b/tools/mkowners/mkowners.py
index e0ad998bdc..d8b3d3c332 100755
--- a/tools/mkowners/mkowners.py
+++ b/tools/mkowners/mkowners.py
@@ -24,10 +24,8 @@ import subprocess
# Find the root of the git tree
#
-git_root = (subprocess
- .check_output(['git', 'rev-parse', '--show-toplevel'])
- .decode('utf-8')
- .strip())
+git_root = (subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
+ .decode('utf-8').strip())
#
# Parse command line arguments
@@ -36,19 +34,22 @@ git_root = (subprocess
default_out = os.path.join(git_root, '.github', 'CODEOWNERS')
argp = argparse.ArgumentParser('Generate .github/CODEOWNERS file')
-argp.add_argument('--out', '-o',
- type=str,
- default=default_out,
- help='Output file (default %s)' % default_out)
+argp.add_argument(
+ '--out',
+ '-o',
+ type=str,
+ default=default_out,
+ help='Output file (default %s)' % default_out)
args = argp.parse_args()
#
# Walk git tree to locate all OWNERS files
#
-owners_files = [os.path.join(root, 'OWNERS')
- for root, dirs, files in os.walk(git_root)
- if 'OWNERS' in files]
+owners_files = [
+ os.path.join(root, 'OWNERS') for root, dirs, files in os.walk(git_root)
+ if 'OWNERS' in files
+]
#
# Parse owners files
@@ -57,39 +58,40 @@ owners_files = [os.path.join(root, 'OWNERS')
Owners = collections.namedtuple('Owners', 'parent directives dir')
Directive = collections.namedtuple('Directive', 'who globs')
+
def parse_owners(filename):
- with open(filename) as f:
- src = f.read().splitlines()
- parent = True
- directives = []
- for line in src:
- line = line.strip()
- # line := directive | comment
- if not line: continue
- if line[0] == '#': continue
- # it's a directive
- directive = None
- if line == 'set noparent':
- parent = False
- elif line == '*':
- directive = Directive(who='*', globs=[])
- elif ' ' in line:
- (who, globs) = line.split(' ', 1)
- globs_list = [glob
- for glob in globs.split(' ')
- if glob]
- directive = Directive(who=who, globs=globs_list)
- else:
- directive = Directive(who=line, globs=[])
- if directive:
- directives.append(directive)
- return Owners(parent=parent,
- directives=directives,
- dir=os.path.relpath(os.path.dirname(filename), git_root))
-
-owners_data = sorted([parse_owners(filename)
- for filename in owners_files],
- key=operator.attrgetter('dir'))
+ with open(filename) as f:
+ src = f.read().splitlines()
+ parent = True
+ directives = []
+ for line in src:
+ line = line.strip()
+ # line := directive | comment
+ if not line: continue
+ if line[0] == '#': continue
+ # it's a directive
+ directive = None
+ if line == 'set noparent':
+ parent = False
+ elif line == '*':
+ directive = Directive(who='*', globs=[])
+ elif ' ' in line:
+ (who, globs) = line.split(' ', 1)
+ globs_list = [glob for glob in globs.split(' ') if glob]
+ directive = Directive(who=who, globs=globs_list)
+ else:
+ directive = Directive(who=line, globs=[])
+ if directive:
+ directives.append(directive)
+ return Owners(
+ parent=parent,
+ directives=directives,
+ dir=os.path.relpath(os.path.dirname(filename), git_root))
+
+
+owners_data = sorted(
+ [parse_owners(filename) for filename in owners_files],
+ key=operator.attrgetter('dir'))
#
# Modify owners so that parented OWNERS files point to the actual
@@ -98,24 +100,24 @@ owners_data = sorted([parse_owners(filename)
new_owners_data = []
for owners in owners_data:
- if owners.parent == True:
- best_parent = None
- best_parent_score = None
- for possible_parent in owners_data:
- if possible_parent is owners: continue
- rel = os.path.relpath(owners.dir, possible_parent.dir)
- # '..' ==> we had to walk up from possible_parent to get to owners
- # ==> not a parent
- if '..' in rel: continue
- depth = len(rel.split(os.sep))
- if not best_parent or depth < best_parent_score:
- best_parent = possible_parent
- best_parent_score = depth
- if best_parent:
- owners = owners._replace(parent = best_parent.dir)
- else:
- owners = owners._replace(parent = None)
- new_owners_data.append(owners)
+ if owners.parent == True:
+ best_parent = None
+ best_parent_score = None
+ for possible_parent in owners_data:
+ if possible_parent is owners: continue
+ rel = os.path.relpath(owners.dir, possible_parent.dir)
+ # '..' ==> we had to walk up from possible_parent to get to owners
+ # ==> not a parent
+ if '..' in rel: continue
+ depth = len(rel.split(os.sep))
+ if not best_parent or depth < best_parent_score:
+ best_parent = possible_parent
+ best_parent_score = depth
+ if best_parent:
+ owners = owners._replace(parent=best_parent.dir)
+ else:
+ owners = owners._replace(parent=None)
+ new_owners_data.append(owners)
owners_data = new_owners_data
#
@@ -123,106 +125,114 @@ owners_data = new_owners_data
# a CODEOWNERS file for GitHub
#
+
def full_dir(rules_dir, sub_path):
- return os.path.join(rules_dir, sub_path) if rules_dir != '.' else sub_path
+ return os.path.join(rules_dir, sub_path) if rules_dir != '.' else sub_path
+
# glob using git
gg_cache = {}
+
+
def git_glob(glob):
- global gg_cache
- if glob in gg_cache: return gg_cache[glob]
- r = set(subprocess
- .check_output(['git', 'ls-files', os.path.join(git_root, glob)])
- .decode('utf-8')
- .strip()
- .splitlines())
- gg_cache[glob] = r
- return r
+ global gg_cache
+ if glob in gg_cache: return gg_cache[glob]
+ r = set(
+ subprocess.check_output(
+ ['git', 'ls-files', os.path.join(git_root, glob)]).decode('utf-8')
+ .strip().splitlines())
+ gg_cache[glob] = r
+ return r
+
def expand_directives(root, directives):
- globs = collections.OrderedDict()
- # build a table of glob --> owners
- for directive in directives:
- for glob in directive.globs or ['**']:
- if glob not in globs:
- globs[glob] = []
- if directive.who not in globs[glob]:
- globs[glob].append(directive.who)
- # expand owners for intersecting globs
- sorted_globs = sorted(globs.keys(),
- key=lambda g: len(git_glob(full_dir(root, g))),
- reverse=True)
- out_globs = collections.OrderedDict()
- for glob_add in sorted_globs:
- who_add = globs[glob_add]
- pre_items = [i for i in out_globs.items()]
- out_globs[glob_add] = who_add.copy()
- for glob_have, who_have in pre_items:
- files_add = git_glob(full_dir(root, glob_add))
- files_have = git_glob(full_dir(root, glob_have))
- intersect = files_have.intersection(files_add)
- if intersect:
- for f in sorted(files_add): # sorted to ensure merge stability
- if f not in intersect:
- out_globs[os.path.relpath(f, start=root)] = who_add
- for who in who_have:
- if who not in out_globs[glob_add]:
- out_globs[glob_add].append(who)
- return out_globs
+ globs = collections.OrderedDict()
+ # build a table of glob --> owners
+ for directive in directives:
+ for glob in directive.globs or ['**']:
+ if glob not in globs:
+ globs[glob] = []
+ if directive.who not in globs[glob]:
+ globs[glob].append(directive.who)
+ # expand owners for intersecting globs
+ sorted_globs = sorted(
+ globs.keys(),
+ key=lambda g: len(git_glob(full_dir(root, g))),
+ reverse=True)
+ out_globs = collections.OrderedDict()
+ for glob_add in sorted_globs:
+ who_add = globs[glob_add]
+ pre_items = [i for i in out_globs.items()]
+ out_globs[glob_add] = who_add.copy()
+ for glob_have, who_have in pre_items:
+ files_add = git_glob(full_dir(root, glob_add))
+ files_have = git_glob(full_dir(root, glob_have))
+ intersect = files_have.intersection(files_add)
+ if intersect:
+ for f in sorted(files_add): # sorted to ensure merge stability
+ if f not in intersect:
+ out_globs[os.path.relpath(f, start=root)] = who_add
+ for who in who_have:
+ if who not in out_globs[glob_add]:
+ out_globs[glob_add].append(who)
+ return out_globs
+
def add_parent_to_globs(parent, globs, globs_dir):
- if not parent: return
- for owners in owners_data:
- if owners.dir == parent:
- owners_globs = expand_directives(owners.dir, owners.directives)
- for oglob, oglob_who in owners_globs.items():
- for gglob, gglob_who in globs.items():
- files_parent = git_glob(full_dir(owners.dir, oglob))
- files_child = git_glob(full_dir(globs_dir, gglob))
- intersect = files_parent.intersection(files_child)
- gglob_who_orig = gglob_who.copy()
- if intersect:
- for f in sorted(files_child): # sorted to ensure merge stability
- if f not in intersect:
- who = gglob_who_orig.copy()
- globs[os.path.relpath(f, start=globs_dir)] = who
- for who in oglob_who:
- if who not in gglob_who:
- gglob_who.append(who)
- add_parent_to_globs(owners.parent, globs, globs_dir)
- return
- assert(False)
+ if not parent: return
+ for owners in owners_data:
+ if owners.dir == parent:
+ owners_globs = expand_directives(owners.dir, owners.directives)
+ for oglob, oglob_who in owners_globs.items():
+ for gglob, gglob_who in globs.items():
+ files_parent = git_glob(full_dir(owners.dir, oglob))
+ files_child = git_glob(full_dir(globs_dir, gglob))
+ intersect = files_parent.intersection(files_child)
+ gglob_who_orig = gglob_who.copy()
+ if intersect:
+ for f in sorted(files_child
+ ): # sorted to ensure merge stability
+ if f not in intersect:
+ who = gglob_who_orig.copy()
+ globs[os.path.relpath(f, start=globs_dir)] = who
+ for who in oglob_who:
+ if who not in gglob_who:
+ gglob_who.append(who)
+ add_parent_to_globs(owners.parent, globs, globs_dir)
+ return
+ assert (False)
+
todo = owners_data.copy()
done = set()
with open(args.out, 'w') as out:
- out.write('# Auto-generated by the tools/mkowners/mkowners.py tool\n')
- out.write('# Uses OWNERS files in different modules throughout the\n')
- out.write('# repository as the source of truth for module ownership.\n')
- written_globs = []
- while todo:
- head, *todo = todo
- if head.parent and not head.parent in done:
- todo.append(head)
- continue
- globs = expand_directives(head.dir, head.directives)
- add_parent_to_globs(head.parent, globs, head.dir)
- for glob, owners in globs.items():
- skip = False
- for glob1, owners1, dir1 in reversed(written_globs):
- files = git_glob(full_dir(head.dir, glob))
- files1 = git_glob(full_dir(dir1, glob1))
- intersect = files.intersection(files1)
- if files == intersect:
- if sorted(owners) == sorted(owners1):
- skip = True # nothing new in this rule
- break
- elif intersect:
- # continuing would cause a semantic change since some files are
- # affected differently by this rule and CODEOWNERS is order dependent
- break
- if not skip:
- out.write('/%s %s\n' % (
- full_dir(head.dir, glob), ' '.join(owners)))
- written_globs.append((glob, owners, head.dir))
- done.add(head.dir)
+ out.write('# Auto-generated by the tools/mkowners/mkowners.py tool\n')
+ out.write('# Uses OWNERS files in different modules throughout the\n')
+ out.write('# repository as the source of truth for module ownership.\n')
+ written_globs = []
+ while todo:
+ head, *todo = todo
+ if head.parent and not head.parent in done:
+ todo.append(head)
+ continue
+ globs = expand_directives(head.dir, head.directives)
+ add_parent_to_globs(head.parent, globs, head.dir)
+ for glob, owners in globs.items():
+ skip = False
+ for glob1, owners1, dir1 in reversed(written_globs):
+ files = git_glob(full_dir(head.dir, glob))
+ files1 = git_glob(full_dir(dir1, glob1))
+ intersect = files.intersection(files1)
+ if files == intersect:
+ if sorted(owners) == sorted(owners1):
+ skip = True # nothing new in this rule
+ break
+ elif intersect:
+ # continuing would cause a semantic change since some files are
+ # affected differently by this rule and CODEOWNERS is order dependent
+ break
+ if not skip:
+ out.write('/%s %s\n' % (full_dir(head.dir, glob),
+ ' '.join(owners)))
+ written_globs.append((glob, owners, head.dir))
+ done.add(head.dir)
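An OWNERS file in this scheme is a list of directives: 'set noparent' stops inheritance from enclosing directories, a bare username (or '*') owns everything under the directory, and 'username glob...' restricts ownership to the named globs. A standalone sketch of the directive grammar parse_owners() accepts, run on hypothetical input:

    import collections

    Directive = collections.namedtuple('Directive', 'who globs')

    # Hypothetical OWNERS file content.
    src = ['set noparent', '# core maintainers', '*', 'b-user *.py tools/**']
    parent = True
    directives = []
    for line in src:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        if line == 'set noparent':
            parent = False  # do not inherit owners from parent directories
        elif ' ' in line:
            who, globs = line.split(' ', 1)
            directives.append(Directive(who=who, globs=globs.split()))
        else:
            # '*' and bare usernames both own everything in the directory
            directives.append(Directive(who=line, globs=[]))
    print(parent)      # False
    print(directives)  # [Directive(who='*', globs=[]),
                       #  Directive(who='b-user', globs=['*.py', 'tools/**'])]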