Diffstat (limited to 'tools/gcp/utils/big_query_utils.py')
-rwxr-xr-x  tools/gcp/utils/big_query_utils.py  52
1 file changed, 50 insertions(+), 2 deletions(-)
diff --git a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py
index e32b7a3ca5..77a5f5691e 100755
--- a/tools/gcp/utils/big_query_utils.py
+++ b/tools/gcp/utils/big_query_utils.py
@@ -22,6 +22,8 @@
 from apiclient import discovery
 from apiclient.errors import HttpError
 from oauth2client.client import GoogleCredentials
 
+# 30 days in milliseconds
+_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000
 NUM_RETRIES = 3
 
@@ -29,7 +31,7 @@ def create_big_query():
   """Authenticates with cloud platform and gets a BiqQuery service object
   """
   creds = GoogleCredentials.get_application_default()
-  return discovery.build('bigquery', 'v2', credentials=creds)
+  return discovery.build('bigquery', 'v2', credentials=creds, cache_discovery=False)
 
 
 def create_dataset(biq_query, project_id, dataset_id):
@@ -64,8 +66,21 @@ def create_table(big_query, project_id, dataset_id, table_id, table_schema,
                        fields, description)
 
 
+def create_partitioned_table(big_query, project_id, dataset_id, table_id, table_schema,
+                             description, partition_type='DAY', expiration_ms=_EXPIRATION_MS):
+  """Creates a partitioned table. By default, a date-partitioned table is created with
+  each partition lasting 30 days after it was last modified.
+  """
+  fields = [{'name': field_name,
+             'type': field_type,
+             'description': field_description
+             } for (field_name, field_type, field_description) in table_schema]
+  return create_table2(big_query, project_id, dataset_id, table_id,
+                       fields, description, partition_type, expiration_ms)
+
+
 def create_table2(big_query, project_id, dataset_id, table_id, fields_schema,
-                  description):
+                  description, partition_type=None, expiration_ms=None):
   is_success = True
 
   body = {
@@ -80,6 +95,12 @@ def create_table2(big_query, project_id, dataset_id, table_id, fields_schema,
       }
   }
 
+  if partition_type and expiration_ms:
+    body["timePartitioning"] = {
+        "type": partition_type,
+        "expirationMs": expiration_ms
+    }
+
   try:
     table_req = big_query.tables().insert(projectId=project_id,
                                           datasetId=dataset_id,
@@ -95,6 +116,33 @@
   return is_success
 
 
+def patch_table(big_query, project_id, dataset_id, table_id, fields_schema):
+  is_success = True
+
+  body = {
+      'schema': {
+          'fields': fields_schema
+      },
+      'tableReference': {
+          'datasetId': dataset_id,
+          'projectId': project_id,
+          'tableId': table_id
+      }
+  }
+
+  try:
+    table_req = big_query.tables().patch(projectId=project_id,
+                                         datasetId=dataset_id,
+                                         tableId=table_id,
+                                         body=body)
+    res = table_req.execute(num_retries=NUM_RETRIES)
+    print 'Successfully patched %s "%s"' % (res['kind'], res['id'])
+  except HttpError as http_error:
+    print 'Error in patching table: %s. Err: %s' % (table_id, http_error)
+    is_success = False
+  return is_success
+
+
 def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
   is_success = True
   body = {'rows': rows_list}
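
For illustration, a minimal usage sketch of the helpers this patch adds. It is not part of the commit: the project, dataset, table, and schema names below are invented placeholders, and only the big_query_utils function signatures come from the diff above. Note that create_big_query() now passes cache_discovery=False, which disables the client library's discovery-document file cache.

    import big_query_utils

    # Placeholder identifiers -- assumptions for this sketch, not from the patch.
    _PROJECT_ID = 'my-gcp-project'
    _DATASET_ID = 'perf_results'
    _TABLE_ID = 'daily_metrics'

    # (name, type, description) tuples, the schema format the helpers expect.
    _SCHEMA = [
        ('timestamp', 'TIMESTAMP', 'Time the row was recorded'),
        ('latency_ms', 'FLOAT', 'Measured latency in milliseconds'),
    ]

    big_query = big_query_utils.create_big_query()
    big_query_utils.create_dataset(big_query, _PROJECT_ID, _DATASET_ID)

    # Day-partitioned table; each partition expires 30 days after it was
    # last modified (the _EXPIRATION_MS default introduced by this patch).
    big_query_utils.create_partitioned_table(big_query, _PROJECT_ID, _DATASET_ID,
                                             _TABLE_ID, _SCHEMA,
                                             'Example partitioned table')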
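
Continuing the same hypothetical sketch, the new patch_table() wraps tables().patch() to update an existing table's schema in place. BigQuery only accepts additive schema patches, so the field list must repeat every existing column, and any new column must be NULLABLE (the default) or REPEATED. The columns below reuse the invented placeholders from the sketch above.

    # Full existing schema plus one new column.
    updated_fields = [
        {'name': 'timestamp', 'type': 'TIMESTAMP',
         'description': 'Time the row was recorded'},
        {'name': 'latency_ms', 'type': 'FLOAT',
         'description': 'Measured latency in milliseconds'},
        # Newly added column; left NULLABLE, as an additive patch requires.
        {'name': 'qps', 'type': 'INTEGER', 'description': 'Queries per second'},
    ]
    big_query_utils.patch_table(big_query, _PROJECT_ID, _DATASET_ID, _TABLE_ID,
                                updated_fields)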