diff options
Diffstat (limited to 'tools/gcp/utils')
-rwxr-xr-x | tools/gcp/utils/big_query_utils.py | 140 | ||||
-rwxr-xr-x | tools/gcp/utils/kubernetes_api.py | 269 |
2 files changed, 409 insertions, 0 deletions
diff --git a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py new file mode 100755 index 0000000000..c331a67942 --- /dev/null +++ b/tools/gcp/utils/big_query_utils.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python2.7 +# Copyright 2015, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import json +import uuid +import httplib2 + +from apiclient import discovery +from apiclient.errors import HttpError +from oauth2client.client import GoogleCredentials + +NUM_RETRIES = 3 + + +def create_big_query(): + """Authenticates with cloud platform and gets a BiqQuery service object + """ + creds = GoogleCredentials.get_application_default() + return discovery.build('bigquery', 'v2', credentials=creds) + + +def create_dataset(biq_query, project_id, dataset_id): + is_success = True + body = { + 'datasetReference': { + 'projectId': project_id, + 'datasetId': dataset_id + } + } + + try: + dataset_req = biq_query.datasets().insert(projectId=project_id, body=body) + dataset_req.execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: The dataset %s already exists' % dataset_id + else: + # Note: For more debugging info, print "http_error.content" + print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error) + is_success = False + return is_success + + +def create_table(big_query, project_id, dataset_id, table_id, table_schema, + description): + is_success = True + + body = { + 'description': description, + 'schema': { + 'fields': [{ + 'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } + } + + try: + table_req = big_query.tables().insert(projectId=project_id, + datasetId=dataset_id, + body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print 'Successfully created %s "%s"' % (res['kind'], res['id']) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: Table %s already exists' % table_id + else: + print 'Error in creating table: %s. Err: %s' % (table_id, http_error) + is_success = False + return is_success + + +def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): + is_success = True + body = {'rows': rows_list} + try: + insert_req = big_query.tabledata().insertAll(projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + print body + res = insert_req.execute(num_retries=NUM_RETRIES) + print res + except HttpError as http_error: + print 'Error in inserting rows in the table %s' % table_id + is_success = False + return is_success + + +def sync_query_job(big_query, project_id, query, timeout=5000): + query_data = {'query': query, 'timeoutMs': timeout} + query_job = None + try: + query_job = big_query.jobs().query( + projectId=project_id, + body=query_data).execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + print 'Query execute job failed with error: %s' % http_error + print http_error.content + return query_job + + # List of (column name, column type, description) tuples +def make_row(unique_row_id, row_values_dict): + """row_values_dict is a dictionary of column name and column value. + """ + return {'insertId': unique_row_id, 'json': row_values_dict} diff --git a/tools/gcp/utils/kubernetes_api.py b/tools/gcp/utils/kubernetes_api.py new file mode 100755 index 0000000000..a8a4aad69b --- /dev/null +++ b/tools/gcp/utils/kubernetes_api.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python2.7 +# Copyright 2015, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import requests +import json + +_REQUEST_TIMEOUT_SECS = 10 + + +def _make_pod_config(pod_name, image_name, container_port_list, cmd_list, + arg_list, env_dict): + """Creates a string containing the Pod defintion as required by the Kubernetes API""" + body = { + 'kind': 'Pod', + 'apiVersion': 'v1', + 'metadata': { + 'name': pod_name, + 'labels': {'name': pod_name} + }, + 'spec': { + 'containers': [ + { + 'name': pod_name, + 'image': image_name, + 'ports': [{'containerPort': port, + 'protocol': 'TCP'} + for port in container_port_list], + 'imagePullPolicy': 'Always' + } + ] + } + } + + env_list = [{'name': k, 'value': v} for (k, v) in env_dict.iteritems()] + if len(env_list) > 0: + body['spec']['containers'][0]['env'] = env_list + + # Add the 'Command' and 'Args' attributes if they are passed. + # Note: + # - 'Command' overrides the ENTRYPOINT in the Docker Image + # - 'Args' override the CMD in Docker image (yes, it is confusing!) + if len(cmd_list) > 0: + body['spec']['containers'][0]['command'] = cmd_list + if len(arg_list) > 0: + body['spec']['containers'][0]['args'] = arg_list + return json.dumps(body) + + +def _make_service_config(service_name, pod_name, service_port_list, + container_port_list, is_headless): + """Creates a string containing the Service definition as required by the Kubernetes API. + + NOTE: + This creates either a Headless Service or 'LoadBalancer' service depending on + the is_headless parameter. For Headless services, there is no 'type' attribute + and the 'clusterIP' attribute is set to 'None'. Also, if the service is + Headless, Kubernetes creates DNS entries for Pods - i.e creates DNS A-records + mapping the service's name to the Pods' IPs + """ + if len(container_port_list) != len(service_port_list): + print( + 'ERROR: container_port_list and service_port_list must be of same size') + return '' + body = { + 'kind': 'Service', + 'apiVersion': 'v1', + 'metadata': { + 'name': service_name, + 'labels': { + 'name': service_name + } + }, + 'spec': { + 'ports': [], + 'selector': { + 'name': pod_name + } + } + } + # Populate the 'ports' list in the 'spec' section. This maps service ports + # (port numbers that are exposed by Kubernetes) to container ports (i.e port + # numbers that are exposed by your Docker image) + for idx in range(len(container_port_list)): + port_entry = { + 'port': service_port_list[idx], + 'targetPort': container_port_list[idx], + 'protocol': 'TCP' + } + body['spec']['ports'].append(port_entry) + + # Make this either a LoadBalancer service or a headless service depending on + # the is_headless parameter + if is_headless: + body['spec']['clusterIP'] = 'None' + else: + body['spec']['type'] = 'LoadBalancer' + return json.dumps(body) + + +def _print_connection_error(msg): + print('ERROR: Connection failed. Did you remember to run Kubenetes proxy on ' + 'localhost (i.e kubectl proxy --port=<proxy_port>) ?. Error: %s' % msg) + + +def _do_post(post_url, api_name, request_body): + """Helper to do HTTP POST. + + Note: + 1) On success, Kubernetes returns a success code of 201(CREATED) not 200(OK) + 2) A response code of 509(CONFLICT) is interpreted as a success code (since + the error is most likely due to the resource already existing). This makes + _do_post() idempotent which is semantically desirable. + """ + is_success = True + try: + r = requests.post(post_url, + data=request_body, + timeout=_REQUEST_TIMEOUT_SECS) + if r.status_code == requests.codes.conflict: + print('WARN: Looks like the resource already exists. Api: %s, url: %s' % + (api_name, post_url)) + elif r.status_code != requests.codes.created: + print('ERROR: %s API returned error. HTTP response: (%d) %s' % + (api_name, r.status_code, r.text)) + is_success = False + except (requests.exceptions.Timeout, + requests.exceptions.ConnectionError) as e: + is_success = False + _print_connection_error(str(e)) + return is_success + + +def _do_delete(del_url, api_name): + """Helper to do HTTP DELETE. + + Note: A response code of 404(NOT_FOUND) is treated as success to keep + _do_delete() idempotent. + """ + is_success = True + try: + r = requests.delete(del_url, timeout=_REQUEST_TIMEOUT_SECS) + if r.status_code == requests.codes.not_found: + print('WARN: The resource does not exist. Api: %s, url: %s' % + (api_name, del_url)) + elif r.status_code != requests.codes.ok: + print('ERROR: %s API returned error. HTTP response: %s' % + (api_name, r.text)) + is_success = False + except (requests.exceptions.Timeout, + requests.exceptions.ConnectionError) as e: + is_success = False + _print_connection_error(str(e)) + return is_success + + +def create_service(kube_host, kube_port, namespace, service_name, pod_name, + service_port_list, container_port_list, is_headless): + """Creates either a Headless Service or a LoadBalancer Service depending + on the is_headless parameter. + """ + post_url = 'http://%s:%d/api/v1/namespaces/%s/services' % ( + kube_host, kube_port, namespace) + request_body = _make_service_config(service_name, pod_name, service_port_list, + container_port_list, is_headless) + return _do_post(post_url, 'Create Service', request_body) + + +def create_pod(kube_host, kube_port, namespace, pod_name, image_name, + container_port_list, cmd_list, arg_list, env_dict): + """Creates a Kubernetes Pod. + + Note that it is generally NOT considered a good practice to directly create + Pods. Typically, the recommendation is to create 'Controllers' to create and + manage Pods' lifecycle. Currently Kubernetes only supports 'Replication + Controller' which creates a configurable number of 'identical Replicas' of + Pods and automatically restarts any Pods in case of failures (for eg: Machine + failures in Kubernetes). This makes it less flexible for our test use cases + where we might want slightly different set of args to each Pod. Hence we + directly create Pods and not care much about Kubernetes failures since those + are very rare. + """ + post_url = 'http://%s:%d/api/v1/namespaces/%s/pods' % (kube_host, kube_port, + namespace) + request_body = _make_pod_config(pod_name, image_name, container_port_list, + cmd_list, arg_list, env_dict) + return _do_post(post_url, 'Create Pod', request_body) + + +def delete_service(kube_host, kube_port, namespace, service_name): + del_url = 'http://%s:%d/api/v1/namespaces/%s/services/%s' % ( + kube_host, kube_port, namespace, service_name) + return _do_delete(del_url, 'Delete Service') + + +def delete_pod(kube_host, kube_port, namespace, pod_name): + del_url = 'http://%s:%d/api/v1/namespaces/%s/pods/%s' % (kube_host, kube_port, + namespace, pod_name) + return _do_delete(del_url, 'Delete Pod') + + +def create_pod_and_service(kube_host, kube_port, namespace, pod_name, + image_name, container_port_list, cmd_list, arg_list, + env_dict, is_headless_service): + """A helper function that creates a pod and a service (if pod creation was successful).""" + is_success = create_pod(kube_host, kube_port, namespace, pod_name, image_name, + container_port_list, cmd_list, arg_list, env_dict) + if not is_success: + print 'Error in creating Pod' + return False + + is_success = create_service( + kube_host, + kube_port, + namespace, + pod_name, # Use pod_name for service + pod_name, + container_port_list, # Service port list same as container port list + container_port_list, + is_headless_service) + if not is_success: + print 'Error in creating Service' + return False + + print 'Successfully created the pod/service %s' % pod_name + return True + + +def delete_pod_and_service(kube_host, kube_port, namespace, pod_name): + """ A helper function that calls delete_pod and delete_service """ + is_success = delete_pod(kube_host, kube_port, namespace, pod_name) + if not is_success: + print 'Error in deleting pod %s' % pod_name + return False + + # Note: service name assumed to the the same as pod name + is_success = delete_service(kube_host, kube_port, namespace, pod_name) + if not is_success: + print 'Error in deleting service %s' % pod_name + return False + + print 'Successfully deleted the Pod/Service: %s' % pod_name + return True |