diff options
Diffstat (limited to 'tools/gcp')
-rwxr-xr-x | tools/gcp/stress_test/run_client.py | 25 | ||||
-rwxr-xr-x | tools/gcp/stress_test/run_node.sh | 37 | ||||
-rwxr-xr-x | tools/gcp/stress_test/run_ruby.sh | 37 | ||||
-rwxr-xr-x | tools/gcp/stress_test/run_server.py | 26 | ||||
-rwxr-xr-x | tools/gcp/stress_test/stress_test_utils.py | 43 | ||||
-rwxr-xr-x | tools/gcp/utils/big_query_utils.py | 24 |
6 files changed, 158 insertions, 34 deletions
diff --git a/tools/gcp/stress_test/run_client.py b/tools/gcp/stress_test/run_client.py index e6b28d5624..2004bf6db4 100755 --- a/tools/gcp/stress_test/run_client.py +++ b/tools/gcp/stress_test/run_client.py @@ -1,5 +1,5 @@ #!/usr/bin/env python2.7 -# Copyright 2015, Google Inc. +# Copyright 2015-2016, Google Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -31,6 +31,7 @@ import datetime import os import re +import resource import select import subprocess import sys @@ -89,11 +90,16 @@ def run_client(): examining logs). This is the reason why the script waits forever in case of failures """ + # Set the 'core file' size to 'unlimited' so that 'core' files are generated + # if the client crashes (Note: This is not relevant for Java and Go clients) + resource.setrlimit(resource.RLIMIT_CORE, + (resource.RLIM_INFINITY, resource.RLIM_INFINITY)) + env = dict(os.environ) image_type = env['STRESS_TEST_IMAGE_TYPE'] - image_name = env['STRESS_TEST_IMAGE'] + stress_client_cmd = env['STRESS_TEST_CMD'].split() args_str = env['STRESS_TEST_ARGS_STR'] - metrics_client_image = env['METRICS_CLIENT_IMAGE'] + metrics_client_cmd = env['METRICS_CLIENT_CMD'].split() metrics_client_args_str = env['METRICS_CLIENT_ARGS_STR'] run_id = env['RUN_ID'] pod_name = env['POD_NAME'] @@ -103,6 +109,11 @@ def run_client(): dataset_id = env['DATASET_ID'] summary_table_id = env['SUMMARY_TABLE_ID'] qps_table_id = env['QPS_TABLE_ID'] + # The following parameter is to inform us whether the stress client runs + # forever until forcefully stopped or will it naturally stop after sometime. 
+ # This way, we know that the stress client process should not terminate (even + # if it does with a success exit code) and flag the termination as a failure + will_run_forever = env.get('WILL_RUN_FOREVER', '1') bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id, dataset_id, summary_table_id, qps_table_id) @@ -125,9 +136,8 @@ def run_client(): # Update status that the test is starting (in the status table) bq_helper.insert_summary_row(EventType.STARTING, details) - metrics_cmd = [metrics_client_image - ] + [x for x in metrics_client_args_str.split()] - stress_cmd = [image_name] + [x for x in args_str.split()] + metrics_cmd = metrics_client_cmd + [x for x in metrics_client_args_str.split()] + stress_cmd = stress_client_cmd + [x for x in args_str.split()] print 'Launching process %s ...' % stress_cmd stress_p = subprocess.Popen(args=stress_cmd, @@ -141,11 +151,12 @@ def run_client(): while True: # Check if stress_client is still running. If so, collect metrics and upload # to BigQuery status table + # If stress_p.poll() is not None, it means that the stress client terminated if stress_p.poll() is not None: end_time = datetime.datetime.now().isoformat() event_type = EventType.SUCCESS details = 'End time: %s' % end_time - if stress_p.returncode != 0: + if will_run_forever == '1' or stress_p.returncode != 0: event_type = EventType.FAILURE details = 'Return code = %d. End time: %s' % (stress_p.returncode, end_time) diff --git a/tools/gcp/stress_test/run_node.sh b/tools/gcp/stress_test/run_node.sh new file mode 100755 index 0000000000..4a4da6fc8b --- /dev/null +++ b/tools/gcp/stress_test/run_node.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Copyright 2015-2016, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This is a wrapper script that was created to help run_server.py and +# run_client.py to launch 'node js' stress clients and stress servers +source ~/.nvm/nvm.sh + +set -ex + +$@ diff --git a/tools/gcp/stress_test/run_ruby.sh b/tools/gcp/stress_test/run_ruby.sh new file mode 100755 index 0000000000..80d0567447 --- /dev/null +++ b/tools/gcp/stress_test/run_ruby.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Copyright 2015-2016, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This is a wrapper script that was created to help run_server.py and +# run_client.py to launch 'ruby' stress clients and stress servers +source /etc/profile.d/rvm.sh + +set -ex + +$@ diff --git a/tools/gcp/stress_test/run_server.py b/tools/gcp/stress_test/run_server.py index dc4741b95e..a666ae2900 100755 --- a/tools/gcp/stress_test/run_server.py +++ b/tools/gcp/stress_test/run_server.py @@ -1,5 +1,5 @@ #!/usr/bin/env python2.7 -# Copyright 2015, Google Inc. +# Copyright 2015-2016, Google Inc. 
# All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -30,6 +30,7 @@ import datetime import os +import resource import select import subprocess import sys @@ -56,26 +57,36 @@ def run_server(): might want to connect to the pod for examining logs). This is the reason why the script waits forever in case of failures. """ + # Set the 'core file' size to 'unlimited' so that 'core' files are generated + # if the server crashes (Note: This is not relevant for Java and Go servers) + resource.setrlimit(resource.RLIMIT_CORE, + (resource.RLIM_INFINITY, resource.RLIM_INFINITY)) # Read the parameters from environment variables env = dict(os.environ) run_id = env['RUN_ID'] # The unique run id for this test image_type = env['STRESS_TEST_IMAGE_TYPE'] - image_name = env['STRESS_TEST_IMAGE'] + stress_server_cmd = env['STRESS_TEST_CMD'].split() args_str = env['STRESS_TEST_ARGS_STR'] pod_name = env['POD_NAME'] project_id = env['GCP_PROJECT_ID'] dataset_id = env['DATASET_ID'] summary_table_id = env['SUMMARY_TABLE_ID'] qps_table_id = env['QPS_TABLE_ID'] + # The following parameter is to inform us whether the server runs forever + # until forcefully stopped or will it naturally stop after sometime. + # This way, we know that the process should not terminate (even if it does + # with a success exit code) and flag any termination as a failure. 
+ will_run_forever = env.get('WILL_RUN_FOREVER', '1') logfile_name = env.get('LOGFILE_NAME') print('pod_name: %s, project_id: %s, run_id: %s, dataset_id: %s, ' - 'summary_table_id: %s, qps_table_id: %s') % ( - pod_name, project_id, run_id, dataset_id, summary_table_id, - qps_table_id) + 'summary_table_id: %s, qps_table_id: %s') % (pod_name, project_id, + run_id, dataset_id, + summary_table_id, + qps_table_id) bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id, dataset_id, summary_table_id, qps_table_id) @@ -98,7 +109,7 @@ def run_server(): # Update status that the test is starting (in the status table) bq_helper.insert_summary_row(EventType.STARTING, details) - stress_cmd = [image_name] + [x for x in args_str.split()] + stress_cmd = stress_server_cmd + [x for x in args_str.split()] print 'Launching process %s ...' % stress_cmd stress_p = subprocess.Popen(args=stress_cmd, @@ -106,7 +117,8 @@ def run_server(): stderr=subprocess.STDOUT) returncode = stress_p.wait() - if returncode != 0: + + if will_run_forever == '1' or returncode != 0: end_time = datetime.datetime.now().isoformat() event_type = EventType.FAILURE details = 'Returncode: %d; End time: %s' % (returncode, end_time) diff --git a/tools/gcp/stress_test/stress_test_utils.py b/tools/gcp/stress_test/stress_test_utils.py index 79d9dea675..19d59c0df1 100755 --- a/tools/gcp/stress_test/stress_test_utils.py +++ b/tools/gcp/stress_test/stress_test_utils.py @@ -103,16 +103,32 @@ class BigQueryHelper: return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id, self.qps_table_id, [row]) - def check_if_any_tests_failed(self, num_query_retries=3): + def check_if_any_tests_failed(self, num_query_retries=3, timeout_msec=30000): query = ('SELECT event_type FROM %s.%s WHERE run_id = \'%s\' AND ' 'event_type="%s"') % (self.dataset_id, self.summary_table_id, self.run_id, EventType.FAILURE) - query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) - page = 
self.bq.jobs().getQueryResults(**query_job['jobReference']).execute( - num_retries=num_query_retries) - num_failures = int(page['totalRows']) - print 'num rows: ', num_failures - return num_failures > 0 + page = None + try: + query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) + job_id = query_job['jobReference']['jobId'] + project_id = query_job['jobReference']['projectId'] + page = self.bq.jobs().getQueryResults( + projectId=project_id, + jobId=job_id, + timeoutMs=timeout_msec).execute(num_retries=num_query_retries) + + if not page['jobComplete']: + print('TIMEOUT ERROR: The query %s timed out. Current timeout value is' + ' %d msec. Returning False (i.e assuming there are no failures)' + ) % (query, timeout_msec) + return False + + num_failures = int(page['totalRows']) + print 'num rows: ', num_failures + return num_failures > 0 + except: + print 'Exception in check_if_any_tests_failed(). Info: ', sys.exc_info() + print 'Query: ', query def print_summary_records(self, num_query_retries=3): line = '-' * 120 @@ -126,8 +142,9 @@ class BigQueryHelper: self.dataset_id, self.summary_table_id, self.run_id) query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) - print '{:<25} {:<12} {:<12} {:<30} {}'.format( - 'Pod name', 'Image type', 'Event type', 'Date', 'Details') + print '{:<25} {:<12} {:<12} {:<30} {}'.format('Pod name', 'Image type', + 'Event type', 'Date', + 'Details') print line page_token = None while True: @@ -136,9 +153,11 @@ class BigQueryHelper: **query_job['jobReference']).execute(num_retries=num_query_retries) rows = page.get('rows', []) for row in rows: - print '{:<25} {:<12} {:<12} {:<30} {}'.format( - row['f'][0]['v'], row['f'][1]['v'], row['f'][2]['v'], - row['f'][3]['v'], row['f'][4]['v']) + print '{:<25} {:<12} {:<12} {:<30} {}'.format(row['f'][0]['v'], + row['f'][1]['v'], + row['f'][2]['v'], + row['f'][3]['v'], + row['f'][4]['v']) page_token = page.get('pageToken') if not page_token: break diff --git 
a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py index c331a67942..9dbc69c5d6 100755 --- a/tools/gcp/utils/big_query_utils.py +++ b/tools/gcp/utils/big_query_utils.py @@ -71,16 +71,22 @@ def create_dataset(biq_query, project_id, dataset_id): def create_table(big_query, project_id, dataset_id, table_id, table_schema, description): + fields = [{'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + return create_table2(big_query, project_id, dataset_id, table_id, + fields, description) + + +def create_table2(big_query, project_id, dataset_id, table_id, fields_schema, + description): is_success = True body = { 'description': description, 'schema': { - 'fields': [{ - 'name': field_name, - 'type': field_type, - 'description': field_description - } for (field_name, field_type, field_description) in table_schema] + 'fields': fields_schema }, 'tableReference': { 'datasetId': dataset_id, @@ -112,12 +118,14 @@ def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): datasetId=dataset_id, tableId=table_id, body=body) - print body res = insert_req.execute(num_retries=NUM_RETRIES) - print res + if res.get('insertErrors', None): + print 'Error inserting rows! Response: %s' % res + is_success = False except HttpError as http_error: - print 'Error in inserting rows in the table %s' % table_id + print 'Error inserting rows to the table %s' % table_id is_success = False + return is_success |