Diffstat (limited to 'tools/gcp/stress_test/run_client.py')
-rwxr-xr-x  tools/gcp/stress_test/run_client.py  206
1 file changed, 0 insertions, 206 deletions
diff --git a/tools/gcp/stress_test/run_client.py b/tools/gcp/stress_test/run_client.py
deleted file mode 100755
index 51ada6820d..0000000000
--- a/tools/gcp/stress_test/run_client.py
+++ /dev/null
@@ -1,206 +0,0 @@
-#!/usr/bin/env python2.7
-# Copyright 2015-2016, Google Inc.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following disclaimer
-# in the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import datetime
-import os
-import re
-import resource
-import select
-import subprocess
-import sys
-import time
-
-from stress_test_utils import EventType
-from stress_test_utils import BigQueryHelper
-
-
-# TODO(sree): Write a Python gRPC client to directly query the metrics instead
-# of calling metrics_client
-def _get_qps(metrics_cmd):
-  qps = 0
-  try:
-    # Note: gpr_log() writes even non-error messages to the stderr stream, so
-    # it is important that we set stderr=subprocess.STDOUT
-    p = subprocess.Popen(args=metrics_cmd,
-                         stdout=subprocess.PIPE,
-                         stderr=subprocess.STDOUT)
-    # Call communicate() before inspecting the return code; calling wait()
-    # first can deadlock if the child fills the stdout pipe buffer
-    (out_str, err_str) = p.communicate()
-    retcode = p.returncode
-    if retcode != 0:
-      print 'Error in reading metrics information'
-      print 'Output: ', out_str
-    else:
-      # The overall qps is printed at the end of the line
-      m = re.search(r'\d+$', out_str)
-      qps = int(m.group()) if m else 0
-  except Exception as ex:
-    print 'Exception while reading metrics information: ' + str(ex)
-  return qps
-
-
-def run_client():
-  """This is a wrapper around the stress test client and performs the following:
-      1) Create the following two tables in BigQuery:
-         (i) Summary table: To record events like the test started, completed
-             successfully or failed
-         (ii) Qps table: To periodically record the QPS sent by this client
-      2) Start the stress test client and add a row in the BigQuery summary
-         table
-      3) Once every few seconds (as specified by poll_interval_secs), poll
-         the status of the stress test client process and perform the
-         following:
-          3.1) If the process is still running, get the current qps by invoking
-               the metrics client program and add a row in the BigQuery
-               Qps table. Sleep for a duration specified by poll_interval_secs
-          3.2) If the process exited successfully, add a row in the BigQuery
-               summary table and exit
-          3.3) If the process failed, add a row in the BigQuery summary table
-               and wait forever.
-               NOTE: This script typically runs inside a GKE pod, which means
-               that the pod gets destroyed when the script exits. However, in
-               case the stress test client fails, we would not want the pod to
-               be destroyed (since we might want to connect to the pod to
-               examine logs). This is why the script waits forever in case of
-               failures.
-  """
-  # Set the 'core file' size to 'unlimited' so that 'core' files are generated
-  # if the client crashes (Note: this is not relevant for Java and Go clients)
-  resource.setrlimit(resource.RLIMIT_CORE,
-                     (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
-
-  env = dict(os.environ)
-  image_type = env['STRESS_TEST_IMAGE_TYPE']
-  stress_client_cmd = env['STRESS_TEST_CMD'].split()
-  args_str = env['STRESS_TEST_ARGS_STR']
-  metrics_client_cmd = env['METRICS_CLIENT_CMD'].split()
-  metrics_client_args_str = env['METRICS_CLIENT_ARGS_STR']
-  run_id = env['RUN_ID']
-  pod_name = env['POD_NAME']
-  logfile_name = env.get('LOGFILE_NAME')
-  poll_interval_secs = float(env['POLL_INTERVAL_SECS'])
-  project_id = env['GCP_PROJECT_ID']
-  dataset_id = env['DATASET_ID']
-  summary_table_id = env['SUMMARY_TABLE_ID']
-  qps_table_id = env['QPS_TABLE_ID']
-  # The following parameter indicates whether the stress client runs forever
-  # until forcefully stopped or stops naturally after some time. If it is
-  # expected to run forever, any termination (even one with a success exit
-  # code) is flagged as a failure.
-  will_run_forever = env.get('WILL_RUN_FOREVER', '1')
-
-  bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id,
-                             dataset_id, summary_table_id, qps_table_id)
-  bq_helper.initialize()
-
-  # Create the BigQuery dataset and tables: the summary table and the QPS table
-  if not bq_helper.setup_tables():
-    print 'Error in creating BigQuery tables'
-    return
-
-  start_time = datetime.datetime.now()
-
-  logfile = None
-  details = 'Logging to stdout'
-  if logfile_name is not None:
-    print 'Opening logfile: %s ...' % logfile_name
-    details = 'Logfile: %s' % logfile_name
-    logfile = open(logfile_name, 'w')
-
-  metrics_cmd = metrics_client_cmd + [x
-                                      for x in metrics_client_args_str.split()]
-  stress_cmd = stress_client_cmd + [x for x in args_str.split()]
-
-  details = '%s, Metrics command: %s, Stress client command: %s' % (
-      details, str(metrics_cmd), str(stress_cmd))
-  # Record in the summary table that the test is starting
-  bq_helper.insert_summary_row(EventType.STARTING, details)
-
-  print 'Launching process %s ...' % stress_cmd
-  stress_p = subprocess.Popen(args=stress_cmd,
-                              stdout=logfile,
-                              stderr=subprocess.STDOUT)
-
-  qps_history = [1, 1, 1]  # Maintain the last 3 qps readings
-  qps_history_idx = 0  # Index into the qps_history list
-
-  is_running_status_written = False
-  is_error = False
-  while True:
-    # Check whether the stress client is still running. If it is, collect
-    # metrics and upload them to the BigQuery QPS table.
-    # If stress_p.poll() is not None, the stress client has terminated
-    if stress_p.poll() is not None:
-      end_time = datetime.datetime.now().isoformat()
-      event_type = EventType.SUCCESS
-      details = 'End time: %s' % end_time
-      if will_run_forever == '1' or stress_p.returncode != 0:
-        event_type = EventType.FAILURE
-        details = 'Return code = %d. End time: %s' % (stress_p.returncode,
-                                                      end_time)
-        is_error = True
-      bq_helper.insert_summary_row(event_type, details)
-      print details
-      break
-
-    if not is_running_status_written:
-      bq_helper.insert_summary_row(EventType.RUNNING, '')
-      is_running_status_written = True
-
-    # The stress client is still running. Get the current metrics
-    qps = _get_qps(metrics_cmd)
-    qps_recorded_at = datetime.datetime.now().isoformat()
-    print 'qps: %d at %s' % (qps, qps_recorded_at)
-
-    # If QPS has been zero for the last 3 readings, flag an error and exit
-    qps_history[qps_history_idx] = qps
-    qps_history_idx = (qps_history_idx + 1) % len(qps_history)
-    if sum(qps_history) == 0:
-      details = 'QPS has been zero for the last %d seconds - as of: %s' % (
-          poll_interval_secs * 3, qps_recorded_at)
-      is_error = True
-      bq_helper.insert_summary_row(EventType.FAILURE, details)
-      print details
-      break
-
-    # Upload the QPS metrics to BigQuery
-    bq_helper.insert_qps_row(qps, qps_recorded_at)
-
-    time.sleep(poll_interval_secs)
-
-  if is_error:
-    print 'Waiting indefinitely ...'
-    select.select([], [], [])
-
-  print 'Completed'
-  return
-
-
-if __name__ == '__main__':
-  run_client()
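
The deleted wrapper takes its entire configuration from environment variables read at the top of run_client(). The following is a minimal sketch of how it could have been launched outside GKE; every path, flag string, and identifier is an illustrative placeholder rather than something taken from this file.

import os
import subprocess

# Hypothetical launcher for run_client.py; all values below are placeholders.
env = dict(os.environ)
env.update({
    'STRESS_TEST_IMAGE_TYPE': 'cxx',
    'STRESS_TEST_CMD': '/usr/local/bin/stress_test',
    'STRESS_TEST_ARGS_STR': '--server_addresses=localhost:8080',
    'METRICS_CLIENT_CMD': '/usr/local/bin/metrics_client',
    'METRICS_CLIENT_ARGS_STR': '--metrics_server_address=localhost:8081',
    'RUN_ID': '2016-01-01-example-run',
    'POD_NAME': 'stress-client-0',
    'POLL_INTERVAL_SECS': '60',
    'GCP_PROJECT_ID': 'example-project',
    'DATASET_ID': 'stress_test_dataset',
    'SUMMARY_TABLE_ID': 'summary',
    'QPS_TABLE_ID': 'qps',
    'WILL_RUN_FOREVER': '1',
})
# The wrapper exits on success; on failure it waits forever by design.
subprocess.check_call(['python2.7', 'tools/gcp/stress_test/run_client.py'],
                      env=env)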
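
The TODO above _get_qps() proposes querying the metrics service over gRPC directly instead of shelling out to metrics_client. A rough sketch of that idea follows; it assumes stubs generated from gRPC's testing metrics.proto are importable as metrics_pb2 / metrics_pb2_grpc, that the stress client serves its MetricsService on localhost:8081, and that the gauge of interest is named 'qps'. All of these are assumptions for illustration, not facts established by this file.

import grpc

# Assumed to be generated from gRPC's testing metrics.proto.
import metrics_pb2
import metrics_pb2_grpc


def get_qps_via_grpc(metrics_server_address='localhost:8081'):
  # Hypothetical replacement for _get_qps(): read the 'qps' gauge directly.
  qps = 0
  try:
    channel = grpc.insecure_channel(metrics_server_address)
    stub = metrics_pb2_grpc.MetricsServiceStub(channel)
    # GetAllGauges streams one GaugeResponse per gauge exported by the client
    for gauge in stub.GetAllGauges(metrics_pb2.EmptyMessage()):
      if gauge.name == 'qps':
        qps = gauge.long_value
  except grpc.RpcError as rpc_error:
    print 'Error querying metrics over gRPC: %s' % rpc_error
  return qps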
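
The zero-QPS check in the polling loop is a three-slot ring buffer seeded with non-zero values, so it cannot fire before three genuine readings have been recorded. The same logic in isolation, as a hypothetical helper that is not part of the script:

class ZeroQpsDetector(object):
  # Standalone illustration of the qps_history ring buffer in the loop above.

  def __init__(self, window_size=3):
    # Seeding with ones mirrors qps_history = [1, 1, 1]: the detector cannot
    # fire until window_size genuine readings have overwritten the seeds.
    self._history = [1] * window_size
    self._idx = 0

  def record(self, qps):
    # Record a reading; return True once the last window_size readings are all zero.
    self._history[self._idx] = qps
    self._idx = (self._idx + 1) % len(self._history)
    return sum(self._history) == 0


# Three consecutive zero readings trigger the failure condition.
detector = ZeroQpsDetector()
assert not detector.record(0)
assert not detector.record(0)
assert detector.record(0)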