diff options
author | Jan Tattermusch <jtattermusch@users.noreply.github.com> | 2016-05-18 18:41:35 -0700 |
---|---|---|
committer | Jan Tattermusch <jtattermusch@users.noreply.github.com> | 2016-05-18 18:41:35 -0700 |
commit | b9f11c3fa232aed6648eb21a533fa5492da75227 (patch) | |
tree | 811ec22602a48fe8e96cc37196a0b92f1dcee3d9 | |
parent | 8c1d79690d01b279cb48de1b66566a4858ed665f (diff) | |
parent | 4dd02fc63c4e23c797dbbd277db8359e6d1c3491 (diff) |
Merge pull request #6625 from sreecha/stress_test_misc
Misc changes to stress test framework (to reduce false positives and add tools for debugging)
-rw-r--r-- | test/cpp/interop/metrics_client.cc | 16 | ||||
-rwxr-xr-x | tools/gcp/stress_test/run_client.py | 14 | ||||
-rwxr-xr-x | tools/gcp/stress_test/run_server.py | 10 | ||||
-rwxr-xr-x | tools/gcp/stress_test/stress_test_utils.py | 7 | ||||
-rw-r--r-- | tools/run_tests/stress_test/configs/csharp.json | 7 | ||||
-rw-r--r-- | tools/run_tests/stress_test/configs/java.json | 7 | ||||
-rwxr-xr-x | tools/run_tests/stress_test/print_summary.py | 59 |
7 files changed, 100 insertions, 20 deletions
diff --git a/test/cpp/interop/metrics_client.cc b/test/cpp/interop/metrics_client.cc index cc304f2e89..c8c2215fab 100644 --- a/test/cpp/interop/metrics_client.cc +++ b/test/cpp/interop/metrics_client.cc @@ -42,13 +42,15 @@ #include "test/cpp/util/metrics_server.h" #include "test/cpp/util/test_config.h" -DEFINE_string(metrics_server_address, "", +int kDeadlineSecs = 10; + +DEFINE_string(metrics_server_address, "localhost:8081", "The metrics server addresses in the fomrat <hostname>:<port>"); +DEFINE_int32(deadline_secs, kDeadlineSecs, + "The deadline (in seconds) for RCP call"); DEFINE_bool(total_only, false, "If true, this prints only the total value of all gauges"); -int kDeadlineSecs = 10; - using grpc::testing::EmptyMessage; using grpc::testing::GaugeResponse; using grpc::testing::MetricsService; @@ -56,12 +58,13 @@ using grpc::testing::MetricsServiceImpl; // Prints the values of all Gauges (unless total_only is set to 'true' in which // case this only prints the sum of all gauge values). 
-bool PrintMetrics(std::unique_ptr<MetricsService::Stub> stub, bool total_only) { +bool PrintMetrics(std::unique_ptr<MetricsService::Stub> stub, bool total_only, + int deadline_secs) { grpc::ClientContext context; EmptyMessage message; std::chrono::system_clock::time_point deadline = - std::chrono::system_clock::now() + std::chrono::seconds(kDeadlineSecs); + std::chrono::system_clock::now() + std::chrono::seconds(deadline_secs); context.set_deadline(deadline); @@ -108,7 +111,8 @@ int main(int argc, char** argv) { std::shared_ptr<grpc::Channel> channel(grpc::CreateChannel( FLAGS_metrics_server_address, grpc::InsecureChannelCredentials())); - if (!PrintMetrics(MetricsService::NewStub(channel), FLAGS_total_only)) { + if (!PrintMetrics(MetricsService::NewStub(channel), FLAGS_total_only, + FLAGS_deadline_secs)) { return 1; } diff --git a/tools/gcp/stress_test/run_client.py b/tools/gcp/stress_test/run_client.py index 2004bf6db4..51ada6820d 100755 --- a/tools/gcp/stress_test/run_client.py +++ b/tools/gcp/stress_test/run_client.py @@ -133,12 +133,15 @@ def run_client(): details = 'Logfile: %s' % logfile_name logfile = open(logfile_name, 'w') + metrics_cmd = metrics_client_cmd + [x + for x in metrics_client_args_str.split()] + stress_cmd = stress_client_cmd + [x for x in args_str.split()] + + details = '%s, Metrics command: %s, Stress client command: %s' % ( + details, str(metrics_cmd), str(stress_cmd)) # Update status that the test is starting (in the status table) bq_helper.insert_summary_row(EventType.STARTING, details) - metrics_cmd = metrics_client_cmd + [x for x in metrics_client_args_str.split()] - stress_cmd = stress_client_cmd + [x for x in args_str.split()] - print 'Launching process %s ...' 
% stress_cmd stress_p = subprocess.Popen(args=stress_cmd, stdout=logfile, @@ -147,6 +150,7 @@ def run_client(): qps_history = [1, 1, 1] # Maintain the last 3 qps readings qps_history_idx = 0 # Index into the qps_history list + is_running_status_written = False is_error = False while True: # Check if stress_client is still running. If so, collect metrics and upload @@ -165,6 +169,10 @@ def run_client(): print details break + if not is_running_status_written: + bq_helper.insert_summary_row(EventType.RUNNING, '') + is_running_status_written = True + # Stress client still running. Get metrics qps = _get_qps(metrics_cmd) qps_recorded_at = datetime.datetime.now().isoformat() diff --git a/tools/gcp/stress_test/run_server.py b/tools/gcp/stress_test/run_server.py index a666ae2900..8f47e42ef3 100755 --- a/tools/gcp/stress_test/run_server.py +++ b/tools/gcp/stress_test/run_server.py @@ -106,16 +106,22 @@ def run_server(): logfile = open(logfile_name, 'w') details = 'Logfile: %s' % logfile_name + stress_cmd = stress_server_cmd + [x for x in args_str.split()] + + details = '%s, Stress server command: %s' % (details, str(stress_cmd)) # Update status that the test is starting (in the status table) bq_helper.insert_summary_row(EventType.STARTING, details) - stress_cmd = stress_server_cmd + [x for x in args_str.split()] - print 'Launching process %s ...' 
% stress_cmd stress_p = subprocess.Popen(args=stress_cmd, stdout=logfile, stderr=subprocess.STDOUT) + # Update the status to running if subprocess.Popen launched the server + if stress_p.poll() is None: + bq_helper.insert_summary_row(EventType.RUNNING, '') + + # Wait for the server process to terminate returncode = stress_p.wait() if will_run_forever == '1' or returncode != 0: diff --git a/tools/gcp/stress_test/stress_test_utils.py b/tools/gcp/stress_test/stress_test_utils.py index 19d59c0df1..b821fc8fcc 100755 --- a/tools/gcp/stress_test/stress_test_utils.py +++ b/tools/gcp/stress_test/stress_test_utils.py @@ -46,6 +46,7 @@ import big_query_utils as bq_utils class EventType: STARTING = 'STARTING' + RUNNING = 'RUNNING' SUCCESS = 'SUCCESS' FAILURE = 'FAILURE' @@ -195,11 +196,11 @@ class BigQueryHelper: ('image_type', 'STRING', 'Client or Server?'), ('pod_name', 'STRING', 'GKE pod hosting this image'), ('event_date', 'STRING', 'The date of this event'), - ('event_type', 'STRING', 'STARTED/SUCCESS/FAILURE'), + ('event_type', 'STRING', 'STARTING/RUNNING/SUCCESS/FAILURE'), ('details', 'STRING', 'Any other relevant details') ] - desc = ('The table that contains START/SUCCESS/FAILURE events for ' - ' the stress test clients and servers') + desc = ('The table that contains STARTING/RUNNING/SUCCESS/FAILURE events ' + 'for the stress test clients and servers') return bq_utils.create_table(self.bq, self.project_id, self.dataset_id, self.summary_table_id, summary_table_schema, desc) diff --git a/tools/run_tests/stress_test/configs/csharp.json b/tools/run_tests/stress_test/configs/csharp.json index 587e1552b9..c438e08964 100644 --- a/tools/run_tests/stress_test/configs/csharp.json +++ b/tools/run_tests/stress_test/configs/csharp.json @@ -10,7 +10,7 @@ "baseTemplates": { "default": { "wrapperScriptPath": "/var/local/git/grpc/tools/gcp/stress_test/run_client.py", - "pollIntervalSecs": 60, + "pollIntervalSecs": 100, "clientArgs": { "num_channels_per_server":5, 
"num_stubs_per_channel":10, @@ -20,7 +20,8 @@ "metricsPort": 8081, "metricsArgs": { "metrics_server_address": "localhost:8081", - "total_only": "true" + "total_only": "true", + "deadline_secs": 60 } } }, @@ -78,7 +79,7 @@ "globalSettings": { "buildDockerImages": true, - "pollIntervalSecs": 60, + "pollIntervalSecs": 100, "testDurationSecs": 7200, "kubernetesProxyPort": 8009, "datasetIdNamePrefix": "stress_test_csharp", diff --git a/tools/run_tests/stress_test/configs/java.json b/tools/run_tests/stress_test/configs/java.json index b7c6d8b286..92af63c6b5 100644 --- a/tools/run_tests/stress_test/configs/java.json +++ b/tools/run_tests/stress_test/configs/java.json @@ -10,7 +10,7 @@ "baseTemplates": { "default": { "wrapperScriptPath": "/var/local/git/grpc/tools/gcp/stress_test/run_client.py", - "pollIntervalSecs": 60, + "pollIntervalSecs": 100, "clientArgs": { "num_channels_per_server":5, "num_stubs_per_channel":10, @@ -20,7 +20,8 @@ "metricsPort": 8081, "metricsArgs": { "metrics_server_address": "localhost:8081", - "total_only": "true" + "total_only": "true", + "deadline_secs": 60 }, "env": { "STRESSTEST_CLIENT_OPTS":"-Xmx3g -Xms3g -XX:NewSize=1500m -XX:MaxNewSize=1500m -XX:+UseConcMarkSweepGC" @@ -85,7 +86,7 @@ "globalSettings": { "buildDockerImages": true, - "pollIntervalSecs": 60, + "pollIntervalSecs": 100, "testDurationSecs": 7200, "kubernetesProxyPort": 8008, "datasetIdNamePrefix": "stress_test_java", diff --git a/tools/run_tests/stress_test/print_summary.py b/tools/run_tests/stress_test/print_summary.py new file mode 100755 index 0000000000..cb1a33961e --- /dev/null +++ b/tools/run_tests/stress_test/print_summary.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python2.7 +# Copyright 2016, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import argparse +import os +import sys + +stress_test_utils_dir = os.path.abspath(os.path.join( + os.path.dirname(__file__), '../../gcp/stress_test')) +sys.path.append(stress_test_utils_dir) +from stress_test_utils import BigQueryHelper + +argp = argparse.ArgumentParser( + description='Print summary tables', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +argp.add_argument('--gcp_project_id', + required=True, + help='The Google Cloud Platform Project Id') +argp.add_argument('--dataset_id', type=str, required=True) +argp.add_argument('--run_id', type=str, required=True) +argp.add_argument('--summary_table_id', type=str, default='summary') +argp.add_argument('--qps_table_id', type=str, default='qps') +argp.add_argument('--summary_only', action='store_true', default=True) + +if __name__ == '__main__': + args = argp.parse_args() + bq_helper = BigQueryHelper(args.run_id, '', '', args.gcp_project_id, + args.dataset_id, args.summary_table_id, + args.qps_table_id) + bq_helper.initialize() + if not args.summary_only: + bq_helper.print_qps_records() + bq_helper.print_summary_records() |