aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Jan Tattermusch <jtattermusch@users.noreply.github.com>2016-05-18 18:41:35 -0700
committerGravatar Jan Tattermusch <jtattermusch@users.noreply.github.com>2016-05-18 18:41:35 -0700
commitb9f11c3fa232aed6648eb21a533fa5492da75227 (patch)
tree811ec22602a48fe8e96cc37196a0b92f1dcee3d9
parent8c1d79690d01b279cb48de1b66566a4858ed665f (diff)
parent4dd02fc63c4e23c797dbbd277db8359e6d1c3491 (diff)
Merge pull request #6625 from sreecha/stress_test_misc
Misc changes to stress test framework (to reduce false positives and tools for debugging)
-rw-r--r--test/cpp/interop/metrics_client.cc16
-rwxr-xr-xtools/gcp/stress_test/run_client.py14
-rwxr-xr-xtools/gcp/stress_test/run_server.py10
-rwxr-xr-xtools/gcp/stress_test/stress_test_utils.py7
-rw-r--r--tools/run_tests/stress_test/configs/csharp.json7
-rw-r--r--tools/run_tests/stress_test/configs/java.json7
-rwxr-xr-xtools/run_tests/stress_test/print_summary.py59
7 files changed, 100 insertions, 20 deletions
diff --git a/test/cpp/interop/metrics_client.cc b/test/cpp/interop/metrics_client.cc
index cc304f2e89..c8c2215fab 100644
--- a/test/cpp/interop/metrics_client.cc
+++ b/test/cpp/interop/metrics_client.cc
@@ -42,13 +42,15 @@
#include "test/cpp/util/metrics_server.h"
#include "test/cpp/util/test_config.h"
-DEFINE_string(metrics_server_address, "",
+int kDeadlineSecs = 10;
+
+DEFINE_string(metrics_server_address, "localhost:8081",
"The metrics server addresses in the fomrat <hostname>:<port>");
+DEFINE_int32(deadline_secs, kDeadlineSecs,
+ "The deadline (in seconds) for RCP call");
DEFINE_bool(total_only, false,
"If true, this prints only the total value of all gauges");
-int kDeadlineSecs = 10;
-
using grpc::testing::EmptyMessage;
using grpc::testing::GaugeResponse;
using grpc::testing::MetricsService;
@@ -56,12 +58,13 @@ using grpc::testing::MetricsServiceImpl;
// Prints the values of all Gauges (unless total_only is set to 'true' in which
// case this only prints the sum of all gauge values).
-bool PrintMetrics(std::unique_ptr<MetricsService::Stub> stub, bool total_only) {
+bool PrintMetrics(std::unique_ptr<MetricsService::Stub> stub, bool total_only,
+ int deadline_secs) {
grpc::ClientContext context;
EmptyMessage message;
std::chrono::system_clock::time_point deadline =
- std::chrono::system_clock::now() + std::chrono::seconds(kDeadlineSecs);
+ std::chrono::system_clock::now() + std::chrono::seconds(deadline_secs);
context.set_deadline(deadline);
@@ -108,7 +111,8 @@ int main(int argc, char** argv) {
std::shared_ptr<grpc::Channel> channel(grpc::CreateChannel(
FLAGS_metrics_server_address, grpc::InsecureChannelCredentials()));
- if (!PrintMetrics(MetricsService::NewStub(channel), FLAGS_total_only)) {
+ if (!PrintMetrics(MetricsService::NewStub(channel), FLAGS_total_only,
+ FLAGS_deadline_secs)) {
return 1;
}
diff --git a/tools/gcp/stress_test/run_client.py b/tools/gcp/stress_test/run_client.py
index 2004bf6db4..51ada6820d 100755
--- a/tools/gcp/stress_test/run_client.py
+++ b/tools/gcp/stress_test/run_client.py
@@ -133,12 +133,15 @@ def run_client():
details = 'Logfile: %s' % logfile_name
logfile = open(logfile_name, 'w')
+ metrics_cmd = metrics_client_cmd + [x
+ for x in metrics_client_args_str.split()]
+ stress_cmd = stress_client_cmd + [x for x in args_str.split()]
+
+ details = '%s, Metrics command: %s, Stress client command: %s' % (
+ details, str(metrics_cmd), str(stress_cmd))
# Update status that the test is starting (in the status table)
bq_helper.insert_summary_row(EventType.STARTING, details)
- metrics_cmd = metrics_client_cmd + [x for x in metrics_client_args_str.split()]
- stress_cmd = stress_client_cmd + [x for x in args_str.split()]
-
print 'Launching process %s ...' % stress_cmd
stress_p = subprocess.Popen(args=stress_cmd,
stdout=logfile,
@@ -147,6 +150,7 @@ def run_client():
qps_history = [1, 1, 1] # Maintain the last 3 qps readings
qps_history_idx = 0 # Index into the qps_history list
+ is_running_status_written = False
is_error = False
while True:
# Check if stress_client is still running. If so, collect metrics and upload
@@ -165,6 +169,10 @@ def run_client():
print details
break
+ if not is_running_status_written:
+ bq_helper.insert_summary_row(EventType.RUNNING, '')
+ is_running_status_written = True
+
# Stress client still running. Get metrics
qps = _get_qps(metrics_cmd)
qps_recorded_at = datetime.datetime.now().isoformat()
diff --git a/tools/gcp/stress_test/run_server.py b/tools/gcp/stress_test/run_server.py
index a666ae2900..8f47e42ef3 100755
--- a/tools/gcp/stress_test/run_server.py
+++ b/tools/gcp/stress_test/run_server.py
@@ -106,16 +106,22 @@ def run_server():
logfile = open(logfile_name, 'w')
details = 'Logfile: %s' % logfile_name
+ stress_cmd = stress_server_cmd + [x for x in args_str.split()]
+
+ details = '%s, Stress server command: %s' % (details, str(stress_cmd))
# Update status that the test is starting (in the status table)
bq_helper.insert_summary_row(EventType.STARTING, details)
- stress_cmd = stress_server_cmd + [x for x in args_str.split()]
-
print 'Launching process %s ...' % stress_cmd
stress_p = subprocess.Popen(args=stress_cmd,
stdout=logfile,
stderr=subprocess.STDOUT)
+ # Update the status to running if subprocess.Popen launched the server
+ if stress_p.poll() is None:
+ bq_helper.insert_summary_row(EventType.RUNNING, '')
+
+ # Wait for the server process to terminate
returncode = stress_p.wait()
if will_run_forever == '1' or returncode != 0:
diff --git a/tools/gcp/stress_test/stress_test_utils.py b/tools/gcp/stress_test/stress_test_utils.py
index 19d59c0df1..b821fc8fcc 100755
--- a/tools/gcp/stress_test/stress_test_utils.py
+++ b/tools/gcp/stress_test/stress_test_utils.py
@@ -46,6 +46,7 @@ import big_query_utils as bq_utils
class EventType:
STARTING = 'STARTING'
+ RUNNING = 'RUNNING'
SUCCESS = 'SUCCESS'
FAILURE = 'FAILURE'
@@ -195,11 +196,11 @@ class BigQueryHelper:
('image_type', 'STRING', 'Client or Server?'),
('pod_name', 'STRING', 'GKE pod hosting this image'),
('event_date', 'STRING', 'The date of this event'),
- ('event_type', 'STRING', 'STARTED/SUCCESS/FAILURE'),
+ ('event_type', 'STRING', 'STARTING/RUNNING/SUCCESS/FAILURE'),
('details', 'STRING', 'Any other relevant details')
]
- desc = ('The table that contains START/SUCCESS/FAILURE events for '
- ' the stress test clients and servers')
+ desc = ('The table that contains STARTING/RUNNING/SUCCESS/FAILURE events '
+ 'for the stress test clients and servers')
return bq_utils.create_table(self.bq, self.project_id, self.dataset_id,
self.summary_table_id, summary_table_schema,
desc)
diff --git a/tools/run_tests/stress_test/configs/csharp.json b/tools/run_tests/stress_test/configs/csharp.json
index 587e1552b9..c438e08964 100644
--- a/tools/run_tests/stress_test/configs/csharp.json
+++ b/tools/run_tests/stress_test/configs/csharp.json
@@ -10,7 +10,7 @@
"baseTemplates": {
"default": {
"wrapperScriptPath": "/var/local/git/grpc/tools/gcp/stress_test/run_client.py",
- "pollIntervalSecs": 60,
+ "pollIntervalSecs": 100,
"clientArgs": {
"num_channels_per_server":5,
"num_stubs_per_channel":10,
@@ -20,7 +20,8 @@
"metricsPort": 8081,
"metricsArgs": {
"metrics_server_address": "localhost:8081",
- "total_only": "true"
+ "total_only": "true",
+ "deadline_secs": 60
}
}
},
@@ -78,7 +79,7 @@
"globalSettings": {
"buildDockerImages": true,
- "pollIntervalSecs": 60,
+ "pollIntervalSecs": 100,
"testDurationSecs": 7200,
"kubernetesProxyPort": 8009,
"datasetIdNamePrefix": "stress_test_csharp",
diff --git a/tools/run_tests/stress_test/configs/java.json b/tools/run_tests/stress_test/configs/java.json
index b7c6d8b286..92af63c6b5 100644
--- a/tools/run_tests/stress_test/configs/java.json
+++ b/tools/run_tests/stress_test/configs/java.json
@@ -10,7 +10,7 @@
"baseTemplates": {
"default": {
"wrapperScriptPath": "/var/local/git/grpc/tools/gcp/stress_test/run_client.py",
- "pollIntervalSecs": 60,
+ "pollIntervalSecs": 100,
"clientArgs": {
"num_channels_per_server":5,
"num_stubs_per_channel":10,
@@ -20,7 +20,8 @@
"metricsPort": 8081,
"metricsArgs": {
"metrics_server_address": "localhost:8081",
- "total_only": "true"
+ "total_only": "true",
+ "deadline_secs": 60
},
"env": {
"STRESSTEST_CLIENT_OPTS":"-Xmx3g -Xms3g -XX:NewSize=1500m -XX:MaxNewSize=1500m -XX:+UseConcMarkSweepGC"
@@ -85,7 +86,7 @@
"globalSettings": {
"buildDockerImages": true,
- "pollIntervalSecs": 60,
+ "pollIntervalSecs": 100,
"testDurationSecs": 7200,
"kubernetesProxyPort": 8008,
"datasetIdNamePrefix": "stress_test_java",
diff --git a/tools/run_tests/stress_test/print_summary.py b/tools/run_tests/stress_test/print_summary.py
new file mode 100755
index 0000000000..cb1a33961e
--- /dev/null
+++ b/tools/run_tests/stress_test/print_summary.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python2.7
+# Copyright 2016, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import argparse
+import os
+import sys
+
+stress_test_utils_dir = os.path.abspath(os.path.join(
+ os.path.dirname(__file__), '../../gcp/stress_test'))
+sys.path.append(stress_test_utils_dir)
+from stress_test_utils import BigQueryHelper
+
+# Command-line interface. The project, dataset and run ids are mandatory; the
+# two table ids fall back to the defaults given below.
+argp = argparse.ArgumentParser(
+    description='Print summary tables',
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+argp.add_argument('--gcp_project_id',
+                  required=True,
+                  help='The Google Cloud Platform Project Id')
+argp.add_argument('--dataset_id', type=str, required=True)
+argp.add_argument('--run_id', type=str, required=True)
+argp.add_argument('--summary_table_id', type=str, default='summary')
+argp.add_argument('--qps_table_id', type=str, default='qps')
+# --summary_only is a store_true flag whose default is already True, so by
+# itself it can never yield False. --no_summary_only stores False into the same
+# destination, which makes the QPS branch below reachable while leaving the
+# default behavior (summary only) unchanged.
+argp.add_argument('--summary_only',
+                  dest='summary_only',
+                  action='store_true',
+                  default=True,
+                  help='Print only the summary records')
+argp.add_argument('--no_summary_only',
+                  dest='summary_only',
+                  action='store_false',
+                  help='Also print the QPS records before the summary records')
+
+if __name__ == '__main__':
+  args = argp.parse_args()
+  # NOTE(review): the two empty strings are passed positionally after run_id;
+  # confirm against BigQueryHelper's constructor which fields they fill.
+  bq_helper = BigQueryHelper(args.run_id, '', '', args.gcp_project_id,
+                             args.dataset_id, args.summary_table_id,
+                             args.qps_table_id)
+  bq_helper.initialize()
+  # QPS records are printed only when --no_summary_only was given.
+  if not args.summary_only:
+    bq_helper.print_qps_records()
+  bq_helper.print_summary_records()