From 7d037a5ee2ebdba2fb8d0688399151878818c297 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 12 Feb 2016 19:49:23 -0800 Subject: first version of bigquery python helpers --- tools/bigquery/big_query_utils.py | 181 +++++++++++++++++++++ .../dockerfile/grpc_interop_stress_cxx/Dockerfile | 5 + 2 files changed, 186 insertions(+) create mode 100644 tools/bigquery/big_query_utils.py (limited to 'tools') diff --git a/tools/bigquery/big_query_utils.py b/tools/bigquery/big_query_utils.py new file mode 100644 index 0000000000..ebcf9d6ec3 --- /dev/null +++ b/tools/bigquery/big_query_utils.py @@ -0,0 +1,181 @@ +import argparse +import json +import uuid +import httplib2 + +from apiclient import discovery +from apiclient.errors import HttpError +from oauth2client.client import GoogleCredentials + +NUM_RETRIES = 3 + + +def create_bq(): + """Authenticates with cloud platform and gets a BiqQuery service object + """ + creds = GoogleCredentials.get_application_default() + return discovery.build('bigquery', 'v2', credentials=creds) + + +def create_ds(biq_query, project_id, dataset_id): + is_success = True + body = { + 'datasetReference': { + 'projectId': project_id, + 'datasetId': dataset_id + } + } + try: + dataset_req = biq_query.datasets().insert(projectId=project_id, body=body) + dataset_req.execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: The dataset %s already exists' % dataset_id + else: + # Note: For more debugging info, print "http_error.content" + print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error) + is_success = False + return is_success + + +def make_field(field_name, field_type, field_description): + return { + 'name': field_name, + 'type': field_type, + 'description': field_description + } + + +def create_table(big_query, project_id, dataset_id, table_id, fields_list, + description): + is_success = True + body = { + 'description': description, + 'schema': { + 'fields': fields_list + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } + } + try: + table_req = big_query.tables().insert(projectId=project_id, + datasetId=dataset_id, + body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print 'Successfully created %s "%s"' % (res['kind'], res['id']) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: Table %s already exists' % table_id + else: + print 'Error in creating table: %s. 
Err: %s' % (table_id, http_error) + is_success = False + return is_success + + +def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): + is_success = True + body = {'rows': rows_list} + try: + insert_req = big_query.tabledata().insertAll(projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + print body + res = insert_req.execute(num_retries=NUM_RETRIES) + print res + except HttpError as http_error: + print 'Error in inserting rows in the table %s' % table_id + is_success = False + return is_success + +##################### + + +def make_emp_row(emp_id, emp_name, emp_email): + return { + 'insertId': str(emp_id), + 'json': { + 'emp_id': emp_id, + 'emp_name': emp_name, + 'emp_email_id': emp_email + } + } + + +def get_emp_table_fields_list(): + return [ + make_field('emp_id', 'INTEGER', 'Employee id'), + make_field('emp_name', 'STRING', 'Employee name'), + make_field('emp_email_id', 'STRING', 'Employee email id') + ] + + +def insert_emp_rows(big_query, project_id, dataset_id, table_id, start_idx, + num_rows): + rows_list = [make_emp_row(i, 'sree_%d' % i, 'sreecha_%d@gmail.com' % i) + for i in range(start_idx, start_idx + num_rows)] + insert_rows(big_query, project_id, dataset_id, table_id, rows_list) + + +def create_emp_table(big_query, project_id, dataset_id, table_id): + fields_list = get_emp_table_fields_list() + description = 'Test table created by sree' + create_table(big_query, project_id, dataset_id, table_id, fields_list, + description) + + +def sync_query(big_query, project_id, query, timeout=5000): + query_data = {'query': query, 'timeoutMs': timeout} + query_job = None + try: + query_job = big_query.jobs().query( + projectId=project_id, + body=query_data).execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + print 'Query execute job failed with error: %s' % http_error + print http_error.content + return query_job + +#[Start query_emp_records] +def query_emp_records(big_query, project_id, dataset_id, table_id): + query = 'SELECT emp_id, emp_name FROM %s.%s ORDER BY emp_id;' % (dataset_id, table_id) + print query + query_job = sync_query(big_query, project_id, query, 5000) + job_id = query_job['jobReference'] + + print query_job + print '**Starting paging **' + #[Start Paging] + page_token = None + while True: + page = big_query.jobs().getQueryResults( + pageToken=page_token, + **query_job['jobReference']).execute(num_retries=NUM_RETRIES) + rows = page['rows'] + for row in rows: + print row['f'][0]['v'], "---", row['f'][1]['v'] + page_token = page.get('pageToken') + if not page_token: + break + #[End Paging] +#[End query_emp_records] + +######################### +DATASET_SEQ_NUM = 1 +TABLE_SEQ_NUM = 11 + +PROJECT_ID = 'sree-gce' +DATASET_ID = 'sree_test_dataset_%d' % DATASET_SEQ_NUM +TABLE_ID = 'sree_test_table_%d' % TABLE_SEQ_NUM + +EMP_ROW_IDX = 10 +EMP_NUM_ROWS = 5 + +bq = create_bq() +create_ds(bq, PROJECT_ID, DATASET_ID) +create_emp_table(bq, PROJECT_ID, DATASET_ID, TABLE_ID) +insert_emp_rows(bq, PROJECT_ID, DATASET_ID, TABLE_ID, EMP_ROW_IDX, EMP_NUM_ROWS) +query_emp_records(bq, PROJECT_ID, DATASET_ID, TABLE_ID) diff --git a/tools/dockerfile/grpc_interop_stress_cxx/Dockerfile b/tools/dockerfile/grpc_interop_stress_cxx/Dockerfile index 58a8c32e34..4123cc1a26 100644 --- a/tools/dockerfile/grpc_interop_stress_cxx/Dockerfile +++ b/tools/dockerfile/grpc_interop_stress_cxx/Dockerfile @@ -59,6 +59,8 @@ RUN apt-get update && apt-get install -y \ wget \ zip && apt-get clean +RUN easy_install -U pip + # Prepare ccache RUN ln -s 
/usr/bin/ccache /usr/local/bin/gcc
RUN ln -s /usr/bin/ccache /usr/local/bin/g++
@@ -71,5 +73,8 @@ RUN ln -s /usr/bin/ccache /usr/local/bin/clang++
 # C++ dependencies
 RUN apt-get update && apt-get -y install libgflags-dev libgtest-dev libc++-dev clang

+# Google Cloud platform API libraries (for BigQuery)
+RUN pip install --upgrade google-api-python-client
+
 # Define the default command.
 CMD ["bash"]
--
cgit v1.2.3


From 44ca2c26409a172b80bc9f40f7578f3eaf1d135d Mon Sep 17 00:00:00 2001
From: Sree Kuchibhotla
Date: Tue, 16 Feb 2016 09:48:36 -0800
Subject: Examples

---
 test/cpp/interop/metrics_client.cc     |  41 +++--
 tools/bigquery/big_query_utils.py      | 181 ---------------------
 .../build_interop_stress.sh            |   2 +-
 tools/gke/big_query_utils.py           | 181 +++++++++++++++++++++
 tools/gke/create_client.py             | 108 ++++++++++++
 tools/gke/create_server.py             |  74 +++++++++
 tools/gke/delete_client.py             |  66 ++++++++
 tools/gke/delete_server.py             |  58 +++++++
 tools/gke/kubernetes_api.py            |  35 ++--
 tools/run_tests/stress_test_wrapper.py |  96 +++++++++++
 10 files changed, 632 insertions(+), 210 deletions(-)
 delete mode 100644 tools/bigquery/big_query_utils.py
 create mode 100644 tools/gke/big_query_utils.py
 create mode 100755 tools/gke/create_client.py
 create mode 100755 tools/gke/create_server.py
 create mode 100755 tools/gke/delete_client.py
 create mode 100755 tools/gke/delete_server.py
 create mode 100755 tools/run_tests/stress_test_wrapper.py
(limited to 'tools')

diff --git a/test/cpp/interop/metrics_client.cc b/test/cpp/interop/metrics_client.cc
index 0c140ffd85..cc304f2e89 100644
--- a/test/cpp/interop/metrics_client.cc
+++ b/test/cpp/interop/metrics_client.cc
@@ -37,39 +37,45 @@
 #include
 #include
-#include "test/cpp/util/metrics_server.h"
-#include "test/cpp/util/test_config.h"
 #include "src/proto/grpc/testing/metrics.grpc.pb.h"
 #include "src/proto/grpc/testing/metrics.pb.h"
+#include "test/cpp/util/metrics_server.h"
+#include "test/cpp/util/test_config.h"

 DEFINE_string(metrics_server_address, "",
               "The metrics server addresses in the format <hostname>:<port>");
+DEFINE_bool(total_only, false,
+            "If true, this prints only the total value of all gauges");
+
+int kDeadlineSecs = 10;

 using grpc::testing::EmptyMessage;
 using grpc::testing::GaugeResponse;
 using grpc::testing::MetricsService;
 using grpc::testing::MetricsServiceImpl;

-void PrintMetrics(const grpc::string& server_address) {
-  gpr_log(GPR_INFO, "creating a channel to %s", server_address.c_str());
-  std::shared_ptr<grpc::Channel> channel(
-      grpc::CreateChannel(server_address, grpc::InsecureChannelCredentials()));
-
-  std::unique_ptr<MetricsService::Stub> stub(MetricsService::NewStub(channel));
-
+// Prints the values of all Gauges (unless total_only is set to 'true' in which
+// case this only prints the sum of all gauge values).
+bool PrintMetrics(std::unique_ptr<MetricsService::Stub> stub, bool total_only) {
   grpc::ClientContext context;
   EmptyMessage message;
+
+  std::chrono::system_clock::time_point deadline =
+      std::chrono::system_clock::now() + std::chrono::seconds(kDeadlineSecs);
+
+  context.set_deadline(deadline);
+
   std::unique_ptr<grpc::ClientReader<GaugeResponse>> reader(
       stub->GetAllGauges(&context, message));

   GaugeResponse gauge_response;
   long overall_qps = 0;
-  int idx = 0;
   while (reader->Read(&gauge_response)) {
     if (gauge_response.value_case() == GaugeResponse::kLongValue) {
-      gpr_log(GPR_INFO, "Gauge: %d (%s: %ld)", ++idx,
-              gauge_response.name().c_str(), gauge_response.long_value());
+      if (!total_only) {
+        gpr_log(GPR_INFO, "%s: %ld", gauge_response.name().c_str(),
+                gauge_response.long_value());
+      }
       overall_qps += gauge_response.long_value();
     } else {
       gpr_log(GPR_INFO, "Gauge %s is not a long value",
@@ -77,12 +83,14 @@ void PrintMetrics(const grpc::string& server_address) {
     }
   }

-  gpr_log(GPR_INFO, "OVERALL: %ld", overall_qps);
+  gpr_log(GPR_INFO, "%ld", overall_qps);

   const grpc::Status status = reader->Finish();
   if (!status.ok()) {
     gpr_log(GPR_ERROR, "Error in getting metrics from the client");
   }
+
+  return status.ok();
 }

 int main(int argc, char** argv) {
@@ -97,7 +105,12 @@ int main(int argc, char** argv) {
     return 1;
   }

-  PrintMetrics(FLAGS_metrics_server_address);
+  std::shared_ptr<grpc::Channel> channel(grpc::CreateChannel(
+      FLAGS_metrics_server_address, grpc::InsecureChannelCredentials()));
+
+  if (!PrintMetrics(MetricsService::NewStub(channel), FLAGS_total_only)) {
+    return 1;
+  }

   return 0;
 }
diff --git a/tools/bigquery/big_query_utils.py b/tools/bigquery/big_query_utils.py
deleted file mode 100644
index ebcf9d6ec3..0000000000
--- a/tools/bigquery/big_query_utils.py
+++ /dev/null
@@ -1,181 +0,0 @@
-import argparse
-import json
-import uuid
-import httplib2
-
-from apiclient import discovery
-from apiclient.errors import HttpError
-from oauth2client.client import GoogleCredentials
-
-NUM_RETRIES = 3
-
-
-def create_bq():
-  """Authenticates with cloud platform and gets a BiqQuery service object
-  """
-  creds = GoogleCredentials.get_application_default()
-  return discovery.build('bigquery', 'v2', credentials=creds)
-
-
-def create_ds(biq_query, project_id, dataset_id):
-  is_success = True
-  body = {
-      'datasetReference': {
-          'projectId': project_id,
-          'datasetId': dataset_id
-      }
-  }
-  try:
-    dataset_req = biq_query.datasets().insert(projectId=project_id, body=body)
-    dataset_req.execute(num_retries=NUM_RETRIES)
-  except HttpError as http_error:
-    if http_error.resp.status == 409:
-      print 'Warning: The dataset %s already exists' % dataset_id
-    else:
-      # Note: For more debugging info, print "http_error.content"
-      print 'Error in creating dataset: %s.
Err: %s' % (dataset_id, http_error) - is_success = False - return is_success - - -def make_field(field_name, field_type, field_description): - return { - 'name': field_name, - 'type': field_type, - 'description': field_description - } - - -def create_table(big_query, project_id, dataset_id, table_id, fields_list, - description): - is_success = True - body = { - 'description': description, - 'schema': { - 'fields': fields_list - }, - 'tableReference': { - 'datasetId': dataset_id, - 'projectId': project_id, - 'tableId': table_id - } - } - try: - table_req = big_query.tables().insert(projectId=project_id, - datasetId=dataset_id, - body=body) - res = table_req.execute(num_retries=NUM_RETRIES) - print 'Successfully created %s "%s"' % (res['kind'], res['id']) - except HttpError as http_error: - if http_error.resp.status == 409: - print 'Warning: Table %s already exists' % table_id - else: - print 'Error in creating table: %s. Err: %s' % (table_id, http_error) - is_success = False - return is_success - - -def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): - is_success = True - body = {'rows': rows_list} - try: - insert_req = big_query.tabledata().insertAll(projectId=project_id, - datasetId=dataset_id, - tableId=table_id, - body=body) - print body - res = insert_req.execute(num_retries=NUM_RETRIES) - print res - except HttpError as http_error: - print 'Error in inserting rows in the table %s' % table_id - is_success = False - return is_success - -##################### - - -def make_emp_row(emp_id, emp_name, emp_email): - return { - 'insertId': str(emp_id), - 'json': { - 'emp_id': emp_id, - 'emp_name': emp_name, - 'emp_email_id': emp_email - } - } - - -def get_emp_table_fields_list(): - return [ - make_field('emp_id', 'INTEGER', 'Employee id'), - make_field('emp_name', 'STRING', 'Employee name'), - make_field('emp_email_id', 'STRING', 'Employee email id') - ] - - -def insert_emp_rows(big_query, project_id, dataset_id, table_id, start_idx, - num_rows): - rows_list = [make_emp_row(i, 'sree_%d' % i, 'sreecha_%d@gmail.com' % i) - for i in range(start_idx, start_idx + num_rows)] - insert_rows(big_query, project_id, dataset_id, table_id, rows_list) - - -def create_emp_table(big_query, project_id, dataset_id, table_id): - fields_list = get_emp_table_fields_list() - description = 'Test table created by sree' - create_table(big_query, project_id, dataset_id, table_id, fields_list, - description) - - -def sync_query(big_query, project_id, query, timeout=5000): - query_data = {'query': query, 'timeoutMs': timeout} - query_job = None - try: - query_job = big_query.jobs().query( - projectId=project_id, - body=query_data).execute(num_retries=NUM_RETRIES) - except HttpError as http_error: - print 'Query execute job failed with error: %s' % http_error - print http_error.content - return query_job - -#[Start query_emp_records] -def query_emp_records(big_query, project_id, dataset_id, table_id): - query = 'SELECT emp_id, emp_name FROM %s.%s ORDER BY emp_id;' % (dataset_id, table_id) - print query - query_job = sync_query(big_query, project_id, query, 5000) - job_id = query_job['jobReference'] - - print query_job - print '**Starting paging **' - #[Start Paging] - page_token = None - while True: - page = big_query.jobs().getQueryResults( - pageToken=page_token, - **query_job['jobReference']).execute(num_retries=NUM_RETRIES) - rows = page['rows'] - for row in rows: - print row['f'][0]['v'], "---", row['f'][1]['v'] - page_token = page.get('pageToken') - if not page_token: - break - #[End Paging] 
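For reference, the query-plus-paging idiom that closes above (and is carried over verbatim to tools/gke/big_query_utils.py below) can be driven end to end as in this minimal sketch. It assumes the helper names from this file (create_bq, sync_query, NUM_RETRIES); the project, dataset and table names are placeholders, and page.get('rows', []) guards the empty-result case that the bare page['rows'] above would raise KeyError on.

# Minimal usage sketch (names assumed from this file): run a synchronous
# query, then walk every page of results via the job's jobReference handle.
big_query = create_bq()
query_job = sync_query(big_query, 'my-project-id',
                       'SELECT emp_id FROM my_ds.my_table', 5000)

page_token = None
while True:
  page = big_query.jobs().getQueryResults(
      pageToken=page_token,
      **query_job['jobReference']).execute(num_retries=NUM_RETRIES)
  # 'rows' is absent entirely when the result set is empty, hence .get()
  for row in page.get('rows', []):
    print row['f'][0]['v']
  page_token = page.get('pageToken')
  if not page_token:
    break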
-#[End query_emp_records] - -######################### -DATASET_SEQ_NUM = 1 -TABLE_SEQ_NUM = 11 - -PROJECT_ID = 'sree-gce' -DATASET_ID = 'sree_test_dataset_%d' % DATASET_SEQ_NUM -TABLE_ID = 'sree_test_table_%d' % TABLE_SEQ_NUM - -EMP_ROW_IDX = 10 -EMP_NUM_ROWS = 5 - -bq = create_bq() -create_ds(bq, PROJECT_ID, DATASET_ID) -create_emp_table(bq, PROJECT_ID, DATASET_ID, TABLE_ID) -insert_emp_rows(bq, PROJECT_ID, DATASET_ID, TABLE_ID, EMP_ROW_IDX, EMP_NUM_ROWS) -query_emp_records(bq, PROJECT_ID, DATASET_ID, TABLE_ID) diff --git a/tools/dockerfile/grpc_interop_stress_cxx/build_interop_stress.sh b/tools/dockerfile/grpc_interop_stress_cxx/build_interop_stress.sh index 6ed3ccb3fa..392bdfccda 100755 --- a/tools/dockerfile/grpc_interop_stress_cxx/build_interop_stress.sh +++ b/tools/dockerfile/grpc_interop_stress_cxx/build_interop_stress.sh @@ -42,4 +42,4 @@ cd /var/local/git/grpc make install-certs # build C++ interop stress client, interop client and server -make stress_test interop_client interop_server +make stress_test metrics_client interop_client interop_server diff --git a/tools/gke/big_query_utils.py b/tools/gke/big_query_utils.py new file mode 100644 index 0000000000..ebcf9d6ec3 --- /dev/null +++ b/tools/gke/big_query_utils.py @@ -0,0 +1,181 @@ +import argparse +import json +import uuid +import httplib2 + +from apiclient import discovery +from apiclient.errors import HttpError +from oauth2client.client import GoogleCredentials + +NUM_RETRIES = 3 + + +def create_bq(): + """Authenticates with cloud platform and gets a BiqQuery service object + """ + creds = GoogleCredentials.get_application_default() + return discovery.build('bigquery', 'v2', credentials=creds) + + +def create_ds(biq_query, project_id, dataset_id): + is_success = True + body = { + 'datasetReference': { + 'projectId': project_id, + 'datasetId': dataset_id + } + } + try: + dataset_req = biq_query.datasets().insert(projectId=project_id, body=body) + dataset_req.execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: The dataset %s already exists' % dataset_id + else: + # Note: For more debugging info, print "http_error.content" + print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error) + is_success = False + return is_success + + +def make_field(field_name, field_type, field_description): + return { + 'name': field_name, + 'type': field_type, + 'description': field_description + } + + +def create_table(big_query, project_id, dataset_id, table_id, fields_list, + description): + is_success = True + body = { + 'description': description, + 'schema': { + 'fields': fields_list + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } + } + try: + table_req = big_query.tables().insert(projectId=project_id, + datasetId=dataset_id, + body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print 'Successfully created %s "%s"' % (res['kind'], res['id']) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: Table %s already exists' % table_id + else: + print 'Error in creating table: %s. 
Err: %s' % (table_id, http_error) + is_success = False + return is_success + + +def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): + is_success = True + body = {'rows': rows_list} + try: + insert_req = big_query.tabledata().insertAll(projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + print body + res = insert_req.execute(num_retries=NUM_RETRIES) + print res + except HttpError as http_error: + print 'Error in inserting rows in the table %s' % table_id + is_success = False + return is_success + +##################### + + +def make_emp_row(emp_id, emp_name, emp_email): + return { + 'insertId': str(emp_id), + 'json': { + 'emp_id': emp_id, + 'emp_name': emp_name, + 'emp_email_id': emp_email + } + } + + +def get_emp_table_fields_list(): + return [ + make_field('emp_id', 'INTEGER', 'Employee id'), + make_field('emp_name', 'STRING', 'Employee name'), + make_field('emp_email_id', 'STRING', 'Employee email id') + ] + + +def insert_emp_rows(big_query, project_id, dataset_id, table_id, start_idx, + num_rows): + rows_list = [make_emp_row(i, 'sree_%d' % i, 'sreecha_%d@gmail.com' % i) + for i in range(start_idx, start_idx + num_rows)] + insert_rows(big_query, project_id, dataset_id, table_id, rows_list) + + +def create_emp_table(big_query, project_id, dataset_id, table_id): + fields_list = get_emp_table_fields_list() + description = 'Test table created by sree' + create_table(big_query, project_id, dataset_id, table_id, fields_list, + description) + + +def sync_query(big_query, project_id, query, timeout=5000): + query_data = {'query': query, 'timeoutMs': timeout} + query_job = None + try: + query_job = big_query.jobs().query( + projectId=project_id, + body=query_data).execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + print 'Query execute job failed with error: %s' % http_error + print http_error.content + return query_job + +#[Start query_emp_records] +def query_emp_records(big_query, project_id, dataset_id, table_id): + query = 'SELECT emp_id, emp_name FROM %s.%s ORDER BY emp_id;' % (dataset_id, table_id) + print query + query_job = sync_query(big_query, project_id, query, 5000) + job_id = query_job['jobReference'] + + print query_job + print '**Starting paging **' + #[Start Paging] + page_token = None + while True: + page = big_query.jobs().getQueryResults( + pageToken=page_token, + **query_job['jobReference']).execute(num_retries=NUM_RETRIES) + rows = page['rows'] + for row in rows: + print row['f'][0]['v'], "---", row['f'][1]['v'] + page_token = page.get('pageToken') + if not page_token: + break + #[End Paging] +#[End query_emp_records] + +######################### +DATASET_SEQ_NUM = 1 +TABLE_SEQ_NUM = 11 + +PROJECT_ID = 'sree-gce' +DATASET_ID = 'sree_test_dataset_%d' % DATASET_SEQ_NUM +TABLE_ID = 'sree_test_table_%d' % TABLE_SEQ_NUM + +EMP_ROW_IDX = 10 +EMP_NUM_ROWS = 5 + +bq = create_bq() +create_ds(bq, PROJECT_ID, DATASET_ID) +create_emp_table(bq, PROJECT_ID, DATASET_ID, TABLE_ID) +insert_emp_rows(bq, PROJECT_ID, DATASET_ID, TABLE_ID, EMP_ROW_IDX, EMP_NUM_ROWS) +query_emp_records(bq, PROJECT_ID, DATASET_ID, TABLE_ID) diff --git a/tools/gke/create_client.py b/tools/gke/create_client.py new file mode 100755 index 0000000000..bc56ef0ef1 --- /dev/null +++ b/tools/gke/create_client.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python2.7 +# Copyright 2015, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse + +import kubernetes_api + +argp = argparse.ArgumentParser(description='Launch Stress tests in GKE') + +argp.add_argument('-n', + '--num_instances', + required=True, + type=int, + help='The number of instances to launch in GKE') +args = argp.parse_args() + +kubernetes_api_server="localhost" +kubernetes_api_port=8001 + + +# Docker image +image_name="gcr.io/sree-gce/grpc_stress_test_2" + +server_address = "stress-server.default.svc.cluster.local:8080" +metrics_server_address = "localhost:8081" + +stress_test_arg_list=[ + "--server_addresses=" + server_address, + "--test_cases=empty_unary:20,large_unary:20", + "--num_stubs_per_channel=10" +] + +metrics_client_arg_list=[ + "--metrics_server_address=" + metrics_server_address, + "--total_only=true"] + +env_dict={ + "GPRC_ROOT": "/var/local/git/grpc", + "STRESS_TEST_IMAGE": "/var/local/git/grpc/bins/opt/stress_test", + "STRESS_TEST_ARGS_STR": ' '.join(stress_test_arg_list), + "METRICS_CLIENT_IMAGE": "/var/local/git/grpc/bins/opt/metrics_client", + "METRICS_CLIENT_ARGS_STR": ' '.join(metrics_client_arg_list)} + +cmd_list=["/var/local/git/grpc/bins/opt/stress_test"] +arg_list=stress_test_arg_list # make this [] in future +port_list=[8081] + +namespace = 'default' +is_headless_service = False # Client is NOT headless service + +print('Creating %d instances of client..' 
% args.num_instances) + +for i in range(1, args.num_instances + 1): + service_name = 'stress-client-%d' % i + pod_name = service_name # Use the same name for kubernetes Service and Pod + is_success = kubernetes_api.create_pod( + kubernetes_api_server, + kubernetes_api_port, + namespace, + pod_name, + image_name, + port_list, + cmd_list, + arg_list, + env_dict) + if not is_success: + print("Error in creating pod %s" % pod_name) + else: + is_success = kubernetes_api.create_service( + kubernetes_api_server, + kubernetes_api_port, + namespace, + service_name, + pod_name, + port_list, # Service port list + port_list, # Container port list (same as service port list) + is_headless_service) + if not is_success: + print("Error in creating service %s" % service_name) + else: + print("Created client %s" % pod_name) diff --git a/tools/gke/create_server.py b/tools/gke/create_server.py new file mode 100755 index 0000000000..23ab62c205 --- /dev/null +++ b/tools/gke/create_server.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python2.7 +# Copyright 2015, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
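Both create_client.py above and create_server.py below funnel their port, command and env lists through kubernetes_api.create_pod(). As a rough sketch of what that turns into on the wire for the server (abbreviated, metadata omitted; the exact layout is _make_pod_config()'s job, which is patched later in this series), the Pod body looks like:

# Rough sketch of the Kubernetes v1 Pod body create_pod() builds for the
# interop server; 'command' overrides the image ENTRYPOINT and 'args'
# overrides its CMD.
pod_body = {
    'kind': 'Pod',
    'apiVersion': 'v1',
    'spec': {
        'containers': [{
            'name': 'stress-server',
            'image': 'gcr.io/sree-gce/grpc_stress_test_2',
            'ports': [{'containerPort': 8080, 'protocol': 'TCP'}],
            'command': ['/var/local/git/grpc/bins/opt/interop_server'],
            'args': ['--port=8080']
        }]
    }
}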
+ +import argparse + +import kubernetes_api + +service_name = 'stress-server' +pod_name = service_name # Use the same name for kubernetes Service and Pod +namespace = 'default' +is_headless_service = True +cmd_list=['/var/local/git/grpc/bins/opt/interop_server'] +arg_list=['--port=8080'] +port_list=[8080] +image_name='gcr.io/sree-gce/grpc_stress_test_2' +env_dict={} + +# Make sure you run kubectl proxy --port=8001 +kubernetes_api_server='localhost' +kubernetes_api_port=8001 + +is_success = kubernetes_api.create_pod( + kubernetes_api_server, + kubernetes_api_port, + namespace, + pod_name, + image_name, + port_list, + cmd_list, + arg_list, + env_dict) +if not is_success: + print("Error in creating pod") +else: + is_success = kubernetes_api.create_service( + kubernetes_api_server, + kubernetes_api_port, + namespace, + service_name, + pod_name, + port_list, # Service port list + port_list, # Container port list (same as service port list) + is_headless_service) + if not is_success: + print("Error in creating service") + else: + print("Successfully created the Server") diff --git a/tools/gke/delete_client.py b/tools/gke/delete_client.py new file mode 100755 index 0000000000..aa519f26b8 --- /dev/null +++ b/tools/gke/delete_client.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python2.7 +# Copyright 2015, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
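The server above is registered as a headless service, which is what makes the fixed name stress-server.default.svc.cluster.local resolve straight to the pod from the client containers. In Kubernetes v1 terms that amounts to a Service with clusterIP set to 'None'; a sketch of the body _make_service_config() would need to emit (the function's full text is not shown in this series, so treat the selector and field layout as assumptions):

# Sketch of a headless Kubernetes v1 Service body: clusterIP 'None' skips
# virtual-IP allocation, so cluster DNS returns the pod IP directly.
service_body = {
    'kind': 'Service',
    'apiVersion': 'v1',
    'metadata': {'name': 'stress-server'},
    'spec': {
        'clusterIP': 'None',  # the headless part
        'ports': [{'port': 8080, 'targetPort': 8080, 'protocol': 'TCP'}],
        'selector': {'name': 'stress-server'}  # assumes pods labeled name=stress-server
    }
}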
+ +import argparse + +import kubernetes_api + +argp = argparse.ArgumentParser(description='Delete Stress test clients in GKE') +argp.add_argument('-n', + '--num_instances', + required=True, + type=int, + help='The number of instances currently running') + +args = argp.parse_args() +for i in range(1, args.num_instances + 1): + service_name = 'stress-client-%d' % i + pod_name = service_name + namespace = 'default' + kubernetes_api_server="localhost" + kubernetes_api_port=8001 + + is_success=kubernetes_api.delete_pod( + kubernetes_api_server, + kubernetes_api_port, + namespace, + pod_name) + if not is_success: + print('Error in deleting Pod %s' % pod_name) + else: + is_success= kubernetes_api.delete_service( + kubernetes_api_server, + kubernetes_api_port, + namespace, + service_name) + if not is_success: + print('Error in deleting Service %s' % service_name) + else: + print('Deleted %s' % pod_name) diff --git a/tools/gke/delete_server.py b/tools/gke/delete_server.py new file mode 100755 index 0000000000..6e3fdcc33b --- /dev/null +++ b/tools/gke/delete_server.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python2.7 +# Copyright 2015, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
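The delete_pod()/delete_service() calls used in these scripts are thin wrappers over the apiserver's REST interface, reached through a local kubectl proxy --port=8001. A sketch of what a pod deletion presumably reduces to, assuming the same URL scheme as create_pod() with the resource name appended (the standard v1 REST layout; the delete URLs themselves are not shown in this diff):

# Sketch: deleting a pod through the local kubectl proxy. The URL mirrors
# the create_pod() POST URL with '/<pod_name>' appended.
import requests

del_url = 'http://localhost:8001/api/v1/namespaces/default/pods/stress-server'
r = requests.delete(del_url, timeout=10)
print 'HTTP %d' % r.status_code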
+ +import argparse + +import kubernetes_api + +service_name = 'stress-server' +pod_name = service_name # Use the same name for kubernetes Service and Pod +namespace = 'default' +is_headless_service = True +kubernetes_api_server="localhost" +kubernetes_api_port=8001 + +is_success = kubernetes_api.delete_pod( + kubernetes_api_server, + kubernetes_api_port, + namespace, + pod_name) +if not is_success: + print("Error in deleting Pod %s" % pod_name) +else: + is_success = kubernetes_api.delete_service( + kubernetes_api_server, + kubernetes_api_port, + namespace, + service_name) + if not is_success: + print("Error in deleting Service %d" % service_name) + else: + print("Deleted server %s" % service_name) diff --git a/tools/gke/kubernetes_api.py b/tools/gke/kubernetes_api.py index 7dd3015365..14d724bd31 100755 --- a/tools/gke/kubernetes_api.py +++ b/tools/gke/kubernetes_api.py @@ -33,8 +33,9 @@ import json _REQUEST_TIMEOUT_SECS = 10 + def _make_pod_config(pod_name, image_name, container_port_list, cmd_list, - arg_list): + arg_list, env_dict): """Creates a string containing the Pod defintion as required by the Kubernetes API""" body = { 'kind': 'Pod', @@ -48,20 +49,21 @@ def _make_pod_config(pod_name, image_name, container_port_list, cmd_list, { 'name': pod_name, 'image': image_name, - 'ports': [] + 'ports': [{'containerPort': port, + 'protocol': 'TCP'} for port in container_port_list] } ] } } - # Populate the 'ports' list - for port in container_port_list: - port_entry = {'containerPort': port, 'protocol': 'TCP'} - body['spec']['containers'][0]['ports'].append(port_entry) + + env_list = [{'name': k, 'value': v} for (k, v) in env_dict.iteritems()] + if len(env_list) > 0: + body['spec']['containers'][0]['env'] = env_list # Add the 'Command' and 'Args' attributes if they are passed. # Note: # - 'Command' overrides the ENTRYPOINT in the Docker Image - # - 'Args' override the COMMAND in Docker image (yes, it is confusing!) + # - 'Args' override the CMD in Docker image (yes, it is confusing!) if len(cmd_list) > 0: body['spec']['containers'][0]['command'] = cmd_list if len(arg_list) > 0: @@ -70,7 +72,7 @@ def _make_pod_config(pod_name, image_name, container_port_list, cmd_list, def _make_service_config(service_name, pod_name, service_port_list, - container_port_list, is_headless): + container_port_list, is_headless): """Creates a string containing the Service definition as required by the Kubernetes API. NOTE: @@ -124,6 +126,7 @@ def _print_connection_error(msg): print('ERROR: Connection failed. Did you remember to run Kubenetes proxy on ' 'localhost (i.e kubectl proxy --port=) ?. Error: %s' % msg) + def _do_post(post_url, api_name, request_body): """Helper to do HTTP POST. @@ -135,7 +138,9 @@ def _do_post(post_url, api_name, request_body): """ is_success = True try: - r = requests.post(post_url, data=request_body, timeout=_REQUEST_TIMEOUT_SECS) + r = requests.post(post_url, + data=request_body, + timeout=_REQUEST_TIMEOUT_SECS) if r.status_code == requests.codes.conflict: print('WARN: Looks like the resource already exists. Api: %s, url: %s' % (api_name, post_url)) @@ -143,7 +148,8 @@ def _do_post(post_url, api_name, request_body): print('ERROR: %s API returned error. 
HTTP response: (%d) %s' % (api_name, r.status_code, r.text)) is_success = False - except(requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + except (requests.exceptions.Timeout, + requests.exceptions.ConnectionError) as e: is_success = False _print_connection_error(str(e)) return is_success @@ -165,7 +171,8 @@ def _do_delete(del_url, api_name): print('ERROR: %s API returned error. HTTP response: %s' % (api_name, r.text)) is_success = False - except(requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + except (requests.exceptions.Timeout, + requests.exceptions.ConnectionError) as e: is_success = False _print_connection_error(str(e)) return is_success @@ -179,12 +186,12 @@ def create_service(kube_host, kube_port, namespace, service_name, pod_name, post_url = 'http://%s:%d/api/v1/namespaces/%s/services' % ( kube_host, kube_port, namespace) request_body = _make_service_config(service_name, pod_name, service_port_list, - container_port_list, is_headless) + container_port_list, is_headless) return _do_post(post_url, 'Create Service', request_body) def create_pod(kube_host, kube_port, namespace, pod_name, image_name, - container_port_list, cmd_list, arg_list): + container_port_list, cmd_list, arg_list, env_dict): """Creates a Kubernetes Pod. Note that it is generally NOT considered a good practice to directly create @@ -200,7 +207,7 @@ def create_pod(kube_host, kube_port, namespace, pod_name, image_name, post_url = 'http://%s:%d/api/v1/namespaces/%s/pods' % (kube_host, kube_port, namespace) request_body = _make_pod_config(pod_name, image_name, container_port_list, - cmd_list, arg_list) + cmd_list, arg_list, env_dict) return _do_post(post_url, 'Create Pod', request_body) diff --git a/tools/run_tests/stress_test_wrapper.py b/tools/run_tests/stress_test_wrapper.py new file mode 100755 index 0000000000..8f1bd2024e --- /dev/null +++ b/tools/run_tests/stress_test_wrapper.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python2.7 +import os +import re +import select +import subprocess +import sys +import time + +GRPC_ROOT = '/usr/local/google/home/sreek/workspace/grpc/' +STRESS_TEST_IMAGE = GRPC_ROOT + 'bins/opt/stress_test' +STRESS_TEST_ARGS_STR = ' '.join([ + '--server_addresses=localhost:8000', + '--test_cases=empty_unary:1,large_unary:1', '--num_stubs_per_channel=10', + '--test_duration_secs=10']) +METRICS_CLIENT_IMAGE = GRPC_ROOT + 'bins/opt/metrics_client' +METRICS_CLIENT_ARGS_STR = ' '.join([ + '--metrics_server_address=localhost:8081', '--total_only=true']) +LOGFILE_NAME = 'stress_test.log' + + +# TODO (sree): Write a python grpc client to directly query the metrics instead +# of calling metrics_client +def get_qps(metrics_cmd): + qps = 0 + try: + # Note: gpr_log() writes even non-error messages to stderr stream. 
So it is
+    # important that we set stderr=subprocess.STDOUT
+    p = subprocess.Popen(args=metrics_cmd,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.STDOUT)
+    retcode = p.wait()
+    (out_str, err_str) = p.communicate()
+    if retcode != 0:
+      print 'Error in reading metrics information'
+      print 'Output: ', out_str
+    else:
+      # The overall qps is printed at the end of the line
+      m = re.search('\d+$', out_str)
+      qps = int(m.group()) if m else 0
+  except Exception as ex:
+    print 'Exception while reading metrics information: ' + str(ex)
+  return qps
+
+
+def main(argv):
+  # TODO(sree) Create BigQuery Tables
+  # (Summary table), (Metrics table)
+
+  # TODO(sree) Update status that the test is starting (in the status table)
+  #
+
+  metrics_cmd = [METRICS_CLIENT_IMAGE] + METRICS_CLIENT_ARGS_STR.split()
+
+  stress_cmd = [STRESS_TEST_IMAGE] + STRESS_TEST_ARGS_STR.split()
+  # TODO(sree): Add an option to print to stdout if logfilename is absent
+  logfile = open(LOGFILE_NAME, 'w')
+  stress_p = subprocess.Popen(args=stress_cmd,
+                              stdout=logfile,
+                              stderr=subprocess.STDOUT)
+
+  qps_history = [1, 1, 1]  # Maintain the last 3 qps
+  qps_history_idx = 0  # Index into the qps_history list
+
+  is_error = False
+  while True:
+    # Check if the stress client is still running. If it has terminated,
+    # record the completion status and stop monitoring
+    if stress_p.poll() is not None:
+      # TODO(sree) Upload completion status to BigQuery
+      is_error = (stress_p.returncode != 0)
+      break
+
+    # Stress client still running. Get metrics
+    qps = get_qps(metrics_cmd)
+
+    # If QPS has been zero for the last 3 iterations, flag it as error and exit
+    qps_history[qps_history_idx] = qps
+    qps_history_idx = (qps_history_idx + 1) % len(qps_history)
+    if sum(qps_history) == 0:
+      print ('QPS has been zero for the last 3 iterations. Not monitoring '
+             'anymore. The stress test client may be stalled.')
+      is_error = True
+      break
+
+    # Sleep between polls so this loop does not spin
+    time.sleep(5)
+
+    # TODO(sree) Upload qps metrics to BigQuery
+
+  if is_error:
+    print 'Waiting indefinitely..'
+ select.select([],[],[]) + + return 1 + + +if __name__ == '__main__': + main(sys.argv[1:]) -- cgit v1.2.3 From 559e45becd0a50bd6af850900abbb2b5759f8719 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 19 Feb 2016 03:02:16 -0800 Subject: Scripts to launch stress tests in GKE --- test/cpp/util/metrics_server.cc | 2 +- tools/big_query/big_query_utils.py | 140 ++++++++ tools/gke/big_query_utils.py | 181 ----------- tools/gke/create_client.py | 108 ------- tools/gke/create_server.py | 74 ----- tools/gke/delete_client.py | 66 ---- tools/gke/delete_server.py | 58 ---- tools/gke/kubernetes_api.py | 5 +- tools/gke/run_stress_tests_on_gke.py | 389 +++++++++++++++++++++++ tools/run_tests/stress_test/run_client.py | 188 +++++++++++ tools/run_tests/stress_test/run_server.py | 115 +++++++ tools/run_tests/stress_test/stress_test_utils.py | 192 +++++++++++ tools/run_tests/stress_test_wrapper.py | 96 ------ 13 files changed, 1028 insertions(+), 586 deletions(-) create mode 100755 tools/big_query/big_query_utils.py delete mode 100644 tools/gke/big_query_utils.py delete mode 100755 tools/gke/create_client.py delete mode 100755 tools/gke/create_server.py delete mode 100755 tools/gke/delete_client.py delete mode 100755 tools/gke/delete_server.py create mode 100755 tools/gke/run_stress_tests_on_gke.py create mode 100755 tools/run_tests/stress_test/run_client.py create mode 100755 tools/run_tests/stress_test/run_server.py create mode 100755 tools/run_tests/stress_test/stress_test_utils.py delete mode 100755 tools/run_tests/stress_test_wrapper.py (limited to 'tools') diff --git a/test/cpp/util/metrics_server.cc b/test/cpp/util/metrics_server.cc index 07978d0bdb..34d51eb316 100644 --- a/test/cpp/util/metrics_server.cc +++ b/test/cpp/util/metrics_server.cc @@ -57,7 +57,7 @@ long Gauge::Get() { grpc::Status MetricsServiceImpl::GetAllGauges( ServerContext* context, const EmptyMessage* request, ServerWriter* writer) { - gpr_log(GPR_INFO, "GetAllGauges called"); + gpr_log(GPR_DEBUG, "GetAllGauges called"); std::lock_guard lock(mu_); for (auto it = gauges_.begin(); it != gauges_.end(); it++) { diff --git a/tools/big_query/big_query_utils.py b/tools/big_query/big_query_utils.py new file mode 100755 index 0000000000..267d019850 --- /dev/null +++ b/tools/big_query/big_query_utils.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python2.7 +# Copyright 2015-2016 Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import json +import uuid +import httplib2 + +from apiclient import discovery +from apiclient.errors import HttpError +from oauth2client.client import GoogleCredentials + +NUM_RETRIES = 3 + + +def create_big_query(): + """Authenticates with cloud platform and gets a BiqQuery service object + """ + creds = GoogleCredentials.get_application_default() + return discovery.build('bigquery', 'v2', credentials=creds) + + +def create_dataset(biq_query, project_id, dataset_id): + is_success = True + body = { + 'datasetReference': { + 'projectId': project_id, + 'datasetId': dataset_id + } + } + + try: + dataset_req = biq_query.datasets().insert(projectId=project_id, body=body) + dataset_req.execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: The dataset %s already exists' % dataset_id + else: + # Note: For more debugging info, print "http_error.content" + print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error) + is_success = False + return is_success + + +def create_table(big_query, project_id, dataset_id, table_id, table_schema, + description): + is_success = True + + body = { + 'description': description, + 'schema': { + 'fields': [{ + 'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + }, + 'tableReference': { + 'datasetId': dataset_id, + 'projectId': project_id, + 'tableId': table_id + } + } + + try: + table_req = big_query.tables().insert(projectId=project_id, + datasetId=dataset_id, + body=body) + res = table_req.execute(num_retries=NUM_RETRIES) + print 'Successfully created %s "%s"' % (res['kind'], res['id']) + except HttpError as http_error: + if http_error.resp.status == 409: + print 'Warning: Table %s already exists' % table_id + else: + print 'Error in creating table: %s. 
Err: %s' % (table_id, http_error) + is_success = False + return is_success + + +def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): + is_success = True + body = {'rows': rows_list} + try: + insert_req = big_query.tabledata().insertAll(projectId=project_id, + datasetId=dataset_id, + tableId=table_id, + body=body) + print body + res = insert_req.execute(num_retries=NUM_RETRIES) + print res + except HttpError as http_error: + print 'Error in inserting rows in the table %s' % table_id + is_success = False + return is_success + + +def sync_query_job(big_query, project_id, query, timeout=5000): + query_data = {'query': query, 'timeoutMs': timeout} + query_job = None + try: + query_job = big_query.jobs().query( + projectId=project_id, + body=query_data).execute(num_retries=NUM_RETRIES) + except HttpError as http_error: + print 'Query execute job failed with error: %s' % http_error + print http_error.content + return query_job + + # List of (column name, column type, description) tuples +def make_row(unique_row_id, row_values_dict): + """row_values_dict is a dictionar of column name and column value. + """ + return {'insertId': unique_row_id, 'json': row_values_dict} diff --git a/tools/gke/big_query_utils.py b/tools/gke/big_query_utils.py deleted file mode 100644 index ebcf9d6ec3..0000000000 --- a/tools/gke/big_query_utils.py +++ /dev/null @@ -1,181 +0,0 @@ -import argparse -import json -import uuid -import httplib2 - -from apiclient import discovery -from apiclient.errors import HttpError -from oauth2client.client import GoogleCredentials - -NUM_RETRIES = 3 - - -def create_bq(): - """Authenticates with cloud platform and gets a BiqQuery service object - """ - creds = GoogleCredentials.get_application_default() - return discovery.build('bigquery', 'v2', credentials=creds) - - -def create_ds(biq_query, project_id, dataset_id): - is_success = True - body = { - 'datasetReference': { - 'projectId': project_id, - 'datasetId': dataset_id - } - } - try: - dataset_req = biq_query.datasets().insert(projectId=project_id, body=body) - dataset_req.execute(num_retries=NUM_RETRIES) - except HttpError as http_error: - if http_error.resp.status == 409: - print 'Warning: The dataset %s already exists' % dataset_id - else: - # Note: For more debugging info, print "http_error.content" - print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error) - is_success = False - return is_success - - -def make_field(field_name, field_type, field_description): - return { - 'name': field_name, - 'type': field_type, - 'description': field_description - } - - -def create_table(big_query, project_id, dataset_id, table_id, fields_list, - description): - is_success = True - body = { - 'description': description, - 'schema': { - 'fields': fields_list - }, - 'tableReference': { - 'datasetId': dataset_id, - 'projectId': project_id, - 'tableId': table_id - } - } - try: - table_req = big_query.tables().insert(projectId=project_id, - datasetId=dataset_id, - body=body) - res = table_req.execute(num_retries=NUM_RETRIES) - print 'Successfully created %s "%s"' % (res['kind'], res['id']) - except HttpError as http_error: - if http_error.resp.status == 409: - print 'Warning: Table %s already exists' % table_id - else: - print 'Error in creating table: %s. 
Err: %s' % (table_id, http_error) - is_success = False - return is_success - - -def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): - is_success = True - body = {'rows': rows_list} - try: - insert_req = big_query.tabledata().insertAll(projectId=project_id, - datasetId=dataset_id, - tableId=table_id, - body=body) - print body - res = insert_req.execute(num_retries=NUM_RETRIES) - print res - except HttpError as http_error: - print 'Error in inserting rows in the table %s' % table_id - is_success = False - return is_success - -##################### - - -def make_emp_row(emp_id, emp_name, emp_email): - return { - 'insertId': str(emp_id), - 'json': { - 'emp_id': emp_id, - 'emp_name': emp_name, - 'emp_email_id': emp_email - } - } - - -def get_emp_table_fields_list(): - return [ - make_field('emp_id', 'INTEGER', 'Employee id'), - make_field('emp_name', 'STRING', 'Employee name'), - make_field('emp_email_id', 'STRING', 'Employee email id') - ] - - -def insert_emp_rows(big_query, project_id, dataset_id, table_id, start_idx, - num_rows): - rows_list = [make_emp_row(i, 'sree_%d' % i, 'sreecha_%d@gmail.com' % i) - for i in range(start_idx, start_idx + num_rows)] - insert_rows(big_query, project_id, dataset_id, table_id, rows_list) - - -def create_emp_table(big_query, project_id, dataset_id, table_id): - fields_list = get_emp_table_fields_list() - description = 'Test table created by sree' - create_table(big_query, project_id, dataset_id, table_id, fields_list, - description) - - -def sync_query(big_query, project_id, query, timeout=5000): - query_data = {'query': query, 'timeoutMs': timeout} - query_job = None - try: - query_job = big_query.jobs().query( - projectId=project_id, - body=query_data).execute(num_retries=NUM_RETRIES) - except HttpError as http_error: - print 'Query execute job failed with error: %s' % http_error - print http_error.content - return query_job - -#[Start query_emp_records] -def query_emp_records(big_query, project_id, dataset_id, table_id): - query = 'SELECT emp_id, emp_name FROM %s.%s ORDER BY emp_id;' % (dataset_id, table_id) - print query - query_job = sync_query(big_query, project_id, query, 5000) - job_id = query_job['jobReference'] - - print query_job - print '**Starting paging **' - #[Start Paging] - page_token = None - while True: - page = big_query.jobs().getQueryResults( - pageToken=page_token, - **query_job['jobReference']).execute(num_retries=NUM_RETRIES) - rows = page['rows'] - for row in rows: - print row['f'][0]['v'], "---", row['f'][1]['v'] - page_token = page.get('pageToken') - if not page_token: - break - #[End Paging] -#[End query_emp_records] - -######################### -DATASET_SEQ_NUM = 1 -TABLE_SEQ_NUM = 11 - -PROJECT_ID = 'sree-gce' -DATASET_ID = 'sree_test_dataset_%d' % DATASET_SEQ_NUM -TABLE_ID = 'sree_test_table_%d' % TABLE_SEQ_NUM - -EMP_ROW_IDX = 10 -EMP_NUM_ROWS = 5 - -bq = create_bq() -create_ds(bq, PROJECT_ID, DATASET_ID) -create_emp_table(bq, PROJECT_ID, DATASET_ID, TABLE_ID) -insert_emp_rows(bq, PROJECT_ID, DATASET_ID, TABLE_ID, EMP_ROW_IDX, EMP_NUM_ROWS) -query_emp_records(bq, PROJECT_ID, DATASET_ID, TABLE_ID) diff --git a/tools/gke/create_client.py b/tools/gke/create_client.py deleted file mode 100755 index bc56ef0ef1..0000000000 --- a/tools/gke/create_client.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright 2015, Google Inc. -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import argparse - -import kubernetes_api - -argp = argparse.ArgumentParser(description='Launch Stress tests in GKE') - -argp.add_argument('-n', - '--num_instances', - required=True, - type=int, - help='The number of instances to launch in GKE') -args = argp.parse_args() - -kubernetes_api_server="localhost" -kubernetes_api_port=8001 - - -# Docker image -image_name="gcr.io/sree-gce/grpc_stress_test_2" - -server_address = "stress-server.default.svc.cluster.local:8080" -metrics_server_address = "localhost:8081" - -stress_test_arg_list=[ - "--server_addresses=" + server_address, - "--test_cases=empty_unary:20,large_unary:20", - "--num_stubs_per_channel=10" -] - -metrics_client_arg_list=[ - "--metrics_server_address=" + metrics_server_address, - "--total_only=true"] - -env_dict={ - "GPRC_ROOT": "/var/local/git/grpc", - "STRESS_TEST_IMAGE": "/var/local/git/grpc/bins/opt/stress_test", - "STRESS_TEST_ARGS_STR": ' '.join(stress_test_arg_list), - "METRICS_CLIENT_IMAGE": "/var/local/git/grpc/bins/opt/metrics_client", - "METRICS_CLIENT_ARGS_STR": ' '.join(metrics_client_arg_list)} - -cmd_list=["/var/local/git/grpc/bins/opt/stress_test"] -arg_list=stress_test_arg_list # make this [] in future -port_list=[8081] - -namespace = 'default' -is_headless_service = False # Client is NOT headless service - -print('Creating %d instances of client..' 
% args.num_instances) - -for i in range(1, args.num_instances + 1): - service_name = 'stress-client-%d' % i - pod_name = service_name # Use the same name for kubernetes Service and Pod - is_success = kubernetes_api.create_pod( - kubernetes_api_server, - kubernetes_api_port, - namespace, - pod_name, - image_name, - port_list, - cmd_list, - arg_list, - env_dict) - if not is_success: - print("Error in creating pod %s" % pod_name) - else: - is_success = kubernetes_api.create_service( - kubernetes_api_server, - kubernetes_api_port, - namespace, - service_name, - pod_name, - port_list, # Service port list - port_list, # Container port list (same as service port list) - is_headless_service) - if not is_success: - print("Error in creating service %s" % service_name) - else: - print("Created client %s" % pod_name) diff --git a/tools/gke/create_server.py b/tools/gke/create_server.py deleted file mode 100755 index 23ab62c205..0000000000 --- a/tools/gke/create_server.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright 2015, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import argparse - -import kubernetes_api - -service_name = 'stress-server' -pod_name = service_name # Use the same name for kubernetes Service and Pod -namespace = 'default' -is_headless_service = True -cmd_list=['/var/local/git/grpc/bins/opt/interop_server'] -arg_list=['--port=8080'] -port_list=[8080] -image_name='gcr.io/sree-gce/grpc_stress_test_2' -env_dict={} - -# Make sure you run kubectl proxy --port=8001 -kubernetes_api_server='localhost' -kubernetes_api_port=8001 - -is_success = kubernetes_api.create_pod( - kubernetes_api_server, - kubernetes_api_port, - namespace, - pod_name, - image_name, - port_list, - cmd_list, - arg_list, - env_dict) -if not is_success: - print("Error in creating pod") -else: - is_success = kubernetes_api.create_service( - kubernetes_api_server, - kubernetes_api_port, - namespace, - service_name, - pod_name, - port_list, # Service port list - port_list, # Container port list (same as service port list) - is_headless_service) - if not is_success: - print("Error in creating service") - else: - print("Successfully created the Server") diff --git a/tools/gke/delete_client.py b/tools/gke/delete_client.py deleted file mode 100755 index aa519f26b8..0000000000 --- a/tools/gke/delete_client.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright 2015, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
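# Editor's note (illustrative, not part of the patch): because the server above
# is created with is_headless_service = True, GKE's cluster DNS publishes a
# record for the pod, which is how clients later reach it at
# 'stress-server.default.svc.cluster.local:8080' (the
# <service>.<namespace>.svc.<cluster-domain> convention). From inside the
# cluster the name resolves like any other host:
import socket

addr_info = socket.getaddrinfo('stress-server.default.svc.cluster.local',
                               8080, 0, socket.SOCK_STREAM)
print addr_info  # only resolvable from a pod inside the cluster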
- -import argparse - -import kubernetes_api - -argp = argparse.ArgumentParser(description='Delete Stress test clients in GKE') -argp.add_argument('-n', - '--num_instances', - required=True, - type=int, - help='The number of instances currently running') - -args = argp.parse_args() -for i in range(1, args.num_instances + 1): - service_name = 'stress-client-%d' % i - pod_name = service_name - namespace = 'default' - kubernetes_api_server="localhost" - kubernetes_api_port=8001 - - is_success=kubernetes_api.delete_pod( - kubernetes_api_server, - kubernetes_api_port, - namespace, - pod_name) - if not is_success: - print('Error in deleting Pod %s' % pod_name) - else: - is_success= kubernetes_api.delete_service( - kubernetes_api_server, - kubernetes_api_port, - namespace, - service_name) - if not is_success: - print('Error in deleting Service %s' % service_name) - else: - print('Deleted %s' % pod_name) diff --git a/tools/gke/delete_server.py b/tools/gke/delete_server.py deleted file mode 100755 index 6e3fdcc33b..0000000000 --- a/tools/gke/delete_server.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright 2015, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
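# Editor's sketch (not part of the patch): the delete_pod/delete_service calls
# in the cleanup loop above map to plain REST DELETEs through the same local
# proxy. Illustrative only; assumes 'kubectl proxy --port=8001' and the
# 'requests' package.
import requests

def delete_pod_sketch(pod_name, namespace='default'):
  url = 'http://localhost:8001/api/v1/namespaces/%s/pods/%s' % (namespace,
                                                                pod_name)
  return requests.delete(url).status_code == 200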
- -import argparse - -import kubernetes_api - -service_name = 'stress-server' -pod_name = service_name # Use the same name for kubernetes Service and Pod -namespace = 'default' -is_headless_service = True -kubernetes_api_server="localhost" -kubernetes_api_port=8001 - -is_success = kubernetes_api.delete_pod( - kubernetes_api_server, - kubernetes_api_port, - namespace, - pod_name) -if not is_success: - print("Error in deleting Pod %s" % pod_name) -else: - is_success = kubernetes_api.delete_service( - kubernetes_api_server, - kubernetes_api_port, - namespace, - service_name) - if not is_success: - print("Error in deleting Service %d" % service_name) - else: - print("Deleted server %s" % service_name) diff --git a/tools/gke/kubernetes_api.py b/tools/gke/kubernetes_api.py index 14d724bd31..d14c26ad6a 100755 --- a/tools/gke/kubernetes_api.py +++ b/tools/gke/kubernetes_api.py @@ -1,5 +1,5 @@ #!/usr/bin/env python2.7 -# Copyright 2015, Google Inc. +# Copyright 2015-2016 Google Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -50,7 +50,8 @@ def _make_pod_config(pod_name, image_name, container_port_list, cmd_list, 'name': pod_name, 'image': image_name, 'ports': [{'containerPort': port, - 'protocol': 'TCP'} for port in container_port_list] + 'protocol': 'TCP'} for port in container_port_list], + 'imagePullPolicy': 'Always' } ] } diff --git a/tools/gke/run_stress_tests_on_gke.py b/tools/gke/run_stress_tests_on_gke.py new file mode 100755 index 0000000000..d0c3887a42 --- /dev/null +++ b/tools/gke/run_stress_tests_on_gke.py @@ -0,0 +1,389 @@ +#!/usr/bin/env python2.7 +# Copyright 2015-2016, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
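# Editor's note on the kubernetes_api.py hunk above: 'imagePullPolicy: Always'
# forces the node to re-pull the image every time a pod starts. That matters
# here because the stress-test image is re-pushed under the same tag for each
# run, and the default policy could silently reuse a stale cached image. The
# resulting container spec looks roughly like this (values are illustrative):
container_spec = {
    'name': 'stress-client-1',                    # example pod name
    'image': 'gcr.io/sree-gce/grpc_stress_test_2',
    'ports': [{'containerPort': 8081, 'protocol': 'TCP'}],
    'imagePullPolicy': 'Always'
}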
+import datetime +import os +import subprocess +import sys +import time + +import kubernetes_api + +GRPC_ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..')) +os.chdir(GRPC_ROOT) + +class BigQuerySettings: + + def __init__(self, run_id, dataset_id, summary_table_id, qps_table_id): + self.run_id = run_id + self.dataset_id = dataset_id + self.summary_table_id = summary_table_id + self.qps_table_id = qps_table_id + + +class KubernetesProxy: + """ Class to start a proxy on localhost to the Kubernetes API server """ + + def __init__(self, api_port): + self.port = api_port + self.p = None + self.started = False + + def start(self): + cmd = ['kubectl', 'proxy', '--port=%d' % self.port] + self.p = subprocess.Popen(args=cmd) + self.started = True + time.sleep(2) + print '..Started' + + def get_port(self): + return self.port + + def is_started(self): + return self.started + + def __del__(self): + if self.p is not None: + self.p.kill() + + +def _build_docker_image(image_name, tag_name): + """ Build the docker image and add a tag """ + os.environ['INTEROP_IMAGE'] = image_name + # Note that 'BASE_NAME' HAS to be 'grpc_interop_stress_cxx' since the script + # build_interop_stress_image.sh invokes the following script: + # tools/dockerfile/$BASE_NAME/build_interop_stress.sh + os.environ['BASE_NAME'] = 'grpc_interop_stress_cxx' + cmd = ['tools/jenkins/build_interop_stress_image.sh'] + p = subprocess.Popen(args=cmd) + retcode = p.wait() + if retcode != 0: + print 'Error in building docker image' + return False + + cmd = ['docker', 'tag', '-f', image_name, tag_name] + p = subprocess.Popen(args=cmd) + retcode = p.wait() + if retcode != 0: + print 'Error in creating the tag %s for %s' % (tag_name, image_name) + return False + + return True + + +def _push_docker_image_to_gke_registry(docker_tag_name): + """Executes 'gcloud docker push ' to push the image to GKE registry""" + cmd = ['gcloud', 'docker', 'push', docker_tag_name] + print 'Pushing %s to GKE registry..' 
% docker_tag_name + p = subprocess.Popen(args=cmd) + retcode = p.wait() + if retcode != 0: + print 'Error in pushing docker image %s to the GKE registry' % docker_tag_name + return False + return True + + +def _launch_image_on_gke(kubernetes_api_server, kubernetes_api_port, namespace, + pod_name, image_name, port_list, cmd_list, arg_list, + env_dict, is_headless_service): + """Creates a GKE Pod and a Service object for a given image by calling Kubernetes API""" + is_success = kubernetes_api.create_pod( + kubernetes_api_server, + kubernetes_api_port, + namespace, + pod_name, + image_name, + port_list, # The ports to be exposed on this container/pod + cmd_list, # The command that launches the stress server + arg_list, + env_dict # Environment variables to be passed to the pod + ) + if not is_success: + print 'Error in creating Pod' + return False + + is_success = kubernetes_api.create_service( + kubernetes_api_server, + kubernetes_api_port, + namespace, + pod_name, # Use the pod name for service name as well + pod_name, + port_list, # Service port list + port_list, # Container port list (same as service port list) + is_headless_service) + if not is_success: + print 'Error in creating Service' + return False + + print 'Successfully created the pod/service %s' % pod_name + return True + + +def _delete_image_on_gke(kubernetes_proxy, pod_name_list): + """Deletes a GKE Pod and Service object for given list of Pods by calling Kubernetes API""" + if not kubernetes_proxy.is_started: + print 'Kubernetes proxy must be started before calling this function' + return False + + is_success = True + for pod_name in pod_name_list: + is_success = kubernetes_api.delete_pod( + 'localhost', kubernetes_proxy.get_port(), 'default', pod_name) + if not is_success: + print 'Error in deleting pod %s' % pod_name + break + + is_success = kubernetes_api.delete_service( + 'localhost', kubernetes_proxy.get_port(), 'default', + pod_name) # service name same as pod name + if not is_success: + print 'Error in deleting service %s' % pod_name + break + + if is_success: + print 'Successfully deleted the Pods/Services: %s' % ','.join(pod_name_list) + + return is_success + + +def _launch_server(gcp_project_id, docker_image_name, bq_settings, + kubernetes_proxy, server_pod_name, server_port): + """ Launches a stress test server instance in GKE cluster """ + if not kubernetes_proxy.is_started: + print 'Kubernetes proxy must be started before calling this function' + return False + + server_cmd_list = [ + '/var/local/git/grpc/tools/run_tests/stress_test/run_server.py' + ] # Process that is launched + server_arg_list = [] # run_server.py does not take any args (for now) + + # == Parameters to the server process launched in GKE == + server_env = { + 'STRESS_TEST_IMAGE_TYPE': 'SERVER', + 'STRESS_TEST_IMAGE': '/var/local/git/grpc/bins/opt/interop_server', + 'STRESS_TEST_ARGS_STR': '--port=%s' % server_port, + 'RUN_ID': bq_settings.run_id, + 'POD_NAME': server_pod_name, + 'GCP_PROJECT_ID': gcp_project_id, + 'DATASET_ID': bq_settings.dataset_id, + 'SUMMARY_TABLE_ID': bq_settings.summary_table_id, + 'QPS_TABLE_ID': bq_settings.qps_table_id + } + + # Launch Server + is_success = _launch_image_on_gke( + 'localhost', + kubernetes_proxy.get_port(), + 'default', + server_pod_name, + docker_image_name, + [server_port], # Port that should be exposed on the container + server_cmd_list, + server_arg_list, + server_env, + True # Headless = True for server. 
Since we want DNS records to be created by GKE
+  )
+
+  return is_success
+
+
+def _launch_client(gcp_project_id, docker_image_name, bq_settings,
+                   kubernetes_proxy, num_instances, client_pod_name_prefix,
+                   server_pod_name, server_port):
+  """ Launches a configurable number of stress test clients on GKE cluster """
+  if not kubernetes_proxy.is_started:
+    print 'Kubernetes proxy must be started before calling this function'
+    return False
+
+  server_address = '%s.default.svc.cluster.local:%d' % (server_pod_name,
+                                                        server_port)
+  #TODO(sree) Make the whole client args configurable
+  test_cases_str = 'empty_unary:1,large_unary:1'
+  stress_client_arg_list = [
+      '--server_addresses=%s' % server_address,
+      '--test_cases=%s' % test_cases_str, '--num_stubs_per_channel=10'
+  ]
+
+  client_cmd_list = [
+      '/var/local/git/grpc/tools/run_tests/stress_test/run_client.py'
+  ]
+  # run_client.py takes no args. All args are passed as env variables
+  client_arg_list = []
+
+  # TODO(sree) Make this configurable (and also less frequent)
+  poll_interval_secs = 5
+
+  metrics_port = 8081
+  metrics_server_address = 'localhost:%d' % metrics_port
+  metrics_client_arg_list = [
+      '--metrics_server_address=%s' % metrics_server_address,
+      '--total_only=true'
+  ]
+
+  client_env = {
+      'STRESS_TEST_IMAGE_TYPE': 'CLIENT',
+      'STRESS_TEST_IMAGE': '/var/local/git/grpc/bins/opt/stress_test',
+      'STRESS_TEST_ARGS_STR': ' '.join(stress_client_arg_list),
+      'METRICS_CLIENT_IMAGE': '/var/local/git/grpc/bins/opt/metrics_client',
+      'METRICS_CLIENT_ARGS_STR': ' '.join(metrics_client_arg_list),
+      'RUN_ID': bq_settings.run_id,
+      'POLL_INTERVAL_SECS': str(poll_interval_secs),
+      'GCP_PROJECT_ID': gcp_project_id,
+      'DATASET_ID': bq_settings.dataset_id,
+      'SUMMARY_TABLE_ID': bq_settings.summary_table_id,
+      'QPS_TABLE_ID': bq_settings.qps_table_id
+  }
+
+  for i in range(1, num_instances + 1):
+    pod_name = '%s-%d' % (client_pod_name_prefix, i)
+    client_env['POD_NAME'] = pod_name
+    is_success = _launch_image_on_gke(
+        'localhost',
+        kubernetes_proxy.get_port(),
+        'default',
+        pod_name,
+        docker_image_name,
+        [metrics_port],  # Client pods expose metrics port
+        client_cmd_list,
+        client_arg_list,
+        client_env,
+        False  # Client is not a headless service.
+    )
+    if not is_success:
+      print 'Error in launching client %s' % pod_name
+      return False
+
+  return True
+
+
+def _launch_server_and_client(gcp_project_id, docker_image_name,
+                              num_client_instances):
+  # == Big Query tables related settings (Common for both server and client) ==
+
+  # Create a unique id for this run (Note: Using timestamp instead of UUID to
+  # make it easier to deduce the date/time of the run just by looking at the
+  # run id. This is useful in debugging when looking at records in BigQuery)
+  run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
+
+  dataset_id = 'stress_test_%s' % run_id
+  summary_table_id = 'summary'
+  qps_table_id = 'qps'
+
+  bq_settings = BigQuerySettings(run_id, dataset_id, summary_table_id,
+                                 qps_table_id)
+
+  # Start kubernetes proxy
+  kubernetes_api_port = 9001
+  kubernetes_proxy = KubernetesProxy(kubernetes_api_port)
+  kubernetes_proxy.start()
+
+  server_pod_name = 'stress-server'
+  server_port = 8080
+  is_success = _launch_server(gcp_project_id, docker_image_name, bq_settings,
+                              kubernetes_proxy, server_pod_name, server_port)
+  if not is_success:
+    print 'Error in launching server'
+    return False
+
+  # Server takes a while to start.
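# Editor's sketch (not part of the patch): the fixed sleep below is the stopgap
# the TODO refers to; a readiness check could instead poll the pod's status
# through the same proxy until it reports 'Running'. Assumes the 'requests'
# package and the kubectl proxy on localhost:9001 used above.
import time
import requests

def wait_for_pod_running(pod_name, proxy_port=9001, timeout_secs=120):
  url = 'http://localhost:%d/api/v1/namespaces/default/pods/%s' % (proxy_port,
                                                                   pod_name)
  deadline = time.time() + timeout_secs
  while time.time() < deadline:
    resp = requests.get(url)
    # status.phase is one of Pending/Running/Succeeded/Failed/Unknown
    if resp.status_code == 200 and \
       resp.json().get('status', {}).get('phase') == 'Running':
      return True
    time.sleep(5)
  return False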
+ # TODO(sree) Use Kubernetes API to query the status of the server instead of + # sleeping + time.sleep(60) + + # Launch client + server_address = '%s.default.svc.cluster.local:%d' % (server_pod_name, + server_port) + client_pod_name_prefix = 'stress-client' + is_success = _launch_client(gcp_project_id, docker_image_name, bq_settings, + kubernetes_proxy, num_client_instances, + client_pod_name_prefix, server_pod_name, + server_port) + if not is_success: + print 'Error in launching client(s)' + return False + + return True + + +def _delete_server_and_client(num_client_instances): + kubernetes_api_port = 9001 + kubernetes_proxy = KubernetesProxy(kubernetes_api_port) + kubernetes_proxy.start() + + # Delete clients first + client_pod_names = ['stress-client-%d' % i + for i in range(1, num_client_instances + 1)] + + is_success = _delete_image_on_gke(kubernetes_proxy, client_pod_names) + if not is_success: + return False + + # Delete server + server_pod_name = 'stress-server' + return _delete_image_on_gke(kubernetes_proxy, [server_pod_name]) + + +def _build_and_push_docker_image(gcp_project_id, docker_image_name, tag_name): + is_success = _build_docker_image(docker_image_name, tag_name) + if not is_success: + return False + return _push_docker_image_to_gke_registry(tag_name) + + +# TODO(sree): This is just to test the above APIs. Rewrite this to make +# everything configurable (like image names / number of instances etc) +def test_run(): + image_name = 'grpc_stress_test' + gcp_project_id = 'sree-gce' + tag_name = 'gcr.io/%s/%s' % (gcp_project_id, image_name) + num_client_instances = 3 + + is_success = _build_docker_image(image_name, tag_name) + if not is_success: + return + + is_success = _push_docker_image_to_gke_registry(tag_name) + if not is_success: + return + + is_success = _launch_server_and_client(gcp_project_id, tag_name, + num_client_instances) + + # Run the test for 2 mins + time.sleep(120) + + is_success = _delete_server_and_client(num_client_instances) + + if not is_success: + return + + +if __name__ == '__main__': + test_run() diff --git a/tools/run_tests/stress_test/run_client.py b/tools/run_tests/stress_test/run_client.py new file mode 100755 index 0000000000..33958bce49 --- /dev/null +++ b/tools/run_tests/stress_test/run_client.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python2.7 +# Copyright 2015-2016, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import datetime
+import os
+import re
+import select
+import subprocess
+import sys
+import time
+
+from stress_test_utils import EventType
+from stress_test_utils import BigQueryHelper
+
+
+# TODO (sree): Write a python grpc client to directly query the metrics instead
+# of calling metrics_client
+def _get_qps(metrics_cmd):
+  qps = 0
+  try:
+    # Note: gpr_log() writes even non-error messages to stderr stream. So it is
+    # important that we set stderr=subprocess.STDOUT
+    p = subprocess.Popen(args=metrics_cmd,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.STDOUT)
+    retcode = p.wait()
+    (out_str, err_str) = p.communicate()
+    if retcode != 0:
+      print 'Error in reading metrics information'
+      print 'Output: ', out_str
+    else:
+      # The overall qps is printed at the end of the line
+      m = re.search('\d+$', out_str)
+      qps = int(m.group()) if m else 0
+  except Exception as ex:
+    print 'Exception while reading metrics information: ' + str(ex)
+  return qps
+
+
+def run_client():
+  """This is a wrapper around the stress test client and performs the following:
+      1) Create the following two tables in Big Query:
+         (i) Summary table: To record events like the test started, completed
+             successfully or failed
+         (ii) Qps table: To periodically record the QPS sent by this client
+      2) Start the stress test client and add a row in the Big Query summary
+         table
+      3) Once every few seconds (as specified by the poll_interval_secs) poll
+         the status of the stress test client process and perform the
+         following:
+          3.1) If the process is still running, get the current qps by invoking
+               the metrics client program and add a row in the Big Query
+               Qps table. Sleep for a duration specified by poll_interval_secs
+          3.2) If the process exited successfully, add a row in the Big Query
+               Summary table and exit
+          3.3) If the process failed, add a row in Big Query summary table and
+               wait forever.
+               NOTE: This script typically runs inside a GKE pod which means
+               that the pod gets destroyed when the script exits. However, in
+               case the stress test client fails, we would not want the pod to
+               be destroyed (since we might want to connect to the pod for
+               examining logs).
This is the reason why the script waits forever + in case of failures + """ + env = dict(os.environ) + image_type = env['STRESS_TEST_IMAGE_TYPE'] + image_name = env['STRESS_TEST_IMAGE'] + args_str = env['STRESS_TEST_ARGS_STR'] + metrics_client_image = env['METRICS_CLIENT_IMAGE'] + metrics_client_args_str = env['METRICS_CLIENT_ARGS_STR'] + run_id = env['RUN_ID'] + pod_name = env['POD_NAME'] + logfile_name = env.get('LOGFILE_NAME') + poll_interval_secs = float(env['POLL_INTERVAL_SECS']) + project_id = env['GCP_PROJECT_ID'] + dataset_id = env['DATASET_ID'] + summary_table_id = env['SUMMARY_TABLE_ID'] + qps_table_id = env['QPS_TABLE_ID'] + + bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id, + dataset_id, summary_table_id, qps_table_id) + bq_helper.initialize() + + # Create BigQuery Dataset and Tables: Summary Table and Metrics Table + if not bq_helper.setup_tables(): + print 'Error in creating BigQuery tables' + return + + start_time = datetime.datetime.now() + + logfile = None + details = 'Logging to stdout' + if logfile_name is not None: + print 'Opening logfile: %s ...' % logfile_name + details = 'Logfile: %s' % logfile_name + logfile = open(logfile_name, 'w') + + # Update status that the test is starting (in the status table) + bq_helper.insert_summary_row(EventType.STARTING, details) + + metrics_cmd = [metrics_client_image + ] + [x for x in metrics_client_args_str.split()] + stress_cmd = [image_name] + [x for x in args_str.split()] + + print 'Launching process %s ...' % stress_cmd + stress_p = subprocess.Popen(args=stress_cmd, + stdout=logfile, + stderr=subprocess.STDOUT) + + qps_history = [1, 1, 1] # Maintain the last 3 qps readings + qps_history_idx = 0 # Index into the qps_history list + + is_error = False + while True: + # Check if stress_client is still running. If so, collect metrics and upload + # to BigQuery status table + if stress_p.poll() is not None: + # TODO(sree) Upload completion status to BigQuery + end_time = datetime.datetime.now().isoformat() + event_type = EventType.SUCCESS + details = 'End time: %s' % end_time + if stress_p.returncode != 0: + event_type = EventType.FAILURE + details = 'Return code = %d. End time: %s' % (stress_p.returncode, + end_time) + is_error = True + bq_helper.insert_summary_row(event_type, details) + print details + break + + # Stress client still running. Get metrics + qps = _get_qps(metrics_cmd) + qps_recorded_at = datetime.datetime.now().isoformat() + print 'qps: %d at %s' % (qps, qps_recorded_at) + + # If QPS has been zero for the last 3 iterations, flag it as error and exit + qps_history[qps_history_idx] = qps + qps_history_idx = (qps_history_idx + 1) % len(qps_history) + if sum(qps_history) == 0: + details = 'QPS has been zero for the last %d seconds - as of : %s' % ( + poll_interval_secs * 3, qps_recorded_at) + is_error = True + bq_helper.insert_summary_row(EventType.FAILURE, details) + print details + break + + # Upload qps metrics to BiqQuery + bq_helper.insert_qps_row(qps, qps_recorded_at) + + time.sleep(poll_interval_secs) + + if is_error: + print 'Waiting indefinitely..' + select.select([], [], []) + + print 'Completed' + return + + +if __name__ == '__main__': + run_client() diff --git a/tools/run_tests/stress_test/run_server.py b/tools/run_tests/stress_test/run_server.py new file mode 100755 index 0000000000..9ad8d63638 --- /dev/null +++ b/tools/run_tests/stress_test/run_server.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python2.7 +# Copyright 2015-2016, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import datetime +import os +import select +import subprocess +import sys +import time + +from stress_test_utils import BigQueryHelper +from stress_test_utils import EventType + + +def run_server(): + """This is a wrapper around the interop server and performs the following: + 1) Create a 'Summary table' in Big Query to record events like the server + started, completed successfully or failed. NOTE: This also creates + another table called the QPS table which is currently NOT needed on the + server (it is needed on the stress test clients) + 2) Start the server process and add a row in Big Query summary table + 3) Wait for the server process to terminate. The server process does not + terminate unless there is an error. + If the server process terminated with a failure, add a row in Big Query + and wait forever. + NOTE: This script typically runs inside a GKE pod which means that the + pod gets destroyed when the script exits. However, in case the server + process fails, we would not want the pod to be destroyed (since we + might want to connect to the pod for examining logs). This is the + reason why the script waits forever in case of failures. 
+ """ + + # Read the parameters from environment variables + env = dict(os.environ) + + run_id = env['RUN_ID'] # The unique run id for this test + image_type = env['STRESS_TEST_IMAGE_TYPE'] + image_name = env['STRESS_TEST_IMAGE'] + args_str = env['STRESS_TEST_ARGS_STR'] + pod_name = env['POD_NAME'] + project_id = env['GCP_PROJECT_ID'] + dataset_id = env['DATASET_ID'] + summary_table_id = env['SUMMARY_TABLE_ID'] + qps_table_id = env['QPS_TABLE_ID'] + + logfile_name = env.get('LOGFILE_NAME') + + bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id, + dataset_id, summary_table_id, qps_table_id) + bq_helper.initialize() + + # Create BigQuery Dataset and Tables: Summary Table and Metrics Table + if not bq_helper.setup_tables(): + print 'Error in creating BigQuery tables' + return + + start_time = datetime.datetime.now() + + logfile = None + details = 'Logging to stdout' + if logfile_name is not None: + print 'Opening log file: ', logfile_name + logfile = open(logfile_name, 'w') + details = 'Logfile: %s' % logfile_name + + # Update status that the test is starting (in the status table) + bq_helper.insert_summary_row(EventType.STARTING, details) + + stress_cmd = [image_name] + [x for x in args_str.split()] + + print 'Launching process %s ...' % stress_cmd + stress_p = subprocess.Popen(args=stress_cmd, + stdout=logfile, + stderr=subprocess.STDOUT) + + returncode = stress_p.wait() + if returncode != 0: + end_time = datetime.datetime.now().isoformat() + event_type = EventType.FAILURE + details = 'Returncode: %d; End time: %s' % (returncode, end_time) + bq_helper.insert_summary_row(event_type, details) + print 'Waiting indefinitely..' + select.select([], [], []) + return returncode + + +if __name__ == '__main__': + run_server() diff --git a/tools/run_tests/stress_test/stress_test_utils.py b/tools/run_tests/stress_test/stress_test_utils.py new file mode 100755 index 0000000000..a0626ce3ac --- /dev/null +++ b/tools/run_tests/stress_test/stress_test_utils.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python2.7 +# Copyright 2015-2016, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import datetime +import json +import os +import re +import select +import subprocess +import sys +import time + +# Import big_query_utils module +bq_utils_dir = os.path.abspath(os.path.join( + os.path.dirname(__file__), '../../big_query')) +sys.path.append(bq_utils_dir) +import big_query_utils as bq_utils + +class EventType: + STARTING = 'STARTING' + SUCCESS = 'SUCCESS' + FAILURE = 'FAILURE' + +class BigQueryHelper: + """Helper class for the stress test wrappers to interact with BigQuery. + """ + + def __init__(self, run_id, image_type, pod_name, project_id, dataset_id, + summary_table_id, qps_table_id): + self.run_id = run_id + self.image_type = image_type + self.pod_name = pod_name + self.project_id = project_id + self.dataset_id = dataset_id + self.summary_table_id = summary_table_id + self.qps_table_id = qps_table_id + + def initialize(self): + self.bq = bq_utils.create_big_query() + + def setup_tables(self): + return bq_utils.create_dataset(self.bq, self.project_id, self.dataset_id) \ + and self.__create_summary_table() \ + and self.__create_qps_table() + + def insert_summary_row(self, event_type, details): + row_values_dict = { + 'run_id': self.run_id, + 'image_type': self.image_type, + 'pod_name': self.pod_name, + 'event_date': datetime.datetime.now().isoformat(), + 'event_type': event_type, + 'details': details + } + # Something that uniquely identifies the row (Biquery needs it for duplicate + # detection). 
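# Editor's note: concretely, a STARTING event for pod 'stress-client-1' in run
# '2016_02_24_17_31_52' gets the insertId
# '2016_02_24_17_31_52_stress-client-1_STARTING', so if the upload is retried,
# BigQuery drops the duplicate instead of double-counting the event.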
+ row_unique_id = '%s_%s_%s' % (self.run_id, self.pod_name, event_type) + + row = bq_utils.make_row(row_unique_id, row_values_dict) + return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id, + self.summary_table_id, [row]) + + def insert_qps_row(self, qps, recorded_at): + row_values_dict = { + 'run_id': self.run_id, + 'pod_name': self.pod_name, + 'recorded_at': recorded_at, + 'qps': qps + } + + row_unique_id = '%s_%s_%s' % (self.run_id, self.pod_name, recorded_at) + row = bq_utils.make_row(row_unique_id, row_values_dict) + return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id, + self.qps_table_id, [row]) + + def check_if_any_tests_failed(self, num_query_retries=3): + query = ('SELECT event_type FROM %s.%s WHERE run_id = %s AND ' + 'event_type="%s"') % (self.dataset_id, self.summary_table_id, + self.run_id, EventType.FAILURE) + query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) + page = self.bq.jobs().getQueryResults(**query_job['jobReference']).execute( + num_retries=num_query_retries) + print page + num_failures = int(page['totalRows']) + print 'num rows: ', num_failures + return num_failures > 0 + + def print_summary_records(self, num_query_retries=3): + line = '-' * 120 + print line + print 'Summary records' + print 'Run Id', self.run_id + print line + query = ('SELECT pod_name, image_type, event_type, event_date, details' + ' FROM %s.%s WHERE run_id = %s ORDER by event_date;') % ( + self.dataset_id, self.summary_table_id, self.run_id) + query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) + + print '{:<25} {:<12} {:<12} {:<30} {}'.format( + 'Pod name', 'Image type', 'Event type', 'Date', 'Details') + print line + page_token = None + while True: + page = self.bq.jobs().getQueryResults( + pageToken=page_token, + **query_job['jobReference']).execute(num_retries=num_query_retries) + rows = page.get('rows', []) + for row in rows: + print '{:<25} {:<12} {:<12} {:<30} {}'.format( + row['f'][0]['v'], row['f'][1]['v'], row['f'][2]['v'], + row['f'][3]['v'], row['f'][4]['v']) + page_token = page.get('pageToken') + if not page_token: + break + + def print_qps_records(self, num_query_retries=3): + line = '-' * 80 + print line + print 'QPS Summary' + print 'Run Id: ', self.run_id + print line + query = ( + 'SELECT pod_name, recorded_at, qps FROM %s.%s WHERE run_id = %s ORDER ' + 'by recorded_at;') % (self.dataset_id, self.qps_table_id, self.run_id) + query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) + print '{:<25} {:30} {}'.format('Pod name', 'Recorded at', 'Qps') + print line + page_token = None + while True: + page = self.bq.jobs().getQueryResults( + pageToken=page_token, + **query_job['jobReference']).execute(num_retries=num_query_retries) + rows = page.get('rows', []) + for row in rows: + print '{:<25} {:30} {}'.format(row['f'][0]['v'], row['f'][1]['v'], + row['f'][2]['v']) + page_token = page.get('pageToken') + if not page_token: + break + + def __create_summary_table(self): + summary_table_schema = [ + ('run_id', 'INTEGER', 'Test run id'), + ('image_type', 'STRING', 'Client or Server?'), + ('pod_name', 'STRING', 'GKE pod hosting this image'), + ('event_date', 'STRING', 'The date of this event'), + ('event_type', 'STRING', 'STARTED/SUCCESS/FAILURE'), + ('details', 'STRING', 'Any other relevant details') + ] + desc = ('The table that contains START/SUCCESS/FAILURE events for ' + ' the stress test clients and servers') + return bq_utils.create_table(self.bq, self.project_id, self.dataset_id, + 
self.summary_table_id, summary_table_schema, + desc) + + def __create_qps_table(self): + qps_table_schema = [ + ('run_id', 'INTEGER', 'Test run id'), + ('pod_name', 'STRING', 'GKE pod hosting this image'), + ('recorded_at', 'STRING', 'Metrics recorded at time'), + ('qps', 'INTEGER', 'Queries per second') + ] + desc = 'The table that cointains the qps recorded at various intervals' + return bq_utils.create_table(self.bq, self.project_id, self.dataset_id, + self.qps_table_id, qps_table_schema, desc) diff --git a/tools/run_tests/stress_test_wrapper.py b/tools/run_tests/stress_test_wrapper.py deleted file mode 100755 index 8f1bd2024e..0000000000 --- a/tools/run_tests/stress_test_wrapper.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python2.7 -import os -import re -import select -import subprocess -import sys -import time - -GRPC_ROOT = '/usr/local/google/home/sreek/workspace/grpc/' -STRESS_TEST_IMAGE = GRPC_ROOT + 'bins/opt/stress_test' -STRESS_TEST_ARGS_STR = ' '.join([ - '--server_addresses=localhost:8000', - '--test_cases=empty_unary:1,large_unary:1', '--num_stubs_per_channel=10', - '--test_duration_secs=10']) -METRICS_CLIENT_IMAGE = GRPC_ROOT + 'bins/opt/metrics_client' -METRICS_CLIENT_ARGS_STR = ' '.join([ - '--metrics_server_address=localhost:8081', '--total_only=true']) -LOGFILE_NAME = 'stress_test.log' - - -# TODO (sree): Write a python grpc client to directly query the metrics instead -# of calling metrics_client -def get_qps(metrics_cmd): - qps = 0 - try: - # Note: gpr_log() writes even non-error messages to stderr stream. So it is - # important that we set stderr=subprocess.STDOUT - p = subprocess.Popen(args=metrics_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - retcode = p.wait() - (out_str, err_str) = p.communicate() - if retcode != 0: - print 'Error in reading metrics information' - print 'Output: ', out_str - else: - # The overall qps is printed at the end of the line - m = re.search('\d+$', out_str) - qps = int(m.group()) if m else 0 - except Exception as ex: - print 'Exception while reading metrics information: ' + str(ex) - return qps - -def main(argv): - # TODO(sree) Create BigQuery Tables - # (Summary table), (Metrics table) - - # TODO(sree) Update status that the test is starting (in the status table) - # - - metrics_cmd = [METRICS_CLIENT_IMAGE - ] + [x for x in METRICS_CLIENT_ARGS_STR.split()] - - stress_cmd = [STRESS_TEST_IMAGE] + [x for x in STRESS_TEST_ARGS_STR.split()] - # TODO(sree): Add an option to print to stdout if logfilename is absent - logfile = open(LOGFILE_NAME, 'w') - stress_p = subprocess.Popen(args=arg_list, - stdout=logfile, - stderr=subprocess.STDOUT) - - qps_history = [1, 1, 1] # Maintain the last 3 qps - qps_history_idx = 0 # Index into the qps_history list - - is_error = False - while True: - # Check if stress_client is still running. If so, collect metrics and upload - # to BigQuery status table - # - if stress_p is not None: - # TODO(sree) Upload completion status to BiqQuery - is_error = (stress_p.returncode != 0) - break - - # Stress client still running. Get metrics - qps = get_qps(metrics_cmd) - - # If QPS has been zero for the last 3 iterations, flag it as error and exit - qps_history[qps_history_idx] = qps - qps_history_idx = (qps_histor_idx + 1) % len(qps_history) - if sum(a) == 0: - print ('QPS has been zero for the last 3 iterations. Not monitoring ' - 'anymore. 
The stress test client may be stalled.') - is_error = True - break - - #TODO(sree) Upload qps metrics to BiqQuery - - if is_error: - print 'Waiting indefinitely..' - select.select([],[],[]) - - return 1 - - -if __name__ == '__main__': - main(sys.argv[1:]) -- cgit v1.2.3 From 2715a39a2e7b5a47ff6726daadeac63c4664550e Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 24 Feb 2016 12:01:52 -0800 Subject: Change RUN_ID type to string to allow for a non-numeric run_id --- tools/big_query/big_query_utils.py | 2 +- tools/gke/run_stress_tests_on_gke.py | 103 +++++++++++++++-------- tools/run_tests/stress_test/run_server.py | 5 ++ tools/run_tests/stress_test/stress_test_utils.py | 17 ++-- 4 files changed, 85 insertions(+), 42 deletions(-) (limited to 'tools') diff --git a/tools/big_query/big_query_utils.py b/tools/big_query/big_query_utils.py index 267d019850..e2379fd1aa 100755 --- a/tools/big_query/big_query_utils.py +++ b/tools/big_query/big_query_utils.py @@ -135,6 +135,6 @@ def sync_query_job(big_query, project_id, query, timeout=5000): # List of (column name, column type, description) tuples def make_row(unique_row_id, row_values_dict): - """row_values_dict is a dictionar of column name and column value. + """row_values_dict is a dictionary of column name and column value. """ return {'insertId': unique_row_id, 'json': row_values_dict} diff --git a/tools/gke/run_stress_tests_on_gke.py b/tools/gke/run_stress_tests_on_gke.py index d0c3887a42..0ea7b7fcc1 100755 --- a/tools/gke/run_stress_tests_on_gke.py +++ b/tools/gke/run_stress_tests_on_gke.py @@ -33,11 +33,17 @@ import subprocess import sys import time +stress_test_utils_dir = os.path.abspath(os.path.join( + os.path.dirname(__file__), '../run_tests/stress_test')) +sys.path.append(stress_test_utils_dir) +from stress_test_utils import BigQueryHelper + import kubernetes_api GRPC_ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..')) os.chdir(GRPC_ROOT) + class BigQuerySettings: def __init__(self, run_id, dataset_id, summary_table_id, qps_table_id): @@ -283,27 +289,16 @@ def _launch_client(gcp_project_id, docker_image_name, bq_settings, return True -def _launch_server_and_client(gcp_project_id, docker_image_name, +def _launch_server_and_client(bq_settings, gcp_project_id, docker_image_name, num_client_instances): - # == Big Query tables related settings (Common for both server and client) == - - # Create a unique id for this run (Note: Using timestamp instead of UUID to - # make it easier to deduce the date/time of the run just by looking at the run - # run id. This is useful in debugging when looking at records in Biq query) - run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') - - dataset_id = 'stress_test_%s' % run_id - summary_table_id = 'summary' - qps_table_id = 'qps' - - bq_settings = BigQuerySettings(run_id, dataset_id, summary_table_id, - qps_table_id) - # Start kubernetes proxy kubernetes_api_port = 9001 kubernetes_proxy = KubernetesProxy(kubernetes_api_port) kubernetes_proxy.start() + # num of seconds to wait for the GKE image to start and warmup + image_warmp_secs = 60 + server_pod_name = 'stress-server' server_port = 8080 is_success = _launch_server(gcp_project_id, docker_image_name, bq_settings, @@ -315,7 +310,8 @@ def _launch_server_and_client(gcp_project_id, docker_image_name, # Server takes a while to start. # TODO(sree) Use Kubernetes API to query the status of the server instead of # sleeping - time.sleep(60) + print 'Waiting for %s seconds for the server to start...' 
% image_warmp_secs + time.sleep(image_warmp_secs) # Launch client server_address = '%s.default.svc.cluster.local:%d' % (server_pod_name, @@ -329,6 +325,8 @@ def _launch_server_and_client(gcp_project_id, docker_image_name, print 'Error in launching client(s)' return False + print 'Waiting for %s seconds for the client images to start...' % image_warmp_secs + time.sleep(image_warmp_secs) return True @@ -359,31 +357,68 @@ def _build_and_push_docker_image(gcp_project_id, docker_image_name, tag_name): # TODO(sree): This is just to test the above APIs. Rewrite this to make # everything configurable (like image names / number of instances etc) -def test_run(): - image_name = 'grpc_stress_test' - gcp_project_id = 'sree-gce' - tag_name = 'gcr.io/%s/%s' % (gcp_project_id, image_name) - num_client_instances = 3 +def run_test(skip_building_image, gcp_project_id, image_name, tag_name, + num_client_instances, poll_interval_secs, total_duration_secs): + if not skip_building_image: + is_success = _build_docker_image(image_name, tag_name) + if not is_success: + return False - is_success = _build_docker_image(image_name, tag_name) - if not is_success: - return + is_success = _push_docker_image_to_gke_registry(tag_name) + if not is_success: + return False - is_success = _push_docker_image_to_gke_registry(tag_name) - if not is_success: - return + # == Big Query tables related settings (Common for both server and client) == + + # Create a unique id for this run (Note: Using timestamp instead of UUID to + # make it easier to deduce the date/time of the run just by looking at the run + # run id. This is useful in debugging when looking at records in Biq query) + run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + dataset_id = 'stress_test_%s' % run_id + summary_table_id = 'summary' + qps_table_id = 'qps' + bq_settings = BigQuerySettings(run_id, dataset_id, summary_table_id, + qps_table_id) - is_success = _launch_server_and_client(gcp_project_id, tag_name, + bq_helper = BigQueryHelper(run_id, '', '', gcp_project_id, dataset_id, + summary_table_id, qps_table_id) + bq_helper.initialize() + is_success = _launch_server_and_client(bq_settings, gcp_project_id, tag_name, num_client_instances) + if not is_success: + return False + + start_time = datetime.datetime.now() + end_time = start_time + datetime.timedelta(seconds=total_duration_secs) - # Run the test for 2 mins - time.sleep(120) + while True: + if datetime.datetime.now() > end_time: + print 'Test was run for %d seconds' % total_duration_secs + break - is_success = _delete_server_and_client(num_client_instances) + # Check if either stress server or clients have failed + if not bq_helper.check_if_any_tests_failed(): + is_success = False + print 'Some tests failed.' + break + # Things seem to be running fine. 
Wait until next poll time to check the + # status + time.sleep(poll_interval_secs) - if not is_success: - return + # Print BiqQuery tables + bq_helper.print_summary_records() + bq_helper.print_qps_records() + + _delete_server_and_client(num_client_instances) + return is_success if __name__ == '__main__': - test_run() + image_name = 'grpc_stress_test' + gcp_project_id = 'sree-gce' + tag_name = 'gcr.io/%s/%s' % (gcp_project_id, image_name) + num_client_instances = 3 + poll_interval_secs = 5, + test_duration_secs = 150 + run_test(True, gcp_project_id, image_name, tag_name, num_client_instances, + poll_interval_secs, test_duration_secs) diff --git a/tools/run_tests/stress_test/run_server.py b/tools/run_tests/stress_test/run_server.py index 9ad8d63638..64322f6100 100755 --- a/tools/run_tests/stress_test/run_server.py +++ b/tools/run_tests/stress_test/run_server.py @@ -72,6 +72,11 @@ def run_server(): logfile_name = env.get('LOGFILE_NAME') + print('pod_name: %s, project_id: %s, run_id: %s, dataset_id: %s, ' + 'summary_table_id: %s, qps_table_id: %s') % ( + pod_name, project_id, run_id, dataset_id, summary_table_id, + qps_table_id) + bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id, dataset_id, summary_table_id, qps_table_id) bq_helper.initialize() diff --git a/tools/run_tests/stress_test/stress_test_utils.py b/tools/run_tests/stress_test/stress_test_utils.py index a0626ce3ac..71f0dcd921 100755 --- a/tools/run_tests/stress_test/stress_test_utils.py +++ b/tools/run_tests/stress_test/stress_test_utils.py @@ -43,11 +43,13 @@ bq_utils_dir = os.path.abspath(os.path.join( sys.path.append(bq_utils_dir) import big_query_utils as bq_utils + class EventType: STARTING = 'STARTING' SUCCESS = 'SUCCESS' FAILURE = 'FAILURE' + class BigQueryHelper: """Helper class for the stress test wrappers to interact with BigQuery. 
""" @@ -101,9 +103,9 @@ class BigQueryHelper: self.qps_table_id, [row]) def check_if_any_tests_failed(self, num_query_retries=3): - query = ('SELECT event_type FROM %s.%s WHERE run_id = %s AND ' + query = ('SELECT event_type FROM %s.%s WHERE run_id = \'%s\' AND ' 'event_type="%s"') % (self.dataset_id, self.summary_table_id, - self.run_id, EventType.FAILURE) + self.run_id, EventType.FAILURE) query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) page = self.bq.jobs().getQueryResults(**query_job['jobReference']).execute( num_retries=num_query_retries) @@ -119,7 +121,7 @@ class BigQueryHelper: print 'Run Id', self.run_id print line query = ('SELECT pod_name, image_type, event_type, event_date, details' - ' FROM %s.%s WHERE run_id = %s ORDER by event_date;') % ( + ' FROM %s.%s WHERE run_id = \'%s\' ORDER by event_date;') % ( self.dataset_id, self.summary_table_id, self.run_id) query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) @@ -147,8 +149,9 @@ class BigQueryHelper: print 'Run Id: ', self.run_id print line query = ( - 'SELECT pod_name, recorded_at, qps FROM %s.%s WHERE run_id = %s ORDER ' - 'by recorded_at;') % (self.dataset_id, self.qps_table_id, self.run_id) + 'SELECT pod_name, recorded_at, qps FROM %s.%s WHERE run_id = \'%s\' ' + 'ORDER by recorded_at;') % (self.dataset_id, self.qps_table_id, + self.run_id) query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) print '{:<25} {:30} {}'.format('Pod name', 'Recorded at', 'Qps') print line @@ -167,7 +170,7 @@ class BigQueryHelper: def __create_summary_table(self): summary_table_schema = [ - ('run_id', 'INTEGER', 'Test run id'), + ('run_id', 'STRING', 'Test run id'), ('image_type', 'STRING', 'Client or Server?'), ('pod_name', 'STRING', 'GKE pod hosting this image'), ('event_date', 'STRING', 'The date of this event'), @@ -182,7 +185,7 @@ class BigQueryHelper: def __create_qps_table(self): qps_table_schema = [ - ('run_id', 'INTEGER', 'Test run id'), + ('run_id', 'STRING', 'Test run id'), ('pod_name', 'STRING', 'GKE pod hosting this image'), ('recorded_at', 'STRING', 'Metrics recorded at time'), ('qps', 'INTEGER', 'Queries per second') -- cgit v1.2.3 From 8bcbee815c5d7aa6cda13371241536db6dc03fbb Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Wed, 24 Feb 2016 15:52:54 -0800 Subject: Fix a bug in failure check --- tools/gke/run_stress_tests_on_gke.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/gke/run_stress_tests_on_gke.py b/tools/gke/run_stress_tests_on_gke.py index 0ea7b7fcc1..9a8a33cac5 100755 --- a/tools/gke/run_stress_tests_on_gke.py +++ b/tools/gke/run_stress_tests_on_gke.py @@ -244,7 +244,7 @@ def _launch_client(gcp_project_id, docker_image_name, bq_settings, client_arg_list = [] # TODO(sree) Make this configurable (and also less frequent) - poll_interval_secs = 5 + poll_interval_secs = 30 metrics_port = 8081 metrics_server_address = 'localhost:%d' % metrics_port @@ -397,7 +397,7 @@ def run_test(skip_building_image, gcp_project_id, image_name, tag_name, break # Check if either stress server or clients have failed - if not bq_helper.check_if_any_tests_failed(): + if bq_helper.check_if_any_tests_failed(): is_success = False print 'Some tests failed.' 
break
-- cgit v1.2.3

From f63c49238e1696d2f74c68d6323e959ceb8b8791 Mon Sep 17 00:00:00 2001
From: Sree Kuchibhotla
Date: Wed, 24 Feb 2016 17:31:52 -0800
Subject: Some more cleanup

---
 tools/gke/run_stress_tests_on_gke.py             |  3 ++-
 tools/run_tests/stress_test/stress_test_utils.py | 12 +++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/gke/run_stress_tests_on_gke.py b/tools/gke/run_stress_tests_on_gke.py
index 9a8a33cac5..065b11e91c 100755
--- a/tools/gke/run_stress_tests_on_gke.py
+++ b/tools/gke/run_stress_tests_on_gke.py
@@ -403,6 +403,7 @@ def run_test(skip_building_image, gcp_project_id, image_name, tag_name,
       break
     # Things seem to be running fine. Wait until next poll time to check the
     # status
+    print 'Sleeping for %d seconds..' % poll_interval_secs
     time.sleep(poll_interval_secs)
 
   # Print BiqQuery tables
diff --git a/tools/run_tests/stress_test/stress_test_utils.py b/tools/run_tests/stress_test/stress_test_utils.py
index 71f0dcd921..7adc0068f9 100755
--- a/tools/run_tests/stress_test/stress_test_utils.py
+++ b/tools/run_tests/stress_test/stress_test_utils.py
@@ -81,10 +81,9 @@ class BigQueryHelper:
         'event_type': event_type,
         'details': details
     }
-    # Something that uniquely identifies the row (Biquery needs it for duplicate
-    # detection).
+    # row_unique_id is something that uniquely identifies the row (BigQuery uses
+    # it for duplicate detection).
     row_unique_id = '%s_%s_%s' % (self.run_id, self.pod_name, event_type)
-
     row = bq_utils.make_row(row_unique_id, row_values_dict)
@@ -97,6 +96,8 @@
         'qps': qps
     }
 
+    # row_unique_id is something that uniquely identifies the row (BigQuery uses
+    # it for duplicate detection).
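# Editor's sketch (not part of the patch): the insertId-based dedup described
# in the comment above can be exercised standalone with the helpers from
# tools/big_query/big_query_utils.py. The project/dataset/table names below are
# placeholders; assumes application-default credentials are configured.
import big_query_utils as bq_utils

bq = bq_utils.create_big_query()
row = bq_utils.make_row('run42_pod1_STARTING',         # insertId
                        {'run_id': 'run42',
                         'pod_name': 'pod1',
                         'event_type': 'STARTING'})
bq_utils.insert_rows(bq, 'my-project', 'my_dataset', 'summary', [row])
# A retry with the same insertId within BigQuery's dedup window is dropped:
bq_utils.insert_rows(bq, 'my-project', 'my_dataset', 'summary', [row])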
row_unique_id = '%s_%s_%s' % (self.run_id, self.pod_name, recorded_at) row = bq_utils.make_row(row_unique_id, row_values_dict) return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id, @@ -109,7 +110,6 @@ class BigQueryHelper: query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) page = self.bq.jobs().getQueryResults(**query_job['jobReference']).execute( num_retries=num_query_retries) - print page num_failures = int(page['totalRows']) print 'num rows: ', num_failures return num_failures > 0 @@ -118,7 +118,8 @@ class BigQueryHelper: line = '-' * 120 print line print 'Summary records' - print 'Run Id', self.run_id + print 'Run Id: ', self.run_id + print 'Dataset Id: ', self.dataset_id print line query = ('SELECT pod_name, image_type, event_type, event_date, details' ' FROM %s.%s WHERE run_id = \'%s\' ORDER by event_date;') % ( @@ -147,6 +148,7 @@ class BigQueryHelper: print line print 'QPS Summary' print 'Run Id: ', self.run_id + print 'Dataset Id: ', self.dataset_id print line query = ( 'SELECT pod_name, recorded_at, qps FROM %s.%s WHERE run_id = \'%s\' ' -- cgit v1.2.3 From 61c134f5f83a1897264016ef765e82b61ebd3992 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Fri, 26 Feb 2016 11:03:29 -0800 Subject: Significantly rewrite tools/gke/run_stress_tests_on_gke.py and make everything configurable --- tools/gke/kubernetes_api.py | 47 ++- tools/gke/run_stress_tests_on_gke.py | 541 ++++++++++++++++++++++------------- 2 files changed, 387 insertions(+), 201 deletions(-) (limited to 'tools') diff --git a/tools/gke/kubernetes_api.py b/tools/gke/kubernetes_api.py index d14c26ad6a..e1017e9da6 100755 --- a/tools/gke/kubernetes_api.py +++ b/tools/gke/kubernetes_api.py @@ -50,7 +50,8 @@ def _make_pod_config(pod_name, image_name, container_port_list, cmd_list, 'name': pod_name, 'image': image_name, 'ports': [{'containerPort': port, - 'protocol': 'TCP'} for port in container_port_list], + 'protocol': 'TCP'} + for port in container_port_list], 'imagePullPolicy': 'Always' } ] @@ -222,3 +223,47 @@ def delete_pod(kube_host, kube_port, namespace, pod_name): del_url = 'http://%s:%d/api/v1/namespaces/%s/pods/%s' % (kube_host, kube_port, namespace, pod_name) return _do_delete(del_url, 'Delete Pod') + + +def create_pod_and_service(kube_host, kube_port, namespace, pod_name, + image_name, container_port_list, cmd_list, arg_list, + env_dict, is_headless_service): + """A simple helper function that creates a pod and a service (if pod creation was successful).""" + is_success = create_pod(kube_host, kube_port, namespace, pod_name, image_name, + container_port_list, cmd_list, arg_list, env_dict) + if not is_success: + print 'Error in creating Pod' + return False + + is_success = create_service( + kube_host, + kube_port, + namespace, + pod_name, # Use pod_name for service + pod_name, + container_port_list, # Service port list same as container port list + container_port_list, + is_headless_service) + if not is_success: + print 'Error in creating Service' + return False + + print 'Successfully created the pod/service %s' % pod_name + return True + + +def delete_pod_and_service(kube_host, kube_port, namespace, pod_name): + """ A simple helper function that calls delete_pod and delete_service """ + is_success = delete_pod(kube_host, kube_port, namespace, pod_name) + if not is_success: + print 'Error in deleting pod %s' % pod_name + return False + + # Note: service name assumed to the the same as pod name + is_success = delete_service(kube_host, kube_port, namespace, pod_name) + if not 
is_success: + print 'Error in deleting service %s' % pod_name + return False + + print 'Successfully deleted the Pod/Service: %s' % pod_name + return True diff --git a/tools/gke/run_stress_tests_on_gke.py b/tools/gke/run_stress_tests_on_gke.py index 065b11e91c..cf0ef595a6 100755 --- a/tools/gke/run_stress_tests_on_gke.py +++ b/tools/gke/run_stress_tests_on_gke.py @@ -27,6 +27,7 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import argparse import datetime import os import subprocess @@ -40,17 +41,52 @@ from stress_test_utils import BigQueryHelper import kubernetes_api -GRPC_ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..')) -os.chdir(GRPC_ROOT) +_GRPC_ROOT = os.path.abspath(os.path.join( + os.path.dirname(sys.argv[0]), '../..')) +os.chdir(_GRPC_ROOT) +# num of seconds to wait for the GKE image to start and warmup +_GKE_IMAGE_WARMUP_WAIT_SECS = 60 -class BigQuerySettings: +_SERVER_POD_NAME = 'stress-server' +_CLIENT_POD_NAME_PREFIX = 'stress-client' +_DATASET_ID_PREFIX = 'stress_test' +_SUMMARY_TABLE_ID = 'summary' +_QPS_TABLE_ID = 'qps' - def __init__(self, run_id, dataset_id, summary_table_id, qps_table_id): - self.run_id = run_id - self.dataset_id = dataset_id - self.summary_table_id = summary_table_id - self.qps_table_id = qps_table_id +_DEFAULT_DOCKER_IMAGE_NAME = 'grpc_stress_test' + +# The default port on which the kubernetes proxy server is started on localhost +# (i.e kubectl proxy --port=) +_DEFAULT_KUBERNETES_PROXY_PORT = 8001 + +# How frequently should the stress client wrapper script (running inside a GKE +# container) poll the health of the stress client (also running inside the GKE +# container) and upload metrics to BigQuery +_DEFAULT_STRESS_CLIENT_POLL_INTERVAL_SECS = 60 + +# The default setting for stress test server and client +_DEFAULT_STRESS_SERVER_PORT = 8080 +_DEFAULT_METRICS_PORT = 8081 +_DEFAULT_TEST_CASES_STR = 'empty_unary:1,large_unary:1,client_streaming:1,server_streaming:1,empty_stream:1' +_DEFAULT_NUM_CHANNELS_PER_SERVER = 5 +_DEFAULT_NUM_STUBS_PER_CHANNEL = 10 +_DEFAULT_METRICS_COLLECTION_INTERVAL_SECS = 30 + +# Number of stress client instances to launch +_DEFAULT_NUM_CLIENTS = 3 + +# How frequently should this test monitor the health of Stress clients and +# Servers running in GKE +_DEFAULT_TEST_POLL_INTERVAL_SECS = 60 + +# Default run time for this test (2 hour) +_DEFAULT_TEST_DURATION_SECS = 7200 + +# The number of seconds it would take a GKE pod to warm up (i.e get to 'Running' +# state from the time of creation). Ideally this is something the test should +# automatically determine by using Kubernetes API to poll the pods status. +_DEFAULT_GKE_WARMUP_SECS = 60 class KubernetesProxy: @@ -76,11 +112,74 @@ class KubernetesProxy: def __del__(self): if self.p is not None: + print 'Shutting down Kubernetes proxy..' 
self.p.kill() +class TestSettings: + + def __init__(self, build_docker_image, test_poll_interval_secs, + test_duration_secs, kubernetes_proxy_port): + self.build_docker_image = build_docker_image + self.test_poll_interval_secs = test_poll_interval_secs + self.test_duration_secs = test_duration_secs + self.kubernetes_proxy_port = kubernetes_proxy_port + + +class GkeSettings: + + def __init__(self, project_id, docker_image_name): + self.project_id = project_id + self.docker_image_name = docker_image_name + self.tag_name = 'gcr.io/%s/%s' % (project_id, docker_image_name) + + +class BigQuerySettings: + + def __init__(self, run_id, dataset_id, summary_table_id, qps_table_id): + self.run_id = run_id + self.dataset_id = dataset_id + self.summary_table_id = summary_table_id + self.qps_table_id = qps_table_id + + +class StressServerSettings: + + def __init__(self, server_pod_name, server_port): + self.server_pod_name = server_pod_name + self.server_port = server_port + + +class StressClientSettings: + + def __init__(self, num_clients, client_pod_name_prefix, server_pod_name, + server_port, metrics_port, metrics_collection_interval_secs, + stress_client_poll_interval_secs, num_channels_per_server, + num_stubs_per_channel, test_cases_str): + self.num_clients = num_clients + self.client_pod_name_prefix = client_pod_name_prefix + self.server_pod_name = server_pod_name + self.server_port = server_port + self.metrics_port = metrics_port + self.metrics_collection_interval_secs = metrics_collection_interval_secs + self.stress_client_poll_interval_secs = stress_client_poll_interval_secs + self.num_channels_per_server = num_channels_per_server + self.num_stubs_per_channel = num_stubs_per_channel + self.test_cases_str = test_cases_str + + # == Derived properties == + # Note: Client can accept a list of server addresses (a comma separated list + # of 'server_name:server_port'). 
In this case, we only have one server + # address to pass + self.server_addresses = '%s.default.svc.cluster.local:%d' % ( + server_pod_name, server_port) + self.client_pod_names_list = ['%s-%d' % (client_pod_name_prefix, i) + for i in range(1, num_clients + 1)] + + def _build_docker_image(image_name, tag_name): """ Build the docker image and add a tag """ + print 'Building docker image: %s' % image_name os.environ['INTEROP_IMAGE'] = image_name # Note that 'BASE_NAME' HAS to be 'grpc_interop_stress_cxx' since the script # build_interop_stress_image.sh invokes the following script: @@ -93,6 +192,7 @@ def _build_docker_image(image_name, tag_name): print 'Error in building docker image' return False + print 'Adding an additional tag %s to the image %s' % (tag_name, image_name) cmd = ['docker', 'tag', '-f', image_name, tag_name] p = subprocess.Popen(args=cmd) retcode = p.wait() @@ -115,144 +215,86 @@ def _push_docker_image_to_gke_registry(docker_tag_name): return True -def _launch_image_on_gke(kubernetes_api_server, kubernetes_api_port, namespace, - pod_name, image_name, port_list, cmd_list, arg_list, - env_dict, is_headless_service): - """Creates a GKE Pod and a Service object for a given image by calling Kubernetes API""" - is_success = kubernetes_api.create_pod( - kubernetes_api_server, - kubernetes_api_port, - namespace, - pod_name, - image_name, - port_list, # The ports to be exposed on this container/pod - cmd_list, # The command that launches the stress server - arg_list, - env_dict # Environment variables to be passed to the pod - ) - if not is_success: - print 'Error in creating Pod' - return False - - is_success = kubernetes_api.create_service( - kubernetes_api_server, - kubernetes_api_port, - namespace, - pod_name, # Use the pod name for service name as well - pod_name, - port_list, # Service port list - port_list, # Container port list (same as service port list) - is_headless_service) - if not is_success: - print 'Error in creating Service' - return False - - print 'Successfully created the pod/service %s' % pod_name - return True - - -def _delete_image_on_gke(kubernetes_proxy, pod_name_list): - """Deletes a GKE Pod and Service object for given list of Pods by calling Kubernetes API""" - if not kubernetes_proxy.is_started: - print 'Kubernetes proxy must be started before calling this function' - return False - - is_success = True - for pod_name in pod_name_list: - is_success = kubernetes_api.delete_pod( - 'localhost', kubernetes_proxy.get_port(), 'default', pod_name) - if not is_success: - print 'Error in deleting pod %s' % pod_name - break - - is_success = kubernetes_api.delete_service( - 'localhost', kubernetes_proxy.get_port(), 'default', - pod_name) # service name same as pod name - if not is_success: - print 'Error in deleting service %s' % pod_name - break - - if is_success: - print 'Successfully deleted the Pods/Services: %s' % ','.join(pod_name_list) - - return is_success - - -def _launch_server(gcp_project_id, docker_image_name, bq_settings, - kubernetes_proxy, server_pod_name, server_port): +def _launch_server(gke_settings, stress_server_settings, bq_settings, + kubernetes_proxy): """ Launches a stress test server instance in GKE cluster """ if not kubernetes_proxy.is_started: print 'Kubernetes proxy must be started before calling this function' return False + # This is the wrapper script that is run in the container. 
This script runs + # the actual stress test server server_cmd_list = [ '/var/local/git/grpc/tools/run_tests/stress_test/run_server.py' - ] # Process that is launched - server_arg_list = [] # run_server.py does not take any args (for now) + ] - # == Parameters to the server process launched in GKE == + # run_server.py does not take any args from the command line. The args are + # instead passed via environment variables (see server_env below) + server_arg_list = [] + + # The parameters to the script run_server.py are injected into the container + # via environment variables server_env = { 'STRESS_TEST_IMAGE_TYPE': 'SERVER', 'STRESS_TEST_IMAGE': '/var/local/git/grpc/bins/opt/interop_server', - 'STRESS_TEST_ARGS_STR': '--port=%s' % server_port, + 'STRESS_TEST_ARGS_STR': '--port=%s' % stress_server_settings.server_port, 'RUN_ID': bq_settings.run_id, - 'POD_NAME': server_pod_name, - 'GCP_PROJECT_ID': gcp_project_id, + 'POD_NAME': stress_server_settings.server_pod_name, + 'GCP_PROJECT_ID': gke_settings.project_id, 'DATASET_ID': bq_settings.dataset_id, 'SUMMARY_TABLE_ID': bq_settings.summary_table_id, 'QPS_TABLE_ID': bq_settings.qps_table_id } # Launch Server - is_success = _launch_image_on_gke( + is_success = kubernetes_api.create_pod_and_service( 'localhost', kubernetes_proxy.get_port(), - 'default', - server_pod_name, - docker_image_name, - [server_port], # Port that should be exposed on the container + 'default', # Use 'default' namespace + stress_server_settings.server_pod_name, + gke_settings.tag_name, + [stress_server_settings.server_port], # Port that should be exposed server_cmd_list, server_arg_list, server_env, - True # Headless = True for server. Since we want DNS records to be greated by GKE + True # Headless = True for server. Since we want DNS records to be created by GKE ) return is_success -def _launch_client(gcp_project_id, docker_image_name, bq_settings, - kubernetes_proxy, num_instances, client_pod_name_prefix, - server_pod_name, server_port): +def _launch_client(gke_settings, stress_server_settings, stress_client_settings, + bq_settings, kubernetes_proxy): """ Launches a configurable number of stress test clients on GKE cluster """ if not kubernetes_proxy.is_started: print 'Kubernetes proxy must be started before calling this function' return False - server_address = '%s.default.svc.cluster.local:%d' % (server_pod_name, - server_port) - #TODO(sree) Make the whole client args configurable - test_cases_str = 'empty_unary:1,large_unary:1' stress_client_arg_list = [ - '--server_addresses=%s' % server_address, - '--test_cases=%s' % test_cases_str, '--num_stubs_per_channel=10' + '--server_addresses=%s' % stress_client_settings.server_addresses, + '--test_cases=%s' % stress_client_settings.test_cases_str, + '--num_stubs_per_channel=%d' % + stress_client_settings.num_stubs_per_channel ] + # This is the wrapper script that is run in the container. This script runs + # the actual stress client client_cmd_list = [ '/var/local/git/grpc/tools/run_tests/stress_test/run_client.py' ] - # run_client.py takes no args. All args are passed as env variables - client_arg_list = [] - # TODO(sree) Make this configurable (and also less frequent) - poll_interval_secs = 30 + # run_client.py takes no args. 
All args are passed as env variables (see
+  # client_env)
+  client_arg_list = []

-  metrics_port = 8081
-  metrics_server_address = 'localhost:%d' % metrics_port
+  metrics_server_address = 'localhost:%d' % stress_client_settings.metrics_port
   metrics_client_arg_list = [
       '--metrics_server_address=%s' % metrics_server_address,
       '--total_only=true'
   ]

+  # The parameters to the script run_client.py are injected into the container
+  # via environment variables
   client_env = {
       'STRESS_TEST_IMAGE_TYPE': 'CLIENT',
       'STRESS_TEST_IMAGE': '/var/local/git/grpc/bins/opt/stress_test',
@@ -260,27 +302,28 @@ def _launch_client(gcp_project_id, docker_image_name, bq_settings,
       'METRICS_CLIENT_IMAGE': '/var/local/git/grpc/bins/opt/metrics_client',
       'METRICS_CLIENT_ARGS_STR': ' '.join(metrics_client_arg_list),
       'RUN_ID': bq_settings.run_id,
-      'POLL_INTERVAL_SECS': str(poll_interval_secs),
-      'GCP_PROJECT_ID': gcp_project_id,
+      'POLL_INTERVAL_SECS':
+          str(stress_client_settings.stress_client_poll_interval_secs),
+      'GCP_PROJECT_ID': gke_settings.project_id,
       'DATASET_ID': bq_settings.dataset_id,
       'SUMMARY_TABLE_ID': bq_settings.summary_table_id,
       'QPS_TABLE_ID': bq_settings.qps_table_id
   }

-  for i in range(1, num_instances + 1):
-    pod_name = '%s-%d' % (client_pod_name_prefix, i)
+  for pod_name in stress_client_settings.client_pod_names_list:
     client_env['POD_NAME'] = pod_name
-    is_success = _launch_image_on_gke(
-        'localhost',
+    is_success = kubernetes_api.create_pod_and_service(
+        'localhost',  # Since proxy is running on localhost
         kubernetes_proxy.get_port(),
-        'default',
+        'default',  # default namespace
         pod_name,
-        docker_image_name,
-        [metrics_port],  # Client pods expose metrics port
+        gke_settings.tag_name,
+        [stress_client_settings.metrics_port],  # Client pods expose metrics port
         client_cmd_list,
         client_arg_list,
         client_env,
-        False  # Client is not a headless service.
+        False  # Client is not a headless service
     )
     if not is_success:
       print 'Error in launching client %s' % pod_name
@@ -289,20 +332,17 @@ def _launch_client(gcp_project_id, docker_image_name, bq_settings,
   return True

-def _launch_server_and_client(bq_settings, gcp_project_id, docker_image_name,
-                              num_client_instances):
+def _launch_server_and_client(gke_settings, stress_server_settings,
+                              stress_client_settings, bq_settings,
+                              kubernetes_proxy_port):
   # Start kubernetes proxy
-  kubernetes_api_port = 9001
-  kubernetes_proxy = KubernetesProxy(kubernetes_api_port)
+  print 'Starting Kubernetes proxy..'
+  kubernetes_proxy = KubernetesProxy(kubernetes_proxy_port)
   kubernetes_proxy.start()

-  # num of seconds to wait for the GKE image to start and warmup
-  image_warmp_secs = 60
-
-  server_pod_name = 'stress-server'
-  server_port = 8080
-  is_success = _launch_server(gcp_project_id, docker_image_name, bq_settings,
-                              kubernetes_proxy, server_pod_name, server_port)
+  print 'Launching server..'
+  is_success = _launch_server(gke_settings, stress_server_settings, bq_settings,
+                              kubernetes_proxy)
   if not is_success:
     print 'Error in launching server'
     return False
@@ -310,116 +350,217 @@ def _launch_server_and_client(bq_settings, gcp_project_id, docker_image_name,
   # Server takes a while to start.
   # TODO(sree) Use Kubernetes API to query the status of the server instead of
   # sleeping
-  print 'Waiting for %s seconds for the server to start...' % image_warmp_secs
-  time.sleep(image_warmp_secs)
+  print 'Waiting for %s seconds for the server to start...' % _GKE_IMAGE_WARMUP_WAIT_SECS
+  time.sleep(_GKE_IMAGE_WARMUP_WAIT_SECS)

   # Launch client
-  server_address = '%s.default.svc.cluster.local:%d' % (server_pod_name,
-                                                        server_port)
   client_pod_name_prefix = 'stress-client'
-  is_success = _launch_client(gcp_project_id, docker_image_name, bq_settings,
-                              kubernetes_proxy, num_client_instances,
-                              client_pod_name_prefix, server_pod_name,
-                              server_port)
+  is_success = _launch_client(gke_settings, stress_server_settings,
+                              stress_client_settings, bq_settings,
+                              kubernetes_proxy)
+
   if not is_success:
     print 'Error in launching client(s)'
     return False

-  print 'Waiting for %s seconds for the client images to start...' % image_warmp_secs
-  time.sleep(image_warmp_secs)
+  print 'Waiting for %s seconds for the client images to start...' % _GKE_IMAGE_WARMUP_WAIT_SECS
+  time.sleep(_GKE_IMAGE_WARMUP_WAIT_SECS)
   return True
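The TODO above is worth making concrete: instead of sleeping for a fixed _GKE_IMAGE_WARMUP_WAIT_SECS, the script could poll the pod state through the same local proxy. A minimal sketch of that idea, assuming the standard Kubernetes v1 REST endpoint and the third-party 'requests' library (both assumptions; this helper is not part of the patch):

import time

import requests  # assumed to be available; not a dependency of this patch


def _wait_for_pod_running(kube_host, kube_port, namespace, pod_name,
                          timeout_secs=120, poll_secs=5):
  # Poll the pod resource until status.phase reports 'Running' or we time out
  url = 'http://%s:%d/api/v1/namespaces/%s/pods/%s' % (kube_host, kube_port,
                                                       namespace, pod_name)
  deadline = time.time() + timeout_secs
  while time.time() < deadline:
    pod = requests.get(url).json()
    if pod.get('status', {}).get('phase') == 'Running':
      return True
    time.sleep(poll_secs)
  return False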
-def _delete_server_and_client(num_client_instances):
-  kubernetes_api_port = 9001
-  kubernetes_proxy = KubernetesProxy(kubernetes_api_port)
+def _delete_server_and_client(stress_server_settings, stress_client_settings,
+                              kubernetes_proxy_port):
+  kubernetes_proxy = KubernetesProxy(kubernetes_proxy_port)
   kubernetes_proxy.start()

   # Delete clients first
-  client_pod_names = ['stress-client-%d' % i
-                      for i in range(1, num_client_instances + 1)]
-
-  is_success = _delete_image_on_gke(kubernetes_proxy, client_pod_names)
-  if not is_success:
-    return False
+  is_success = True
+  for pod_name in stress_client_settings.client_pod_names_list:
+    is_success = kubernetes_api.delete_pod_and_service(
+        'localhost', kubernetes_proxy_port, 'default', pod_name)
+    if not is_success:
+      return False

   # Delete server
-  server_pod_name = 'stress-server'
-  return _delete_image_on_gke(kubernetes_proxy, [server_pod_name])
+  is_success = kubernetes_api.delete_pod_and_service(
+      'localhost', kubernetes_proxy_port, 'default',
+      stress_server_settings.server_pod_name)
+  return is_success
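run_test_main below derives every BigQuery identifier from a single timestamp-based run id. Concretely, the generated names come out like this (a small sketch; the timestamp value is illustrative):

import datetime

run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
print run_id                             # e.g. 2016_02_26_16_47_47
print '%s_%s' % ('stress_test', run_id)  # the dataset id, e.g. stress_test_2016_02_26_16_47_47
# 'stress_test' here stands in for the module constant _DATASET_ID_PREFIX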
-def _build_and_push_docker_image(gcp_project_id, docker_image_name, tag_name):
-  is_success = _build_docker_image(docker_image_name, tag_name)
-  if not is_success:
-    return False
-  return _push_docker_image_to_gke_registry(tag_name)
-
+def run_test_main(test_settings, gke_settings, stress_server_settings,
+                  stress_client_settings):
+  is_success = True

-# TODO(sree): This is just to test the above APIs. Rewrite this to make
-# everything configurable (like image names / number of instances etc)
-def run_test(skip_building_image, gcp_project_id, image_name, tag_name,
-             num_client_instances, poll_interval_secs, total_duration_secs):
-  if not skip_building_image:
-    is_success = _build_docker_image(image_name, tag_name)
+  if test_settings.build_docker_image:
+    is_success = _build_docker_image(gke_settings.docker_image_name,
+                                     gke_settings.tag_name)
     if not is_success:
       return False

-    is_success = _push_docker_image_to_gke_registry(tag_name)
+    is_success = _push_docker_image_to_gke_registry(gke_settings.tag_name)
     if not is_success:
       return False

-  # == Big Query tables related settings (Common for both server and client) ==
-
   # Create a unique id for this run (Note: Using timestamp instead of UUID to
   # make it easier to deduce the date/time of the run just by looking at the
   # run id. This is useful in debugging when looking at records in BigQuery)
   run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
-  dataset_id = 'stress_test_%s' % run_id
-  summary_table_id = 'summary'
-  qps_table_id = 'qps'
-  bq_settings = BigQuerySettings(run_id, dataset_id, summary_table_id,
-                                 qps_table_id)
-
-  bq_helper = BigQueryHelper(run_id, '', '', gcp_project_id, dataset_id,
-                             summary_table_id, qps_table_id)
+  dataset_id = '%s_%s' % (_DATASET_ID_PREFIX, run_id)
+
+  # Big Query settings (common for both Stress Server and Client)
+  bq_settings = BigQuerySettings(run_id, dataset_id, _SUMMARY_TABLE_ID,
+                                 _QPS_TABLE_ID)
+
+  bq_helper = BigQueryHelper(run_id, '', '', gke_settings.project_id,
+                             dataset_id, _SUMMARY_TABLE_ID, _QPS_TABLE_ID)
   bq_helper.initialize()

-  is_success = _launch_server_and_client(bq_settings, gcp_project_id, tag_name,
-                                         num_client_instances)
-  if not is_success:
-    return False

-  start_time = datetime.datetime.now()
-  end_time = start_time + datetime.timedelta(seconds=total_duration_secs)
-
-  while True:
-    if datetime.datetime.now() > end_time:
-      print 'Test was run for %d seconds' % total_duration_secs
-      break
-
-    # Check if either stress server or clients have failed
-    if bq_helper.check_if_any_tests_failed():
-      is_success = False
-      print 'Some tests failed.'
-      break
-    # Things seem to be running fine. Wait until next poll time to check the
-    # status
-    print 'Sleeping for %d seconds..' % poll_interval_secs
-    time.sleep(poll_interval_secs)
-
-  # Print BiqQuery tables
-  bq_helper.print_summary_records()
-  bq_helper.print_qps_records()
-
-  _delete_server_and_client(num_client_instances)
+  try:
+    is_success = _launch_server_and_client(gke_settings, stress_server_settings,
+                                           stress_client_settings, bq_settings,
+                                           test_settings.kubernetes_proxy_port)
+    if not is_success:
+      return False
+
+    start_time = datetime.datetime.now()
+    end_time = start_time + datetime.timedelta(
+        seconds=test_settings.test_duration_secs)
+    print 'Running the test until %s' % end_time.isoformat()
+
+    while True:
+      if datetime.datetime.now() > end_time:
+        print 'Test was run for %d seconds' % test_settings.test_duration_secs
+        break
+
+      # Check if either stress server or clients have failed
+      if bq_helper.check_if_any_tests_failed():
+        is_success = False
+        print 'Some tests failed.'
+        break
+
+      # Things seem to be running fine. Wait until next poll time to check the
+      # status
+      print 'Sleeping for %d seconds..' % test_settings.test_poll_interval_secs
+      time.sleep(test_settings.test_poll_interval_secs)
+
+    # Print BigQuery tables
+    bq_helper.print_summary_records()
+    bq_helper.print_qps_records()
+
+  finally:
+    # If is_success is False at this point, it means that the stress tests were
+    # started successfully but failed while running the tests. In this case we
+    # should not delete the pods (since they contain all the failure
+    # information)
+    if is_success:
+      _delete_server_and_client(stress_server_settings, stress_client_settings,
+                                test_settings.kubernetes_proxy_port)

   return is_success
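Because a failed run deliberately leaves its pods and BigQuery records in place, a run can also be inspected after the fact. A minimal sketch using the big_query_utils helpers from this patch series (the project id and run id below are placeholders):

import big_query_utils as bq_utils

bq = bq_utils.create_big_query()
run_id = '2016_02_26_16_47_47'  # placeholder run id
query = ('SELECT pod_name, event_type, details FROM %s.%s '
         'WHERE run_id = \'%s\'') % ('stress_test_%s' % run_id, 'summary',
                                     run_id)
query_job = bq_utils.sync_query_job(bq, 'my-gcp-project', query)  # placeholder project id
page = bq.jobs().getQueryResults(**query_job['jobReference']).execute()
print page.get('rows', [])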
+argp = argparse.ArgumentParser(
+    description='Launch stress tests in GKE',
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+argp.add_argument('--project_id',
+                  required=True,
+                  help='The Google Cloud Platform Project Id')
+argp.add_argument('--num_clients',
+                  default=1,
+                  type=int,
+                  help='Number of client instances to start')
+argp.add_argument('--docker_image_name',
+                  default=_DEFAULT_DOCKER_IMAGE_NAME,
+                  help='The name of the docker image containing stress client '
+                  'and stress servers')
+argp.add_argument('--build_docker_image',
+                  dest='build_docker_image',
+                  action='store_true',
+                  help='Build a docker image and push to Google Container '
+                  'Registry')
+argp.add_argument('--do_not_build_docker_image',
+                  dest='build_docker_image',
+                  action='store_false',
+                  help='Do not build and push docker image to Google Container '
+                  'Registry')
+argp.set_defaults(build_docker_image=True)
+
+argp.add_argument('--test_poll_interval_secs',
+                  default=_DEFAULT_TEST_POLL_INTERVAL_SECS,
+                  type=int,
+                  help='How frequently this script should monitor the health '
+                  'of stress clients and servers running in the GKE cluster')
+argp.add_argument('--test_duration_secs',
+                  default=_DEFAULT_TEST_DURATION_SECS,
+                  type=int,
+                  help='How long this test should be run')
+argp.add_argument('--kubernetes_proxy_port',
+                  default=_DEFAULT_KUBERNETES_PROXY_PORT,
+                  type=int,
+                  help='The port on which the kubernetes proxy (on localhost)'
+                  ' is started')
+argp.add_argument('--stress_server_port',
+                  default=_DEFAULT_STRESS_SERVER_PORT,
+                  type=int,
+                  help='The port on which the stress server (in GKE '
+                  'containers) listens')
+argp.add_argument('--stress_client_metrics_port',
+                  default=_DEFAULT_METRICS_PORT,
+                  type=int,
+                  help='The port on which the stress clients (in GKE '
+                  'containers) expose metrics')
+argp.add_argument('--stress_client_poll_interval_secs',
+                  default=_DEFAULT_STRESS_CLIENT_POLL_INTERVAL_SECS,
+                  type=int,
+                  help='How frequently the stress client wrapper script '
+                  'running inside GKE should monitor the health of the actual '
+                  'stress client process and upload the metrics to BigQuery')
+argp.add_argument('--stress_client_metrics_collection_interval_secs',
+                  default=_DEFAULT_METRICS_COLLECTION_INTERVAL_SECS,
+                  type=int,
+                  help='How frequently should metrics be collected in-memory on'
+                  ' the stress clients (running inside GKE containers). Note '
+                  'that this is NOT the same as the upload-to-BigQuery '
+                  'frequency. The metrics upload frequency is controlled by the'
+                  ' --stress_client_poll_interval_secs flag')
+argp.add_argument('--stress_client_num_channels_per_server',
+                  default=_DEFAULT_NUM_CHANNELS_PER_SERVER,
+                  type=int,
+                  help='The number of channels created to each server from a '
+                  'stress client')
+argp.add_argument('--stress_client_num_stubs_per_channel',
+                  default=_DEFAULT_NUM_STUBS_PER_CHANNEL,
+                  type=int,
+                  help='The number of stubs created per channel. This number '
+                  'indicates the max number of RPCs that can be made in '
+                  'parallel on each channel at any given time')
+argp.add_argument('--stress_client_test_cases',
+                  default=_DEFAULT_TEST_CASES_STR,
+                  help='List of test cases (with weights) to be executed by the'
+                  ' stress test client. The list is in the following format:\n'
+                  ' <testcase_name:weight>,<testcase_name:weight>..\n'
+                  ' (Note: The weights do not have to add up to 100)')
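For reference, this is how the flags above resolve once parsed. A small sketch reusing the argp parser just defined (the flag values are illustrative):

args = argp.parse_args(['--project_id=my-gcp-project',  # placeholder project id
                        '--num_clients=10',
                        '--do_not_build_docker_image'])
print args.num_clients          # 10
print args.build_docker_image   # False (set by the store_false flag)
print args.test_duration_secs   # 7200, i.e. _DEFAULT_TEST_DURATION_SECS
print args.docker_image_name    # 'grpc_stress_test', i.e. _DEFAULT_DOCKER_IMAGE_NAME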
+
 if __name__ == '__main__':
-  image_name = 'grpc_stress_test'
-  gcp_project_id = 'sree-gce'
-  tag_name = 'gcr.io/%s/%s' % (gcp_project_id, image_name)
-  num_client_instances = 3
-  poll_interval_secs = 10
-  test_duration_secs = 150
-  run_test(True, gcp_project_id, image_name, tag_name, num_client_instances,
-           poll_interval_secs, test_duration_secs)
+  args = argp.parse_args()
+
+  test_settings = TestSettings(
+      args.build_docker_image, args.test_poll_interval_secs,
+      args.test_duration_secs, args.kubernetes_proxy_port)
+
+  gke_settings = GkeSettings(args.project_id, args.docker_image_name)
+
+  stress_server_settings = StressServerSettings(_SERVER_POD_NAME,
+                                                args.stress_server_port)
+  stress_client_settings = StressClientSettings(
+      args.num_clients, _CLIENT_POD_NAME_PREFIX, _SERVER_POD_NAME,
+      args.stress_server_port, args.stress_client_metrics_port,
+      args.stress_client_metrics_collection_interval_secs,
+      args.stress_client_poll_interval_secs,
+      args.stress_client_num_channels_per_server,
+      args.stress_client_num_stubs_per_channel, args.stress_client_test_cases)
+
+  run_test_main(test_settings, gke_settings, stress_server_settings,
+                stress_client_settings)
-- cgit v1.2.3


From da25fdb8826d59c30a851e4e936b09e450247301 Mon Sep 17 00:00:00 2001
From: Sree Kuchibhotla
Date: Fri, 26 Feb 2016 13:41:06 -0800
Subject: Address code review comments

---
 tools/gke/kubernetes_api.py                 |  4 ++--
 tools/gke/run_stress_tests_on_gke.py        | 18 ++++--------------
 tools/jenkins/build_interop_stress_image.sh |  3 +++
 tools/run_tests/stress_test/run_client.py   |  1 -
 4 files changed, 9 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/gke/kubernetes_api.py b/tools/gke/kubernetes_api.py
index e1017e9da6..2d3f771e93 100755
--- a/tools/gke/kubernetes_api.py
+++ b/tools/gke/kubernetes_api.py
@@ -228,7 +228,7 @@ def delete_pod(kube_host, kube_port, namespace, pod_name):
 def create_pod_and_service(kube_host, kube_port, namespace, pod_name,
                            image_name, container_port_list, cmd_list, arg_list,
                            env_dict, is_headless_service):
-  """A simple helper function that creates a pod and a service (if pod creation was successful)."""
+  """A helper function that creates a pod and a service (if pod creation was successful)."""
   is_success = create_pod(kube_host, kube_port, namespace, pod_name, image_name,
                           container_port_list, cmd_list, arg_list, env_dict)
   if not is_success:
@@ -253,7 +253,7 @@ def create_pod_and_service(kube_host, kube_port, namespace, pod_name,

 def delete_pod_and_service(kube_host, kube_port, namespace, pod_name):
-  """ A simple helper function that calls delete_pod and delete_service """
+  """ A helper function that calls delete_pod and delete_service """
   is_success = delete_pod(kube_host, kube_port, namespace, pod_name)
   if not is_success:
     print 'Error in deleting pod %s' % pod_name
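As a usage sketch for the two helpers whose docstrings are touched above: with 'kubectl proxy --port=8001' already running locally, a caller creates and later tears down a pod/service pair like this (the image tag, port and environment are illustrative values, not taken from the patch):

import kubernetes_api

is_success = kubernetes_api.create_pod_and_service(
    'localhost', 8001, 'default',
    'stress-server',                           # pod name, reused as the service name
    'gcr.io/my-gcp-project/grpc_stress_test',  # placeholder image tag
    [8080],                                    # container (and service) port list
    ['/var/local/git/grpc/tools/run_tests/stress_test/run_server.py'],
    [],                                        # no command-line args
    {'STRESS_TEST_IMAGE_TYPE': 'SERVER'},      # env vars (trimmed for brevity)
    True)                                      # headless, so GKE creates DNS records
if is_success:
  kubernetes_api.delete_pod_and_service('localhost', 8001, 'default',
                                        'stress-server')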
diff --git a/tools/gke/run_stress_tests_on_gke.py b/tools/gke/run_stress_tests_on_gke.py
index cf0ef595a6..d126e3db43 100755
--- a/tools/gke/run_stress_tests_on_gke.py
+++ b/tools/gke/run_stress_tests_on_gke.py
@@ -178,28 +178,19 @@ class StressClientSettings:

 def _build_docker_image(image_name, tag_name):
-  """ Build the docker image and add a tag """
+  """ Build the docker image and tag it for the GKE registry """
   print 'Building docker image: %s' % image_name
   os.environ['INTEROP_IMAGE'] = image_name
+  os.environ['INTEROP_IMAGE_REPOSITORY_TAG'] = tag_name
   # Note that 'BASE_NAME' HAS to be 'grpc_interop_stress_cxx' since the script
   # build_interop_stress_image.sh invokes the following script:
   # tools/dockerfile/$BASE_NAME/build_interop_stress.sh
   os.environ['BASE_NAME'] = 'grpc_interop_stress_cxx'
   cmd = ['tools/jenkins/build_interop_stress_image.sh']
-  p = subprocess.Popen(args=cmd)
-  retcode = p.wait()
+  retcode = subprocess.call(args=cmd)
   if retcode != 0:
     print 'Error in building docker image'
     return False
-
-  print 'Adding an additional tag %s to the image %s' % (tag_name, image_name)
-  cmd = ['docker', 'tag', '-f', image_name, tag_name]
-  p = subprocess.Popen(args=cmd)
-  retcode = p.wait()
-  if retcode != 0:
-    print 'Error in creating the tag %s for %s' % (tag_name, image_name)
-    return False
-
   return True

@@ -207,8 +198,7 @@ def _push_docker_image_to_gke_registry(docker_tag_name):
   """Executes 'gcloud docker push <docker_tag_name>' to push the image to the GKE registry"""
   cmd = ['gcloud', 'docker', 'push', docker_tag_name]
   print 'Pushing %s to GKE registry..' % docker_tag_name
-  p = subprocess.Popen(args=cmd)
-  retcode = p.wait()
+  retcode = subprocess.call(args=cmd)
   if retcode != 0:
     print 'Error in pushing docker image %s to the GKE registry' % docker_tag_name
     return False

diff --git a/tools/jenkins/build_interop_stress_image.sh b/tools/jenkins/build_interop_stress_image.sh
index 92f2dab5e3..4c8e998a8a 100755
--- a/tools/jenkins/build_interop_stress_image.sh
+++ b/tools/jenkins/build_interop_stress_image.sh
@@ -35,6 +35,8 @@ set -x
 # Params:
 #  INTEROP_IMAGE - name of tag of the final interop image
+#  INTEROP_IMAGE_REPOSITORY_TAG - Optional. If set, the created image will be tagged using
+#    the command: 'docker tag $INTEROP_IMAGE $INTEROP_IMAGE_REPOSITORY_TAG'
 #  BASE_NAME - base name used to locate the base Dockerfile and build script
 #  TTY_FLAG - optional -t flag to make docker allocate tty
 #  BUILD_INTEROP_DOCKER_EXTRA_ARGS - optional args to be passed to the
@@ -77,6 +79,7 @@ CONTAINER_NAME="build_${BASE_NAME}_$(uuidgen)"
   $BASE_IMAGE \
   bash -l /var/local/jenkins/grpc/tools/dockerfile/$BASE_NAME/build_interop_stress.sh \
   && docker commit $CONTAINER_NAME $INTEROP_IMAGE \
+  && ( if [ -n "$INTEROP_IMAGE_REPOSITORY_TAG" ]; then docker tag $INTEROP_IMAGE $INTEROP_IMAGE_REPOSITORY_TAG ; fi ) \
   && echo "Successfully built image $INTEROP_IMAGE")
 EXITCODE=$?

diff --git a/tools/run_tests/stress_test/run_client.py b/tools/run_tests/stress_test/run_client.py
index 33958bce49..0fa1bf1cb9 100755
--- a/tools/run_tests/stress_test/run_client.py
+++ b/tools/run_tests/stress_test/run_client.py
@@ -142,7 +142,6 @@ def run_client():
     # Check if stress_client is still running. If so, collect metrics and upload
     # to BigQuery status table
     if stress_p.poll() is not None:
-      # TODO(sree) Upload completion status to BigQuery
       end_time = datetime.datetime.now().isoformat()
       event_type = EventType.SUCCESS
       details = 'End time: %s' % end_time
-- cgit v1.2.3


From aadb910524a62a816f334e1d5b1c74ee5d7f974c Mon Sep 17 00:00:00 2001
From: Sree Kuchibhotla
Date: Fri, 26 Feb 2016 16:47:47 -0800
Subject: Change directory structure for the scripts (remove tools/big_query and
 tools/gke directories and instead create tools/gcp).
Move scripts around in to the appropriate directories --- tools/big_query/big_query_utils.py | 140 ------ tools/gcp/stress_test/run_client.py | 187 +++++++ tools/gcp/stress_test/run_server.py | 120 +++++ tools/gcp/stress_test/stress_test_utils.py | 197 ++++++++ tools/gcp/utils/big_query_utils.py | 140 ++++++ tools/gcp/utils/kubernetes_api.py | 269 ++++++++++ tools/gke/kubernetes_api.py | 269 ---------- tools/gke/run_stress_tests_on_gke.py | 556 --------------------- tools/jenkins/build_interop_stress_image.sh | 2 +- tools/run_tests/stress_test/run_client.py | 187 ------- tools/run_tests/stress_test/run_server.py | 120 ----- .../stress_test/run_stress_tests_on_gke.py | 556 +++++++++++++++++++++ tools/run_tests/stress_test/stress_test_utils.py | 197 -------- 13 files changed, 1470 insertions(+), 1470 deletions(-) delete mode 100755 tools/big_query/big_query_utils.py create mode 100755 tools/gcp/stress_test/run_client.py create mode 100755 tools/gcp/stress_test/run_server.py create mode 100755 tools/gcp/stress_test/stress_test_utils.py create mode 100755 tools/gcp/utils/big_query_utils.py create mode 100755 tools/gcp/utils/kubernetes_api.py delete mode 100755 tools/gke/kubernetes_api.py delete mode 100755 tools/gke/run_stress_tests_on_gke.py delete mode 100755 tools/run_tests/stress_test/run_client.py delete mode 100755 tools/run_tests/stress_test/run_server.py create mode 100755 tools/run_tests/stress_test/run_stress_tests_on_gke.py delete mode 100755 tools/run_tests/stress_test/stress_test_utils.py (limited to 'tools') diff --git a/tools/big_query/big_query_utils.py b/tools/big_query/big_query_utils.py deleted file mode 100755 index e2379fd1aa..0000000000 --- a/tools/big_query/big_query_utils.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright 2015-2016 Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import argparse -import json -import uuid -import httplib2 - -from apiclient import discovery -from apiclient.errors import HttpError -from oauth2client.client import GoogleCredentials - -NUM_RETRIES = 3 - - -def create_big_query(): - """Authenticates with cloud platform and gets a BiqQuery service object - """ - creds = GoogleCredentials.get_application_default() - return discovery.build('bigquery', 'v2', credentials=creds) - - -def create_dataset(biq_query, project_id, dataset_id): - is_success = True - body = { - 'datasetReference': { - 'projectId': project_id, - 'datasetId': dataset_id - } - } - - try: - dataset_req = biq_query.datasets().insert(projectId=project_id, body=body) - dataset_req.execute(num_retries=NUM_RETRIES) - except HttpError as http_error: - if http_error.resp.status == 409: - print 'Warning: The dataset %s already exists' % dataset_id - else: - # Note: For more debugging info, print "http_error.content" - print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error) - is_success = False - return is_success - - -def create_table(big_query, project_id, dataset_id, table_id, table_schema, - description): - is_success = True - - body = { - 'description': description, - 'schema': { - 'fields': [{ - 'name': field_name, - 'type': field_type, - 'description': field_description - } for (field_name, field_type, field_description) in table_schema] - }, - 'tableReference': { - 'datasetId': dataset_id, - 'projectId': project_id, - 'tableId': table_id - } - } - - try: - table_req = big_query.tables().insert(projectId=project_id, - datasetId=dataset_id, - body=body) - res = table_req.execute(num_retries=NUM_RETRIES) - print 'Successfully created %s "%s"' % (res['kind'], res['id']) - except HttpError as http_error: - if http_error.resp.status == 409: - print 'Warning: Table %s already exists' % table_id - else: - print 'Error in creating table: %s. Err: %s' % (table_id, http_error) - is_success = False - return is_success - - -def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): - is_success = True - body = {'rows': rows_list} - try: - insert_req = big_query.tabledata().insertAll(projectId=project_id, - datasetId=dataset_id, - tableId=table_id, - body=body) - print body - res = insert_req.execute(num_retries=NUM_RETRIES) - print res - except HttpError as http_error: - print 'Error in inserting rows in the table %s' % table_id - is_success = False - return is_success - - -def sync_query_job(big_query, project_id, query, timeout=5000): - query_data = {'query': query, 'timeoutMs': timeout} - query_job = None - try: - query_job = big_query.jobs().query( - projectId=project_id, - body=query_data).execute(num_retries=NUM_RETRIES) - except HttpError as http_error: - print 'Query execute job failed with error: %s' % http_error - print http_error.content - return query_job - - # List of (column name, column type, description) tuples -def make_row(unique_row_id, row_values_dict): - """row_values_dict is a dictionary of column name and column value. - """ - return {'insertId': unique_row_id, 'json': row_values_dict} diff --git a/tools/gcp/stress_test/run_client.py b/tools/gcp/stress_test/run_client.py new file mode 100755 index 0000000000..0fa1bf1cb9 --- /dev/null +++ b/tools/gcp/stress_test/run_client.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python2.7 +# Copyright 2015-2016, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import datetime +import os +import re +import select +import subprocess +import sys +import time + +from stress_test_utils import EventType +from stress_test_utils import BigQueryHelper + + +# TODO (sree): Write a python grpc client to directly query the metrics instead +# of calling metrics_client +def _get_qps(metrics_cmd): + qps = 0 + try: + # Note: gpr_log() writes even non-error messages to stderr stream. So it is + # important that we set stderr=subprocess.STDOUT + p = subprocess.Popen(args=metrics_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + retcode = p.wait() + (out_str, err_str) = p.communicate() + if retcode != 0: + print 'Error in reading metrics information' + print 'Output: ', out_str + else: + # The overall qps is printed at the end of the line + m = re.search('\d+$', out_str) + qps = int(m.group()) if m else 0 + except Exception as ex: + print 'Exception while reading metrics information: ' + str(ex) + return qps + + +def run_client(): + """This is a wrapper around the stress test client and performs the following: + 1) Create the following two tables in Big Query: + (i) Summary table: To record events like the test started, completed + successfully or failed + (ii) Qps table: To periodically record the QPS sent by this client + 2) Start the stress test client and add a row in the Big Query summary + table + 3) Once every few seconds (as specificed by the poll_interval_secs) poll + the status of the stress test client process and perform the + following: + 3.1) If the process is still running, get the current qps by invoking + the metrics client program and add a row in the Big Query + Qps table. Sleep for a duration specified by poll_interval_secs + 3.2) If the process exited successfully, add a row in the Big Query + Summary table and exit + 3.3) If the process failed, add a row in Big Query summary table and + wait forever. + NOTE: This script typically runs inside a GKE pod which means + that the pod gets destroyed when the script exits. 
However, in + case the stress test client fails, we would not want the pod to + be destroyed (since we might want to connect to the pod for + examining logs). This is the reason why the script waits forever + in case of failures + """ + env = dict(os.environ) + image_type = env['STRESS_TEST_IMAGE_TYPE'] + image_name = env['STRESS_TEST_IMAGE'] + args_str = env['STRESS_TEST_ARGS_STR'] + metrics_client_image = env['METRICS_CLIENT_IMAGE'] + metrics_client_args_str = env['METRICS_CLIENT_ARGS_STR'] + run_id = env['RUN_ID'] + pod_name = env['POD_NAME'] + logfile_name = env.get('LOGFILE_NAME') + poll_interval_secs = float(env['POLL_INTERVAL_SECS']) + project_id = env['GCP_PROJECT_ID'] + dataset_id = env['DATASET_ID'] + summary_table_id = env['SUMMARY_TABLE_ID'] + qps_table_id = env['QPS_TABLE_ID'] + + bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id, + dataset_id, summary_table_id, qps_table_id) + bq_helper.initialize() + + # Create BigQuery Dataset and Tables: Summary Table and Metrics Table + if not bq_helper.setup_tables(): + print 'Error in creating BigQuery tables' + return + + start_time = datetime.datetime.now() + + logfile = None + details = 'Logging to stdout' + if logfile_name is not None: + print 'Opening logfile: %s ...' % logfile_name + details = 'Logfile: %s' % logfile_name + logfile = open(logfile_name, 'w') + + # Update status that the test is starting (in the status table) + bq_helper.insert_summary_row(EventType.STARTING, details) + + metrics_cmd = [metrics_client_image + ] + [x for x in metrics_client_args_str.split()] + stress_cmd = [image_name] + [x for x in args_str.split()] + + print 'Launching process %s ...' % stress_cmd + stress_p = subprocess.Popen(args=stress_cmd, + stdout=logfile, + stderr=subprocess.STDOUT) + + qps_history = [1, 1, 1] # Maintain the last 3 qps readings + qps_history_idx = 0 # Index into the qps_history list + + is_error = False + while True: + # Check if stress_client is still running. If so, collect metrics and upload + # to BigQuery status table + if stress_p.poll() is not None: + end_time = datetime.datetime.now().isoformat() + event_type = EventType.SUCCESS + details = 'End time: %s' % end_time + if stress_p.returncode != 0: + event_type = EventType.FAILURE + details = 'Return code = %d. End time: %s' % (stress_p.returncode, + end_time) + is_error = True + bq_helper.insert_summary_row(event_type, details) + print details + break + + # Stress client still running. Get metrics + qps = _get_qps(metrics_cmd) + qps_recorded_at = datetime.datetime.now().isoformat() + print 'qps: %d at %s' % (qps, qps_recorded_at) + + # If QPS has been zero for the last 3 iterations, flag it as error and exit + qps_history[qps_history_idx] = qps + qps_history_idx = (qps_history_idx + 1) % len(qps_history) + if sum(qps_history) == 0: + details = 'QPS has been zero for the last %d seconds - as of : %s' % ( + poll_interval_secs * 3, qps_recorded_at) + is_error = True + bq_helper.insert_summary_row(EventType.FAILURE, details) + print details + break + + # Upload qps metrics to BiqQuery + bq_helper.insert_qps_row(qps, qps_recorded_at) + + time.sleep(poll_interval_secs) + + if is_error: + print 'Waiting indefinitely..' 
+ select.select([], [], []) + + print 'Completed' + return + + +if __name__ == '__main__': + run_client() diff --git a/tools/gcp/stress_test/run_server.py b/tools/gcp/stress_test/run_server.py new file mode 100755 index 0000000000..64322f6100 --- /dev/null +++ b/tools/gcp/stress_test/run_server.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python2.7 +# Copyright 2015-2016, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import datetime +import os +import select +import subprocess +import sys +import time + +from stress_test_utils import BigQueryHelper +from stress_test_utils import EventType + + +def run_server(): + """This is a wrapper around the interop server and performs the following: + 1) Create a 'Summary table' in Big Query to record events like the server + started, completed successfully or failed. NOTE: This also creates + another table called the QPS table which is currently NOT needed on the + server (it is needed on the stress test clients) + 2) Start the server process and add a row in Big Query summary table + 3) Wait for the server process to terminate. The server process does not + terminate unless there is an error. + If the server process terminated with a failure, add a row in Big Query + and wait forever. + NOTE: This script typically runs inside a GKE pod which means that the + pod gets destroyed when the script exits. However, in case the server + process fails, we would not want the pod to be destroyed (since we + might want to connect to the pod for examining logs). This is the + reason why the script waits forever in case of failures. 
+ """ + + # Read the parameters from environment variables + env = dict(os.environ) + + run_id = env['RUN_ID'] # The unique run id for this test + image_type = env['STRESS_TEST_IMAGE_TYPE'] + image_name = env['STRESS_TEST_IMAGE'] + args_str = env['STRESS_TEST_ARGS_STR'] + pod_name = env['POD_NAME'] + project_id = env['GCP_PROJECT_ID'] + dataset_id = env['DATASET_ID'] + summary_table_id = env['SUMMARY_TABLE_ID'] + qps_table_id = env['QPS_TABLE_ID'] + + logfile_name = env.get('LOGFILE_NAME') + + print('pod_name: %s, project_id: %s, run_id: %s, dataset_id: %s, ' + 'summary_table_id: %s, qps_table_id: %s') % ( + pod_name, project_id, run_id, dataset_id, summary_table_id, + qps_table_id) + + bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id, + dataset_id, summary_table_id, qps_table_id) + bq_helper.initialize() + + # Create BigQuery Dataset and Tables: Summary Table and Metrics Table + if not bq_helper.setup_tables(): + print 'Error in creating BigQuery tables' + return + + start_time = datetime.datetime.now() + + logfile = None + details = 'Logging to stdout' + if logfile_name is not None: + print 'Opening log file: ', logfile_name + logfile = open(logfile_name, 'w') + details = 'Logfile: %s' % logfile_name + + # Update status that the test is starting (in the status table) + bq_helper.insert_summary_row(EventType.STARTING, details) + + stress_cmd = [image_name] + [x for x in args_str.split()] + + print 'Launching process %s ...' % stress_cmd + stress_p = subprocess.Popen(args=stress_cmd, + stdout=logfile, + stderr=subprocess.STDOUT) + + returncode = stress_p.wait() + if returncode != 0: + end_time = datetime.datetime.now().isoformat() + event_type = EventType.FAILURE + details = 'Returncode: %d; End time: %s' % (returncode, end_time) + bq_helper.insert_summary_row(event_type, details) + print 'Waiting indefinitely..' + select.select([], [], []) + return returncode + + +if __name__ == '__main__': + run_server() diff --git a/tools/gcp/stress_test/stress_test_utils.py b/tools/gcp/stress_test/stress_test_utils.py new file mode 100755 index 0000000000..c4b437e345 --- /dev/null +++ b/tools/gcp/stress_test/stress_test_utils.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python2.7 +# Copyright 2015-2016, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import datetime +import json +import os +import re +import select +import subprocess +import sys +import time + +# Import big_query_utils module +bq_utils_dir = os.path.abspath(os.path.join( + os.path.dirname(__file__), '../utils')) +sys.path.append(bq_utils_dir) +import big_query_utils as bq_utils + + +class EventType: + STARTING = 'STARTING' + SUCCESS = 'SUCCESS' + FAILURE = 'FAILURE' + + +class BigQueryHelper: + """Helper class for the stress test wrappers to interact with BigQuery. + """ + + def __init__(self, run_id, image_type, pod_name, project_id, dataset_id, + summary_table_id, qps_table_id): + self.run_id = run_id + self.image_type = image_type + self.pod_name = pod_name + self.project_id = project_id + self.dataset_id = dataset_id + self.summary_table_id = summary_table_id + self.qps_table_id = qps_table_id + + def initialize(self): + self.bq = bq_utils.create_big_query() + + def setup_tables(self): + return bq_utils.create_dataset(self.bq, self.project_id, self.dataset_id) \ + and self.__create_summary_table() \ + and self.__create_qps_table() + + def insert_summary_row(self, event_type, details): + row_values_dict = { + 'run_id': self.run_id, + 'image_type': self.image_type, + 'pod_name': self.pod_name, + 'event_date': datetime.datetime.now().isoformat(), + 'event_type': event_type, + 'details': details + } + # row_unique_id is something that uniquely identifies the row (BigQuery uses + # it for duplicate detection). + row_unique_id = '%s_%s_%s' % (self.run_id, self.pod_name, event_type) + row = bq_utils.make_row(row_unique_id, row_values_dict) + return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id, + self.summary_table_id, [row]) + + def insert_qps_row(self, qps, recorded_at): + row_values_dict = { + 'run_id': self.run_id, + 'pod_name': self.pod_name, + 'recorded_at': recorded_at, + 'qps': qps + } + + # row_unique_id is something that uniquely identifies the row (BigQuery uses + # it for duplicate detection). 
+ row_unique_id = '%s_%s_%s' % (self.run_id, self.pod_name, recorded_at) + row = bq_utils.make_row(row_unique_id, row_values_dict) + return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id, + self.qps_table_id, [row]) + + def check_if_any_tests_failed(self, num_query_retries=3): + query = ('SELECT event_type FROM %s.%s WHERE run_id = \'%s\' AND ' + 'event_type="%s"') % (self.dataset_id, self.summary_table_id, + self.run_id, EventType.FAILURE) + query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) + page = self.bq.jobs().getQueryResults(**query_job['jobReference']).execute( + num_retries=num_query_retries) + num_failures = int(page['totalRows']) + print 'num rows: ', num_failures + return num_failures > 0 + + def print_summary_records(self, num_query_retries=3): + line = '-' * 120 + print line + print 'Summary records' + print 'Run Id: ', self.run_id + print 'Dataset Id: ', self.dataset_id + print line + query = ('SELECT pod_name, image_type, event_type, event_date, details' + ' FROM %s.%s WHERE run_id = \'%s\' ORDER by event_date;') % ( + self.dataset_id, self.summary_table_id, self.run_id) + query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) + + print '{:<25} {:<12} {:<12} {:<30} {}'.format( + 'Pod name', 'Image type', 'Event type', 'Date', 'Details') + print line + page_token = None + while True: + page = self.bq.jobs().getQueryResults( + pageToken=page_token, + **query_job['jobReference']).execute(num_retries=num_query_retries) + rows = page.get('rows', []) + for row in rows: + print '{:<25} {:<12} {:<12} {:<30} {}'.format( + row['f'][0]['v'], row['f'][1]['v'], row['f'][2]['v'], + row['f'][3]['v'], row['f'][4]['v']) + page_token = page.get('pageToken') + if not page_token: + break + + def print_qps_records(self, num_query_retries=3): + line = '-' * 80 + print line + print 'QPS Summary' + print 'Run Id: ', self.run_id + print 'Dataset Id: ', self.dataset_id + print line + query = ( + 'SELECT pod_name, recorded_at, qps FROM %s.%s WHERE run_id = \'%s\' ' + 'ORDER by recorded_at;') % (self.dataset_id, self.qps_table_id, + self.run_id) + query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) + print '{:<25} {:30} {}'.format('Pod name', 'Recorded at', 'Qps') + print line + page_token = None + while True: + page = self.bq.jobs().getQueryResults( + pageToken=page_token, + **query_job['jobReference']).execute(num_retries=num_query_retries) + rows = page.get('rows', []) + for row in rows: + print '{:<25} {:30} {}'.format(row['f'][0]['v'], row['f'][1]['v'], + row['f'][2]['v']) + page_token = page.get('pageToken') + if not page_token: + break + + def __create_summary_table(self): + summary_table_schema = [ + ('run_id', 'STRING', 'Test run id'), + ('image_type', 'STRING', 'Client or Server?'), + ('pod_name', 'STRING', 'GKE pod hosting this image'), + ('event_date', 'STRING', 'The date of this event'), + ('event_type', 'STRING', 'STARTED/SUCCESS/FAILURE'), + ('details', 'STRING', 'Any other relevant details') + ] + desc = ('The table that contains START/SUCCESS/FAILURE events for ' + ' the stress test clients and servers') + return bq_utils.create_table(self.bq, self.project_id, self.dataset_id, + self.summary_table_id, summary_table_schema, + desc) + + def __create_qps_table(self): + qps_table_schema = [ + ('run_id', 'STRING', 'Test run id'), + ('pod_name', 'STRING', 'GKE pod hosting this image'), + ('recorded_at', 'STRING', 'Metrics recorded at time'), + ('qps', 'INTEGER', 'Queries per second') + ] + desc = 'The table that cointains the 
diff --git a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py
new file mode 100755
index 0000000000..e2379fd1aa
--- /dev/null
+++ b/tools/gcp/utils/big_query_utils.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python2.7
+# Copyright 2015-2016 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import argparse
+import json
+import uuid
+import httplib2
+
+from apiclient import discovery
+from apiclient.errors import HttpError
+from oauth2client.client import GoogleCredentials
+
+NUM_RETRIES = 3
+
+
+def create_big_query():
+ """Authenticates with cloud platform and gets a BigQuery service object."""
+ creds = GoogleCredentials.get_application_default()
+ return discovery.build('bigquery', 'v2', credentials=creds)
+
+
+def create_dataset(big_query, project_id, dataset_id):
+ is_success = True
+ body = {
+ 'datasetReference': {
+ 'projectId': project_id,
+ 'datasetId': dataset_id
+ }
+ }
+
+ try:
+ dataset_req = big_query.datasets().insert(projectId=project_id, body=body)
+ dataset_req.execute(num_retries=NUM_RETRIES)
+ except HttpError as http_error:
+ if http_error.resp.status == 409:
+ print 'Warning: The dataset %s already exists' % dataset_id
+ else:
+ # Note: For more debugging info, print "http_error.content"
+ print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error)
+ is_success = False
+ return is_success
+
+
+def create_table(big_query, project_id, dataset_id, table_id, table_schema,
+ description):
+ # table_schema is a list of (column name, column type, description) tuples
+ is_success = True
+
+ body = {
+ 'description': description,
+ 'schema': {
+ 'fields': [{
+ 'name': field_name,
+ 'type': field_type,
+ 'description': field_description
+ } for (field_name, field_type, field_description) in table_schema]
+ },
+ 'tableReference': {
+ 'datasetId': dataset_id,
+ 'projectId': project_id,
+ 'tableId': table_id
+ }
+ }
+
+ try:
+ table_req = big_query.tables().insert(projectId=project_id,
+ datasetId=dataset_id,
+ body=body)
+ res = table_req.execute(num_retries=NUM_RETRIES)
+ print 'Successfully created %s "%s"' % (res['kind'], res['id'])
+ except HttpError as http_error:
+ if http_error.resp.status == 409:
+ print 'Warning: Table %s already exists' % table_id
+ else:
+ print 'Error in creating table: %s. Err: %s' % (table_id, http_error)
+ is_success = False
+ return is_success
+
+
+def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
+ is_success = True
+ body = {'rows': rows_list}
+ try:
+ insert_req = big_query.tabledata().insertAll(projectId=project_id,
+ datasetId=dataset_id,
+ tableId=table_id,
+ body=body)
+ print body
+ res = insert_req.execute(num_retries=NUM_RETRIES)
+ print res
+ except HttpError as http_error:
+ print 'Error in inserting rows in the table %s' % table_id
+ is_success = False
+ return is_success
+
+
+def sync_query_job(big_query, project_id, query, timeout=5000):
+ query_data = {'query': query, 'timeoutMs': timeout}
+ query_job = None
+ try:
+ query_job = big_query.jobs().query(
+ projectId=project_id,
+ body=query_data).execute(num_retries=NUM_RETRIES)
+ except HttpError as http_error:
+ print 'Query execute job failed with error: %s' % http_error
+ print http_error.content
+ return query_job
+
+
+def make_row(unique_row_id, row_values_dict):
+ """row_values_dict is a dictionary of column name and column value.
+ """
+ return {'insertId': unique_row_id, 'json': row_values_dict}
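+
+# A minimal usage sketch of the helpers above (the project, dataset and table
+# ids below are placeholders):
+#
+#   bq = create_big_query()
+#   create_dataset(bq, 'my-project', 'my_dataset')
+#   schema = [('id', 'INTEGER', 'Row id'), ('name', 'STRING', 'Row name')]
+#   create_table(bq, 'my-project', 'my_dataset', 'my_table', schema,
+#                'An example table')
+#   rows = [make_row(str(i), {'id': i, 'name': 'name_%d' % i})
+#           for i in range(3)]
+#   insert_rows(bq, 'my-project', 'my_dataset', 'my_table', rows)
+#   query_job = sync_query_job(bq, 'my-project',
+#                              'SELECT id, name FROM my_dataset.my_table;')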
diff --git a/tools/gcp/utils/kubernetes_api.py b/tools/gcp/utils/kubernetes_api.py
new file mode 100755
index 0000000000..2d3f771e93
--- /dev/null
+++ b/tools/gcp/utils/kubernetes_api.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python2.7
+# Copyright 2015-2016 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import requests
+import json
+
+_REQUEST_TIMEOUT_SECS = 10
+
+
+def _make_pod_config(pod_name, image_name, container_port_list, cmd_list,
+ arg_list, env_dict):
+ """Creates a string containing the Pod definition as required by the Kubernetes API"""
+ body = {
+ 'kind': 'Pod',
+ 'apiVersion': 'v1',
+ 'metadata': {
+ 'name': pod_name,
+ 'labels': {'name': pod_name}
+ },
+ 'spec': {
+ 'containers': [
+ {
+ 'name': pod_name,
+ 'image': image_name,
+ 'ports': [{'containerPort': port,
+ 'protocol': 'TCP'}
+ for port in container_port_list],
+ 'imagePullPolicy': 'Always'
+ }
+ ]
+ }
+ }
+
+ env_list = [{'name': k, 'value': v} for (k, v) in env_dict.iteritems()]
+ if len(env_list) > 0:
+ body['spec']['containers'][0]['env'] = env_list
+
+ # Add the 'Command' and 'Args' attributes if they are passed.
+ # Note:
+ # - 'Command' overrides the ENTRYPOINT in the Docker image
+ # - 'Args' override the CMD in the Docker image (yes, it is confusing!)
+ if len(cmd_list) > 0:
+ body['spec']['containers'][0]['command'] = cmd_list
+ if len(arg_list) > 0:
+ body['spec']['containers'][0]['args'] = arg_list
+ return json.dumps(body)
+
+
+def _make_service_config(service_name, pod_name, service_port_list,
+ container_port_list, is_headless):
+ """Creates a string containing the Service definition as required by the Kubernetes API.
+
+ NOTE:
+ This creates either a Headless Service or a 'LoadBalancer' Service depending
+ on the is_headless parameter. For Headless Services, there is no 'type'
+ attribute and the 'clusterIP' attribute is set to 'None'. Also, if the
+ Service is Headless, Kubernetes creates DNS entries for the Pods - i.e. it
+ creates DNS A-records mapping the Service's name to the Pods' IPs
+ """
+ if len(container_port_list) != len(service_port_list):
+ print(
+ 'ERROR: container_port_list and service_port_list must be of same size')
+ return ''
+ body = {
+ 'kind': 'Service',
+ 'apiVersion': 'v1',
+ 'metadata': {
+ 'name': service_name,
+ 'labels': {
+ 'name': service_name
+ }
+ },
+ 'spec': {
+ 'ports': [],
+ 'selector': {
+ 'name': pod_name
+ }
+ }
+ }
+ # Populate the 'ports' list in the 'spec' section. This maps service ports
+ # (port numbers that are exposed by Kubernetes) to container ports (i.e. port
+ # numbers that are exposed by your Docker image)
+ for idx in range(len(container_port_list)):
+ port_entry = {
+ 'port': service_port_list[idx],
+ 'targetPort': container_port_list[idx],
+ 'protocol': 'TCP'
+ }
+ body['spec']['ports'].append(port_entry)
+
+ # Make this either a LoadBalancer service or a headless service depending on
+ # the is_headless parameter
+ if is_headless:
+ body['spec']['clusterIP'] = 'None'
+ else:
+ body['spec']['type'] = 'LoadBalancer'
+ return json.dumps(body)
+
+
+def _print_connection_error(msg):
+ print('ERROR: Connection failed. Did you remember to run the Kubernetes proxy'
+ ' on localhost (i.e. kubectl proxy --port=<proxy_port>)? Error: %s' % msg)
+
+
+def _do_post(post_url, api_name, request_body):
+ """Helper to do HTTP POST.
+
+ Note:
+ 1) On success, Kubernetes returns a success code of 201 (CREATED), not 200 (OK)
+ 2) A response code of 409 (CONFLICT) is interpreted as a success code (since
+ the error is most likely due to the resource already existing). This makes
+ _do_post() idempotent, which is semantically desirable.
+ """
+ is_success = True
+ try:
+ r = requests.post(post_url,
+ data=request_body,
+ timeout=_REQUEST_TIMEOUT_SECS)
+ if r.status_code == requests.codes.conflict:
+ print('WARN: Looks like the resource already exists. Api: %s, url: %s' %
+ (api_name, post_url))
+ elif r.status_code != requests.codes.created:
+ print('ERROR: %s API returned error. HTTP response: (%d) %s' %
+ (api_name, r.status_code, r.text))
+ is_success = False
+ except (requests.exceptions.Timeout,
+ requests.exceptions.ConnectionError) as e:
+ is_success = False
+ _print_connection_error(str(e))
+ return is_success
+
+
+def _do_delete(del_url, api_name):
+ """Helper to do HTTP DELETE.
+
+ Note: A response code of 404 (NOT_FOUND) is treated as success to keep
+ _do_delete() idempotent.
+ """
+ is_success = True
+ try:
+ r = requests.delete(del_url, timeout=_REQUEST_TIMEOUT_SECS)
+ if r.status_code == requests.codes.not_found:
+ print('WARN: The resource does not exist. Api: %s, url: %s' %
+ (api_name, del_url))
+ elif r.status_code != requests.codes.ok:
+ print('ERROR: %s API returned error. HTTP response: %s' %
+ (api_name, r.text))
+ is_success = False
+ except (requests.exceptions.Timeout,
+ requests.exceptions.ConnectionError) as e:
+ is_success = False
+ _print_connection_error(str(e))
+ return is_success
+
+
+def create_service(kube_host, kube_port, namespace, service_name, pod_name,
+ service_port_list, container_port_list, is_headless):
+ """Creates either a Headless Service or a LoadBalancer Service depending
+ on the is_headless parameter.
+ """
+ post_url = 'http://%s:%d/api/v1/namespaces/%s/services' % (
+ kube_host, kube_port, namespace)
+ request_body = _make_service_config(service_name, pod_name, service_port_list,
+ container_port_list, is_headless)
+ return _do_post(post_url, 'Create Service', request_body)
+
+
+def create_pod(kube_host, kube_port, namespace, pod_name, image_name,
+ container_port_list, cmd_list, arg_list, env_dict):
+ """Creates a Kubernetes Pod.
+
+ Note that it is generally NOT considered a good practice to directly create
+ Pods. Typically, the recommendation is to create 'Controllers' to create and
+ manage the Pods' lifecycle. Currently, Kubernetes only supports the
+ 'Replication Controller', which creates a configurable number of identical
+ replicas of Pods and automatically restarts any Pods in case of failures
+ (e.g. machine failures in Kubernetes). This makes it less flexible for our
+ test use cases, where we might want a slightly different set of args for each
+ Pod. Hence we directly create Pods here and do not worry much about
+ Kubernetes failures, since those are very rare.
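+
+ A sketch of a typical call (the image, port and env values below are
+ illustrative placeholders):
+
+ create_pod('localhost', 8001, 'default', 'stress-client-1',
+ 'gcr.io/my-project/grpc_stress_test', [8081],
+ ['/var/local/git/grpc/tools/run_tests/stress_test/run_client.py'],
+ [], {'RUN_ID': '2016_01_01_00_00_00'})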
+ """ + post_url = 'http://%s:%d/api/v1/namespaces/%s/pods' % (kube_host, kube_port, + namespace) + request_body = _make_pod_config(pod_name, image_name, container_port_list, + cmd_list, arg_list, env_dict) + return _do_post(post_url, 'Create Pod', request_body) + + +def delete_service(kube_host, kube_port, namespace, service_name): + del_url = 'http://%s:%d/api/v1/namespaces/%s/services/%s' % ( + kube_host, kube_port, namespace, service_name) + return _do_delete(del_url, 'Delete Service') + + +def delete_pod(kube_host, kube_port, namespace, pod_name): + del_url = 'http://%s:%d/api/v1/namespaces/%s/pods/%s' % (kube_host, kube_port, + namespace, pod_name) + return _do_delete(del_url, 'Delete Pod') + + +def create_pod_and_service(kube_host, kube_port, namespace, pod_name, + image_name, container_port_list, cmd_list, arg_list, + env_dict, is_headless_service): + """A helper function that creates a pod and a service (if pod creation was successful).""" + is_success = create_pod(kube_host, kube_port, namespace, pod_name, image_name, + container_port_list, cmd_list, arg_list, env_dict) + if not is_success: + print 'Error in creating Pod' + return False + + is_success = create_service( + kube_host, + kube_port, + namespace, + pod_name, # Use pod_name for service + pod_name, + container_port_list, # Service port list same as container port list + container_port_list, + is_headless_service) + if not is_success: + print 'Error in creating Service' + return False + + print 'Successfully created the pod/service %s' % pod_name + return True + + +def delete_pod_and_service(kube_host, kube_port, namespace, pod_name): + """ A helper function that calls delete_pod and delete_service """ + is_success = delete_pod(kube_host, kube_port, namespace, pod_name) + if not is_success: + print 'Error in deleting pod %s' % pod_name + return False + + # Note: service name assumed to the the same as pod name + is_success = delete_service(kube_host, kube_port, namespace, pod_name) + if not is_success: + print 'Error in deleting service %s' % pod_name + return False + + print 'Successfully deleted the Pod/Service: %s' % pod_name + return True diff --git a/tools/gke/kubernetes_api.py b/tools/gke/kubernetes_api.py deleted file mode 100755 index 2d3f771e93..0000000000 --- a/tools/gke/kubernetes_api.py +++ /dev/null @@ -1,269 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright 2015-2016 Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import requests -import json - -_REQUEST_TIMEOUT_SECS = 10 - - -def _make_pod_config(pod_name, image_name, container_port_list, cmd_list, - arg_list, env_dict): - """Creates a string containing the Pod defintion as required by the Kubernetes API""" - body = { - 'kind': 'Pod', - 'apiVersion': 'v1', - 'metadata': { - 'name': pod_name, - 'labels': {'name': pod_name} - }, - 'spec': { - 'containers': [ - { - 'name': pod_name, - 'image': image_name, - 'ports': [{'containerPort': port, - 'protocol': 'TCP'} - for port in container_port_list], - 'imagePullPolicy': 'Always' - } - ] - } - } - - env_list = [{'name': k, 'value': v} for (k, v) in env_dict.iteritems()] - if len(env_list) > 0: - body['spec']['containers'][0]['env'] = env_list - - # Add the 'Command' and 'Args' attributes if they are passed. - # Note: - # - 'Command' overrides the ENTRYPOINT in the Docker Image - # - 'Args' override the CMD in Docker image (yes, it is confusing!) - if len(cmd_list) > 0: - body['spec']['containers'][0]['command'] = cmd_list - if len(arg_list) > 0: - body['spec']['containers'][0]['args'] = arg_list - return json.dumps(body) - - -def _make_service_config(service_name, pod_name, service_port_list, - container_port_list, is_headless): - """Creates a string containing the Service definition as required by the Kubernetes API. - - NOTE: - This creates either a Headless Service or 'LoadBalancer' service depending on - the is_headless parameter. For Headless services, there is no 'type' attribute - and the 'clusterIP' attribute is set to 'None'. Also, if the service is - Headless, Kubernetes creates DNS entries for Pods - i.e creates DNS A-records - mapping the service's name to the Pods' IPs - """ - if len(container_port_list) != len(service_port_list): - print( - 'ERROR: container_port_list and service_port_list must be of same size') - return '' - body = { - 'kind': 'Service', - 'apiVersion': 'v1', - 'metadata': { - 'name': service_name, - 'labels': { - 'name': service_name - } - }, - 'spec': { - 'ports': [], - 'selector': { - 'name': pod_name - } - } - } - # Populate the 'ports' list in the 'spec' section. This maps service ports - # (port numbers that are exposed by Kubernetes) to container ports (i.e port - # numbers that are exposed by your Docker image) - for idx in range(len(container_port_list)): - port_entry = { - 'port': service_port_list[idx], - 'targetPort': container_port_list[idx], - 'protocol': 'TCP' - } - body['spec']['ports'].append(port_entry) - - # Make this either a LoadBalancer service or a headless service depending on - # the is_headless parameter - if is_headless: - body['spec']['clusterIP'] = 'None' - else: - body['spec']['type'] = 'LoadBalancer' - return json.dumps(body) - - -def _print_connection_error(msg): - print('ERROR: Connection failed. Did you remember to run Kubenetes proxy on ' - 'localhost (i.e kubectl proxy --port=) ?. Error: %s' % msg) - - -def _do_post(post_url, api_name, request_body): - """Helper to do HTTP POST. 
- - Note: - 1) On success, Kubernetes returns a success code of 201(CREATED) not 200(OK) - 2) A response code of 509(CONFLICT) is interpreted as a success code (since - the error is most likely due to the resource already existing). This makes - _do_post() idempotent which is semantically desirable. - """ - is_success = True - try: - r = requests.post(post_url, - data=request_body, - timeout=_REQUEST_TIMEOUT_SECS) - if r.status_code == requests.codes.conflict: - print('WARN: Looks like the resource already exists. Api: %s, url: %s' % - (api_name, post_url)) - elif r.status_code != requests.codes.created: - print('ERROR: %s API returned error. HTTP response: (%d) %s' % - (api_name, r.status_code, r.text)) - is_success = False - except (requests.exceptions.Timeout, - requests.exceptions.ConnectionError) as e: - is_success = False - _print_connection_error(str(e)) - return is_success - - -def _do_delete(del_url, api_name): - """Helper to do HTTP DELETE. - - Note: A response code of 404(NOT_FOUND) is treated as success to keep - _do_delete() idempotent. - """ - is_success = True - try: - r = requests.delete(del_url, timeout=_REQUEST_TIMEOUT_SECS) - if r.status_code == requests.codes.not_found: - print('WARN: The resource does not exist. Api: %s, url: %s' % - (api_name, del_url)) - elif r.status_code != requests.codes.ok: - print('ERROR: %s API returned error. HTTP response: %s' % - (api_name, r.text)) - is_success = False - except (requests.exceptions.Timeout, - requests.exceptions.ConnectionError) as e: - is_success = False - _print_connection_error(str(e)) - return is_success - - -def create_service(kube_host, kube_port, namespace, service_name, pod_name, - service_port_list, container_port_list, is_headless): - """Creates either a Headless Service or a LoadBalancer Service depending - on the is_headless parameter. - """ - post_url = 'http://%s:%d/api/v1/namespaces/%s/services' % ( - kube_host, kube_port, namespace) - request_body = _make_service_config(service_name, pod_name, service_port_list, - container_port_list, is_headless) - return _do_post(post_url, 'Create Service', request_body) - - -def create_pod(kube_host, kube_port, namespace, pod_name, image_name, - container_port_list, cmd_list, arg_list, env_dict): - """Creates a Kubernetes Pod. - - Note that it is generally NOT considered a good practice to directly create - Pods. Typically, the recommendation is to create 'Controllers' to create and - manage Pods' lifecycle. Currently Kubernetes only supports 'Replication - Controller' which creates a configurable number of 'identical Replicas' of - Pods and automatically restarts any Pods in case of failures (for eg: Machine - failures in Kubernetes). This makes it less flexible for our test use cases - where we might want slightly different set of args to each Pod. Hence we - directly create Pods and not care much about Kubernetes failures since those - are very rare. 
- """ - post_url = 'http://%s:%d/api/v1/namespaces/%s/pods' % (kube_host, kube_port, - namespace) - request_body = _make_pod_config(pod_name, image_name, container_port_list, - cmd_list, arg_list, env_dict) - return _do_post(post_url, 'Create Pod', request_body) - - -def delete_service(kube_host, kube_port, namespace, service_name): - del_url = 'http://%s:%d/api/v1/namespaces/%s/services/%s' % ( - kube_host, kube_port, namespace, service_name) - return _do_delete(del_url, 'Delete Service') - - -def delete_pod(kube_host, kube_port, namespace, pod_name): - del_url = 'http://%s:%d/api/v1/namespaces/%s/pods/%s' % (kube_host, kube_port, - namespace, pod_name) - return _do_delete(del_url, 'Delete Pod') - - -def create_pod_and_service(kube_host, kube_port, namespace, pod_name, - image_name, container_port_list, cmd_list, arg_list, - env_dict, is_headless_service): - """A helper function that creates a pod and a service (if pod creation was successful).""" - is_success = create_pod(kube_host, kube_port, namespace, pod_name, image_name, - container_port_list, cmd_list, arg_list, env_dict) - if not is_success: - print 'Error in creating Pod' - return False - - is_success = create_service( - kube_host, - kube_port, - namespace, - pod_name, # Use pod_name for service - pod_name, - container_port_list, # Service port list same as container port list - container_port_list, - is_headless_service) - if not is_success: - print 'Error in creating Service' - return False - - print 'Successfully created the pod/service %s' % pod_name - return True - - -def delete_pod_and_service(kube_host, kube_port, namespace, pod_name): - """ A helper function that calls delete_pod and delete_service """ - is_success = delete_pod(kube_host, kube_port, namespace, pod_name) - if not is_success: - print 'Error in deleting pod %s' % pod_name - return False - - # Note: service name assumed to the the same as pod name - is_success = delete_service(kube_host, kube_port, namespace, pod_name) - if not is_success: - print 'Error in deleting service %s' % pod_name - return False - - print 'Successfully deleted the Pod/Service: %s' % pod_name - return True diff --git a/tools/gke/run_stress_tests_on_gke.py b/tools/gke/run_stress_tests_on_gke.py deleted file mode 100755 index d126e3db43..0000000000 --- a/tools/gke/run_stress_tests_on_gke.py +++ /dev/null @@ -1,556 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright 2015-2016, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import argparse -import datetime -import os -import subprocess -import sys -import time - -stress_test_utils_dir = os.path.abspath(os.path.join( - os.path.dirname(__file__), '../run_tests/stress_test')) -sys.path.append(stress_test_utils_dir) -from stress_test_utils import BigQueryHelper - -import kubernetes_api - -_GRPC_ROOT = os.path.abspath(os.path.join( - os.path.dirname(sys.argv[0]), '../..')) -os.chdir(_GRPC_ROOT) - -# num of seconds to wait for the GKE image to start and warmup -_GKE_IMAGE_WARMUP_WAIT_SECS = 60 - -_SERVER_POD_NAME = 'stress-server' -_CLIENT_POD_NAME_PREFIX = 'stress-client' -_DATASET_ID_PREFIX = 'stress_test' -_SUMMARY_TABLE_ID = 'summary' -_QPS_TABLE_ID = 'qps' - -_DEFAULT_DOCKER_IMAGE_NAME = 'grpc_stress_test' - -# The default port on which the kubernetes proxy server is started on localhost -# (i.e kubectl proxy --port=) -_DEFAULT_KUBERNETES_PROXY_PORT = 8001 - -# How frequently should the stress client wrapper script (running inside a GKE -# container) poll the health of the stress client (also running inside the GKE -# container) and upload metrics to BigQuery -_DEFAULT_STRESS_CLIENT_POLL_INTERVAL_SECS = 60 - -# The default setting for stress test server and client -_DEFAULT_STRESS_SERVER_PORT = 8080 -_DEFAULT_METRICS_PORT = 8081 -_DEFAULT_TEST_CASES_STR = 'empty_unary:1,large_unary:1,client_streaming:1,server_streaming:1,empty_stream:1' -_DEFAULT_NUM_CHANNELS_PER_SERVER = 5 -_DEFAULT_NUM_STUBS_PER_CHANNEL = 10 -_DEFAULT_METRICS_COLLECTION_INTERVAL_SECS = 30 - -# Number of stress client instances to launch -_DEFAULT_NUM_CLIENTS = 3 - -# How frequently should this test monitor the health of Stress clients and -# Servers running in GKE -_DEFAULT_TEST_POLL_INTERVAL_SECS = 60 - -# Default run time for this test (2 hour) -_DEFAULT_TEST_DURATION_SECS = 7200 - -# The number of seconds it would take a GKE pod to warm up (i.e get to 'Running' -# state from the time of creation). Ideally this is something the test should -# automatically determine by using Kubernetes API to poll the pods status. -_DEFAULT_GKE_WARMUP_SECS = 60 - - -class KubernetesProxy: - """ Class to start a proxy on localhost to the Kubernetes API server """ - - def __init__(self, api_port): - self.port = api_port - self.p = None - self.started = False - - def start(self): - cmd = ['kubectl', 'proxy', '--port=%d' % self.port] - self.p = subprocess.Popen(args=cmd) - self.started = True - time.sleep(2) - print '..Started' - - def get_port(self): - return self.port - - def is_started(self): - return self.started - - def __del__(self): - if self.p is not None: - print 'Shutting down Kubernetes proxy..' 
- self.p.kill() - - -class TestSettings: - - def __init__(self, build_docker_image, test_poll_interval_secs, - test_duration_secs, kubernetes_proxy_port): - self.build_docker_image = build_docker_image - self.test_poll_interval_secs = test_poll_interval_secs - self.test_duration_secs = test_duration_secs - self.kubernetes_proxy_port = kubernetes_proxy_port - - -class GkeSettings: - - def __init__(self, project_id, docker_image_name): - self.project_id = project_id - self.docker_image_name = docker_image_name - self.tag_name = 'gcr.io/%s/%s' % (project_id, docker_image_name) - - -class BigQuerySettings: - - def __init__(self, run_id, dataset_id, summary_table_id, qps_table_id): - self.run_id = run_id - self.dataset_id = dataset_id - self.summary_table_id = summary_table_id - self.qps_table_id = qps_table_id - - -class StressServerSettings: - - def __init__(self, server_pod_name, server_port): - self.server_pod_name = server_pod_name - self.server_port = server_port - - -class StressClientSettings: - - def __init__(self, num_clients, client_pod_name_prefix, server_pod_name, - server_port, metrics_port, metrics_collection_interval_secs, - stress_client_poll_interval_secs, num_channels_per_server, - num_stubs_per_channel, test_cases_str): - self.num_clients = num_clients - self.client_pod_name_prefix = client_pod_name_prefix - self.server_pod_name = server_pod_name - self.server_port = server_port - self.metrics_port = metrics_port - self.metrics_collection_interval_secs = metrics_collection_interval_secs - self.stress_client_poll_interval_secs = stress_client_poll_interval_secs - self.num_channels_per_server = num_channels_per_server - self.num_stubs_per_channel = num_stubs_per_channel - self.test_cases_str = test_cases_str - - # == Derived properties == - # Note: Client can accept a list of server addresses (a comma separated list - # of 'server_name:server_port'). In this case, we only have one server - # address to pass - self.server_addresses = '%s.default.svc.cluster.local:%d' % ( - server_pod_name, server_port) - self.client_pod_names_list = ['%s-%d' % (client_pod_name_prefix, i) - for i in range(1, num_clients + 1)] - - -def _build_docker_image(image_name, tag_name): - """ Build the docker image and add tag it to the GKE repository """ - print 'Building docker image: %s' % image_name - os.environ['INTEROP_IMAGE'] = image_name - os.environ['INTEROP_IMAGE_REPOSITORY_TAG'] = tag_name - # Note that 'BASE_NAME' HAS to be 'grpc_interop_stress_cxx' since the script - # build_interop_stress_image.sh invokes the following script: - # tools/dockerfile/$BASE_NAME/build_interop_stress.sh - os.environ['BASE_NAME'] = 'grpc_interop_stress_cxx' - cmd = ['tools/jenkins/build_interop_stress_image.sh'] - retcode = subprocess.call(args=cmd) - if retcode != 0: - print 'Error in building docker image' - return False - return True - - -def _push_docker_image_to_gke_registry(docker_tag_name): - """Executes 'gcloud docker push ' to push the image to GKE registry""" - cmd = ['gcloud', 'docker', 'push', docker_tag_name] - print 'Pushing %s to GKE registry..' 
% docker_tag_name - retcode = subprocess.call(args=cmd) - if retcode != 0: - print 'Error in pushing docker image %s to the GKE registry' % docker_tag_name - return False - return True - - -def _launch_server(gke_settings, stress_server_settings, bq_settings, - kubernetes_proxy): - """ Launches a stress test server instance in GKE cluster """ - if not kubernetes_proxy.is_started: - print 'Kubernetes proxy must be started before calling this function' - return False - - # This is the wrapper script that is run in the container. This script runs - # the actual stress test server - server_cmd_list = [ - '/var/local/git/grpc/tools/run_tests/stress_test/run_server.py' - ] - - # run_server.py does not take any args from the command line. The args are - # instead passed via environment variables (see server_env below) - server_arg_list = [] - - # The parameters to the script run_server.py are injected into the container - # via environment variables - server_env = { - 'STRESS_TEST_IMAGE_TYPE': 'SERVER', - 'STRESS_TEST_IMAGE': '/var/local/git/grpc/bins/opt/interop_server', - 'STRESS_TEST_ARGS_STR': '--port=%s' % stress_server_settings.server_port, - 'RUN_ID': bq_settings.run_id, - 'POD_NAME': stress_server_settings.server_pod_name, - 'GCP_PROJECT_ID': gke_settings.project_id, - 'DATASET_ID': bq_settings.dataset_id, - 'SUMMARY_TABLE_ID': bq_settings.summary_table_id, - 'QPS_TABLE_ID': bq_settings.qps_table_id - } - - # Launch Server - is_success = kubernetes_api.create_pod_and_service( - 'localhost', - kubernetes_proxy.get_port(), - 'default', # Use 'default' namespace - stress_server_settings.server_pod_name, - gke_settings.tag_name, - [stress_server_settings.server_port], # Port that should be exposed - server_cmd_list, - server_arg_list, - server_env, - True # Headless = True for server. Since we want DNS records to be created by GKE - ) - - return is_success - - -def _launch_client(gke_settings, stress_server_settings, stress_client_settings, - bq_settings, kubernetes_proxy): - """ Launches a configurable number of stress test clients on GKE cluster """ - if not kubernetes_proxy.is_started: - print 'Kubernetes proxy must be started before calling this function' - return False - - stress_client_arg_list = [ - '--server_addresses=%s' % stress_client_settings.server_addresses, - '--test_cases=%s' % stress_client_settings.test_cases_str, - '--num_stubs_per_channel=%d' % - stress_client_settings.num_stubs_per_channel - ] - - # This is the wrapper script that is run in the container. This script runs - # the actual stress client - client_cmd_list = [ - '/var/local/git/grpc/tools/run_tests/stress_test/run_client.py' - ] - - # run_client.py takes no args. 
All args are passed as env variables (see - # client_env) - client_arg_list = [] - - metrics_server_address = 'localhost:%d' % stress_client_settings.metrics_port - metrics_client_arg_list = [ - '--metrics_server_address=%s' % metrics_server_address, - '--total_only=true' - ] - - # The parameters to the script run_client.py are injected into the container - # via environment variables - client_env = { - 'STRESS_TEST_IMAGE_TYPE': 'CLIENT', - 'STRESS_TEST_IMAGE': '/var/local/git/grpc/bins/opt/stress_test', - 'STRESS_TEST_ARGS_STR': ' '.join(stress_client_arg_list), - 'METRICS_CLIENT_IMAGE': '/var/local/git/grpc/bins/opt/metrics_client', - 'METRICS_CLIENT_ARGS_STR': ' '.join(metrics_client_arg_list), - 'RUN_ID': bq_settings.run_id, - 'POLL_INTERVAL_SECS': - str(stress_client_settings.stress_client_poll_interval_secs), - 'GCP_PROJECT_ID': gke_settings.project_id, - 'DATASET_ID': bq_settings.dataset_id, - 'SUMMARY_TABLE_ID': bq_settings.summary_table_id, - 'QPS_TABLE_ID': bq_settings.qps_table_id - } - - for pod_name in stress_client_settings.client_pod_names_list: - client_env['POD_NAME'] = pod_name - is_success = kubernetes_api.create_pod_and_service( - 'localhost', # Since proxy is running on localhost - kubernetes_proxy.get_port(), - 'default', # default namespace - pod_name, - gke_settings.tag_name, - [stress_client_settings.metrics_port - ], # Client pods expose metrics port - client_cmd_list, - client_arg_list, - client_env, - False # Client is not a headless service - ) - if not is_success: - print 'Error in launching client %s' % pod_name - return False - - return True - - -def _launch_server_and_client(gke_settings, stress_server_settings, - stress_client_settings, bq_settings, - kubernetes_proxy_port): - # Start kubernetes proxy - print 'Kubernetes proxy' - kubernetes_proxy = KubernetesProxy(kubernetes_proxy_port) - kubernetes_proxy.start() - - print 'Launching server..' - is_success = _launch_server(gke_settings, stress_server_settings, bq_settings, - kubernetes_proxy) - if not is_success: - print 'Error in launching server' - return False - - # Server takes a while to start. - # TODO(sree) Use Kubernetes API to query the status of the server instead of - # sleeping - print 'Waiting for %s seconds for the server to start...' % _GKE_IMAGE_WARMUP_WAIT_SECS - time.sleep(_GKE_IMAGE_WARMUP_WAIT_SECS) - - # Launch client - client_pod_name_prefix = 'stress-client' - is_success = _launch_client(gke_settings, stress_server_settings, - stress_client_settings, bq_settings, - kubernetes_proxy) - - if not is_success: - print 'Error in launching client(s)' - return False - - print 'Waiting for %s seconds for the client images to start...' 
% _GKE_IMAGE_WARMUP_WAIT_SECS - time.sleep(_GKE_IMAGE_WARMUP_WAIT_SECS) - return True - - -def _delete_server_and_client(stress_server_settings, stress_client_settings, - kubernetes_proxy_port): - kubernetes_proxy = KubernetesProxy(kubernetes_proxy_port) - kubernetes_proxy.start() - - # Delete clients first - is_success = True - for pod_name in stress_client_settings.client_pod_names_list: - is_success = kubernetes_api.delete_pod_and_service( - 'localhost', kubernetes_proxy_port, 'default', pod_name) - if not is_success: - return False - - # Delete server - is_success = kubernetes_api.delete_pod_and_service( - 'localhost', kubernetes_proxy_port, 'default', - stress_server_settings.server_pod_name) - return is_success - - -def run_test_main(test_settings, gke_settings, stress_server_settings, - stress_client_clients): - is_success = True - - if test_settings.build_docker_image: - is_success = _build_docker_image(gke_settings.docker_image_name, - gke_settings.tag_name) - if not is_success: - return False - - is_success = _push_docker_image_to_gke_registry(gke_settings.tag_name) - if not is_success: - return False - - # Create a unique id for this run (Note: Using timestamp instead of UUID to - # make it easier to deduce the date/time of the run just by looking at the run - # run id. This is useful in debugging when looking at records in Biq query) - run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') - dataset_id = '%s_%s' % (_DATASET_ID_PREFIX, run_id) - - # Big Query settings (common for both Stress Server and Client) - bq_settings = BigQuerySettings(run_id, dataset_id, _SUMMARY_TABLE_ID, - _QPS_TABLE_ID) - - bq_helper = BigQueryHelper(run_id, '', '', args.project_id, dataset_id, - _SUMMARY_TABLE_ID, _QPS_TABLE_ID) - bq_helper.initialize() - - try: - is_success = _launch_server_and_client(gke_settings, stress_server_settings, - stress_client_settings, bq_settings, - test_settings.kubernetes_proxy_port) - if not is_success: - return False - - start_time = datetime.datetime.now() - end_time = start_time + datetime.timedelta( - seconds=test_settings.test_duration_secs) - print 'Running the test until %s' % end_time.isoformat() - - while True: - if datetime.datetime.now() > end_time: - print 'Test was run for %d seconds' % test_settings.test_duration_secs - break - - # Check if either stress server or clients have failed - if bq_helper.check_if_any_tests_failed(): - is_success = False - print 'Some tests failed.' - break - - # Things seem to be running fine. Wait until next poll time to check the - # status - print 'Sleeping for %d seconds..' % test_settings.test_poll_interval_secs - time.sleep(test_settings.test_poll_interval_secs) - - # Print BiqQuery tables - bq_helper.print_summary_records() - bq_helper.print_qps_records() - - finally: - # If is_success is False at this point, it means that the stress tests were - # started successfully but failed while running the tests. 
In this case we - # do should not delete the pods (since they contain all the failure - # information) - if is_success: - _delete_server_and_client(stress_server_settings, stress_client_settings, - test_settings.kubernetes_proxy_port) - - return is_success - - -argp = argparse.ArgumentParser( - description='Launch stress tests in GKE', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -argp.add_argument('--project_id', - required=True, - help='The Google Cloud Platform Project Id') -argp.add_argument('--num_clients', - default=1, - type=int, - help='Number of client instances to start') -argp.add_argument('--docker_image_name', - default=_DEFAULT_DOCKER_IMAGE_NAME, - help='The name of the docker image containing stress client ' - 'and stress servers') -argp.add_argument('--build_docker_image', - dest='build_docker_image', - action='store_true', - help='Build a docker image and push to Google Container ' - 'Registry') -argp.add_argument('--do_not_build_docker_image', - dest='build_docker_image', - action='store_false', - help='Do not build and push docker image to Google Container ' - 'Registry') -argp.set_defaults(build_docker_image=True) - -argp.add_argument('--test_poll_interval_secs', - default=_DEFAULT_TEST_POLL_INTERVAL_SECS, - type=int, - help='How frequently should this script should monitor the ' - 'health of stress clients and servers running in the GKE ' - 'cluster') -argp.add_argument('--test_duration_secs', - default=_DEFAULT_TEST_DURATION_SECS, - type=int, - help='How long should this test be run') -argp.add_argument('--kubernetes_proxy_port', - default=_DEFAULT_KUBERNETES_PROXY_PORT, - type=int, - help='The port on which the kubernetes proxy (on localhost)' - ' is started') -argp.add_argument('--stress_server_port', - default=_DEFAULT_STRESS_SERVER_PORT, - type=int, - help='The port on which the stress server (in GKE ' - 'containers) listens') -argp.add_argument('--stress_client_metrics_port', - default=_DEFAULT_METRICS_PORT, - type=int, - help='The port on which the stress clients (in GKE ' - 'containers) expose metrics') -argp.add_argument('--stress_client_poll_interval_secs', - default=_DEFAULT_STRESS_CLIENT_POLL_INTERVAL_SECS, - type=int, - help='How frequently should the stress client wrapper script' - ' running inside GKE should monitor health of the actual ' - ' stress client process and upload the metrics to BigQuery') -argp.add_argument('--stress_client_metrics_collection_interval_secs', - default=_DEFAULT_METRICS_COLLECTION_INTERVAL_SECS, - type=int, - help='How frequently should metrics be collected in-memory on' - ' the stress clients (running inside GKE containers). Note ' - 'that this is NOT the same as the upload-to-BigQuery ' - 'frequency. The metrics upload frequency is controlled by the' - ' --stress_client_poll_interval_secs flag') -argp.add_argument('--stress_client_num_channels_per_server', - default=_DEFAULT_NUM_CHANNELS_PER_SERVER, - type=int, - help='The number of channels created to each server from a ' - 'stress client') -argp.add_argument('--stress_client_num_stubs_per_channel', - default=_DEFAULT_NUM_STUBS_PER_CHANNEL, - type=int, - help='The number of stubs created per channel. This number ' - 'indicates the max number of RPCs that can be made in ' - 'parallel on each channel at any given time') -argp.add_argument('--stress_client_test_cases', - default=_DEFAULT_TEST_CASES_STR, - help='List of test cases (with weights) to be executed by the' - ' stress test client. 
The list is in the following format:\n' - ' ..\n' - ' (Note: The weights do not have to add up to 100)') - -if __name__ == '__main__': - args = argp.parse_args() - - test_settings = TestSettings( - args.build_docker_image, args.test_poll_interval_secs, - args.test_duration_secs, args.kubernetes_proxy_port) - - gke_settings = GkeSettings(args.project_id, args.docker_image_name) - - stress_server_settings = StressServerSettings(_SERVER_POD_NAME, - args.stress_server_port) - stress_client_settings = StressClientSettings( - args.num_clients, _CLIENT_POD_NAME_PREFIX, _SERVER_POD_NAME, - args.stress_server_port, args.stress_client_metrics_port, - args.stress_client_metrics_collection_interval_secs, - args.stress_client_poll_interval_secs, - args.stress_client_num_channels_per_server, - args.stress_client_num_stubs_per_channel, args.stress_client_test_cases) - - run_test_main(test_settings, gke_settings, stress_server_settings, - stress_client_settings) diff --git a/tools/jenkins/build_interop_stress_image.sh b/tools/jenkins/build_interop_stress_image.sh index 4c8e998a8a..501dc5b7ca 100755 --- a/tools/jenkins/build_interop_stress_image.sh +++ b/tools/jenkins/build_interop_stress_image.sh @@ -79,7 +79,7 @@ CONTAINER_NAME="build_${BASE_NAME}_$(uuidgen)" $BASE_IMAGE \ bash -l /var/local/jenkins/grpc/tools/dockerfile/$BASE_NAME/build_interop_stress.sh \ && docker commit $CONTAINER_NAME $INTEROP_IMAGE \ - && ( if [ -n $INTEROP_IMAGE_REPOSITORY_TAG ]; then docker tag $INTEROP_IMAGE $INTEROP_IMAGE_REPOSITORY_TAG ; fi ) \ + && ( if [ -n "$INTEROP_IMAGE_REPOSITORY_TAG" ]; then docker tag -f $INTEROP_IMAGE $INTEROP_IMAGE_REPOSITORY_TAG ; fi ) \ && echo "Successfully built image $INTEROP_IMAGE") EXITCODE=$? diff --git a/tools/run_tests/stress_test/run_client.py b/tools/run_tests/stress_test/run_client.py deleted file mode 100755 index 0fa1bf1cb9..0000000000 --- a/tools/run_tests/stress_test/run_client.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright 2015-2016, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import datetime -import os -import re -import select -import subprocess -import sys -import time - -from stress_test_utils import EventType -from stress_test_utils import BigQueryHelper - - -# TODO (sree): Write a python grpc client to directly query the metrics instead -# of calling metrics_client -def _get_qps(metrics_cmd): - qps = 0 - try: - # Note: gpr_log() writes even non-error messages to stderr stream. So it is - # important that we set stderr=subprocess.STDOUT - p = subprocess.Popen(args=metrics_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - retcode = p.wait() - (out_str, err_str) = p.communicate() - if retcode != 0: - print 'Error in reading metrics information' - print 'Output: ', out_str - else: - # The overall qps is printed at the end of the line - m = re.search('\d+$', out_str) - qps = int(m.group()) if m else 0 - except Exception as ex: - print 'Exception while reading metrics information: ' + str(ex) - return qps - - -def run_client(): - """This is a wrapper around the stress test client and performs the following: - 1) Create the following two tables in Big Query: - (i) Summary table: To record events like the test started, completed - successfully or failed - (ii) Qps table: To periodically record the QPS sent by this client - 2) Start the stress test client and add a row in the Big Query summary - table - 3) Once every few seconds (as specificed by the poll_interval_secs) poll - the status of the stress test client process and perform the - following: - 3.1) If the process is still running, get the current qps by invoking - the metrics client program and add a row in the Big Query - Qps table. Sleep for a duration specified by poll_interval_secs - 3.2) If the process exited successfully, add a row in the Big Query - Summary table and exit - 3.3) If the process failed, add a row in Big Query summary table and - wait forever. - NOTE: This script typically runs inside a GKE pod which means - that the pod gets destroyed when the script exits. However, in - case the stress test client fails, we would not want the pod to - be destroyed (since we might want to connect to the pod for - examining logs). This is the reason why the script waits forever - in case of failures - """ - env = dict(os.environ) - image_type = env['STRESS_TEST_IMAGE_TYPE'] - image_name = env['STRESS_TEST_IMAGE'] - args_str = env['STRESS_TEST_ARGS_STR'] - metrics_client_image = env['METRICS_CLIENT_IMAGE'] - metrics_client_args_str = env['METRICS_CLIENT_ARGS_STR'] - run_id = env['RUN_ID'] - pod_name = env['POD_NAME'] - logfile_name = env.get('LOGFILE_NAME') - poll_interval_secs = float(env['POLL_INTERVAL_SECS']) - project_id = env['GCP_PROJECT_ID'] - dataset_id = env['DATASET_ID'] - summary_table_id = env['SUMMARY_TABLE_ID'] - qps_table_id = env['QPS_TABLE_ID'] - - bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id, - dataset_id, summary_table_id, qps_table_id) - bq_helper.initialize() - - # Create BigQuery Dataset and Tables: Summary Table and Metrics Table - if not bq_helper.setup_tables(): - print 'Error in creating BigQuery tables' - return - - start_time = datetime.datetime.now() - - logfile = None - details = 'Logging to stdout' - if logfile_name is not None: - print 'Opening logfile: %s ...' 
% logfile_name - details = 'Logfile: %s' % logfile_name - logfile = open(logfile_name, 'w') - - # Update status that the test is starting (in the status table) - bq_helper.insert_summary_row(EventType.STARTING, details) - - metrics_cmd = [metrics_client_image - ] + [x for x in metrics_client_args_str.split()] - stress_cmd = [image_name] + [x for x in args_str.split()] - - print 'Launching process %s ...' % stress_cmd - stress_p = subprocess.Popen(args=stress_cmd, - stdout=logfile, - stderr=subprocess.STDOUT) - - qps_history = [1, 1, 1] # Maintain the last 3 qps readings - qps_history_idx = 0 # Index into the qps_history list - - is_error = False - while True: - # Check if stress_client is still running. If so, collect metrics and upload - # to BigQuery status table - if stress_p.poll() is not None: - end_time = datetime.datetime.now().isoformat() - event_type = EventType.SUCCESS - details = 'End time: %s' % end_time - if stress_p.returncode != 0: - event_type = EventType.FAILURE - details = 'Return code = %d. End time: %s' % (stress_p.returncode, - end_time) - is_error = True - bq_helper.insert_summary_row(event_type, details) - print details - break - - # Stress client still running. Get metrics - qps = _get_qps(metrics_cmd) - qps_recorded_at = datetime.datetime.now().isoformat() - print 'qps: %d at %s' % (qps, qps_recorded_at) - - # If QPS has been zero for the last 3 iterations, flag it as error and exit - qps_history[qps_history_idx] = qps - qps_history_idx = (qps_history_idx + 1) % len(qps_history) - if sum(qps_history) == 0: - details = 'QPS has been zero for the last %d seconds - as of : %s' % ( - poll_interval_secs * 3, qps_recorded_at) - is_error = True - bq_helper.insert_summary_row(EventType.FAILURE, details) - print details - break - - # Upload qps metrics to BiqQuery - bq_helper.insert_qps_row(qps, qps_recorded_at) - - time.sleep(poll_interval_secs) - - if is_error: - print 'Waiting indefinitely..' - select.select([], [], []) - - print 'Completed' - return - - -if __name__ == '__main__': - run_client() diff --git a/tools/run_tests/stress_test/run_server.py b/tools/run_tests/stress_test/run_server.py deleted file mode 100755 index 64322f6100..0000000000 --- a/tools/run_tests/stress_test/run_server.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright 2015-2016, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
diff --git a/tools/run_tests/stress_test/run_server.py b/tools/run_tests/stress_test/run_server.py
deleted file mode 100755
index 64322f6100..0000000000
--- a/tools/run_tests/stress_test/run_server.py
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/bin/env python2.7
-# Copyright 2015-2016, Google Inc.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-#     * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following disclaimer
-# in the documentation and/or other materials provided with the
-# distribution.
-#     * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import datetime
-import os
-import select
-import subprocess
-import sys
-import time
-
-from stress_test_utils import BigQueryHelper
-from stress_test_utils import EventType
-
-
-def run_server():
-  """This is a wrapper around the interop server and performs the following:
-      1) Create a 'Summary table' in Big Query to record events like the server
-         started, completed successfully or failed. NOTE: This also creates
-         another table called the QPS table which is currently NOT needed on the
-         server (it is needed on the stress test clients)
-      2) Start the server process and add a row in Big Query summary table
-      3) Wait for the server process to terminate. The server process does not
-         terminate unless there is an error.
-         If the server process terminated with a failure, add a row in Big Query
-         and wait forever.
-         NOTE: This script typically runs inside a GKE pod which means that the
-         pod gets destroyed when the script exits. However, in case the server
-         process fails, we would not want the pod to be destroyed (since we
-         might want to connect to the pod for examining logs). This is the
-         reason why the script waits forever in case of failures.
-  """
-
-  # Read the parameters from environment variables
-  env = dict(os.environ)
-
-  run_id = env['RUN_ID']  # The unique run id for this test
-  image_type = env['STRESS_TEST_IMAGE_TYPE']
-  image_name = env['STRESS_TEST_IMAGE']
-  args_str = env['STRESS_TEST_ARGS_STR']
-  pod_name = env['POD_NAME']
-  project_id = env['GCP_PROJECT_ID']
-  dataset_id = env['DATASET_ID']
-  summary_table_id = env['SUMMARY_TABLE_ID']
-  qps_table_id = env['QPS_TABLE_ID']
-
-  logfile_name = env.get('LOGFILE_NAME')
-
-  print('pod_name: %s, project_id: %s, run_id: %s, dataset_id: %s, '
-        'summary_table_id: %s, qps_table_id: %s') % (
-            pod_name, project_id, run_id, dataset_id, summary_table_id,
-            qps_table_id)
-
-  bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id,
-                             dataset_id, summary_table_id, qps_table_id)
-  bq_helper.initialize()
-
-  # Create BigQuery Dataset and Tables: Summary Table and Metrics Table
-  if not bq_helper.setup_tables():
-    print 'Error in creating BigQuery tables'
-    return
-
-  start_time = datetime.datetime.now()
-
-  logfile = None
-  details = 'Logging to stdout'
-  if logfile_name is not None:
-    print 'Opening log file: ', logfile_name
-    logfile = open(logfile_name, 'w')
-    details = 'Logfile: %s' % logfile_name
-
-  # Update status that the test is starting (in the status table)
-  bq_helper.insert_summary_row(EventType.STARTING, details)
-
-  stress_cmd = [image_name] + [x for x in args_str.split()]
-
-  print 'Launching process %s ...' % stress_cmd
-  stress_p = subprocess.Popen(args=stress_cmd,
-                              stdout=logfile,
-                              stderr=subprocess.STDOUT)
-
-  returncode = stress_p.wait()
-  if returncode != 0:
-    end_time = datetime.datetime.now().isoformat()
-    event_type = EventType.FAILURE
-    details = 'Returncode: %d; End time: %s' % (returncode, end_time)
-    bq_helper.insert_summary_row(event_type, details)
-    print 'Waiting indefinitely..'
-    select.select([], [], [])
-  return returncode
-
-
-if __name__ == '__main__':
-  run_server()
diff --git a/tools/run_tests/stress_test/run_stress_tests_on_gke.py b/tools/run_tests/stress_test/run_stress_tests_on_gke.py
new file mode 100755
index 0000000000..634eb1aca5
--- /dev/null
+++ b/tools/run_tests/stress_test/run_stress_tests_on_gke.py
@@ -0,0 +1,556 @@
+#!/usr/bin/env python2.7
+# Copyright 2015-2016, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import argparse
+import datetime
+import os
+import subprocess
+import sys
+import time
+
+stress_test_utils_dir = os.path.abspath(os.path.join(
+    os.path.dirname(__file__), '../../gcp/stress_test'))
+sys.path.append(stress_test_utils_dir)
+from stress_test_utils import BigQueryHelper
+
+kubernetes_api_dir = os.path.abspath(os.path.join(
+    os.path.dirname(__file__), '../../gcp/utils'))
+sys.path.append(kubernetes_api_dir)
+
+import kubernetes_api
+
+_GRPC_ROOT = os.path.abspath(os.path.join(
+    os.path.dirname(sys.argv[0]), '../../..'))
+os.chdir(_GRPC_ROOT)
+
+# num of seconds to wait for the GKE image to start and warmup
+_GKE_IMAGE_WARMUP_WAIT_SECS = 60
+
+_SERVER_POD_NAME = 'stress-server'
+_CLIENT_POD_NAME_PREFIX = 'stress-client'
+_DATASET_ID_PREFIX = 'stress_test'
+_SUMMARY_TABLE_ID = 'summary'
+_QPS_TABLE_ID = 'qps'
+
+_DEFAULT_DOCKER_IMAGE_NAME = 'grpc_stress_test'
+
+# The default port on which the kubernetes proxy server is started on localhost
+# (i.e kubectl proxy --port=)
+_DEFAULT_KUBERNETES_PROXY_PORT = 8001
+
+# How frequently should the stress client wrapper script (running inside a GKE
+# container) poll the health of the stress client (also running inside the GKE
+# container) and upload metrics to BigQuery
+_DEFAULT_STRESS_CLIENT_POLL_INTERVAL_SECS = 60
+
+# The default setting for stress test server and client
+_DEFAULT_STRESS_SERVER_PORT = 8080
+_DEFAULT_METRICS_PORT = 8081
+_DEFAULT_TEST_CASES_STR = 'empty_unary:1,large_unary:1,client_streaming:1,server_streaming:1,empty_stream:1'
+_DEFAULT_NUM_CHANNELS_PER_SERVER = 5
+_DEFAULT_NUM_STUBS_PER_CHANNEL = 10
+_DEFAULT_METRICS_COLLECTION_INTERVAL_SECS = 30
+
+# Number of stress client instances to launch
+_DEFAULT_NUM_CLIENTS = 3
+
+# How frequently should this test monitor the health of Stress clients and
+# Servers running in GKE
+_DEFAULT_TEST_POLL_INTERVAL_SECS = 60
+
+# Default run time for this test (2 hours)
+_DEFAULT_TEST_DURATION_SECS = 7200
+
+# The number of seconds it would take a GKE pod to warm up (i.e get to 'Running'
+# state from the time of creation). Ideally this is something the test should
+# automatically determine by using Kubernetes API to poll the pods status.
+_DEFAULT_GKE_WARMUP_SECS = 60
+
+
+class KubernetesProxy:
+  """ Class to start a proxy on localhost to the Kubernetes API server """
+
+  def __init__(self, api_port):
+    self.port = api_port
+    self.p = None
+    self.started = False
+
+  def start(self):
+    cmd = ['kubectl', 'proxy', '--port=%d' % self.port]
+    self.p = subprocess.Popen(args=cmd)
+    self.started = True
+    time.sleep(2)
+    print '..Started'
+
+  def get_port(self):
+    return self.port
+
+  def is_started(self):
+    return self.started
+
+  def __del__(self):
+    if self.p is not None:
+      print 'Shutting down Kubernetes proxy..'
+      self.p.kill()
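KubernetesProxy above sleeps a fixed two seconds after spawning kubectl proxy and hopes the proxy is up. A sturdier start() would poll the port until it accepts a connection; a sketch of that check (the timeout values are arbitrary assumptions, not taken from this patch):

    import socket
    import time

    def wait_for_port(port, host='localhost', timeout_secs=10.0):
      """Polls until something listens on host:port; returns True on success."""
      deadline = time.time() + timeout_secs
      while time.time() < deadline:
        try:
          sock = socket.create_connection((host, port), 1.0)
          sock.close()
          return True
        except socket.error:
          time.sleep(0.1)
      return False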
+
+
+class TestSettings:
+
+  def __init__(self, build_docker_image, test_poll_interval_secs,
+               test_duration_secs, kubernetes_proxy_port):
+    self.build_docker_image = build_docker_image
+    self.test_poll_interval_secs = test_poll_interval_secs
+    self.test_duration_secs = test_duration_secs
+    self.kubernetes_proxy_port = kubernetes_proxy_port
+
+
+class GkeSettings:
+
+  def __init__(self, project_id, docker_image_name):
+    self.project_id = project_id
+    self.docker_image_name = docker_image_name
+    self.tag_name = 'gcr.io/%s/%s' % (project_id, docker_image_name)
+
+
+class BigQuerySettings:
+
+  def __init__(self, run_id, dataset_id, summary_table_id, qps_table_id):
+    self.run_id = run_id
+    self.dataset_id = dataset_id
+    self.summary_table_id = summary_table_id
+    self.qps_table_id = qps_table_id
+
+
+class StressServerSettings:
+
+  def __init__(self, server_pod_name, server_port):
+    self.server_pod_name = server_pod_name
+    self.server_port = server_port
+
+
+class StressClientSettings:
+
+  def __init__(self, num_clients, client_pod_name_prefix, server_pod_name,
+               server_port, metrics_port, metrics_collection_interval_secs,
+               stress_client_poll_interval_secs, num_channels_per_server,
+               num_stubs_per_channel, test_cases_str):
+    self.num_clients = num_clients
+    self.client_pod_name_prefix = client_pod_name_prefix
+    self.server_pod_name = server_pod_name
+    self.server_port = server_port
+    self.metrics_port = metrics_port
+    self.metrics_collection_interval_secs = metrics_collection_interval_secs
+    self.stress_client_poll_interval_secs = stress_client_poll_interval_secs
+    self.num_channels_per_server = num_channels_per_server
+    self.num_stubs_per_channel = num_stubs_per_channel
+    self.test_cases_str = test_cases_str
+
+    # == Derived properties ==
+    # Note: Client can accept a list of server addresses (a comma separated list
+    # of 'server_name:server_port'). In this case, we only have one server
+    # address to pass
+    self.server_addresses = '%s.default.svc.cluster.local:%d' % (
+        server_pod_name, server_port)
+    self.client_pod_names_list = ['%s-%d' % (client_pod_name_prefix, i)
+                                  for i in range(1, num_clients + 1)]
+
+
+def _build_docker_image(image_name, tag_name):
+  """ Build the docker image and tag it for the GKE repository """
+  print 'Building docker image: %s' % image_name
+  os.environ['INTEROP_IMAGE'] = image_name
+  os.environ['INTEROP_IMAGE_REPOSITORY_TAG'] = tag_name
+  # Note that 'BASE_NAME' HAS to be 'grpc_interop_stress_cxx' since the script
+  # build_interop_stress_image.sh invokes the following script:
+  # tools/dockerfile/$BASE_NAME/build_interop_stress.sh
+  os.environ['BASE_NAME'] = 'grpc_interop_stress_cxx'
+  cmd = ['tools/jenkins/build_interop_stress_image.sh']
+  retcode = subprocess.call(args=cmd)
+  if retcode != 0:
+    print 'Error in building docker image'
+    return False
+  return True
+
+
+def _push_docker_image_to_gke_registry(docker_tag_name):
+  """Executes 'gcloud docker push ' to push the image to GKE registry"""
+  cmd = ['gcloud', 'docker', 'push', docker_tag_name]
+  print 'Pushing %s to GKE registry..' % docker_tag_name
+  retcode = subprocess.call(args=cmd)
+  if retcode != 0:
+    print 'Error in pushing docker image %s to the GKE registry' % docker_tag_name
+    return False
+  return True
+
+
+def _launch_server(gke_settings, stress_server_settings, bq_settings,
+                   kubernetes_proxy):
+  """ Launches a stress test server instance in GKE cluster """
+  if not kubernetes_proxy.is_started():
+    print 'Kubernetes proxy must be started before calling this function'
+    return False
+
+  # This is the wrapper script that is run in the container. This script runs
+  # the actual stress test server
+  server_cmd_list = ['/var/local/git/grpc/tools/gcp/stress_test/run_server.py']
+
+  # run_server.py does not take any args from the command line. The args are
+  # instead passed via environment variables (see server_env below)
+  server_arg_list = []
+
+  # The parameters to the script run_server.py are injected into the container
+  # via environment variables
+  server_env = {
+      'STRESS_TEST_IMAGE_TYPE': 'SERVER',
+      'STRESS_TEST_IMAGE': '/var/local/git/grpc/bins/opt/interop_server',
+      'STRESS_TEST_ARGS_STR': '--port=%s' % stress_server_settings.server_port,
+      'RUN_ID': bq_settings.run_id,
+      'POD_NAME': stress_server_settings.server_pod_name,
+      'GCP_PROJECT_ID': gke_settings.project_id,
+      'DATASET_ID': bq_settings.dataset_id,
+      'SUMMARY_TABLE_ID': bq_settings.summary_table_id,
+      'QPS_TABLE_ID': bq_settings.qps_table_id
+  }
+
+  # Launch Server
+  is_success = kubernetes_api.create_pod_and_service(
+      'localhost',
+      kubernetes_proxy.get_port(),
+      'default',  # Use 'default' namespace
+      stress_server_settings.server_pod_name,
+      gke_settings.tag_name,
+      [stress_server_settings.server_port],  # Port that should be exposed
+      server_cmd_list,
+      server_arg_list,
+      server_env,
+      True  # Headless = True for server. Since we want DNS records to be created by GKE
+  )
+
+  return is_success
+
+
+def _launch_client(gke_settings, stress_server_settings, stress_client_settings,
+                   bq_settings, kubernetes_proxy):
+  """ Launches a configurable number of stress test clients on GKE cluster """
+  if not kubernetes_proxy.is_started():
+    print 'Kubernetes proxy must be started before calling this function'
+    return False
+
+  stress_client_arg_list = [
+      '--server_addresses=%s' % stress_client_settings.server_addresses,
+      '--test_cases=%s' % stress_client_settings.test_cases_str,
+      '--num_stubs_per_channel=%d' %
+      stress_client_settings.num_stubs_per_channel
+  ]
+
+  # This is the wrapper script that is run in the container. This script runs
+  # the actual stress client
+  client_cmd_list = ['/var/local/git/grpc/tools/gcp/stress_test/run_client.py']
+
+  # run_client.py takes no args. All args are passed as env variables (see
+  # client_env)
+  client_arg_list = []
+
+  metrics_server_address = 'localhost:%d' % stress_client_settings.metrics_port
+  metrics_client_arg_list = [
+      '--metrics_server_address=%s' % metrics_server_address,
+      '--total_only=true'
+  ]
+
+  # The parameters to the script run_client.py are injected into the container
+  # via environment variables
+  client_env = {
+      'STRESS_TEST_IMAGE_TYPE': 'CLIENT',
+      'STRESS_TEST_IMAGE': '/var/local/git/grpc/bins/opt/stress_test',
+      'STRESS_TEST_ARGS_STR': ' '.join(stress_client_arg_list),
+      'METRICS_CLIENT_IMAGE': '/var/local/git/grpc/bins/opt/metrics_client',
+      'METRICS_CLIENT_ARGS_STR': ' '.join(metrics_client_arg_list),
+      'RUN_ID': bq_settings.run_id,
+      'POLL_INTERVAL_SECS':
+          str(stress_client_settings.stress_client_poll_interval_secs),
+      'GCP_PROJECT_ID': gke_settings.project_id,
+      'DATASET_ID': bq_settings.dataset_id,
+      'SUMMARY_TABLE_ID': bq_settings.summary_table_id,
+      'QPS_TABLE_ID': bq_settings.qps_table_id
+  }
+
+  for pod_name in stress_client_settings.client_pod_names_list:
+    client_env['POD_NAME'] = pod_name
+    is_success = kubernetes_api.create_pod_and_service(
+        'localhost',  # Since proxy is running on localhost
+        kubernetes_proxy.get_port(),
+        'default',  # default namespace
+        pod_name,
+        gke_settings.tag_name,
+        [stress_client_settings.metrics_port],  # Client pods expose metrics port
+        client_cmd_list,
+        client_arg_list,
+        client_env,
+        False  # Client is not a headless service
+    )
+    if not is_success:
+      print 'Error in launching client %s' % pod_name
+      return False
+
+  return True
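One subtlety in _launch_client above: a single client_env dict is reused across the pod loop with POD_NAME overwritten each iteration, which is safe only if kubernetes_api.create_pod_and_service consumes the mapping synchronously. Copying per pod sidesteps the aliasing question entirely; a sketch (make_pod_envs is a hypothetical helper, not part of this patch):

    def make_pod_envs(base_env, pod_names):
      """Yields (pod_name, env) pairs with an independent env dict per pod."""
      for pod_name in pod_names:
        pod_env = dict(base_env)  # shallow copy: pods never share the dict
        pod_env['POD_NAME'] = pod_name
        yield pod_name, pod_env

    for name, env in make_pod_envs({'RUN_ID': 'r1'}, ['stress-client-1',
                                                      'stress-client-2']):
      print name, env['POD_NAME']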
+
+
+def _launch_server_and_client(gke_settings, stress_server_settings,
+                              stress_client_settings, bq_settings,
+                              kubernetes_proxy_port):
+  # Start kubernetes proxy
+  print 'Kubernetes proxy'
+  kubernetes_proxy = KubernetesProxy(kubernetes_proxy_port)
+  kubernetes_proxy.start()
+
+  print 'Launching server..'
+  is_success = _launch_server(gke_settings, stress_server_settings, bq_settings,
+                              kubernetes_proxy)
+  if not is_success:
+    print 'Error in launching server'
+    return False
+
+  # Server takes a while to start.
+  # TODO(sree) Use Kubernetes API to query the status of the server instead of
+  # sleeping
+  print 'Waiting for %s seconds for the server to start...' % _GKE_IMAGE_WARMUP_WAIT_SECS
+  time.sleep(_GKE_IMAGE_WARMUP_WAIT_SECS)
+
+  # Launch client
+  client_pod_name_prefix = 'stress-client'
+  is_success = _launch_client(gke_settings, stress_server_settings,
+                              stress_client_settings, bq_settings,
+                              kubernetes_proxy)
+
+  if not is_success:
+    print 'Error in launching client(s)'
+    return False
+
+  print 'Waiting for %s seconds for the client images to start...' % _GKE_IMAGE_WARMUP_WAIT_SECS
+  time.sleep(_GKE_IMAGE_WARMUP_WAIT_SECS)
+  return True
+
+
+def _delete_server_and_client(stress_server_settings, stress_client_settings,
+                              kubernetes_proxy_port):
+  kubernetes_proxy = KubernetesProxy(kubernetes_proxy_port)
+  kubernetes_proxy.start()
+
+  # Delete clients first
+  is_success = True
+  for pod_name in stress_client_settings.client_pod_names_list:
+    is_success = kubernetes_api.delete_pod_and_service(
+        'localhost', kubernetes_proxy_port, 'default', pod_name)
+    if not is_success:
+      return False
+
+  # Delete server
+  is_success = kubernetes_api.delete_pod_and_service(
+      'localhost', kubernetes_proxy_port, 'default',
+      stress_server_settings.server_pod_name)
+  return is_success
+
+
+def run_test_main(test_settings, gke_settings, stress_server_settings,
+                  stress_client_settings):
+  is_success = True
+
+  if test_settings.build_docker_image:
+    is_success = _build_docker_image(gke_settings.docker_image_name,
+                                     gke_settings.tag_name)
+    if not is_success:
+      return False
+
+    is_success = _push_docker_image_to_gke_registry(gke_settings.tag_name)
+    if not is_success:
+      return False
+
+  # Create a unique id for this run (Note: Using timestamp instead of UUID to
+  # make it easier to deduce the date/time of the run just by looking at the
+  # run id. This is useful in debugging when looking at records in BigQuery)
+  run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
+  dataset_id = '%s_%s' % (_DATASET_ID_PREFIX, run_id)
+
+  # Big Query settings (common for both Stress Server and Client)
+  bq_settings = BigQuerySettings(run_id, dataset_id, _SUMMARY_TABLE_ID,
+                                 _QPS_TABLE_ID)
+
+  bq_helper = BigQueryHelper(run_id, '', '', args.project_id, dataset_id,
+                             _SUMMARY_TABLE_ID, _QPS_TABLE_ID)
+  bq_helper.initialize()
+
+  try:
+    is_success = _launch_server_and_client(gke_settings, stress_server_settings,
+                                           stress_client_settings, bq_settings,
+                                           test_settings.kubernetes_proxy_port)
+    if not is_success:
+      return False
+
+    start_time = datetime.datetime.now()
+    end_time = start_time + datetime.timedelta(
+        seconds=test_settings.test_duration_secs)
+    print 'Running the test until %s' % end_time.isoformat()
+
+    while True:
+      if datetime.datetime.now() > end_time:
+        print 'Test was run for %d seconds' % test_settings.test_duration_secs
+        break
+
+      # Check if either stress server or clients have failed
+      if bq_helper.check_if_any_tests_failed():
+        is_success = False
+        print 'Some tests failed.'
+        break
+
+      # Things seem to be running fine. Wait until next poll time to check the
+      # status
+      print 'Sleeping for %d seconds..' % test_settings.test_poll_interval_secs
+      time.sleep(test_settings.test_poll_interval_secs)
+
+    # Print BigQuery tables
+    bq_helper.print_summary_records()
+    bq_helper.print_qps_records()
+
+  finally:
+    # If is_success is False at this point, it means that the stress tests were
+    # started successfully but failed while running the tests. In this case we
+    # should not delete the pods (since they contain all the failure
+    # information)
+    if is_success:
+      _delete_server_and_client(stress_server_settings, stress_client_settings,
+                                test_settings.kubernetes_proxy_port)
+
+  return is_success
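run_test_main's monitoring loop above is a plain deadline-plus-poll pattern: run until end_time, checking for failures every poll interval. Reduced to its control flow (check_failed is a hypothetical stand-in for bq_helper.check_if_any_tests_failed):

    import datetime
    import time

    def poll_until_deadline(duration_secs, poll_interval_secs, check_failed):
      """Returns False on the first detected failure, True once the deadline passes."""
      deadline = datetime.datetime.now() + datetime.timedelta(seconds=duration_secs)
      while datetime.datetime.now() <= deadline:
        if check_failed():
          return False
        time.sleep(poll_interval_secs)
      return True

    # e.g. poll_until_deadline(7200, 60, bq_helper.check_if_any_tests_failed)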
+
+
+argp = argparse.ArgumentParser(
+    description='Launch stress tests in GKE',
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+argp.add_argument('--project_id',
+                  required=True,
+                  help='The Google Cloud Platform Project Id')
+argp.add_argument('--num_clients',
+                  default=1,
+                  type=int,
+                  help='Number of client instances to start')
+argp.add_argument('--docker_image_name',
+                  default=_DEFAULT_DOCKER_IMAGE_NAME,
+                  help='The name of the docker image containing stress client '
+                  'and stress servers')
+argp.add_argument('--build_docker_image',
+                  dest='build_docker_image',
+                  action='store_true',
+                  help='Build a docker image and push to Google Container '
+                  'Registry')
+argp.add_argument('--do_not_build_docker_image',
+                  dest='build_docker_image',
+                  action='store_false',
+                  help='Do not build and push docker image to Google Container '
+                  'Registry')
+argp.set_defaults(build_docker_image=True)
+
+argp.add_argument('--test_poll_interval_secs',
+                  default=_DEFAULT_TEST_POLL_INTERVAL_SECS,
+                  type=int,
+                  help='How frequently this script should monitor the '
+                  'health of stress clients and servers running in the GKE '
+                  'cluster')
+argp.add_argument('--test_duration_secs',
+                  default=_DEFAULT_TEST_DURATION_SECS,
+                  type=int,
+                  help='How long should this test be run')
+argp.add_argument('--kubernetes_proxy_port',
+                  default=_DEFAULT_KUBERNETES_PROXY_PORT,
+                  type=int,
+                  help='The port on which the kubernetes proxy (on localhost)'
+                  ' is started')
+argp.add_argument('--stress_server_port',
+                  default=_DEFAULT_STRESS_SERVER_PORT,
+                  type=int,
+                  help='The port on which the stress server (in GKE '
+                  'containers) listens')
+argp.add_argument('--stress_client_metrics_port',
+                  default=_DEFAULT_METRICS_PORT,
+                  type=int,
+                  help='The port on which the stress clients (in GKE '
+                  'containers) expose metrics')
+argp.add_argument('--stress_client_poll_interval_secs',
+                  default=_DEFAULT_STRESS_CLIENT_POLL_INTERVAL_SECS,
+                  type=int,
+                  help='How frequently the stress client wrapper script'
+                  ' running inside GKE should monitor the health of the actual'
+                  ' stress client process and upload the metrics to BigQuery')
+argp.add_argument('--stress_client_metrics_collection_interval_secs',
+                  default=_DEFAULT_METRICS_COLLECTION_INTERVAL_SECS,
+                  type=int,
+                  help='How frequently should metrics be collected in-memory on'
+                  ' the stress clients (running inside GKE containers). Note '
+                  'that this is NOT the same as the upload-to-BigQuery '
+                  'frequency. The metrics upload frequency is controlled by the'
+                  ' --stress_client_poll_interval_secs flag')
+argp.add_argument('--stress_client_num_channels_per_server',
+                  default=_DEFAULT_NUM_CHANNELS_PER_SERVER,
+                  type=int,
+                  help='The number of channels created to each server from a '
+                  'stress client')
+argp.add_argument('--stress_client_num_stubs_per_channel',
+                  default=_DEFAULT_NUM_STUBS_PER_CHANNEL,
+                  type=int,
+                  help='The number of stubs created per channel. This number '
+                  'indicates the max number of RPCs that can be made in '
+                  'parallel on each channel at any given time')
+argp.add_argument('--stress_client_test_cases',
+                  default=_DEFAULT_TEST_CASES_STR,
+                  help='List of test cases (with weights) to be executed by the'
+                  ' stress test client. The list is in the following format:\n'
+                  '  ..\n'
+                  ' (Note: The weights do not have to add up to 100)')
+
+if __name__ == '__main__':
+  args = argp.parse_args()
+
+  test_settings = TestSettings(
+      args.build_docker_image, args.test_poll_interval_secs,
+      args.test_duration_secs, args.kubernetes_proxy_port)
+
+  gke_settings = GkeSettings(args.project_id, args.docker_image_name)
+
+  stress_server_settings = StressServerSettings(_SERVER_POD_NAME,
+                                                args.stress_server_port)
+  stress_client_settings = StressClientSettings(
+      args.num_clients, _CLIENT_POD_NAME_PREFIX, _SERVER_POD_NAME,
+      args.stress_server_port, args.stress_client_metrics_port,
+      args.stress_client_metrics_collection_interval_secs,
+      args.stress_client_poll_interval_secs,
+      args.stress_client_num_channels_per_server,
+      args.stress_client_num_stubs_per_channel, args.stress_client_test_cases)
+
+  run_test_main(test_settings, gke_settings, stress_server_settings,
+                stress_client_settings)
diff --git a/tools/run_tests/stress_test/stress_test_utils.py b/tools/run_tests/stress_test/stress_test_utils.py
deleted file mode 100755
index 7adc0068f9..0000000000
--- a/tools/run_tests/stress_test/stress_test_utils.py
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/usr/bin/env python2.7
-# Copyright 2015-2016, Google Inc.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-#     * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following disclaimer
-# in the documentation and/or other materials provided with the
-# distribution.
-#     * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import datetime
-import json
-import os
-import re
-import select
-import subprocess
-import sys
-import time
-
-# Import big_query_utils module
-bq_utils_dir = os.path.abspath(os.path.join(
-    os.path.dirname(__file__), '../../big_query'))
-sys.path.append(bq_utils_dir)
-import big_query_utils as bq_utils
-
-
-class EventType:
-  STARTING = 'STARTING'
-  SUCCESS = 'SUCCESS'
-  FAILURE = 'FAILURE'
-
-
-class BigQueryHelper:
-  """Helper class for the stress test wrappers to interact with BigQuery.
- """ - - def __init__(self, run_id, image_type, pod_name, project_id, dataset_id, - summary_table_id, qps_table_id): - self.run_id = run_id - self.image_type = image_type - self.pod_name = pod_name - self.project_id = project_id - self.dataset_id = dataset_id - self.summary_table_id = summary_table_id - self.qps_table_id = qps_table_id - - def initialize(self): - self.bq = bq_utils.create_big_query() - - def setup_tables(self): - return bq_utils.create_dataset(self.bq, self.project_id, self.dataset_id) \ - and self.__create_summary_table() \ - and self.__create_qps_table() - - def insert_summary_row(self, event_type, details): - row_values_dict = { - 'run_id': self.run_id, - 'image_type': self.image_type, - 'pod_name': self.pod_name, - 'event_date': datetime.datetime.now().isoformat(), - 'event_type': event_type, - 'details': details - } - # row_unique_id is something that uniquely identifies the row (BigQuery uses - # it for duplicate detection). - row_unique_id = '%s_%s_%s' % (self.run_id, self.pod_name, event_type) - row = bq_utils.make_row(row_unique_id, row_values_dict) - return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id, - self.summary_table_id, [row]) - - def insert_qps_row(self, qps, recorded_at): - row_values_dict = { - 'run_id': self.run_id, - 'pod_name': self.pod_name, - 'recorded_at': recorded_at, - 'qps': qps - } - - # row_unique_id is something that uniquely identifies the row (BigQuery uses - # it for duplicate detection). - row_unique_id = '%s_%s_%s' % (self.run_id, self.pod_name, recorded_at) - row = bq_utils.make_row(row_unique_id, row_values_dict) - return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id, - self.qps_table_id, [row]) - - def check_if_any_tests_failed(self, num_query_retries=3): - query = ('SELECT event_type FROM %s.%s WHERE run_id = \'%s\' AND ' - 'event_type="%s"') % (self.dataset_id, self.summary_table_id, - self.run_id, EventType.FAILURE) - query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) - page = self.bq.jobs().getQueryResults(**query_job['jobReference']).execute( - num_retries=num_query_retries) - num_failures = int(page['totalRows']) - print 'num rows: ', num_failures - return num_failures > 0 - - def print_summary_records(self, num_query_retries=3): - line = '-' * 120 - print line - print 'Summary records' - print 'Run Id: ', self.run_id - print 'Dataset Id: ', self.dataset_id - print line - query = ('SELECT pod_name, image_type, event_type, event_date, details' - ' FROM %s.%s WHERE run_id = \'%s\' ORDER by event_date;') % ( - self.dataset_id, self.summary_table_id, self.run_id) - query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) - - print '{:<25} {:<12} {:<12} {:<30} {}'.format( - 'Pod name', 'Image type', 'Event type', 'Date', 'Details') - print line - page_token = None - while True: - page = self.bq.jobs().getQueryResults( - pageToken=page_token, - **query_job['jobReference']).execute(num_retries=num_query_retries) - rows = page.get('rows', []) - for row in rows: - print '{:<25} {:<12} {:<12} {:<30} {}'.format( - row['f'][0]['v'], row['f'][1]['v'], row['f'][2]['v'], - row['f'][3]['v'], row['f'][4]['v']) - page_token = page.get('pageToken') - if not page_token: - break - - def print_qps_records(self, num_query_retries=3): - line = '-' * 80 - print line - print 'QPS Summary' - print 'Run Id: ', self.run_id - print 'Dataset Id: ', self.dataset_id - print line - query = ( - 'SELECT pod_name, recorded_at, qps FROM %s.%s WHERE run_id = \'%s\' ' - 'ORDER by recorded_at;') % 
-    query_job = bq_utils.sync_query_job(self.bq, self.project_id, query)
-    print '{:<25} {:30} {}'.format('Pod name', 'Recorded at', 'Qps')
-    print line
-    page_token = None
-    while True:
-      page = self.bq.jobs().getQueryResults(
-          pageToken=page_token,
-          **query_job['jobReference']).execute(num_retries=num_query_retries)
-      rows = page.get('rows', [])
-      for row in rows:
-        print '{:<25} {:30} {}'.format(row['f'][0]['v'], row['f'][1]['v'],
-                                       row['f'][2]['v'])
-      page_token = page.get('pageToken')
-      if not page_token:
-        break
-
-  def __create_summary_table(self):
-    summary_table_schema = [
-        ('run_id', 'STRING', 'Test run id'),
-        ('image_type', 'STRING', 'Client or Server?'),
-        ('pod_name', 'STRING', 'GKE pod hosting this image'),
-        ('event_date', 'STRING', 'The date of this event'),
-        ('event_type', 'STRING', 'STARTED/SUCCESS/FAILURE'),
-        ('details', 'STRING', 'Any other relevant details')
-    ]
-    desc = ('The table that contains START/SUCCESS/FAILURE events for '
-            ' the stress test clients and servers')
-    return bq_utils.create_table(self.bq, self.project_id, self.dataset_id,
-                                 self.summary_table_id, summary_table_schema,
-                                 desc)
-
-  def __create_qps_table(self):
-    qps_table_schema = [
-        ('run_id', 'STRING', 'Test run id'),
-        ('pod_name', 'STRING', 'GKE pod hosting this image'),
-        ('recorded_at', 'STRING', 'Metrics recorded at time'),
-        ('qps', 'INTEGER', 'Queries per second')
-    ]
-    desc = 'The table that contains the qps recorded at various intervals'
-    return bq_utils.create_table(self.bq, self.project_id, self.dataset_id,
-                                 self.qps_table_id, qps_table_schema, desc)
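The row_unique_id built by insert_summary_row and insert_qps_row above feeds BigQuery's insertId-based best-effort deduplication: a retried insert carrying the same insertId is dropped server-side. The row payload that bq_utils.make_row presumably assembles has this shape (an inference from the insertAll API, not a copy of big_query_utils):

    def make_row(unique_row_id, row_values_dict):
      """One streaming-insert row; insertId drives duplicate detection."""
      return {'insertId': unique_row_id, 'json': row_values_dict}

    row = make_row('run1_pod1_FAILURE',
                   {'run_id': 'run1', 'event_type': 'FAILURE'})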
-- cgit v1.2.3

From fa4b163feac008bf7147cdad4f2dd88ef778ad2c Mon Sep 17 00:00:00 2001
From: Jan Tattermusch
Date: Fri, 26 Feb 2016 17:14:01 -0800
Subject: windows C# distribtest

---
 test/distrib/csharp/DistribTest.sln                |  6 ++++++
 test/distrib/csharp/DistribTest/DistribTest.csproj | 20 ++++++++++++++++++++
 test/distrib/csharp/run_distrib_test.bat           | 21 +++++++++++++++++++++
 test/distrib/csharp/run_distrib_test.sh            |  4 +---
 test/distrib/csharp/update_version.sh              | 10 +++++++++-
 tools/run_tests/distribtest_targets.py             | 11 ++++++++++-
 6 files changed, 67 insertions(+), 5 deletions(-)
 create mode 100644 test/distrib/csharp/run_distrib_test.bat
(limited to 'tools')

diff --git a/test/distrib/csharp/DistribTest.sln b/test/distrib/csharp/DistribTest.sln
index 0eca35c30f..78d5397ca9 100644
--- a/test/distrib/csharp/DistribTest.sln
+++ b/test/distrib/csharp/DistribTest.sln
@@ -8,13 +8,19 @@ EndProject
 Global
     GlobalSection(SolutionConfigurationPlatforms) = preSolution
         Debug|Any CPU = Debug|Any CPU
+        Debug|x64 = Debug|x64
         Release|Any CPU = Release|Any CPU
+        Release|x64 = Release|x64
     EndGlobalSection
     GlobalSection(ProjectConfigurationPlatforms) = postSolution
        {A3E61CC3-3710-49A3-A830-A0066EDBCE2F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
        {A3E61CC3-3710-49A3-A830-A0066EDBCE2F}.Debug|Any CPU.Build.0 = Debug|Any CPU
+       {A3E61CC3-3710-49A3-A830-A0066EDBCE2F}.Debug|x64.ActiveCfg = Debug|x64
+       {A3E61CC3-3710-49A3-A830-A0066EDBCE2F}.Debug|x64.Build.0 = Debug|x64
        {A3E61CC3-3710-49A3-A830-A0066EDBCE2F}.Release|Any CPU.ActiveCfg = Release|Any CPU
        {A3E61CC3-3710-49A3-A830-A0066EDBCE2F}.Release|Any CPU.Build.0 = Release|Any CPU
+       {A3E61CC3-3710-49A3-A830-A0066EDBCE2F}.Release|x64.ActiveCfg = Release|x64
+       {A3E61CC3-3710-49A3-A830-A0066EDBCE2F}.Release|x64.Build.0 = Release|x64
     EndGlobalSection
     GlobalSection(SolutionProperties) = preSolution
        HideSolutionNode = FALSE
diff --git a/test/distrib/csharp/DistribTest/DistribTest.csproj b/test/distrib/csharp/DistribTest/DistribTest.csproj
index 124fc1bdf0..7605495f0f 100644
--- a/test/distrib/csharp/DistribTest/DistribTest.csproj
+++ b/test/distrib/csharp/DistribTest/DistribTest.csproj
@@ -32,6 +32,26 @@
     <ErrorReport>prompt</ErrorReport>
     <WarningLevel>4</WarningLevel>
   </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
+    <DebugSymbols>true</DebugSymbols>
+    <OutputPath>bin\x64\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <DebugType>full</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
+    <OutputPath>bin\x64\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
   <ItemGroup>
     <Reference Include="BouncyCastle.Crypto">
       <HintPath>..\packages\BouncyCastle.1.7.0\lib\Net40-Client\BouncyCastle.Crypto.dll</HintPath>
diff --git a/test/distrib/csharp/run_distrib_test.bat b/test/distrib/csharp/run_distrib_test.bat
new file mode 100644
index 0000000000..950894f668
--- /dev/null
+++ b/test/distrib/csharp/run_distrib_test.bat
@@ -0,0 +1,21 @@
+
+@rem enter this directory
+cd /d %~dp0
+
+@rem extract input artifacts
+powershell -Command "Add-Type -Assembly 'System.IO.Compression.FileSystem'; [System.IO.Compression.ZipFile]::ExtractToDirectory('../../../input_artifacts/csharp_nugets.zip', 'TestNugetFeed');"
+
+update_version.sh auto
+
+set NUGET=C:\nuget\nuget.exe
+%NUGET% restore || goto :error
+
+@call build_vs2015.bat DistribTest.sln %MSBUILD_EXTRA_ARGS% || goto :error
+
+%DISTRIBTEST_OUTPATH%\DistribTest.exe || goto :error
+
+goto :EOF
+
+:error
+echo Failed!
+exit /b %errorlevel%
diff --git a/test/distrib/csharp/run_distrib_test.sh b/test/distrib/csharp/run_distrib_test.sh
index 1de62041b3..934174a9a4 100755
--- a/test/distrib/csharp/run_distrib_test.sh
+++ b/test/distrib/csharp/run_distrib_test.sh
@@ -34,9 +34,7 @@ cd $(dirname $0)
 
 unzip -o "$EXTERNAL_GIT_ROOT/input_artifacts/csharp_nugets.zip" -d TestNugetFeed
 
-# Extract the version number from Grpc nuget package name.
-CSHARP_VERSION=$(ls TestNugetFeed | grep '^Grpc\.[0-9].*\.nupkg$' | sed s/^Grpc\.// | sed s/\.nupkg$//)
-./update_version.sh $CSHARP_VERSION
+./update_version.sh auto
 
 nuget restore
diff --git a/test/distrib/csharp/update_version.sh b/test/distrib/csharp/update_version.sh
index f2554e8998..b0d07721f6 100755
--- a/test/distrib/csharp/update_version.sh
+++ b/test/distrib/csharp/update_version.sh
@@ -32,5 +32,13 @@ set -e
 
 cd $(dirname $0)
 
+CSHARP_VERSION="$1"
+if [ "$CSHARP_VERSION" == "auto" ]
+then
+  # autodetect C# version
+  CSHARP_VERSION=$(ls TestNugetFeed | grep '^Grpc\.[0-9].*\.nupkg$' | sed s/^Grpc\.// | sed s/\.nupkg$//)
+  echo "Autodetected nuget ${CSHARP_VERSION}"
+fi
+
 # Replaces version placeholder with value provided as first argument.
-sed -ibak "s/__GRPC_NUGET_VERSION__/$1/g" DistribTest/packages.config DistribTest/DistribTest.csproj
+sed -ibak "s/__GRPC_NUGET_VERSION__/${CSHARP_VERSION}/g" DistribTest/packages.config DistribTest/DistribTest.csproj
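update_version.sh's new 'auto' mode above recovers the nuget version from the Grpc.<version>.nupkg filename with grep and sed. The same extraction in Python, for readers tracing the pipeline scripts (a hypothetical helper, not part of this patch):

    import os
    import re

    def detect_csharp_version(feed_dir):
      """Returns the version embedded in Grpc.<version>.nupkg, or None."""
      for name in os.listdir(feed_dir):
        match = re.match(r'^Grpc\.(\d.*)\.nupkg$', name)
        if match:
          return match.group(1)
      return None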
diff --git a/tools/run_tests/distribtest_targets.py b/tools/run_tests/distribtest_targets.py
index 933103f0a0..fb951b68f9 100644
--- a/tools/run_tests/distribtest_targets.py
+++ b/tools/run_tests/distribtest_targets.py
@@ -97,7 +97,14 @@ class CSharpDistribTest(object):
       return create_jobspec(self.name,
                             ['test/distrib/csharp/run_distrib_test.sh'],
                             environ={'EXTERNAL_GIT_ROOT': '../../..'})
     else:
-      raise Exception("Not supported yet.")
+      if self.arch == 'x64':
+        environ={'MSBUILD_EXTRA_ARGS': '/p:Platform=x64',
+                 'DISTRIBTEST_OUTPATH': 'DistribTest\\bin\\x64\\Debug'}
+      else:
+        environ={'DISTRIBTEST_OUTPATH': 'DistribTest\\bin\\\Debug'}
+      return create_jobspec(self.name,
+                            ['test\\distrib\\csharp\\run_distrib_test.bat'],
+                            environ=environ)
 
   def __str__(self):
     return self.name
@@ -240,6 +247,8 @@ def targets():
       CSharpDistribTest('linux', 'x64', 'ubuntu1510'),
       CSharpDistribTest('linux', 'x64', 'ubuntu1604'),
       CSharpDistribTest('macos', 'x86'),
+      CSharpDistribTest('windows', 'x86'),
+      CSharpDistribTest('windows', 'x64'),
       PythonDistribTest('linux', 'x64', 'wheezy'),
       PythonDistribTest('linux', 'x64', 'jessie'),
       PythonDistribTest('linux', 'x86', 'jessie'),
-- cgit v1.2.3

From e1dd18a945ea11d1eba412ea0483b3996a222c3b Mon Sep 17 00:00:00 2001
From: Sree Kuchibhotla
Date: Mon, 29 Feb 2016 13:28:55 -0800
Subject: Fix copyright

---
 test/cpp/interop/metrics_client.cc | 2 +-
 tools/gcp/utils/big_query_utils.py | 2 +-
 tools/gcp/utils/kubernetes_api.py  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
(limited to 'tools')

diff --git a/test/cpp/interop/metrics_client.cc b/test/cpp/interop/metrics_client.cc
index cc304f2e89..bd48c7d4ef 100644
--- a/test/cpp/interop/metrics_client.cc
+++ b/test/cpp/interop/metrics_client.cc
@@ -1,6 +1,6 @@
 /*
  *
- * Copyright 2015, Google Inc.
+ * Copyright 2015-2016, Google Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py
index e2379fd1aa..7bb1e14354 100755
--- a/tools/gcp/utils/big_query_utils.py
+++ b/tools/gcp/utils/big_query_utils.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python2.7
-# Copyright 2015-2016 Google Inc.
+# Copyright 2015-2016, Google Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/tools/gcp/utils/kubernetes_api.py b/tools/gcp/utils/kubernetes_api.py
index 2d3f771e93..e8ddd2f1b3 100755
--- a/tools/gcp/utils/kubernetes_api.py
+++ b/tools/gcp/utils/kubernetes_api.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python2.7
-# Copyright 2015-2016 Google Inc.
+# Copyright 2015-2016, Google Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
-- cgit v1.2.3

From 3f1aa9b99ae6752cc22cab2707b1d8c3846f21be Mon Sep 17 00:00:00 2001
From: Jan Tattermusch
Date: Mon, 29 Feb 2016 15:15:23 -0800
Subject: add copyright and cleanup python code

---
 test/distrib/csharp/run_distrib_test.bat | 28 ++++++++++++++++++++++++++++
 tools/run_tests/distribtest_targets.py   |  4 +++-
 2 files changed, 31 insertions(+), 1 deletion(-)
(limited to 'tools')

diff --git a/test/distrib/csharp/run_distrib_test.bat b/test/distrib/csharp/run_distrib_test.bat
index 950894f668..67bfc58ac8 100644
--- a/test/distrib/csharp/run_distrib_test.bat
+++ b/test/distrib/csharp/run_distrib_test.bat
@@ -1,3 +1,31 @@
+@rem Copyright 2016, Google Inc.
+@rem All rights reserved.
+@rem
+@rem Redistribution and use in source and binary forms, with or without
+@rem modification, are permitted provided that the following conditions are
+@rem met:
+@rem
+@rem     * Redistributions of source code must retain the above copyright
+@rem notice, this list of conditions and the following disclaimer.
+@rem     * Redistributions in binary form must reproduce the above
+@rem copyright notice, this list of conditions and the following disclaimer
+@rem in the documentation and/or other materials provided with the
+@rem distribution.
+@rem     * Neither the name of Google Inc. nor the names of its
+@rem contributors may be used to endorse or promote products derived from
+@rem this software without specific prior written permission.
+@rem
+@rem THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@rem "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@rem LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@rem A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+@rem OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+@rem SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+@rem LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+@rem DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+@rem THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+@rem (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+@rem OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 @rem enter this directory
 cd /d %~dp0
 
diff --git a/tools/run_tests/distribtest_targets.py b/tools/run_tests/distribtest_targets.py
index fb951b68f9..34cc1cd710 100644
--- a/tools/run_tests/distribtest_targets.py
+++ b/tools/run_tests/distribtest_targets.py
@@ -96,7 +96,7 @@ class CSharpDistribTest(object):
       return create_jobspec(self.name,
                             ['test/distrib/csharp/run_distrib_test.sh'],
                             environ={'EXTERNAL_GIT_ROOT': '../../..'})
-    else:
+    elif self.platform == 'windows':
       if self.arch == 'x64':
         environ={'MSBUILD_EXTRA_ARGS': '/p:Platform=x64',
                  'DISTRIBTEST_OUTPATH': 'DistribTest\\bin\\x64\\Debug'}
@@ -105,6 +105,8 @@ class CSharpDistribTest(object):
       return create_jobspec(self.name,
                             ['test\\distrib\\csharp\\run_distrib_test.bat'],
                             environ=environ)
+    else:
+      raise Exception("Not supported yet.")
 
   def __str__(self):
     return self.name
-- cgit v1.2.3

From 494f3128331adcf601903b00ea62b767d99f84c2 Mon Sep 17 00:00:00 2001
From: Jan Tattermusch
Date: Tue, 1 Mar 2016 13:55:42 -0800
Subject: fix reporting for multiple test runs

---
 tools/run_tests/jobset.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'tools')

diff --git a/tools/run_tests/jobset.py b/tools/run_tests/jobset.py
index adf178bb3c..a3b246dc08 100755
--- a/tools/run_tests/jobset.py
+++ b/tools/run_tests/jobset.py
@@ -384,7 +384,8 @@ class Jobset(object):
                 self._travis,
                 self._add_env)
       self._running.add(job)
-      self.resultset[job.GetSpec().shortname] = []
+      if not self.resultset.has_key(job.GetSpec().shortname):
+        self.resultset[job.GetSpec().shortname] = []
       return True
 
   def reap(self):
-- cgit v1.2.3
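The jobset.py change in the last commit above guards the resultset entry so that a repeated run of the same test appends to, rather than replaces, earlier results. dict.setdefault expresses the same guard in one call; shown standalone (the helper name is illustrative, behavior is equivalent in Python 2):

    def ensure_result_list(resultset, shortname):
      """Creates the result list once; later runs keep accumulating into it."""
      return resultset.setdefault(shortname, [])

    runs = {}
    ensure_result_list(runs, 'test_a').append('PASSED')
    ensure_result_list(runs, 'test_a').append('FAILED')  # second run preserved
    assert runs['test_a'] == ['PASSED', 'FAILED']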