6 files changed, 158 insertions, 34 deletions
diff --git a/tools/gcp/stress_test/run_client.py b/tools/gcp/stress_test/run_client.py
index e6b28d5624..2004bf6db4 100755
--- a/tools/gcp/stress_test/run_client.py
+++ b/tools/gcp/stress_test/run_client.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python2.7
-# Copyright 2015, Google Inc.
+# Copyright 2015-2016, Google Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,7 @@
 import datetime
 import os
 import re
+import resource
 import select
 import subprocess
 import sys
@@ -89,11 +90,16 @@ def run_client():
                examining logs). This is the reason why the script waits forever
                in case of failures
   """
+  # Set the 'core file' size to 'unlimited' so that 'core' files are generated
+  # if the client crashes (Note: This is not relevant for Java and Go clients)
+  resource.setrlimit(resource.RLIMIT_CORE,
+                     (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
+
   env = dict(os.environ)
   image_type = env['STRESS_TEST_IMAGE_TYPE']
-  image_name = env['STRESS_TEST_IMAGE']
+  stress_client_cmd = env['STRESS_TEST_CMD'].split()
   args_str = env['STRESS_TEST_ARGS_STR']
-  metrics_client_image = env['METRICS_CLIENT_IMAGE']
+  metrics_client_cmd = env['METRICS_CLIENT_CMD'].split()
   metrics_client_args_str = env['METRICS_CLIENT_ARGS_STR']
   run_id = env['RUN_ID']
   pod_name = env['POD_NAME']
@@ -103,6 +109,11 @@ def run_client():
   dataset_id = env['DATASET_ID']
   summary_table_id = env['SUMMARY_TABLE_ID']
   qps_table_id = env['QPS_TABLE_ID']
+  # The following parameter is to inform us whether the stress client runs
+  # forever until forcefully stopped or will it naturally stop after sometime.
+  # This way, we know that the stress client process should not terminate (even
+  # if it does with a success exit code) and flag the termination as a failure
+  will_run_forever = env.get('WILL_RUN_FOREVER', '1')
 
   bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id,
                              dataset_id, summary_table_id, qps_table_id)
@@ -125,9 +136,8 @@ def run_client():
   # Update status that the test is starting (in the status table)
   bq_helper.insert_summary_row(EventType.STARTING, details)
 
-  metrics_cmd = [metrics_client_image
-                ] + [x for x in metrics_client_args_str.split()]
-  stress_cmd = [image_name] + [x for x in args_str.split()]
+  metrics_cmd = metrics_client_cmd + [x for x in metrics_client_args_str.split()]
+  stress_cmd = stress_client_cmd + [x for x in args_str.split()]
 
   print 'Launching process %s ...' % stress_cmd
   stress_p = subprocess.Popen(args=stress_cmd,
@@ -141,11 +151,12 @@ def run_client():
   while True:
     # Check if stress_client is still running. If so, collect metrics and upload
     # to BigQuery status table
+    # If stress_p.poll() is not None, it means that the stress client terminated
     if stress_p.poll() is not None:
       end_time = datetime.datetime.now().isoformat()
       event_type = EventType.SUCCESS
       details = 'End time: %s' % end_time
-      if stress_p.returncode != 0:
+      if will_run_forever == '1' or stress_p.returncode != 0:
         event_type = EventType.FAILURE
         details = 'Return code = %d. End time: %s' % (stress_p.returncode,
                                                       end_time)
diff --git a/tools/gcp/stress_test/run_node.sh b/tools/gcp/stress_test/run_node.sh
new file mode 100755
index 0000000000..4a4da6fc8b
--- /dev/null
+++ b/tools/gcp/stress_test/run_node.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Copyright 2015-2016, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This is a wrapper script that was created to help run_server.py and
+# run_client.py to launch 'node js' stress clients and stress servers
+source ~/.nvm/nvm.sh
+
+set -ex
+
+$@
diff --git a/tools/gcp/stress_test/run_ruby.sh b/tools/gcp/stress_test/run_ruby.sh
new file mode 100755
index 0000000000..80d0567447
--- /dev/null
+++ b/tools/gcp/stress_test/run_ruby.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Copyright 2015-2016, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This is a wrapper script that was created to help run_server.py and
+# run_client.py to launch 'node js' stress clients and stress servers
+source /etc/profile.d/rvm.sh
+
+set -ex
+
+$@
diff --git a/tools/gcp/stress_test/run_server.py b/tools/gcp/stress_test/run_server.py
index dc4741b95e..a666ae2900 100755
--- a/tools/gcp/stress_test/run_server.py
+++ b/tools/gcp/stress_test/run_server.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python2.7
-# Copyright 2015, Google Inc.
+# Copyright 2015-2016, Google Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -30,6 +30,7 @@
 
 import datetime
 import os
+import resource
 import select
 import subprocess
 import sys
@@ -56,26 +57,36 @@ def run_server():
          might want to connect to the pod for examining logs). This is the
          reason why the script waits forever in case of failures.
   """
+  # Set the 'core file' size to 'unlimited' so that 'core' files are generated
+  # if the server crashes (Note: This is not relevant for Java and Go servers)
+  resource.setrlimit(resource.RLIMIT_CORE,
+                     (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
 
   # Read the parameters from environment variables
   env = dict(os.environ)
 
   run_id = env['RUN_ID']  # The unique run id for this test
   image_type = env['STRESS_TEST_IMAGE_TYPE']
-  image_name = env['STRESS_TEST_IMAGE']
+  stress_server_cmd = env['STRESS_TEST_CMD'].split()
   args_str = env['STRESS_TEST_ARGS_STR']
   pod_name = env['POD_NAME']
   project_id = env['GCP_PROJECT_ID']
   dataset_id = env['DATASET_ID']
   summary_table_id = env['SUMMARY_TABLE_ID']
   qps_table_id = env['QPS_TABLE_ID']
+  # The following parameter is to inform us whether the server runs forever
+  # until forcefully stopped or will it naturally stop after sometime.
+  # This way, we know that the process should not terminate (even if it does
+  # with a success exit code) and flag any termination as a failure.
+  will_run_forever = env.get('WILL_RUN_FOREVER', '1')
 
   logfile_name = env.get('LOGFILE_NAME')
 
   print('pod_name: %s, project_id: %s, run_id: %s, dataset_id: %s, '
-        'summary_table_id: %s, qps_table_id: %s') % (
-            pod_name, project_id, run_id, dataset_id, summary_table_id,
-            qps_table_id)
+        'summary_table_id: %s, qps_table_id: %s') % (pod_name, project_id,
+                                                     run_id, dataset_id,
+                                                     summary_table_id,
+                                                     qps_table_id)
 
   bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id,
                              dataset_id, summary_table_id, qps_table_id)
@@ -98,7 +109,7 @@ def run_server():
   # Update status that the test is starting (in the status table)
   bq_helper.insert_summary_row(EventType.STARTING, details)
 
-  stress_cmd = [image_name] + [x for x in args_str.split()]
+  stress_cmd = stress_server_cmd + [x for x in args_str.split()]
 
   print 'Launching process %s ...' % stress_cmd
   stress_p = subprocess.Popen(args=stress_cmd,
@@ -106,7 +117,8 @@ def run_server():
                               stderr=subprocess.STDOUT)
 
   returncode = stress_p.wait()
-  if returncode != 0:
+
+  if will_run_forever == '1' or returncode != 0:
     end_time = datetime.datetime.now().isoformat()
     event_type = EventType.FAILURE
     details = 'Returncode: %d; End time: %s' % (returncode, end_time)
diff --git a/tools/gcp/stress_test/stress_test_utils.py b/tools/gcp/stress_test/stress_test_utils.py
index 79d9dea675..19d59c0df1 100755
--- a/tools/gcp/stress_test/stress_test_utils.py
+++ b/tools/gcp/stress_test/stress_test_utils.py
@@ -103,16 +103,32 @@ class BigQueryHelper:
     return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id,
                                 self.qps_table_id, [row])
 
-  def check_if_any_tests_failed(self, num_query_retries=3):
+  def check_if_any_tests_failed(self, num_query_retries=3, timeout_msec=30000):
     query = ('SELECT event_type FROM %s.%s WHERE run_id = \'%s\' AND '
              'event_type="%s"') % (self.dataset_id, self.summary_table_id,
                                    self.run_id, EventType.FAILURE)
-    query_job = bq_utils.sync_query_job(self.bq, self.project_id, query)
-    page = self.bq.jobs().getQueryResults(**query_job['jobReference']).execute(
-        num_retries=num_query_retries)
-    num_failures = int(page['totalRows'])
-    print 'num rows: ', num_failures
-    return num_failures > 0
+    page = None
+    try:
+      query_job = bq_utils.sync_query_job(self.bq, self.project_id, query)
+      job_id = query_job['jobReference']['jobId']
+      project_id = query_job['jobReference']['projectId']
+      page = self.bq.jobs().getQueryResults(
+          projectId=project_id,
+          jobId=job_id,
+          timeoutMs=timeout_msec).execute(num_retries=num_query_retries)
+
+      if not page['jobComplete']:
+        print('TIMEOUT ERROR: The query %s timed out. Current timeout value is'
+              ' %d msec. Returning False (i.e assuming there are no failures)'
+             ) % (query, timeoout_msec)
+        return False
+
+      num_failures = int(page['totalRows'])
+      print 'num rows: ', num_failures
+      return num_failures > 0
+    except:
+      print 'Exception in check_if_any_tests_failed(). Info: ', sys.exc_info()
+      print 'Query: ', query
 
   def print_summary_records(self, num_query_retries=3):
     line = '-' * 120
@@ -126,8 +142,9 @@ class BigQueryHelper:
                  self.dataset_id, self.summary_table_id, self.run_id)
     query_job = bq_utils.sync_query_job(self.bq, self.project_id, query)
 
-    print '{:<25} {:<12} {:<12} {:<30} {}'.format(
-        'Pod name', 'Image type', 'Event type', 'Date', 'Details')
+    print '{:<25} {:<12} {:<12} {:<30} {}'.format('Pod name', 'Image type',
+                                                  'Event type', 'Date',
+                                                  'Details')
     print line
     page_token = None
     while True:
@@ -136,9 +153,11 @@ class BigQueryHelper:
           **query_job['jobReference']).execute(num_retries=num_query_retries)
       rows = page.get('rows', [])
       for row in rows:
-        print '{:<25} {:<12} {:<12} {:<30} {}'.format(
-            row['f'][0]['v'], row['f'][1]['v'], row['f'][2]['v'],
-            row['f'][3]['v'], row['f'][4]['v'])
+        print '{:<25} {:<12} {:<12} {:<30} {}'.format(row['f'][0]['v'],
+                                                      row['f'][1]['v'],
+                                                      row['f'][2]['v'],
+                                                      row['f'][3]['v'],
+                                                      row['f'][4]['v'])
       page_token = page.get('pageToken')
       if not page_token:
         break
diff --git a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py
index c331a67942..9dbc69c5d6 100755
--- a/tools/gcp/utils/big_query_utils.py
+++ b/tools/gcp/utils/big_query_utils.py
@@ -71,16 +71,22 @@ def create_dataset(biq_query, project_id, dataset_id):
 
 def create_table(big_query, project_id, dataset_id, table_id, table_schema,
                  description):
+  fields = [{'name': field_name,
+             'type': field_type,
+             'description': field_description
+             } for (field_name, field_type, field_description) in table_schema]
+  return create_table2(big_query, project_id, dataset_id, table_id,
+                       fields, description)
+
+
+def create_table2(big_query, project_id, dataset_id, table_id, fields_schema,
+                 description):
   is_success = True
 
   body = {
       'description': description,
       'schema': {
-          'fields': [{
-              'name': field_name,
-              'type': field_type,
-              'description': field_description
-          } for (field_name, field_type, field_description) in table_schema]
+          'fields': fields_schema
       },
       'tableReference': {
           'datasetId': dataset_id,
@@ -112,12 +118,14 @@ def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
                                                  datasetId=dataset_id,
                                                  tableId=table_id,
                                                  body=body)
-    print body
     res = insert_req.execute(num_retries=NUM_RETRIES)
-    print res
+    if res.get('insertErrors', None):
+      print 'Error inserting rows! Response: %s' % res
+      is_success = False
   except HttpError as http_error:
-    print 'Error in inserting rows in the table %s' % table_id
+    print 'Error inserting rows to the table %s' % table_id
     is_success = False
+
   return is_success