author     Craig Tiller <ctiller@google.com>    2017-02-08 13:57:09 -0800
committer  GitHub <noreply@github.com>          2017-02-08 13:57:09 -0800
commit     7dd42bf3d8fefd00603d6f6e502fc286ac721f81
tree       d39309701ed1abf6147f067af39410cd1b55629a
parent     2e096b0fec7d56a71dc0bcb4ca5c9b6b918a330a
parent     2ef0d54ffcddbd1dadd6cc546ce9df6c5c0bfb0f
Merge pull request #9522 from ctiller/bm_perf
Update latency profiler to use (more appropriate) microbenchmarks
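The old flow drove a single qps_json_driver scenario under CONFIG=basicprof; run_latency_profile.sh now defers to the new run_microbenchmark.py, which builds each named benchmark binary, runs every test case it lists with a per-case trace file named through the new LATENCY_TRACE environment variable (consulted by basic_timers.c), renders each trace with profile_analyzer.py's new --out flag, and also captures perf-based flamegraphs. Roughly, one per-case cycle looks like the sketch below; the benchmark and file names are illustrative, and a basicprof build of bm_fullstack is assumed to already exist:

    # sketch of one profiling cycle, as run_microbenchmark.py schedules it via jobset
    import os
    import subprocess

    env = dict(os.environ)
    env['LATENCY_TRACE'] = 'BM_UnaryPingPong.trace'  # read by basic_timers.c

    # run a single microbenchmark; the timer log lands in the file named above
    subprocess.check_call(
        ['bins/basicprof/bm_fullstack',
         '--benchmark_filter=^BM_UnaryPingPong.*$'], env=env)

    # render the trace as a text report using the analyzer's new --out flag
    subprocess.check_call(
        ['python2.7', 'tools/profiling/latency_profile/profile_analyzer.py',
         '--source', 'BM_UnaryPingPong.trace', '--fmt', 'simple',
         '--out', 'reports/BM_UnaryPingPong.txt'])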
-rw-r--r--  src/core/lib/profiling/basic_timers.c                   |  20
-rw-r--r--  src/core/lib/slice/slice_intern.c                       |   6
-rw-r--r--  test/cpp/microbenchmarks/bm_fullstack.cc                |   4
-rwxr-xr-x  tools/profiling/latency_profile/profile_analyzer.py    |  18
-rwxr-xr-x  tools/profiling/latency_profile/run_latency_profile.sh |  61
-rw-r--r--  tools/run_tests/python_utils/start_port_server.py      | 131
-rwxr-xr-x  tools/run_tests/run_microbenchmark.py                   | 141
-rwxr-xr-x  tools/run_tests/run_tests.py                            |  94
8 files changed, 313 insertions(+), 162 deletions(-)
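Also visible in the stats above: _start_port_server() moves out of run_tests.py into the shared python_utils/start_port_server.py so that run_microbenchmark.py can reuse it. Both drivers now call it the same way; a sketch, using the port both scripts hard-code:

    import python_utils.start_port_server as start_port_server

    start_port_server.start_port_server(32766)
    # benchmark jobs then locate it via GRPC_TEST_PORT_SERVER=localhost:32766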
diff --git a/src/core/lib/profiling/basic_timers.c b/src/core/lib/profiling/basic_timers.c
index bdf9af2339..1f1987fb8e 100644
--- a/src/core/lib/profiling/basic_timers.c
+++ b/src/core/lib/profiling/basic_timers.c
@@ -43,6 +43,9 @@
 #include <grpc/support/thd.h>
 #include <grpc/support/time.h>
 #include <stdio.h>
+#include <string.h>
+
+#include "src/core/lib/support/env.h"
 
 typedef enum { BEGIN = '{', END = '}', MARK = '.' } marker_type;
 
@@ -74,7 +77,7 @@ typedef struct gpr_timer_log_list {
 static __thread gpr_timer_log *g_thread_log;
 static gpr_once g_once_init = GPR_ONCE_INIT;
 static FILE *output_file;
-static const char *output_filename = "latency_trace.txt";
+static const char *output_filename_or_null = NULL;
 static pthread_mutex_t g_mu;
 static pthread_cond_t g_cv;
 static gpr_timer_log_list g_in_progress_logs;
@@ -85,6 +88,17 @@ static __thread int g_thread_id;
 static int g_next_thread_id;
 static int g_writing_enabled = 1;
 
+static const char *output_filename() {
+  if (output_filename_or_null == NULL) {
+    output_filename_or_null = gpr_getenv("LATENCY_TRACE");
+    if (output_filename_or_null == NULL ||
+        strlen(output_filename_or_null) == 0) {
+      output_filename_or_null = "latency_trace.txt";
+    }
+  }
+  return output_filename_or_null;
+}
+
 static int timer_log_push_back(gpr_timer_log_list *list, gpr_timer_log *log) {
   if (list->head == NULL) {
     list->head = list->tail = log;
@@ -134,7 +148,7 @@ static void timer_log_remove(gpr_timer_log_list *list, gpr_timer_log *log) {
 static void write_log(gpr_timer_log *log) {
   size_t i;
   if (output_file == NULL) {
-    output_file = fopen(output_filename, "w");
+    output_file = fopen(output_filename(), "w");
   }
   for (i = 0; i < log->num_entries; i++) {
     gpr_timer_entry *entry = &(log->log[i]);
@@ -198,7 +212,7 @@ static void finish_writing(void) {
 }
 
 void gpr_timers_set_log_filename(const char *filename) {
-  output_filename = filename;
+  output_filename_or_null = filename;
 }
 
 static void init_output() {
diff --git a/src/core/lib/slice/slice_intern.c b/src/core/lib/slice/slice_intern.c
index 7cbd17bffd..32adc4df97 100644
--- a/src/core/lib/slice/slice_intern.c
+++ b/src/core/lib/slice/slice_intern.c
@@ -215,7 +215,9 @@ bool grpc_slice_is_interned(grpc_slice slice) {
 }
 
 grpc_slice grpc_slice_intern(grpc_slice slice) {
+  GPR_TIMER_BEGIN("grpc_slice_intern", 0);
   if (GRPC_IS_STATIC_METADATA_STRING(slice)) {
+    GPR_TIMER_END("grpc_slice_intern", 0);
     return slice;
   }
 
@@ -225,6 +227,7 @@ grpc_slice grpc_slice_intern(grpc_slice slice) {
         static_metadata_hash[(hash + i) % GPR_ARRAY_SIZE(static_metadata_hash)];
     if (ent.hash == hash && ent.idx < GRPC_STATIC_MDSTR_COUNT &&
         grpc_slice_eq(grpc_static_slice_table[ent.idx], slice)) {
+      GPR_TIMER_END("grpc_slice_intern", 0);
       return grpc_static_slice_table[ent.idx];
     }
   }
@@ -247,7 +250,7 @@ grpc_slice grpc_slice_intern(grpc_slice slice) {
       /* and treat this as if we were never here... sshhh */
     } else {
       gpr_mu_unlock(&shard->mu);
-      GPR_TIMER_END("grpc_mdstr_from_buffer", 0);
+      GPR_TIMER_END("grpc_slice_intern", 0);
       return materialize(s);
     }
   }
@@ -275,6 +278,7 @@ grpc_slice grpc_slice_intern(grpc_slice slice) {
 
   gpr_mu_unlock(&shard->mu);
 
+  GPR_TIMER_END("grpc_slice_intern", 0);
   return materialize(s);
 }
 
diff --git a/test/cpp/microbenchmarks/bm_fullstack.cc b/test/cpp/microbenchmarks/bm_fullstack.cc
index a870ff7455..9d883e68d7 100644
--- a/test/cpp/microbenchmarks/bm_fullstack.cc
+++ b/test/cpp/microbenchmarks/bm_fullstack.cc
@@ -58,6 +58,7 @@ extern "C" {
 #include "test/core/util/passthru_endpoint.h"
 #include "test/core/util/port.h"
 }
+#include "src/core/lib/profiling/timers.h"
 #include "src/cpp/client/create_channel_internal.h"
 #include "src/proto/grpc/testing/echo.grpc.pb.h"
 #include "third_party/benchmark/include/benchmark/benchmark.h"
@@ -437,6 +438,7 @@ static void BM_UnaryPingPong(benchmark::State& state) {
   std::unique_ptr<EchoTestService::Stub> stub(
       EchoTestService::NewStub(fixture->channel()));
   while (state.KeepRunning()) {
+    GPR_TIMER_SCOPE("BenchmarkCycle", 0);
     recv_response.Clear();
     ClientContext cli_ctx;
     ClientContextMutator cli_ctx_mut(&cli_ctx);
@@ -690,6 +692,7 @@ static void BM_PumpStreamClientToServer(benchmark::State& state) {
   }
   response_rw.Read(&recv_request, tag(0));
   while (state.KeepRunning()) {
+    GPR_TIMER_SCOPE("BenchmarkCycle", 0);
     request_rw->Write(send_request, tag(1));
     while (true) {
       GPR_ASSERT(fixture->cq()->Next(&t, &ok));
@@ -747,6 +750,7 @@ static void BM_PumpStreamServerToClient(benchmark::State& state) {
   }
   request_rw->Read(&recv_response, tag(0));
   while (state.KeepRunning()) {
+    GPR_TIMER_SCOPE("BenchmarkCycle", 0);
    response_rw.Write(send_response, tag(1));
     while (true) {
       GPR_ASSERT(fixture->cq()->Next(&t, &ok));
diff --git a/tools/profiling/latency_profile/profile_analyzer.py b/tools/profiling/latency_profile/profile_analyzer.py
index 48b8e9b950..2087cd2793 100755
--- a/tools/profiling/latency_profile/profile_analyzer.py
+++ b/tools/profiling/latency_profile/profile_analyzer.py
@@ -34,6 +34,7 @@ import hashlib
 import itertools
 import json
 import math
+import sys
 import tabulate
 import time
 
@@ -49,6 +50,7 @@ TIME_FROM_LAST_IMPORTANT = object()
 argp = argparse.ArgumentParser(description='Process output of basic_prof builds')
 argp.add_argument('--source', default='latency_trace.txt', type=str)
 argp.add_argument('--fmt', choices=tabulate.tabulate_formats, default='simple')
+argp.add_argument('--out', default='-', type=str)
 args = argp.parse_args()
 
 class LineItem(object):
@@ -246,16 +248,20 @@ FORMAT = [
   ('TO_SCOPE_END', time_format(TIME_TO_SCOPE_END)),
 ]
 
+out = sys.stdout
+if args.out != '-':
+  out = open(args.out, 'w')
+
 if args.fmt == 'html':
-  print '<html>'
-  print '<head>'
-  print '<title>Profile Report</title>'
-  print '</head>'
+  print >>out, '<html>'
+  print >>out, '<head>'
+  print >>out, '<title>Profile Report</title>'
+  print >>out, '</head>'
 
 accounted_for = 0
 for cs in call_stacks:
   if args.fmt in BANNER:
-    print BANNER[args.fmt] % {
+    print >>out, BANNER[args.fmt] % {
       'count': cs.count,
     }
   header, _ = zip(*FORMAT)
@@ -265,7 +271,7 @@ for cs in call_stacks:
     for _, fn in FORMAT:
       fields.append(fn(line))
     table.append(fields)
-  print tabulate.tabulate(table, header, tablefmt=args.fmt)
+  print >>out, tabulate.tabulate(table, header, tablefmt=args.fmt)
   accounted_for += cs.count
   if accounted_for > .99 * total_stacks:
     break
diff --git a/tools/profiling/latency_profile/run_latency_profile.sh b/tools/profiling/latency_profile/run_latency_profile.sh
index 618db202dc..715f525d2f 100755
--- a/tools/profiling/latency_profile/run_latency_profile.sh
+++ b/tools/profiling/latency_profile/run_latency_profile.sh
@@ -28,55 +28,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-# format argument via
-# $ echo '{...}' | python -mjson.tool
-read -r -d '' SCENARIOS_JSON_ARG <<'EOF'
-{
-  "scenarios": [
-    {
-      "benchmark_seconds": 5,
-      "client_config": {
-        "client_channels": 1,
-        "client_type": "SYNC_CLIENT",
-        "histogram_params": {
-          "max_possible": 60000000000.0,
-          "resolution": 0.01
-        },
-        "load_params": {
-          "closed_loop": {}
-        },
-        "outstanding_rpcs_per_channel": 1,
-        "payload_config": {
-          "simple_params": {
-            "req_size": 0,
-            "resp_size": 0
-          }
-        },
-        "rpc_type": "UNARY",
-        "security_params": {
-          "server_host_override": "foo.test.google.fr",
-          "use_test_ca": true
-        }
-      },
-      "name": "cpp_protobuf_sync_unary_ping_pong_secure",
-      "num_clients": 1,
-      "num_servers": 1,
-      "server_config": {
-        "core_limit": 1,
-        "security_params": {
-          "server_host_override": "foo.test.google.fr",
-          "use_test_ca": true
-        },
-        "server_type": "SYNC_SERVER"
-      },
-      "spawn_local_worker_count": 2,
-      "warmup_seconds": 5
-    }
-  ]
-}
-
-EOF
-
 set -ex
 
 cd $(dirname $0)/../../..
@@ -93,14 +44,4 @@ else
   PYTHON=python2.7
 fi
 
-make CONFIG=basicprof -j$CPUS qps_json_driver
-
-mkdir -p reports
-bins/basicprof/qps_json_driver --scenarios_json="$SCENARIOS_JSON_ARG"
-
-echo '<html><head></head><body>Latency profile for:<br/>' > reports/index.html
-echo "<p><pre>${SCENARIOS_JSON_ARG}</pre></p>" >> reports/index.html
-echo '<p><pre>' >> reports/index.html
-$PYTHON tools/profiling/latency_profile/profile_analyzer.py \
-    --source=latency_trace.txt --fmt=simple >> reports/index.html
-echo '</pre></p></body></html>' >> reports/index.html
+$PYTHON tools/run_tests/run_microbenchmark.py bm_fullstack
diff --git a/tools/run_tests/python_utils/start_port_server.py b/tools/run_tests/python_utils/start_port_server.py
new file mode 100644
index 0000000000..d521aa6a9d
--- /dev/null
+++ b/tools/run_tests/python_utils/start_port_server.py
@@ -0,0 +1,131 @@
+# Copyright 2015, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import print_function
+
+from six.moves import urllib
+import os
+import socket
+import subprocess
+import tempfile
+import sys
+import time
+import traceback
+import jobset
+
+def start_port_server(port_server_port):
+  # check if a compatible port server is running
+  # if incompatible (version mismatch) ==> start a new one
+  # if not running ==> start a new one
+  # otherwise, leave it up
+  try:
+    version = int(urllib.request.urlopen(
+        'http://localhost:%d/version_number' % port_server_port,
+        timeout=10).read())
+    print('detected port server running version %d' % version)
+    running = True
+  except Exception as e:
+    print('failed to detect port server: %s' % sys.exc_info()[0])
+    print(e.strerror)
+    running = False
+  if running:
+    current_version = int(subprocess.check_output(
+        [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
+         'dump_version']))
+    print('my port server is version %d' % current_version)
+    running = (version >= current_version)
+    if not running:
+      print('port_server version mismatch: killing the old one')
+      urllib.request.urlopen('http://localhost:%d/quitquitquit' % port_server_port).read()
+      time.sleep(1)
+  if not running:
+    fd, logfile = tempfile.mkstemp()
+    os.close(fd)
+    print('starting port_server, with log file %s' % logfile)
+    args = [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
+            '-p', '%d' % port_server_port, '-l', logfile]
+    env = dict(os.environ)
+    env['BUILD_ID'] = 'pleaseDontKillMeJenkins'
+    if jobset.platform_string() == 'windows':
+      # Working directory of port server needs to be outside of Jenkins
+      # workspace to prevent file lock issues.
+      tempdir = tempfile.mkdtemp()
+      port_server = subprocess.Popen(
+          args,
+          env=env,
+          cwd=tempdir,
+          creationflags=0x00000008,  # detached process
+          close_fds=True)
+    else:
+      port_server = subprocess.Popen(
+          args,
+          env=env,
+          preexec_fn=os.setsid,
+          close_fds=True)
+    time.sleep(1)
+    # ensure port server is up
+    waits = 0
+    while True:
+      if waits > 10:
+        print('killing port server due to excessive start up waits')
+        port_server.kill()
+      if port_server.poll() is not None:
+        print('port_server failed to start')
+        # try one final time: maybe another build managed to start one
+        time.sleep(1)
+        try:
+          urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
+                                 timeout=1).read()
+          print('last ditch attempt to contact port server succeeded')
+          break
+        except:
+          traceback.print_exc()
+          port_log = open(logfile, 'r').read()
+          print(port_log)
+          sys.exit(1)
+      try:
+        urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
+                               timeout=1).read()
+        print('port server is up and ready')
+        break
+      except socket.timeout:
+        print('waiting for port_server: timeout')
+        traceback.print_exc()
+        time.sleep(1)
+        waits += 1
+      except urllib.error.URLError:
+        print('waiting for port_server: urlerror')
+        traceback.print_exc()
+        time.sleep(1)
+        waits += 1
+      except:
+        traceback.print_exc()
+        port_server.kill()
+        raise
+
diff --git a/tools/run_tests/run_microbenchmark.py b/tools/run_tests/run_microbenchmark.py
new file mode 100755
index 0000000000..096d7d78ab
--- /dev/null
+++ b/tools/run_tests/run_microbenchmark.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python2.7
+# Copyright 2017, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import multiprocessing
+import os
+import subprocess
+import sys
+
+import python_utils.jobset as jobset
+import python_utils.start_port_server as start_port_server
+
+flamegraph_dir = os.path.join(os.path.expanduser('~'), 'FlameGraph')
+
+os.chdir(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
+if not os.path.exists('reports'):
+  os.makedirs('reports')
+
+port_server_port = 32766
+start_port_server.start_port_server(port_server_port)
+
+def fnize(s):
+  out = ''
+  for c in s:
+    if c in '<>, /':
+      if len(out) and out[-1] == '_': continue
+      out += '_'
+    else:
+      out += c
+  return out
+
+# index html
+index_html = """
+<html>
+<head>
+<title>Microbenchmark Results</title>
+</head>
+<body>
+"""
+
+def heading(name):
+  global index_html
+  index_html += "<h1>%s</h1>\n" % name
+
+def link(txt, tgt):
+  global index_html
+  index_html += "<p><a href=\"%s\">%s</a></p>\n" % (tgt, txt)
+
+benchmarks = []
+profile_analysis = []
+cleanup = []
+
+for bm_name in sys.argv[1:]:
+  # generate latency profiles
+  heading('Latency Profiles: %s' % bm_name)
+  subprocess.check_call(
+      ['make', bm_name,
+       'CONFIG=basicprof', '-j', '%d' % multiprocessing.cpu_count()])
+  for line in subprocess.check_output(['bins/basicprof/%s' % bm_name,
+                                       '--benchmark_list_tests']).splitlines():
+    link(line, '%s.txt' % fnize(line))
+    benchmarks.append(
+        jobset.JobSpec(['bins/basicprof/%s' % bm_name, '--benchmark_filter=^%s$' % line],
+                       environ={'LATENCY_TRACE': '%s.trace' % fnize(line)}))
+    profile_analysis.append(
+        jobset.JobSpec([sys.executable,
+                        'tools/profiling/latency_profile/profile_analyzer.py',
+                        '--source', '%s.trace' % fnize(line), '--fmt', 'simple',
+                        '--out', 'reports/%s.txt' % fnize(line)], timeout_seconds=None))
+    cleanup.append(jobset.JobSpec(['rm', '%s.trace' % fnize(line)]))
+    # periodically flush out the list of jobs: profile_analysis jobs at least
+    # consume upwards of five gigabytes of ram in some cases, and so analysing
+    # hundreds of them at once is impractical -- but we want at least some
+    # concurrency or the work takes too long
+    if len(benchmarks) >= min(4, multiprocessing.cpu_count()):
+      # run up to half the cpu count: each benchmark can use up to two cores
+      # (one for the microbenchmark, one for the data flush)
+      jobset.run(benchmarks, maxjobs=max(1, multiprocessing.cpu_count()/2),
+                 add_env={'GRPC_TEST_PORT_SERVER': 'localhost:%d' % port_server_port})
+      jobset.run(profile_analysis, maxjobs=multiprocessing.cpu_count())
+      jobset.run(cleanup, maxjobs=multiprocessing.cpu_count())
+      benchmarks = []
+      profile_analysis = []
+      cleanup = []
+  # run the remaining benchmarks that weren't flushed
+  if len(benchmarks):
+    jobset.run(benchmarks, maxjobs=max(1, multiprocessing.cpu_count()/2),
+               add_env={'GRPC_TEST_PORT_SERVER': 'localhost:%d' % port_server_port})
+    jobset.run(profile_analysis, maxjobs=multiprocessing.cpu_count())
+    jobset.run(cleanup, maxjobs=multiprocessing.cpu_count())
+
+  # generate flamegraphs
+  heading('Flamegraphs: %s' % bm_name)
+  subprocess.check_call(
+      ['make', bm_name,
+       'CONFIG=mutrace', '-j', '%d' % multiprocessing.cpu_count()])
+  for line in subprocess.check_output(['bins/mutrace/%s' % bm_name,
+                                       '--benchmark_list_tests']).splitlines():
+    subprocess.check_call(['sudo', 'perf', 'record', '-g', '-c', '1000',
+                           'bins/mutrace/%s' % bm_name,
+                           '--benchmark_filter=^%s$' % line,
+                           '--benchmark_min_time=20'])
+    with open('/tmp/bm.perf', 'w') as f:
+      f.write(subprocess.check_output(['sudo', 'perf', 'script']))
+    with open('/tmp/bm.folded', 'w') as f:
+      f.write(subprocess.check_output([
+          '%s/stackcollapse-perf.pl' % flamegraph_dir, '/tmp/bm.perf']))
+    link(line, '%s.svg' % fnize(line))
+    with open('reports/%s.svg' % fnize(line), 'w') as f:
+      f.write(subprocess.check_output([
+          '%s/flamegraph.pl' % flamegraph_dir, '/tmp/bm.folded']))
+
+index_html += "</body>\n</html>\n"
+with open('reports/index.html', 'w') as f:
+  f.write(index_html)
diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py
index 0d835a7de1..75571aaadb 100755
--- a/tools/run_tests/run_tests.py
+++ b/tools/run_tests/run_tests.py
@@ -57,6 +57,7 @@ import uuid
 import python_utils.jobset as jobset
 import python_utils.report_utils as report_utils
 import python_utils.watch_dirs as watch_dirs
+import python_utils.start_port_server as start_port_server
 
 _ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
 
@@ -1374,97 +1375,6 @@ def _shut_down_legacy_server(legacy_server_port):
     'http://localhost:%d/quitquitquit' % legacy_server_port).read()
 
 
-def _start_port_server(port_server_port):
-  # check if a compatible port server is running
-  # if incompatible (version mismatch) ==> start a new one
-  # if not running ==> start a new one
-  # otherwise, leave it up
-  try:
-    version = int(urllib.request.urlopen(
-        'http://localhost:%d/version_number' % port_server_port,
-        timeout=10).read())
-    print('detected port server running version %d' % version)
-    running = True
-  except Exception as e:
-    print('failed to detect port server: %s' % sys.exc_info()[0])
-    print(e.strerror)
-    running = False
-  if running:
-    current_version = int(subprocess.check_output(
-        [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
-         'dump_version']))
-    print('my port server is version %d' % current_version)
-    running = (version >= current_version)
-    if not running:
-      print('port_server version mismatch: killing the old one')
-      urllib.request.urlopen('http://localhost:%d/quitquitquit' % port_server_port).read()
-      time.sleep(1)
-  if not running:
-    fd, logfile = tempfile.mkstemp()
-    os.close(fd)
-    print('starting port_server, with log file %s' % logfile)
-    args = [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
-            '-p', '%d' % port_server_port, '-l', logfile]
-    env = dict(os.environ)
-    env['BUILD_ID'] = 'pleaseDontKillMeJenkins'
-    if platform_string() == 'windows':
-      # Working directory of port server needs to be outside of Jenkins
-      # workspace to prevent file lock issues.
-      tempdir = tempfile.mkdtemp()
-      port_server = subprocess.Popen(
-          args,
-          env=env,
-          cwd=tempdir,
-          creationflags = 0x00000008, # detached process
-          close_fds=True)
-    else:
-      port_server = subprocess.Popen(
-          args,
-          env=env,
-          preexec_fn=os.setsid,
-          close_fds=True)
-    time.sleep(1)
-    # ensure port server is up
-    waits = 0
-    while True:
-      if waits > 10:
-        print('killing port server due to excessive start up waits')
-        port_server.kill()
-      if port_server.poll() is not None:
-        print('port_server failed to start')
-        # try one final time: maybe another build managed to start one
-        time.sleep(1)
-        try:
-          urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
-                                 timeout=1).read()
-          print('last ditch attempt to contact port server succeeded')
-          break
-        except:
-          traceback.print_exc()
-          port_log = open(logfile, 'r').read()
-          print(port_log)
-          sys.exit(1)
-      try:
-        urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
-                               timeout=1).read()
-        print('port server is up and ready')
-        break
-      except socket.timeout:
-        print('waiting for port_server: timeout')
-        traceback.print_exc();
-        time.sleep(1)
-        waits += 1
-      except urllib.error.URLError:
-        print('waiting for port_server: urlerror')
-        traceback.print_exc();
-        time.sleep(1)
-        waits += 1
-      except:
-        traceback.print_exc()
-        port_server.kill()
-        raise
-
-
 def _calculate_num_runs_failures(list_of_results):
   """Caculate number of runs and failures for a particular test.
 
@@ -1512,7 +1422,7 @@ def _build_and_run(
   antagonists = [subprocess.Popen(['tools/run_tests/python_utils/antagonist.py'])
                  for _ in range(0, args.antagonists)]
   port_server_port = 32766
-  _start_port_server(port_server_port)
+  start_port_server.start_port_server(port_server_port)
   resultset = None
   num_test_failures = 0
   try:
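A note on the report filenames above: fnize() in run_microbenchmark.py rewrites the characters '<', '>', ',', ' ' and '/' in a Google Benchmark test name to underscores, collapsing consecutive runs, so each test case maps to a flat file under reports/. For example (the benchmark name is merely illustrative of what bm_fullstack emits):

    # fnize() as defined in run_microbenchmark.py above
    def fnize(s):
      out = ''
      for c in s:
        if c in '<>, /':
          if len(out) and out[-1] == '_': continue
          out += '_'
        else:
          out += c
      return out

    print(fnize('BM_UnaryPingPong<TCP, NoOpMutator, NoOpMutator>/0/0'))
    # -> BM_UnaryPingPong_TCP_NoOpMutator_NoOpMutator_0_0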