author     Craig Tiller <ctiller@google.com>    2017-02-08 13:57:09 -0800
committer  GitHub <noreply@github.com>          2017-02-08 13:57:09 -0800
commit     7dd42bf3d8fefd00603d6f6e502fc286ac721f81
tree       d39309701ed1abf6147f067af39410cd1b55629a
parent     2e096b0fec7d56a71dc0bcb4ca5c9b6b918a330a
parent     2ef0d54ffcddbd1dadd6cc546ce9df6c5c0bfb0f
Merge pull request #9522 from ctiller/bm_perf
Update latency profiler to use (more appropriate) microbenchmarks
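The old flow drove a single qps_json_driver scenario under CONFIG=basicprof; run_latency_profile.sh now defers to the new run_microbenchmark.py, which builds each named benchmark binary, runs every test case it lists with a per-case trace file named through the new LATENCY_TRACE environment variable (consulted by basic_timers.c), renders each trace with profile_analyzer.py's new --out flag, and also captures perf-based flamegraphs. Roughly, one per-case cycle looks like the sketch below; the benchmark and file names are illustrative, and a basicprof build of bm_fullstack is assumed to already exist:

    # sketch of one profiling cycle, as run_microbenchmark.py schedules it via jobset
    import os
    import subprocess

    env = dict(os.environ)
    env['LATENCY_TRACE'] = 'BM_UnaryPingPong.trace'  # read by basic_timers.c

    # run a single microbenchmark; the timer log lands in the file named above
    subprocess.check_call(
        ['bins/basicprof/bm_fullstack',
         '--benchmark_filter=^BM_UnaryPingPong.*$'], env=env)

    # render the trace as a text report using the analyzer's new --out flag
    subprocess.check_call(
        ['python2.7', 'tools/profiling/latency_profile/profile_analyzer.py',
         '--source', 'BM_UnaryPingPong.trace', '--fmt', 'simple',
         '--out', 'reports/BM_UnaryPingPong.txt'])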
-rw-r--r--  src/core/lib/profiling/basic_timers.c                   |  20
-rw-r--r--  src/core/lib/slice/slice_intern.c                       |   6
-rw-r--r--  test/cpp/microbenchmarks/bm_fullstack.cc                |   4
-rwxr-xr-x  tools/profiling/latency_profile/profile_analyzer.py    |  18
-rwxr-xr-x  tools/profiling/latency_profile/run_latency_profile.sh |  61
-rw-r--r--  tools/run_tests/python_utils/start_port_server.py      | 131
-rwxr-xr-x  tools/run_tests/run_microbenchmark.py                   | 141
-rwxr-xr-x  tools/run_tests/run_tests.py                            |  94
8 files changed, 313 insertions(+), 162 deletions(-)
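Also visible in the stats above: _start_port_server() moves out of run_tests.py into the shared python_utils/start_port_server.py so that run_microbenchmark.py can reuse it. Both drivers now call it the same way; a sketch, using the port both scripts hard-code:

    import python_utils.start_port_server as start_port_server

    start_port_server.start_port_server(32766)
    # benchmark jobs then locate it via GRPC_TEST_PORT_SERVER=localhost:32766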
diff --git a/src/core/lib/profiling/basic_timers.c b/src/core/lib/profiling/basic_timers.c
index bdf9af2339..1f1987fb8e 100644
--- a/src/core/lib/profiling/basic_timers.c
+++ b/src/core/lib/profiling/basic_timers.c
@@ -43,6 +43,9 @@
 #include <grpc/support/thd.h>
 #include <grpc/support/time.h>
 #include <stdio.h>
+#include <string.h>
+
+#include "src/core/lib/support/env.h"
 
 typedef enum { BEGIN = '{', END = '}', MARK = '.' } marker_type;
 
@@ -74,7 +77,7 @@ typedef struct gpr_timer_log_list {
 static __thread gpr_timer_log *g_thread_log;
 static gpr_once g_once_init = GPR_ONCE_INIT;
 static FILE *output_file;
-static const char *output_filename = "latency_trace.txt";
+static const char *output_filename_or_null = NULL;
 static pthread_mutex_t g_mu;
 static pthread_cond_t g_cv;
 static gpr_timer_log_list g_in_progress_logs;
@@ -85,6 +88,17 @@ static __thread int g_thread_id;
 static int g_next_thread_id;
 static int g_writing_enabled = 1;
 
+static const char *output_filename() {
+  if (output_filename_or_null == NULL) {
+    output_filename_or_null = gpr_getenv("LATENCY_TRACE");
+    if (output_filename_or_null == NULL ||
+        strlen(output_filename_or_null) == 0) {
+      output_filename_or_null = "latency_trace.txt";
+    }
+  }
+  return output_filename_or_null;
+}
+
 static int timer_log_push_back(gpr_timer_log_list *list, gpr_timer_log *log) {
   if (list->head == NULL) {
     list->head = list->tail = log;
@@ -134,7 +148,7 @@ static void timer_log_remove(gpr_timer_log_list *list, gpr_timer_log *log) {
 static void write_log(gpr_timer_log *log) {
   size_t i;
   if (output_file == NULL) {
-    output_file = fopen(output_filename, "w");
+    output_file = fopen(output_filename(), "w");
   }
   for (i = 0; i < log->num_entries; i++) {
     gpr_timer_entry *entry = &(log->log[i]);
@@ -198,7 +212,7 @@ static void finish_writing(void) {
 }
 
 void gpr_timers_set_log_filename(const char *filename) {
-  output_filename = filename;
+  output_filename_or_null = filename;
 }
 
 static void init_output() {
diff --git a/src/core/lib/slice/slice_intern.c b/src/core/lib/slice/slice_intern.c
index 7cbd17bffd..32adc4df97 100644
--- a/src/core/lib/slice/slice_intern.c
+++ b/src/core/lib/slice/slice_intern.c
@@ -215,7 +215,9 @@ bool grpc_slice_is_interned(grpc_slice slice) {
 }
 
 grpc_slice grpc_slice_intern(grpc_slice slice) {
+  GPR_TIMER_BEGIN("grpc_slice_intern", 0);
   if (GRPC_IS_STATIC_METADATA_STRING(slice)) {
+    GPR_TIMER_END("grpc_slice_intern", 0);
     return slice;
   }
 
@@ -225,6 +227,7 @@ grpc_slice grpc_slice_intern(grpc_slice slice) {
         static_metadata_hash[(hash + i) % GPR_ARRAY_SIZE(static_metadata_hash)];
     if (ent.hash == hash && ent.idx < GRPC_STATIC_MDSTR_COUNT &&
         grpc_slice_eq(grpc_static_slice_table[ent.idx], slice)) {
+      GPR_TIMER_END("grpc_slice_intern", 0);
       return grpc_static_slice_table[ent.idx];
     }
   }
@@ -247,7 +250,7 @@ grpc_slice grpc_slice_intern(grpc_slice slice) {
       /* and treat this as if we were never here... sshhh */
     } else {
       gpr_mu_unlock(&shard->mu);
-      GPR_TIMER_END("grpc_mdstr_from_buffer", 0);
+      GPR_TIMER_END("grpc_slice_intern", 0);
       return materialize(s);
     }
   }
@@ -275,6 +278,7 @@ grpc_slice grpc_slice_intern(grpc_slice slice) {
 
   gpr_mu_unlock(&shard->mu);
 
+  GPR_TIMER_END("grpc_slice_intern", 0);
   return materialize(s);
 }
 
diff --git a/test/cpp/microbenchmarks/bm_fullstack.cc b/test/cpp/microbenchmarks/bm_fullstack.cc
index a870ff7455..9d883e68d7 100644
--- a/test/cpp/microbenchmarks/bm_fullstack.cc
+++ b/test/cpp/microbenchmarks/bm_fullstack.cc
@@ -58,6 +58,7 @@ extern "C" {
 #include "test/core/util/passthru_endpoint.h"
 #include "test/core/util/port.h"
 }
+#include "src/core/lib/profiling/timers.h"
 #include "src/cpp/client/create_channel_internal.h"
 #include "src/proto/grpc/testing/echo.grpc.pb.h"
 #include "third_party/benchmark/include/benchmark/benchmark.h"
@@ -437,6 +438,7 @@ static void BM_UnaryPingPong(benchmark::State& state) {
   std::unique_ptr<EchoTestService::Stub> stub(
       EchoTestService::NewStub(fixture->channel()));
   while (state.KeepRunning()) {
+    GPR_TIMER_SCOPE("BenchmarkCycle", 0);
     recv_response.Clear();
     ClientContext cli_ctx;
     ClientContextMutator cli_ctx_mut(&cli_ctx);
@@ -690,6 +692,7 @@ static void BM_PumpStreamClientToServer(benchmark::State& state) {
   }
   response_rw.Read(&recv_request, tag(0));
   while (state.KeepRunning()) {
+    GPR_TIMER_SCOPE("BenchmarkCycle", 0);
     request_rw->Write(send_request, tag(1));
     while (true) {
       GPR_ASSERT(fixture->cq()->Next(&t, &ok));
@@ -747,6 +750,7 @@ static void BM_PumpStreamServerToClient(benchmark::State& state) {
   }
   request_rw->Read(&recv_response, tag(0));
   while (state.KeepRunning()) {
+    GPR_TIMER_SCOPE("BenchmarkCycle", 0);
    response_rw.Write(send_response, tag(1));
     while (true) {
       GPR_ASSERT(fixture->cq()->Next(&t, &ok));
diff --git a/tools/profiling/latency_profile/profile_analyzer.py b/tools/profiling/latency_profile/profile_analyzer.py
index 48b8e9b950..2087cd2793 100755
--- a/tools/profiling/latency_profile/profile_analyzer.py
+++ b/tools/profiling/latency_profile/profile_analyzer.py
@@ -34,6 +34,7 @@ import hashlib
 import itertools
 import json
 import math
+import sys
 import tabulate
 import time
 
@@ -49,6 +50,7 @@ TIME_FROM_LAST_IMPORTANT = object()
 argp = argparse.ArgumentParser(description='Process output of basic_prof builds')
 argp.add_argument('--source', default='latency_trace.txt', type=str)
 argp.add_argument('--fmt', choices=tabulate.tabulate_formats, default='simple')
+argp.add_argument('--out', default='-', type=str)
 args = argp.parse_args()
 
 class LineItem(object):
@@ -246,16 +248,20 @@ FORMAT = [
   ('TO_SCOPE_END', time_format(TIME_TO_SCOPE_END)),
 ]
 
+out = sys.stdout
+if args.out != '-':
+  out = open(args.out, 'w')
+
 if args.fmt == 'html':
-  print '<html>'
-  print '<head>'
-  print '<title>Profile Report</title>'
-  print '</head>'
+  print >>out, '<html>'
+  print >>out, '<head>'
+  print >>out, '<title>Profile Report</title>'
+  print >>out, '</head>'
 
 accounted_for = 0
 for cs in call_stacks:
   if args.fmt in BANNER:
-    print BANNER[args.fmt] % {
+    print >>out, BANNER[args.fmt] % {
       'count': cs.count,
     }
   header, _ = zip(*FORMAT)
@@ -265,7 +271,7 @@ for cs in call_stacks:
     for _, fn in FORMAT:
       fields.append(fn(line))
     table.append(fields)
-  print tabulate.tabulate(table, header, tablefmt=args.fmt)
+  print >>out, tabulate.tabulate(table, header, tablefmt=args.fmt)
   accounted_for += cs.count
   if accounted_for > .99 * total_stacks:
     break
diff --git a/tools/profiling/latency_profile/run_latency_profile.sh b/tools/profiling/latency_profile/run_latency_profile.sh
index 618db202dc..715f525d2f 100755
--- a/tools/profiling/latency_profile/run_latency_profile.sh
+++ b/tools/profiling/latency_profile/run_latency_profile.sh
@@ -28,55 +28,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-# format argument via
-# $ echo '{...}' | python -mjson.tool
-read -r -d '' SCENARIOS_JSON_ARG <<'EOF'
-{
-  "scenarios": [
-    {
-      "benchmark_seconds": 5,
-      "client_config": {
-        "client_channels": 1,
-        "client_type": "SYNC_CLIENT",
-        "histogram_params": {
-          "max_possible": 60000000000.0,
-          "resolution": 0.01
-        },
-        "load_params": {
-          "closed_loop": {}
-        },
-        "outstanding_rpcs_per_channel": 1,
-        "payload_config": {
-          "simple_params": {
-            "req_size": 0,
-            "resp_size": 0
-          }
-        },
-        "rpc_type": "UNARY",
-        "security_params": {
-          "server_host_override": "foo.test.google.fr",
-          "use_test_ca": true
-        }
-      },
-      "name": "cpp_protobuf_sync_unary_ping_pong_secure",
-      "num_clients": 1,
-      "num_servers": 1,
-      "server_config": {
-        "core_limit": 1,
-        "security_params": {
-          "server_host_override": "foo.test.google.fr",
-          "use_test_ca": true
-        },
-        "server_type": "SYNC_SERVER"
-      },
-      "spawn_local_worker_count": 2,
-      "warmup_seconds": 5
-    }
-  ]
-}
-
-EOF
-
 set -ex
 
 cd $(dirname $0)/../../..
@@ -93,14 +44,4 @@ else
   PYTHON=python2.7
 fi
 
-make CONFIG=basicprof -j$CPUS qps_json_driver
-
-mkdir -p reports
-bins/basicprof/qps_json_driver --scenarios_json="$SCENARIOS_JSON_ARG"
-
-echo '<html><head></head><body>Latency profile for:<br/>' > reports/index.html
-echo "<p><pre>${SCENARIOS_JSON_ARG}</pre></p>" >> reports/index.html
-echo '<p><pre>' >> reports/index.html
-$PYTHON tools/profiling/latency_profile/profile_analyzer.py \
-    --source=latency_trace.txt --fmt=simple >> reports/index.html
-echo '</pre></p></body></html>' >> reports/index.html
+$PYTHON tools/run_tests/run_microbenchmark.py bm_fullstack
diff --git a/tools/run_tests/python_utils/start_port_server.py b/tools/run_tests/python_utils/start_port_server.py
new file mode 100644
index 0000000000..d521aa6a9d
--- /dev/null
+++ b/tools/run_tests/python_utils/start_port_server.py
@@ -0,0 +1,131 @@
+# Copyright 2015, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import print_function
+
+from six.moves import urllib
+import os
+import socket
+import subprocess
+import tempfile
+import sys
+import time
+import traceback
+import jobset
+
+def start_port_server(port_server_port):
+  # check if a compatible port server is running
+  # if incompatible (version mismatch) ==> start a new one
+  # if not running ==> start a new one
+  # otherwise, leave it up
+  try:
+    version = int(urllib.request.urlopen(
+        'http://localhost:%d/version_number' % port_server_port,
+        timeout=10).read())
+    print('detected port server running version %d' % version)
+    running = True
+  except Exception as e:
+    print('failed to detect port server: %s' % sys.exc_info()[0])
+    print(e.strerror)
+    running = False
+  if running:
+    current_version = int(subprocess.check_output(
+        [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
+         'dump_version']))
+    print('my port server is version %d' % current_version)
+    running = (version >= current_version)
+    if not running:
+      print('port_server version mismatch: killing the old one')
+      urllib.request.urlopen('http://localhost:%d/quitquitquit' % port_server_port).read()
+      time.sleep(1)
+  if not running:
+    fd, logfile = tempfile.mkstemp()
+    os.close(fd)
+    print('starting port_server, with log file %s' % logfile)
+    args = [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
+            '-p', '%d' % port_server_port, '-l', logfile]
+    env = dict(os.environ)
+    env['BUILD_ID'] = 'pleaseDontKillMeJenkins'
+    if jobset.platform_string() == 'windows':
+      # Working directory of port server needs to be outside of Jenkins
+      # workspace to prevent file lock issues.
+      tempdir = tempfile.mkdtemp()
+      port_server = subprocess.Popen(
+          args,
+          env=env,
+          cwd=tempdir,
+          creationflags=0x00000008,  # detached process
+          close_fds=True)
+    else:
+      port_server = subprocess.Popen(
+          args,
+          env=env,
+          preexec_fn=os.setsid,
+          close_fds=True)
+    time.sleep(1)
+    # ensure port server is up
+    waits = 0
+    while True:
+      if waits > 10:
+        print('killing port server due to excessive start up waits')
+        port_server.kill()
+      if port_server.poll() is not None:
+        print('port_server failed to start')
+        # try one final time: maybe another build managed to start one
+        time.sleep(1)
+        try:
+          urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
+                                 timeout=1).read()
+          print('last ditch attempt to contact port server succeeded')
+          break
+        except:
+          traceback.print_exc()
+          port_log = open(logfile, 'r').read()
+          print(port_log)
+          sys.exit(1)
+      try:
+        urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
+                               timeout=1).read()
+        print('port server is up and ready')
+        break
+      except socket.timeout:
+        print('waiting for port_server: timeout')
+        traceback.print_exc()
+        time.sleep(1)
+        waits += 1
+      except urllib.error.URLError:
+        print('waiting for port_server: urlerror')
+        traceback.print_exc()
+        time.sleep(1)
+        waits += 1
+      except:
+        traceback.print_exc()
+        port_server.kill()
+        raise
+
diff --git a/tools/run_tests/run_microbenchmark.py b/tools/run_tests/run_microbenchmark.py
new file mode 100755
index 0000000000..096d7d78ab
--- /dev/null
+++ b/tools/run_tests/run_microbenchmark.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python2.7
+# Copyright 2017, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import multiprocessing
+import os
+import subprocess
+import sys
+
+import python_utils.jobset as jobset
+import python_utils.start_port_server as start_port_server
+
+flamegraph_dir = os.path.join(os.path.expanduser('~'), 'FlameGraph')
+
+os.chdir(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
+if not os.path.exists('reports'):
+  os.makedirs('reports')
+
+port_server_port = 32766
+start_port_server.start_port_server(port_server_port)
+
+def fnize(s):
+  out = ''
+  for c in s:
+    if c in '<>, /':
+      if len(out) and out[-1] == '_': continue
+      out += '_'
+    else:
+      out += c
+  return out
+
+# index html
+index_html = """
+<html>
+<head>
+<title>Microbenchmark Results</title>
+</head>
+<body>
+"""
+
+def heading(name):
+  global index_html
+  index_html += "<h1>%s</h1>\n" % name
+
+def link(txt, tgt):
+  global index_html
+  index_html += "<p><a href=\"%s\">%s</a></p>\n" % (tgt, txt)
+
+benchmarks = []
+profile_analysis = []
+cleanup = []
+
+for bm_name in sys.argv[1:]:
+  # generate latency profiles
+  heading('Latency Profiles: %s' % bm_name)
+  subprocess.check_call(
+      ['make', bm_name,
+       'CONFIG=basicprof', '-j', '%d' % multiprocessing.cpu_count()])
+  for line in subprocess.check_output(['bins/basicprof/%s' % bm_name,
+                                       '--benchmark_list_tests']).splitlines():
+    link(line, '%s.txt' % fnize(line))
+    benchmarks.append(
+        jobset.JobSpec(['bins/basicprof/%s' % bm_name, '--benchmark_filter=^%s$' % line],
+                       environ={'LATENCY_TRACE': '%s.trace' % fnize(line)}))
+    profile_analysis.append(
+        jobset.JobSpec([sys.executable,
+                        'tools/profiling/latency_profile/profile_analyzer.py',
+                        '--source', '%s.trace' % fnize(line), '--fmt', 'simple',
+                        '--out', 'reports/%s.txt' % fnize(line)], timeout_seconds=None))
+    cleanup.append(jobset.JobSpec(['rm', '%s.trace' % fnize(line)]))
+    # periodically flush out the list of jobs: profile_analysis jobs at least
+    # consume upwards of five gigabytes of ram in some cases, and so analysing
+    # hundreds of them at once is impractical -- but we want at least some
+    # concurrency or the work takes too long
+    if len(benchmarks) >= min(4, multiprocessing.cpu_count()):
+      # run up to half the cpu count: each benchmark can use up to two cores
+      # (one for the microbenchmark, one for the data flush)
+      jobset.run(benchmarks, maxjobs=max(1, multiprocessing.cpu_count()/2),
+                 add_env={'GRPC_TEST_PORT_SERVER': 'localhost:%d' % port_server_port})
+      jobset.run(profile_analysis, maxjobs=multiprocessing.cpu_count())
+      jobset.run(cleanup, maxjobs=multiprocessing.cpu_count())
+      benchmarks = []
+      profile_analysis = []
+      cleanup = []
+  # run the remaining benchmarks that weren't flushed
+  if len(benchmarks):
+    jobset.run(benchmarks, maxjobs=max(1, multiprocessing.cpu_count()/2),
+               add_env={'GRPC_TEST_PORT_SERVER': 'localhost:%d' % port_server_port})
+    jobset.run(profile_analysis, maxjobs=multiprocessing.cpu_count())
+    jobset.run(cleanup, maxjobs=multiprocessing.cpu_count())
+
+  # generate flamegraphs
+  heading('Flamegraphs: %s' % bm_name)
+  subprocess.check_call(
+      ['make', bm_name,
+       'CONFIG=mutrace', '-j', '%d' % multiprocessing.cpu_count()])
+  for line in subprocess.check_output(['bins/mutrace/%s' % bm_name,
+                                       '--benchmark_list_tests']).splitlines():
+    subprocess.check_call(['sudo', 'perf', 'record', '-g', '-c', '1000',
+                           'bins/mutrace/%s' % bm_name,
+                           '--benchmark_filter=^%s$' % line,
+                           '--benchmark_min_time=20'])
+    with open('/tmp/bm.perf', 'w') as f:
+      f.write(subprocess.check_output(['sudo', 'perf', 'script']))
+    with open('/tmp/bm.folded', 'w') as f:
+      f.write(subprocess.check_output([
+          '%s/stackcollapse-perf.pl' % flamegraph_dir, '/tmp/bm.perf']))
+    link(line, '%s.svg' % fnize(line))
+    with open('reports/%s.svg' % fnize(line), 'w') as f:
+      f.write(subprocess.check_output([
+          '%s/flamegraph.pl' % flamegraph_dir, '/tmp/bm.folded']))
+
+index_html += "</body>\n</html>\n"
+with open('reports/index.html', 'w') as f:
+  f.write(index_html)
diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py
index 0d835a7de1..75571aaadb 100755
--- a/tools/run_tests/run_tests.py
+++ b/tools/run_tests/run_tests.py
@@ -57,6 +57,7 @@ import uuid
 import python_utils.jobset as jobset
 import python_utils.report_utils as report_utils
 import python_utils.watch_dirs as watch_dirs
+import python_utils.start_port_server as start_port_server
 
 _ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
 
@@ -1374,97 +1375,6 @@ def _shut_down_legacy_server(legacy_server_port):
     'http://localhost:%d/quitquitquit' % legacy_server_port).read()
 
 
-def _start_port_server(port_server_port):
-  # check if a compatible port server is running
-  # if incompatible (version mismatch) ==> start a new one
-  # if not running ==> start a new one
-  # otherwise, leave it up
-  try:
-    version = int(urllib.request.urlopen(
-        'http://localhost:%d/version_number' % port_server_port,
-        timeout=10).read())
-    print('detected port server running version %d' % version)
-    running = True
-  except Exception as e:
-    print('failed to detect port server: %s' % sys.exc_info()[0])
-    print(e.strerror)
-    running = False
-  if running:
-    current_version = int(subprocess.check_output(
-        [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
-         'dump_version']))
-    print('my port server is version %d' % current_version)
-    running = (version >= current_version)
-    if not running:
-      print('port_server version mismatch: killing the old one')
-      urllib.request.urlopen('http://localhost:%d/quitquitquit' % port_server_port).read()
-      time.sleep(1)
-  if not running:
-    fd, logfile = tempfile.mkstemp()
-    os.close(fd)
-    print('starting port_server, with log file %s' % logfile)
-    args = [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
-            '-p', '%d' % port_server_port, '-l', logfile]
-    env = dict(os.environ)
-    env['BUILD_ID'] = 'pleaseDontKillMeJenkins'
-    if platform_string() == 'windows':
-      # Working directory of port server needs to be outside of Jenkins
-      # workspace to prevent file lock issues.
-      tempdir = tempfile.mkdtemp()
-      port_server = subprocess.Popen(
-          args,
-          env=env,
-          cwd=tempdir,
-          creationflags = 0x00000008, # detached process
-          close_fds=True)
-    else:
-      port_server = subprocess.Popen(
-          args,
-          env=env,
-          preexec_fn=os.setsid,
-          close_fds=True)
-    time.sleep(1)
-    # ensure port server is up
-    waits = 0
-    while True:
-      if waits > 10:
-        print('killing port server due to excessive start up waits')
-        port_server.kill()
-      if port_server.poll() is not None:
-        print('port_server failed to start')
-        # try one final time: maybe another build managed to start one
-        time.sleep(1)
-        try:
-          urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
-                                 timeout=1).read()
-          print('last ditch attempt to contact port server succeeded')
-          break
-        except:
-          traceback.print_exc()
-          port_log = open(logfile, 'r').read()
-          print(port_log)
-          sys.exit(1)
-      try:
-        urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
-                               timeout=1).read()
-        print('port server is up and ready')
-        break
-      except socket.timeout:
-        print('waiting for port_server: timeout')
-        traceback.print_exc();
-        time.sleep(1)
-        waits += 1
-      except urllib.error.URLError:
-        print('waiting for port_server: urlerror')
-        traceback.print_exc();
-        time.sleep(1)
-        waits += 1
-      except:
-        traceback.print_exc()
-        port_server.kill()
-        raise
-
-
 def _calculate_num_runs_failures(list_of_results):
   """Caculate number of runs and failures for a particular test.
 
@@ -1512,7 +1422,7 @@ def _build_and_run(
   antagonists = [subprocess.Popen(['tools/run_tests/python_utils/antagonist.py'])
                  for _ in range(0, args.antagonists)]
   port_server_port = 32766
-  _start_port_server(port_server_port)
+  start_port_server.start_port_server(port_server_port)
   resultset = None
   num_test_failures = 0
   try:
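A note on the report filenames above: fnize() in run_microbenchmark.py rewrites the characters '<', '>', ',', ' ' and '/' in a Google Benchmark test name to underscores, collapsing consecutive runs, so each test case maps to a flat file under reports/. For example (the benchmark name is merely illustrative of what bm_fullstack emits):

    # fnize() as defined in run_microbenchmark.py above
    def fnize(s):
      out = ''
      for c in s:
        if c in '<>, /':
          if len(out) and out[-1] == '_': continue
          out += '_'
        else:
          out += c
      return out

    print(fnize('BM_UnaryPingPong<TCP, NoOpMutator, NoOpMutator>/0/0'))
    # -> BM_UnaryPingPong_TCP_NoOpMutator_NoOpMutator_0_0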