aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools
diff options
context:
space:
mode:
authorGravatar Craig Tiller <ctiller@google.com>2017-02-02 16:08:05 -0800
committerGravatar Craig Tiller <ctiller@google.com>2017-02-02 16:08:05 -0800
commit7dc4ea66f0688c30aa7ddd865074a808fd20f076 (patch)
tree98559190df65980bc3818f99bda09a3b595266a1 /tools
parent95ca017ce85ecdabe6e39275acfe9db736f97735 (diff)
Make the microbenchmark profile gatherer run some in parallel
Diffstat (limited to 'tools')
-rwxr-xr-xtools/profiling/latency_profile/profile_analyzer.py18
-rw-r--r--tools/run_tests/python_utils/start_port_server.py128
-rwxr-xr-xtools/run_tests/run_microbenchmark.py (renamed from tools/profiling/microbenchmark/bm.py)36
-rwxr-xr-xtools/run_tests/run_tests.py94
4 files changed, 167 insertions, 109 deletions
diff --git a/tools/profiling/latency_profile/profile_analyzer.py b/tools/profiling/latency_profile/profile_analyzer.py
index 48b8e9b950..2087cd2793 100755
--- a/tools/profiling/latency_profile/profile_analyzer.py
+++ b/tools/profiling/latency_profile/profile_analyzer.py
@@ -34,6 +34,7 @@ import hashlib
import itertools
import json
import math
+import sys
import tabulate
import time
@@ -49,6 +50,7 @@ TIME_FROM_LAST_IMPORTANT = object()
argp = argparse.ArgumentParser(description='Process output of basic_prof builds')
argp.add_argument('--source', default='latency_trace.txt', type=str)
argp.add_argument('--fmt', choices=tabulate.tabulate_formats, default='simple')
+argp.add_argument('--out', default='-', type=str)
args = argp.parse_args()
class LineItem(object):
@@ -246,16 +248,20 @@ FORMAT = [
('TO_SCOPE_END', time_format(TIME_TO_SCOPE_END)),
]
+out = sys.stdout
+if args.out != '-':
+ out = open(args.out, 'w')
+
if args.fmt == 'html':
- print '<html>'
- print '<head>'
- print '<title>Profile Report</title>'
- print '</head>'
+ print >>out, '<html>'
+ print >>out, '<head>'
+ print >>out, '<title>Profile Report</title>'
+ print >>out, '</head>'
accounted_for = 0
for cs in call_stacks:
if args.fmt in BANNER:
- print BANNER[args.fmt] % {
+ print >>out, BANNER[args.fmt] % {
'count': cs.count,
}
header, _ = zip(*FORMAT)
@@ -265,7 +271,7 @@ for cs in call_stacks:
for _, fn in FORMAT:
fields.append(fn(line))
table.append(fields)
- print tabulate.tabulate(table, header, tablefmt=args.fmt)
+ print >>out, tabulate.tabulate(table, header, tablefmt=args.fmt)
accounted_for += cs.count
if accounted_for > .99 * total_stacks:
break
diff --git a/tools/run_tests/python_utils/start_port_server.py b/tools/run_tests/python_utils/start_port_server.py
new file mode 100644
index 0000000000..4103eb0534
--- /dev/null
+++ b/tools/run_tests/python_utils/start_port_server.py
@@ -0,0 +1,128 @@
+# Copyright 2015, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import print_function
+
+from six.moves import urllib
+import os
+import subprocess
+import tempfile
+import sys
+import time
+
+def start_port_server(port_server_port):
+ # check if a compatible port server is running
+ # if incompatible (version mismatch) ==> start a new one
+ # if not running ==> start a new one
+ # otherwise, leave it up
+ try:
+ version = int(urllib.request.urlopen(
+ 'http://localhost:%d/version_number' % port_server_port,
+ timeout=10).read())
+ print('detected port server running version %d' % version)
+ running = True
+ except Exception as e:
+ print('failed to detect port server: %s' % sys.exc_info()[0])
+ print(e.strerror)
+ running = False
+ if running:
+ current_version = int(subprocess.check_output(
+ [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
+ 'dump_version']))
+ print('my port server is version %d' % current_version)
+ running = (version >= current_version)
+ if not running:
+ print('port_server version mismatch: killing the old one')
+ urllib.request.urlopen('http://localhost:%d/quitquitquit' % port_server_port).read()
+ time.sleep(1)
+ if not running:
+ fd, logfile = tempfile.mkstemp()
+ os.close(fd)
+ print('starting port_server, with log file %s' % logfile)
+ args = [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
+ '-p', '%d' % port_server_port, '-l', logfile]
+ env = dict(os.environ)
+ env['BUILD_ID'] = 'pleaseDontKillMeJenkins'
+ if platform_string() == 'windows':
+ # Working directory of port server needs to be outside of Jenkins
+ # workspace to prevent file lock issues.
+ tempdir = tempfile.mkdtemp()
+ port_server = subprocess.Popen(
+ args,
+ env=env,
+ cwd=tempdir,
+ creationflags = 0x00000008, # detached process
+ close_fds=True)
+ else:
+ port_server = subprocess.Popen(
+ args,
+ env=env,
+ preexec_fn=os.setsid,
+ close_fds=True)
+ time.sleep(1)
+ # ensure port server is up
+ waits = 0
+ while True:
+ if waits > 10:
+ print('killing port server due to excessive start up waits')
+ port_server.kill()
+ if port_server.poll() is not None:
+ print('port_server failed to start')
+ # try one final time: maybe another build managed to start one
+ time.sleep(1)
+ try:
+ urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
+ timeout=1).read()
+ print('last ditch attempt to contact port server succeeded')
+ break
+ except:
+ traceback.print_exc()
+ port_log = open(logfile, 'r').read()
+ print(port_log)
+ sys.exit(1)
+ try:
+ urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
+ timeout=1).read()
+ print('port server is up and ready')
+ break
+ except socket.timeout:
+ print('waiting for port_server: timeout')
+ traceback.print_exc();
+ time.sleep(1)
+ waits += 1
+ except urllib.error.URLError:
+ print('waiting for port_server: urlerror')
+ traceback.print_exc();
+ time.sleep(1)
+ waits += 1
+ except:
+ traceback.print_exc()
+ port_server.kill()
+ raise
+
diff --git a/tools/profiling/microbenchmark/bm.py b/tools/run_tests/run_microbenchmark.py
index b4f0de841f..42a31a622f 100755
--- a/tools/profiling/microbenchmark/bm.py
+++ b/tools/run_tests/run_microbenchmark.py
@@ -33,8 +33,18 @@ import os
import subprocess
import sys
+import python_utils.jobset as jobset
+import python_utils.start_port_server as start_port_server
+
flamegraph_dir = os.path.join(os.path.expanduser('~'), 'FlameGraph')
+os.chdir(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
+if not os.path.exists('reports'):
+ os.makedirs('reports')
+
+port_server_port = 32766
+start_port_server.start_port_server(port_server_port)
+
def fnize(s):
out = ''
for c in s:
@@ -45,10 +55,6 @@ def fnize(s):
out += c
return out
-os.chdir(os.path.join(os.path.dirname(sys.argv[0]), '../../..'))
-if not os.path.exists('reports'):
- os.makedirs('reports')
-
# index html
index_html = """
<html>
@@ -66,6 +72,9 @@ def link(txt, tgt):
global index_html
index_html += "<p><a href=\"%s\">%s</a></p>\n" % (tgt, txt)
+benchmarks = []
+profile_analysis = []
+
for bm_name in sys.argv[1:]:
# generate latency profiles
heading('Latency Profiles: %s' % bm_name)
@@ -75,13 +84,18 @@ for bm_name in sys.argv[1:]:
for line in subprocess.check_output(['bins/basicprof/%s' % bm_name,
'--benchmark_list_tests']).splitlines():
link(line, '%s.txt' % fnize(line))
- with open('reports/%s.txt' % fnize(line), 'w') as f:
- f.write(subprocess.check_output(['bins/basicprof/%s' % bm_name,
- '--benchmark_filter=^%s$' % line]))
- f.write('\n***********************************************************\n')
- f.write(subprocess.check_output([
- sys.executable, 'tools/profiling/latency_profile/profile_analyzer.py',
- '--source', 'latency_trace.txt', '--fmt', 'simple']))
+ benchmarks.append(
+ jobset.JobSpec(['bins/basicprof/%s' % bm_name, '--benchmark_filter=^%s$' % line],
+ environ={'LATENCY_TRACE': '%s.trace' % fnize(line)}))
+ profile_analysis.append(
+ jobset.JobSpec([sys.executable,
+ 'tools/profiling/latency_profile/profile_analyzer.py',
+ '--source', '%s.trace' % fnize(line), '--fmt', 'simple',
+ '--out', 'reports/%s.txt' % fnize(line)], timeout_seconds=None))
+
+ jobset.run(benchmarks, maxjobs=multiprocessing.cpu_count()/2,
+ add_env={'GRPC_TEST_PORT_SERVER': 'localhost:%d' % port_server_port})
+ jobset.run(profile_analysis, maxjobs=multiprocessing.cpu_count())
# generate flamegraphs
heading('Flamegraphs: %s' % bm_name)
diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py
index 9d76725810..999ebf87eb 100755
--- a/tools/run_tests/run_tests.py
+++ b/tools/run_tests/run_tests.py
@@ -57,6 +57,7 @@ import uuid
import python_utils.jobset as jobset
import python_utils.report_utils as report_utils
import python_utils.watch_dirs as watch_dirs
+import python_utils.start_port_server as start_port_server
_ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
@@ -1322,97 +1323,6 @@ def _shut_down_legacy_server(legacy_server_port):
'http://localhost:%d/quitquitquit' % legacy_server_port).read()
-def _start_port_server(port_server_port):
- # check if a compatible port server is running
- # if incompatible (version mismatch) ==> start a new one
- # if not running ==> start a new one
- # otherwise, leave it up
- try:
- version = int(urllib.request.urlopen(
- 'http://localhost:%d/version_number' % port_server_port,
- timeout=10).read())
- print('detected port server running version %d' % version)
- running = True
- except Exception as e:
- print('failed to detect port server: %s' % sys.exc_info()[0])
- print(e.strerror)
- running = False
- if running:
- current_version = int(subprocess.check_output(
- [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
- 'dump_version']))
- print('my port server is version %d' % current_version)
- running = (version >= current_version)
- if not running:
- print('port_server version mismatch: killing the old one')
- urllib.request.urlopen('http://localhost:%d/quitquitquit' % port_server_port).read()
- time.sleep(1)
- if not running:
- fd, logfile = tempfile.mkstemp()
- os.close(fd)
- print('starting port_server, with log file %s' % logfile)
- args = [sys.executable, os.path.abspath('tools/run_tests/python_utils/port_server.py'),
- '-p', '%d' % port_server_port, '-l', logfile]
- env = dict(os.environ)
- env['BUILD_ID'] = 'pleaseDontKillMeJenkins'
- if platform_string() == 'windows':
- # Working directory of port server needs to be outside of Jenkins
- # workspace to prevent file lock issues.
- tempdir = tempfile.mkdtemp()
- port_server = subprocess.Popen(
- args,
- env=env,
- cwd=tempdir,
- creationflags = 0x00000008, # detached process
- close_fds=True)
- else:
- port_server = subprocess.Popen(
- args,
- env=env,
- preexec_fn=os.setsid,
- close_fds=True)
- time.sleep(1)
- # ensure port server is up
- waits = 0
- while True:
- if waits > 10:
- print('killing port server due to excessive start up waits')
- port_server.kill()
- if port_server.poll() is not None:
- print('port_server failed to start')
- # try one final time: maybe another build managed to start one
- time.sleep(1)
- try:
- urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
- timeout=1).read()
- print('last ditch attempt to contact port server succeeded')
- break
- except:
- traceback.print_exc()
- port_log = open(logfile, 'r').read()
- print(port_log)
- sys.exit(1)
- try:
- urllib.request.urlopen('http://localhost:%d/get' % port_server_port,
- timeout=1).read()
- print('port server is up and ready')
- break
- except socket.timeout:
- print('waiting for port_server: timeout')
- traceback.print_exc();
- time.sleep(1)
- waits += 1
- except urllib.error.URLError:
- print('waiting for port_server: urlerror')
- traceback.print_exc();
- time.sleep(1)
- waits += 1
- except:
- traceback.print_exc()
- port_server.kill()
- raise
-
-
def _calculate_num_runs_failures(list_of_results):
"""Caculate number of runs and failures for a particular test.
@@ -1460,7 +1370,7 @@ def _build_and_run(
antagonists = [subprocess.Popen(['tools/run_tests/python_utils/antagonist.py'])
for _ in range(0, args.antagonists)]
port_server_port = 32766
- _start_port_server(port_server_port)
+ start_port_server.start_port_server(port_server_port)
resultset = None
num_test_failures = 0
try: