From 5920abc5358e854a0f4a2217bf8be230977d1853 Mon Sep 17 00:00:00 2001 From: Matt Kwong Date: Thu, 24 Aug 2017 12:26:05 -0700 Subject: Revert "Don't clear alarm in jobset when running performance tests" This reverts commit c15d32bbe89a2bf950992ded06d1b3da7f1f39a6. --- tools/run_tests/python_utils/jobset.py | 13 ++++--------- tools/run_tests/run_performance_tests.py | 10 +++++----- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/tools/run_tests/python_utils/jobset.py b/tools/run_tests/python_utils/jobset.py index 08d652ae3f..044c6f3aa4 100755 --- a/tools/run_tests/python_utils/jobset.py +++ b/tools/run_tests/python_utils/jobset.py @@ -367,10 +367,9 @@ class Jobset(object): """Manages one run of jobs.""" def __init__(self, check_cancelled, maxjobs, newline_on_success, travis, - stop_on_failure, add_env, quiet_success, max_time, clear_alarms): + stop_on_failure, add_env, quiet_success, max_time): self._running = set() self._check_cancelled = check_cancelled - self._clear_alarms = clear_alarms self._cancelled = False self._failures = 0 self._completed = 0 @@ -474,10 +473,7 @@ class Jobset(object): while self._running: if self.cancelled(): pass # poll cancellation self.reap() - # Clear the alarms when finished to avoid a race condition causing job - # failures. 
Don't do this when running multi-VM tests because clearing - # the alarms causes the test to stall - if platform_string() != 'windows' and self._clear_alarms: + if platform_string() != 'windows': signal.alarm(0) return not self.cancelled() and self._failures == 0 @@ -507,8 +503,7 @@ def run(cmdlines, add_env={}, skip_jobs=False, quiet_success=False, - max_time=-1, - clear_alarms=True): + max_time=-1): if skip_jobs: resultset = {} skipped_job_result = JobResult() @@ -520,7 +515,7 @@ def run(cmdlines, js = Jobset(check_cancelled, maxjobs if maxjobs is not None else _DEFAULT_MAX_JOBS, newline_on_success, travis, stop_on_failure, add_env, - quiet_success, max_time, clear_alarms) + quiet_success, max_time) for cmdline, remaining in tag_remaining(cmdlines): if not js.start(cmdline): break diff --git a/tools/run_tests/run_performance_tests.py b/tools/run_tests/run_performance_tests.py index 0db5e4ef83..b9277c919b 100755 --- a/tools/run_tests/run_performance_tests.py +++ b/tools/run_tests/run_performance_tests.py @@ -183,7 +183,7 @@ def archive_repo(languages): jobset.message('START', 'Archiving local repository.', do_newline=True) num_failures, _ = jobset.run( - [archive_job], newline_on_success=True, maxjobs=1, clear_alarms=False) + [archive_job], newline_on_success=True, maxjobs=1) if num_failures == 0: jobset.message('SUCCESS', 'Archive with local repository created successfully.', @@ -215,7 +215,7 @@ def prepare_remote_hosts(hosts, prepare_local=False): timeout_seconds=prepare_timeout)) jobset.message('START', 'Preparing hosts.', do_newline=True) num_failures, _ = jobset.run( - prepare_jobs, newline_on_success=True, maxjobs=10, clear_alarms=False) + prepare_jobs, newline_on_success=True, maxjobs=10) if num_failures == 0: jobset.message('SUCCESS', 'Prepare step completed successfully.', @@ -248,7 +248,7 @@ def build_on_remote_hosts(hosts, languages=scenario_config.LANGUAGES.keys(), bui timeout_seconds=build_timeout)) jobset.message('START', 'Building.', 
do_newline=True) num_failures, _ = jobset.run( - build_jobs, newline_on_success=True, maxjobs=10, clear_alarms=False) + build_jobs, newline_on_success=True, maxjobs=10) if num_failures == 0: jobset.message('SUCCESS', 'Built successfully.', @@ -414,7 +414,7 @@ def run_collect_perf_profile_jobs(hosts_and_base_names, scenario_name): perf_report_jobs.append(perf_report_processor_job(host, perf_base_name, output_filename)) jobset.message('START', 'Collecting perf reports from qps workers', do_newline=True) - failures, _ = jobset.run(perf_report_jobs, newline_on_success=True, maxjobs=1, clear_alarms=False) + failures, _ = jobset.run(perf_report_jobs, newline_on_success=True, maxjobs=1) jobset.message('END', 'Collecting perf reports from qps workers', do_newline=True) return failures @@ -556,7 +556,7 @@ for scenario in scenarios: jobs = [scenario.jobspec] if scenario.workers: jobs.append(create_quit_jobspec(scenario.workers, remote_host=args.remote_driver_host)) - scenario_failures, resultset = jobset.run(jobs, newline_on_success=True, maxjobs=1, clear_alarms=False) + scenario_failures, resultset = jobset.run(jobs, newline_on_success=True, maxjobs=1) total_scenario_failures += scenario_failures merged_resultset = dict(itertools.chain(six.iteritems(merged_resultset), six.iteritems(resultset))) -- cgit v1.2.3 From 3da8c5defbc9a83d3db0bf84fb0dce012802d9b3 Mon Sep 17 00:00:00 2001 From: Matt Kwong Date: Thu, 24 Aug 2017 12:35:04 -0700 Subject: Let alarms at end of jobset.py trigger instead of clearing --- tools/run_tests/python_utils/jobset.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/run_tests/python_utils/jobset.py b/tools/run_tests/python_utils/jobset.py index 044c6f3aa4..50fe7d7df8 100755 --- a/tools/run_tests/python_utils/jobset.py +++ b/tools/run_tests/python_utils/jobset.py @@ -473,8 +473,10 @@ class Jobset(object): while self._running: if self.cancelled(): pass # poll cancellation self.reap() - if platform_string() != 'windows': - 
signal.alarm(0) + global have_alarm + if platform_string() != 'windows' and have_alarm: + signal.alarm(1) + signal.pause() return not self.cancelled() and self._failures == 0 -- cgit v1.2.3 From 738b1bb424292b4138e82a3c2085ab4a3a95507e Mon Sep 17 00:00:00 2001 From: Matt Kwong Date: Tue, 29 Aug 2017 12:45:53 -0700 Subject: Get rid of have_alarm var in jobset.py --- tools/run_tests/python_utils/jobset.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/tools/run_tests/python_utils/jobset.py b/tools/run_tests/python_utils/jobset.py index 50fe7d7df8..6151a7276a 100755 --- a/tools/run_tests/python_utils/jobset.py +++ b/tools/run_tests/python_utils/jobset.py @@ -71,10 +71,8 @@ def platform_string(): if platform_string() == 'windows': pass else: - have_alarm = False def alarm_handler(unused_signum, unused_frame): - global have_alarm - have_alarm = False + pass signal.signal(signal.SIGCHLD, lambda unused_signum, unused_frame: None) signal.signal(signal.SIGALRM, alarm_handler) @@ -454,10 +452,7 @@ class Jobset(object): if platform_string() == 'windows': time.sleep(0.1) else: - global have_alarm - if not have_alarm: - have_alarm = True - signal.alarm(10) + signal.alarm(10) signal.pause() def cancelled(self): @@ -473,10 +468,8 @@ class Jobset(object): while self._running: if self.cancelled(): pass # poll cancellation self.reap() - global have_alarm - if platform_string() != 'windows' and have_alarm: - signal.alarm(1) - signal.pause() + if platform_string() != 'windows': + signal.alarm(0) return not self.cancelled() and self._failures == 0 -- cgit v1.2.3