diff options
author | Nicolas Noble <nicolasnoble@users.noreply.github.com> | 2015-02-26 22:57:02 -0800 |
---|---|---|
committer | Nicolas Noble <nicolasnoble@users.noreply.github.com> | 2015-02-26 22:57:02 -0800 |
commit | dadafd6c93603c0ca13c4335ad4f65a70d08728d (patch) | |
tree | 94dac49601a0bfae96e8084ef2ae4c49bbf512e3 /tools/run_tests | |
parent | afcbfa1ba3800d685346c61fa96c43b4b911d8cc (diff) | |
parent | 9b3cc7461a7a68d8f6ab37460bbd988d8325c154 (diff) |
Merge pull request #867 from ctiller/timeout2
Force-kill long running processes
Diffstat (limited to 'tools/run_tests')
-rwxr-xr-x | tools/run_tests/jobset.py | 15 |
1 file changed, 15 insertions, 0 deletions
```diff
diff --git a/tools/run_tests/jobset.py b/tools/run_tests/jobset.py
index e40235341c..b8178ffebf 100755
--- a/tools/run_tests/jobset.py
+++ b/tools/run_tests/jobset.py
@@ -43,10 +43,17 @@ import time
 _DEFAULT_MAX_JOBS = 16 * multiprocessing.cpu_count()
 
 
+have_alarm = False
+def alarm_handler(unused_signum, unused_frame):
+  global have_alarm
+  have_alarm = False
+
+
 # setup a signal handler so that signal.pause registers 'something'
 # when a child finishes
 # not using futures and threading to avoid a dependency on subprocess32
 signal.signal(signal.SIGCHLD, lambda unused_signum, unused_frame: None)
+signal.signal(signal.SIGALRM, alarm_handler)
 
 
 def shuffle_iteratable(it):
@@ -187,6 +194,9 @@ class Job(object):
                 do_newline=self._newline_on_success or self._travis)
         if self._bin_hash:
           update_cache.finished(self._spec.identity(), self._bin_hash)
+    elif self._state == _RUNNING and time.time() - self._start > 300:
+      message('TIMEOUT', self._spec.shortname, do_newline=self._travis)
+      self.kill()
     return self._state
 
   def kill(self):
@@ -240,6 +250,7 @@ class Jobset(object):
         st = job.state(self._cache)
         if st == _RUNNING: continue
         if st == _FAILURE: self._failures += 1
+        if st == _KILLED: self._failures += 1
         dead.add(job)
       for job in dead:
         self._completed += 1
@@ -248,6 +259,10 @@ class Jobset(object):
       if (not self._travis):
         message('WAITING', '%d jobs running, %d complete, %d failed' % (
             len(self._running), self._completed, self._failures))
+      global have_alarm
+      if not have_alarm:
+        have_alarm = True
+        signal.alarm(10)
       signal.pause()
 
   def cancelled(self):
```