aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools/run_tests
diff options
context:
space:
mode:
authorGravatar Nicolas Noble <nicolasnoble@users.noreply.github.com>2015-02-26 22:57:02 -0800
committerGravatar Nicolas Noble <nicolasnoble@users.noreply.github.com>2015-02-26 22:57:02 -0800
commit dadafd6c93603c0ca13c4335ad4f65a70d08728d (patch)
tree 94dac49601a0bfae96e8084ef2ae4c49bbf512e3 /tools/run_tests
parent afcbfa1ba3800d685346c61fa96c43b4b911d8cc (diff)
parent 9b3cc7461a7a68d8f6ab37460bbd988d8325c154 (diff)
Merge pull request #867 from ctiller/timeout2
Force-kill long running processes
Diffstat (limited to 'tools/run_tests')
-rwxr-xr-x tools/run_tests/jobset.py 15
1 files changed, 15 insertions, 0 deletions
diff --git a/tools/run_tests/jobset.py b/tools/run_tests/jobset.py
index e40235341c..b8178ffebf 100755
--- a/tools/run_tests/jobset.py
+++ b/tools/run_tests/jobset.py
@@ -43,10 +43,17 @@ import time
_DEFAULT_MAX_JOBS = 16 * multiprocessing.cpu_count()
+have_alarm = False
def alarm_handler(unused_signum, unused_frame):
  """Signal handler that clears the module-level ``have_alarm`` flag.

  Installed for SIGALRM elsewhere in this module. Resetting the flag
  to False marks that the previously requested alarm has been delivered.

  Args:
    unused_signum: signal number supplied to the handler; ignored.
    unused_frame: stack frame supplied to the handler; ignored.
  """
  # Rebind the module-level name rather than creating a function local.
  global have_alarm
  have_alarm = False
+
+
# setup a signal handler so that signal.pause registers 'something'
# when a child finishes
# not using futures and threading to avoid a dependency on subprocess32
signal.signal(signal.SIGCHLD, lambda unused_signum, unused_frame: None)
+signal.signal(signal.SIGALRM, alarm_handler)
def shuffle_iteratable(it):
@@ -187,6 +194,9 @@ class Job(object):
do_newline=self._newline_on_success or self._travis)
if self._bin_hash:
update_cache.finished(self._spec.identity(), self._bin_hash)
+ elif self._state == _RUNNING and time.time() - self._start > 300:
+ message('TIMEOUT', self._spec.shortname, do_newline=self._travis)
+ self.kill()
return self._state
def kill(self):
@@ -240,6 +250,7 @@ class Jobset(object):
st = job.state(self._cache)
if st == _RUNNING: continue
if st == _FAILURE: self._failures += 1
+ if st == _KILLED: self._failures += 1
dead.add(job)
for job in dead:
self._completed += 1
@@ -248,6 +259,10 @@ class Jobset(object):
if (not self._travis):
message('WAITING', '%d jobs running, %d complete, %d failed' % (
len(self._running), self._completed, self._failures))
+ global have_alarm
+ if not have_alarm:
+ have_alarm = True
+ signal.alarm(10)
signal.pause()
def cancelled(self):