diff options
Diffstat (limited to 'tools/run_tests/python_utils')
-rwxr-xr-x | tools/run_tests/python_utils/jobset.py | 19 | ||||
-rw-r--r-- | tools/run_tests/python_utils/upload_test_results.py | 15 |
2 files changed, 25 insertions, 9 deletions
diff --git a/tools/run_tests/python_utils/jobset.py b/tools/run_tests/python_utils/jobset.py index 062c79a0de..d523095e70 100755 --- a/tools/run_tests/python_utils/jobset.py +++ b/tools/run_tests/python_utils/jobset.py @@ -306,8 +306,8 @@ class Job(object): else: self._state = _FAILURE if not self._suppress_failure_message: - message('FAILED', '%s [ret=%d, pid=%d]' % ( - self._spec.shortname, self._process.returncode, self._process.pid), + message('FAILED', '%s [ret=%d, pid=%d, time=%.1fsec]' % ( + self._spec.shortname, self._process.returncode, self._process.pid, elapsed), stdout(), do_newline=True) self.result.state = 'FAILED' self.result.num_failures += 1 @@ -326,7 +326,7 @@ class Job(object): self.result.cpu_estimated = float('%.01f' % self._spec.cpu_cost) measurement = '; cpu_cost=%.01f; estimated=%.01f' % (self.result.cpu_measured, self.result.cpu_estimated) if not self._quiet_success: - message('PASSED', '%s [time=%.1fsec; retries=%d:%d%s]' % ( + message('PASSED', '%s [time=%.1fsec, retries=%d:%d%s]' % ( self._spec.shortname, elapsed, self._retries, self._timeout_retries, measurement), stdout() if self._spec.verbose_success else None, do_newline=self._newline_on_success or self._travis) @@ -334,6 +334,8 @@ class Job(object): elif (self._state == _RUNNING and self._spec.timeout_seconds is not None and time.time() - self._start > self._spec.timeout_seconds): + elapsed = time.time() - self._start + self.result.elapsed_time = elapsed if self._timeout_retries < self._spec.timeout_retries: message('TIMEOUT_FLAKE', '%s [pid=%d]' % (self._spec.shortname, self._process.pid), stdout(), do_newline=True) self._timeout_retries += 1 @@ -344,7 +346,7 @@ class Job(object): self._process.terminate() self.start() else: - message('TIMEOUT', '%s [pid=%d]' % (self._spec.shortname, self._process.pid), stdout(), do_newline=True) + message('TIMEOUT', '%s [pid=%d, time=%.1fsec]' % (self._spec.shortname, self._process.pid, elapsed), stdout(), do_newline=True) self.kill() self.result.state = 'TIMEOUT' self.result.num_failures += 1 @@ -364,7 +366,7 @@ class Job(object): class Jobset(object): """Manages one run of jobs.""" - def __init__(self, check_cancelled, maxjobs, newline_on_success, travis, + def __init__(self, check_cancelled, maxjobs, maxjobs_cpu_agnostic, newline_on_success, travis, stop_on_failure, add_env, quiet_success, max_time): self._running = set() self._check_cancelled = check_cancelled @@ -372,6 +374,7 @@ class Jobset(object): self._failures = 0 self._completed = 0 self._maxjobs = maxjobs + self._maxjobs_cpu_agnostic = maxjobs_cpu_agnostic self._newline_on_success = newline_on_success self._travis = travis self._stop_on_failure = stop_on_failure @@ -406,7 +409,9 @@ class Jobset(object): if self.cancelled(): return False current_cpu_cost = self.cpu_cost() if current_cpu_cost == 0: break - if current_cpu_cost + spec.cpu_cost <= self._maxjobs: break + if current_cpu_cost + spec.cpu_cost <= self._maxjobs: + if len(self._running) < self._maxjobs_cpu_agnostic: + break self.reap() if self.cancelled(): return False job = Job(spec, @@ -491,6 +496,7 @@ def tag_remaining(xs): def run(cmdlines, check_cancelled=_never_cancelled, maxjobs=None, + maxjobs_cpu_agnostic=None, newline_on_success=False, travis=False, infinite_runs=False, @@ -509,6 +515,7 @@ def run(cmdlines, return 0, resultset js = Jobset(check_cancelled, maxjobs if maxjobs is not None else _DEFAULT_MAX_JOBS, + maxjobs_cpu_agnostic if maxjobs_cpu_agnostic is not None else _DEFAULT_MAX_JOBS, newline_on_success, travis, stop_on_failure, add_env, quiet_success, max_time) for cmdline, remaining in tag_remaining(cmdlines): diff --git a/tools/run_tests/python_utils/upload_test_results.py b/tools/run_tests/python_utils/upload_test_results.py index 580e7f7d81..15e827769e 100644 --- a/tools/run_tests/python_utils/upload_test_results.py +++ b/tools/run_tests/python_utils/upload_test_results.py @@ -102,6 +102,15 @@ def upload_results_to_bq(resultset, bq_table, args, platform): test_results['timestamp'] = time.strftime('%Y-%m-%d %H:%M:%S') row = big_query_utils.make_row(str(uuid.uuid4()), test_results) - if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET_ID, bq_table, [row]): - print('Error uploading result to bigquery.') - sys.exit(1) + + # TODO(jtattermusch): rows are inserted one by one, very inefficient + max_retries = 3 + for attempt in range(max_retries): + if big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET_ID, bq_table, [row]): + break + else: + if attempt < max_retries - 1: + print('Error uploading result to bigquery, will retry.') + else: + print('Error uploading result to bigquery, all attempts failed.') + sys.exit(1) |