diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-02-28 12:35:30 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-02-28 12:49:14 -0800 |
commit | cc8f7a774726bff6ed44526a1a604b6efabcd850 (patch) | |
tree | 41b5c52bb44c97eb5f5eb51963ac624c8089f701 /tensorflow/tools/dist_test | |
parent | d08b72a3e226f87ded59e3f6d2d820c49e8c78f1 (diff) |
Remove kubectl_util and benchmark_util from tensorflow repo since they now live
under benchmarks repo (https://github.com/tensorflow/benchmarks).
Change: 148802196
Diffstat (limited to 'tensorflow/tools/dist_test')
-rw-r--r-- | tensorflow/tools/dist_test/python/BUILD | 23 | ||||
-rw-r--r-- | tensorflow/tools/dist_test/python/benchmark_util.py | 77 | ||||
-rw-r--r-- | tensorflow/tools/dist_test/python/benchmark_util_test.py | 58 | ||||
-rw-r--r-- | tensorflow/tools/dist_test/scripts/BUILD | 16 | ||||
-rw-r--r-- | tensorflow/tools/dist_test/scripts/kubectl_util.py | 196 | ||||
-rw-r--r-- | tensorflow/tools/dist_test/scripts/kubectl_util_test.py | 72 |
6 files changed, 0 insertions, 442 deletions
diff --git a/tensorflow/tools/dist_test/python/BUILD b/tensorflow/tools/dist_test/python/BUILD deleted file mode 100644 index a2927c5bfc..0000000000 --- a/tensorflow/tools/dist_test/python/BUILD +++ /dev/null @@ -1,23 +0,0 @@ -# Python tools for running distributed benchmarks. - -licenses(["notice"]) # Apache 2.0 - -py_library( - name = "benchmark_util_lib", - srcs = ["benchmark_util.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/core:protos_all_py", - "//tensorflow/python:platform", - ], -) - -py_test( - name = "benchmark_util_test", - srcs = ["benchmark_util_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":benchmark_util_lib", - "//tensorflow/python:client_testlib", - ], -) diff --git a/tensorflow/tools/dist_test/python/benchmark_util.py b/tensorflow/tools/dist_test/python/benchmark_util.py deleted file mode 100644 index 5727908d6f..0000000000 --- a/tensorflow/tools/dist_test/python/benchmark_util.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Provides helper functions for distributed benchmarks running on Jenkins.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import calendar -from collections import namedtuple -import os - -from google.protobuf import json_format - -from tensorflow.core.util import test_log_pb2 -from tensorflow.python.platform import gfile - - -_OUTPUT_FILE_ENV_VAR = 'TF_DIST_BENCHMARK_RESULTS_FILE' -_TEST_NAME_ENV_VAR = 'TF_DIST_BENCHMARK_NAME' - - -# Represents a single timing entry where -# - name is a string -# - timing is the latency to track (for e.g. mean time per iter) -# - iters is the number of iterations -TimingEntry = namedtuple( - 'TimingEntry', ['name', 'timing', 'iters']) - - -def store_data_in_json(timing_entries, start_time, output_file=None): - """Stores benchmark results in JSON format. - - Args: - timing_entries: list of TimingEntry objects. - start_time: (datetime) start time of the test run. - output_file: if specified, writes benchmark results to output_file. - If not specified, writes results to the file specified by - BENCHMARK_RESULTS_FILE environment variable. - - Raises: - ValueError: when neither output_file is passed in nor - BENCHMARK_RESULTS_FILE is set. - """ - test_result = test_log_pb2.TestResults( - start_time=calendar.timegm(start_time.timetuple())) - if not output_file: - if _OUTPUT_FILE_ENV_VAR not in os.environ: - raise ValueError('Could not determine location to store results at.') - output_file = os.environ[_OUTPUT_FILE_ENV_VAR] - - with gfile.Open(output_file, 'wb') as jsonfile: - if _TEST_NAME_ENV_VAR in os.environ: - test_result.name = os.environ['POD_NAME_PREFIX'] - else: - test_result.name = 'TestBenchmark' - - for timing_entry in timing_entries: - test_result.entries.entry.add( - name=timing_entry.name, - iters=timing_entry.iters, - wall_time=timing_entry.timing - ) - json_test_results = json_format.MessageToJson(test_result) - jsonfile.write(json_test_results) diff --git a/tensorflow/tools/dist_test/python/benchmark_util_test.py b/tensorflow/tools/dist_test/python/benchmark_util_test.py deleted file mode 100644 index e8a71f3153..0000000000 --- a/tensorflow/tools/dist_test/python/benchmark_util_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tensorflow.tools.dist_test.python.benchmark_util.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import datetime -import json -import os - -from tensorflow.python.platform import googletest -from tensorflow.python.platform import test -from tensorflow.tools.dist_test.python import benchmark_util - - -class BenchmarkUtilTest(googletest.TestCase): - - def testStoreDataWithNoEntries(self): - output_file = os.path.join(test.get_temp_dir(), 'test_output1.json') - timing_entries = [] - benchmark_util.store_data_in_json( - timing_entries, datetime.date(2017, 1, 1), output_file) - json_output = json.loads(open(output_file, 'r').read()) - self.assertEquals('TestBenchmark', json_output['name']) - self.assertEquals(u'1483228800', json_output['startTime']) - - def testStoreDataWithEntries(self): - output_file = os.path.join(test.get_temp_dir(), 'test_output2.json') - timing_entries = [ - benchmark_util.TimingEntry('test', 0.1, 1)] - benchmark_util.store_data_in_json( - timing_entries, datetime.date(2017, 1, 1), output_file) - json_output = json.loads(open(output_file, 'r').read()) - - self.assertEquals(1, len(json_output['entries']['entry'])) - self.assertEquals('test', json_output['entries']['entry'][0]['name']) - self.assertEquals(0.1, json_output['entries']['entry'][0]['wallTime']) - self.assertEquals(u'1', json_output['entries']['entry'][0]['iters']) - self.assertEquals(u'1483228800', json_output['startTime']) - self.assertEquals('TestBenchmark', json_output['name']) - - -if __name__ == '__main__': - googletest.main() diff --git a/tensorflow/tools/dist_test/scripts/BUILD b/tensorflow/tools/dist_test/scripts/BUILD index 9041a19af7..abcd521a26 100644 --- a/tensorflow/tools/dist_test/scripts/BUILD +++ b/tensorflow/tools/dist_test/scripts/BUILD @@ -19,19 +19,3 @@ py_test( "//tensorflow/python:client_testlib", ], ) - -py_library( - name = "kubectl_util_lib", - srcs = ["kubectl_util.py"], - srcs_version = "PY2AND3", -) - -py_test( - name = "kubectl_util_test", - srcs = ["kubectl_util_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":kubectl_util_lib", - "//tensorflow/python:client_testlib", - ], -) diff --git a/tensorflow/tools/dist_test/scripts/kubectl_util.py b/tensorflow/tools/dist_test/scripts/kubectl_util.py deleted file mode 100644 index ae0c5601e9..0000000000 --- a/tensorflow/tools/dist_test/scripts/kubectl_util.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utils for running, waiting and stopping benchmark jobs on kubernetes. - -Functions in this file assume kubernetes jobs have 'name-prefix' and 'job' -selectors set. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import logging -import subprocess -import time - - -_KUBECTL = 'kubectl' -WAIT_PERIOD_SECONDS = 20 - - -class TimeoutError(Exception): - pass - - -def _WaitUntil(timeout, predicate, *args): - start_time = time.time() - while time.time() - start_time < timeout: - time.sleep(WAIT_PERIOD_SECONDS) - if predicate(*args): - return True - return False - - -def _GetPodNames(pod_name_prefix, job_name=None): - """Get pod names based on the pod_name_prefix and job_name. - - Args: - pod_name_prefix: value of 'name-prefix' selector. - job_name: value of 'job' selector. If None, pod names will be - selected only based on 'name-prefix' selector. - - Returns: - List of pod names. - """ - pod_list_command = [ - _KUBECTL, 'get', 'pods', '-o', 'name', - '-l', _GetJobSelector(pod_name_prefix, job_name)] - logging.info('Command to get pod names: %s', ' '.join(pod_list_command)) - output = subprocess.check_output(pod_list_command, universal_newlines=True) - pod_names = [name for name in output.strip().split('\n') if name] - logging.info('Pod names: "%s"', ','.join(pod_names)) - return pod_names - - -def CreatePods(pod_name, yaml_file): - """Creates pods based on the given kubernetes config. - - Args: - pod_name: 'name-prefix' selector for the pods. - yaml_file: kubernetes yaml config. - - Raises: - TimeoutError: if jobs didn't come up for a long time. - """ - command = [_KUBECTL, 'create', '--filename=%s' % yaml_file] - logging.info('Creating pods: %s', subprocess.list2cmdline(command)) - subprocess.check_call(command) - - if not _WaitUntil(100, _GetPodNames, pod_name): - raise TimeoutError( - 'Timed out waiting for %s pod to come up.' % pod_name) - - -def DeletePods(pod_name, yaml_file): - """Deletes pods based on the given kubernetes config. - - Args: - pod_name: 'name-prefix' selector for the pods. - yaml_file: kubernetes yaml config. - - Raises: - TimeoutError: if jobs didn't terminate for a long time. - """ - command = [_KUBECTL, 'delete', '--filename=%s' % yaml_file] - logging.info('Deleting pods: %s', ' '.join(command)) - subprocess.call(command) - - def CheckPodsAreTerminated(): - return not _GetPodNames(pod_name) - if not _WaitUntil(100, CheckPodsAreTerminated): - raise TimeoutError( - 'Timed out waiting for %s pod to terminate.' % pod_name) - - -def _GetJobSelector(pod_name_prefix, job_name=None): - selector = 'name-prefix in (%s)' % pod_name_prefix - if job_name: - selector += ',job in (%s)' % job_name - return selector - - -def WaitForCompletion(pod_name_prefix, job_name='worker', timeout=2*60*60): - """Waits until jobs matching pod_name and job_name are terminated. - - Args: - pod_name_prefix: value of 'name-prefix' selector. - job_name: value of 'job' selector. - timeout: how long to wait for jobs to terminate before timing out. - - Returns: - True if jobs terminated with success, False otherwise. - - Raises: - TimeoutError: if jobs haven't terminated after timeout. - ValueError: if we couldn't find jobs matching pod_name and job_name. - """ - # Jsonpath that selects comma-separated exit codes (followed by extra comma - # at the end). - # If a job doesn't have an exit code yet, empty string will be returned - # instead. For ex. output for 2 jobs where one is missing an exit code - # and the other one has an exit code of 0 would look like: ,0, - last_state_query = ( - 'jsonpath=\'{range .items[*]}' - '{.status.containerStatuses[?(@.lastState.terminated)]' - '.lastState.terminated.exitCode},{end}\'') - status_command = [ - _KUBECTL, 'get', '-o', last_state_query, - 'pods', '-l', _GetJobSelector(pod_name_prefix, job_name) - ] - - exit_codes = [] - start_time = time.time() - while time.time() - start_time < timeout: - # Output of check_output is a string that starts and ends with '. - output = subprocess.check_output( - status_command, universal_newlines=True).strip('\'') - logging.debug('Pod status: %s', output) - if not output: - raise ValueError( - 'Query did not match any data. Query: %s' % ' '.join(status_command)) - # Output will end with an extra comma. So, we remove it before splitting. - exit_codes = output[:-1].split(',') - if '' not in exit_codes: # fetched all exit codes - break - time.sleep(WAIT_PERIOD_SECONDS) - - if '' in exit_codes: - raise TimeoutError( - 'Timed out waiting for %s %s jobs to finish.' % - (pod_name_prefix, job_name)) - _PrintLogs(pod_name_prefix, job_name) - - failed_job_count = sum(code != '0' for code in exit_codes) - if failed_job_count > 0: - logging.error('%d out of %d jobs failed. Exit codes: %s', - failed_job_count, len(exit_codes), ','.join(exit_codes)) - return False - return True - - -def _PrintLogs(pod_name_prefix, job_name): - """Prints pod logs. - - If a pod has been restarted, prints logs from previous run. Otherwise, - prints the logs from current run. We print logs for pods selected - based on pod_name_prefix and job_name. - - Args: - pod_name_prefix: value of 'name-prefix' selector. - job_name: value of 'job' selector. - """ - for pod_name in _GetPodNames(pod_name_prefix, job_name): - try: - # Get previous logs. - logs_command = [_KUBECTL, 'logs', '-p', pod_name] - logging.info('Command to get logs: %s', ' '.join(logs_command)) - output = subprocess.check_output(logs_command, universal_newlines=True) - except subprocess.CalledProcessError: - # We couldn't get previous logs, so we will try to get current logs. - logs_command = [_KUBECTL, 'logs', pod_name] - logging.info('Command to get logs: %s', ' '.join(logs_command)) - output = subprocess.check_output(logs_command, universal_newlines=True) - print('%s logs:' % pod_name) - print(output) diff --git a/tensorflow/tools/dist_test/scripts/kubectl_util_test.py b/tensorflow/tools/dist_test/scripts/kubectl_util_test.py deleted file mode 100644 index e2ddf79202..0000000000 --- a/tensorflow/tools/dist_test/scripts/kubectl_util_test.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tensorflow.tools.dist_test.scripts.kubectl_util.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import subprocess - -from tensorflow.python.platform import googletest -from tensorflow.python.platform import test -from tensorflow.tools.dist_test.scripts import kubectl_util - - -kubectl_util.WAIT_PERIOD_SECONDS = 1 - - -class KubectlUtilTest(googletest.TestCase): - - @test.mock.patch.object(subprocess, 'check_output') - @test.mock.patch.object(subprocess, 'check_call') - def testCreatePods(self, mock_check_call, mock_check_output): - mock_check_output.return_value = 'nonempty' - kubectl_util.CreatePods('test_pod', 'test.yaml') - mock_check_call.assert_called_once_with( - ['kubectl', 'create', '--filename=test.yaml']) - mock_check_output.assert_called_once_with( - ['kubectl', 'get', 'pods', '-o', 'name', '-l', - 'name-prefix in (test_pod)'], universal_newlines=True) - - @test.mock.patch.object(subprocess, 'check_output') - @test.mock.patch.object(subprocess, 'call') - def testDeletePods(self, mock_check_call, mock_check_output): - mock_check_output.return_value = '' - kubectl_util.DeletePods('test_pod', 'test.yaml') - mock_check_call.assert_called_once_with( - ['kubectl', 'delete', '--filename=test.yaml']) - mock_check_output.assert_called_once_with( - ['kubectl', 'get', 'pods', '-o', 'name', '-l', - 'name-prefix in (test_pod)'], universal_newlines=True) - - @test.mock.patch.object(subprocess, 'check_output') - def testWaitForCompletion(self, mock_check_output): - # Test success - mock_check_output.return_value = '\'0,0,\'' - self.assertTrue(kubectl_util.WaitForCompletion('test_pod')) - - # Test failure - mock_check_output.return_value = '\'0,1,\'' - self.assertFalse(kubectl_util.WaitForCompletion('test_pod')) - - # Test timeout - with self.assertRaises(kubectl_util.TimeoutError): - mock_check_output.return_value = '\'0,,\'' - kubectl_util.WaitForCompletion('test_pod', timeout=5) - - -if __name__ == '__main__': - googletest.main() |