diff options
author | Jacques Pienaar <jpienaar@google.com> | 2018-03-21 12:07:51 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-03-21 12:10:30 -0700 |
commit | 2d0531d72c7dcbb0e149cafdd3a16ee8c3ff357a (patch) | |
tree | 1179ecdd684d10c6549f85aa95f33dd79463a093 /tensorflow/tools/dist_test | |
parent | cbede3ea7574b36f429710bc08617d08455bcc21 (diff) |
Merge changes from github.
PiperOrigin-RevId: 189945839
Diffstat (limited to 'tensorflow/tools/dist_test')
-rw-r--r-- | tensorflow/tools/dist_test/README.md | 8 | ||||
-rwxr-xr-x | tensorflow/tools/dist_test/local_test.sh | 22 | ||||
-rw-r--r-- | tensorflow/tools/dist_test/python/mnist_replica.py | 2 |
3 files changed, 17 insertions, 15 deletions
diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md index c1b1f79bbd..228d5ee35d 100644 --- a/tensorflow/tools/dist_test/README.md +++ b/tensorflow/tools/dist_test/README.md @@ -17,6 +17,14 @@ cesnsu model: ./local_test.sh --model_name CENSUS_WIDENDEEP +You can test specify version of TensorFlow: + +```shell +./local_test.sh ${whl_file_url} +``` + +For example, you can find these TensorFlow python package URLs from [here](https://www.tensorflow.org/install/install_linux#the_url_of_the_tensorflow_python_package) for Ubuntu. + **2) Launch a remote k8s cluster on Google Kubernetes Engine (GKE) and run the test suite on it** diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 435f9d0dc9..caae7fd530 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -16,12 +16,11 @@ # # Tests distributed TensorFlow on a locally running TF GRPC cluster. # -# This script peforms the following steps: -# 1) Build the docker-in-docker (dind) image capable of running docker and -# Kubernetes (k8s) cluster inside. +# This script performs the following steps: +# 1) Build the docker image capable of running distributed TensorFlow in docker. # 2) Run a container from the aforementioned image and start docker service # in it -# 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container +# 3) Call a script to launch a distributed TensorFlow GRPC cluster inside the container # and run the distributed test suite. # # Usage: local_test.sh <whl_file_location> @@ -64,15 +63,9 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" -LOCAL_K8S_CACHE=${HOME}/kubernetes -# Helper function -get_container_id_by_image_name() { - # Get the id of a container by image name - # Usage: get_docker_container_id_by_image_name <img_name> - - docker ps | grep $1 | awk '{print $1}' -} +# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below +DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" # Parse input arguments LEAVE_CONTAINER_RUNNING=0 @@ -84,7 +77,8 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - die "whl file location is not specified" + WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} + echo "use default whl file location" fi while true; do @@ -121,7 +115,7 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Get utility functions source ${DIR}/scripts/utils.sh -# Build docker-in-docker image for local k8s cluster. +# Build docker image for local distributed TensorFlow cluster. NO_CACHE_FLAG="" if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] && [[ "${TF_DIST_DOCKER_NO_CACHE}" != "0" ]]; then diff --git a/tensorflow/tools/dist_test/python/mnist_replica.py b/tensorflow/tools/dist_test/python/mnist_replica.py index a2d12442c4..d6e7f317dd 100644 --- a/tensorflow/tools/dist_test/python/mnist_replica.py +++ b/tensorflow/tools/dist_test/python/mnist_replica.py @@ -56,7 +56,7 @@ flags.DEFINE_integer("task_index", None, flags.DEFINE_integer("num_gpus", 1, "Total number of gpus for each machine." "If you don't use GPU, please set it to '0'") flags.DEFINE_integer("replicas_to_aggregate", None, - "Number of replicas to aggregate before parameter update" + "Number of replicas to aggregate before parameter update " "is applied (For sync_replicas mode only; default: " "num_workers)") flags.DEFINE_integer("hidden_units", 100, |