diff options

field | value | date
---|---|---
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2016-10-10 10:26:22 -0800
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-10-10 11:35:00 -0700
commit | edaf3b342db4afa1c872da541fb0ac176a4e8ef9 (patch) |
tree | b976073fdc2a6404cbdc3ee323a637e2e1b16846 /tensorflow/tools/dist_test |
parent | d1518c26530daaee854e73365bd7dfb9a2f69dbd (diff) |

Merge changes from github.
Change: 135698415
Diffstat (limited to 'tensorflow/tools/dist_test')

mode | file | lines changed
---|---|---
-rw-r--r-- | tensorflow/tools/dist_test/Dockerfile | 35
-rw-r--r-- | tensorflow/tools/dist_test/Dockerfile.local | 13
-rw-r--r-- | tensorflow/tools/dist_test/README.md | 4
-rwxr-xr-x | tensorflow/tools/dist_test/local_test.sh | 48
-rwxr-xr-x | tensorflow/tools/dist_test/remote_test.sh | 37
-rw-r--r-- | tensorflow/tools/dist_test/server/Dockerfile | 2
-rw-r--r-- | tensorflow/tools/dist_test/server/Dockerfile.test | 2

7 files changed, 107 insertions, 34 deletions

diff --git a/tensorflow/tools/dist_test/Dockerfile b/tensorflow/tools/dist_test/Dockerfile index 3fc50de9d5..9888cfd14f 100644 --- a/tensorflow/tools/dist_test/Dockerfile +++ b/tensorflow/tools/dist_test/Dockerfile @@ -1,13 +1,35 @@ -FROM ubuntu:14.04 +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Docker image for testing distributed (GRPC) TensorFlow on Google Container +# Engine (GKE). +# +# See ./remote_test.sh for usage example. + +FROM ubuntu:16.04 MAINTAINER Shanqing Cai <cais@google.com> RUN apt-get update RUN apt-get install -y --no-install-recommends \ - curl \ python \ python-numpy \ - python-pip + python-pip \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Install Google Cloud SDK RUN curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash @@ -17,10 +39,11 @@ RUN ./install_google_cloud_sdk.bash --disable-prompts --install-dir=/var/gcloud # Install kubectl RUN /var/gcloud/google-cloud-sdk/bin/gcloud components install kubectl -# Install nightly TensorFlow pip +# Install TensorFlow pip whl # TODO(cais): Should we build it locally instead? 
-RUN pip install \ - https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0-cp27-none-linux_x86_64.whl +COPY tensorflow-*.whl / +RUN pip install /tensorflow-*.whl +RUN rm -f /tensorflow-*.whl # Copy test files COPY scripts /var/tf-dist-test/scripts diff --git a/tensorflow/tools/dist_test/Dockerfile.local b/tensorflow/tools/dist_test/Dockerfile.local index eb615be1fe..0cfb8d529e 100644 --- a/tensorflow/tools/dist_test/Dockerfile.local +++ b/tensorflow/tools/dist_test/Dockerfile.local @@ -23,19 +23,16 @@ MAINTAINER Shanqing Cai <cais@google.com> # Pick up some TF dependencies. RUN apt-get update && apt-get install -y \ - curl \ python-numpy \ python-pip \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ - python get-pip.py && \ - rm get-pip.py - -# Install TensorFlow CPU version from nightly build. -RUN pip --no-cache-dir install \ - https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0-cp27-none-linux_x86_64.whl +# Install TensorFlow pip whl +# TODO(cais): Should we build it locally instead? +COPY tensorflow-*.whl / +RUN pip install /tensorflow-*.whl +RUN rm -f /tensorflow-*.whl ADD . /var/tf_dist_test diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md index 91f64dd9c3..39c040e051 100644 --- a/tensorflow/tools/dist_test/README.md +++ b/tensorflow/tools/dist_test/README.md @@ -112,5 +112,5 @@ servers. For example: kubectl create -f tf-k8s-with-lb.yaml -See [Kubernetes kubectl documentation] -(http://kubernetes.io/docs/user-guide/kubectl-overview/) for more details. 
+See [Kubernetes kubectl documentation](http://kubernetes.io/docs/user-guide/kubectl-overview/) +for more details. diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 727258c6d8..e46e60dd81 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -24,16 +24,20 @@ # 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container # and run the distributed test suite. # -# Usage: local_test.sh [--leave_container_running] +# Usage: local_test.sh <whl_url> +# [--leave_container_running] # [--model_name <MODEL_NAME>] # [--num_workers <NUM_WORKERS>] # [--num_parameter_servers <NUM_PARAMETER_SERVERS>] # [--sync_replicas] # -# E.g., local_test.sh --model_name CENSUS_WIDENDEEP -# local_test.sh --num_workers 3 --num_parameter_servers 3 +# E.g., local_test.sh <whl_url> --model_name CENSUS_WIDENDEEP +# local_test.sh <whl_url> --num_workers 3 --num_parameter_servers 3 # # Arguments: +# <whl_url> +# Specify custom TensorFlow whl file URL to install in the test Docker image. +# # --leave_container_running: Do not stop the docker-in-docker container after # the termination of the tests, e.g., for debugging # @@ -48,6 +52,7 @@ # (workers) will be aggregated before applied, which avoids stale parameter # updates. 
# +# # In addition, this script obeys the following environment variables: # TF_DIST_DOCKER_NO_CACHE: do not use cache when building docker images @@ -72,6 +77,11 @@ NUM_WORKERS=2 NUM_PARAMETER_SERVERS=2 SYNC_REPLICAS_FLAG="" +WHL_URL=${1} +if [[ -z "${WHL_URL}" ]]; then + die "whl file URL is not specified" +fi + while true; do if [[ $1 == "--leave_container_running" ]]; then LEAVE_CONTAINER_RUNNING=1 @@ -84,6 +94,8 @@ while true; do NUM_PARAMETER_SERVERS=$2 elif [[ $1 == "--sync_replicas" ]]; then SYNC_REPLICAS_FLAG="--sync_replicas" + elif [[ $1 == "--whl_url" ]]; then + WHL_URL=$2 fi shift @@ -104,25 +116,35 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Get utility functions source ${DIR}/scripts/utils.sh - -# First, make sure that no docker-in-docker container of the same image -# is already running -if [[ ! -z $(get_container_id_by_image_name ${DOCKER_IMG_NAME}) ]]; then - die "It appears that there is already at least one Docker container "\ -"of image name ${DOCKER_IMG_NAME} running. Please stop it before trying again" -fi - -# Build docker-in-docker image for local k8s cluster +# Build docker-in-docker image for local k8s cluster. NO_CACHE_FLAG="" if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] && [[ "${TF_DIST_DOCKER_NO_CACHE}" != "0" ]]; then NO_CACHE_FLAG="--no-cache" fi +# Create docker build context directory. +BUILD_DIR=$(mktemp -d) +echo "" +echo "Using whl file URL: ${WHL_URL}" +echo "Building in temporary directory: ${BUILD_DIR}" + +cp -r ${DIR}/* "${BUILD_DIR}"/ || \ + die "Failed to copy files to ${BUILD_DIR}" + +# Download whl file into the build context directory. +wget -P "${BUILD_DIR}" ${WHL_URL} || \ + die "Failed to download tensorflow whl file from URL: ${WHL_URL}" + +# Build docker image for test. 
docker build ${NO_CACHE_FLAG} -t ${DOCKER_IMG_NAME} \ - -f ${DIR}/Dockerfile.local ${DIR} || \ + -f "${BUILD_DIR}/Dockerfile.local" "${BUILD_DIR}" || \ die "Failed to build docker image: ${DOCKER_IMG_NAME}" +# Clean up docker build context directory. +rm -rf "${BUILD_DIR}" + +# Run docker image for test. docker run ${DOCKER_IMG_NAME} \ /var/tf_dist_test/scripts/dist_mnist_test.sh \ --ps_hosts "localhost:2000,localhost:2001" \ diff --git a/tensorflow/tools/dist_test/remote_test.sh b/tensorflow/tools/dist_test/remote_test.sh index 1d4a52c4c2..b1e6b1e71e 100755 --- a/tensorflow/tools/dist_test/remote_test.sh +++ b/tensorflow/tools/dist_test/remote_test.sh @@ -20,13 +20,17 @@ # runs from within a container based on the image. # # Usage: -# remote_test.sh [--setup_cluster_only] +# remote_test.sh <whl_url> +# [--setup_cluster_only] # [--num_workers <NUM_WORKERS>] # [--num_parameter_servers <NUM_PARAMETER_SERVERS>] # [--sync_replicas] # # Arguments: -# --setup_cluster_only: +# <whl_url> +# Specify custom TensorFlow whl file URL to install in the test Docker image. +# +# --setup_cluster_only: # Setup the TensorFlow k8s cluster only, and do not perform testing of # the distributed runtime. # @@ -42,6 +46,7 @@ # updates. # # +# # If any of the following environment variable has non-empty values, it will # be mapped into the docker container to override the default values (see # dist_test.sh) @@ -95,8 +100,34 @@ if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] && NO_CACHE_FLAG="--no-cache" fi +# Parse command-line arguments. +WHL_URL=${1} +if [[ -z "${WHL_URL}" ]]; then + die "whl URL is not specified" +fi + +# Create docker build context directory. +BUILD_DIR=$(mktemp -d) +echo "" +echo "Using custom whl file URL: ${WHL_URL}" +echo "Building in temporary directory: ${BUILD_DIR}" + +cp -r ${DIR}/* ${BUILD_DIR}/ || \ + die "Failed to copy files to ${BUILD_DIR}" + +# Download whl file into the build context directory. 
+wget -P "${BUILD_DIR}" ${WHL_URL} || \ + die "Failed to download tensorflow whl file from URL: ${WHL_URL}" + +# Build docker image for test. docker build ${NO_CACHE_FLAG} \ - -t ${DOCKER_IMG_NAME} -f "${DIR}/Dockerfile" "${DIR}" + -t ${DOCKER_IMG_NAME} -f "${BUILD_DIR}/Dockerfile" "${BUILD_DIR}" || \ + die "Failed to build docker image: ${DOCKER_IMG_NAME}" + +# Clean up docker build context directory. +rm -rf "${BUILD_DIR}" + +# Run docker image for test. KEY_FILE=${TF_DIST_GCLOUD_KEY_FILE:-"${HOME}/gcloud-secrets/tensorflow-testing.json"} docker run --rm -v ${KEY_FILE}:/var/gcloud/secrets/tensorflow-testing.json \ diff --git a/tensorflow/tools/dist_test/server/Dockerfile b/tensorflow/tools/dist_test/server/Dockerfile index d1d11e0524..9cc61a826b 100644 --- a/tensorflow/tools/dist_test/server/Dockerfile +++ b/tensorflow/tools/dist_test/server/Dockerfile @@ -36,7 +36,7 @@ RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ # Install TensorFlow CPU version from nightly build RUN pip --no-cache-dir install \ - https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0-cp27-none-linux_x86_64.whl + https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl # Copy files, including the GRPC server binary at # server/grpc_tensorflow_server.py diff --git a/tensorflow/tools/dist_test/server/Dockerfile.test b/tensorflow/tools/dist_test/server/Dockerfile.test index 298d1854a7..5bafa29468 100644 --- a/tensorflow/tools/dist_test/server/Dockerfile.test +++ b/tensorflow/tools/dist_test/server/Dockerfile.test @@ -42,7 +42,7 @@ RUN pip install --upgrade pandas==0.18.1 # Install TensorFlow CPU version. 
RUN pip --no-cache-dir install \ - https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0-cp27-none-linux_x86_64.whl + https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl # Copy files, including the GRPC server binary at # server/grpc_tensorflow_server.py |