about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/tools/dist_test
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2016-10-10 10:26:22 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-10-10 11:35:00 -0700
commit: edaf3b342db4afa1c872da541fb0ac176a4e8ef9 (patch)
tree: b976073fdc2a6404cbdc3ee323a637e2e1b16846 /tensorflow/tools/dist_test
parent: d1518c26530daaee854e73365bd7dfb9a2f69dbd (diff)
Merge changes from github.
Change: 135698415
Diffstat (limited to 'tensorflow/tools/dist_test')
-rw-r--r-- tensorflow/tools/dist_test/Dockerfile 35
-rw-r--r-- tensorflow/tools/dist_test/Dockerfile.local 13
-rw-r--r-- tensorflow/tools/dist_test/README.md 4
-rwxr-xr-x tensorflow/tools/dist_test/local_test.sh 48
-rwxr-xr-x tensorflow/tools/dist_test/remote_test.sh 37
-rw-r--r-- tensorflow/tools/dist_test/server/Dockerfile 2
-rw-r--r-- tensorflow/tools/dist_test/server/Dockerfile.test 2
7 files changed, 107 insertions, 34 deletions
diff --git a/tensorflow/tools/dist_test/Dockerfile b/tensorflow/tools/dist_test/Dockerfile
index 3fc50de9d5..9888cfd14f 100644
--- a/tensorflow/tools/dist_test/Dockerfile
+++ b/tensorflow/tools/dist_test/Dockerfile
@@ -1,13 +1,35 @@
-FROM ubuntu:14.04
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Docker image for testing distributed (GRPC) TensorFlow on Google Container
+# Engine (GKE).
+#
+# See ./remote_test.sh for usage example.
+
+FROM ubuntu:16.04
MAINTAINER Shanqing Cai <cais@google.com>
RUN apt-get update
RUN apt-get install -y --no-install-recommends \
- curl \
python \
python-numpy \
- python-pip
+ python-pip \
+ && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
# Install Google Cloud SDK
RUN curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash
@@ -17,10 +39,11 @@ RUN ./install_google_cloud_sdk.bash --disable-prompts --install-dir=/var/gcloud
# Install kubectl
RUN /var/gcloud/google-cloud-sdk/bin/gcloud components install kubectl
-# Install nightly TensorFlow pip
+# Install TensorFlow pip whl
# TODO(cais): Should we build it locally instead?
-RUN pip install \
- https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0-cp27-none-linux_x86_64.whl
+COPY tensorflow-*.whl /
+RUN pip install /tensorflow-*.whl
+RUN rm -f /tensorflow-*.whl
# Copy test files
COPY scripts /var/tf-dist-test/scripts
diff --git a/tensorflow/tools/dist_test/Dockerfile.local b/tensorflow/tools/dist_test/Dockerfile.local
index eb615be1fe..0cfb8d529e 100644
--- a/tensorflow/tools/dist_test/Dockerfile.local
+++ b/tensorflow/tools/dist_test/Dockerfile.local
@@ -23,19 +23,16 @@ MAINTAINER Shanqing Cai <cais@google.com>
# Pick up some TF dependencies.
RUN apt-get update && apt-get install -y \
- curl \
python-numpy \
python-pip \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
-RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
- python get-pip.py && \
- rm get-pip.py
-
-# Install TensorFlow CPU version from nightly build.
-RUN pip --no-cache-dir install \
- https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0-cp27-none-linux_x86_64.whl
+# Install TensorFlow pip whl
+# TODO(cais): Should we build it locally instead?
+COPY tensorflow-*.whl /
+RUN pip install /tensorflow-*.whl
+RUN rm -f /tensorflow-*.whl
ADD . /var/tf_dist_test
diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md
index 91f64dd9c3..39c040e051 100644
--- a/tensorflow/tools/dist_test/README.md
+++ b/tensorflow/tools/dist_test/README.md
@@ -112,5 +112,5 @@ servers. For example:
kubectl create -f tf-k8s-with-lb.yaml
-See [Kubernetes kubectl documentation]
-(http://kubernetes.io/docs/user-guide/kubectl-overview/) for more details.
+See [Kubernetes kubectl documentation](http://kubernetes.io/docs/user-guide/kubectl-overview/)
+for more details.
diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh
index 727258c6d8..e46e60dd81 100755
--- a/tensorflow/tools/dist_test/local_test.sh
+++ b/tensorflow/tools/dist_test/local_test.sh
@@ -24,16 +24,20 @@
# 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container
# and run the distributed test suite.
#
-# Usage: local_test.sh [--leave_container_running]
+# Usage: local_test.sh <whl_url>
+# [--leave_container_running]
# [--model_name <MODEL_NAME>]
# [--num_workers <NUM_WORKERS>]
# [--num_parameter_servers <NUM_PARAMETER_SERVERS>]
# [--sync_replicas]
#
-# E.g., local_test.sh --model_name CENSUS_WIDENDEEP
-# local_test.sh --num_workers 3 --num_parameter_servers 3
+# E.g., local_test.sh <whl_url> --model_name CENSUS_WIDENDEEP
+# local_test.sh <whl_url> --num_workers 3 --num_parameter_servers 3
#
# Arguments:
+# <whl_url>
+# Specify custom TensorFlow whl file URL to install in the test Docker image.
+#
# --leave_container_running: Do not stop the docker-in-docker container after
# the termination of the tests, e.g., for debugging
#
@@ -48,6 +52,7 @@
# (workers) will be aggregated before applied, which avoids stale parameter
# updates.
#
+#
# In addition, this script obeys the following environment variables:
# TF_DIST_DOCKER_NO_CACHE: do not use cache when building docker images
@@ -72,6 +77,11 @@ NUM_WORKERS=2
NUM_PARAMETER_SERVERS=2
SYNC_REPLICAS_FLAG=""
+WHL_URL=${1}
+if [[ -z "${WHL_URL}" ]]; then
+ die "whl file URL is not specified"
+fi
+
while true; do
if [[ $1 == "--leave_container_running" ]]; then
LEAVE_CONTAINER_RUNNING=1
@@ -84,6 +94,8 @@ while true; do
NUM_PARAMETER_SERVERS=$2
elif [[ $1 == "--sync_replicas" ]]; then
SYNC_REPLICAS_FLAG="--sync_replicas"
+ elif [[ $1 == "--whl_url" ]]; then
+ WHL_URL=$2
fi
shift
@@ -104,25 +116,35 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Get utility functions
source ${DIR}/scripts/utils.sh
-
-# First, make sure that no docker-in-docker container of the same image
-# is already running
-if [[ ! -z $(get_container_id_by_image_name ${DOCKER_IMG_NAME}) ]]; then
- die "It appears that there is already at least one Docker container "\
-"of image name ${DOCKER_IMG_NAME} running. Please stop it before trying again"
-fi
-
-# Build docker-in-docker image for local k8s cluster
+# Build docker-in-docker image for local k8s cluster.
NO_CACHE_FLAG=""
if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] &&
[[ "${TF_DIST_DOCKER_NO_CACHE}" != "0" ]]; then
NO_CACHE_FLAG="--no-cache"
fi
+# Create docker build context directory.
+BUILD_DIR=$(mktemp -d)
+echo ""
+echo "Using whl file URL: ${WHL_URL}"
+echo "Building in temporary directory: ${BUILD_DIR}"
+
+cp -r ${DIR}/* "${BUILD_DIR}"/ || \
+ die "Failed to copy files to ${BUILD_DIR}"
+
+# Download whl file into the build context directory.
+wget -P "${BUILD_DIR}" ${WHL_URL} || \
+ die "Failed to download tensorflow whl file from URL: ${WHL_URL}"
+
+# Build docker image for test.
docker build ${NO_CACHE_FLAG} -t ${DOCKER_IMG_NAME} \
- -f ${DIR}/Dockerfile.local ${DIR} || \
+ -f "${BUILD_DIR}/Dockerfile.local" "${BUILD_DIR}" || \
die "Failed to build docker image: ${DOCKER_IMG_NAME}"
+# Clean up docker build context directory.
+rm -rf "${BUILD_DIR}"
+
+# Run docker image for test.
docker run ${DOCKER_IMG_NAME} \
/var/tf_dist_test/scripts/dist_mnist_test.sh \
--ps_hosts "localhost:2000,localhost:2001" \
diff --git a/tensorflow/tools/dist_test/remote_test.sh b/tensorflow/tools/dist_test/remote_test.sh
index 1d4a52c4c2..b1e6b1e71e 100755
--- a/tensorflow/tools/dist_test/remote_test.sh
+++ b/tensorflow/tools/dist_test/remote_test.sh
@@ -20,13 +20,17 @@
# runs from within a container based on the image.
#
# Usage:
-# remote_test.sh [--setup_cluster_only]
+# remote_test.sh <whl_url>
+# [--setup_cluster_only]
# [--num_workers <NUM_WORKERS>]
# [--num_parameter_servers <NUM_PARAMETER_SERVERS>]
# [--sync_replicas]
#
# Arguments:
-# --setup_cluster_only:
+# <whl_url>
+# Specify custom TensorFlow whl file URL to install in the test Docker image.
+#
+# --setup_cluster_only:
# Setup the TensorFlow k8s cluster only, and do not perform testing of
# the distributed runtime.
#
@@ -42,6 +46,7 @@
# updates.
#
#
+#
# If any of the following environment variable has non-empty values, it will
# be mapped into the docker container to override the default values (see
# dist_test.sh)
@@ -95,8 +100,34 @@ if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] &&
NO_CACHE_FLAG="--no-cache"
fi
+# Parse command-line arguments.
+WHL_URL=${1}
+if [[ -z "${WHL_URL}" ]]; then
+ die "whl URL is not specified"
+fi
+
+# Create docker build context directory.
+BUILD_DIR=$(mktemp -d)
+echo ""
+echo "Using custom whl file URL: ${WHL_URL}"
+echo "Building in temporary directory: ${BUILD_DIR}"
+
+cp -r ${DIR}/* ${BUILD_DIR}/ || \
+ die "Failed to copy files to ${BUILD_DIR}"
+
+# Download whl file into the build context directory.
+wget -P "${BUILD_DIR}" ${WHL_URL} || \
+ die "Failed to download tensorflow whl file from URL: ${WHL_URL}"
+
+# Build docker image for test.
docker build ${NO_CACHE_FLAG} \
- -t ${DOCKER_IMG_NAME} -f "${DIR}/Dockerfile" "${DIR}"
+ -t ${DOCKER_IMG_NAME} -f "${BUILD_DIR}/Dockerfile" "${BUILD_DIR}" || \
+ die "Failed to build docker image: ${DOCKER_IMG_NAME}"
+
+# Clean up docker build context directory.
+rm -rf "${BUILD_DIR}"
+
+# Run docker image for test.
KEY_FILE=${TF_DIST_GCLOUD_KEY_FILE:-"${HOME}/gcloud-secrets/tensorflow-testing.json"}
docker run --rm -v ${KEY_FILE}:/var/gcloud/secrets/tensorflow-testing.json \
diff --git a/tensorflow/tools/dist_test/server/Dockerfile b/tensorflow/tools/dist_test/server/Dockerfile
index d1d11e0524..9cc61a826b 100644
--- a/tensorflow/tools/dist_test/server/Dockerfile
+++ b/tensorflow/tools/dist_test/server/Dockerfile
@@ -36,7 +36,7 @@ RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
# Install TensorFlow CPU version from nightly build
RUN pip --no-cache-dir install \
- https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0-cp27-none-linux_x86_64.whl
+ https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
# Copy files, including the GRPC server binary at
# server/grpc_tensorflow_server.py
diff --git a/tensorflow/tools/dist_test/server/Dockerfile.test b/tensorflow/tools/dist_test/server/Dockerfile.test
index 298d1854a7..5bafa29468 100644
--- a/tensorflow/tools/dist_test/server/Dockerfile.test
+++ b/tensorflow/tools/dist_test/server/Dockerfile.test
@@ -42,7 +42,7 @@ RUN pip install --upgrade pandas==0.18.1
# Install TensorFlow CPU version.
RUN pip --no-cache-dir install \
- https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0-cp27-none-linux_x86_64.whl
+ https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
# Copy files, including the GRPC server binary at
# server/grpc_tensorflow_server.py