aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/tools/docker
diff options
context:
space:
mode:
authorGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-08-13 12:03:08 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-08-13 12:03:08 -0700
commit55b327916aa9b1f558242869412ae411d00d20ee (patch)
tree3e59457a9684e9ad598bc9bf940b11cb3267b594 /tensorflow/tools/docker
parentfcef09c17887451bfc138bac63b1be7b72993f71 (diff)
parent64994e5eb05c262ae1aef9480c54e0f992833b02 (diff)
Merge pull request #21177 from Intel-tensorflow:mkl-horovod-container
PiperOrigin-RevId: 208518776
Diffstat (limited to 'tensorflow/tools/docker')
-rwxr-xr-xtensorflow/tools/docker/Dockerfile.devel-mkl-horovod166
-rwxr-xr-xtensorflow/tools/docker/Dockerfile.mkl-horovod109
-rwxr-xr-xtensorflow/tools/docker/parameterized_docker_build.sh34
3 files changed, 301 insertions, 8 deletions
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
new file mode 100755
index 0000000000..cf69fb9033
--- /dev/null
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
@@ -0,0 +1,166 @@
+FROM ubuntu:16.04
+
+LABEL maintainer="Cong Xu <cong.xu@intel.com>"
+
+# These parameters can be overridden by parameterized_docker_build.sh
+ARG TF_BUILD_VERSION=r1.9
+ARG PYTHON="python"
+ARG PYTHON3_DEV=""
+ARG WHL_DIR="/tmp/pip"
+ARG PIP="pip"
+
+# System packages for the development image, listed alphabetically so that
+# additions and removals diff cleanly. ${PYTHON3_DEV} is empty unless the
+# build script overrides it. The apt caches are removed in the same layer.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        git \
+        libcurl3-dev \
+        libfreetype6-dev \
+        libhdf5-serial-dev \
+        libpng12-dev \
+        libzmq3-dev \
+        numactl \
+        openjdk-8-jdk \
+        openjdk-8-jre-headless \
+        openssh-client \
+        openssh-server \
+        pkg-config \
+        python-dev \
+        ${PYTHON3_DEV} \
+        rsync \
+        software-properties-common \
+        unzip \
+        wget \
+        zip \
+        zlib1g-dev \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Bootstrap pip for the selected interpreter.
+RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
+    ${PYTHON} get-pip.py && \
+    rm get-pip.py
+
+# Python packages used by the notebooks and the TensorFlow build.
+# scikit-learn is installed under its real distribution name: the "sklearn"
+# name on PyPI is a deprecated placeholder whose installation fails.
+RUN ${PIP} --no-cache-dir install \
+        Pillow \
+        h5py \
+        ipykernel \
+        jupyter \
+        matplotlib \
+        mock \
+        numpy \
+        pandas \
+        scikit-learn \
+        scipy \
+        && \
+    ${PYTHON} -m ipykernel.kernelspec
+
+# For python3 builds, point /usr/bin/python at python3; otherwise do nothing.
+RUN [ "${PYTHON}" != "python3" ] || ln -s -f /usr/bin/python3 /usr/bin/python
+
+# Set up our notebook config.
+COPY jupyter_notebook_config.py /root/.jupyter/
+
+# Jupyter has issues with being run directly:
+# https://github.com/ipython/ipython/issues/7062
+# We just add a little wrapper script.
+COPY run_jupyter.sh /
+
+# Set up Bazel.
+
+# System-wide Bazel settings appended to /etc/bazel.bazelrc:
+#  * "startup --batch": running bazel inside a `docker build` command causes
+#    trouble, cf. https://github.com/bazelbuild/bazel/issues/134
+#  * standalone strategies: works around sandboxing issues, cf.
+#    https://github.com/bazelbuild/bazel/issues/418
+RUN printf '%s\n' \
+        "startup --batch" \
+        "build --spawn_strategy=standalone --genrule_strategy=standalone" \
+        >>/etc/bazel.bazelrc
+# Install the pinned Bazel release and keep a copy of its license in /bazel.
+ENV BAZEL_VERSION=0.15.0
+WORKDIR /
+# UA and INSTALLER are plain shell variables (not exported), used only to avoid
+# repeating the User-Agent header and the installer file name.
+RUN UA="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36" && \
+    INSTALLER="bazel-$BAZEL_VERSION-installer-linux-x86_64.sh" && \
+    mkdir /bazel && \
+    cd /bazel && \
+    curl -H "User-Agent: $UA" -fSsL -O "https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/$INSTALLER" && \
+    curl -H "User-Agent: $UA" -fSsL -o /bazel/LICENSE.txt https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE && \
+    chmod +x "$INSTALLER" && \
+    "./$INSTALLER" && \
+    cd / && \
+    rm -f "/bazel/$INSTALLER"
+
+# Download the TensorFlow sources into /tensorflow.
+WORKDIR /tensorflow
+
+# TF_BUILD_VERSION may be a release tag (starting with "v") or a branch name:
+# tags are fetched and checked out after a shallow clone, branches are cloned
+# directly.
+RUN if echo ${TF_BUILD_VERSION} | grep -q '^v'; then \
+        git clone --depth=1 https://github.com/tensorflow/tensorflow.git . && \
+        git fetch --tags && \
+        git checkout ${TF_BUILD_VERSION}; \
+    else \
+        git clone --depth=1 --branch=${TF_BUILD_VERSION} https://github.com/tensorflow/tensorflow.git . ; \
+    fi
+
+# Answer every configure.py prompt with an empty line (presumably selecting
+# each default - confirm against configure.py).
+RUN yes "" | ${PYTHON} configure.py
+
+# NOTE(review): presumably consumed by the ci_build scripts run below - confirm.
+ENV CI_BUILD_PYTHON=${PYTHON}
+
+# Set bazel build parameters in .bazelrc in parameterized_docker_build.sh
+# Use --copt=-march values to get optimized builds appropriate for the hardware
+# platform of your choice.
+# For ivy-bridge or sandy-bridge
+# --copt=-march="avx" \
+# For haswell, broadwell, or skylake
+# --copt=-march="avx2" \
+COPY .bazelrc /root/.bazelrc
+
+# Build the pip package with Bazel using /root/.bazelrc, write the wheel into
+# ${WHL_DIR}, install it with ${PIP}, and remove /root/.cache in the same layer
+# so the Bazel cache is cleaned up when done.
+# NOTE(review): the `configured CPU` wrapper presumably prepares the build
+# environment before running the bazel command - confirm.
+RUN tensorflow/tools/ci_build/builds/configured CPU \
+ bazel --bazelrc=/root/.bazelrc build -c opt \
+ tensorflow/tools/pip_package:build_pip_package && \
+ bazel-bin/tensorflow/tools/pip_package/build_pip_package "${WHL_DIR}" && \
+ ${PIP} --no-cache-dir install --upgrade "${WHL_DIR}"/tensorflow-*.whl && \
+ rm -rf /root/.cache
+
+WORKDIR /root
+
+# Build Open MPI 3.0.0 from the upstream tarball, install it, refresh the
+# linker cache, and delete the build tree in the same layer.
+RUN OMPI="openmpi-3.0.0" && \
+    mkdir /tmp/openmpi && \
+    cd /tmp/openmpi && \
+    wget "https://www.open-mpi.org/software/ompi/v3.0/downloads/$OMPI.tar.gz" && \
+    tar zxf "$OMPI.tar.gz" && \
+    cd "$OMPI" && \
+    ./configure --enable-orterun-prefix-by-default && \
+    make -j "$(nproc)" all && \
+    make install && \
+    ldconfig && \
+    rm -rf /tmp/openmpi
+
+# Rename the real launcher to mpirun.real and put a two-line bash wrapper in
+# its place that always passes --allow-run-as-root.
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
+    printf '%s\n' \
+        '#!/bin/bash' \
+        'mpirun.real --allow-run-as-root "$@"' \
+        > /usr/local/bin/mpirun && \
+    chmod a+x /usr/local/bin/mpirun
+
+# Default MCA parameter: exclude the lo and docker0 interfaces from the TCP BTL.
+RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
+
+# Install Horovod
+RUN ${PIP} install --no-cache-dir horovod
+
+# Create the sshd runtime directory; MPI uses SSH to communicate between
+# containers.
+RUN mkdir -p /var/run/sshd
+
+# Rewrite ssh_config without any existing StrictHostKeyChecking lines, then
+# append "StrictHostKeyChecking no" so SSH does not ask for confirmation.
+RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
+    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
+    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+# TensorBoard
+EXPOSE 6006
+# IPython
+EXPOSE 8888
+
+WORKDIR /root
diff --git a/tensorflow/tools/docker/Dockerfile.mkl-horovod b/tensorflow/tools/docker/Dockerfile.mkl-horovod
new file mode 100755
index 0000000000..9485a5bff6
--- /dev/null
+++ b/tensorflow/tools/docker/Dockerfile.mkl-horovod
@@ -0,0 +1,109 @@
+FROM ubuntu:16.04
+
+LABEL maintainer="Cong Xu <cong.xu@intel.com>"
+
+# This parameter MUST be set by parameterized_docker_build.sh
+ARG TF_WHL_URL
+
+# Optional parameters
+ARG TF_BUILD_VERSION=r1.9
+ARG PYTHON="python"
+ARG PYTHON_DEV="python-dev"
+ARG PIP="pip"
+
+# Pick up some TF dependencies, plus the tools the later steps of this file
+# rely on: wget downloads the Open MPI tarball, and the OpenSSH packages
+# provide /etc/ssh/ssh_config and the ssh/sshd binaries MPI uses to reach
+# other containers. Without them those later RUN steps fail.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        libfreetype6-dev \
+        libhdf5-serial-dev \
+        libpng12-dev \
+        libzmq3-dev \
+        openssh-client \
+        openssh-server \
+        pkg-config \
+        python \
+        ${PYTHON_DEV} \
+        rsync \
+        software-properties-common \
+        unzip \
+        wget \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Bootstrap pip with the interpreter selected by the PYTHON build arg, so a
+# python3 build gets the pip3 executable that ${PIP} refers to. With the
+# default PYTHON="python" this behaves as before. -f makes curl fail on HTTP
+# errors instead of saving an error page as get-pip.py.
+RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
+    ${PYTHON} get-pip.py && \
+    rm get-pip.py
+
+# Python packages used by the notebooks. scikit-learn is installed under its
+# real distribution name: the "sklearn" name on PyPI is a deprecated
+# placeholder whose installation fails. The kernelspec is registered with the
+# same interpreter the packages were installed for.
+RUN ${PIP} --no-cache-dir install \
+        Pillow \
+        h5py \
+        ipykernel \
+        jupyter \
+        matplotlib \
+        numpy \
+        pandas \
+        scikit-learn \
+        scipy \
+        && \
+    ${PYTHON} -m ipykernel.kernelspec
+
+# Copy the pre-built TensorFlow wheel named by TF_WHL_URL from the build
+# context into / and install it with ${PIP}.
+# NOTE: the rm below removes the wheel only from the RUN layer; the file still
+# persists in the preceding COPY layer of the image.
+COPY ${TF_WHL_URL} /
+RUN ${PIP} install --no-cache-dir --force-reinstall /${TF_WHL_URL} && \
+ rm -rf /${TF_WHL_URL}
+
+# For python3 builds, point /usr/bin/python at python3; otherwise do nothing.
+RUN [ "${PYTHON}" != "python3" ] || ln -s -f /usr/bin/python3 /usr/bin/python
+
+# Set up our notebook config.
+COPY jupyter_notebook_config.py /root/.jupyter/
+
+# Copy sample notebooks.
+COPY notebooks /notebooks
+
+# Jupyter has issues with being run directly:
+# https://github.com/ipython/ipython/issues/7062
+# We just add a little wrapper script.
+COPY run_jupyter.sh /
+
+WORKDIR /root
+
+# Build Open MPI 3.0.0 from the upstream tarball, install it, refresh the
+# linker cache, and delete the build tree in the same layer.
+RUN OMPI="openmpi-3.0.0" && \
+    mkdir /tmp/openmpi && \
+    cd /tmp/openmpi && \
+    wget "https://www.open-mpi.org/software/ompi/v3.0/downloads/$OMPI.tar.gz" && \
+    tar zxf "$OMPI.tar.gz" && \
+    cd "$OMPI" && \
+    ./configure --enable-orterun-prefix-by-default && \
+    make -j "$(nproc)" all && \
+    make install && \
+    ldconfig && \
+    rm -rf /tmp/openmpi
+
+# Rename the real launcher to mpirun.real and put a two-line bash wrapper in
+# its place that always passes --allow-run-as-root.
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
+    printf '%s\n' \
+        '#!/bin/bash' \
+        'mpirun.real --allow-run-as-root "$@"' \
+        > /usr/local/bin/mpirun && \
+    chmod a+x /usr/local/bin/mpirun
+
+# Default MCA parameter: exclude the lo and docker0 interfaces from the TCP BTL.
+RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
+
+# Install Horovod
+RUN ${PIP} install --no-cache-dir horovod
+
+# Create the sshd runtime directory; MPI uses SSH to communicate between
+# containers.
+RUN mkdir -p /var/run/sshd
+
+# Rewrite ssh_config without any existing StrictHostKeyChecking lines, then
+# append "StrictHostKeyChecking no" so SSH does not ask for confirmation.
+RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
+    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
+    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+# TensorBoard
+EXPOSE 6006
+# IPython
+EXPOSE 8888
+
+WORKDIR "/notebooks"
+
+CMD ["/run_jupyter.sh", "--allow-root"]
diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh
index 4681c5fd61..04fc1659af 100755
--- a/tensorflow/tools/docker/parameterized_docker_build.sh
+++ b/tensorflow/tools/docker/parameterized_docker_build.sh
@@ -19,8 +19,8 @@
# parameterized_docker_build.sh
#
# The script obeys the following environment variables:
-# TF_DOCKER_BUILD_TYPE: (CPU | GPU | MKL)
-# CPU, GPU, or MKL image
+# TF_DOCKER_BUILD_TYPE: (CPU | GPU | MKL | MKL-HOROVOD)
+# CPU, GPU, MKL or MKL-HOROVOD image
#
# TF_DOCKER_BUILD_IS_DEVEL: (NO | YES)
# Is this developer image
@@ -169,6 +169,15 @@ elif [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
else
ORIG_DOCKERFILE="${ORIG_DOCKERFILE}.mkl"
fi
+elif [[ ${TF_DOCKER_BUILD_TYPE} == "mkl-horovod" ]]; then
+ DOCKER_BINARY="docker"
+ FINAL_TAG="${FINAL_TAG}-mkl-horovod"
+ if [[ ${ORIG_DOCKERFILE} == *"."* ]]; then
+ # There is already a dot in the tag, use "-"
+ ORIG_DOCKERFILE="${ORIG_DOCKERFILE}-mkl-horovod"
+ else
+ ORIG_DOCKERFILE="${ORIG_DOCKERFILE}.mkl-horovod"
+ fi
elif [[ ${TF_DOCKER_BUILD_TYPE} == "gpu" ]]; then
DOCKER_BINARY="nvidia-docker"
@@ -227,6 +236,10 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
die "FAIL: Non-development MKL builds require a pre-built pip whl."
fi
+ if [[ "${TF_DOCKER_BUILD_TYPE}" == "mkl-horovod" ]]; then
+ die "FAIL: Non-development MKL-HOROVOD builds require a pre-built pip whl."
+ fi
+
if [[ "${TF_DOCKER_BUILD_TYPE}" == "gpu" ]]; then
export TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS=\
"${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS} -e TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2"
@@ -279,7 +292,8 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
# Use string replacement to put the correct file name into the Dockerfile
PIP_WHL=$(basename "${PIP_WHL}")
- if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]] || \
+ [[ ${TF_DOCKER_BUILD_TYPE} == "mkl-horovod" ]]; then
TF_DOCKER_BUILD_ARGS+=("--build-arg TF_WHL_URL=${PIP_WHL}" )
cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}"
else
@@ -295,7 +309,8 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
echo
else
echo "Downloading pip wheel from: ${TF_DOCKER_BUILD_CENTRAL_PIP}"
- if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]] || \
+ [[ ${TF_DOCKER_BUILD_TYPE} == "mkl-horovod" ]]; then
pushd "${TMP_DIR}/"
curl -O ${TF_DOCKER_BUILD_CENTRAL_PIP}
popd
@@ -319,7 +334,8 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
# Modify python/pip version if necessary.
if [[ "${TF_DOCKER_BUILD_PYTHON_VERSION}" == "python3" ]]; then
- if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]] || \
+ [[ ${TF_DOCKER_BUILD_TYPE} == "mkl-horovod" ]]; then
TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON=${TF_DOCKER_BUILD_PYTHON_VERSION}")
TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON_DEV=python3-dev")
TF_DOCKER_BUILD_ARGS+=("--build-arg PIP=pip3")
@@ -340,8 +356,9 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
else # TF_DOCKER_BUILD_IS_DEVEL == 'yes'
DOCKERFILE="${TMP_DIR}/Dockerfile"
- # Set up Dockerfile ARGS for mkl build
- if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ # Set up Dockerfile ARGS for mkl and mkl-horovod build
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]] || \
+ [[ ${TF_DOCKER_BUILD_TYPE} == "mkl-horovod" ]]; then
if [[ -z "${TF_BAZEL_BUILD_OPTIONS// }" ]]; then
TF_BAZEL_BUILD_OPTIONS=("--config=mkl --copt=-mavx --cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0")
else
@@ -361,7 +378,8 @@ else # TF_DOCKER_BUILD_IS_DEVEL == 'yes'
# Modify python/pip version if necessary.
if [[ "${TF_DOCKER_BUILD_PYTHON_VERSION}" == "python3" ]]; then
- if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]] || \
+ [[ ${TF_DOCKER_BUILD_TYPE} == "mkl-horovod" ]]; then
TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON=${TF_DOCKER_BUILD_PYTHON_VERSION}")
TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON3_DEV=python3-dev")
TF_DOCKER_BUILD_ARGS+=("--build-arg WHL_DIR=/tmp/pip3")