aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-06-26 23:58:04 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-06-27 00:00:26 -0700
commitcbcbc29bf5d44a5c8a92e69d487283071d532738 (patch)
tree999ccd4485a506f0118a4c15833e806d001ba76a
parent0d526454affd27cd331b22b8fd128345b515cac7 (diff)
Fix Windows GPU Build
PiperOrigin-RevId: 202260254
-rw-r--r--tensorflow/BUILD7
-rw-r--r--tensorflow/contrib/BUILD6
-rw-r--r--tensorflow/contrib/nccl/BUILD5
-rw-r--r--tensorflow/stream_executor/stream.h61
-rw-r--r--tensorflow/tensorflow.bzl6
-rw-r--r--tensorflow/tools/ci_build/windows/bazel/common_env.sh12
-rw-r--r--tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh62
7 files changed, 123 insertions, 36 deletions
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 8d0d9f14bc..e4530a5962 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -252,6 +252,13 @@ config_setting(
)
config_setting(
+ name = "with_cuda_support_windows_override",
+ define_values = {"using_cuda_nvcc": "true"},
+ values = {"cpu": "x64_windows"},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
name = "with_gcp_support_android_override",
define_values = {"with_gcp_support": "true"},
values = {"crosstool_top": "//external:android/crosstool"},
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index fffab5a795..2d7916c8b1 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -9,6 +9,7 @@ load("//third_party/mpi:mpi.bzl", "if_mpi")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt")
load("//tensorflow:tensorflow.bzl", "if_not_windows")
+load("//tensorflow:tensorflow.bzl", "if_not_windows_cuda")
py_library(
name = "contrib_py",
@@ -45,7 +46,6 @@ py_library(
"//tensorflow/contrib/factorization:factorization_py",
"//tensorflow/contrib/feature_column:feature_column_py",
"//tensorflow/contrib/framework:framework_py",
- "//tensorflow/contrib/fused_conv:fused_conv_py",
"//tensorflow/contrib/gan",
"//tensorflow/contrib/graph_editor:graph_editor_py",
"//tensorflow/contrib/grid_rnn:grid_rnn_py",
@@ -123,7 +123,9 @@ py_library(
"//tensorflow/contrib/kafka",
],
"//conditions:default": [],
- }) + if_not_windows([
+ }) + if_not_windows_cuda([
+ "//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols
+ ]) + if_not_windows([
"//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
"//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code
]),
diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD
index 7cfdf0f607..62996d1fd8 100644
--- a/tensorflow/contrib/nccl/BUILD
+++ b/tensorflow/contrib/nccl/BUILD
@@ -19,17 +19,18 @@ load("//tensorflow:tensorflow.bzl", "cuda_py_test")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
+load("//tensorflow:tensorflow.bzl", "if_not_windows_cuda")
tf_custom_op_library(
name = "python/ops/_nccl_ops.so",
srcs = [
"ops/nccl_ops.cc",
],
- gpu_srcs = [
+ gpu_srcs = if_not_windows_cuda([
"kernels/nccl_manager.cc",
"kernels/nccl_manager.h",
"kernels/nccl_ops.cc",
- ],
+ ]),
deps = if_cuda([
"@local_config_nccl//:nccl",
"//tensorflow/core:gpu_headers_lib",
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index a32f4105ad..e8885e1eb6 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -25,6 +25,7 @@ limitations under the License.
#include <functional>
#include <memory>
+#include "tensorflow/core/platform/macros.h"
#include "tensorflow/stream_executor/blas.h"
#include "tensorflow/stream_executor/device_memory.h"
#include "tensorflow/stream_executor/dnn.h"
@@ -1349,33 +1350,39 @@ class Stream {
DeviceMemory<std::complex<double>> *x, int incx);
// See BlasSupport::DoBlasGemm.
- Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m,
- uint64 n, uint64 k, float alpha,
- const DeviceMemory<Eigen::half> &a, int lda,
- const DeviceMemory<Eigen::half> &b, int ldb, float beta,
- DeviceMemory<Eigen::half> *c, int ldc);
- Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m,
- uint64 n, uint64 k, float alpha,
- const DeviceMemory<float> &a, int lda,
- const DeviceMemory<float> &b, int ldb, float beta,
- DeviceMemory<float> *c, int ldc);
- Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m,
- uint64 n, uint64 k, double alpha,
- const DeviceMemory<double> &a, int lda,
- const DeviceMemory<double> &b, int ldb, double beta,
- DeviceMemory<double> *c, int ldc);
- Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m,
- uint64 n, uint64 k, std::complex<float> alpha,
- const DeviceMemory<std::complex<float>> &a, int lda,
- const DeviceMemory<std::complex<float>> &b, int ldb,
- std::complex<float> beta,
- DeviceMemory<std::complex<float>> *c, int ldc);
- Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m,
- uint64 n, uint64 k, std::complex<double> alpha,
- const DeviceMemory<std::complex<double>> &a, int lda,
- const DeviceMemory<std::complex<double>> &b, int ldb,
- std::complex<double> beta,
- DeviceMemory<std::complex<double>> *c, int ldc);
+ TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
+ uint64 m, uint64 n, uint64 k, float alpha,
+ const DeviceMemory<Eigen::half> &a, int lda,
+ const DeviceMemory<Eigen::half> &b, int ldb,
+ float beta, DeviceMemory<Eigen::half> *c,
+ int ldc);
+ TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
+ uint64 m, uint64 n, uint64 k, float alpha,
+ const DeviceMemory<float> &a, int lda,
+ const DeviceMemory<float> &b, int ldb,
+ float beta, DeviceMemory<float> *c, int ldc);
+ TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
+ uint64 m, uint64 n, uint64 k, double alpha,
+ const DeviceMemory<double> &a, int lda,
+ const DeviceMemory<double> &b, int ldb,
+ double beta, DeviceMemory<double> *c, int ldc);
+ TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
+ uint64 m, uint64 n, uint64 k,
+ std::complex<float> alpha,
+ const DeviceMemory<std::complex<float>> &a,
+ int lda,
+ const DeviceMemory<std::complex<float>> &b,
+ int ldb, std::complex<float> beta,
+ DeviceMemory<std::complex<float>> *c, int ldc);
+ TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
+ uint64 m, uint64 n, uint64 k,
+ std::complex<double> alpha,
+ const DeviceMemory<std::complex<double>> &a,
+ int lda,
+ const DeviceMemory<std::complex<double>> &b,
+ int ldb, std::complex<double> beta,
+ DeviceMemory<std::complex<double>> *c,
+ int ldc);
Stream &ThenBlasGemmWithProfiling(blas::Transpose transa,
blas::Transpose transb, uint64 m, uint64 n,
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 6bb393a3f4..e4632c4811 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -148,6 +148,12 @@ def if_windows(a):
"//conditions:default": [],
})
+def if_not_windows_cuda(a):
+ return select({
+ clean_dep("//tensorflow:with_cuda_support_windows_override"): [],
+ "//conditions:default": a,
+ })
+
def if_linux_x86_64(a):
return select({
clean_dep("//tensorflow:linux_x86_64"): a,
diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
index eefa8ee2d5..8a237e4e28 100644
--- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
@@ -49,3 +49,15 @@ export PATH="/c/Program Files/Git/cmd:$PATH"
# Make sure we have pip in PATH
export PATH="/c/${PYTHON_BASE_PATH}/Scripts:$PATH"
+
+# Setting default values to CUDA related environment variables
+export TF_CUDA_VERSION=${TF_CUDA_VERSION:-9.0}
+export TF_CUDNN_VERSION=${TF_CUDNN_VERSION:-7.0}
+export TF_CUDA_COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES:-3.7}
+export CUDA_INSTALL_PATH=${CUDA_INSTALL_PATH:-"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${TF_CUDA_VERSION}"}
+export CUDNN_INSTALL_PATH=${CUDNN_INSTALL_PATH:-"C:/tools/cuda"}
+
+# Add Cuda and Cudnn dll directories into PATH
+export PATH="$(cygpath -u "${CUDA_INSTALL_PATH}")/bin:$PATH"
+export PATH="$(cygpath -u "${CUDA_INSTALL_PATH}")/extras/CUPTI/libx64:$PATH"
+export PATH="$(cygpath -u "${CUDNN_INSTALL_PATH}")/bin:$PATH"
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 922bb67bbf..ededad615a 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -42,9 +42,58 @@ source "tensorflow/tools/ci_build/windows/bazel/common_env.sh" \
source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
|| { echo "Failed to source bazel_test_lib.sh" >&2; exit 1; }
+# Recreate an empty bazelrc file under source root
+export TMP_BAZELRC=.tmp.bazelrc
+rm -f "${TMP_BAZELRC}"
+touch "${TMP_BAZELRC}"
+
+function cleanup {
+ # Remove all options in .tmp.bazelrc
+ echo "" > "${TMP_BAZELRC}"
+}
+trap cleanup EXIT
+
+skip_test=0
+release_build=0
+
+for ARG in "$@"; do
+ if [[ "$ARG" == --skip_test ]]; then
+ skip_test=1
+ elif [[ "$ARG" == --enable_gcs_remote_cache ]]; then
+ set_gcs_remote_cache_options
+ elif [[ "$ARG" == --release_build ]]; then
+ release_build=1
+ fi
+done
+
+if [[ "$release_build" != 1 ]]; then
+ # --define=override_eigen_strong_inline=true speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc
+ # by 20 minutes. See https://github.com/tensorflow/tensorflow/issues/10521
+ # Because this hurts the performance of TF, we don't enable it in release build.
+ echo "build --define=override_eigen_strong_inline=true" >> "${TMP_BAZELRC}"
+fi
+
+# The host and target platforms are the same in Windows build. So we don't have
+# to distinct them. This helps avoid building the same targets twice.
+echo "build --distinct_host_configuration=false" >> "${TMP_BAZELRC}"
+
+# Enable short object file path to avoid long path issue on Windows.
+echo "startup --output_user_root=${TMPDIR}" >> "${TMP_BAZELRC}"
+
+# Disable nvcc warnings to reduce log file size.
+echo "build --copt=-nvcc_options=disable-warnings" >> "${TMP_BAZELRC}"
+
+if ! grep -q "import %workspace%/${TMP_BAZELRC}" .bazelrc; then
+ echo "import %workspace%/${TMP_BAZELRC}" >> .bazelrc
+fi
+
run_configure_for_gpu_build
-bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
+bazel build --announce_rc --config=opt tensorflow/tools/pip_package:build_pip_package || exit $?
+
+if [[ "$skip_test" == 1 ]]; then
+ exit 0
+fi
# Create a python test directory to avoid package name conflict
PY_TEST_DIR="py_test_dir"
@@ -59,8 +108,11 @@ reinstall_tensorflow_pip ${PIP_NAME}
# Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
# which will result testing system installed tensorflow
# GPU tests are very flaky when running concurrently, so set local_test_jobs=1
-bazel test -c opt -k --test_output=errors \
+bazel test --announce_rc --config=opt -k --test_output=errors \
--define=no_tensorflow_py_deps=true --test_lang_filters=py \
- --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
- --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
- --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/...
+ --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,-no_oss \
+ --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,-no_oss --build_tests_only \
+ --local_test_jobs=1 --test_timeout="300,450,1200,3600" \
+ --flaky_test_attempts=3 \
+ //${PY_TEST_DIR}/tensorflow/python/... \
+ //${PY_TEST_DIR}/tensorflow/contrib/...