diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-06-26 23:58:04 -0700 |
---|---|---|
committer | Gunhan Gulsoy <gunan@google.com> | 2018-06-28 21:37:43 -0700 |
commit | 11157efc4e94a7c70ff7532d7bb835fb5d9d19da (patch) | |
tree | 492384123eabb96505a68c65c77dcc28094d1c45 /tensorflow | |
parent | c30c57bd0792c50397883252ee5b2960988846d3 (diff) |
Fix Windows GPU Build
PiperOrigin-RevId: 202260254
Diffstat (limited to 'tensorflow')
-rw-r--r-- | tensorflow/BUILD | 7 | ||||
-rw-r--r-- | tensorflow/contrib/BUILD | 6 | ||||
-rw-r--r-- | tensorflow/contrib/nccl/BUILD | 5 | ||||
-rw-r--r-- | tensorflow/stream_executor/stream.h | 61 | ||||
-rw-r--r-- | tensorflow/tensorflow.bzl | 6 | ||||
-rw-r--r-- | tensorflow/tools/ci_build/windows/bazel/common_env.sh | 12 | ||||
-rw-r--r-- | tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 62 |
7 files changed, 123 insertions, 36 deletions
diff --git a/tensorflow/BUILD b/tensorflow/BUILD index a15d033013..f362900387 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -258,6 +258,13 @@ config_setting( ) config_setting( + name = "with_cuda_support_windows_override", + define_values = {"using_cuda_nvcc": "true"}, + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +config_setting( name = "with_gcp_support_android_override", define_values = {"with_gcp_support": "true"}, values = {"crosstool_top": "//external:android/crosstool"}, diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index fffab5a795..2d7916c8b1 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -9,6 +9,7 @@ load("//third_party/mpi:mpi.bzl", "if_mpi") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt") load("//tensorflow:tensorflow.bzl", "if_not_windows") +load("//tensorflow:tensorflow.bzl", "if_not_windows_cuda") py_library( name = "contrib_py", @@ -45,7 +46,6 @@ py_library( "//tensorflow/contrib/factorization:factorization_py", "//tensorflow/contrib/feature_column:feature_column_py", "//tensorflow/contrib/framework:framework_py", - "//tensorflow/contrib/fused_conv:fused_conv_py", "//tensorflow/contrib/gan", "//tensorflow/contrib/graph_editor:graph_editor_py", "//tensorflow/contrib/grid_rnn:grid_rnn_py", @@ -123,7 +123,9 @@ py_library( "//tensorflow/contrib/kafka", ], "//conditions:default": [], - }) + if_not_windows([ + }) + if_not_windows_cuda([ + "//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols + ]) + if_not_windows([ "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code ]), diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index 7cfdf0f607..62996d1fd8 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -19,17 +19,18 @@ load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") +load("//tensorflow:tensorflow.bzl", "if_not_windows_cuda") tf_custom_op_library( name = "python/ops/_nccl_ops.so", srcs = [ "ops/nccl_ops.cc", ], - gpu_srcs = [ + gpu_srcs = if_not_windows_cuda([ "kernels/nccl_manager.cc", "kernels/nccl_manager.h", "kernels/nccl_ops.cc", - ], + ]), deps = if_cuda([ "@local_config_nccl//:nccl", "//tensorflow/core:gpu_headers_lib", diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index a32f4105ad..e8885e1eb6 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -25,6 +25,7 @@ limitations under the License. #include <functional> #include <memory> +#include "tensorflow/core/platform/macros.h" #include "tensorflow/stream_executor/blas.h" #include "tensorflow/stream_executor/device_memory.h" #include "tensorflow/stream_executor/dnn.h" @@ -1349,33 +1350,39 @@ class Stream { DeviceMemory<std::complex<double>> *x, int incx); // See BlasSupport::DoBlasGemm. - Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, float alpha, - const DeviceMemory<Eigen::half> &a, int lda, - const DeviceMemory<Eigen::half> &b, int ldb, float beta, - DeviceMemory<Eigen::half> *c, int ldc); - Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, float alpha, - const DeviceMemory<float> &a, int lda, - const DeviceMemory<float> &b, int ldb, float beta, - DeviceMemory<float> *c, int ldc); - Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, double alpha, - const DeviceMemory<double> &a, int lda, - const DeviceMemory<double> &b, int ldb, double beta, - DeviceMemory<double> *c, int ldc); - Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, std::complex<float> alpha, - const DeviceMemory<std::complex<float>> &a, int lda, - const DeviceMemory<std::complex<float>> &b, int ldb, - std::complex<float> beta, - DeviceMemory<std::complex<float>> *c, int ldc); - Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, std::complex<double> alpha, - const DeviceMemory<std::complex<double>> &a, int lda, - const DeviceMemory<std::complex<double>> &b, int ldb, - std::complex<double> beta, - DeviceMemory<std::complex<double>> *c, int ldc); + TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, + uint64 m, uint64 n, uint64 k, float alpha, + const DeviceMemory<Eigen::half> &a, int lda, + const DeviceMemory<Eigen::half> &b, int ldb, + float beta, DeviceMemory<Eigen::half> *c, + int ldc); + TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, + uint64 m, uint64 n, uint64 k, float alpha, + const DeviceMemory<float> &a, int lda, + const DeviceMemory<float> &b, int ldb, + float beta, DeviceMemory<float> *c, int ldc); + TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, + uint64 m, uint64 n, uint64 k, double alpha, + const DeviceMemory<double> &a, int lda, + const DeviceMemory<double> &b, int ldb, + double beta, DeviceMemory<double> *c, int ldc); + TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, + uint64 m, uint64 n, uint64 k, + std::complex<float> alpha, + const DeviceMemory<std::complex<float>> &a, + int lda, + const DeviceMemory<std::complex<float>> &b, + int ldb, std::complex<float> beta, + DeviceMemory<std::complex<float>> *c, int ldc); + TF_EXPORT Stream &ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, + uint64 m, uint64 n, uint64 k, + std::complex<double> alpha, + const DeviceMemory<std::complex<double>> &a, + int lda, + const DeviceMemory<std::complex<double>> &b, + int ldb, std::complex<double> beta, + DeviceMemory<std::complex<double>> *c, + int ldc); Stream &ThenBlasGemmWithProfiling(blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 6bb393a3f4..e4632c4811 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -148,6 +148,12 @@ def if_windows(a): "//conditions:default": [], }) +def if_not_windows_cuda(a): + return select({ + clean_dep("//tensorflow:with_cuda_support_windows_override"): [], + "//conditions:default": a, + }) + def if_linux_x86_64(a): return select({ clean_dep("//tensorflow:linux_x86_64"): a, diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh index eefa8ee2d5..8a237e4e28 100644 --- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh +++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh @@ -49,3 +49,15 @@ export PATH="/c/Program Files/Git/cmd:$PATH" # Make sure we have pip in PATH export PATH="/c/${PYTHON_BASE_PATH}/Scripts:$PATH" + +# Setting default values to CUDA related environment variables +export TF_CUDA_VERSION=${TF_CUDA_VERSION:-9.0} +export TF_CUDNN_VERSION=${TF_CUDNN_VERSION:-7.0} +export TF_CUDA_COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES:-3.7} +export CUDA_INSTALL_PATH=${CUDA_INSTALL_PATH:-"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${TF_CUDA_VERSION}"} +export CUDNN_INSTALL_PATH=${CUDNN_INSTALL_PATH:-"C:/tools/cuda"} + +# Add Cuda and Cudnn dll directories into PATH +export PATH="$(cygpath -u "${CUDA_INSTALL_PATH}")/bin:$PATH" +export PATH="$(cygpath -u "${CUDA_INSTALL_PATH}")/extras/CUPTI/libx64:$PATH" +export PATH="$(cygpath -u "${CUDNN_INSTALL_PATH}")/bin:$PATH" diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index 922bb67bbf..ededad615a 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -42,9 +42,58 @@ source "tensorflow/tools/ci_build/windows/bazel/common_env.sh" \ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \ || { echo "Failed to source bazel_test_lib.sh" >&2; exit 1; } +# Recreate an empty bazelrc file under source root +export TMP_BAZELRC=.tmp.bazelrc +rm -f "${TMP_BAZELRC}" +touch "${TMP_BAZELRC}" + +function cleanup { + # Remove all options in .tmp.bazelrc + echo "" > "${TMP_BAZELRC}" +} +trap cleanup EXIT + +skip_test=0 +release_build=0 + +for ARG in "$@"; do + if [[ "$ARG" == --skip_test ]]; then + skip_test=1 + elif [[ "$ARG" == --enable_gcs_remote_cache ]]; then + set_gcs_remote_cache_options + elif [[ "$ARG" == --release_build ]]; then + release_build=1 + fi +done + +if [[ "$release_build" != 1 ]]; then + # --define=override_eigen_strong_inline=true speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc + # by 20 minutes. See https://github.com/tensorflow/tensorflow/issues/10521 + # Because this hurts the performance of TF, we don't enable it in release build. + echo "build --define=override_eigen_strong_inline=true" >> "${TMP_BAZELRC}" +fi + +# The host and target platforms are the same in Windows build. So we don't have +# to distinct them. This helps avoid building the same targets twice. +echo "build --distinct_host_configuration=false" >> "${TMP_BAZELRC}" + +# Enable short object file path to avoid long path issue on Windows. +echo "startup --output_user_root=${TMPDIR}" >> "${TMP_BAZELRC}" + +# Disable nvcc warnings to reduce log file size. +echo "build --copt=-nvcc_options=disable-warnings" >> "${TMP_BAZELRC}" + +if ! grep -q "import %workspace%/${TMP_BAZELRC}" .bazelrc; then + echo "import %workspace%/${TMP_BAZELRC}" >> .bazelrc +fi + run_configure_for_gpu_build -bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $? +bazel build --announce_rc --config=opt tensorflow/tools/pip_package:build_pip_package || exit $? + +if [[ "$skip_test" == 1 ]]; then + exit 0 +fi # Create a python test directory to avoid package name conflict PY_TEST_DIR="py_test_dir" @@ -59,8 +108,11 @@ reinstall_tensorflow_pip ${PIP_NAME} # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore, # which will result testing system installed tensorflow # GPU tests are very flaky when running concurrently, so set local_test_jobs=1 -bazel test -c opt -k --test_output=errors \ +bazel test --announce_rc --config=opt -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ - --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \ - --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \ - --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/... + --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,-no_oss \ + --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,-no_oss --build_tests_only \ + --local_test_jobs=1 --test_timeout="300,450,1200,3600" \ + --flaky_test_attempts=3 \ + //${PY_TEST_DIR}/tensorflow/python/... \ + //${PY_TEST_DIR}/tensorflow/contrib/... |