From a771598ad83ca33eb42594d7e804859371ba4ca9 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 9 Nov 2016 13:14:03 -0800 Subject: Merge changes from github. Change: 138675832 --- .gitignore | 1 + configure | 105 +++++- tensorflow/__init__.py | 5 +- tensorflow/contrib/bayesflow/__init__.py | 3 +- tensorflow/contrib/cmake/CMakeLists.txt | 10 +- tensorflow/contrib/cmake/README.md | 21 +- tensorflow/contrib/cmake/external/googletest.cmake | 29 ++ tensorflow/contrib/cmake/external/zlib.cmake | 2 +- tensorflow/contrib/cmake/tf_core_ops.cmake | 1 + tensorflow/contrib/cmake/tf_python.cmake | 1 + tensorflow/contrib/cmake/tf_tests.cmake | 384 +++++++++++++++++++++ tensorflow/contrib/distributions/__init__.py | 2 + tensorflow/contrib/factorization/__init__.py | 1 + .../factorization/python/ops/factorization_ops.py | 1 + tensorflow/contrib/framework/__init__.py | 1 + .../contrib/framework/python/framework/__init__.py | 2 + tensorflow/contrib/grid_rnn/__init__.py | 1 + tensorflow/contrib/layers/__init__.py | 1 + .../contrib/layers/python/layers/__init__.py | 1 + tensorflow/contrib/learn/__init__.py | 1 + tensorflow/contrib/learn/python/__init__.py | 1 + .../contrib/learn/python/learn/ops/__init__.py | 1 + .../learn/python/learn/preprocessing/__init__.py | 1 + tensorflow/contrib/lookup/__init__.py | 1 + tensorflow/contrib/losses/__init__.py | 1 + .../contrib/losses/python/losses/__init__.py | 1 + tensorflow/contrib/makefile/README.md | 4 +- tensorflow/contrib/metrics/__init__.py | 3 +- .../contrib/metrics/python/metrics/__init__.py | 1 + tensorflow/contrib/ndlstm/python/__init__.py | 1 + tensorflow/contrib/opt/__init__.py | 1 + tensorflow/contrib/quantization/__init__.py | 1 + tensorflow/contrib/quantization/python/__init__.py | 1 + .../contrib/quantization/python/array_ops.py | 3 +- tensorflow/contrib/quantization/python/math_ops.py | 1 + tensorflow/contrib/quantization/python/nn_ops.py | 1 + tensorflow/contrib/rnn/__init__.py | 1 + tensorflow/contrib/seq2seq/__init__.py | 3 +- tensorflow/contrib/specs/python/__init__.py | 1 + tensorflow/contrib/tensor_forest/__init__.py | 1 + .../contrib/tensor_forest/client/__init__.py | 3 +- tensorflow/contrib/tensor_forest/data/__init__.py | 3 +- .../contrib/tensor_forest/hybrid/__init__.py | 1 + tensorflow/contrib/testing/__init__.py | 1 + tensorflow/contrib/training/__init__.py | 1 + tensorflow/core/BUILD | 28 ++ tensorflow/core/common_runtime/device_set_test.cc | 9 +- .../core/common_runtime/direct_session_test.cc | 2 + tensorflow/core/common_runtime/sycl/sycl_device.cc | 88 +++++ tensorflow/core/common_runtime/sycl/sycl_device.h | 62 ++++ .../common_runtime/sycl/sycl_device_context.cc | 46 +++ .../core/common_runtime/sycl/sycl_device_context.h | 42 +++ .../common_runtime/sycl/sycl_device_factory.cc | 44 +++ tensorflow/core/framework/device_base.h | 17 + tensorflow/core/framework/op_kernel.cc | 7 + tensorflow/core/framework/op_kernel.h | 6 + tensorflow/core/framework/tensor.cc | 72 +++- tensorflow/core/framework/tensor_test.cc | 16 +- tensorflow/core/framework/types.cc | 1 + tensorflow/core/framework/types.h | 5 +- tensorflow/core/framework/types_test.cc | 1 + tensorflow/core/kernels/BUILD | 3 + tensorflow/core/kernels/constant_op.cc | 11 + tensorflow/core/kernels/cwise_op_round.cc | 8 + tensorflow/core/kernels/cwise_ops_common.h | 35 +- tensorflow/core/kernels/cwise_ops_sycl_common.h | 138 ++++++++ tensorflow/core/kernels/function_ops.cc | 23 ++ tensorflow/core/kernels/identity_op.cc | 19 + tensorflow/core/kernels/no_op.cc | 4 + 
.../parameterized_truncated_normal_op_gpu.cu.cc | 12 +- tensorflow/core/kernels/sendrecv_ops.cc | 8 + tensorflow/core/kernels/substr_op.cc | 233 +++++++++++++ tensorflow/core/lib/io/path.cc | 1 + tensorflow/core/ops/string_ops.cc | 109 ++++++ .../core/platform/default/build_config/BUILD | 16 + tensorflow/core/platform/hadoop/BUILD | 8 +- .../core/platform/hadoop/hadoop_file_system.cc | 17 +- .../platform/hadoop/hadoop_file_system_test.cc | 46 ++- tensorflow/core/platform/windows/intrinsics_port.h | 2 +- tensorflow/core/util/device_name_utils.cc | 1 + tensorflow/examples/learn/wide_n_deep_tutorial.py | 4 + .../functions_and_classes/shard1/tf.linspace.md | 2 +- .../shard1/tf.nn.sampled_softmax_loss.md | 4 +- .../shard4/tf.nn.depthwise_conv2d_native.md | 2 +- .../functions_and_classes/shard4/tf.nn.nce_loss.md | 3 +- .../shard7/tf.nn.local_response_normalization.md | 4 +- .../functions_and_classes/shard8/tf.nn.conv2d.md | 2 +- tensorflow/g3doc/how_tos/meta_graph/index.md | 2 +- tensorflow/g3doc/resources/index.md | 1 + tensorflow/g3doc/tutorials/deep_cnn/index.md | 2 +- tensorflow/python/__init__.py | 2 + tensorflow/python/kernel_tests/BUILD | 7 + tensorflow/python/kernel_tests/substr_op_test.py | 235 +++++++++++++ tensorflow/python/ops/math_ops.py | 2 + tensorflow/python/ops/math_ops_test.py | 10 + tensorflow/python/ops/nn.py | 4 +- tensorflow/python/ops/string_ops.py | 2 + tensorflow/python/ops/variables.py | 27 +- tensorflow/python/training/training.py | 6 +- tensorflow/tensorboard/backend/server.py | 18 +- tensorflow/tensorboard/backend/server_test.py | 5 + .../tools/ci_build/windows/cpu/cmake/run_build.bat | 38 ++ .../tools/ci_build/windows/cpu/cmake/run_py.bat | 37 ++ .../ci_build/windows/cpu/pip/build_tf_windows.sh | 59 ++++ tensorflow/tools/ci_build/windows/cpu/pip/run.bat | 1 + tensorflow/tools/git/.gitignore | 1 + tensorflow/workspace.bzl | 3 + third_party/eigen3/BUILD | 5 +- tools/bazel.rc.template | 3 + util/python/python_config.sh | 28 +- 110 files changed, 2140 insertions(+), 130 deletions(-) create mode 100644 tensorflow/contrib/cmake/external/googletest.cmake create mode 100644 tensorflow/contrib/cmake/tf_tests.cmake create mode 100644 tensorflow/core/common_runtime/sycl/sycl_device.cc create mode 100644 tensorflow/core/common_runtime/sycl/sycl_device.h create mode 100644 tensorflow/core/common_runtime/sycl/sycl_device_context.cc create mode 100644 tensorflow/core/common_runtime/sycl/sycl_device_context.h create mode 100644 tensorflow/core/common_runtime/sycl/sycl_device_factory.cc create mode 100644 tensorflow/core/kernels/cwise_ops_sycl_common.h create mode 100644 tensorflow/core/kernels/substr_op.cc create mode 100644 tensorflow/python/kernel_tests/substr_op_test.py create mode 100644 tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat create mode 100644 tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat create mode 100644 tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh create mode 100644 tensorflow/tools/ci_build/windows/cpu/pip/run.bat create mode 100644 tensorflow/tools/git/.gitignore diff --git a/.gitignore b/.gitignore index 7b811375c2..e1fa12ea6a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ node_modules /third_party/py/numpy/numpy_include /tools/bazel.rc /tools/python_bin_path.sh +/tools/git/gen /util/python/python_include /util/python/python_lib /pip_test diff --git a/configure b/configure index dcdbff23b5..dacaebb490 100755 --- a/configure +++ b/configure @@ -53,6 +53,7 @@ if is_windows; then TF_NEED_GCP=0 TF_NEED_HDFS=0 
TF_NEED_CUDA=0 + TF_NEED_OPENCL=0 fi while [ "$TF_NEED_GCP" == "" ]; do @@ -116,6 +117,17 @@ GEN_GIT_SOURCE=tensorflow/tools/git/gen_git_source.py chmod a+x ${GEN_GIT_SOURCE} "${PYTHON_BIN_PATH}" ${GEN_GIT_SOURCE} --configure "${SOURCE_BASE_DIR}" +## Set up SYCL-related environment settings +while [ "$TF_NEED_OPENCL" == "" ]; do + read -p "Do you wish to build TensorFlow with OpenCL support? [y/N] " INPUT + case $INPUT in + [Yy]* ) echo "OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=1;; + [Nn]* ) echo "No OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=0;; + "" ) echo "No OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=0;; + * ) echo "Invalid selection: " $INPUT;; + esac +done + ## Set up Cuda-related environment settings while [ "$TF_NEED_CUDA" == "" ]; do @@ -129,12 +141,14 @@ while [ "$TF_NEED_CUDA" == "" ]; do done export TF_NEED_CUDA -if [ "$TF_NEED_CUDA" == "0" ]; then +export TF_NEED_SYCL +if [[ "$TF_NEED_CUDA" == "0" ]] && [[ "$TF_NEED_OPENCL" == "0" ]]; then echo "Configuration finished" bazel_clean_and_fetch exit fi +if [ "$TF_NEED_CUDA" == "1" ]; then # Set up which gcc nvcc should use as the host compiler while true; do fromuser="" @@ -336,6 +350,95 @@ EOF TF_CUDA_COMPUTE_CAPABILITIES="" done +# end of if "$TF_NEED_CUDA" == "1" +fi + +# OpenCL configuration + +if [ "$TF_NEED_OPENCL" == "1" ]; then + +# Determine which C++ compiler should be used as the host compiler +while true; do + fromuser="" + if [ -z "$HOST_CXX_COMPILER" ]; then + default_cxx_host_compiler=$(which g++|| true) + read -p "Please specify which C++ compiler should be used as the host C++ compiler. [Default is $default_cxx_host_compiler]: " HOST_CXX_COMPILER + fromuser="1" + if [ -z "$HOST_CXX_COMPILER" ]; then + HOST_CXX_COMPILER=$default_cxx_host_compiler + fi + fi + if [ -e "$HOST_CXX_COMPILER" ]; then + export HOST_CXX_COMPILER + break + fi + echo "Invalid C++ compiler path. ${HOST_CXX_COMPILER} cannot be found" 1>&2 + if [ -z "$fromuser" ]; then + exit 1 + fi + HOST_CXX_COMPILER="" + # Retry +done + +# Determine which C compiler should be used as the host compiler +while true; do + fromuser="" + if [ -z "$HOST_C_COMPILER" ]; then + default_c_host_compiler=$(which gcc|| true) + read -p "Please specify which C compiler should be used as the host C compiler. [Default is $default_c_host_compiler]: " HOST_C_COMPILER + fromuser="1" + if [ -z "$HOST_C_COMPILER" ]; then + HOST_C_COMPILER=$default_c_host_compiler + fi + fi + if [ -e "$HOST_C_COMPILER" ]; then + export HOST_C_COMPILER + break + fi + echo "Invalid C compiler path. ${HOST_C_COMPILER} cannot be found" 1>&2 + if [ -z "$fromuser" ]; then + exit 1 + fi + HOST_C_COMPILER="" + # Retry +done + +while true; do + # Configure the OPENCL version to use. + TF_OPENCL_VERSION="1.2" + + # Point to ComputeCpp root + if [ -z "$COMPUTECPP_TOOLKIT_PATH" ]; then + default_computecpp_toolkit_path=/usr/local/computecpp + read -p "Please specify the location where ComputeCpp $TF_OPENCL_VERSION is installed. Refer to README.md for more details. [Default is $default_computecpp_toolkit_path]: " COMPUTECPP_TOOLKIT_PATH + fromuser="1" + if [ -z "$COMPUTECPP_TOOLKIT_PATH" ]; then + COMPUTECPP_TOOLKIT_PATH=$default_computecpp_toolkit_path + fi + fi + + if [ "$OSNAME" == "Linux" ]; then + SYCL_RT_LIB_PATH="lib/libComputeCpp.so" + fi + + if [ -e "${COMPUTECPP_TOOLKIT_PATH}/${SYCL_RT_LIB_PATH}" ]; then + export COMPUTECPP_TOOLKIT_PATH + break + fi + echo "Invalid SYCL $TF_OPENCL_VERSION library path. 
${COMPUTECPP_TOOLKIT_PATH}/${SYCL_RT_LIB_PATH} cannot be found" + + if [ -z "$fromuser" ]; then + exit 1 + fi + # Retry + TF_OPENCL_VERSION="" + COMPUTECPP_TOOLKIT_PATH="" +done + +export TF_NEED_OPENCL +# end of if "$TF_NEED_OPENCL" == "1" +fi + bazel_clean_and_fetch echo "Configuration finished" diff --git a/tensorflow/__init__.py b/tensorflow/__init__.py index ec7cd91e7e..92d390a976 100644 --- a/tensorflow/__init__.py +++ b/tensorflow/__init__.py @@ -15,13 +15,14 @@ # Bring in all of the public TensorFlow interface into this # module. -# pylint: disable=wildcard-import + from __future__ import absolute_import from __future__ import division from __future__ import print_function +# pylint: disable=wildcard-import from tensorflow.python import * - +# pylint: enable=wildcard-import # Lazily import the `tf.contrib` module. This avoids loading all of the # dependencies of `tf.contrib` at `import tensorflow` time. diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index 2cd2380882..53dac35675 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -20,7 +20,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,wildcard-import,line-too-long +# pylint: disable=unused-import,line-too-long from tensorflow.contrib.bayesflow.python.ops import entropy from tensorflow.contrib.bayesflow.python.ops import monte_carlo from tensorflow.contrib.bayesflow.python.ops import special_math @@ -29,3 +29,4 @@ from tensorflow.contrib.bayesflow.python.ops import stochastic_graph from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor from tensorflow.contrib.bayesflow.python.ops import stochastic_variables from tensorflow.contrib.bayesflow.python.ops import variational_inference +# pylint: enable=unused-import,line-too-long diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 9c38e151ff..a935e31f17 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -22,7 +22,8 @@ option(tensorflow_BUILD_CC_EXAMPLE "Build the C++ tutorial example" ON) option(tensorflow_BUILD_PYTHON_BINDINGS "Build the Python bindings" ON) option(tensorflow_BUILD_ALL_KERNELS "Build all OpKernels" ON) option(tensorflow_BUILD_CONTRIB_KERNELS "Build OpKernels from tensorflow/contrib/..." ON) - +option(tensorflow_BUILD_CC_TESTS "Build cc unit tests " OFF) +option(tensorflow_BUILD_PYTHON_TESTS "Build python unit tests " OFF) #Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option for # targets that link ${CMAKE_THREAD_LIBS_INIT}. 
@@ -74,6 +75,9 @@ include(jsoncpp)
 include(farmhash)
 include(highwayhash)
 include(protobuf)
+if (tensorflow_BUILD_CC_TESTS)
+  include(googletest)
+endif()

 set(tensorflow_EXTERNAL_LIBRARIES
     ${zlib_STATIC_LIBRARIES}
@@ -194,7 +198,6 @@ include(tf_core_kernels.cmake)
 if(tensorflow_ENABLE_GRPC_SUPPORT)
   include(tf_core_distributed_runtime.cmake)
 endif()
-
 include(tf_cc_ops.cmake)
 if(tensorflow_BUILD_CC_EXAMPLE)
   include(tf_tutorials.cmake)
@@ -203,3 +206,6 @@ endif()
 if(tensorflow_BUILD_PYTHON_BINDINGS)
   include(tf_python.cmake)
 endif()
+if (tensorflow_BUILD_CC_TESTS OR tensorflow_BUILD_PYTHON_TESTS)
+  include(tf_tests.cmake)
+endif()
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index a3510b5e86..3f8dcc525b 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -60,7 +60,9 @@ Note: Windows support is in an **alpha** state, and we welcome your feedback.
   on Windows, but have not yet committed to supporting that configuration.)

 - The following Python APIs are not currently implemented:
-  * Loading custom op libraries via `tf.load_op_library()`.
+  * Loading custom op libraries via `tf.load_op_library()`. To use a custom op,
+    place its source code under the tensorflow/core/user_ops directory; a shape
+    function is required (not optional) for each op.
   * Path manipulation functions (such as `tf.gfile.ListDirectory()`) are not
     functional.

@@ -76,7 +78,6 @@ Note: Windows support is in an **alpha** state, and we welcome your feedback.
   * `ImmutableConst`
   * `Lgamma`
   * `Polygamma`
-  * `SparseMatmul`
   * `Zeta`

 - Google Cloud Storage support is not currently implemented. The GCS library
@@ -195,7 +196,21 @@ Step-by-step Windows build
    * `-Dtensorflow_ENABLE_GPU=(ON|OFF)`. Defaults to `OFF`. Include
      GPU support. If GPU is enabled you need to install the CUDA 8.0 Toolkit and CUDNN 5.1.
      CMake will expect the location of CUDNN in -DCUDNN_HOME=path_you_unziped_cudnn.
-
+
+   * `-Dtensorflow_BUILD_CC_TESTS=(ON|OFF)`. Defaults to `OFF`. This builds the C++ unit tests.
+     There are many of them, and building them will take a few hours.
+     After running CMake, build and execute the tests with
+     ```
+     MSBuild /p:Configuration=RelWithDebInfo ALL_BUILD.vcxproj
+     ctest -C RelWithDebInfo
+     ```
+
+   * `-Dtensorflow_BUILD_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This enables the Python kernel tests.
+     After building the Python wheel, you need to install the new wheel before running the tests.
+     To execute the tests, use
+     ```
+     ctest -C RelWithDebInfo
+     ```

 4. Invoke MSBuild to build TensorFlow.
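The README note above makes a shape function mandatory for any custom op placed under tensorflow/core/user_ops. As a minimal sketch of what that looks like (the op name and file below are hypothetical, not part of this patch), an op registered with TensorFlow's C++ shape-inference API would be:

```c++
// Hypothetical tensorflow/core/user_ops/zero_out.cc -- illustrative only.
// The SetShapeFn call is the piece the README above marks as required.
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

REGISTER_OP("ZeroOut")
    .Input("to_zero: int32")
    .Output("zeroed: int32")
    .SetShapeFn([](tensorflow::shape_inference::InferenceContext* c) {
      // The output has the same shape as the input.
      c->set_output(0, c->input(0));
      return tensorflow::Status::OK();
    });
```

On Windows such an op is compiled into TensorFlow itself rather than loaded at runtime via `tf.load_op_library()`, which is why the source has to live under tensorflow/core/user_ops.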
diff --git a/tensorflow/contrib/cmake/external/googletest.cmake b/tensorflow/contrib/cmake/external/googletest.cmake new file mode 100644 index 0000000000..e6daf62a51 --- /dev/null +++ b/tensorflow/contrib/cmake/external/googletest.cmake @@ -0,0 +1,29 @@ +include (ExternalProject) + +set(googletest_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/googletest/src/googletest/googletest/include) +set(googletest_URL https://github.com/google/googletest.git) +set(googletest_BUILD ${CMAKE_CURRENT_BINARY_DIR}/googletest/) +set(googletest_TAG ec44c6c1675c25b9827aacd08c02433cccde7780) + +if(WIN32) + set(googletest_STATIC_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/googletest/src/googletest/googletest/${CMAKE_BUILD_TYPE}/gtest.lib) +else() + set(googletest_STATIC_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/googletest/src/googletest/googletest/${CMAKE_BUILD_TYPE}/gtest.a) +endif() + +ExternalProject_Add(googletest + PREFIX googletest + GIT_REPOSITORY ${googletest_URL} + GIT_TAG ${googletest_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + #PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD} + INSTALL_COMMAND "" + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} + -DBUILD_GMOCK:BOOL=OFF + -DBUILD_GTEST:BOOL=ON + -Dgtest_force_shared_crt:BOOL=ON +) diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake index ded2e41770..afe5e366ae 100644 --- a/tensorflow/contrib/cmake/external/zlib.cmake +++ b/tensorflow/contrib/cmake/external/zlib.cmake @@ -8,7 +8,7 @@ set(ZLIB_TAG 50893291621658f355bc5b4d450a8d06a563053d) if(WIN32) set(zlib_STATIC_LIBRARIES - ${CMAKE_CURRENT_BINARY_DIR}/zlib/install/lib/zlib.lib) + ${CMAKE_CURRENT_BINARY_DIR}/zlib/install/lib/zlibstatic.lib) else() set(zlib_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/zlib/install/lib/libz.a) diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index ac7228ef68..a25b37c754 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -14,6 +14,7 @@ set(tf_op_lib_names "no_op" "parsing_ops" "random_ops" + "resource_variable_ops" "script_ops" "sdca_ops" "sendrecv_ops" diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 907158b646..9a9aed4375 100644 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -270,6 +270,7 @@ GENERATE_PYTHON_OP_LIB("logging_ops") GENERATE_PYTHON_OP_LIB("nn_ops") GENERATE_PYTHON_OP_LIB("parsing_ops") GENERATE_PYTHON_OP_LIB("random_ops") +GENERATE_PYTHON_OP_LIB("resource_variable_ops") GENERATE_PYTHON_OP_LIB("script_ops") GENERATE_PYTHON_OP_LIB("sdca_ops") GENERATE_PYTHON_OP_LIB("state_ops") diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake new file mode 100644 index 0000000000..783cb40b67 --- /dev/null +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -0,0 +1,384 @@ +enable_testing() + +# +# get a temp path for test data +# +function(GetTestRunPath VAR_NAME OBJ_NAME) + if(WIN32) + if(DEFINED ENV{TMP}) + set(TMPDIR "$ENV{TMP}") + elseif(DEFINED ENV{TEMP}) + set(TMPDIR "$ENV{TEMP}") + endif() + string(REPLACE "\\" "/" TMPDIR ${TMPDIR}) + else() + set(TMPDIR "$ENV{TMPDIR}") + endif() + if(NOT EXISTS "${TMPDIR}") + message(FATAL_ERROR "Unable to determine a path to the temporary directory") + endif() + set(${VAR_NAME} "${TMPDIR}/${OBJ_NAME}" PARENT_SCOPE) +endfunction(GetTestRunPath) + +# +# 
create test for each source +# +function(AddTests) + cmake_parse_arguments(_AT "" "" "SOURCES;OBJECTS;LIBS;DATA;DEPENDS" ${ARGN}) + foreach(sourcefile ${_AT_SOURCES}) + string(REPLACE "${tensorflow_source_dir}/" "" exename ${sourcefile}) + string(REPLACE ".cc" "" exename ${exename}) + string(REPLACE "/" "_" exename ${exename}) + AddTest( + TARGET ${exename} + SOURCES ${sourcefile} + OBJECTS ${_AT_OBJECTS} + LIBS ${_AT_LIBS} + DATA ${_AT_DATA} + DEPENDS ${_AT_DEPENDS} + ) + endforeach() +endfunction(AddTests) + +# +# create once test +# +function(AddTest) + cmake_parse_arguments(_AT "" "TARGET" "SOURCES;OBJECTS;LIBS;DATA;DEPENDS" ${ARGN}) + + list(REMOVE_DUPLICATES _AT_SOURCES) + list(REMOVE_DUPLICATES _AT_OBJECTS) + list(REMOVE_DUPLICATES _AT_LIBS) + if (_AT_DATA) + list(REMOVE_DUPLICATES _AT_DATA) + endif(_AT_DATA) + if (_AT_DEPENDS) + list(REMOVE_DUPLICATES _AT_DEPENDS) + endif(_AT_DEPENDS) + + add_executable(${_AT_TARGET} ${_AT_SOURCES} ${_AT_OBJECTS}) + target_link_libraries(${_AT_TARGET} ${_AT_LIBS}) + + GetTestRunPath(testdir ${_AT_TARGET}) + set(tempdir "${testdir}/tmp") + file(REMOVE_RECURSE "${testdir}") + file(MAKE_DIRECTORY "${testdir}") + file(MAKE_DIRECTORY "${tempdir}") + add_test(NAME ${_AT_TARGET} COMMAND ${_AT_TARGET} WORKING_DIRECTORY "${testdir}") + set_tests_properties(${_AT_TARGET} + PROPERTIES ENVIRONMENT "TEST_TMPDIR=${tempdir};TEST_SRCDIR=${testdir}" + ) + + foreach(datafile ${_AT_DATA}) + add_custom_command( + TARGET ${_AT_TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + "${CMAKE_CURRENT_SOURCE_DIR}/${datafile}" + "${testdir}/${datafile}" + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${datafile}" + ) + endforeach() + + if (_AT_DEPENDS) + add_dependencies(${_AT_TARGET} ${_AT_DEPENDS}) + endif() +endfunction(AddTest) + +# +# create python test for each script +# +function(AddPythonTests) + cmake_parse_arguments(_AT "" "" "SOURCES;DATA;DEPENDS" ${ARGN}) + list(REMOVE_DUPLICATES _AT_SOURCES) + if (_AT_DATA) + list(REMOVE_DUPLICATES _AT_DATA) + endif(_AT_DATA) + if (_AT_DEPENDS) + list(REMOVE_DUPLICATES _AT_DEPENDS) + endif(_AT_DEPENDS) + + foreach(sourcefile ${_AT_SOURCES}) + add_test(NAME ${sourcefile} COMMAND ${PYTHON_EXECUTABLE} ${sourcefile}) + if (_AT_DEPENDS) + add_dependencies(${_AT_TARGET} ${_AT_DEPENDS}) + endif() + endforeach() +endfunction(AddPythonTests) + +if (tensorflow_BUILD_PYTHON_TESTS) + # + # python tests. This assumes that the tensorflow wheel is + # installed on the test system. + # TODO: we currently don't handle tests that need to have + # some environment setup: see AddTest how to add this + # + + # include all test + file(GLOB_RECURSE tf_test_src_py + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/*.py" + ) + + # exclude the onces we don't want + set(tf_test_src_py_exclude + # generally not working + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/__init__.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/benchmark_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/resource_variable_ops_test.py" + ) + if (WIN32) + set(tf_test_src_py_exclude + ${tf_test_src_py_exclude} + # generally excluded + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/__init__.py" + + # TODO: failing tests. 
+ # Nothing critical in here but should get this list down to [] + # The failing list is grouped by failure source + # stl on windows handles overflows different + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/as_string_op_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/cast_op_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/string_to_number_op_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/clip_ops_test.py" + # misc + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/cwise_ops_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/variable_scope_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/reshape_op_test.py" + # int32/int64 mixup + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/functional_ops_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/py_func_test.py" + # issues related to windows fs + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/io_ops_test.py" + # missing kernel + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/pooling_ops_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/conv_ops_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/depthwise_conv_op_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/pool_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py" + # cuda launch failed + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/diag_op_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/trace_op_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/one_hot_op_test.py" # gpu, T=uint8 + ) + endif() + list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude}) + + AddPythonTests( + SOURCES ${tf_test_src_py} + ) +endif(tensorflow_BUILD_PYTHON_TESTS) + +if (tensorflow_BUILD_CC_TESTS) + # + # cc unit tests. Be aware that by default we include 250+ tests which + # will take time and space to build. 
+ # If you wan to cut this down, for example to a specific test, modify + # tf_test_src_simple to your needs + # + + include_directories(${googletest_INCLUDE_DIRS}) + + # cc tests wrapper + set(tf_src_testlib + "${tensorflow_source_dir}/tensorflow/cc/framework/testutil.cc" + "${tensorflow_source_dir}/tensorflow/cc/gradients/grad_testutil.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc" + "${tensorflow_source_dir}/tensorflow/core/framework/function_testlib.cc" + "${tensorflow_source_dir}/tensorflow/core/framework/shape_inference_testutil.cc" + "${tensorflow_source_dir}/tensorflow/core/framework/tensor_testutil.cc" + "${tensorflow_source_dir}/tensorflow/core/graph/testlib.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/test_main.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/default/test_benchmark.cc" + "${tensorflow_source_dir}/tensorflow/c/c_api.cc" + "${tensorflow_source_dir}/tensorflow/c/checkpoint_reader.cc" + "${tensorflow_source_dir}/tensorflow/c/tf_status_helper.cc" + ) + + # include all test + file(GLOB_RECURSE tf_test_src_simple + "${tensorflow_source_dir}/tensorflow/cc/*_test.cc" + "${tensorflow_source_dir}/tensorflow/python/*_test.cc" + "${tensorflow_source_dir}/tensorflow/core/*_test.cc" + "${tensorflow_source_dir}/tensorflow/user_ops/*_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/rnn/*_test.cc" + ) + + if (NOT tensorflow_ENABLE_GPU) + # exclude gpu tests if we are not buildig for gpu + set(tf_test_src_simple_exclude + ${tf_test_src_simple_exclude} + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc" + ) + endif() + + # exclude the onces we don't want + set(tf_test_src_simple_exclude + # generally not working + "${tensorflow_source_dir}/tensorflow/cc/client/client_session_test.cc" + "${tensorflow_source_dir}/tensorflow/cc/framework/gradients_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/call_options_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/tensor_coding_test.cc" + ) + + if (WIN32) + set(tf_test_src_simple_exclude + ${tf_test_src_simple_exclude} + # generally excluded + "${tensorflow_source_dir}/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc" + "${tensorflow_source_dir}/tensorflow/cc/framework/cc_ops_test.cc" # test_op.h missing + + # TODO: test failing + "${tensorflow_source_dir}/tensorflow/core/common_runtime/simple_placer_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/executor_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantized_reshape_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/requantization_range_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/requantize_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/restore_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/restore_v2_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/save_op_test.cc" + 
"${tensorflow_source_dir}/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/restore_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantize_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/lib/core/status_test.cc" + "${tensorflow_source_dir}/tensorflow/core/lib/strings/str_util_test.cc" + "${tensorflow_source_dir}/tensorflow/core/lib/strings/numbers_test.cc" + "${tensorflow_source_dir}/tensorflow/core/lib/monitoring/collection_registry_test.cc" + "${tensorflow_source_dir}/tensorflow/core/util/tensor_slice_reader_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/file_system_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/logging_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/env_test.cc" + "${tensorflow_source_dir}/tensorflow/core/ops/math_grad_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/cudnn_rnn_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops_test.cc" # status 5 + "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/lstm_ops_test.cc" # status 5 + + # TODO: not compiling + "${tensorflow_source_dir}/tensorflow/cc/framework/gradient_checker_test.cc" + "${tensorflow_source_dir}/tensorflow/cc/gradients/math_grad_test.cc" + "${tensorflow_source_dir}/tensorflow/cc/gradients/array_grad_test.cc" + "${tensorflow_source_dir}/tensorflow/cc/saved_model/loader_test.cc" + "${tensorflow_source_dir}/tensorflow/cc/training/queue_runner_test.cc" + "${tensorflow_source_dir}/tensorflow/cc/training/coordinator_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/nn_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantization_utils_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/activation_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/batch_norm_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/bias_add_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/concat_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/conv_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/matmul_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/pooling_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/parameterized_truncated_normal_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/non_max_suppression_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/fused_batch_norm_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/eigen_backward_spatial_convolutions_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/hexagon/hexagon_graph_transferer_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/adjust_contrast_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/batch_norm_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/cast_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/colorspace_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/control_flow_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/conv_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/debug_ops_test.cc" + 
"${tensorflow_source_dir}/tensorflow/core/kernels/resize_bilinear_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/resize_nearest_neighbor_op_benchmark_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/spacetobatch_benchmark_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/sparse_add_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/summary_image_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/summary_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantized_activation_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantized_bias_add_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantized_concat_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantized_conv_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantized_matmul_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantized_pooling_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/core/kernels/quantized_batch_norm_op_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/cloud/gcs_file_system_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/cloud/google_auth_provider_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/cloud/http_request_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/cloud/oauth_client_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/cloud/retrying_file_system_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/cloud/time_util_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/port_test.cc" + "${tensorflow_source_dir}/tensorflow/core/platform/profile_utils/cpu_utils_test.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/direct_session_test.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/master_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/remote_device_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/rpc/grpc_channel_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr_test.cc" + "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/master_test.cc" + "${tensorflow_source_dir}/tensorflow/core/framework/partial_tensor_shape_test.cc" + "${tensorflow_source_dir}/tensorflow/core/lib/core/notification_test.cc" + "${tensorflow_source_dir}/tensorflow/core/lib/gtl/cleanup_test.cc" + "${tensorflow_source_dir}/tensorflow/core/lib/gtl/edit_distance_test.cc" + "${tensorflow_source_dir}/tensorflow/core/lib/strings/strcat_test.cc" + "${tensorflow_source_dir}/tensorflow/core/ops/array_grad_test.cc" + "${tensorflow_source_dir}/tensorflow/core/ops/nn_ops_test.cc" + 
"${tensorflow_source_dir}/tensorflow/core/example/example_parser_configuration_test.cc" + "${tensorflow_source_dir}/tensorflow/core/example/feature_util_test.cc" + "${tensorflow_source_dir}/tensorflow/core/util/reporter_test.cc" + "${tensorflow_source_dir}/tensorflow/core/util/memmapped_file_system_test.cc" + "${tensorflow_source_dir}/tensorflow/core/util/sparse_sparse_tensor_test.cc" + "${tensorflow_source_dir}/tensorflow/core/debug/debug_gateway_test.cc" + "${tensorflow_source_dir}/tensorflow/core/debug/debug_io_utils_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/factorization/kernels/clustering_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/bundle_shim_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/bundle_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/signature_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/core/ops/training_ops_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/core/ops/tree_utils_test.cc" + "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/core/data/sparse_values_to_indices_test.cc" + ) + endif() + + list(REMOVE_ITEM tf_test_src_simple ${tf_test_src_simple_exclude}) + + set(tf_test_lib tf_test_lib) + add_library(${tf_test_lib} STATIC ${tf_src_testlib}) + + # this is giving to much objects and libraries to the linker but + # it makes this script much easier. So for now we do it this way. + set(tf_obj_test + $ + $ + $ + $ + $ + $ + $ + $ + $<$:$> + ) + + set(tf_test_libs + tf_protos_cc + tf_test_lib + ${tf_core_gpu_kernels_lib} + ${googletest_STATIC_LIBRARIES} + ${tensorflow_EXTERNAL_LIBRARIES} + ) + + AddTests( + SOURCES ${tf_test_src_simple} + OBJECTS ${tf_obj_test} + LIBS ${tf_test_libs} + DEPENDS googletest + ) +endif(tensorflow_BUILD_CC_TESTS) diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 8111118462..c6522069e9 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -126,3 +126,5 @@ from tensorflow.contrib.distributions.python.ops.student_t import * from tensorflow.contrib.distributions.python.ops.transformed_distribution import * from tensorflow.contrib.distributions.python.ops.uniform import * from tensorflow.contrib.distributions.python.ops.wishart import * + +# pylint: enable=unused-import,wildcard-import,line-too-long,g-importing-member diff --git a/tensorflow/contrib/factorization/__init__.py b/tensorflow/contrib/factorization/__init__.py index 61ff3c28ec..7ce448a36c 100644 --- a/tensorflow/contrib/factorization/__init__.py +++ b/tensorflow/contrib/factorization/__init__.py @@ -24,3 +24,4 @@ from tensorflow.contrib.factorization.python.ops.factorization_ops import * from tensorflow.contrib.factorization.python.ops.gmm import * from tensorflow.contrib.factorization.python.ops.gmm_ops import * from tensorflow.contrib.factorization.python.ops.kmeans import * +# pylint: enable=wildcard-import diff --git a/tensorflow/contrib/factorization/python/ops/factorization_ops.py b/tensorflow/contrib/factorization/python/ops/factorization_ops.py index 34fa0129dd..7708aa1e9e 100644 --- a/tensorflow/contrib/factorization/python/ops/factorization_ops.py +++ b/tensorflow/contrib/factorization/python/ops/factorization_ops.py @@ -27,6 +27,7 @@ import tensorflow as tf # pylint: disable=wildcard-import,undefined-variable from tensorflow.contrib.factorization.python.ops.gen_factorization_ops import * +# pylint: 
enable=wildcard-import from tensorflow.contrib.util import loader from tensorflow.python.framework import ops from tensorflow.python.ops import embedding_ops diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 9414fffda4..24f1dee4d7 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -73,5 +73,6 @@ import sys from tensorflow.contrib.framework.python.framework import * from tensorflow.contrib.framework.python.ops import * from tensorflow.python.util.all_util import make_all +# pylint: enable=unused-import,wildcard-import __all__ = make_all(__name__) diff --git a/tensorflow/contrib/framework/python/framework/__init__.py b/tensorflow/contrib/framework/python/framework/__init__.py index 1b8a5a1b39..d30d6d9df3 100644 --- a/tensorflow/contrib/framework/python/framework/__init__.py +++ b/tensorflow/contrib/framework/python/framework/__init__.py @@ -22,7 +22,9 @@ from __future__ import print_function from tensorflow.contrib.framework.python.framework.checkpoint_utils import * from tensorflow.contrib.framework.python.framework.experimental import experimental from tensorflow.contrib.framework.python.framework.tensor_util import * +# pylint: enable=wildcard-import from tensorflow.python.util import decorator_utils from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.deprecation import deprecated_arg_values from tensorflow.python.util.deprecation import deprecated_args + diff --git a/tensorflow/contrib/grid_rnn/__init__.py b/tensorflow/contrib/grid_rnn/__init__.py index 28f379375a..e005dd2083 100644 --- a/tensorflow/contrib/grid_rnn/__init__.py +++ b/tensorflow/contrib/grid_rnn/__init__.py @@ -25,3 +25,4 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import, line-too-long from tensorflow.contrib.grid_rnn.python.ops.grid_rnn_cell import * +# pylint: enable=unused-import,wildcard-import,line-too-long diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py index eab567a2bf..0fc3e7a841 100644 --- a/tensorflow/contrib/layers/__init__.py +++ b/tensorflow/contrib/layers/__init__.py @@ -116,5 +116,6 @@ import sys from tensorflow.contrib.layers.python.layers import * from tensorflow.contrib.layers.python.ops import sparse_ops from tensorflow.python.util.all_util import make_all +# pylint: enable=unused-import,wildcard-import __all__ = make_all(__name__) diff --git a/tensorflow/contrib/layers/python/layers/__init__.py b/tensorflow/contrib/layers/python/layers/__init__.py index 75a07cefc8..10344d007d 100644 --- a/tensorflow/contrib/layers/python/layers/__init__.py +++ b/tensorflow/contrib/layers/python/layers/__init__.py @@ -31,3 +31,4 @@ from tensorflow.contrib.layers.python.layers.summaries import * from tensorflow.contrib.layers.python.layers.target_column import * from tensorflow.contrib.layers.python.ops.bucketization_op import * from tensorflow.contrib.layers.python.ops.sparse_feature_cross_op import * +# pylint: enable=wildcard-import diff --git a/tensorflow/contrib/learn/__init__.py b/tensorflow/contrib/learn/__init__.py index 3f43c2cc23..ce98f2542d 100644 --- a/tensorflow/contrib/learn/__init__.py +++ b/tensorflow/contrib/learn/__init__.py @@ -62,6 +62,7 @@ from __future__ import print_function # pylint: disable=wildcard-import from tensorflow.contrib.learn.python.learn import * +# pylint: enable=wildcard-import from tensorflow.python.util.all_util import make_all __all__ = make_all(__name__) diff 
--git a/tensorflow/contrib/learn/python/__init__.py b/tensorflow/contrib/learn/python/__init__.py index 9645f7ac5c..bbebd5ab97 100644 --- a/tensorflow/contrib/learn/python/__init__.py +++ b/tensorflow/contrib/learn/python/__init__.py @@ -21,3 +21,4 @@ from __future__ import print_function # pylint: disable=wildcard-import from tensorflow.contrib.learn.python.learn import * +# pylint: enable=wildcard-import diff --git a/tensorflow/contrib/learn/python/learn/ops/__init__.py b/tensorflow/contrib/learn/python/learn/ops/__init__.py index d9f8fb83b1..173c894721 100644 --- a/tensorflow/contrib/learn/python/learn/ops/__init__.py +++ b/tensorflow/contrib/learn/python/learn/ops/__init__.py @@ -24,3 +24,4 @@ from tensorflow.contrib.learn.python.learn.ops.array_ops import * from tensorflow.contrib.learn.python.learn.ops.embeddings_ops import * from tensorflow.contrib.learn.python.learn.ops.losses_ops import * from tensorflow.contrib.learn.python.learn.ops.seq2seq_ops import * +# pylint: enable=wildcard-import diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py index 15ba2b3ce5..7bcc177d4e 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py @@ -22,3 +22,4 @@ from __future__ import print_function # pylint: disable=wildcard-import from tensorflow.contrib.learn.python.learn.preprocessing.categorical import * from tensorflow.contrib.learn.python.learn.preprocessing.text import * +# pylint: enable=wildcard-import diff --git a/tensorflow/contrib/lookup/__init__.py b/tensorflow/contrib/lookup/__init__.py index b3edfa4eb4..8717c3fa2b 100644 --- a/tensorflow/contrib/lookup/__init__.py +++ b/tensorflow/contrib/lookup/__init__.py @@ -35,3 +35,4 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import from tensorflow.contrib.lookup.lookup_ops import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/losses/__init__.py b/tensorflow/contrib/losses/__init__.py index 6fd241f2f4..14a4d53152 100644 --- a/tensorflow/contrib/losses/__init__.py +++ b/tensorflow/contrib/losses/__init__.py @@ -23,3 +23,4 @@ import sys # pylint: disable=unused-import,wildcard-import from tensorflow.contrib.losses.python.losses import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/losses/python/losses/__init__.py b/tensorflow/contrib/losses/python/losses/__init__.py index 54c21684bd..f53d4a000d 100644 --- a/tensorflow/contrib/losses/python/losses/__init__.py +++ b/tensorflow/contrib/losses/python/losses/__init__.py @@ -133,5 +133,6 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import from tensorflow.contrib.losses.python.losses.loss_ops import * from tensorflow.python.util.all_util import make_all +# pylint: enable=unused-import,wildcard-import __all__ = make_all(__name__) diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 03a745ad4c..b010a4387b 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -117,7 +117,7 @@ attached Android device: adb push ~/graphs/inception/tensorflow_inception_graph.pb /data/local/tmp/ adb push tensorflow/contrib/makefile/gen/bin/benchmark /data/local/tmp/ adb shell '/data/local/tmp/benchmark \ - --graph=/data/local/tmp/classify_image_graph_def.pb \ + --graph=/data/local/tmp/tensorflow_inception_graph.pb \ 
--input_layer="input:0" \ --input_layer_shape="1,224,224,3" \ --input_layer_type="float" \ @@ -190,7 +190,7 @@ tensorflow/contrib/makefile/download_dependencies.sh Next, you will need to compile protobufs for iOS: ```bash -compile_ios_protobuf.sh +tensorflow/contrib/makefile/compile_ios_protobuf.sh ``` Then, you can run the makefile specifying iOS as the target, along with the diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py index 2a23d2689b..8bfa1f97d8 100644 --- a/tensorflow/contrib/metrics/__init__.py +++ b/tensorflow/contrib/metrics/__init__.py @@ -142,6 +142,7 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long,g-importing-member,wildcard-import from tensorflow.contrib.metrics.python.metrics import * +# pylint: enable=wildcard-import from tensorflow.contrib.metrics.python.ops.confusion_matrix_ops import confusion_matrix from tensorflow.contrib.metrics.python.ops.histogram_ops import auc_using_histogram from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metric_map @@ -176,6 +177,6 @@ from tensorflow.contrib.metrics.python.ops.set_ops import set_intersection from tensorflow.contrib.metrics.python.ops.set_ops import set_size from tensorflow.contrib.metrics.python.ops.set_ops import set_union from tensorflow.python.util.all_util import make_all - +# pylint: enable=unused-import,line-too-long __all__ = make_all(__name__) diff --git a/tensorflow/contrib/metrics/python/metrics/__init__.py b/tensorflow/contrib/metrics/python/metrics/__init__.py index d3570bc2a7..beba5b5f7e 100644 --- a/tensorflow/contrib/metrics/python/metrics/__init__.py +++ b/tensorflow/contrib/metrics/python/metrics/__init__.py @@ -20,3 +20,4 @@ from __future__ import print_function # pylint: disable=wildcard-import from tensorflow.contrib.metrics.python.metrics.classification import * +# pylint: enable=wildcard-import diff --git a/tensorflow/contrib/ndlstm/python/__init__.py b/tensorflow/contrib/ndlstm/python/__init__.py index 696fe29ee6..1aa51a6ec4 100644 --- a/tensorflow/contrib/ndlstm/python/__init__.py +++ b/tensorflow/contrib/ndlstm/python/__init__.py @@ -22,3 +22,4 @@ from __future__ import print_function from tensorflow.contrib.ndlstm.python.lstm1d import * from tensorflow.contrib.ndlstm.python.lstm2d import * from tensorflow.contrib.ndlstm.python.misc import * +# pylint: enable=wildcard-import diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 11e1eaea67..ec54c9b3c9 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -22,3 +22,4 @@ from __future__ import print_function from tensorflow.contrib.opt.python.training.external_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * +# pylint: enable=wildcard-import diff --git a/tensorflow/contrib/quantization/__init__.py b/tensorflow/contrib/quantization/__init__.py index dcb73399b3..bbc7d91e5a 100644 --- a/tensorflow/contrib/quantization/__init__.py +++ b/tensorflow/contrib/quantization/__init__.py @@ -28,3 +28,4 @@ from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops from tensorflow.python.ops.gen_array_ops import dequantize from tensorflow.python.ops.gen_array_ops import quantize_v2 from tensorflow.python.ops.gen_array_ops import quantized_concat +# pylint: enable=unused-import,wildcard-import,g-bad-import-order diff --git 
a/tensorflow/contrib/quantization/python/__init__.py b/tensorflow/contrib/quantization/python/__init__.py index 0cc8cf5025..903a53d841 100644 --- a/tensorflow/contrib/quantization/python/__init__.py +++ b/tensorflow/contrib/quantization/python/__init__.py @@ -22,3 +22,4 @@ from __future__ import print_function from tensorflow.contrib.quantization.python.array_ops import * from tensorflow.contrib.quantization.python.math_ops import * from tensorflow.contrib.quantization.python.nn_ops import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/quantization/python/array_ops.py b/tensorflow/contrib/quantization/python/array_ops.py index b873d4df14..98ade5defe 100644 --- a/tensorflow/contrib/quantization/python/array_ops.py +++ b/tensorflow/contrib/quantization/python/array_ops.py @@ -18,8 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,wildcard-import +# pylint: disable=unused-import from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops from tensorflow.python.ops.gen_array_ops import dequantize from tensorflow.python.ops.gen_array_ops import quantize_v2 from tensorflow.python.ops.gen_array_ops import quantized_concat +# pylint: enable=unused-import diff --git a/tensorflow/contrib/quantization/python/math_ops.py b/tensorflow/contrib/quantization/python/math_ops.py index d863cdad26..79aa101396 100644 --- a/tensorflow/contrib/quantization/python/math_ops.py +++ b/tensorflow/contrib/quantization/python/math_ops.py @@ -23,3 +23,4 @@ from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops.gen_math_ops import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/quantization/python/nn_ops.py b/tensorflow/contrib/quantization/python/nn_ops.py index fd28423317..501438e4cc 100644 --- a/tensorflow/contrib/quantization/python/nn_ops.py +++ b/tensorflow/contrib/quantization/python/nn_ops.py @@ -23,3 +23,4 @@ from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops.gen_nn_ops import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/rnn/__init__.py b/tensorflow/contrib/rnn/__init__.py index 5acdaca11d..e89f603b2f 100644 --- a/tensorflow/contrib/rnn/__init__.py +++ b/tensorflow/contrib/rnn/__init__.py @@ -45,3 +45,4 @@ from tensorflow.contrib.rnn.python.ops.gru_ops import * from tensorflow.contrib.rnn.python.ops.lstm_ops import * from tensorflow.contrib.rnn.python.ops.rnn import * from tensorflow.contrib.rnn.python.ops.rnn_cell import * +# pylint: enable=unused-import,wildcard-import,line-too-long diff --git a/tensorflow/contrib/seq2seq/__init__.py b/tensorflow/contrib/seq2seq/__init__.py index 8861cb94d0..e67e4a7ca9 100644 --- a/tensorflow/contrib/seq2seq/__init__.py +++ b/tensorflow/contrib/seq2seq/__init__.py @@ -21,6 +21,7 @@ from __future__ import print_function import sys -# pylint: disable=unused-import,wildcard-import,line-too-long +# pylint: disable=unused-import,line-too-long from tensorflow.contrib.seq2seq.python.ops import layers from tensorflow.contrib.seq2seq.python.ops import loss +# pylint: enable=unused-import,line-too-long diff --git a/tensorflow/contrib/specs/python/__init__.py b/tensorflow/contrib/specs/python/__init__.py index 1e063e5d31..52db61e421 
100644 --- a/tensorflow/contrib/specs/python/__init__.py +++ b/tensorflow/contrib/specs/python/__init__.py @@ -24,3 +24,4 @@ from tensorflow.contrib.specs.python.specs import * from tensorflow.contrib.specs.python.specs_lib import * from tensorflow.contrib.specs.python.specs_ops import * from tensorflow.contrib.specs.python.summaries import * +# pylint: enable=wildcard-import diff --git a/tensorflow/contrib/tensor_forest/__init__.py b/tensorflow/contrib/tensor_forest/__init__.py index 7d97e01df0..06448eed07 100644 --- a/tensorflow/contrib/tensor_forest/__init__.py +++ b/tensorflow/contrib/tensor_forest/__init__.py @@ -21,3 +21,4 @@ from __future__ import print_function from tensorflow.contrib.tensor_forest.client import * from tensorflow.contrib.tensor_forest.data import * from tensorflow.contrib.tensor_forest.python import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/tensor_forest/client/__init__.py b/tensorflow/contrib/tensor_forest/client/__init__.py index 753f406cbc..1a0c87c4cc 100644 --- a/tensorflow/contrib/tensor_forest/client/__init__.py +++ b/tensorflow/contrib/tensor_forest/client/__init__.py @@ -17,5 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,wildcard-import +# pylint: disable=unused-import from tensorflow.contrib.tensor_forest.client import eval_metrics +# pylint: enable=unused-import diff --git a/tensorflow/contrib/tensor_forest/data/__init__.py b/tensorflow/contrib/tensor_forest/data/__init__.py index 3d04705878..23b9993c5b 100644 --- a/tensorflow/contrib/tensor_forest/data/__init__.py +++ b/tensorflow/contrib/tensor_forest/data/__init__.py @@ -17,5 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,wildcard-import +# pylint: disable=unused-import from tensorflow.contrib.tensor_forest.data import data_ops +# pylint: enable=unused-import diff --git a/tensorflow/contrib/tensor_forest/hybrid/__init__.py b/tensorflow/contrib/tensor_forest/hybrid/__init__.py index 0c8bf75e5a..2a65ab2f13 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/__init__.py +++ b/tensorflow/contrib/tensor_forest/hybrid/__init__.py @@ -19,3 +19,4 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import from tensorflow.contrib.tensor_forest.hybrid.python import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/testing/__init__.py b/tensorflow/contrib/testing/__init__.py index be382665b7..a01e7c190a 100644 --- a/tensorflow/contrib/testing/__init__.py +++ b/tensorflow/contrib/testing/__init__.py @@ -21,3 +21,4 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import from tensorflow.contrib.testing.python.framework.fake_summary_writer import * from tensorflow.contrib.testing.python.framework.util_test import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/training/__init__.py b/tensorflow/contrib/training/__init__.py index 6d3b46ec26..02b9b166ba 100644 --- a/tensorflow/contrib/training/__init__.py +++ b/tensorflow/contrib/training/__init__.py @@ -77,5 +77,6 @@ from tensorflow.contrib.training.python.training.training import create_train_op from tensorflow.contrib.training.python.training.training import multiply_gradients from tensorflow.contrib.training.python.training.training import train from tensorflow.python.util.all_util import 
make_all +# pylint: enable=unused-import,wildcard-import __all__ = make_all(__name__) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 46b72d20d3..9485a2819e 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -512,6 +512,7 @@ cc_library( deps = [ ":core_cpu", ":gpu_runtime", + ":sycl_runtime", ], ) @@ -1389,6 +1390,33 @@ tf_cuda_library( alwayslink = 1, ) +cc_library( + name = "sycl_runtime", + srcs = if_not_windows([ + "common_runtime/sycl/sycl_device.cc", + "common_runtime/sycl/sycl_device_context.cc", + "common_runtime/sycl/sycl_device_factory.cc", + ]), + hdrs = if_not_windows([ + "common_runtime/sycl/sycl_device.h", + "common_runtime/sycl/sycl_device_context.h", + ]), + copts = tf_copts(), + linkstatic = 1, + deps = [ + ":core_cpu", + ":core_cpu_internal", + ":framework", + ":framework_internal", + ":lib", + ":lib_internal", + ":protos_all_cc", + "//third_party/eigen3", + #"@local_config_sycl//sycl:sycl", + ], + alwayslink = 1, +) + # ----------------------------------------------------------------------------- # Tests diff --git a/tensorflow/core/common_runtime/device_set_test.cc b/tensorflow/core/common_runtime/device_set_test.cc index 21d3be2f61..550fbf568e 100644 --- a/tensorflow/core/common_runtime/device_set_test.cc +++ b/tensorflow/core/common_runtime/device_set_test.cc @@ -68,12 +68,17 @@ TEST_F(DeviceSetTest, PrioritizedDeviceTypeList) { (std::vector{DeviceType(DEVICE_GPU), DeviceType(DEVICE_CPU)}), types()); + AddDevice("SYCL", "/job:a/replica:0/task:0/device:sycl:0"); + EXPECT_EQ( + (std::vector{DeviceType(DEVICE_SYCL), DeviceType(DEVICE_GPU), + DeviceType(DEVICE_CPU)}), types()); + AddDevice("T1", "/job:a/replica:0/task:0/device:T1:0"); AddDevice("T1", "/job:a/replica:0/task:0/device:T1:1"); AddDevice("T2", "/job:a/replica:0/task:0/device:T2:0"); EXPECT_EQ( - (std::vector{DeviceType("T1"), DeviceType("T2"), - DeviceType(DEVICE_GPU), DeviceType(DEVICE_CPU)}), + (std::vector{DeviceType(DEVICE_SYCL), DeviceType("T1"), + DeviceType("T2"), DeviceType(DEVICE_GPU), DeviceType(DEVICE_CPU)}), types()); } diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc index d82cf01fb8..44f17d6260 100644 --- a/tensorflow/core/common_runtime/direct_session_test.cc +++ b/tensorflow/core/common_runtime/direct_session_test.cc @@ -818,6 +818,8 @@ class BlockingOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("BlockingOp").Device(DEVICE_CPU), BlockingOp); REGISTER_OP("BlockingOp").Input("x: float").Output("y: float").Doc(""); +REGISTER_KERNEL_BUILDER(Name("BlockingOp").Device(DEVICE_SYCL), BlockingOp); + static void TestSessionInterOpThreadsImpl(bool use_function_lib) { FunctionDefLibrary library_graph_def; if (use_function_lib) { diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc new file mode 100644 index 0000000000..ae5b5fbb58 --- /dev/null +++ b/tensorflow/core/common_runtime/sycl/sycl_device.cc @@ -0,0 +1,88 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if TENSORFLOW_USE_SYCL + +#include "tensorflow/core/common_runtime/sycl/sycl_device.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#include "tensorflow/core/framework/tensor.pb_text.h" +#include "tensorflow/core/platform/tracing.h" + +namespace tensorflow { + +cl::sycl::gpu_selector s; +cl::sycl::queue q(s); + +SYCLDevice::SYCLDevice(const SessionOptions& options, const string& name, + Bytes memory_limit, const DeviceLocality& locality, + const string& physical_device_desc, Allocator* allocator) + : LocalDevice(options, + Device::BuildDeviceAttributes(name, DEVICE_SYCL, memory_limit, + locality, physical_device_desc), + allocator), + allocator_(allocator), + device_context_(new SYCLDeviceContext()), + device_(q) { + set_eigen_sycl_device(&device_); +} + +SYCLDevice::~SYCLDevice() { + device_context_->Unref(); +} + +void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) { + assert(context); + if (port::Tracing::IsActive()) { + // TODO(pbar) We really need a useful identifier of the graph node. + const uint64 id = Hash64(op_kernel->name()); + port::Tracing::ScopedActivity region(port::Tracing::EventCategory::kCompute, + id); + } + op_kernel->Compute(context); +} + +Allocator* SYCLDevice::GetAllocator(AllocatorAttributes attr) { + return allocator_; +} + +Status SYCLDevice::MakeTensorFromProto(const TensorProto& tensor_proto, + const AllocatorAttributes alloc_attrs, + Tensor* tensor) { + Tensor parsed(tensor_proto.dtype()); + if (!parsed.FromProto(cpu_allocator(), tensor_proto)) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + ProtoDebugString(tensor_proto)); + } + *tensor = std::move(parsed); + return Status::OK(); +} + +Status SYCLDevice::FillContextMap(const Graph* graph, + DeviceContextMap* device_context_map) { + // Fill in the context map. It is OK for this map to contain + // duplicate DeviceContexts so long as we increment the refcount. + device_context_map->resize(graph->num_node_ids()); + for (Node* n : graph->nodes()) { + device_context_->Ref(); + (*device_context_map)[n->id()] = device_context_; + } + + return Status::OK(); +} + +} // namespace tensorflow + +#endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h new file mode 100644 index 0000000000..eaa9429b16 --- /dev/null +++ b/tensorflow/core/common_runtime/sycl/sycl_device.h @@ -0,0 +1,62 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#if !TENSORFLOW_USE_SYCL +#error This file must only be included when building TensorFlow with SYCL support +#endif + +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_ + +#define EIGEN_USE_SYCL + +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/local_device.h" +#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { + +class SYCLDevice : public LocalDevice { + public: + SYCLDevice(const SessionOptions& options, const string& name, + Bytes memory_limit, const DeviceLocality& locality, + const string& physical_device_desc, Allocator* allocator); + ~SYCLDevice() override; + + void Compute(OpKernel* op_kernel, OpKernelContext* context) override; + Allocator* GetAllocator(AllocatorAttributes attr) override; + Status MakeTensorFromProto(const TensorProto& tensor_proto, + const AllocatorAttributes alloc_attrs, + Tensor* tensor) override; + + Status FillContextMap(const Graph* graph, + DeviceContextMap* device_context_map) override; + + Status Sync() override { return Status::OK(); } + static string GetShortDeviceDescription(/*int device_id, + const DeviceDescription& desc*/) { + return strings::StrCat("device: 0, name SYCL, pci bus id: 0"); + } + + private: + Allocator* allocator_; // Not owned + SYCLDeviceContext* device_context_; + Eigen::SyclDevice device_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_ diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc new file mode 100644 index 0000000000..bbf241a22f --- /dev/null +++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc @@ -0,0 +1,46 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h" +#include "tensorflow/core/common_runtime/dma_helper.h" + +namespace tensorflow { + +void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, + Tensor* device_tensor, + StatusCallback done) const { + const int64 total_bytes = cpu_tensor->TotalBytes(); + if (total_bytes > 0) { + const void* src_ptr = DMAHelper::base(cpu_tensor); + void* dst_ptr = DMAHelper::base(device_tensor); + ::memcpy(dst_ptr, src_ptr, total_bytes); + } + done(Status::OK()); +} + +void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name, + Device* device, Tensor* cpu_tensor, + StatusCallback done) { + const int64 total_bytes = device_tensor->TotalBytes(); + if (total_bytes > 0) { + const void* src_ptr = DMAHelper::base(device_tensor); + void* dst_ptr = DMAHelper::base(cpu_tensor); + ::memcpy(dst_ptr, src_ptr, total_bytes); + } + done(Status::OK()); +} + +} // namespace tensorflow + diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.h b/tensorflow/core/common_runtime/sycl/sycl_device_context.h new file mode 100644 index 0000000000..327de52eaa --- /dev/null +++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.h @@ -0,0 +1,42 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_ +#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_ + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/device_base.h" + +namespace tensorflow { + +class SYCLDeviceContext : public DeviceContext { + public: + SYCLDeviceContext() {} + + ~SYCLDeviceContext() override {} + + void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, + Tensor* device_tensor, + StatusCallback done) const override; + + void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name, + Device* device, Tensor* cpu_tensor, + StatusCallback done) override; + +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_ diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc new file mode 100644 index 0000000000..97c4c2c236 --- /dev/null +++ b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc @@ -0,0 +1,44 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if TENSORFLOW_USE_SYCL + +#include "tensorflow/core/common_runtime/sycl/sycl_device.h" + +namespace tensorflow { + +class SYCLDeviceFactory : public DeviceFactory { + public: + Status CreateDevices(const SessionOptions& options, const string& name_prefix, + std::vector* devices) override { + int n = 1; + auto iter = options.config.device_count().find("SYCL"); + if (iter != options.config.device_count().end()) { + n = iter->second; + } + for (int i = 0; i < n; i++) { + string name = strings::StrCat(name_prefix, "/device:SYCL:", i); + devices->push_back(new SYCLDevice( + options, name, Bytes(256 << 20), DeviceLocality(), + SYCLDevice::GetShortDeviceDescription(), cpu_allocator())); + } + return Status::OK(); + } +}; + +REGISTER_LOCAL_DEVICE_FACTORY("SYCL", SYCLDeviceFactory); +} + +#endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h index 8f0075dcd6..6edbda1276 100644 --- a/tensorflow/core/framework/device_base.h +++ b/tensorflow/core/framework/device_base.h @@ -30,6 +30,9 @@ limitations under the License. namespace Eigen { struct ThreadPoolDevice; +#ifdef TENSORFLOW_USE_SYCL +struct SyclDevice; +#endif } // end namespace Eigen namespace perftools { @@ -145,6 +148,10 @@ class DeviceBase { eigen_cpu_device_ = d; } +#ifdef TENSORFLOW_USE_SYCL + void set_eigen_sycl_device(Eigen::SyclDevice* d) { eigen_sycl_device_ = d; } +#endif + // Return the Allocator implementation to use based on the allocator // attributes requested. See allocator.h for more details. virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) { @@ -167,6 +174,13 @@ class DeviceBase { return eigen_cpu_device_; } +#ifdef TENSORFLOW_USE_SYCL + const Eigen::SyclDevice* eigen_sycl_device() const { + CHECK(eigen_sycl_device_ != nullptr); + return eigen_sycl_device_; + } +#endif + // Caller owns the return value. The OpKernelContext calls this even // for devices that do not implement an eigen_gpu_device. Overridden // by GPU devices to return a derived type. 
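For context, the SYCL devices registered by the factory above are addressed from Python like any other device type: the factory reads the "SYCL" entry of `ConfigProto.device_count` and names devices `/device:SYCL:<i>`. A minimal usage sketch follows (not part of this patch; it assumes a build with `TENSORFLOW_USE_SYCL` defined and relies only on the `Const` and `Round` SYCL kernels registered later in this change):

```python
# Minimal sketch, not part of the patch: place a small graph on the SYCL device
# registered by SYCLDeviceFactory above (assumes a SYCL-enabled build).
import tensorflow as tf

config = tf.ConfigProto(
    device_count={"SYCL": 1},       # ask SYCLDeviceFactory for one device
    allow_soft_placement=True,      # fall back to CPU for ops without SYCL kernels
    log_device_placement=True)

with tf.device("/device:SYCL:0"):
    x = tf.constant([1.2, 2.7, 3.5])  # "Const" gets a SYCL kernel in this change
    y = tf.round(x)                   # as does "Round" for float

with tf.Session(config=config) as sess:
    print(sess.run(y))                # [1. 3. 4.]
```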
@@ -203,6 +217,9 @@ class DeviceBase { CpuWorkerThreads* cpu_worker_threads_ = nullptr; GpuDeviceInfo* gpu_device_info_ = nullptr; Eigen::ThreadPoolDevice* eigen_cpu_device_ = nullptr; +#ifdef TENSORFLOW_USE_SYCL + Eigen::SyclDevice* eigen_sycl_device_ = nullptr; +#endif }; } // namespace tensorflow diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index a7a93cb69c..50520bb3fd 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -949,6 +949,13 @@ const Eigen::GpuDevice& OpKernelContext::eigen_device() const { return eigen_gpu_device(); } +#ifdef TENSORFLOW_USE_SYCL +template <> +const Eigen::SyclDevice& OpKernelContext::eigen_device() const { + return eigen_sycl_device(); +} +#endif + void OpKernelConstruction::CtxFailure(Status s) { VLOG(1) << s; SetStatus(s); diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 145b8c5315..4a66d43e50 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -53,6 +53,7 @@ limitations under the License. namespace Eigen { struct ThreadPoolDevice; struct GpuDevice; +struct SyclDevice; } // end namespace Eigen namespace tensorflow { @@ -891,6 +892,11 @@ class OpKernelContext { const Eigen::GpuDevice& eigen_gpu_device() const { return params_->eigen_gpu_device->device(); } +#ifdef TENSORFLOW_USE_SYCL + const Eigen::SyclDevice& eigen_sycl_device() const { + return *device()->eigen_sycl_device(); + } +#endif template const EigenDeviceType& eigen_device() const; diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index ce4ae2d3d3..05db5a32b5 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -721,14 +721,58 @@ bool Tensor::CanUseDMA() const { #undef CASE namespace { +// Print from left dim to right dim recursively. template -string SummarizeArray(int64 limit, int64 num_elts, const char* data) { +void PrintOneDim(int dim_index, gtl::InlinedVector shape, int64 limit, + int shape_size, T* data, int64* data_index, string* result) { + if (*data_index >= limit) return; + int64 element_count = shape[dim_index]; + // We have reached the right-most dimension of the tensor. + if (dim_index == shape_size - 1) { + for (int64 i = 0; i < element_count; i++) { + if (*data_index >= limit) return; + if (i > 0) strings::StrAppend(result, " "); + strings::StrAppend(result, data[(*data_index)++]); + } + return; + } + // Loop every element of one dim. + for (int64 i = 0; i < element_count; i++) { + bool flag = false; + if (*data_index < limit) { + strings::StrAppend(result, "["); + flag = true; + } + // As for each element, print the sub-dim. 
+ PrintOneDim(dim_index + 1, shape, limit, shape_size, + data, data_index, result); + if (*data_index < limit || flag) { + strings::StrAppend(result, "]"); + flag = false; + } + } +} + +template +string SummarizeArray(int64 limit, int64 num_elts, const TensorShape& tensor_shape, + const char* data) { string ret; const T* array = reinterpret_cast(data); - for (int64 i = 0; i < limit; ++i) { - if (i > 0) strings::StrAppend(&ret, " "); - strings::StrAppend(&ret, array[i]); + + const gtl::InlinedVector shape = tensor_shape.dim_sizes(); + if(shape.empty()) { + for (int64 i = 0; i < limit; ++i) { + if (i > 0) strings::StrAppend(&ret, " "); + strings::StrAppend(&ret, array[i]); + } + if (num_elts > limit) strings::StrAppend(&ret, "..."); + return ret; } + int64 data_index = 0; + const int shape_size = tensor_shape.dims(); + PrintOneDim(0, shape, limit, shape_size, + array, &data_index, &ret); + if (num_elts > limit) strings::StrAppend(&ret, "..."); return ret; } @@ -744,40 +788,40 @@ string Tensor::SummarizeValue(int64 max_entries) const { const char* data = limit > 0 ? tensor_data().data() : nullptr; switch (dtype()) { case DT_HALF: - return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; case DT_FLOAT: - return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; case DT_DOUBLE: - return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; case DT_INT32: - return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; case DT_UINT8: case DT_QUINT8: - return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; case DT_UINT16: case DT_QUINT16: - return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; case DT_INT16: case DT_QINT16: - return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; case DT_INT8: case DT_QINT8: - return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; case DT_INT64: - return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; case DT_BOOL: // TODO(tucker): Is it better to emit "True False..."? This // will emit "1 0..." which is more compact. 
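The nesting behaviour of the recursive `PrintOneDim`/`SummarizeArray` code above is easiest to see in isolation. The following Python mirror (illustration only, not code from this change) reproduces the strings expected by the updated `tensor_test.cc` below:

```python
# Illustration only: elements are emitted left to right, every non-innermost
# dimension is wrapped in brackets, and printing stops after `limit` elements.
def summarize(shape, data, limit):
    out = []
    idx = [0]  # element counter shared across recursive calls

    def one_dim(dim):
        if idx[0] >= limit:
            return
        if dim == len(shape) - 1:          # right-most dimension: print values
            for i in range(shape[dim]):
                if idx[0] >= limit:
                    return
                if i > 0:
                    out.append(" ")
                out.append(str(data[idx[0]]))
                idx[0] += 1
            return
        for _ in range(shape[dim]):        # wrap each sub-block in brackets
            opened = idx[0] < limit
            if opened:
                out.append("[")
            one_dim(dim + 1)
            if opened:
                out.append("]")

    one_dim(0)
    if len(data) > limit:
        out.append("...")
    return "".join(out)

print(summarize([2, 2], [1, 2, 3, 4], limit=16))       # [1 2][3 4]
print(summarize([2, 2, 1, 1], [1, 2, 3, 4], limit=3))  # [[[1]][[2]]][[[3]]]...
```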
- return SummarizeArray(limit, num_elts, data); + return SummarizeArray(limit, num_elts, shape_, data); break; default: { // All irregular cases diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index 4d8d378b84..3c3b457e74 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -834,20 +834,24 @@ TEST(SummarizeValue, INT32) { Tensor x = MkTensor(DT_INT32, TensorShape({5}), {1, 2, 3, 4, 0}); EXPECT_EQ("1 2 3 4 0", x.SummarizeValue(16)); x = MkTensor(DT_INT32, TensorShape({2, 2}), {1, 2, 3, 4, 0}); - EXPECT_EQ("1 2 3 4", x.SummarizeValue(16)); + EXPECT_EQ("[1 2][3 4]", x.SummarizeValue(16)); x = MkTensor(DT_INT32, TensorShape({2, 2, 1, 1}), {1, 2, 3, 4, 0}); - EXPECT_EQ("1 2 3 4", x.SummarizeValue(16)); - EXPECT_EQ("1 2 3...", x.SummarizeValue(3)); + EXPECT_EQ("[[[1]][[2]]][[[3]][[4]]]", x.SummarizeValue(16)); + EXPECT_EQ("[[[1]][[2]]][[[3]]]...", x.SummarizeValue(3)); + x = MkTensor(DT_INT32, TensorShape({0}), {}); + EXPECT_EQ("", x.SummarizeValue(16)); } TEST(SummarizeValue, FLOAT) { Tensor x = MkTensor(DT_FLOAT, TensorShape({5}), {1, 2, 3, 4, 0}); EXPECT_EQ("1 2 3 4 0", x.SummarizeValue(16)); x = MkTensor(DT_FLOAT, TensorShape({2, 2}), {1, 2, 3, 4, 0}); - EXPECT_EQ("1 2 3 4", x.SummarizeValue(16)); + EXPECT_EQ("[1 2][3 4]", x.SummarizeValue(16)); x = MkTensor(DT_FLOAT, TensorShape({2, 2, 1, 1}), {1, 2, 3, 4, 0}); - EXPECT_EQ("1 2 3 4", x.SummarizeValue(16)); - EXPECT_EQ("1 2 3...", x.SummarizeValue(3)); + EXPECT_EQ("[[[1]][[2]]][[[3]][[4]]]", x.SummarizeValue(16)); + EXPECT_EQ("[[[1]][[2]]][[[3]]]...", x.SummarizeValue(3)); + x = MkTensor(DT_FLOAT, TensorShape({0}), {}); + EXPECT_EQ("", x.SummarizeValue(16)); } TEST(SummarizeValue, BOOL) { diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc index 62d6610d2e..4ddd14bb20 100644 --- a/tensorflow/core/framework/types.cc +++ b/tensorflow/core/framework/types.cc @@ -37,6 +37,7 @@ std::ostream& operator<<(std::ostream& os, const DeviceType& d) { const char* const DEVICE_CPU = "CPU"; const char* const DEVICE_GPU = "GPU"; +const char* const DEVICE_SYCL = "SYCL"; string DataTypeString(DataType dtype) { if (IsRefType(dtype)) { diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h index 7d4ed9a004..589730baf1 100644 --- a/tensorflow/core/framework/types.h +++ b/tensorflow/core/framework/types.h @@ -68,8 +68,9 @@ class DeviceType { std::ostream& operator<<(std::ostream& os, const DeviceType& d); // Convenient constants that can be passed to a DeviceType constructor -extern const char* const DEVICE_CPU; // "CPU" -extern const char* const DEVICE_GPU; // "GPU" +extern const char* const DEVICE_CPU; // "CPU" +extern const char* const DEVICE_GPU; // "GPU" +extern const char* const DEVICE_SYCL; // "SYCL" typedef gtl::InlinedVector MemoryTypeVector; typedef gtl::ArraySlice MemoryTypeSlice; diff --git a/tensorflow/core/framework/types_test.cc b/tensorflow/core/framework/types_test.cc index 18e0ef9c39..bc57740469 100644 --- a/tensorflow/core/framework/types_test.cc +++ b/tensorflow/core/framework/types_test.cc @@ -25,6 +25,7 @@ namespace { TEST(TypesTest, DeviceTypeName) { EXPECT_EQ("CPU", DeviceTypeString(DeviceType(DEVICE_CPU))); EXPECT_EQ("GPU", DeviceTypeString(DeviceType(DEVICE_GPU))); + EXPECT_EQ("SYCL", DeviceTypeString(DeviceType(DEVICE_SYCL))); } TEST(TypesTest, kDataTypeRefOffset) { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 6afcba1e24..7b61ea26a3 100644 --- 
a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2164,14 +2164,17 @@ tf_kernel_libraries( "reduce_join_op", "string_join_op", "string_split_op", + "substr_op", "as_string_op", "base64_ops", ], deps = [ + ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:string_ops_op_lib", + "//third_party/eigen3", ], ) diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index dba37ca396..4a289e1800 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -51,6 +51,17 @@ ConstantOp::~ConstantOp() {} REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp); +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Const") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("dtype"), \ + ConstantOp); +TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif + #if GOOGLE_CUDA #define REGISTER_KERNEL(D, TYPE) \ REGISTER_KERNEL_BUILDER( \ diff --git a/tensorflow/core/kernels/cwise_op_round.cc b/tensorflow/core/kernels/cwise_op_round.cc index 0457f3931d..7a4482dbb2 100644 --- a/tensorflow/core/kernels/cwise_op_round.cc +++ b/tensorflow/core/kernels/cwise_op_round.cc @@ -18,6 +18,14 @@ limitations under the License. namespace tensorflow { REGISTER5(UnaryOp, CPU, "Round", functor::round, Eigen::half, float, double, int32, int64); + +#ifdef TENSORFLOW_USE_SYCL +REGISTER(UnaryOp, SYCL, "Round", functor::round, float); +namespace functor { +DEFINE_UNARY1(round, float); +} // namespace functor +#endif + #if GOOGLE_CUDA REGISTER5(UnaryOp, GPU, "Round", functor::round, Eigen::half, float, double, int32, int64); diff --git a/tensorflow/core/kernels/cwise_ops_common.h b/tensorflow/core/kernels/cwise_ops_common.h index 5ad6b1fd4a..c825a91fb1 100644 --- a/tensorflow/core/kernels/cwise_ops_common.h +++ b/tensorflow/core/kernels/cwise_ops_common.h @@ -20,6 +20,10 @@ limitations under the License. 
#define EIGEN_USE_THREADS +#ifdef TENSORFLOW_USE_SYCL +#include "tensorflow/core/kernels/cwise_ops_sycl_common.h" +#endif + #include "tensorflow/core/kernels/cwise_ops.h" #include "tensorflow/core/kernels/cwise_ops_gradients.h" @@ -33,6 +37,9 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; +#ifdef TENSORFLOW_USE_SYCL +typedef Eigen::SyclDevice SYCLDevice; +#endif class BinaryOpShared : public OpKernel { public: @@ -96,45 +103,45 @@ class BinaryOp : public BinaryOpShared { if (state.in1_num_elements == 1) { // tensor op scalar functor::BinaryFunctor().Right( - eigen_device, out_flat, in0.flat(), in1.scalar(), - error_ptr); + eigen_device, out_flat, in0.template flat(), + in1.template scalar(), error_ptr); } else if (state.in0_num_elements == 1) { // scalar op tensor functor::BinaryFunctor().Left( - eigen_device, out_flat, in0.scalar(), in1.flat(), - error_ptr); + eigen_device, out_flat, in0.template scalar(), + in1.template flat(), error_ptr); } else { functor::BinaryFunctor()( - eigen_device, out_flat, in0.flat(), in1.flat(), - error_ptr); + eigen_device, out_flat, in0.template flat(), + in1.template flat(), error_ptr); } } else if (ndims == 2) { functor::BinaryFunctor().BCast( eigen_device, out->shaped(bcast->result_shape()), - in0.shaped(bcast->x_reshape()), + in0.template shaped(bcast->x_reshape()), BCast::ToIndexArray<2>(bcast->x_bcast()), - in1.shaped(bcast->y_reshape()), + in1.template shaped(bcast->y_reshape()), BCast::ToIndexArray<2>(bcast->y_bcast()), error_ptr); } else if (ndims == 3) { functor::BinaryFunctor().BCast( eigen_device, out->shaped(bcast->result_shape()), - in0.shaped(bcast->x_reshape()), + in0.template shaped(bcast->x_reshape()), BCast::ToIndexArray<3>(bcast->x_bcast()), - in1.shaped(bcast->y_reshape()), + in1.template shaped(bcast->y_reshape()), BCast::ToIndexArray<3>(bcast->y_bcast()), error_ptr); } else if (ndims == 4) { functor::BinaryFunctor().BCast( eigen_device, out->shaped(bcast->result_shape()), - in0.shaped(bcast->x_reshape()), + in0.template shaped(bcast->x_reshape()), BCast::ToIndexArray<4>(bcast->x_bcast()), - in1.shaped(bcast->y_reshape()), + in1.template shaped(bcast->y_reshape()), BCast::ToIndexArray<4>(bcast->y_bcast()), error_ptr); } else if (ndims == 5) { functor::BinaryFunctor().BCast( eigen_device, out->shaped(bcast->result_shape()), - in0.shaped(bcast->x_reshape()), + in0.template shaped(bcast->x_reshape()), BCast::ToIndexArray<5>(bcast->x_bcast()), - in1.shaped(bcast->y_reshape()), + in1.template shaped(bcast->y_reshape()), BCast::ToIndexArray<5>(bcast->y_bcast()), error_ptr); } else { SetUnimplementedError(ctx); diff --git a/tensorflow/core/kernels/cwise_ops_sycl_common.h b/tensorflow/core/kernels/cwise_ops_sycl_common.h new file mode 100644 index 0000000000..c66ae42c2d --- /dev/null +++ b/tensorflow/core/kernels/cwise_ops_sycl_common.h @@ -0,0 +1,138 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#if !TENSORFLOW_USE_SYCL +#error This file must only be included when building TensorFlow with SYCL support +#endif + +#ifndef TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_ +#define TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_ + +#define EIGEN_USE_SYCL + +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/cwise_ops.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace functor { + +typedef Eigen::SyclDevice SYCLDevice; + +template +void Assign(const SYCLDevice& d, OUT out, RHS rhs) { + out.device(d) = rhs; +} + +// Partial specialization of UnaryFunctor. +template +struct UnaryFunctor { + void operator()(const SYCLDevice& d, typename Functor::tout_type out, + typename Functor::tin_type in) { + To32Bit(out).device(d) = To32Bit(in).unaryExpr(typename Functor::func()); + } +}; + +// Partial specialization of BinaryFunctor. +template +struct BinaryFunctor { + void operator()(const SYCLDevice& d, typename Functor::tout_type out, + typename Functor::tin_type in0, + typename Functor::tin_type in1, bool* error) { + Assign(d, out, in0.binaryExpr(in1, typename Functor::func())); + } + + void Left(const SYCLDevice& d, typename Functor::tout_type out, + typename Functor::tscalar_type scalar, + typename Functor::tin_type in, bool* error) { + LOG(FATAL) << "BinaryFunctor::Left NOT IMPLEMENTED ! "; + } + + void Right(const SYCLDevice& d, typename Functor::tout_type out, + typename Functor::tin_type in, + typename Functor::tscalar_type scalar, bool* error) { + typedef typename Functor::out_type Tout; + typedef typename Functor::in_type Tin; + typedef typename Functor::func Binary; + typedef typename Eigen::internal::scalar_right Unary; + Assign(d, out, in.unaryExpr(Unary(scalar.data()))); + } + + void BCast(const SYCLDevice& d, + typename TTypes::Tensor out, + typename TTypes::ConstTensor in0, + typename Eigen::array bcast0, + typename TTypes::ConstTensor in1, + typename Eigen::array bcast1, + bool* error) { + LOG(FATAL) << "BinaryFunctor::BCast NOT IMPLEMENTED "; + } +}; + +// Macros to explicitly instantiate kernels on GPU for multiple types +// (T0, T1, etc.) for UnaryFunctor (e.g., functor::sqrt). +#define DEFINE_UNARY1(F, T) template struct UnaryFunctor > +#define DEFINE_UNARY2(F, T0, T1) \ + DEFINE_UNARY1(F, T0); \ + DEFINE_UNARY1(F, T1) +#define DEFINE_UNARY3(F, T0, T1, T2) \ + DEFINE_UNARY2(F, T0, T1); \ + DEFINE_UNARY1(F, T2) +#define DEFINE_UNARY4(F, T0, T1, T2, T3) \ + DEFINE_UNARY2(F, T0, T1); \ + DEFINE_UNARY2(F, T2, T3) +#define DEFINE_UNARY5(F, T0, T1, T2, T3, T4) \ + DEFINE_UNARY2(F, T0, T1); \ + DEFINE_UNARY3(F, T2, T3, T4) + +// Macros to explicitly instantiate kernels on GPU for multiple types +// (T0, T1, etc.) for BinaryFunctor. 
+#define DEFINE_BINARY1(F, T) \ + template struct BinaryFunctor, 1>; \ + template struct BinaryFunctor, 2>; \ + template struct BinaryFunctor, 3> +#define DEFINE_BINARY2(F, T0, T1) \ + DEFINE_BINARY1(F, T0); \ + DEFINE_BINARY1(F, T1) +#define DEFINE_BINARY3(F, T0, T1, T2) \ + DEFINE_BINARY2(F, T0, T1); \ + DEFINE_BINARY1(F, T2) +#define DEFINE_BINARY4(F, T0, T1, T2, T3) \ + DEFINE_BINARY2(F, T0, T1); \ + DEFINE_BINARY2(F, T2, T3) +#define DEFINE_BINARY5(F, T0, T1, T2, T3, T4) \ + DEFINE_BINARY2(F, T0, T1); \ + DEFINE_BINARY3(F, T2, T3, T4) +#define DEFINE_BINARY6(F, T0, T1, T2, T3, T4, T5) \ + DEFINE_BINARY3(F, T0, T1, T2); \ + DEFINE_BINARY3(F, T3, T4, T5) +#define DEFINE_BINARY7(F, T0, T1, T2, T3, T4, T5, T6) \ + DEFINE_BINARY3(F, T0, T1, T2); \ + DEFINE_BINARY4(F, T3, T4, T5, T6) +#define DEFINE_BINARY8(F, T0, T1, T2, T3, T4, T5, T6, T7) \ + DEFINE_BINARY4(F, T0, T1, T2, T3); \ + DEFINE_BINARY4(F, T4, T5, T6, T7) +#define DEFINE_BINARY9(F, T0, T1, T2, T3, T4, T5, T6, T7, T8) \ + DEFINE_BINARY4(F, T0, T1, T2, T3); \ + DEFINE_BINARY5(F, T4, T5, T6, T7, T8) +#define DEFINE_BINARY10(F, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9) \ + DEFINE_BINARY5(F, T0, T1, T2, T3, T4); \ + DEFINE_BINARY5(F, T5, T6, T7, T8, T9) + +} // end namespace functor +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_ diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc index 4a08f98b33..7cb9a3a657 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -87,6 +87,29 @@ class RetvalOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("_Arg").Device(DEVICE_CPU), ArgOp); REGISTER_KERNEL_BUILDER(Name("_Retval").Device(DEVICE_CPU), RetvalOp); +#if TENSORFLOW_USE_SYCL +#define REGISTER(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("_Arg").Device(DEVICE_SYCL).TypeConstraint("T"), ArgOp); + TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) + TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Arg") + .Device(DEVICE_GPU) + .HostMemory("output") + .TypeConstraint("T"), + ArgOp); +#undef REGISTER +#define REGISTER(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("_Retval").Device(DEVICE_SYCL).TypeConstraint("T"), RetvalOp); + TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) + TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Retval") + .Device(DEVICE_GPU) + .HostMemory("input") + .TypeConstraint("T"), + RetvalOp); +#undef REGISTER +#endif + #define REGISTER(type) \ REGISTER_KERNEL_BUILDER( \ Name("_Arg").Device(DEVICE_GPU).TypeConstraint("T"), ArgOp); diff --git a/tensorflow/core/kernels/identity_op.cc b/tensorflow/core/kernels/identity_op.cc index 459d329ba4..45d27dd19e 100644 --- a/tensorflow/core/kernels/identity_op.cc +++ b/tensorflow/core/kernels/identity_op.cc @@ -34,6 +34,24 @@ REGISTER_KERNEL_BUILDER(Name("PlaceholderWithDefault").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("RefIdentity").Device(DEVICE_CPU), IdentityOp); +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Identity").Device(DEVICE_SYCL).TypeConstraint("T"), \ + IdentityOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("RefIdentity").Device(DEVICE_SYCL).TypeConstraint("T"), \ + IdentityOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("StopGradient").Device(DEVICE_SYCL).TypeConstraint("T"),\ + IdentityOp) + +TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); +REGISTER_SYCL_KERNEL(bfloat16); + +#undef REGISTER_SYCL_KERNEL +#endif + #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ 
Name("Identity").Device(DEVICE_GPU).TypeConstraint("T"), \ @@ -50,6 +68,7 @@ REGISTER_GPU_KERNEL(bfloat16); #undef REGISTER_GPU_KERNEL + #if GOOGLE_CUDA // A special GPU kernel for int32 and bool. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/no_op.cc b/tensorflow/core/kernels/no_op.cc index 0ad05ee323..0993e6e1fc 100644 --- a/tensorflow/core/kernels/no_op.cc +++ b/tensorflow/core/kernels/no_op.cc @@ -20,4 +20,8 @@ namespace tensorflow { REGISTER_KERNEL_BUILDER(Name("NoOp").Device(DEVICE_CPU), NoOp); REGISTER_KERNEL_BUILDER(Name("NoOp").Device(DEVICE_GPU), NoOp); +#if TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER(Name("NoOp").Device(DEVICE_SYCL), NoOp); +#endif + } // namespace tensorflow diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc index 42d4744069..8b85bd4ebe 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc @@ -29,7 +29,14 @@ limitations under the License. #include "tensorflow/core/lib/random/random_distributions.h" #include "tensorflow/core/util/cuda_kernel_helper.h" +#ifdef COMPILER_MSVC +// msvc does not support unroll. One could try the loop pragma but we need to +// take a closer look if this generates better code in this case. For now let +// the compiler take care of of it. +#define UNROLL +#else #define UNROLL _Pragma("unroll") +#endif namespace tensorflow { @@ -99,6 +106,7 @@ __global__ void __launch_bounds__(1024) Eigen::numext::exp(T(0.5) + (normMin * (normMin - sqrtFactor)) / T(4)) / (normMin + sqrtFactor); const T diff = normMax - normMin; + const T two = T(2.0); // Validate the normalized min and max, because the originals may have been // flipped already. @@ -124,7 +132,7 @@ __global__ void __launch_bounds__(1024) z[i] = rand[i] * diff + normMin; } UNROLL for (int i = 0; i < kDistSize; i++) { - g[i] = (plusFactor - z[i] * z[i]) / 2.0; + g[i] = (plusFactor - z[i] * z[i]) / two; } const auto u = dist(&gen); @@ -161,7 +169,7 @@ __global__ void __launch_bounds__(1024) UNROLL for (int i = 0; i < kDistSize; i += 2) { const T z = -Eigen::numext::log(rand[i]) / alpha + normMin; const T x = normMin < alpha ? 
alpha - z : normMin - alpha; - const T g = Eigen::numext::exp(-x * x / 2.0); + const T g = Eigen::numext::exp(-x * x / two); const T u = rand[i + 1]; if ((u <= g && z < normMax) || numIterations + 1 >= kMaxIterations) { data[offset] = z * stddev + mean; diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc index c2a04ed0c4..9e9cdda382 100644 --- a/tensorflow/core/kernels/sendrecv_ops.cc +++ b/tensorflow/core/kernels/sendrecv_ops.cc @@ -78,6 +78,10 @@ void SendOp::Compute(OpKernelContext* ctx) { REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_CPU), SendOp); REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_GPU), SendOp); +#if TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_SYCL), SendOp); +#endif + REGISTER_KERNEL_BUILDER(Name("_HostSend").Device(DEVICE_CPU), SendOp); REGISTER_KERNEL_BUILDER( Name("_HostSend").Device(DEVICE_GPU).HostMemory("tensor"), SendOp); @@ -136,6 +140,10 @@ void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { REGISTER_KERNEL_BUILDER(Name("_Recv").Device(DEVICE_CPU), RecvOp); REGISTER_KERNEL_BUILDER(Name("_Recv").Device(DEVICE_GPU), RecvOp); +#if TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER(Name("_Recv").Device(DEVICE_SYCL), RecvOp); +#endif + REGISTER_KERNEL_BUILDER(Name("_HostRecv").Device(DEVICE_CPU), RecvOp); REGISTER_KERNEL_BUILDER( Name("_HostRecv").Device(DEVICE_GPU).HostMemory("tensor"), RecvOp); diff --git a/tensorflow/core/kernels/substr_op.cc b/tensorflow/core/kernels/substr_op.cc new file mode 100644 index 0000000000..020ad12c2d --- /dev/null +++ b/tensorflow/core/kernels/substr_op.cc @@ -0,0 +1,233 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/kernel_def_builder.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/bcast.h" + +namespace tensorflow { + +// Position/length can be 32 or 64-bit integers +template +class SubstrOp : public OpKernel { + public: + using OpKernel::OpKernel; + + void Compute(OpKernelContext* context) override { + // Get inputs + const Tensor& input_tensor = context->input(0); + const Tensor& pos_tensor = context->input(1); + const Tensor& len_tensor = context->input(2); + const TensorShape input_shape = input_tensor.shape(); + const TensorShape pos_shape = pos_tensor.shape(); + const TensorShape len_shape = len_tensor.shape(); + + bool is_scalar = TensorShapeUtils::IsScalar(pos_shape); + + if (is_scalar || input_shape == pos_shape) { + // pos/len are either scalar or match the shape of input_tensor + // Do not need to do broadcasting + + // Reshape input + auto input = input_tensor.flat(); + // Allocate output + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output("output", input_tensor.shape(), + &output_tensor)); + auto output = output_tensor->flat(); + if (is_scalar) { + // Perform Op with scalar pos/len + const T pos = tensorflow::internal::SubtleMustCopy(pos_tensor.scalar()()); + const T len = tensorflow::internal::SubtleMustCopy(len_tensor.scalar()()); + for (size_t i = 0; i < input_tensor.NumElements(); ++i) { + string in = input(i); + OP_REQUIRES(context, FastBoundsCheck(pos, in.size()), + errors::InvalidArgument("pos ", pos, " out of range for string", + "b'", in, "' at index ", i)); + output(i) = in.substr(pos, len); + } + } else { + // Perform Op element-wise with tensor pos/len + auto pos_flat = pos_tensor.flat(); + auto len_flat = len_tensor.flat(); + for (size_t i = 0; i < input_tensor.NumElements(); ++i) { + string in = input(i); + const T pos = tensorflow::internal::SubtleMustCopy(pos_flat(i)); + const T len = tensorflow::internal::SubtleMustCopy(len_flat(i)); + OP_REQUIRES(context, FastBoundsCheck(pos, in.size()), + errors::InvalidArgument("pos ", pos, " out of range for string", + "b'", in, "' at index ", i)); + output(i) = in.substr(pos, len); + } + } + } else { + // Perform op with broadcasting + // TODO: Use ternary broadcasting for once available in Eigen. Current + // implementation iterates through broadcasted ops element-wise; + // this should be parallelized. + + // Create BCast helper with shape of input and pos/len + BCast bcast(BCast::FromShape(input_shape), BCast::FromShape(pos_shape)); + OP_REQUIRES(context, bcast.IsValid(), + errors::InvalidArgument("Incompatible shapes: ", + input_shape.DebugString(), " vs. 
", + pos_shape.DebugString())); + TensorShape output_shape = BCast::ToShape(bcast.result_shape()); + int ndims = output_shape.dims(); + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output("output", output_shape, + &output_tensor)); + switch (ndims) { + case 1: { + // Reshape tensors according to BCast results + auto input = input_tensor.shaped(bcast.x_reshape()); + auto output = output_tensor->shaped(bcast.result_shape()); + auto pos_shaped = pos_tensor.shaped(bcast.y_reshape()); + auto len_shaped = len_tensor.shaped(bcast.y_reshape()); + + // Allocate temporary buffer for broadcasted input tensor + Tensor input_buffer; + OP_REQUIRES_OK(context, + context->allocate_temp(DT_STRING, + output_shape, + &input_buffer)); + typename TTypes::Tensor input_bcast = + input_buffer.shaped(bcast.result_shape()); + input_bcast = input.broadcast( + BCast::ToIndexArray<1>(bcast.x_bcast())); + + // Allocate temporary buffer for broadcasted position tensor + Tensor pos_buffer; + OP_REQUIRES_OK(context, + context->allocate_temp(DataTypeToEnum::v(), + output_shape, + &pos_buffer)); + typename TTypes::Tensor pos_bcast = pos_buffer.shaped( + bcast.result_shape()); + pos_bcast = pos_shaped.broadcast( + BCast::ToIndexArray<1>(bcast.y_bcast())); + + // Allocate temporary buffer for broadcasted length tensor + Tensor len_buffer; + OP_REQUIRES_OK(context, + context->allocate_temp(DataTypeToEnum::v(), + output_shape, + &len_buffer)); + typename TTypes::Tensor len_bcast = len_buffer.shaped( + bcast.result_shape()); + len_bcast = len_shaped.broadcast( + BCast::ToIndexArray<1>(bcast.y_bcast())); + + // Iterate through broadcasted tensors and perform substr + for (int i = 0; i < output_shape.dim_size(0); ++i) { + string in = input_bcast(i); + const T pos = tensorflow::internal::SubtleMustCopy(pos_bcast(i)); + const T len = tensorflow::internal::SubtleMustCopy(len_bcast(i)); + OP_REQUIRES(context, FastBoundsCheck(pos, input_bcast(i).size()), + errors::InvalidArgument("pos ", pos, " out of range for string", + "b'", in, "' at index ", i)); + output(i) = in.substr(pos, len); + } + break; + } + case 2: { + // Reshape tensors according to BCast results + auto input = input_tensor.shaped(bcast.x_reshape()); + auto output = output_tensor->shaped(bcast.result_shape()); + auto pos_shaped = pos_tensor.shaped(bcast.y_reshape()); + auto len_shaped = len_tensor.shaped(bcast.y_reshape()); + + // Allocate temporary buffer for broadcasted input tensor + Tensor input_buffer; + OP_REQUIRES_OK(context, + context->allocate_temp(DT_STRING, + output_shape, + &input_buffer)); + typename TTypes::Tensor input_bcast = + input_buffer.shaped(bcast.result_shape()); + input_bcast = input.broadcast( + BCast::ToIndexArray<2>(bcast.x_bcast())); + + // Allocate temporary buffer for broadcasted position tensor + Tensor pos_buffer; + OP_REQUIRES_OK(context, + context->allocate_temp(DataTypeToEnum::v(), + output_shape, + &pos_buffer)); + typename TTypes::Tensor pos_bcast = pos_buffer.shaped( + bcast.result_shape()); + pos_bcast = pos_shaped.broadcast( + BCast::ToIndexArray<2>(bcast.y_bcast())); + + // Allocate temporary buffer for broadcasted length tensor + Tensor len_buffer; + OP_REQUIRES_OK(context, + context->allocate_temp(DataTypeToEnum::v(), + output_shape, + &len_buffer)); + typename TTypes::Tensor len_bcast = len_buffer.shaped( + bcast.result_shape()); + len_bcast = len_shaped.broadcast( + BCast::ToIndexArray<2>(bcast.y_bcast())); + + // Iterate through broadcasted tensors and perform substr + for (int i = 0; i < 
output_shape.dim_size(0); ++i) { + for (int j = 0; j < output_shape.dim_size(1); ++j) { + string in = input_bcast(i, j); + const T pos = tensorflow::internal::SubtleMustCopy( + pos_bcast(i, j)); + const T len = tensorflow::internal::SubtleMustCopy( + len_bcast(i, j)); + OP_REQUIRES( + context, + FastBoundsCheck(pos, in.size()), + errors::InvalidArgument("pos ", pos, " out of range for ", + "string b'", in, "' at index (" + , i, ", ", j, ")")); + output(i, j) = in.substr(pos, len); + + } + } + break; + } + default: { + context->SetStatus(errors::Unimplemented( + "Substr broadcast not implemented for ", ndims, " dimensions")); + } + } + } + } +}; + +#define REGISTER_SUBSTR(type) \ + REGISTER_KERNEL_BUILDER(Name("Substr") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T"), \ + SubstrOp); +REGISTER_SUBSTR(int32); +REGISTER_SUBSTR(int64); +} // namespace tensorflow diff --git a/tensorflow/core/lib/io/path.cc b/tensorflow/core/lib/io/path.cc index 31397722fe..ab2fd7739f 100644 --- a/tensorflow/core/lib/io/path.cc +++ b/tensorflow/core/lib/io/path.cc @@ -196,6 +196,7 @@ void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host, // 0. Parse scheme // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]* // TODO(keveman): Allow "+" and "-" in the scheme. + // Keep URI pattern in tensorboard/backend/server.py updated accordingly if (!strings::Scanner(remaining) .One(strings::Scanner::LETTER) .Many(strings::Scanner::LETTER_DIGIT_DOT) diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index a112e1c879..c427d247b1 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -281,4 +281,113 @@ input: Base64 strings to decode. output: Decoded strings. )doc"); +REGISTER_OP("Substr") + .Input("input: string") + .Input("pos: T") + .Input("len: T") + .Output("output: string") + .Attr("T: {int32, int64}") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle pos_shape = c->input(1); + ShapeHandle len_shape = c->input(2); + ShapeHandle unused; + // Check that pos/len have same rank + TF_RETURN_IF_ERROR(c->WithRank(pos_shape, c->Rank(len_shape), &unused)); + // Check that dimensions are equal + for (int32 i = 0; i < c->Rank(pos_shape); ++i) { + DimensionHandle pos_dim = c->Dim(pos_shape, i); + DimensionHandle len_dim = c->Dim(len_shape, i); + if (c->Value(pos_dim) != c->Value(len_dim)) { + return errors::InvalidArgument("pos and len shapes must match: ", + c->DebugString(pos_shape), " vs. ", + c->DebugString(len_shape)); + } + } + // c->input(0) is the ShapeHandle to input strings + // BroadcastBinaryOpShapeFn infers shape from c->input(0) and c->input(1). + return shape_inference::BroadcastBinaryOpShapeFn(c); + }) + .Doc(R"doc( +Return substrings from `Tensor` of strings. + +For each string in the input `Tensor`, creates a substring starting at index +`pos` with a total length of `len`. + +If `len` defines a substring that would extend beyond the length of the input +string, then as many characters as possible are used. + +If `pos` is negative or specifies a character index larger than any of the input +strings, then an `InvalidArgumentError` is thrown. + +`pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on +Op creation. + +*NOTE*: `Substr` supports broadcasting up to two dimensions. 
More about +broadcasting +[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) + +--- + +Examples + +Using scalar `pos` and `len`: + +``` +input = [b'Hello', b'World'] +position = 1 +length = 3 + +output = [b'ell', b'orl'] +``` + +Using `pos` and `len` with same shape as `input`: + +``` +input = [[b'ten', b'eleven', b'twelve'], + [b'thirteen', b'fourteen', b'fifteen'], + [b'sixteen', b'seventeen', b'eighteen']] +position = [[1, 2, 3], + [1, 2, 3], + [1, 2, 3]] +length = [[2, 3, 4], + [4, 3, 2], + [5, 5, 5]] + +output = [[b'en', b'eve', b'lve'], + [b'hirt', b'urt', b'te'], + [b'ixtee', b'vente', b'hteen']] +``` + +Broadcasting `pos` and `len` onto `input`: + +``` +input = [[b'ten', b'eleven', b'twelve'], + [b'thirteen', b'fourteen', b'fifteen'], + [b'sixteen', b'seventeen', b'eighteen'], + [b'nineteen', b'twenty', b'twentyone']] +position = [1, 2, 3] +length = [1, 2, 3] + +output = [[b'e', b'ev', b'lve'], + [b'h', b'ur', b'tee'], + [b'i', b've', b'hte'], + [b'i', b'en', b'nty']] +``` + +Broadcasting `input` onto `pos` and `len`: + +``` +input = b'thirteen' +position = [1, 5, 7] +length = [3, 2, 1] + +output = [b'hir', b'ee', b'n'] +``` + +input: Tensor of strings +pos: Scalar defining the position of first character in each substring +len: Scalar defining the number of characters to include in each substring +output: Tensor of substrings +)doc"); + } // namespace tensorflow diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index f4ff341a48..2cb64c3922 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -10,6 +10,7 @@ exports_files(["LICENSE"]) load("//tensorflow:tensorflow.bzl", "tf_copts") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path") +load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path") cc_library( name = "gtest", @@ -143,6 +144,21 @@ cc_library( ], ) +cc_library( + name = "sycl", + data = [ + "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")), + ], + linkopts = select({ + "//conditions:default": [ + "-Wl,-rpath,../local_config_sycl/sycl/lib", + ], + }), + deps = [ + "@local_config_sycl//sycl:syclrt", + ], +) + filegroup( name = "android_srcs", srcs = glob(["*.h"]), diff --git a/tensorflow/core/platform/hadoop/BUILD b/tensorflow/core/platform/hadoop/BUILD index b7c4363665..774a439855 100644 --- a/tensorflow/core/platform/hadoop/BUILD +++ b/tensorflow/core/platform/hadoop/BUILD @@ -43,12 +43,18 @@ cc_library( # http://hadoop.apache.org/releases.html # 3. Extract the Hadoop distribution and run: # source libexec/hadoop-config.sh -# 4. bazel test \ +# 4. Optionally set up HDFS cluster configurations (optionally Kerberos) within +# $HADOOP_HDFS_HOME/etc/hadoop if you want to test against a real +# distributed HDFS cluster +# 5.
bazel test \ # --test_env=LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/amd64/server \ # --test_env=HADOOP_HDFS_HOME=$HADOOP_HDFS_HOME \ # --test_env=CLASSPATH=$($HADOOP_HDFS_HOME/bin/hadoop classpath --glob) \ # --test_strategy=local \ # :hadoop_file_system_test +# To test against the real distributed cluster, add the following option for +# bazel test: +# --test_env=HADOOP_TEST_TMPDIR=hdfs://cluster/test/tmp/dir tf_cc_test( name = "hadoop_file_system_test", size = "small", diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index 7ae63ac1d2..745eb9e6cd 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -56,6 +56,8 @@ class LibHDFS { std::function hdfsBuilderConnect; std::function hdfsNewBuilder; std::function hdfsBuilderSetNameNode; + std::function + hdfsBuilderSetKerbTicketCachePath; std::function hdfsCloseFile; std::function hdfsPread; std::function hdfsWrite; @@ -81,6 +83,7 @@ class LibHDFS { BIND_HDFS_FUNC(hdfsBuilderConnect); BIND_HDFS_FUNC(hdfsNewBuilder); BIND_HDFS_FUNC(hdfsBuilderSetNameNode); + BIND_HDFS_FUNC(hdfsBuilderSetKerbTicketCachePath); BIND_HDFS_FUNC(hdfsCloseFile); BIND_HDFS_FUNC(hdfsPread); BIND_HDFS_FUNC(hdfsWrite); @@ -135,6 +138,10 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) { } else { hdfs_->hdfsBuilderSetNameNode(builder, nn.c_str()); } + char* ticket_cache_path = getenv("KERB_TICKET_CACHE_PATH"); + if (ticket_cache_path != nullptr) { + hdfs_->hdfsBuilderSetKerbTicketCachePath(builder, ticket_cache_path); + } *fs = hdfs_->hdfsBuilderConnect(builder); if (*fs == nullptr) { return errors::NotFound(strerror(errno)); @@ -360,9 +367,15 @@ Status HadoopFileSystem::DeleteDir(const string& dir) { hdfsFileInfo* info = hdfs_->hdfsListDirectory(fs, TranslateName(dir).c_str(), &entries); if (info != nullptr) { - return IOError(dir, errno); + hdfs_->hdfsFreeFileInfo(info, entries); + } + // Due to HDFS bug HDFS-8407, we can't distinguish between an error and empty + // folder, especially for Kerberos-enabled setups, EAGAIN is quite common when + // the call is actually successful. Check again by Stat.
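The Kerberos support added above is driven entirely by an environment variable that is read when the HDFS connection is first established. A small illustration (not part of this patch; the ticket cache path and namenode URI below are hypothetical):

```python
# Illustration only: KERB_TICKET_CACHE_PATH must be set before the process
# touches any hdfs:// path, since it is read at connection time by the code above.
import os
import tensorflow as tf

os.environ["KERB_TICKET_CACHE_PATH"] = "/tmp/krb5cc_1000"  # hypothetical cache

# Any subsequent HDFS access (readers, savers, gfile) uses the ticket cache.
filename_queue = tf.train.string_input_producer(
    ["hdfs://namenode:8020/user/me/data.tfrecords"])  # hypothetical URI
reader = tf.TFRecordReader()
key, value = reader.read(filename_queue)
```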
+ if (info == nullptr && errno != 0) { + FileStatistics stat; + TF_RETURN_IF_ERROR(Stat(dir, &stat)); } - hdfs_->hdfsFreeFileInfo(info, entries); if (entries > 0) { return errors::FailedPrecondition("Cannot delete a non-empty directory."); diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc b/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc index cb12913f13..59e1d23645 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc @@ -28,6 +28,15 @@ class HadoopFileSystemTest : public ::testing::Test { protected: HadoopFileSystemTest() {} + string TmpDir(const string& path) { + char* test_dir = getenv("HADOOP_TEST_TMPDIR"); + if (test_dir != nullptr) { + return io::JoinPath(string(test_dir), path); + } else { + return "file://" + io::JoinPath(testing::TmpDir(), path); + } + } + Status WriteString(const string& fname, const string& content) { std::unique_ptr writer; TF_RETURN_IF_ERROR(hdfs.NewWritableFile(fname, &writer)); @@ -58,8 +67,7 @@ class HadoopFileSystemTest : public ::testing::Test { }; TEST_F(HadoopFileSystemTest, RandomAccessFile) { - const string fname = - "file://" + io::JoinPath(testing::TmpDir(), "RandomAccessFile"); + const string fname = TmpDir("RandomAccessFile"); const string content = "abcdefghijklmn"; TF_ASSERT_OK(WriteString(fname, content)); @@ -83,8 +91,7 @@ TEST_F(HadoopFileSystemTest, RandomAccessFile) { TEST_F(HadoopFileSystemTest, WritableFile) { std::unique_ptr writer; - const string fname = - "file://" + io::JoinPath(testing::TmpDir(), "WritableFile"); + const string fname = TmpDir("WritableFile"); TF_EXPECT_OK(hdfs.NewWritableFile(fname, &writer)); TF_EXPECT_OK(writer->Append("content1,")); TF_EXPECT_OK(writer->Append("content2")); @@ -98,16 +105,14 @@ TEST_F(HadoopFileSystemTest, WritableFile) { } TEST_F(HadoopFileSystemTest, FileExists) { - const string fname = - "file://" + io::JoinPath(testing::TmpDir(), "FileExists"); + const string fname = TmpDir("FileExists"); EXPECT_EQ(error::Code::NOT_FOUND, hdfs.FileExists(fname).code()); TF_ASSERT_OK(WriteString(fname, "test")); TF_EXPECT_OK(hdfs.FileExists(fname)); } TEST_F(HadoopFileSystemTest, GetChildren) { - const string base = - "file://" + io::JoinPath(testing::TmpDir(), "GetChildren"); + const string base = TmpDir("GetChildren"); TF_EXPECT_OK(hdfs.CreateDir(base)); const string file = io::JoinPath(base, "testfile.csv"); @@ -122,16 +127,14 @@ TEST_F(HadoopFileSystemTest, GetChildren) { } TEST_F(HadoopFileSystemTest, DeleteFile) { - const string fname = - "file://" + io::JoinPath(testing::TmpDir(), "DeleteFile"); + const string fname = TmpDir("DeleteFile"); EXPECT_FALSE(hdfs.DeleteFile(fname).ok()); TF_ASSERT_OK(WriteString(fname, "test")); TF_EXPECT_OK(hdfs.DeleteFile(fname)); } TEST_F(HadoopFileSystemTest, GetFileSize) { - const string fname = - "file://" + io::JoinPath(testing::TmpDir(), "GetFileSize"); + const string fname = TmpDir("GetFileSize"); TF_ASSERT_OK(WriteString(fname, "test")); uint64 file_size = 0; TF_EXPECT_OK(hdfs.GetFileSize(fname, &file_size)); @@ -139,8 +142,7 @@ TEST_F(HadoopFileSystemTest, GetFileSize) { } TEST_F(HadoopFileSystemTest, CreateDirStat) { - const string dir = - "file://" + io::JoinPath(testing::TmpDir(), "CreateDirStat"); + const string dir = TmpDir("CreateDirStat"); TF_EXPECT_OK(hdfs.CreateDir(dir)); FileStatistics stat; TF_EXPECT_OK(hdfs.Stat(dir, &stat)); @@ -148,7 +150,7 @@ TEST_F(HadoopFileSystemTest, CreateDirStat) { } TEST_F(HadoopFileSystemTest, DeleteDir) { - 
const string dir = "file://" + io::JoinPath(testing::TmpDir(), "DeleteDir"); + const string dir = TmpDir("DeleteDir"); EXPECT_FALSE(hdfs.DeleteDir(dir).ok()); TF_EXPECT_OK(hdfs.CreateDir(dir)); TF_EXPECT_OK(hdfs.DeleteDir(dir)); @@ -157,10 +159,8 @@ TEST_F(HadoopFileSystemTest, DeleteDir) { } TEST_F(HadoopFileSystemTest, RenameFile) { - const string fname1 = - "file://" + io::JoinPath(testing::TmpDir(), "RenameFile1"); - const string fname2 = - "file://" + io::JoinPath(testing::TmpDir(), "RenameFile2"); + const string fname1 = TmpDir("RenameFile1"); + const string fname2 = TmpDir("RenameFile2"); TF_ASSERT_OK(WriteString(fname1, "test")); TF_EXPECT_OK(hdfs.RenameFile(fname1, fname2)); string content; @@ -169,10 +169,8 @@ TEST_F(HadoopFileSystemTest, RenameFile) { } TEST_F(HadoopFileSystemTest, RenameFile_Overwrite) { - const string fname1 = - "file://" + io::JoinPath(testing::TmpDir(), "RenameFile1"); - const string fname2 = - "file://" + io::JoinPath(testing::TmpDir(), "RenameFile2"); + const string fname1 = TmpDir("RenameFile1"); + const string fname2 = TmpDir("RenameFile2"); TF_ASSERT_OK(WriteString(fname2, "test")); TF_EXPECT_OK(hdfs.FileExists(fname2)); @@ -185,7 +183,7 @@ TEST_F(HadoopFileSystemTest, RenameFile_Overwrite) { } TEST_F(HadoopFileSystemTest, StatFile) { - const string fname = "file://" + io::JoinPath(testing::TmpDir(), "StatFile"); + const string fname = TmpDir("StatFile"); TF_ASSERT_OK(WriteString(fname, "test")); FileStatistics stat; TF_EXPECT_OK(hdfs.Stat(fname, &stat)); diff --git a/tensorflow/core/platform/windows/intrinsics_port.h b/tensorflow/core/platform/windows/intrinsics_port.h index 2c491245a7..a4fa1e9971 100644 --- a/tensorflow/core/platform/windows/intrinsics_port.h +++ b/tensorflow/core/platform/windows/intrinsics_port.h @@ -20,7 +20,7 @@ limitations under the License. #ifdef _MSC_VER // the following avx intrinsics are not defined on windows // in immintrin.h so we define them here. -// +// #include "tensorflow/core/platform/types.h" #define _mm_load_pd1 _mm_load1_pd diff --git a/tensorflow/core/util/device_name_utils.cc b/tensorflow/core/util/device_name_utils.cc index c38b5758fa..ac18300496 100644 --- a/tensorflow/core/util/device_name_utils.cc +++ b/tensorflow/core/util/device_name_utils.cc @@ -142,6 +142,7 @@ bool DeviceNameUtils::ParseFullName(StringPiece fullname, ParsedName* p) { progress = true; } + // Handle legacy naming convention for cpu and gpu. if (str_util::ConsumePrefix(&fullname, "/cpu:") || str_util::ConsumePrefix(&fullname, "/CPU:")) { p->has_type = true; diff --git a/tensorflow/examples/learn/wide_n_deep_tutorial.py b/tensorflow/examples/learn/wide_n_deep_tutorial.py index 8dd7bc8ef0..888bf33b48 100644 --- a/tensorflow/examples/learn/wide_n_deep_tutorial.py +++ b/tensorflow/examples/learn/wide_n_deep_tutorial.py @@ -180,6 +180,10 @@ def train_and_eval(): skiprows=1, engine="python") + # remove NaN elements + df_train = df_train.dropna(how='any', axis=0) + df_test = df_test.dropna(how='any', axis=0) + df_train[LABEL_COLUMN] = ( df_train["income_bracket"].apply(lambda x: ">50K" in x)).astype(int) df_test[LABEL_COLUMN] = ( diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md index 29b8993fe6..0ffd371877 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md @@ -3,7 +3,7 @@ Generates values in an interval. 
A sequence of `num` evenly-spaced values are generated beginning at `start`. -If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, +If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`, so that the last one is exactly `stop`. For example: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md index 6d22f67352..44388cce0c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md @@ -11,8 +11,8 @@ the full softmax loss. At inference time, you can compute full softmax probabilities with the expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`. -See our [Candidate Sampling Algorithms Reference] -(../../extras/candidate_sampling.pdf) +See our +[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf) Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007) ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md index c2736f1ba9..2e04ee2be5 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md @@ -17,7 +17,7 @@ for k in 0..in_channels-1 filter[di, dj, k, q] Must have `strides[0] = strides[3] = 1`. For the most common case of the same -horizontal and vertices strides, `strides = [1, stride, stride, 1]`. +horizontal and vertical strides, `strides = [1, stride, stride, 1]`. ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md index b0fa637215..aa2d46f2a7 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md @@ -42,8 +42,7 @@ with an otherwise unused class. where a sampled class equals one of the target classes. If set to `True`, this is a "Sampled Logistic" loss instead of NCE, and we are learning to generate log-odds instead of log probabilities. See - our [Candidate Sampling Algorithms Reference] - (../../extras/candidate_sampling.pdf). + our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf). Default is False. * `partition_strategy`: A string specifying the partitioning strategy, relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. 
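As a quick illustration of the corrected `tf.linspace` step formula above, here is a minimal NumPy sketch (the helper name `linspace_like` is made up for illustration only and is not part of this patch) showing how a step of `(stop - start) / (num - 1)` makes the last value land exactly on `stop`:

```python
import numpy as np

def linspace_like(start, stop, num):
    # Step matches the corrected docstring: (stop - start) / (num - 1),
    # so start + (num - 1) * step == stop (up to rounding).
    step = (stop - start) / (num - 1)
    return np.array([start + i * step for i in range(num)])

print(linspace_like(10.0, 12.0, 3))  # [10. 11. 12.] -- step is (12 - 10) / (3 - 1) = 1.0
```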
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md index 81134df29f..2738a61f9d 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md @@ -11,8 +11,8 @@ each component is divided by the weighted, squared sum of inputs within sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) output = input / (bias + alpha * sqr_sum) ** beta -For details, see [Krizhevsky et al., ImageNet classification with deep -convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). +For details, see +[Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md index d40ed35657..3f51a3bb37 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md @@ -22,7 +22,7 @@ In detail, with the default NHWC format, filter[di, dj, q, k] Must have `strides[0] = strides[3] = 1`. For the most common case of the same -horizontal and vertices strides, `strides = [1, stride, stride, 1]`. +horizontal and vertical strides, `strides = [1, stride, stride, 1]`. ##### Args: diff --git a/tensorflow/g3doc/how_tos/meta_graph/index.md b/tensorflow/g3doc/how_tos/meta_graph/index.md index 2b39e5765e..7dace88b23 100644 --- a/tensorflow/g3doc/how_tos/meta_graph/index.md +++ b/tensorflow/g3doc/how_tos/meta_graph/index.md @@ -207,7 +207,7 @@ Here are some of the typical usage models: sess.run(logits) # Creates a saver. saver0 = tf.train.Saver() - saver0.save(sess, saver0_ckpt) + saver0.save(sess, 'my-save-dir/my-model-10000') # Generates MetaGraphDef. saver0.export_meta_graph('my-save-dir/my-model-10000.meta') ``` diff --git a/tensorflow/g3doc/resources/index.md b/tensorflow/g3doc/resources/index.md index 045de56ce6..b4dc63bb38 100644 --- a/tensorflow/g3doc/resources/index.md +++ b/tensorflow/g3doc/resources/index.md @@ -39,6 +39,7 @@ The TensorFlow community has created many great projects around TensorFlow, incl * [Caffe to TensorFlow model converter](https://github.com/ethereon/caffe-tensorflow) * [Bitfusion's` GPU-enabled AWS EC2 TensorFlow AMI](https://github.com/bitfusionio/amis/tree/master/awsmrkt-bfboost-ubuntu14-cuda75-tensorflow) ([Launch AMI](https://aws.amazon.com/marketplace/pp/B01EYKBEQ0)) * [Rust language bindings](https://github.com/google/tensorflow-rust) +* [Operator Vectorization Library](https://github.com/opveclib/opveclib) ### Development diff --git a/tensorflow/g3doc/tutorials/deep_cnn/index.md b/tensorflow/g3doc/tutorials/deep_cnn/index.md index a5302df914..ed431eaa37 100644 --- a/tensorflow/g3doc/tutorials/deep_cnn/index.md +++ b/tensorflow/g3doc/tutorials/deep_cnn/index.md @@ -246,7 +246,7 @@ Filling queue with 20000 CIFAR images before starting to train. This will take a ... 
``` -The script reports the total loss every 10 steps as well the speed at which +The script reports the total loss every 10 steps as well as the speed at which the last batch of data was processed. A few comments: * The first batch of data can be inordinately slow (e.g. several minutes) as the diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index 226e28fc18..39cce84f9b 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -79,6 +79,8 @@ from tensorflow.python.client.client_lib import * # Ops from tensorflow.python.ops.standard_ops import * +# pylint: enable=wildcard-import + # Bring in subpackages. from tensorflow.python.ops import nn from tensorflow.python.ops import resources diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d78fd1a813..dc7c72e220 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -357,6 +357,13 @@ tf_py_test( additional_deps = ["//tensorflow:tensorflow_py"], ) +tf_py_test( + name = "substr_op_test", + size = "small", + srcs = ["substr_op_test.py"], + additional_deps = ["//tensorflow:tensorflow_py"], +) + tf_py_test( name = "summary_ops_test", size = "small", diff --git a/tensorflow/python/kernel_tests/substr_op_test.py b/tensorflow/python/kernel_tests/substr_op_test.py new file mode 100644 index 0000000000..7c78ab14ba --- /dev/null +++ b/tensorflow/python/kernel_tests/substr_op_test.py @@ -0,0 +1,235 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for Substr op from string_ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + + +class SubstrOpTest(tf.test.TestCase): + + def _testScalarString(self, dtype): + test_string = b"Hello" + position = np.array(1, dtype) + length = np.array(3, dtype) + expected_value = b"ell" + + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + substr = substr_op.eval() + self.assertAllEqual(substr, expected_value) + + def _testVectorStrings(self, dtype): + test_string = [b"Hello", b"World"] + position = np.array(1, dtype) + length = np.array(3, dtype) + expected_value = [b"ell", b"orl"] + + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + substr = substr_op.eval() + self.assertAllEqual(substr, expected_value) + + def _testMatrixStrings(self, dtype): + test_string = [[b"ten", b"eleven", b"twelve"], + [b"thirteen", b"fourteen", b"fifteen"], + [b"sixteen", b"seventeen", b"eighteen"]] + position = np.array(1, dtype) + length = np.array(4, dtype) + expected_value = [[b"en", b"leve", b"welv"], + [b"hirt", b"ourt", b"ifte"], + [b"ixte", b"even", b"ight"]] + + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + substr = substr_op.eval() + self.assertAllEqual(substr, expected_value) + + def _testElementWisePosLen(self, dtype): + test_string = [[b"ten", b"eleven", b"twelve"], + [b"thirteen", b"fourteen", b"fifteen"], + [b"sixteen", b"seventeen", b"eighteen"]] + position = np.array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3]], dtype) + length = np.array([[2, 3, 4], + [4, 3, 2], + [5, 5, 5]], dtype) + expected_value = [[b"en", b"eve", b"lve"], + [b"hirt", b"urt", b"te"], + [b"ixtee", b"vente", b"hteen"]] + + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + substr = substr_op.eval() + self.assertAllEqual(substr, expected_value) + + def _testBroadcast(self, dtype): + # Broadcast pos/len onto input string + test_string = [[b"ten", b"eleven", b"twelve"], + [b"thirteen", b"fourteen", b"fifteen"], + [b"sixteen", b"seventeen", b"eighteen"], + [b"nineteen", b"twenty", b"twentyone"]] + position = np.array([1, 2, 3], dtype) + length = np.array([1, 2, 3], dtype) + expected_value = [[b"e", b"ev", b"lve"], + [b"h", b"ur", b"tee"], + [b"i", b"ve", b"hte"], + [b"i", b"en", b"nty"]] + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + substr = substr_op.eval() + self.assertAllEqual(substr, expected_value) + + # Broadcast input string onto pos/len + test_string = [b"thirteen", b"fourteen", b"fifteen"] + position = np.array([[1, 2, 3], + [3, 2, 1], + [5, 5, 5]], dtype) + length = np.array([[3, 2, 1], + [1, 2, 3], + [2, 2, 2]], dtype) + expected_value = [[b"hir", b"ur", b"t"], + [b"r", b"ur", b"ift"], + [b"ee", b"ee", b"en"]] + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + substr = substr_op.eval() + self.assertAllEqual(substr, expected_value) + + # Test 1D broadcast + test_string = b"thirteen" + position = np.array([1, 5, 7], dtype) + length = np.array([3, 2, 1], dtype) + expected_value = [b"hir", b"ee", b"n"] + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + substr = substr_op.eval() + self.assertAllEqual(substr, expected_value) + + def _testBadBroadcast(self, dtype): + test_string = [[b"ten", b"eleven", 
b"twelve"], + [b"thirteen", b"fourteen", b"fifteen"], + [b"sixteen", b"seventeen", b"eighteen"]] + position = np.array([1, 2, 3, 4], dtype) + length = np.array([1, 2, 3, 4], dtype) + expected_value = [[b"e", b"ev", b"lve"], + [b"h", b"ur", b"tee"], + [b"i", b"ve", b"hte"]] + with self.assertRaises(ValueError): + substr_op = tf.substr(test_string, position, length) + + def _testOutOfRangeError(self, dtype): + # Scalar/Scalar + test_string = b"Hello" + position = np.array(7, dtype) + length = np.array(3, dtype) + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + with self.assertRaises(tf.errors.InvalidArgumentError): + substr = substr_op.eval() + + # Vector/Scalar + test_string = [b"good", b"good", b"bad", b"good"] + position = np.array(3, dtype) + length = np.array(1, dtype) + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + with self.assertRaises(tf.errors.InvalidArgumentError): + substr = substr_op.eval() + + # Negative pos + test_string = b"Hello" + position = np.array(-1, dtype) + length = np.array(3, dtype) + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + with self.assertRaises(tf.errors.InvalidArgumentError): + substr = substr_op.eval() + + # Matrix/Matrix + test_string = [[b"good", b"good", b"good"], + [b"good", b"good", b"bad"], + [b"good", b"good", b"good"]] + position = np.array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3]], dtype) + length = np.array([[3, 2, 1], + [1, 2, 3], + [2, 2, 2]], dtype) + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + with self.assertRaises(tf.errors.InvalidArgumentError): + substr = substr_op.eval() + + # Broadcast + test_string = [[b"good", b"good", b"good"], + [b"good", b"good", b"bad"]] + position = np.array([1, 2, 3], dtype) + length = np.array([1, 2, 3], dtype) + substr_op = tf.substr(test_string, position, length) + with self.test_session(): + with self.assertRaises(tf.errors.InvalidArgumentError): + substr = substr_op.eval() + + def _testMismatchPosLenShapes(self, dtype): + test_string = [[b"ten", b"eleven", b"twelve"], + [b"thirteen", b"fourteen", b"fifteen"], + [b"sixteen", b"seventeen", b"eighteen"]] + position = np.array([[1, 2, 3]], dtype) + length = np.array([2, 3, 4], dtype) + # Should fail: position/length have different rank + with self.assertRaises(ValueError): + substr_op = tf.substr(test_string, position, length) + + position = np.array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3]], dtype) + length = np.array([[2, 3, 4]], dtype) + # Should fail: postion/length have different dimensionality + with self.assertRaises(ValueError): + substr_op = tf.substr(test_string, position, length) + + def _testAll(self, dtype): + self._testScalarString(dtype) + self._testVectorStrings(dtype) + self._testMatrixStrings(dtype) + self._testElementWisePosLen(dtype) + self._testBroadcast(dtype) + self._testBadBroadcast(dtype) + self._testOutOfRangeError(dtype) + self._testMismatchPosLenShapes(dtype) + + def testInt32(self): + self._testAll(np.int32) + + def testInt64(self): + self._testAll(np.int64) + + def testWrongDtype(self): + with self.test_session(): + with self.assertRaises(TypeError): + tf.substr(b"test", 3.0, 1) + with self.assertRaises(TypeError): + tf.substr(b"test", 3, 1.0) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index b8b8d76dab..1c4a200a2d 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ 
-1407,6 +1407,8 @@ def reduce_logsumexp(input_tensor, reduction_indices=None, keep_dims=False, reduction_indices, keep_dims=True)) + my_max if not keep_dims: + if isinstance(reduction_indices, int): + reduction_indices = [reduction_indices] result = array_ops.squeeze(result, reduction_indices) return result diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 17d90afb5d..76d943aaea 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -67,6 +67,16 @@ class LogSumExpTest(test_util.TensorFlowTestCase): self.assertShapeEqual(y_np, y_tf) y_tf_np = y_tf.eval() self.assertAllClose(y_tf_np, y_np) + + def testReductionIndices2(self): + for dtype in [np.float16, np.float32, np.double]: + x_np = np.random.rand(5, 5).astype(dtype) + with self.test_session(use_gpu=True): + y_tf = math_ops.reduce_logsumexp(x_np, reduction_indices=0) + y_np = log(np.sum(exp(x_np), axis=0)) + self.assertShapeEqual(y_np, y_tf) + y_tf_np = y_tf.eval() + self.assertAllClose(y_tf_np, y_np) def testKeepDims(self): for dtype in [np.float16, np.float32, np.double]: diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index ceb55d6daf..71296ea798 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -454,7 +454,7 @@ def sigmoid_cross_entropy_with_logits(logits, targets, name=None): relu_logits = math_ops.select(cond, logits, zeros) neg_abs_logits = math_ops.select(cond, -logits, logits) return math_ops.add(relu_logits - logits * targets, - math_ops.log(1 + math_ops.exp(neg_abs_logits)), + math_ops.log1p(math_ops.exp(neg_abs_logits)), name=name) @@ -522,7 +522,7 @@ def weighted_cross_entropy_with_logits(logits, targets, pos_weight, name=None): log_weight = 1 + (pos_weight - 1) * targets return math_ops.add( (1 - targets) * logits, - log_weight * (math_ops.log(1 + math_ops.exp(-math_ops.abs(logits))) + + log_weight * (math_ops.log1p(math_ops.exp(-math_ops.abs(logits))) + nn_ops.relu(-logits)), name=name) diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index e26f75ff51..416d6fccd9 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -33,6 +33,7 @@ string tensor. 
## Splitting @@string_split +@@substr ## Conversion @@ -138,3 +139,4 @@ def _ReduceJoinShape(op): ops.RegisterShape("StringJoin")(common_shapes.call_cpp_shape_fn) ops.RegisterShape("StringSplit")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("Substr")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index c700a8a924..f5b7ad6632 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_state_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.util.deprecation import deprecated @@ -316,9 +317,14 @@ class Variable(object): if init_from_fn: expected_shape_list = full_shape_to_list(expected_shape) set_shape = validate_shape and expected_shape.is_fully_defined() - self._variable = state_ops.variable_op( - expected_shape_list, dtype.base_dtype, set_shape=set_shape, - name=name) + self._variable = gen_state_ops._variable( + shape=expected_shape_list, + dtype=dtype.base_dtype, + name=name, + container="", + shared_name="") + if set_shape: + self._variable.set_shape(expected_shape_list) with ops.colocate_with(self._variable.op): with ops.name_scope("Initializer"): # Colocate the tensors created by the initial_value() function @@ -336,12 +342,15 @@ class Variable(object): and self._initial_value.get_shape().is_fully_defined()) # In this case, the variable op can't be created until after the # initial_value has been converted to a Tensor with a known type. - self._variable = state_ops.variable_op( - full_shape_to_list(self._initial_value.get_shape()), - self._initial_value.dtype.base_dtype, - set_shape=set_shape, - name=name) - + self._variable = gen_state_ops._variable( + shape=full_shape_to_list(self._initial_value.get_shape()), + dtype=self._initial_value.dtype.base_dtype, + name=name, + container="", + shared_name="") + if set_shape: + self._variable.set_shape( + full_shape_to_list(self._initial_value.get_shape())) # Manually overrides the variable's shape with the initial value's. if validate_shape: initial_value_shape = self._initial_value.get_shape() diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py index a0f72e2d23..c45dd0cf15 100644 --- a/tensorflow/python/training/training.py +++ b/tensorflow/python/training/training.py @@ -205,6 +205,7 @@ from tensorflow.python.training.queue_runner import * # For the module level doc. from tensorflow.python.training import input as _input from tensorflow.python.training.input import * +# pylint: enable=wildcard-import from tensorflow.python.training.basic_session_run_hooks import LoggingTensorHook from tensorflow.python.training.basic_session_run_hooks import StopAtStepHook @@ -246,7 +247,7 @@ from tensorflow.python.training.training_util import assert_global_step from tensorflow.python.pywrap_tensorflow import do_quantize_training_on_graphdef from tensorflow.python.pywrap_tensorflow import NewCheckpointReader - +# pylint: disable=wildcard-import # Training data protos. from tensorflow.core.example.example_pb2 import * from tensorflow.core.example.feature_pb2 import * @@ -254,7 +255,7 @@ from tensorflow.core.protobuf.saver_pb2 import * # Utility op. Open Source. TODO(touts): move to nn? 
from tensorflow.python.training.learning_rate_decay import * - +# pylint: enable=wildcard-import # Distributed computing support. from tensorflow.core.protobuf.tensorflow_server_pb2 import ClusterDef @@ -263,7 +264,6 @@ from tensorflow.core.protobuf.tensorflow_server_pb2 import ServerDef from tensorflow.python.training.server_lib import ClusterSpec from tensorflow.python.training.server_lib import Server - # Symbols whitelisted for export without documentation. _allowed_symbols = [ # TODO(cwhipkey): review these and move to contrib or expose through diff --git a/tensorflow/tensorboard/backend/server.py b/tensorflow/tensorboard/backend/server.py index 2763083600..6f961f1803 100644 --- a/tensorflow/tensorboard/backend/server.py +++ b/tensorflow/tensorboard/backend/server.py @@ -25,6 +25,7 @@ import functools import os import threading import time +import re import six from six.moves import BaseHTTPServer @@ -67,21 +68,20 @@ def ParseEventFilesSpec(logdir): files = {} if logdir is None: return files + # Make sure keeping consistent with ParseURI in core/lib/io/path.cc + uri_pattern = re.compile("[a-zA-Z][0-9a-zA-Z.]://.*") for specification in logdir.split(','): - # If it's a gcs or hdfs path, don't split on colon - if (io_wrapper.IsGCSPath(specification) or - specification.startswith('hdfs://')): - run_name = None - path = specification - # If the spec looks like /foo:bar/baz, then we assume it's a path with a - # colon. - elif ':' in specification and specification[0] != '/': + # Check if the spec contains group. A spec start with xyz:// is regarded as + # URI path spec instead of group spec. If the spec looks like /foo:bar/baz, + # then we assume it's a path with a colon. + if uri_pattern.match(specification) is None and \ + ':' in specification and specification[0] != '/': # We split at most once so run_name:/path:with/a/colon will work. run_name, _, path = specification.partition(':') else: run_name = None path = specification - if not (io_wrapper.IsGCSPath(path) or path.startswith('hdfs://')): + if uri_pattern.match(path) is None: path = os.path.realpath(path) files[path] = run_name return files diff --git a/tensorflow/tensorboard/backend/server_test.py b/tensorflow/tensorboard/backend/server_test.py index c3088491a9..6ede62b3d6 100644 --- a/tensorflow/tensorboard/backend/server_test.py +++ b/tensorflow/tensorboard/backend/server_test.py @@ -472,6 +472,11 @@ class ParseEventFilesSpecTest(tf.test.TestCase): expected = {'gs://foo/./path//..': None} self.assertEqual(server.ParseEventFilesSpec(logdir_string), expected) + def testRunNameWithGCSPath(self): + logdir_string = 'lol:gs://foo/path' + expected = {'gs://foo/path': 'lol'} + self.assertEqual(server.ParseEventFilesSpec(logdir_string), expected) + class TensorBoardAssetsTest(tf.test.TestCase): diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat new file mode 100644 index 0000000000..377626ebdc --- /dev/null +++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat @@ -0,0 +1,38 @@ +:: This script assumes the standard setup on tensorflow Jenkins windows machines. +:: It is NOT guaranteed to work on any other machine. Use at your own risk! +:: +:: REQUIREMENTS: +:: * All installed in standard locations: +:: - JDK8, and JAVA_HOME set. +:: - Microsoft Visual Studio 2015 Community Edition +:: - Msys2 +:: - Anaconda3 +:: - CMake +:: * Before running this script, you have to set BUILD_CC_TESTS and BUILD_PYTHON_TESTS +:: variables to either "ON" or "OFF". 
+:: * Either have the REPO_ROOT variable set, or run this from the repository root directory. + +:: Check and set REPO_ROOT +IF [%REPO_ROOT%] == [] ( + SET REPO_ROOT=%cd% +) + +:: Import all bunch of variables Visual Studio needs. +CALL "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" +:: Turn echo back on, above script turns it off. +ECHO ON + +:: Some common variables to be shared between runs. +SET CMAKE_EXE="C:\Program Files\cmake\bin\cmake.exe" +SET SWIG_EXE="C:\ProgramData\chocolatey\bin\swig.exe" +SET PY_EXE="C:\Program Files\Anaconda3\python.exe" +SET PY_LIB="C:\Program Files\Anaconda3\libs\python35.lib" + +SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake +SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" + +:: Run cmake to create Visual Studio Project files. +%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% + +:: Run msbuild in the resulting VS project files to build a pip package. +%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat new file mode 100644 index 0000000000..c8f65402ff --- /dev/null +++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat @@ -0,0 +1,37 @@ +:: This script assumes the standard setup on tensorflow Jenkins windows machines. +:: It is NOT guaranteed to work on any other machine. Use at your own risk! +:: +:: REQUIREMENTS: +:: * All installed in standard locations: +:: - JDK8, and JAVA_HOME set. +:: - Microsoft Visual Studio 2015 Community Edition +:: - Msys2 +:: - Anaconda3 +:: - CMake + +:: Record the directory we are in. Script should be invoked from the root of the repository. +SET REPO_ROOT=%cd% + +:: Make sure we have a clean directory to build things in. +SET BUILD_DIR=cmake_build +RMDIR %BUILD_DIR% /S /Q +MKDIR %BUILD_DIR% +CD %BUILD_DIR% + +:: Set which tests to build +SET BUILD_CC_TESTS=OFF +SET BUILD_PYTHON_TESTS=ON + +:: Run the CMAKE build to build the pip package. +CALL %REPO_ROOT%\tensorflow\tools\ci_build\windows\cpu\cmake\run_build.bat + +SET PIP_EXE="C:\Program Files\Anaconda3\Scripts\pip.exe" + +:: Uninstall tensorflow pip package, which might be a leftover from old runs. +%PIP_EXE% uninstall tensorflow + +:: Install the pip package. +%PIP_EXE% install %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\tensorflow-0.11.0rc2_cmake_experimental-py3-none-any.whl + +:: Run all python tests +ctest -C Release --output-on-failure diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh new file mode 100644 index 0000000000..356aab1fd7 --- /dev/null +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# This script assumes the standard setup on tensorflow Jenkins windows machines. +# It is NOT guaranteed to work on any other machine. Use at your own risk! +# +# REQUIREMENTS: +# * All installed in standard locations: +# - JDK8, and JAVA_HOME set. +# - Microsoft Visual Studio 2015 Community Edition +# - Msys2 +# - Anaconda3 +# * Bazel windows executable copied as "bazel.exe" and included in PATH. + +# All commands shall pass, and all should be visible. 
+set -x +set -e + +# This script is under /tensorflow/tools/ci_build/windows/cpu/pip/ +# Change into repository root. +script_dir=$(dirname $0) +cd ${script_dir%%tensorflow/tools/ci_build/windows/cpu/pip} + +# Use a temporary directory with a short name. +export TMPDIR="C:/tmp" + +# Set bash path +export BAZEL_SH="C:/tools/msys64/usr/bin/bash" + +# Set Python path for ./configure +export PYTHON_BIN_PATH="C:/Program Files/Anaconda3/python" + +# Set Python path for cc_configure.bzl +export BAZEL_PYTHON="C:/Program Files/Anaconda3/python" + +# Set Visual Studio path +export BAZEL_VS="C:/Program Files (x86)/Microsoft Visual Studio 14.0" + +# Add python into PATH, it's needed because gen_git_source.py uses +# '/usr/bin/env python' as a shebang +export PATH="/c/Program Files/Anaconda3:$PATH" + +# bazel clean --expunge doesn't work on Windows yet. +# Clean the output base manually to ensure build correctness +bazel clean +output_base=$(bazel info output_base) +bazel shutdown +# Sleep 5s to wait for jvm shutdown completely +# otherwise rm will fail with device or resource busy error +sleep 5 +rm -rf ${output_base} + +echo "" | ./configure + +bazel build -c opt --cpu=x64_windows_msvc --host_cpu=x64_windows_msvc\ + --copt="/w" --verbose_failures --experimental_ui\ + tensorflow/tools/pip_package:build_pip_package || exit $? + + +./bazel-bin/tensorflow/tools/pip_package/build_pip_package $PWD diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/run.bat b/tensorflow/tools/ci_build/windows/cpu/pip/run.bat new file mode 100644 index 0000000000..552334adc8 --- /dev/null +++ b/tensorflow/tools/ci_build/windows/cpu/pip/run.bat @@ -0,0 +1 @@ +c:\tools\msys64\usr\bin\bash -l %cd%/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh %* diff --git a/tensorflow/tools/git/.gitignore b/tensorflow/tools/git/.gitignore new file mode 100644 index 0000000000..4f62b849d5 --- /dev/null +++ b/tensorflow/tools/git/.gitignore @@ -0,0 +1 @@ +gen diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 688cf57eed..be9801828e 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -1,11 +1,14 @@ # TensorFlow external dependencies that can be loaded in WORKSPACE files. load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") +load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") + # If TensorFlow is linked as a submodule. # path_prefix and tf_repo_name are no longer used. 
def tf_workspace(path_prefix = "", tf_repo_name = ""): cuda_configure(name = "local_config_cuda") + sycl_configure(name = "local_config_sycl") if path_prefix: print("path_prefix was specified to tf_workspace but is no longer used and will be removed in the future.") if tf_repo_name: diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD index 9ab7aadf87..f697866bde 100644 --- a/third_party/eigen3/BUILD +++ b/third_party/eigen3/BUILD @@ -23,5 +23,8 @@ cc_library( "unsupported/Eigen/CXX11/FixedPoint", ], visibility = ["//visibility:public"], - deps = ["@eigen_archive//:eigen"], + deps = [ + "@eigen_archive//:eigen", + "@local_config_sycl//sycl:sycl", + ], ) diff --git a/tools/bazel.rc.template b/tools/bazel.rc.template index 58dd7434a8..7a5c2aac80 100644 --- a/tools/bazel.rc.template +++ b/tools/bazel.rc.template @@ -1,6 +1,9 @@ build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true +build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain +build:sycl --define=using_sycl=true + build --force_python=py$PYTHON_MAJOR_VERSION build --host_force_python=py$PYTHON_MAJOR_VERSION build --python$PYTHON_MAJOR_VERSION_path=$PYTHON_BINARY diff --git a/util/python/python_config.sh b/util/python/python_config.sh index 8a780c82b8..789c4b35b3 100755 --- a/util/python/python_config.sh +++ b/util/python/python_config.sh @@ -120,20 +120,28 @@ function setup_python { IFS=',' python_lib_path=($(python_path)) unset IFS - echo "Found possible Python library paths:" - for x in "${python_lib_path[@]}"; do - echo " $x" - done - set -- "${python_lib_path[@]}" - echo "Please input the desired Python library path to use. Default is ["$1"]" - read b || true - if [ "$b" == "" ]; then + + if [ 1 = "$USE_DEFAULT_PYTHON_LIB_PATH" ]; then PYTHON_LIB_PATH="$(default_python_path "${python_lib_path[0]}")" - echo $PYTHON_LIB_PATH + echo "Using python library path: $PYTHON_LIB_PATH" + else - PYTHON_LIB_PATH="$b" + echo "Found possible Python library paths:" + for x in "${python_lib_path[@]}"; do + echo " $x" + done + set -- "${python_lib_path[@]}" + echo "Please input the desired Python library path to use. Default is ["$1"]" + read b || true + if [ "$b" == "" ]; then + PYTHON_LIB_PATH="$(default_python_path "${python_lib_path[0]}")" + echo "Using python library path: $PYTHON_LIB_PATH" + else + PYTHON_LIB_PATH="$b" + fi fi fi + if test -d "$PYTHON_LIB_PATH" -a -x "$PYTHON_LIB_PATH"; then python_lib="$PYTHON_LIB_PATH" else -- cgit v1.2.3
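One further illustrative note on the `math_ops.log1p` substitution in `tensorflow/python/ops/nn.py` above: a minimal NumPy sketch (not part of the patch; NumPy stands in for the TensorFlow ops) of why `log1p(exp(-|x|))` is numerically preferable to `log(1 + exp(-|x|))` when the exponential term is tiny:

```python
import numpy as np

x = 1e-18  # stands in for exp(-|logits|) when |logits| is large
print(np.log(1.0 + x))  # 0.0 -- 1.0 + 1e-18 rounds to exactly 1.0 in float64
print(np.log1p(x))      # ~1e-18 -- log1p evaluates log(1 + x) without forming 1 + x
```

With the naive form the small term is silently rounded away; `log1p` preserves it, which is the apparent motivation for the change in `sigmoid_cross_entropy_with_logits` and `weighted_cross_entropy_with_logits`.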