diff options
author | 2016-11-17 15:37:00 -0800 | |
---|---|---|
committer | 2016-11-17 15:45:18 -0800 | |
commit | 54e5000e0b980abe905900599c4493fadae34a15 (patch) | |
tree | c1f0751e5565882a77646589360fe090bec3f3fc /tensorflow | |
parent | 8a5610cd9f0b7087c1a7e97071ba1cf9b885315a (diff) |
Merge changes from github.
Change: 139516555
Diffstat (limited to 'tensorflow')
108 files changed, 1402 insertions, 253 deletions
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 17d72835a0..a9c426cc34 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -1612,7 +1612,7 @@ TF_Operation* TF_GraphNextOperation(TF_Graph* graph, size_t* pos) { } mutex_lock l(graph->mu); - while (*pos < graph->graph.num_node_ids()) { + while (*pos < static_cast<size_t>(graph->graph.num_node_ids())) { Node* node = graph->graph.FindNodeId(*pos); // FindNodeId() returns nullptr for nodes that have been deleted. // We aren't currently allowing nodes to be deleted, but it is safer diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index a935e31f17..b5246cb151 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.1) # Project project(tensorflow C CXX) +# Set C++14 as standard for the whole project +set(CMAKE_CXX_STANDARD 14) + # Actual source is the ../../.. directory get_filename_component(tf_contrib_source_dir ${tensorflow_SOURCE_DIR} PATH) get_filename_component(tf_tf_source_dir ${tf_contrib_source_dir} PATH) diff --git a/tensorflow/contrib/cmake/external/farmhash.cmake b/tensorflow/contrib/cmake/external/farmhash.cmake index b2c13a14fb..f6805a33aa 100644 --- a/tensorflow/contrib/cmake/external/farmhash.cmake +++ b/tensorflow/contrib/cmake/external/farmhash.cmake @@ -3,8 +3,8 @@ include (ExternalProject) set(farmhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive/util) set(farmhash_URL https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip) set(farmhash_HASH SHA256=e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28) -set(farmhash_BUILD ${CMAKE_BINARY_DIR}/farmhash/src/farmhash) -set(farmhash_INSTALL ${CMAKE_BINARY_DIR}/farmhash/install) +set(farmhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/farmhash/src/farmhash) +set(farmhash_INSTALL 
${CMAKE_CURRENT_BINARY_DIR}/farmhash/install) set(farmhash_INCLUDES ${farmhash_BUILD}) set(farmhash_HEADERS "${farmhash_BUILD}/src/farmhash.h" @@ -19,7 +19,7 @@ if(WIN32) URL_HASH ${farmhash_HASH} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" BUILD_IN_SOURCE 1 - PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD} + PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD} INSTALL_DIR ${farmhash_INSTALL} CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake index 024c064cf4..93a0c8d864 100644 --- a/tensorflow/contrib/cmake/external/gemmlowp.cmake +++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake @@ -2,8 +2,8 @@ include (ExternalProject) set(gemmlowp_URL http://github.com/google/gemmlowp/archive/a6f29d8ac48d63293f845f2253eccbf86bc28321.tar.gz) set(gemmlowp_HASH SHA256=75d40ea8e68b0d1644f052fffe8f14a410b2a73d40ccb859a95c0578d194ec26) -set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp) -set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp) +set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp) +set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp) ExternalProject_Add(gemmlowp PREFIX gemmlowp @@ -11,5 +11,5 @@ ExternalProject_Add(gemmlowp URL_HASH ${gemmlowp_HASH} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" BUILD_IN_SOURCE 1 - PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD} + PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD} INSTALL_COMMAND "") diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 1c34458ec8..c33b0dd81e 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ 
b/tensorflow/contrib/cmake/external/grpc.cmake @@ -24,7 +24,7 @@ ExternalProject_Add(grpc GIT_TAG ${GRPC_TAG} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" BUILD_IN_SOURCE 1 - PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD} + PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD} INSTALL_COMMAND "" CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release diff --git a/tensorflow/contrib/cmake/external/highwayhash.cmake b/tensorflow/contrib/cmake/external/highwayhash.cmake index a19ebfe24f..9f80be32cb 100644 --- a/tensorflow/contrib/cmake/external/highwayhash.cmake +++ b/tensorflow/contrib/cmake/external/highwayhash.cmake @@ -3,8 +3,8 @@ include (ExternalProject) set(highwayhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/highwayhash) set(highwayhash_URL https://github.com/google/highwayhash.git) set(highwayhash_TAG be5edafc2e1a455768e260ccd68ae7317b6690ee) -set(highwayhash_BUILD ${CMAKE_BINARY_DIR}/highwayhash/src/highwayhash) -set(highwayhash_INSTALL ${CMAKE_BINARY_DIR}/highwayhash/install) +set(highwayhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/src/highwayhash) +set(highwayhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/install) # put highwayhash includes in the directory where they are expected add_custom_target(highwayhash_create_destination_dir @@ -28,7 +28,7 @@ ExternalProject_Add(highwayhash GIT_TAG ${highwayhash_TAG} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" BUILD_IN_SOURCE 1 - PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD} + PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD} INSTALL_DIR ${highwayhash_INSTALL} CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release diff --git a/tensorflow/contrib/cmake/external/jpeg.cmake b/tensorflow/contrib/cmake/external/jpeg.cmake index a94eb65ddb..cde037949c 100644 --- 
a/tensorflow/contrib/cmake/external/jpeg.cmake +++ b/tensorflow/contrib/cmake/external/jpeg.cmake @@ -3,8 +3,8 @@ include (ExternalProject) set(jpeg_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/jpeg_archive) set(jpeg_URL http://www.ijg.org/files/jpegsrc.v9a.tar.gz) set(jpeg_HASH SHA256=3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7) -set(jpeg_BUILD ${CMAKE_BINARY_DIR}/jpeg/src/jpeg) -set(jpeg_INSTALL ${CMAKE_BINARY_DIR}/jpeg/install) +set(jpeg_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jpeg/src/jpeg) +set(jpeg_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/jpeg/install) if(WIN32) set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.lib) @@ -32,7 +32,7 @@ if (WIN32) PREFIX jpeg URL ${jpeg_URL} URL_HASH ${jpeg_HASH} - PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD} + PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD} INSTALL_DIR ${jpeg_INSTALL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" CMAKE_CACHE_ARGS @@ -42,7 +42,7 @@ if (WIN32) ) ExternalProject_Add_Step(jpeg copy_jconfig - COMMAND ${CMAKE_COMMAND} -E copy + COMMAND ${CMAKE_COMMAND} -E copy ${jpeg_BUILD}/jconfig.vc ${jpeg_BUILD}/jconfig.h DEPENDEES patch DEPENDERS build diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake index 75d5d72703..43d6e0456c 100644 --- a/tensorflow/contrib/cmake/external/jsoncpp.cmake +++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake @@ -4,7 +4,7 @@ set(jsoncpp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp) #set(jsoncpp_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src) set(jsoncpp_URL https://github.com/open-source-parsers/jsoncpp.git) set(jsoncpp_TAG 4356d9b) -set(jsoncpp_BUILD ${CMAKE_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json) +set(jsoncpp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json) set(jsoncpp_LIBRARIES ${jsoncpp_BUILD}/obj/so/libjsoncpp.so) set(jsoncpp_INCLUDES 
${jsoncpp_BUILD}) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index 2155c30185..5ee6987175 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -20,7 +20,7 @@ ExternalProject_Add(protobuf GIT_TAG ${PROTOBUF_TAG} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" BUILD_IN_SOURCE 1 - SOURCE_DIR ${CMAKE_BINARY_DIR}/protobuf/src/protobuf + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/ -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON diff --git a/tensorflow/contrib/cmake/setup.py b/tensorflow/contrib/cmake/setup.py index 78cb546f85..b036016f5e 100644 --- a/tensorflow/contrib/cmake/setup.py +++ b/tensorflow/contrib/cmake/setup.py @@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command from setuptools.command.install import install as InstallCommandBase from setuptools.dist import Distribution -_VERSION = '0.11.0rc2-cmake-experimental' +_VERSION = '0.11.0-cmake-experimental' REQUIRED_PACKAGES = [ 'numpy >= 1.11.0', diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index d4f44803a3..99aa347f6d 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -89,8 +89,6 @@ if(WIN32) "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*" "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h" "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc" - "${tensorflow_source_dir}/tensorflow/core/kernels/svd*.cc" - "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op.*" ) list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs}) endif(WIN32) @@ -100,14 +98,6 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc" ) -if(WIN32) - file(GLOB_RECURSE 
tf_core_gpu_kernels_exclude_srcs - # not working on windows yet - "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc" - ) - list(REMOVE_ITEM tf_core_gpu_kernels_srcs ${tf_core_gpu_kernels_exclude_srcs}) -endif(WIN32) - add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs}) add_dependencies(tf_core_kernels tf_core_cpu) diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index a25b37c754..5523023cb7 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -37,6 +37,17 @@ foreach(tf_op_lib_name ${tf_op_lib_names}) add_dependencies(tf_${tf_op_lib_name} tf_core_framework) endforeach() +function(GENERATE_CONTRIB_OP_LIBRARY op_lib_name cc_srcs) + add_library(tf_contrib_${op_lib_name}_ops OBJECT ${cc_srcs}) + add_dependencies(tf_contrib_${op_lib_name}_ops tf_core_framework) +endfunction() + +GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc") +GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc") +GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc") +GENERATE_CONTRIB_OP_LIBRARY(framework_variable "${tensorflow_source_dir}/tensorflow/contrib/framework/ops/variable_ops.cc") + + ######################################################## # tf_user_ops library ######################################################## diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 9a9aed4375..072d01200e 100644 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -48,24 +48,6 @@ endif(NOT NUMPY_INCLUDE_DIR) # TODO(mrry): Configure this to build in a directory other than tf_python/ -# tf_python_srcs contains all static .py files 
-file(GLOB_RECURSE tf_python_srcs RELATIVE ${tensorflow_source_dir} - "${tensorflow_source_dir}/tensorflow/python/*.py" -) -list(APPEND tf_python_srcs "tensorflow/__init__.py") - -# tf_python_copy_scripts_to_destination copies all Python files -# (including static source and generated protobuf wrappers, but *not* -# generated TensorFlow op wrappers) into tf_python/. -add_custom_target(tf_python_copy_scripts_to_destination) - -# Copy static files to tf_python/. -foreach(script ${tf_python_srcs}) - get_filename_component(REL_DIR ${script} DIRECTORY) - add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script}) -endforeach() - # Generates the Python protobuf wrappers. # ROOT_DIR must be absolute; subsequent arguments are interpreted as # paths of .proto files, and must be relative to ROOT_DIR. @@ -129,6 +111,8 @@ endfunction() file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir} "${tensorflow_source_dir}/tensorflow/core/*.proto" "${tensorflow_source_dir}/tensorflow/python/*.proto" + "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto" + "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto" ) RELATIVE_PROTOBUF_GENERATE_PYTHON( ${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_protos_python_srcs} @@ -140,18 +124,36 @@ RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS add_library(tf_python_protos_cc ${PROTO_SRCS} ${PROTO_HDRS}) + # tf_python_touchup_modules adds empty __init__.py files to all # directories containing Python code, so that Python will recognize # them as modules. 
-add_custom_target(tf_python_touchup_modules - DEPENDS tf_python_copy_scripts_to_destination -) +add_custom_target(tf_python_touchup_modules) +# tf_python_copy_scripts_to_destination copies all Python files +# (including static source and generated protobuf wrappers, but *not* +# generated TensorFlow op wrappers) into tf_python/. +add_custom_target(tf_python_copy_scripts_to_destination DEPENDS tf_python_touchup_modules) + + +# tf_python_srcs contains all static .py files function(add_python_module MODULE_NAME) + set(options DONTCOPY) + cmake_parse_arguments(ADD_PYTHON_MODULE "${options}" "" "" ${ARGN}) add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}") add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD COMMAND ${CMAKE_COMMAND} -E touch "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}/__init__.py") + file(GLOB module_python_srcs RELATIVE ${tensorflow_source_dir} + "${tensorflow_source_dir}/${MODULE_NAME}/*.py" + ) + if(NOT ${ADD_PYTHON_MODULE_DONTCOPY}) + foreach(script ${module_python_srcs}) + get_filename_component(REL_DIR ${script} DIRECTORY) + add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script}) + endforeach() + endif() endfunction() add_python_module("tensorflow") @@ -164,33 +166,205 @@ add_python_module("tensorflow/core/protobuf") add_python_module("tensorflow/core/util") add_python_module("tensorflow/python") add_python_module("tensorflow/python/client") +add_python_module("tensorflow/python/debug") +add_python_module("tensorflow/python/debug/cli") +add_python_module("tensorflow/python/debug/examples") +add_python_module("tensorflow/python/debug/wrappers") add_python_module("tensorflow/python/framework") -add_python_module("tensorflow/python/ops") add_python_module("tensorflow/python/kernel_tests") 
add_python_module("tensorflow/python/lib") add_python_module("tensorflow/python/lib/core") -add_python_module("tensorflow/python/lib/core/io") +add_python_module("tensorflow/python/lib/io") +add_python_module("tensorflow/python/ops") add_python_module("tensorflow/python/platform") add_python_module("tensorflow/python/platform/default") add_python_module("tensorflow/python/platform/summary") -add_python_module("tensorflow/python/platform/summary/impl") +add_python_module("tensorflow/python/summary") +add_python_module("tensorflow/python/summary/impl") +add_python_module("tensorflow/python/summary/writer") add_python_module("tensorflow/python/tools") add_python_module("tensorflow/python/training") +add_python_module("tensorflow/python/user_ops") add_python_module("tensorflow/python/util") add_python_module("tensorflow/python/util/protobuf") -add_python_module("tensorflow/contrib") + +add_python_module("tensorflow/contrib/") +add_python_module("tensorflow/contrib/android") +add_python_module("tensorflow/contrib/android/java") +add_python_module("tensorflow/contrib/android/java/org") +add_python_module("tensorflow/contrib/android/java/org/tensorflow") +add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib") +add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib/android") +add_python_module("tensorflow/contrib/android/jni") add_python_module("tensorflow/contrib/bayesflow") +add_python_module("tensorflow/contrib/bayesflow/examples") +add_python_module("tensorflow/contrib/bayesflow/examples/reinforce_simple") add_python_module("tensorflow/contrib/bayesflow/python") +add_python_module("tensorflow/contrib/bayesflow/python/kernel_tests") add_python_module("tensorflow/contrib/bayesflow/python/ops") -add_python_module("tensorflow/contrib/bayesflow/python/ops/bernoulli") +add_python_module("tensorflow/contrib/copy_graph") +add_python_module("tensorflow/contrib/copy_graph/python") 
+add_python_module("tensorflow/contrib/copy_graph/python/util") +add_python_module("tensorflow/contrib/crf") +add_python_module("tensorflow/contrib/crf/python") +add_python_module("tensorflow/contrib/crf/python/kernel_tests") +add_python_module("tensorflow/contrib/crf/python/ops") +add_python_module("tensorflow/contrib/cudnn_rnn") +add_python_module("tensorflow/contrib/cudnn_rnn/kernels") +add_python_module("tensorflow/contrib/cudnn_rnn/ops") +add_python_module("tensorflow/contrib/cudnn_rnn/python") +add_python_module("tensorflow/contrib/cudnn_rnn/python/kernel_tests") +add_python_module("tensorflow/contrib/cudnn_rnn/python/ops") +add_python_module("tensorflow/contrib/distributions") +add_python_module("tensorflow/contrib/distributions/python") +add_python_module("tensorflow/contrib/distributions/python/kernel_tests") +add_python_module("tensorflow/contrib/distributions/python/ops") +add_python_module("tensorflow/contrib/factorization") +add_python_module("tensorflow/contrib/factorization/examples") +add_python_module("tensorflow/contrib/factorization/kernels") +add_python_module("tensorflow/contrib/factorization/ops") +add_python_module("tensorflow/contrib/factorization/python") +add_python_module("tensorflow/contrib/factorization/python/kernel_tests") +add_python_module("tensorflow/contrib/factorization/python/ops") +add_python_module("tensorflow/contrib/ffmpeg") +add_python_module("tensorflow/contrib/ffmpeg/default") +add_python_module("tensorflow/contrib/ffmpeg/testdata") add_python_module("tensorflow/contrib/framework") +add_python_module("tensorflow/contrib/framework/kernels") +add_python_module("tensorflow/contrib/framework/ops") add_python_module("tensorflow/contrib/framework/python") add_python_module("tensorflow/contrib/framework/python/framework") +add_python_module("tensorflow/contrib/framework/python/ops") +add_python_module("tensorflow/contrib/graph_editor") +add_python_module("tensorflow/contrib/graph_editor/examples") 
+add_python_module("tensorflow/contrib/graph_editor/tests") +add_python_module("tensorflow/contrib/grid_rnn") +add_python_module("tensorflow/contrib/grid_rnn/python") +add_python_module("tensorflow/contrib/grid_rnn/python/kernel_tests") +add_python_module("tensorflow/contrib/grid_rnn/python/ops") +add_python_module("tensorflow/contrib/integrate") +add_python_module("tensorflow/contrib/integrate/python") +add_python_module("tensorflow/contrib/integrate/python/ops") +add_python_module("tensorflow/contrib/ios_examples") +add_python_module("tensorflow/contrib/ios_examples/benchmark") +add_python_module("tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj") +add_python_module("tensorflow/contrib/ios_examples/benchmark/data") +add_python_module("tensorflow/contrib/ios_examples/camera") +add_python_module("tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj") +add_python_module("tensorflow/contrib/ios_examples/camera/data") +add_python_module("tensorflow/contrib/ios_examples/camera/en.lproj") +add_python_module("tensorflow/contrib/ios_examples/simple") +add_python_module("tensorflow/contrib/ios_examples/simple/data") +add_python_module("tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj") add_python_module("tensorflow/contrib/layers") +add_python_module("tensorflow/contrib/layers/kernels") +add_python_module("tensorflow/contrib/layers/ops") add_python_module("tensorflow/contrib/layers/python") +add_python_module("tensorflow/contrib/layers/python/kernel_tests") add_python_module("tensorflow/contrib/layers/python/layers") add_python_module("tensorflow/contrib/layers/python/ops") - +add_python_module("tensorflow/contrib/learn") +add_python_module("tensorflow/contrib/learn/python") +add_python_module("tensorflow/contrib/learn/python/learn") +add_python_module("tensorflow/contrib/learn/python/learn/dataframe") +add_python_module("tensorflow/contrib/learn/python/learn/dataframe/queues") 
+add_python_module("tensorflow/contrib/learn/python/learn/dataframe/transforms") +add_python_module("tensorflow/contrib/learn/python/learn/datasets") +add_python_module("tensorflow/contrib/learn/python/learn/datasets/data") +add_python_module("tensorflow/contrib/learn/python/learn/estimators") +add_python_module("tensorflow/contrib/learn/python/learn/learn_io") +add_python_module("tensorflow/contrib/learn/python/learn/ops") +add_python_module("tensorflow/contrib/learn/python/learn/preprocessing") +add_python_module("tensorflow/contrib/learn/python/learn/preprocessing/tests") +add_python_module("tensorflow/contrib/learn/python/learn/tests") +add_python_module("tensorflow/contrib/learn/python/learn/tests/dataframe") +add_python_module("tensorflow/contrib/learn/python/learn/utils") +add_python_module("tensorflow/contrib/linear_optimizer") +add_python_module("tensorflow/contrib/linear_optimizer/kernels") +add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc") +add_python_module("tensorflow/contrib/linear_optimizer/python") +add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests") +add_python_module("tensorflow/contrib/linear_optimizer/python/ops") +add_python_module("tensorflow/contrib/lookup") +add_python_module("tensorflow/contrib/losses") +add_python_module("tensorflow/contrib/losses/python") +add_python_module("tensorflow/contrib/losses/python/losses") +add_python_module("tensorflow/contrib/makefile") +add_python_module("tensorflow/contrib/makefile/test") +add_python_module("tensorflow/contrib/metrics") +add_python_module("tensorflow/contrib/metrics/kernels") +add_python_module("tensorflow/contrib/metrics/ops") +add_python_module("tensorflow/contrib/metrics/python") +add_python_module("tensorflow/contrib/metrics/python/kernel_tests") +add_python_module("tensorflow/contrib/metrics/python/metrics") +add_python_module("tensorflow/contrib/metrics/python/ops") +add_python_module("tensorflow/contrib/ndlstm") 
+add_python_module("tensorflow/contrib/ndlstm/python") +add_python_module("tensorflow/contrib/opt") +add_python_module("tensorflow/contrib/opt/python") +add_python_module("tensorflow/contrib/opt/python/training") +add_python_module("tensorflow/contrib/pi_examples") +add_python_module("tensorflow/contrib/pi_examples/camera") +add_python_module("tensorflow/contrib/pi_examples/label_image") +add_python_module("tensorflow/contrib/pi_examples/label_image/data") +add_python_module("tensorflow/contrib/quantization") +add_python_module("tensorflow/contrib/quantization/python") +add_python_module("tensorflow/contrib/rnn") +add_python_module("tensorflow/contrib/rnn/kernels") +add_python_module("tensorflow/contrib/rnn/ops") +add_python_module("tensorflow/contrib/rnn/python") +add_python_module("tensorflow/contrib/rnn/python/kernel_tests") +add_python_module("tensorflow/contrib/rnn/python/ops") +add_python_module("tensorflow/contrib/seq2seq") +add_python_module("tensorflow/contrib/seq2seq/python") +add_python_module("tensorflow/contrib/seq2seq/python/kernel_tests") +add_python_module("tensorflow/contrib/seq2seq/python/ops") +add_python_module("tensorflow/contrib/session_bundle") +add_python_module("tensorflow/contrib/session_bundle/example") +add_python_module("tensorflow/contrib/session_bundle/testdata") +add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two") +add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two/variables") +add_python_module("tensorflow/contrib/slim") +add_python_module("tensorflow/contrib/slim/python") +add_python_module("tensorflow/contrib/slim/python/slim") +add_python_module("tensorflow/contrib/slim/python/slim/data") +add_python_module("tensorflow/contrib/slim/python/slim/nets") +add_python_module("tensorflow/contrib/specs") +add_python_module("tensorflow/contrib/specs/python") +add_python_module("tensorflow/contrib/tensorboard") 
+add_python_module("tensorflow/contrib/tensorboard/plugins") +add_python_module("tensorflow/contrib/tensorboard/plugins/projector") +add_python_module("tensorflow/contrib/tensor_forest") +add_python_module("tensorflow/contrib/tensor_forest/client") +add_python_module("tensorflow/contrib/tensor_forest/core") +add_python_module("tensorflow/contrib/tensor_forest/core/ops") +add_python_module("tensorflow/contrib/tensor_forest/data") +add_python_module("tensorflow/contrib/tensor_forest/hybrid") +add_python_module("tensorflow/contrib/tensor_forest/hybrid/core") +add_python_module("tensorflow/contrib/tensor_forest/hybrid/core/ops") +add_python_module("tensorflow/contrib/tensor_forest/hybrid/python") +add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/kernel_tests") +add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/layers") +add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/models") +add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/ops") +add_python_module("tensorflow/contrib/tensor_forest/python") +add_python_module("tensorflow/contrib/tensor_forest/python/kernel_tests") +add_python_module("tensorflow/contrib/tensor_forest/python/ops") +add_python_module("tensorflow/contrib/tensorboard") +add_python_module("tensorflow/contrib/tensorboard") +add_python_module("tensorflow/contrib/tensorboard/plugins") +add_python_module("tensorflow/contrib/tensorboard/plugins/projector") +add_python_module("tensorflow/contrib/testing") +add_python_module("tensorflow/contrib/testing/python") +add_python_module("tensorflow/contrib/testing/python/framework") +add_python_module("tensorflow/contrib/tfprof" DONTCOPY) # SWIG wrapper not implemented. 
+#add_python_module("tensorflow/contrib/tfprof/python") +#add_python_module("tensorflow/contrib/tfprof/python/tools") +#add_python_module("tensorflow/contrib/tfprof/python/tools/tfprof") +add_python_module("tensorflow/contrib/training") +add_python_module("tensorflow/contrib/training/python") +add_python_module("tensorflow/contrib/training/python/training") +add_python_module("tensorflow/contrib/util") ######################################################## @@ -280,6 +454,15 @@ GENERATE_PYTHON_OP_LIB("user_ops") GENERATE_PYTHON_OP_LIB("training_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/training/gen_training_ops.py) +GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py) +GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_clustering_ops.py) +GENERATE_PYTHON_OP_LIB("contrib_factorization_factorization_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_factorization_ops.py) +GENERATE_PYTHON_OP_LIB("contrib_framework_variable_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/framework/python/ops/gen_variable_ops.py) + add_custom_target(tf_python_ops SOURCES ${tf_python_ops_generated_files} ${PYTHON_PROTO_GENFILES}) add_dependencies(tf_python_ops tf_python_op_gen_main) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 658e4c343c..3a9fd639e6 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -149,12 +149,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) # issues related to windows fs "${tensorflow_source_dir}/tensorflow/python/kernel_tests/io_ops_test.py" # missing kernel - "${tensorflow_source_dir}/tensorflow/python/kernel_tests/pooling_ops_test.py" 
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/conv_ops_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/depthwise_conv_op_test.py" - "${tensorflow_source_dir}/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py" - "${tensorflow_source_dir}/tensorflow/python/kernel_tests/pool_test.py" - "${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py" # cuda launch failed "${tensorflow_source_dir}/tensorflow/python/kernel_tests/diag_op_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/trace_op_test.py" diff --git a/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc b/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc index 4f35337c0c..92606225d7 100644 --- a/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc +++ b/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc @@ -257,7 +257,7 @@ class WALSComputePartialLhsAndRhsOp : public OpKernel { lhs_mat = lhs_symm; counter.DecrementCount(); }; - for (int i = 1; i < shards.size(); ++i) { + for (size_t i = 1; i < shards.size(); ++i) { worker_threads.workers->Schedule(std::bind(work, shards[i])); } // Inline execute the 1st shard. 
diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt index d4adcec550..ccc2aaa07f 100644 --- a/tensorflow/contrib/makefile/proto_text_cc_files.txt +++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt @@ -11,6 +11,7 @@ tensorflow/core/platform/posix/env.cc tensorflow/core/platform/posix/load_library.cc tensorflow/core/platform/file_system.cc tensorflow/core/platform/env.cc +tensorflow/core/platform/setround.cc tensorflow/core/platform/denormal.cc tensorflow/core/platform/default/tracing.cc tensorflow/core/platform/default/logging.cc diff --git a/tensorflow/contrib/metrics/kernels/set_kernels.cc b/tensorflow/contrib/metrics/kernels/set_kernels.cc index 2d2f496da7..61fe250206 100644 --- a/tensorflow/contrib/metrics/kernels/set_kernels.cc +++ b/tensorflow/contrib/metrics/kernels/set_kernels.cc @@ -611,7 +611,7 @@ void SetOperationOp<T>::ComputeSparseToSparse(OpKernelContext* ctx) const { int64 compare_groups; CompareGroups(ctx, set1_group_indices, set2_group_indices, &compare_groups); - const std::vector<int64>* group_indices; + const std::vector<int64>* group_indices = nullptr; // Get values from set1, if applicable. set1_group_set.clear(); diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py index b80bd7248b..b952040c30 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py @@ -294,10 +294,7 @@ class Image(ItemHandler): image_buffer = keys_to_tensors[self._image_key] image_format = keys_to_tensors[self._format_key] - image = self._decode(image_buffer, image_format) - if self._shape is not None: - image = array_ops.reshape(image, self._shape) - return image + return self._decode(image_buffer, image_format) def _decode(self, image_buffer, image_format): """Decodes the image buffer. 
@@ -316,12 +313,23 @@ class Image(ItemHandler): def decode_jpg(): return image_ops.decode_jpeg(image_buffer, self._channels) - image = control_flow_ops.case({ + # For RGBA images JPEG is not a valid decoder option. + if self._channels > 3: + pred_fn_pairs = { + math_ops.logical_or(math_ops.equal(image_format, 'raw'), + math_ops.equal(image_format, 'RAW')): decode_raw, + } + default_decoder = decode_png + else: + pred_fn_pairs = { math_ops.logical_or(math_ops.equal(image_format, 'png'), math_ops.equal(image_format, 'PNG')): decode_png, math_ops.logical_or(math_ops.equal(image_format, 'raw'), math_ops.equal(image_format, 'RAW')): decode_raw, - }, default=decode_jpg, exclusive=True) + } + default_decoder = decode_jpg + + image = control_flow_ops.case(pred_fn_pairs, default=default_decoder, exclusive=True) image.set_shape([None, None, self._channels]) if self._shape is not None: diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py index cd75db8967..f572f9c3e9 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py @@ -168,7 +168,7 @@ class TFExampleDecoderTest(tf.test.TestCase): self.assertEqual(tf_decoded_image.get_shape().ndims, 3) def testDecodeExampleWithPngEncoding(self): - test_image_channels = [1, 3] + test_image_channels = [1, 3, 4] for channels in test_image_channels: image_shape = (2, 3, channels) image, serialized_example = self.GenerateImage( @@ -183,7 +183,7 @@ class TFExampleDecoderTest(tf.test.TestCase): self.assertAllClose(image, decoded_image, atol=0) def testDecodeExampleWithPNGEncoding(self): - test_image_channels = [1, 3] + test_image_channels = [1, 3, 4] for channels in test_image_channels: image_shape = (2, 3, channels) image, serialized_example = self.GenerateImage( diff --git a/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc 
b/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc index e93267c1e3..544336b1ba 100644 --- a/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc +++ b/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc @@ -395,7 +395,7 @@ double getDistanceFromLambda3(double lambda3, const std::vector<float>& mu1, // x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3) // y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3) double dist = 0.0; - for (int i = 0; i < mu1.size(); i++) { + for (size_t i = 0; i < mu1.size(); i++) { double diff = (lambda1 + 2.0 * mu1[i]) / (2.0 - 2.0 * lambda3) - mu1[i]; dist += diff * diff; diff = (lambda2 + 2.0 * mu2[i]) / (2.0 + 2.0 * lambda3) - mu2[i]; diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index ec60e853b2..29b05cecba 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1118,6 +1118,7 @@ tf_version_info_genrule() cc_library( name = "version_lib", srcs = ["util/version_info.cc"], + hdrs = ["public/version.h"], copts = tf_copts(), ) @@ -1129,7 +1130,6 @@ tf_cuda_library( "example/**/*.cc", "framework/**/*.h", "framework/**/*.cc", - "public/version.h", "util/**/*.h", "util/**/*.cc", ], @@ -1142,6 +1142,7 @@ tf_cuda_library( "framework/fake_input.*", "util/memmapped_file_system.*", "util/memmapped_file_system_writer.*", + "util/version_info.cc", ], ) + select({ "//tensorflow:windows": [], @@ -1394,11 +1395,13 @@ tf_cuda_library( cc_library( name = "sycl_runtime", srcs = if_not_windows([ + "common_runtime/sycl/sycl_allocator.cc", "common_runtime/sycl/sycl_device.cc", "common_runtime/sycl/sycl_device_context.cc", "common_runtime/sycl/sycl_device_factory.cc", ]), hdrs = if_not_windows([ + "common_runtime/sycl/sycl_allocator.h", "common_runtime/sycl/sycl_device.h", "common_runtime/sycl/sycl_device_context.h", ]), diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc new file mode 100644 index 0000000000..175b784825 --- /dev/null +++ 
b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc @@ -0,0 +1,35 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef TENSORFLOW_USE_SYCL + +#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h" + +namespace tensorflow { + +SYCLAllocator::~SYCLAllocator() { } + +string SYCLAllocator::Name() { return "device:SYCL"; } + +void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { + auto p = device_->allocate(num_bytes); + return p; +} + +void SYCLAllocator::DeallocateRaw(void *ptr) { device_->deallocate(ptr); } + +} // namespace tensorflow + +#endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.h b/tensorflow/core/common_runtime/sycl/sycl_allocator.h new file mode 100644 index 0000000000..887c727f6e --- /dev/null +++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.h @@ -0,0 +1,45 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if !TENSORFLOW_USE_SYCL +#error This file must only be included when building TensorFlow with SYCL support +#endif + +#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_ +#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_ + +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/platform/types.h" +#define EIGEN_USE_SYCL +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +class SYCLAllocator : public Allocator { +public: + SYCLAllocator(Eigen::SyclDevice* device) : device_(device) {} + virtual ~SYCLAllocator() override; + string Name() override; + void *AllocateRaw(size_t alignment, size_t num_bytes) override; + void DeallocateRaw(void *ptr) override; + +private: + Eigen::SyclDevice *device_; // not owned + TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_ diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc index dc4e8db7ba..10a037c02d 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device.cc +++ b/tensorflow/core/common_runtime/sycl/sycl_device.cc @@ -23,25 +23,13 @@ limitations under the License. 
namespace tensorflow { -cl::sycl::gpu_selector s; -cl::sycl::queue q(s); - -SYCLDevice::SYCLDevice(const SessionOptions& options, const string& name, - Bytes memory_limit, const DeviceLocality& locality, - const string& physical_device_desc, Allocator* allocator) - : LocalDevice(options, - Device::BuildDeviceAttributes(name, DEVICE_SYCL, memory_limit, - locality, physical_device_desc), - allocator), - allocator_(allocator), - device_context_(new SYCLDeviceContext()), - device_(q) { - set_eigen_sycl_device(&device_); +SYCLDevice::~SYCLDevice() { + device_context_->Unref(); + delete sycl_allocator_; + delete sycl_device_; } -SYCLDevice::~SYCLDevice() { device_context_->Unref(); } - -void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) { +void SYCLDevice::Compute(OpKernel *op_kernel, OpKernelContext *context) { assert(context); if (port::Tracing::IsActive()) { // TODO(pbar) We really need a useful identifier of the graph node. @@ -52,28 +40,45 @@ void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) { op_kernel->Compute(context); } -Allocator* SYCLDevice::GetAllocator(AllocatorAttributes attr) { - return allocator_; +Allocator *SYCLDevice::GetAllocator(AllocatorAttributes attr) { + if (attr.on_host()) + return cpu_allocator_; + else + return sycl_allocator_; } -Status SYCLDevice::MakeTensorFromProto(const TensorProto& tensor_proto, +Status SYCLDevice::MakeTensorFromProto(const TensorProto &tensor_proto, const AllocatorAttributes alloc_attrs, - Tensor* tensor) { + Tensor *tensor) { + AllocatorAttributes attr; + attr.set_on_host(true); + attr.set_gpu_compatible(true); + Allocator *host_alloc = GetAllocator(attr); Tensor parsed(tensor_proto.dtype()); - if (!parsed.FromProto(cpu_allocator(), tensor_proto)) { + if (!parsed.FromProto(host_alloc, tensor_proto)) { return errors::InvalidArgument("Cannot parse tensor from proto: ", - ProtoDebugString(tensor_proto)); + tensor_proto.DebugString()); } - *tensor = std::move(parsed); - return 
Status::OK(); + Status status; + if (alloc_attrs.on_host()) { + *tensor = parsed; + } else { + Tensor copy(GetAllocator(alloc_attrs), parsed.dtype(), parsed.shape()); + device_context_->CopyCPUTensorToDevice(&parsed, this, &copy, + [&status](const Status &s) { + status = s; + }); + *tensor = copy; + } + return status; } -Status SYCLDevice::FillContextMap(const Graph* graph, - DeviceContextMap* device_context_map) { +Status SYCLDevice::FillContextMap(const Graph *graph, + DeviceContextMap *device_context_map) { // Fill in the context map. It is OK for this map to contain // duplicate DeviceContexts so long as we increment the refcount. device_context_map->resize(graph->num_node_ids()); - for (Node* n : graph->nodes()) { + for (Node *n : graph->nodes()) { device_context_->Ref(); (*device_context_map)[n->id()] = device_context_; } @@ -81,6 +86,6 @@ Status SYCLDevice::FillContextMap(const Graph* graph, return Status::OK(); } -} // namespace tensorflow +} // namespace tensorflow -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h index eaa9429b16..d3b3db2a71 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device.h +++ b/tensorflow/core/common_runtime/sycl/sycl_device.h @@ -24,26 +24,40 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/local_device.h" +#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h" #include "tensorflow/core/common_runtime/sycl/sycl_device_context.h" #include "tensorflow/core/public/session_options.h" namespace tensorflow { + class SYCLDevice : public LocalDevice { - public: - SYCLDevice(const SessionOptions& options, const string& name, - Bytes memory_limit, const DeviceLocality& locality, - const string& physical_device_desc, Allocator* allocator); +public: + template <typename SYCLSelector> + SYCLDevice(const SessionOptions &options, const string &name, + Bytes memory_limit, const DeviceLocality &locality, + const string &physical_device_desc, SYCLSelector sycl_selector, + Allocator *cpu_allocator) + : LocalDevice(options, Device::BuildDeviceAttributes( + name, DEVICE_SYCL, memory_limit, locality, + physical_device_desc), nullptr), + cpu_allocator_(cpu_allocator), + sycl_device_(new Eigen::SyclDevice(sycl_selector)), + sycl_allocator_(new SYCLAllocator(sycl_device_)), + device_context_(new SYCLDeviceContext()) { + set_eigen_sycl_device(sycl_device_); + } + ~SYCLDevice() override; - void Compute(OpKernel* op_kernel, OpKernelContext* context) override; - Allocator* GetAllocator(AllocatorAttributes attr) override; - Status MakeTensorFromProto(const TensorProto& tensor_proto, + void Compute(OpKernel *op_kernel, OpKernelContext *context) override; + Allocator *GetAllocator(AllocatorAttributes attr) override; + Status MakeTensorFromProto(const TensorProto &tensor_proto, const AllocatorAttributes alloc_attrs, - Tensor* tensor) override; + Tensor *tensor) override; - Status FillContextMap(const Graph* graph, - DeviceContextMap* device_context_map) override; + Status FillContextMap(const Graph *graph, + DeviceContextMap *device_context_map) override; Status Sync() override { return Status::OK(); } static string GetShortDeviceDescription(/*int device_id, @@ -51,12 +65,13 
@@ class SYCLDevice : public LocalDevice { return strings::StrCat("device: 0, name SYCL, pci bus id: 0"); } - private: - Allocator* allocator_; // Not owned - SYCLDeviceContext* device_context_; - Eigen::SyclDevice device_; +private: + Allocator *cpu_allocator_; // owned + Eigen::SyclDevice* sycl_device_; // owned + SYCLAllocator *sycl_allocator_; // owned + SYCLDeviceContext *device_context_; }; -} // namespace tensorflow +} // namespace tensorflow -#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_ +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_ diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc index 70b1a0e93b..9dd289bebd 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc +++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc @@ -13,36 +13,171 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#if TENSORFLOW_USE_SYCL + +#define EIGEN_USE_SYCL + #include "tensorflow/core/common_runtime/sycl/sycl_device_context.h" #include "tensorflow/core/common_runtime/dma_helper.h" +#define EIGEN_USE_SYCL +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + namespace tensorflow { -void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor, - Device* device, - Tensor* device_tensor, +void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor, + Device *device, + Tensor *device_tensor, StatusCallback done) const { const int64 total_bytes = cpu_tensor->TotalBytes(); if (total_bytes > 0) { - const void* src_ptr = DMAHelper::base(cpu_tensor); - void* dst_ptr = DMAHelper::base(device_tensor); - ::memcpy(dst_ptr, src_ptr, total_bytes); + const void *src_ptr = DMAHelper::base(cpu_tensor); + void *dst_ptr = DMAHelper::base(device_tensor); + switch (cpu_tensor->dtype()) { + case 
DT_FLOAT: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr), + total_bytes); + break; + case DT_DOUBLE: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr), + total_bytes); + break; + case DT_INT32: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr), + total_bytes); + break; + case DT_INT64: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr), + total_bytes); + break; + case DT_HALF: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<Eigen::half *>(dst_ptr), + static_cast<const Eigen::half *>(src_ptr), total_bytes); + break; + case DT_COMPLEX64: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<std::complex<float> *>(dst_ptr), + static_cast<const std::complex<float> *>(src_ptr), total_bytes); + break; + case DT_COMPLEX128: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<std::complex<double> *>(dst_ptr), + static_cast<const std::complex<double> *>(src_ptr), total_bytes); + break; + case DT_INT8: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr), + total_bytes); + break; + case DT_INT16: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr), + total_bytes); + break; + case DT_UINT8: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr), + total_bytes); + break; + case DT_UINT16: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr), + total_bytes); + break; + case DT_BOOL: + device->eigen_sycl_device()->memcpyHostToDevice( + static_cast<bool *>(dst_ptr), static_cast<const bool 
*>(src_ptr), + total_bytes); + break; + default: + assert(false && "unsupported type"); + } } done(Status::OK()); } -void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor, +void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor, StringPiece edge_name, - Device* device, - Tensor* cpu_tensor, + Device *device, + Tensor *cpu_tensor, StatusCallback done) { const int64 total_bytes = device_tensor->TotalBytes(); if (total_bytes > 0) { + device->eigen_sycl_device()->deallocate_all(); const void* src_ptr = DMAHelper::base(device_tensor); void* dst_ptr = DMAHelper::base(cpu_tensor); - ::memcpy(dst_ptr, src_ptr, total_bytes); + switch (device_tensor->dtype()) { + case DT_FLOAT: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr), + total_bytes); + break; + case DT_DOUBLE: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr), + total_bytes); + break; + case DT_INT32: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr), + total_bytes); + break; + case DT_INT64: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr), + total_bytes); + break; + case DT_HALF: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<Eigen::half *>(dst_ptr), + static_cast<const Eigen::half *>(src_ptr), total_bytes); + break; + case DT_COMPLEX64: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<std::complex<float> *>(dst_ptr), + static_cast<const std::complex<float> *>(src_ptr), total_bytes); + break; + case DT_COMPLEX128: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<std::complex<double> *>(dst_ptr), + static_cast<const std::complex<double> *>(src_ptr), total_bytes); + break; + case DT_INT8: + device->eigen_sycl_device()->memcpyDeviceToHost( + 
static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr), + total_bytes); + break; + case DT_INT16: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr), + total_bytes); + break; + case DT_UINT8: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr), + total_bytes); + break; + case DT_UINT16: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr), + total_bytes); + break; + case DT_BOOL: + device->eigen_sycl_device()->memcpyDeviceToHost( + static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr), + total_bytes); + break; + default: + assert(false && "unsupported type"); + } } done(Status::OK()); } } // namespace tensorflow +#endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.h b/tensorflow/core/common_runtime/sycl/sycl_device_context.h index 67cfe47aa1..1f7ad543d9 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device_context.h +++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.h @@ -13,6 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#if !TENSORFLOW_USE_SYCL +#error This file must only be included when building TensorFlow with SYCL support +#endif + #ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_ #define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_ @@ -22,20 +26,20 @@ limitations under the License. 
namespace tensorflow { class SYCLDeviceContext : public DeviceContext { - public: +public: SYCLDeviceContext() {} ~SYCLDeviceContext() override {} - void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, - Tensor* device_tensor, + void CopyCPUTensorToDevice(const Tensor *cpu_tensor, Device *device, + Tensor *device_tensor, StatusCallback done) const override; - void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name, - Device* device, Tensor* cpu_tensor, + void CopyDeviceTensorToCPU(const Tensor *device_tensor, StringPiece edge_name, + Device *device, Tensor *cpu_tensor, StatusCallback done) override; }; -} // namespace tensorflow +} // namespace tensorflow -#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_ +#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_ diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc index 97c4c2c236..9b8770420c 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc +++ b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc @@ -20,9 +20,9 @@ limitations under the License. 
namespace tensorflow { class SYCLDeviceFactory : public DeviceFactory { - public: - Status CreateDevices(const SessionOptions& options, const string& name_prefix, - std::vector<Device*>* devices) override { +public: + Status CreateDevices(const SessionOptions &options, const string &name_prefix, + std::vector<Device *> *devices) override { int n = 1; auto iter = options.config.device_count().find("SYCL"); if (iter != options.config.device_count().end()) { @@ -30,9 +30,10 @@ class SYCLDeviceFactory : public DeviceFactory { } for (int i = 0; i < n; i++) { string name = strings::StrCat(name_prefix, "/device:SYCL:", i); - devices->push_back(new SYCLDevice( - options, name, Bytes(256 << 20), DeviceLocality(), - SYCLDevice::GetShortDeviceDescription(), cpu_allocator())); + devices->push_back(new SYCLDevice(options, name, Bytes(256 << 20), + DeviceLocality(), + SYCLDevice::GetShortDeviceDescription(), + cl::sycl::gpu_selector(), cpu_allocator())); } return Status::OK(); } @@ -41,4 +42,4 @@ class SYCLDeviceFactory : public DeviceFactory { REGISTER_LOCAL_DEVICE_FACTORY("SYCL", SYCLDeviceFactory); } -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/distributed_runtime/master.cc b/tensorflow/core/distributed_runtime/master.cc index 741282be31..de468f00b7 100644 --- a/tensorflow/core/distributed_runtime/master.cc +++ b/tensorflow/core/distributed_runtime/master.cc @@ -91,8 +91,8 @@ void Master::GC() { std::vector<string> handles; const int64 num_micros = static_cast<int64>(session_gc_seconds_ * 1000000); for (const auto& entry : sessions_) { - auto lat = entry.second->last_access_time_usec(); - if (env->NowMicros() - lat > num_micros) { + int64 lat = entry.second->last_access_time_usec(); + if (static_cast<int64>(env->NowMicros()) - lat > num_micros) { handles.push_back(entry.first); auto* sess = entry.second; SchedClosure([this, sess]() { @@ -399,7 +399,7 @@ void Master::CleanupWorkers(const ResetRequest& reset) { } ++c; } - for (int i 
= 0; i < n.size(); ++i) { + for (size_t i = 0; i < n.size(); ++i) { n[i].WaitForNotification(); } } diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc index eb188a7984..31a3404a07 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc @@ -69,7 +69,7 @@ Status ValidateHostPortPair(const string& host_port) { Status GrpcChannelSpec::AddHostPortsJob(const string& job_id, const std::vector<string>& host_ports) { std::map<int, string> host_ports_map; - for (int i = 0; i < host_ports.size(); ++i) { + for (size_t i = 0; i < host_ports.size(); ++i) { host_ports_map[i] = host_ports[i]; } return AddHostPortsJob(job_id, host_ports_map); diff --git a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc index 3b0614cc75..c4bb37fcbe 100644 --- a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc +++ b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc @@ -156,7 +156,7 @@ class RpcRecvTensorFreeList { public: RpcRecvTensorFreeList() {} ~RpcRecvTensorFreeList() { - for (int i = 0; i < objects_.size(); i++) { + for (size_t i = 0; i < objects_.size(); i++) { delete objects_[i]; } } diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc index b26970b606..55b7d5fe82 100644 --- a/tensorflow/core/distributed_runtime/tensor_coding.cc +++ b/tensorflow/core/distributed_runtime/tensor_coding.cc @@ -192,7 +192,7 @@ bool TensorResponse::ParseTensorSubmessage( TensorShape shape(tensor_meta->tensor_shape()); Tensor t(allocator_, tensor_meta->dtype(), shape); StringPiece buf = t.tensor_data(); - if (num_bytes != buf.size()) return false; + if (static_cast<size_t>(num_bytes) != buf.size()) return false; // TODO(jeff,sanjay): Figure out a way to avoid this copy if // the underlying 
ZeroCopyInputStream data is properly aligned // and compatible with what allocator_ wants. diff --git a/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc b/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc index a190b2168a..2be330d142 100644 --- a/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc @@ -72,7 +72,7 @@ __global__ void AvePoolBackwardNHWC(const int nthreads, wstart = max(wstart, 0); int pool_size = (hend - hstart) * (wend - wstart); gradient += - top_diff_slice[(ph * pooled_width + pw) * channels] / pool_size; + top_diff_slice[(ph * pooled_width + pw) * channels] / dtype(pool_size); } } bottom_diff[index] = gradient; diff --git a/tensorflow/core/kernels/bcast_ops.cc b/tensorflow/core/kernels/bcast_ops.cc index 10354cbb56..db8842a547 100644 --- a/tensorflow/core/kernels/bcast_ops.cc +++ b/tensorflow/core/kernels/bcast_ops.cc @@ -90,4 +90,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs") .HostMemory("r1"), BCastGradArgsOp); +#if TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs") + .Device(DEVICE_SYCL) + .TypeConstraint<int32>("T") + .HostMemory("s0") + .HostMemory("s1") + .HostMemory("r0") + .HostMemory("r1"), + BCastGradArgsOp); +#endif } // end namespace tensorflow diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index c2d5af0316..4c8c9939bc 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -16,6 +16,9 @@ limitations under the License. // See docs in ../ops/array_ops.cc. 
#define EIGEN_USE_THREADS +#if TENSORFLOW_USE_SYCL +#define EIGEN_USE_SYCL +#endif #include "tensorflow/core/kernels/constant_op.h" diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc index 65413a09b2..1a8c17b1ef 100644 --- a/tensorflow/core/kernels/control_flow_ops.cc +++ b/tensorflow/core/kernels/control_flow_ops.cc @@ -112,6 +112,15 @@ REGISTER_GPU_HOST_REF_KERNEL(string); #undef REGISTER_GPU_HOST_KERNEL #undef REGISTER_GPU_HOST_REF_KERNEL +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Switch").Device(DEVICE_SYCL).TypeConstraint<type>("T"), SwitchOp) +REGISTER_SYCL_KERNEL(bool); +TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif + class RefSelectOp : public OpKernel { public: explicit RefSelectOp(OpKernelConstruction* context) : OpKernel(context) { @@ -209,6 +218,15 @@ REGISTER_GPU_REF_KERNEL(bool); #undef REGISTER_GPU_KERNEL #undef REGISTER_GPU_REF_KERNEL +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Merge").Device(DEVICE_SYCL).TypeConstraint<type>("T"), MergeOp) +REGISTER_SYCL_KERNEL(bool); +TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif + // Special GPU kernels for int32 and string. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. @@ -259,6 +277,15 @@ REGISTER_GPU_REF_KERNEL(bool); #undef REGISTER_GPU_KERNEL #undef REGISTER_GPU_REF_KERNEL +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Enter").Device(DEVICE_SYCL).TypeConstraint<type>("T"), EnterOp) +REGISTER_SYCL_KERNEL(bool); +TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif + // Special GPU kernels for int32 and string. // TODO(b/25387198): Also enable int32 in device memory. 
This kernel // registration requires all int32 inputs and outputs to be in host memory. @@ -310,6 +337,15 @@ REGISTER_GPU_KERNEL(bool); #undef REGISTER_GPU_KERNEL #undef REGISTER_GPU_REF_KERNEL +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Exit").Device(DEVICE_SYCL).TypeConstraint<type>("T"), ExitOp) +REGISTER_SYCL_KERNEL(bool); +TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif + // Special GPU kernels for int32 and string. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. @@ -380,6 +416,15 @@ REGISTER_GPU_HOST_KERNEL(string); #undef REGISTER_GPU_HOST_KERNEL +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), NextIterationOp) + REGISTER_SYCL_KERNEL(bool); + TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif + // A LoopCond op has one input and one output. The input is a boolean // scalar representing the taken branches of the "pivot" Switch that // determines loop termination. As a contract, any high-level front-end diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc index 75c6118795..44c552d18e 100644 --- a/tensorflow/core/kernels/cwise_op_add_1.cc +++ b/tensorflow/core/kernels/cwise_op_add_1.cc @@ -18,6 +18,18 @@ limitations under the License. 
namespace tensorflow { REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32, int64); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Add") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + BinaryOp<SYCLDevice, functor::add<TYPE>>); +TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc index 925c9e9916..c2b05a69b2 100644 --- a/tensorflow/core/kernels/cwise_op_div.cc +++ b/tensorflow/core/kernels/cwise_op_div.cc @@ -24,6 +24,16 @@ REGISTER5(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8, uint16, int16, int32, int64); REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double, complex64, complex128); +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Div") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + BinaryOp<SYCLDevice, functor::div<TYPE>>); +REGISTER_SYCL_KERNEL(float) +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER9(BinaryOp, GPU, "Div", functor::div, float, Eigen::half, double, uint8, uint16, int16, int64, complex64, complex128); diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc index 83b2771ed2..7930d83413 100644 --- a/tensorflow/core/kernels/cwise_op_floor_div.cc +++ b/tensorflow/core/kernels/cwise_op_floor_div.cc @@ -18,6 +18,16 @@ limitations under the License. 
namespace tensorflow { REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16, int16, int32, int64); +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("FloorDiv") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + BinaryOp<SYCLDevice, functor::floor_div<TYPE>>); +TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16, int64); diff --git a/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc new file mode 100644 index 0000000000..028b944d27 --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc @@ -0,0 +1,26 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#if GOOGLE_CUDA + +#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h" + +namespace tensorflow { +namespace functor { +DEFINE_UNARY2(rint, float, double); +} // namespace functor +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc index 954b5d25bd..e38b271318 100644 --- a/tensorflow/core/kernels/cwise_op_isfinite.cc +++ b/tensorflow/core/kernels/cwise_op_isfinite.cc @@ -18,6 +18,16 @@ limitations under the License. namespace tensorflow { REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half, double); +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("IsFinite") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::isfinite<TYPE>>); +TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc index 407dadcb69..bf056dbe0e 100644 --- a/tensorflow/core/kernels/cwise_op_isinf.cc +++ b/tensorflow/core/kernels/cwise_op_isinf.cc @@ -17,6 +17,16 @@ limitations under the License. 
namespace tensorflow { REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double); +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("IsInf") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::isinf<TYPE>>); +TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc index f150b2f3f4..d2bac23882 100644 --- a/tensorflow/core/kernels/cwise_op_isnan.cc +++ b/tensorflow/core/kernels/cwise_op_isnan.cc @@ -17,6 +17,16 @@ limitations under the License. namespace tensorflow { REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double); +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("IsNan") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::isnan<TYPE>>); +TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_mul_1.cc b/tensorflow/core/kernels/cwise_op_mul_1.cc index 09e9f070da..e23fe6761d 100644 --- a/tensorflow/core/kernels/cwise_op_mul_1.cc +++ b/tensorflow/core/kernels/cwise_op_mul_1.cc @@ -19,6 +19,17 @@ namespace tensorflow { REGISTER5(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double, uint8, int32); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Mul") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + BinaryOp<SYCLDevice, functor::mul<TYPE>>); +REGISTER_SYCL_KERNEL(float) +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL #if 
GOOGLE_CUDA REGISTER4(BinaryOp, GPU, "Mul", functor::mul, float, Eigen::half, double, uint8); diff --git a/tensorflow/core/kernels/cwise_op_rint.cc b/tensorflow/core/kernels/cwise_op_rint.cc new file mode 100644 index 0000000000..a741b3d718 --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_rint.cc @@ -0,0 +1,23 @@ +/* Copyright 2016 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/cwise_ops_common.h" + +namespace tensorflow { +REGISTER2(UnaryOp, CPU, "Rint", functor::rint, float, double); +#if GOOGLE_CUDA +REGISTER2(UnaryOp, GPU, "Rint", functor::rint, float, double); +#endif +} // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc index d000e54cfc..ed78ba37a8 100644 --- a/tensorflow/core/kernels/cwise_op_sub.cc +++ b/tensorflow/core/kernels/cwise_op_sub.cc @@ -24,6 +24,16 @@ REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32, // int32 version of this op is needed, so explicitly include it. 
REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32); #endif // __ANDROID_TYPES_SLIM__ +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Sub") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + BinaryOp<SYCLDevice, functor::sub<TYPE>>); +TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER6(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double, int64, complex64, complex128); diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index b038d73e17..7f35e03feb 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -521,6 +521,27 @@ struct round : base<T, Eigen::internal::scalar_round_op_google<T>> {}; template <typename T> struct ceil : base<T, Eigen::internal::scalar_ceil_op<T>> {}; +/** this should go in Eigen + * \brief Template functor to compute the round to int value of a scalar + */ +template <typename Scalar> +struct scalar_rint_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_rint_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar + operator()(const Scalar& a) const { +#if defined(__CUDACC__) + return ::rint(a); +#elif defined(__ANDROID__) + return rint(a); +#else + return std::rint(a); +#endif + } +}; + +template <typename T> +struct rint : base<T, scalar_rint_op<T>> {}; + //////////////////////////////////////////////////////////////////////////////// // Binary functors //////////////////////////////////////////////////////////////////////////////// diff --git a/tensorflow/core/kernels/cwise_ops_sycl_common.h b/tensorflow/core/kernels/cwise_ops_sycl_common.h index c66ae42c2d..4c22cc4855 100644 --- a/tensorflow/core/kernels/cwise_ops_sycl_common.h +++ b/tensorflow/core/kernels/cwise_ops_sycl_common.h @@ -22,6 +22,8 @@ limitations under the License. 
#define EIGEN_USE_SYCL +#include "tensorflow/core/framework/register_types.h" + #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/cwise_ops.h" #include "tensorflow/core/platform/logging.h" @@ -32,6 +34,14 @@ namespace functor { typedef Eigen::SyclDevice SYCLDevice; +template <typename Index, int N> Eigen::array<Index, N> GenerateArrayOfOnes() { + Eigen::array<Index, N> result; + for (int i = 0; i < N; ++i) { + result[i] = 1; + } + return result; +} + template <typename OUT, typename RHS> void Assign(const SYCLDevice& d, OUT out, RHS rhs) { out.device(d) = rhs; @@ -52,23 +62,31 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> { void operator()(const SYCLDevice& d, typename Functor::tout_type out, typename Functor::tin_type in0, typename Functor::tin_type in1, bool* error) { - Assign(d, out, in0.binaryExpr(in1, typename Functor::func())); + To32Bit(out).device(d) = To32Bit(in0).binaryExpr(in1, typename Functor::func()); } void Left(const SYCLDevice& d, typename Functor::tout_type out, typename Functor::tscalar_type scalar, typename Functor::tin_type in, bool* error) { - LOG(FATAL) << "BinaryFunctor::Left NOT IMPLEMENTED ! 
"; + typedef typename Functor::func Binary; + constexpr int NumDims = Functor::tin_type::NumDimensions; + typedef typename Functor::tin_type::Scalar T; + typedef typename Functor::tin_type::Index Index; + Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>(); + Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim); + out.device(d) = tmp.broadcast(in.dimensions()).binaryExpr(in, Binary()); } void Right(const SYCLDevice& d, typename Functor::tout_type out, typename Functor::tin_type in, typename Functor::tscalar_type scalar, bool* error) { - typedef typename Functor::out_type Tout; - typedef typename Functor::in_type Tin; typedef typename Functor::func Binary; - typedef typename Eigen::internal::scalar_right<Tout, Tin, Binary> Unary; - Assign(d, out, in.unaryExpr(Unary(scalar.data()))); + constexpr int NumDims = Functor::tin_type::NumDimensions; + typedef typename Functor::tin_type::Scalar T; + typedef typename Functor::tin_type::Index Index; + Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>(); + Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim); + out.device(d) = in.binaryExpr(tmp.broadcast(in.dimensions()), Binary()); } void BCast(const SYCLDevice& d, @@ -78,7 +96,25 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> { typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in1, typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast1, bool* error) { - LOG(FATAL) << "BinaryFunctor::BCast NOT IMPLEMENTED "; + typedef typename Functor::in_type T; + typename Functor::func func; + if ((NDIMS == 2) && Functor::use_bcast_optimization && + use_bcast_optimization<T>::value) { + const bool bcast0_all_one = AllOne<NDIMS>(bcast0); + const bool bcast1_all_one = AllOne<NDIMS>(bcast1); + if (bcast0_all_one && !bcast1_all_one) { + To32Bit(out).device(d) = + To32Bit(in0).binaryExpr(To32Bit(in1).broadcast(bcast1), func); + return; 
+ } + if (!bcast0_all_one && bcast1_all_one) { + To32Bit(out).device(d) = + To32Bit(in0).broadcast(bcast0).binaryExpr(To32Bit(in1), func); + return; + } + } + To32Bit(out).device(d) = To32Bit(in0).broadcast(bcast0).binaryExpr( + To32Bit(in1).broadcast(bcast1), func); } }; diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc index 823e7e14ed..6250928aca 100644 --- a/tensorflow/core/kernels/cwise_ops_test.cc +++ b/tensorflow/core/kernels/cwise_ops_test.cc @@ -59,6 +59,11 @@ BM_UNARY(gpu, Conj, std::complex<float>, DT_COMPLEX64); BM_UNARY(cpu, Conj, std::complex<double>, DT_COMPLEX128); BM_UNARY(gpu, Conj, std::complex<double>, DT_COMPLEX128); +BM_UNARY(cpu, Rint, double, DT_DOUBLE); +BM_UNARY(gpu, Rint, double, DT_DOUBLE); +BM_UNARY(cpu, Rint, float, DT_FLOAT); +BM_UNARY(gpu, Rint, float, DT_FLOAT); + // data func scalar. static Graph* BinaryScalar(int num, const string& func) { Graph* g = new Graph(OpRegistry::Global()); diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc index 025e9a8de1..baa8f83091 100644 --- a/tensorflow/core/kernels/dense_update_ops.cc +++ b/tensorflow/core/kernels/dense_update_ops.cc @@ -14,6 +14,9 @@ limitations under the License. 
==============================================================================*/ #define EIGEN_USE_THREADS +#if TENSORFLOW_USE_SYCL +#define EIGEN_USE_SYCL +#endif #include "tensorflow/core/kernels/dense_update_ops.h" #include "tensorflow/core/framework/op_kernel.h" @@ -92,6 +95,18 @@ TF_CALL_ALL_TYPES(REGISTER_KERNELS); TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS +#if TENSORFLOW_USE_SYCL +typedef Eigen::SyclDevice SYCLDevice; +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Assign") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<type>("T"), \ + AssignOpT<SYCLDevice, type>); +TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif + #if GOOGLE_CUDA // Only register 'Assign' on GPU for the subset of types also supported by // 'Variable' (see variable_ops.cc.) diff --git a/tensorflow/core/kernels/eigen_pooling.h b/tensorflow/core/kernels/eigen_pooling.h index 96acbe824a..8eea1b0f9d 100644 --- a/tensorflow/core/kernels/eigen_pooling.h +++ b/tensorflow/core/kernels/eigen_pooling.h @@ -325,7 +325,7 @@ struct AvgPoolMeanReducer { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { eigen_assert(scalarCount_ > 0); - return accum / scalarCount_; + return accum / T(scalarCount_); } #if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h index a2a9e15301..7702f3e70a 100644 --- a/tensorflow/core/kernels/eigen_spatial_convolutions.h +++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h @@ -991,6 +991,9 @@ EIGEN_DEVICE_FUNC out_width = numext::ceil(InputCols / static_cast<float>(col_stride)); break; default: + // Initialize unused variables to avoid a compiler warning + out_height = 0; + out_width = 0; eigen_assert(false && "unexpected padding"); } diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc 
b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc index dd9f83afbc..56ffe58569 100644 --- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc @@ -72,8 +72,8 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> { Eigen::array<int64, IXDIM> batch_strides; Eigen::array<int64, IXDIM> batch_indices; if (IXDIM > 0) { - batch_strides[IXDIM - 1] = s_size; - batch_indices[IXDIM - 1] = Tparams.dimension(IXDIM - 1); + batch_strides[size_t(IXDIM - 1)] = s_size; + batch_indices[size_t(IXDIM - 1)] = Tparams.dimension(IXDIM - 1); } for (int i = IXDIM - 1; i > 0; --i) { batch_indices[i - 1] = Tparams.dimension(i - 1); diff --git a/tensorflow/core/kernels/identity_op.cc b/tensorflow/core/kernels/identity_op.cc index 711afd1bf3..8ede544b7f 100644 --- a/tensorflow/core/kernels/identity_op.cc +++ b/tensorflow/core/kernels/identity_op.cc @@ -68,6 +68,7 @@ REGISTER_GPU_KERNEL(bfloat16); #undef REGISTER_GPU_KERNEL + #if GOOGLE_CUDA // A special GPU kernel for int32 and bool. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/matrix_inverse_op.cc b/tensorflow/core/kernels/matrix_inverse_op.cc index 884e3d61a4..0572d48b3e 100644 --- a/tensorflow/core/kernels/matrix_inverse_op.cc +++ b/tensorflow/core/kernels/matrix_inverse_op.cc @@ -52,7 +52,7 @@ class MatrixInverseOp : public LinearAlgebraOp<Scalar> { Eigen::PartialPivLU<Matrix> lu_decomposition; if (adjoint_) { // TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy. 
- // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \ + // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ // bd2219a74c96dfe3f6bc2c23588749e36d2d8173 lu_decomposition.compute(input.adjoint()); } else { diff --git a/tensorflow/core/kernels/matrix_solve_op.cc b/tensorflow/core/kernels/matrix_solve_op.cc index a2dfd58bd1..e10a102871 100644 --- a/tensorflow/core/kernels/matrix_solve_op.cc +++ b/tensorflow/core/kernels/matrix_solve_op.cc @@ -75,7 +75,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> { Eigen::PartialPivLU<Matrix> lu_decomposition(matrix.rows()); if (adjoint_) { // TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy. - // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \ + // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ // bd2219a74c96dfe3f6bc2c23588749e36d2d8173 lu_decomposition.compute(matrix.adjoint()); } else { @@ -95,7 +95,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> { // TODO(rmlarsen): Add check based on condition number estimation. // The necessary changes to Eigen are in - // https://bitbucket.org/eigen/eigen/pull-requests/174/ \ + // https://bitbucket.org/eigen/eigen/pull-requests/174/ // add-matrix-condition-number-estimation/diff outputs->at(0) = lu_decomposition.solve(rhs); } diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 5aeb3d2c0e..7704c5f65a 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -317,9 +317,9 @@ class ScatterNdUpdateOp : public OpKernel { scatter_nd_op::UpdateOp::SUB); // TODO(simister): Find a way to reduce amount of templated generated code // to reduce build size, then re-enable these additional operations. 
-// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul", \ -// scatter_nd_op::UpdateOp::MUL); \ -// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv", \ +// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul", +// scatter_nd_op::UpdateOp::MUL); +// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv", // scatter_nd_op::UpdateOp::DIV); #define REGISTER_SCATTER_ND(type, dev) \ diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h index 442721d37b..bbe2c6864f 100644 --- a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h +++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h @@ -175,7 +175,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> { REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB); // TODO(simister): Re-enable after identifying a way to reduce the binary size // due to too many template instantiations. -// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL); \ +// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL); // REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::DIV); TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE); diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc index 9e9cdda382..1c7d50e161 100644 --- a/tensorflow/core/kernels/sendrecv_ops.cc +++ b/tensorflow/core/kernels/sendrecv_ops.cc @@ -80,6 +80,8 @@ REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_GPU), SendOp); #if TENSORFLOW_USE_SYCL REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_SYCL), SendOp); +REGISTER_KERNEL_BUILDER( + Name("_HostSend").Device(DEVICE_SYCL).HostMemory("tensor"), SendOp); #endif REGISTER_KERNEL_BUILDER(Name("_HostSend").Device(DEVICE_CPU), SendOp); @@ -148,4 +150,9 @@ REGISTER_KERNEL_BUILDER(Name("_HostRecv").Device(DEVICE_CPU), RecvOp); REGISTER_KERNEL_BUILDER( Name("_HostRecv").Device(DEVICE_GPU).HostMemory("tensor"), RecvOp); +#if TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER( + 
Name("_HostRecv").Device(DEVICE_SYCL).HostMemory("tensor"), RecvOp); +#endif + } // end namespace tensorflow diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc index b97df3e00c..532825a2a8 100644 --- a/tensorflow/core/kernels/variable_ops.cc +++ b/tensorflow/core/kernels/variable_ops.cc @@ -31,6 +31,17 @@ REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU), IsVariableInitializedOp); +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Variable") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("dtype"), \ + VariableOp); +TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL +#endif + #if GOOGLE_CUDA // Only register 'Variable' on GPU for the subset of types also supported by // 'Assign' (see dense_update_ops.cc.) diff --git a/tensorflow/core/lib/core/threadpool.cc b/tensorflow/core/lib/core/threadpool.cc index 534ef902fb..a2245bb28e 100644 --- a/tensorflow/core/lib/core/threadpool.cc +++ b/tensorflow/core/lib/core/threadpool.cc @@ -21,9 +21,11 @@ limitations under the License. 
#include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/setround.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" + namespace tensorflow { namespace thread { @@ -50,6 +52,8 @@ struct EigenEnvironment { return env_->StartThread(thread_options_, name_, [=]() { // Set the processor flag to flush denormals to zero port::ScopedFlushDenormal flush; + // Set the C++ rounding mode to ROUND TO NEAREST + port::ScopedSetRound round; f(); }); } diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 1fd641ee36..25972252c5 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -3859,7 +3859,7 @@ strides: 1-D of length 4. How far the centers of two consecutive patches are in rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the input stride, specifying how far two consecutive patch samples are in the input. Equivalent to extracting patches with - `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by + `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by subsampling them spatially by a factor of `rates`. padding: The type of padding algorithm to use. diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 56bd594644..f854ad288b 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -472,6 +472,25 @@ REGISTER_OP("Ceil") Returns element-wise smallest integer in not less than x. )doc"); +REGISTER_OP("Rint") + .Input("x: T") + .Output("y: T") + .Attr("T: {float, double}") + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Returns element-wise integer closest to x. + +If the result is midway between two representable values, +the even representable is chosen. 
+For example: + +``` +rint(-1.5) ==> -2.0 +rint(0.5000001) ==> 1.0 +rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] +``` +)doc"); + // Declares cwise binary operations signature: 't, 't -> 't. #define BINARY_MORE() \ diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index bf87c0a495..6d100a718d 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -6591,7 +6591,7 @@ op { attr { name: "rates" type: "list(int)" - description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by\nsubsampling them spatially by a factor of `rates`." + description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by\nsubsampling them spatially by a factor of `rates`." has_minimum: true minimum: 4 } diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl index 439bf97a2c..2fa2726bde 100644 --- a/tensorflow/core/platform/default/build_config_root.bzl +++ b/tensorflow/core/platform/default/build_config_root.bzl @@ -4,3 +4,6 @@ def tf_cuda_tests_tags(): return ["local"] + +def tf_sycl_tests_tags(): + return ["local"] diff --git a/tensorflow/core/platform/setround.cc b/tensorflow/core/platform/setround.cc new file mode 100644 index 0000000000..febb47781c --- /dev/null +++ b/tensorflow/core/platform/setround.cc @@ -0,0 +1,35 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/setround.h" + +#ifdef __STDC_IEC_559__ +#include <fenv.h> // fesetround, FE_* +#endif + +namespace tensorflow { +namespace port { + +ScopedSetRound::ScopedSetRound() { +#ifdef __STDC_IEC_559__ + std::fesetround(FE_TONEAREST); +#endif +} + +ScopedSetRound::~ScopedSetRound() { +} + +} // namespace port +} // namespace tensorflow diff --git a/tensorflow/core/platform/setround.h b/tensorflow/core/platform/setround.h new file mode 100644 index 0000000000..4b1b3fd497 --- /dev/null +++ b/tensorflow/core/platform/setround.h @@ -0,0 +1,38 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_PLATFORM_SETROUND_H_ +#define TENSORFLOW_PLATFORM_SETROUND_H_ + +#include "tensorflow/core/platform/macros.h" + +namespace tensorflow { +namespace port { + +// While this class is active, the floating point rounding mode is set to +// round-to-NEAREST (where supported). The destructor is currently a no-op. +class ScopedSetRound { + public: + ScopedSetRound(); + ~ScopedSetRound(); + + private: + TF_DISALLOW_COPY_AND_ASSIGN(ScopedSetRound); +}; + +} // namespace port +} // namespace tensorflow + +#endif // TENSORFLOW_PLATFORM_SETROUND_H_ diff --git a/tensorflow/core/platform/windows/env.cc b/tensorflow/core/platform/windows/env.cc index 41ce5d9320..904d06e2a9 100644 --- a/tensorflow/core/platform/windows/env.cc +++ b/tensorflow/core/platform/windows/env.cc @@ -72,7 +72,7 @@ class WindowsEnv : public Env { } bool MatchPath(const string& path, const string& pattern) override { - return PathMatchSpec(path.c_str(), pattern.c_str()) == S_OK; + return PathMatchSpec(path.c_str(), pattern.c_str()) == TRUE; } uint64 NowMicros() override { diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc index c6c42f0150..31516bb2ee 100644 --- a/tensorflow/core/platform/windows/windows_file_system.cc +++ b/tensorflow/core/platform/windows/windows_file_system.cc @@ -386,7 +386,7 @@ Status WindowsFileSystem::GetChildren(const string& dir, string pattern = translated_dir; if (!pattern.empty() && pattern.back() != '\\' && pattern.back() != '/') { - pattern += '\\*'; + pattern += "\\*"; } else { pattern += '*'; } diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 66af1897be..dd2dad417d 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -20,7 +20,7 @@ limitations under the License. 
#define TF_MAJOR_VERSION 0 #define TF_MINOR_VERSION 11 -#define TF_PATCH_VERSION 0rc2 +#define TF_PATCH_VERSION head // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc index abf8d77f86..f1885bb980 100644 --- a/tensorflow/core/util/example_proto_fast_parsing.cc +++ b/tensorflow/core/util/example_proto_fast_parsing.cc @@ -87,6 +87,8 @@ class Feature { *dtype = DT_INT64; break; default: + // Initialize variable to avoid compiler warning + *dtype = DT_INVALID; return errors::InvalidArgument("Unsuported datatype."); } return Status::OK(); diff --git a/tensorflow/core/util/tensor_format.h b/tensorflow/core/util/tensor_format.h index 560b6310b2..a21dee5d58 100644 --- a/tensorflow/core/util/tensor_format.h +++ b/tensorflow/core/util/tensor_format.h @@ -58,6 +58,7 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) { return 1 + NDIMS; default: LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value } } else if (format == FORMAT_NCHW) { switch (dimension) { @@ -77,9 +78,11 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) { return NDIMS + 1; default: LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value } } else { LOG(FATAL) << "Invalid format: " << static_cast<int>(format); + return -1; // Avoid compiler warning about missing return value } } diff --git a/tensorflow/core/util/tensor_slice_reader_cache.cc b/tensorflow/core/util/tensor_slice_reader_cache.cc index 9ac44f328e..06fc9aa444 100644 --- a/tensorflow/core/util/tensor_slice_reader_cache.cc +++ b/tensorflow/core/util/tensor_slice_reader_cache.cc @@ -52,7 +52,7 @@ const TensorSliceReader* TensorSliceReaderCache::GetReader( TensorSliceReader::OpenTableFunction open_function, int 
preferred_shard) { mutex_lock l(mu_); -#ifdef __GXX_RTTI +#if defined(__GXX_RTTI) || defined(_CPPRTTI) // Get the function pointer from the open_function value. TensorSliceReaderCache::OpenFuncType* func_ptr = open_function.target<TensorSliceReaderCache::OpenFuncType>(); diff --git a/tensorflow/g3doc/api_docs/python/array_ops.md b/tensorflow/g3doc/api_docs/python/array_ops.md index 61fecfdf09..b076bd28cf 100644 --- a/tensorflow/g3doc/api_docs/python/array_ops.md +++ b/tensorflow/g3doc/api_docs/python/array_ops.md @@ -1428,7 +1428,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension. 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the input stride, specifying how far two consecutive patch samples are in the input. Equivalent to extracting patches with - `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by + `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by subsampling them spatially by a factor of `rates`. * <b>`padding`</b>: A `string` from: `"SAME", "VALID"`. The type of padding algorithm to use. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md index e79dfd2f4d..3fbb0c303a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md @@ -3,7 +3,7 @@ Generates values in an interval. A sequence of `num` evenly-spaced values are generated beginning at `start`. -If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, +If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`, so that the last one is exactly `stop`. 
For example: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md index 82f44f6f4d..e898cbaa4f 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md @@ -11,8 +11,8 @@ the full softmax loss. At inference time, you can compute full softmax probabilities with the expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`. -See our [Candidate Sampling Algorithms Reference] -(../../extras/candidate_sampling.pdf) +See our +[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf) Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007) ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md index bf6f268d4f..853ce0176f 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md @@ -16,7 +16,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension. 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the input stride, specifying how far two consecutive patch samples are in the input. Equivalent to extracting patches with - `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by + `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by subsampling them spatially by a factor of `rates`. * <b>`padding`</b>: A `string` from: `"SAME", "VALID"`. The type of padding algorithm to use. 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md index 403621dc00..0867e30876 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md @@ -17,7 +17,7 @@ for k in 0..in_channels-1 filter[di, dj, k, q] Must have `strides[0] = strides[3] = 1`. For the most common case of the same -horizontal and vertices strides, `strides = [1, stride, stride, 1]`. +horizontal and vertical strides, `strides = [1, stride, stride, 1]`. ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md index f405aa7707..186209d3f5 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md @@ -42,8 +42,7 @@ with an otherwise unused class. where a sampled class equals one of the target classes. If set to `True`, this is a "Sampled Logistic" loss instead of NCE, and we are learning to generate log-odds instead of log probabilities. See - our [Candidate Sampling Algorithms Reference] - (../../extras/candidate_sampling.pdf). + our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf). Default is False. * <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md index 4feb46302d..e7d32923a1 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md @@ -11,8 +11,8 @@ each component is divided by the weighted, squared sum of inputs within sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) output = input / (bias + alpha * sqr_sum) ** beta -For details, see [Krizhevsky et al., ImageNet classification with deep -convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). +For details, see +[Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md index a7d2cf094f..8153224ed4 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md @@ -22,7 +22,7 @@ In detail, with the default NHWC format, filter[di, dj, q, k] Must have `strides[0] = strides[3] = 1`. For the most common case of the same -horizontal and vertices strides, `strides = [1, stride, stride, 1]`. +horizontal and vertical strides, `strides = [1, stride, stride, 1]`. 
##### Args: diff --git a/tensorflow/g3doc/get_started/os_setup.md b/tensorflow/g3doc/get_started/os_setup.md index 5ef608c7b8..431e214b30 100644 --- a/tensorflow/g3doc/get_started/os_setup.md +++ b/tensorflow/g3doc/get_started/os_setup.md @@ -63,37 +63,37 @@ Then, select the correct binary to install: ```bash # Ubuntu/Linux 64-bit, CPU only, Python 2.7 -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7 # Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below. -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl # Mac OS X, CPU only, Python 2.7: -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl # Mac OS X, GPU enabled, Python 2.7: -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.4 -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.4 # Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below. 
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.5 -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.5 # Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below. -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl # Mac OS X, CPU only, Python 3.4 or 3.5: -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl # Mac OS X, GPU enabled, Python 3.4 or 3.5: -$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl +$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl ``` Install TensorFlow: @@ -159,37 +159,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First ```bash # Ubuntu/Linux 64-bit, CPU only, Python 2.7 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7 # Requires CUDA toolkit 8.0 and CuDNN v5. 
For other versions, see "Installing from sources" below. -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl # Mac OS X, CPU only, Python 2.7: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl # Mac OS X, GPU enabled, Python 2.7: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.4 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.4 # Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below. 
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.5 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.5 # Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below. -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl # Mac OS X, CPU only, Python 3.4 or 3.5: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl # Mac OS X, GPU enabled, Python 3.4 or 3.5: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl ``` Finally install TensorFlow: @@ -298,37 +298,37 @@ select the correct binary to install: ```bash # Ubuntu/Linux 64-bit, CPU only, Python 2.7 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl # 
Ubuntu/Linux 64-bit, GPU enabled, Python 2.7 # Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below. -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl # Mac OS X, CPU only, Python 2.7: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl # Mac OS X, GPU enabled, Python 2.7: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.4 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.4 # Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below. 
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl # Ubuntu/Linux 64-bit, CPU only, Python 3.5 -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl # Ubuntu/Linux 64-bit, GPU enabled, Python 3.5 # Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below. -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl # Mac OS X, CPU only, Python 3.4 or 3.5: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl # Mac OS X, GPU enabled, Python 3.4 or 3.5: -(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl +(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl ``` Finally install TensorFlow: @@ -396,7 +396,7 @@ code. code. We also have tags with `latest` replaced by a released version (e.g., -`0.11.0rc2-gpu`). +`0.11.0-gpu`). 
With Docker the installation is as follows: @@ -781,7 +781,7 @@ $ bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_pack $ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg # The name of the .whl file will depend on your platform. -$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc2-py2-none-any.whl +$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0-py2-none-any.whl ``` ## Setting up TensorFlow for Development diff --git a/tensorflow/g3doc/how_tos/adding_an_op/index.md b/tensorflow/g3doc/how_tos/adding_an_op/index.md index 3be1efb13f..aa5dcfb4f2 100644 --- a/tensorflow/g3doc/how_tos/adding_an_op/index.md +++ b/tensorflow/g3doc/how_tos/adding_an_op/index.md @@ -44,6 +44,8 @@ add a call to the `REGISTER_OP` macro that defines the interface for such an Op: #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" +using namespace tensorflow; + REGISTER_OP("ZeroOut") .Input("to_zero: int32") .Output("zeroed: int32") @@ -236,12 +238,26 @@ class ZeroOutTest(tf.test.TestCase): with self.test_session(): result = zero_out_module.zero_out([5, 4, 3, 2, 1]) self.assertAllEqual(result.eval(), [5, 0, 0, 0, 0]) + +if __name__ == "__main__": + tf.test.main() +``` + +Add a 'zero_out_op_test' target to `tensorflow/python/kernel_tests/BUILD` among the other CPU-only test targets: + +``` +tf_py_test( + name = "zero_out_op_test", + size = "small", + srcs = ["zero_out_op_test.py"], + additional_deps = ["//tensorflow:tensorflow_py"], +) ``` Then run your test: ```sh -$ bazel test tensorflow/python:zero_out_op_test +$ bazel test //tensorflow/python/kernel_tests:zero_out_op_test ``` ## Validation @@ -895,7 +911,7 @@ For more details, see In general, changes to specifications must be backwards-compatible: changing the specification of an Op must not break prior serialized `GraphDef` protocol -buffers constructed from older specfications. 
The details of `GraphDef` +buffers constructed from older specifications. The details of `GraphDef` compatibility are [described here](../../resources/versions.md#graphs). There are several ways to preserve backwards-compatibility. @@ -1117,7 +1133,7 @@ found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/fr REGISTER_OP("ZeroOut") .Input("to_zero: int32") .Output("zeroed: int32") - .SetShapeFn([](::tensorflow::shape_inference::UnchangedShape); + .SetShapeFn(::tensorflow::shape_inference::UnchangedShape); ``` A shape function can also constrain the shape of an input. For the version of @@ -1193,7 +1209,7 @@ the following: ``` This specifies that the shape function should use the C++-implemented -shape specfication defined in your `REGISTER_OP` declaration above. Note +shape specification defined in your `REGISTER_OP` declaration above. Note that TensorFlow will soon make this the default, so you only need to define the shape function once in C++ to get shape inference for free in Python. diff --git a/tensorflow/g3doc/index.md b/tensorflow/g3doc/index.md index 4d7149961a..3bfba40416 100644 --- a/tensorflow/g3doc/index.md +++ b/tensorflow/g3doc/index.md @@ -1,10 +1,5 @@ # TensorFlow for Googlers -This site has TensorFlow documentation for Google engineers. The menu at the -left lists those parts of the public TensorFlow documentation that pertain to -Google engineers, along with some internal-only resources written specifically -for Google engineers. - TensorFlowâ„¢ is an open source software library for numerical computation using data flow graphs. Nodes in the graph represent mathematical operations, while the graph edges represent the multidimensional data arrays (tensors) that flow @@ -18,4 +13,4 @@ applicable in a wide variety of other domains as well. The following documents show you how to set up and use the TensorFlow system. ## Table of Contents -<!--#include virtual="sitemap.md" -->
\ No newline at end of file +<!--#include virtual="sitemap.md" --> diff --git a/tensorflow/models/embedding/word2vec.py b/tensorflow/models/embedding/word2vec.py index c5cb52bfbc..e463e300c1 100644 --- a/tensorflow/models/embedding/word2vec.py +++ b/tensorflow/models/embedding/word2vec.py @@ -147,6 +147,8 @@ class Options(object): # Where to write out summaries. self.save_path = FLAGS.save_path + if not os.path.exists(self.save_path): + os.makedirs(self.save_path) # Eval options. # The text file for eval. diff --git a/tensorflow/models/embedding/word2vec_optimized.py b/tensorflow/models/embedding/word2vec_optimized.py index a6e8f9277d..2efdf66867 100644 --- a/tensorflow/models/embedding/word2vec_optimized.py +++ b/tensorflow/models/embedding/word2vec_optimized.py @@ -126,6 +126,8 @@ class Options(object): # Where to write out summaries. self.save_path = FLAGS.save_path + if not os.path.exists(self.save_path): + os.makedirs(self.save_path) # Eval options. diff --git a/tensorflow/models/image/cifar10/cifar10.py b/tensorflow/models/image/cifar10/cifar10.py index 7df2149d40..1c51b76f09 100644 --- a/tensorflow/models/image/cifar10/cifar10.py +++ b/tensorflow/models/image/cifar10/cifar10.py @@ -207,8 +207,8 @@ def inference(images): wd=0.0) conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME') biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0)) - bias = tf.nn.bias_add(conv, biases) - conv1 = tf.nn.relu(bias, name=scope.name) + pre_activation = tf.nn.bias_add(conv, biases) + conv1 = tf.nn.relu(pre_activation, name=scope.name) _activation_summary(conv1) # pool1 @@ -226,8 +226,8 @@ def inference(images): wd=0.0) conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME') biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1)) - bias = tf.nn.bias_add(conv, biases) - conv2 = tf.nn.relu(bias, name=scope.name) + pre_activation = tf.nn.bias_add(conv, biases) + conv2 = tf.nn.relu(pre_activation, name=scope.name) 
_activation_summary(conv2) # norm2 diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index d626165dc0..dd55154a3a 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -122,6 +122,54 @@ _REGISTERED_EXPANSIONS = [ lambda feed: [feed])] # pylint: enable=g-long-lambda +def register_session_run_conversion_functions(tensor_type, fetch_function, + feed_function=None, feed_function_for_partial_run=None): + """Register fetch and feed conversion functions for `tf.Session.run()`. + + This function registers a triple of conversion functions for fetching and/or + feeding values of user-defined types in a call to tf.Session.run(). + + An example + + ```python + class SquaredTensor(object): + def __init__(self, tensor): + self.sq = tf.square(tensor) + #you can define conversion functions as follows: + fetch_function = lambda squared_tensor:([squared_tensor.sq], + lambda val: val[0]) + feed_function = lambda feed, feed_val: [(feed.sq, feed_val)] + feed_function_for_partial_run = lambda feed: [feed.sq] + #then after invoking this register function, you can use as follows: + session.run(squared_tensor1, + feed_dict = {squared_tensor2 : some_numpy_array}) + ``` + + Args: + tensor_type: The type for which you want to register a conversion function. + fetch_function: A callable that takes an object of type `tensor_type` and + returns a tuple, where the first element is a list of `tf.Tensor` objects, + and the second element is a callable that takes a list of ndarrays and + returns an object of some value type that corresponds to `tensor_type`. + fetch_function describes how to expand fetch into its component Tensors + and how to contract the fetched results back into a single return value. + feed_function: A callable that takes feed_key and feed_value as input, and + returns a list of tuples (feed_tensor, feed_val), feed_key must have type + `tensor_type`, and feed_tensor must have type `tf.Tensor`. 
Each feed + function describes how to unpack a single fed value and map it to feeds + of one or more tensors and their corresponding values. + feed_function_for_partial_run: A callable for specifying tensor values to + feed when setting up a partial run, which takes a `tensor_type` type + object as input, and returns a list of Tensors. + """ + for conversion_function in _REGISTERED_EXPANSIONS: + if issubclass(conversion_function[0], tensor_type): + raise ValueError( + '%s has already been registered so ignore it.', tensor_type) + return + _REGISTERED_EXPANSIONS.insert(0, + (tensor_type, fetch_function, feed_function, feed_function_for_partial_run)) + class _FetchMapper(object): """Definition of the interface provided by fetch mappers. diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index bf0a964867..a20376b91d 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -1554,6 +1554,33 @@ class SessionTest(test_util.TensorFlowTestCase): sess.run(enqueue_op) self.assertEqual(sess.run(q.size()), num_epochs * 2) + def testRegisterFetchAndFeedConversionFunctions(self): + class SquaredTensor(object): + def __init__(self, tensor): + self.sq = math_ops.square(tensor) + + fetch_fn = lambda squared_tensor: ([squared_tensor.sq], lambda val: val[0]) + feed_fn1 = lambda feed, feed_val: [(feed.sq, feed_val)] + feed_fn2 = lambda feed: [feed.sq] + + session.register_session_run_conversion_functions(SquaredTensor, fetch_fn, + feed_fn1, feed_fn2) + with self.assertRaises(ValueError): + session.register_session_run_conversion_functions(SquaredTensor, + fetch_fn, feed_fn1, feed_fn2) + with self.test_session() as sess: + np1 = np.array([1.0, 1.5, 2.0, 2.5]) + np2 = np.array([3.0, 3.5, 4.0, 4.5]) + squared_tensor = SquaredTensor(np2) + squared_eval = sess.run(squared_tensor) + self.assertAllClose(np2 * np2, squared_eval) + squared_eval = sess.run(squared_tensor, feed_dict={ + squared_tensor : 
np1 * np1}) + self.assertAllClose(np1 * np1, squared_eval) + partial_run = sess.partial_run_setup([squared_tensor], []) + squared_eval = sess.partial_run(partial_run, squared_tensor) + self.assertAllClose(np2 * np2, squared_eval) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index dc7c72e220..8db55a9359 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -12,6 +12,7 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_test") +load("//tensorflow:tensorflow.bzl", "sycl_py_test") # CPU only tests should use tf_py_test, GPU tests use cuda_py_test # Please avoid the py_tests and cuda_py_tests (plural) while we @@ -1362,6 +1363,13 @@ cuda_py_test( tags = ["nomsan"], # fails in msan from numpy calls ) +sycl_py_test( + name = "basic_gpu_test", + size = "small", + srcs = ["basic_gpu_test.py"], + additional_deps = ["//tensorflow:tensorflow_py"], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py new file mode 100644 index 0000000000..541c95aa28 --- /dev/null +++ b/tensorflow/python/kernel_tests/basic_gpu_test.py @@ -0,0 +1,61 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Functional tests for basic component wise operations using a GPU device.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +import math +import numpy as np +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args + +class GPUBinaryOpsTest(tf.test.TestCase): + def _compareGPU(self, x, y, np_func, tf_func): + with self.test_session(use_gpu=True) as sess: + inx = tf.convert_to_tensor(x) + iny = tf.convert_to_tensor(y) + out = tf_func(inx, iny) + tf_gpu = sess.run(out) + + with self.test_session(use_gpu=False) as sess: + inx = tf.convert_to_tensor(x) + iny = tf.convert_to_tensor(y) + out = tf_func(inx, iny) + tf_cpu = sess.run(out) + + self.assertAllClose(tf_cpu, tf_gpu) + + def testFloatBasic(self): + x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32) + y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32) + self._compareGPU(x, y, np.add, tf.add) + self._compareGPU(x, y, np.subtract, tf.sub) + self._compareGPU(x, y, np.multiply, tf.mul) + self._compareGPU(x, y + 0.1, np.true_divide, tf.truediv) + + #def _GetGradientArgs(self, xs, ys): + #with self.test_session(use_gpu=True) as sess: + # return sess.run(_broadcast_gradient_args(xs, ys)) + + #def testBroadcast(self): + #r0, r1 = self._GetGradientArgs([2, 3, 5], [1]) + #self.assertAllEqual(r0, []) + #self.assertAllEqual(r1, [0, 1, 2]) + +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index bca2030d01..3322d22b9f 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -1778,9 +1778,17 @@ class IsFiniteInfNanTest(tf.test.TestCase): class RoundingTest(tf.test.TestCase): - def _compare(self, 
x, use_gpu): + def _compare_values(self, x, y=None): + y = np.rint(x) if y is None else np.asarray(y) + with self.test_session() as sess: + tf_rint = tf.rint(x) + np_rint = sess.run(tf_rint) + self.assertAllEqual(y, np_rint) + self.assertShapeEqual(y, tf_rint) + + def _compare(self, x): np_floor, np_ceil = np.floor(x), np.ceil(x) - with self.test_session(use_gpu=use_gpu) as sess: + with self.test_session() as sess: inx = tf.convert_to_tensor(x) ofloor, oceil = tf.floor(inx), tf.ceil(inx) tf_floor, tf_ceil = sess.run([ofloor, oceil]) @@ -1790,9 +1798,20 @@ class RoundingTest(tf.test.TestCase): self.assertShapeEqual(np_ceil, oceil) def _testDtype(self, dtype): - data = (np.arange(-3, 3) / 4.).reshape([1, 3, 2]).astype(dtype) - self._compare(data, use_gpu=True) - self._compare(data, use_gpu=True) + data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(dtype) + self._compare(data) + # TODO: rint op is not supported for float16 + if dtype is np.float16: + return + self._compare_values(data) + x = [0.5, 0.5000001] + y = [0.0, 1.0] + self._compare_values(x, y=y) + + # numpy example + x = [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0] + y = [-2., -2., -0., 0., 2., 2., 2.] + self._compare_values(x, y=y) def testTypes(self): for dtype in [np.float16, np.float32, np.float64]: diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index fd39b8f4c6..1285ed1912 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -28,25 +28,27 @@ from tensorflow.python.ops import init_ops # Returns true iff the two initializers produce the same tensor to # within a tiny tolerance. -def identicaltest(tc, init1, init2): +def identicaltest(tc, init1, init2, shape=None): """Tests if two initializations are identical to within tiny tolerances. Args: tc: An instance of TensorFlowTestCase. 
init1: An Initializer that generates a tensor of a given shape init2: An Initializer that generates a tensor of a given shape + shape: Shape of the tensor to initialize or `None` to use a vector of length 100. Returns: True or False as determined by test. """ - num = 100 + if shape is None: + shape = [100] with tc.test_session(graph=tf.Graph()): - t1 = init1([num]).eval() + t1 = init1(shape).eval() with tc.test_session(graph=tf.Graph()): - t2 = init2([num]).eval() + t2 = init2(shape).eval() return np.allclose(t1, t2, rtol=1e-15, atol=1e-15) -def duplicated_initializer(tc, init, graph_seed): +def duplicated_initializer(tc, init, graph_seed, shape=None): """Tests duplicated random initializer within the same graph. This test generates two random kernels from the same initializer to the same @@ -58,14 +60,16 @@ def duplicated_initializer(tc, init, graph_seed): tc: An instance of TensorFlowTestCase. init: An Initializer that generates a tensor of a given shape graph_seed: A graph-level seed to use. + shape: Shape of the tensor to initialize or `None` to use a vector of length 100. Returns: True or False as determined by test. 
""" - num = 100 + if shape is None: + shape = [100] with tc.test_session(graph=tf.Graph()): random_seed.set_random_seed(graph_seed) - t1 = init([num]).eval() - t2 = init([num]).eval() + t1 = init(shape).eval() + t2 = init(shape).eval() return np.allclose(t1, t2, rtol=1e-15, atol=1e-15) @@ -444,5 +448,59 @@ class DeviceTest(tf.test.TestCase): self.assertDeviceEqual("/job:ps", var.initializer.device) +class OrthogonalInitializerTest(tf.test.TestCase): + + def testInitializerIdentical(self): + for dtype in [tf.float32, tf.float64]: + init1 = tf.orthogonal_initializer(seed=1, dtype=dtype) + init2 = tf.orthogonal_initializer(seed=1, dtype=dtype) + self.assertTrue(identicaltest(self, init1, init2, (10, 10))) + + def testInitializerDifferent(self): + for dtype in [tf.float32, tf.float64]: + init1 = tf.orthogonal_initializer(seed=1, dtype=dtype) + init2 = tf.orthogonal_initializer(seed=2, dtype=dtype) + self.assertFalse(identicaltest(self, init1, init2, (10, 10))) + + def testDuplicatedInitializer(self): + init = tf.orthogonal_initializer() + self.assertFalse(duplicated_initializer(self, init, 1, (10, 10))) + + def testInvalidDataType(self): + self.assertRaises( + ValueError, + tf.orthogonal_initializer, dtype=tf.string) + + def testInvalidShape(self): + init1 = tf.orthogonal_initializer() + with self.test_session(graph=tf.Graph(), use_gpu=True): + self.assertRaises(ValueError, init1, shape=[5]) + + def testGain(self): + shape = (10, 10) + for dtype in [tf.float32, tf.float64]: + init1 = tf.orthogonal_initializer(seed=1, dtype=dtype) + init2 = tf.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype) + with self.test_session(graph=tf.Graph(), use_gpu=True): + t1 = init1(shape).eval() + with self.test_session(graph=tf.Graph(), use_gpu=True): + t2 = init2(shape).eval() + return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15) + + def testShapesValues(self): + for dtype in [tf.float32, tf.float64]: + for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]: + 
init = tf.orthogonal_initializer(dtype=dtype) + with self.test_session(graph=tf.Graph(), use_gpu=True): + # Check the shape + t = init(shape).eval() + self.assertAllEqual(shape, t.shape) + # Check orthogonality by computing the inner product + t = t.reshape((np.prod(t.shape[:-1]), t.shape[-1])) + if t.shape[0] > t.shape[1]: + self.assertAllClose(np.dot(t.T, t), np.eye(t.shape[1])) + else: + self.assertAllClose(np.dot(t, t.T), np.eye(t.shape[0])) + if __name__ == "__main__": tf.test.main() diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index a037069d2f..0197e8f390 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -523,6 +523,10 @@ def _ExtractImagePatchesGrad(op, grad): batch_size, rows_in, cols_in, channels = [ dim.value for dim in op.inputs[0].get_shape() ] + input_bhwc = array_ops.shape(op.inputs[0]) + batch_size = input_bhwc[0] + channels = input_bhwc[3] + _, rows_out, cols_out, _ = [ dim.value for dim in op.outputs[0].get_shape() ] diff --git a/tensorflow/python/ops/image_grad.py b/tensorflow/python/ops/image_grad.py index 81f089660a..b6b61ab92c 100644 --- a/tensorflow/python/ops/image_grad.py +++ b/tensorflow/python/ops/image_grad.py @@ -35,10 +35,16 @@ def _ResizeNearestNeighborGrad(op, grad): Returns: The gradients w.r.t. the input and the output. 
""" + image = op.inputs[0] + if image.get_shape()[1:3].is_fully_defined(): + image_shape = image.get_shape()[1:3] + else: + image_shape = array_ops.shape(image)[1:3] + # pylint: disable=protected-access grads = gen_image_ops._resize_nearest_neighbor_grad( grad, - op.inputs[0].get_shape()[1:3], + image_shape, align_corners=op.get_attr("align_corners")) # pylint: enable=protected-access return [grads, None] diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 94364b408a..bbe9977d4b 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -40,6 +40,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import linalg_ops def _assert_float_dtype(dtype): @@ -343,3 +344,55 @@ class _RandomWalkInitializer(object): """Generate a tensor used to initialize a variable.""" return random_ops._random_walk(shape, self._nonlinearity, dtype, seed=self._seed) + + +def orthogonal_initializer(gain=1.0, dtype=dtypes.float32, seed=None): + """Returns an initializer that generates an orthogonal matrix or a reshaped + orthogonal matrix. + + If the shape of the tensor to initialize is two-dimensional, i is initialized + with an orthogonal matrix obtained from the singular value decomposition of a + matrix of uniform random numbers. + + If the shape of the tensor to initialize is more than two-dimensional, a matrix + of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])` is initialized, where + `n` is the length of the shape vector. The matrix is subsequently reshaped to + give a tensor of the desired shape. + + Args: + gain: multiplicative factor to apply to the orthogonal matrix + dtype: The type of the output. + seed: A Python integer. Used to create random seeds. See + [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed) + for behavior. 
+ + Returns: + An initializer that generates orthogonal tensors + + Raises: + ValueError: if `dtype` is not a floating point type or if `shape` has fewer than two entries. + """ + def _initializer(shape, dtype=_assert_float_dtype(dtype), partition_info=None): + # Check the shape + if len(shape) < 2: + raise ValueError('the tensor to initialize must be at least two-dimensional') + # Flatten the input shape with the last dimension remaining its original shape so it works for conv2d + num_rows = 1 + for dim in shape[:-1]: + num_rows *= dim + num_cols = shape[-1] + flat_shape = (num_rows, num_cols) + + # Generate a random matrix + a = random_ops.random_uniform(flat_shape, dtype=dtype, seed=seed) + # Compute the svd + _, u, v = linalg_ops.svd(a, full_matrices=False) + # Pick the appropriate singular value decomposition + if num_rows > num_cols: + q = u + else: + # Tensorflow departs from numpy conventions such that we need to transpose axes here + q = array_ops.transpose(v) + return gain * array_ops.reshape(q, shape) + + return _initializer diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index 8d999f0074..2e8328a98b 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -796,6 +796,12 @@ def _FloorGrad(_, unused_grad): return [None] +@ops.RegisterGradient("Rint") +def _RintGrad(_, unused_grad): + # the gradient of Rint is zero + return [None] + + @ops.RegisterGradient("BatchMatMul") def _BatchMatMul(op, grad): """Returns the gradient of x and y given the gradient of x * y.""" diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index a431b66b85..d9a723a163 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -75,6 +75,7 @@ mathematical functions to your graph. 
@@zeta @@polygamma @@betainc +@@rint ## Matrix Math Functions diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index 232b6e9cdf..f2a201a609 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -69,6 +69,7 @@ create variables contingent on certain conditions. @@uniform_unit_scaling_initializer @@zeros_initializer @@ones_initializer +@@orthogonal_initializer ## Variable Partitioners for Sharding diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py index 7af533e74f..70ecda1dda 100644 --- a/tensorflow/python/platform/tf_logging.py +++ b/tensorflow/python/platform/tf_logging.py @@ -171,7 +171,6 @@ def google2_log_prefix(level, timestamp=None, file_and_line=None): """Assemble a logline prefix using the google2 format.""" # pylint: disable=global-variable-not-assigned global _level_names - global _logfile_map, _logfile_map_mutex # pylint: enable=global-variable-not-assigned # Record current time diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 31b4461dad..47a83d51a4 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -41,6 +41,7 @@ def check_version(bazel_version): load( "//tensorflow/core:platform/default/build_config_root.bzl", "tf_cuda_tests_tags", + "tf_sycl_tests_tags", ) load( "@local_config_cuda//cuda:build_defs.bzl", @@ -886,6 +887,20 @@ def cuda_py_test(name, srcs, size="medium", data=[], main=None, args=[], additional_deps=additional_deps, flaky=flaky) +def sycl_py_test(name, srcs, size="medium", data=[], main=None, args=[], + shard_count=1, additional_deps=[], tags=[], flaky=0): + test_tags = tags + tf_sycl_tests_tags() + tf_py_test(name=name, + size=size, + srcs=srcs, + data=data, + main=main, + args=args, + tags=test_tags, + shard_count=shard_count, + additional_deps=additional_deps, + flaky=flaky) + def py_tests(name, srcs, size="medium", diff --git 
a/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat index c8f65402ff..45ddfaee9c 100644 --- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat +++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat @@ -28,10 +28,10 @@ CALL %REPO_ROOT%\tensorflow\tools\ci_build\windows\cpu\cmake\run_build.bat SET PIP_EXE="C:\Program Files\Anaconda3\Scripts\pip.exe" :: Uninstall tensorflow pip package, which might be a leftover from old runs. -%PIP_EXE% uninstall tensorflow +%PIP_EXE% uninstall -y tensorflow :: Install the pip package. -%PIP_EXE% install %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\tensorflow-0.11.0rc2_cmake_experimental-py3-none-any.whl +%PIP_EXE% install --upgrade %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\tensorflow-0.11.0rc2_cmake_experimental-py3-none-any.whl :: Run all python tests ctest -C Release --output-on-failure diff --git a/tensorflow/tools/docker/Dockerfile b/tensorflow/tools/docker/Dockerfile index 39da8b2e65..82973226fc 100644 --- a/tensorflow/tools/docker/Dockerfile +++ b/tensorflow/tools/docker/Dockerfile @@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \ && \ python -m ipykernel.kernelspec -ENV TENSORFLOW_VERSION 0.11.0rc2 +ENV TENSORFLOW_VERSION 0.11.0 # --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- # # These lines will be edited automatically by parameterized_docker_build.sh. # diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index b369bc10e7..30de920130 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \ && \ python -m ipykernel.kernelspec -ENV TENSORFLOW_VERSION 0.11.0rc2 +ENV TENSORFLOW_VERSION 0.11.0 # --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- # # These lines will be edited automatically by parameterized_docker_build.sh. 
# diff --git a/tensorflow/tools/gcs_test/Dockerfile b/tensorflow/tools/gcs_test/Dockerfile index 3b95e32665..a9db7ce9b0 100644 --- a/tensorflow/tools/gcs_test/Dockerfile +++ b/tensorflow/tools/gcs_test/Dockerfile @@ -17,7 +17,7 @@ RUN ./install_google_cloud_sdk.bash --disable-prompts --install-dir=/var/gcloud # Install nightly TensorFlow pip RUN pip install \ - https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl + https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp27-none-linux_x86_64.whl # Copy test files RUN mkdir -p /gcs-smoke/python diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 84c6a06a9a..8a15e0a296 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command from setuptools.command.install import install as InstallCommandBase from setuptools.dist import Distribution -_VERSION = '0.11.0rc2' +_VERSION = '0.11.0' REQUIRED_PACKAGES = [ 'numpy >= 1.11.0', diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc index c31f3c4e81..c5b3ca38ba 100644 --- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc +++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc @@ -154,7 +154,7 @@ class Generator { string GetPackageReferencePrefix(const FileDescriptor* fd) { string result = "::"; const string& package = fd->package(); - for (int i = 0; i < package.size(); ++i) { + for (size_t i = 0; i < package.size(); ++i) { if (package[i] == '.') { result += "::"; } else { @@ -446,6 +446,7 @@ 
void Generator::AppendParseMessageFunction(const Descriptor& md) { Print("StringPiece identifier;"); Print("if (!scanner->GetResult(nullptr, &identifier)) return false;"); Print("bool parsed_colon = false;"); + Print("(void)parsed_colon;"); // Avoid "set but not used" compiler warning Print("ProtoSpaceAndComments(scanner);"); Print("if (scanner->Peek() == ':') {"); Nest().Print("parsed_colon = true;"); @@ -675,7 +676,7 @@ void Generator::AppendMessageFunctions(const Descriptor& md) { void Generator::AddNamespaceToCurrentSection(const string& package, bool open) { Print(); std::vector<string> parts = {""}; - for (int i = 0; i < package.size(); ++i) { + for (size_t i = 0; i < package.size(); ++i) { if (package[i] == '.') { parts.resize(parts.size() + 1); } else { diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 617101a306..06e16cdb04 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -17,8 +17,8 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): # These lines need to be changed when updating Eigen. They are parsed from # this file by the cmake and make builds to determine the eigen version and # hash. - eigen_version = "3d41a24add9b" - eigen_sha256 = "ce91f8db04493096c0f8bd5ebca7f5a295c88874cda99b3e9d99c9ed10154f99" + eigen_version = "62bdceacdafa" + eigen_sha256 = "c66f4693a0fd1f5c2cf009e01eb49671ce9cbb56874c3d07d3b8928ffc132cec" native.new_http_archive( name = "eigen_archive", |