aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2016-11-17 15:37:00 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-11-17 15:45:18 -0800
commit54e5000e0b980abe905900599c4493fadae34a15 (patch)
treec1f0751e5565882a77646589360fe090bec3f3fc /tensorflow
parent8a5610cd9f0b7087c1a7e97071ba1cf9b885315a (diff)
Merge changes from github.
Change: 139516555
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/c/c_api.cc2
-rw-r--r--tensorflow/contrib/cmake/CMakeLists.txt3
-rw-r--r--tensorflow/contrib/cmake/external/farmhash.cmake6
-rw-r--r--tensorflow/contrib/cmake/external/gemmlowp.cmake6
-rw-r--r--tensorflow/contrib/cmake/external/grpc.cmake2
-rw-r--r--tensorflow/contrib/cmake/external/highwayhash.cmake6
-rw-r--r--tensorflow/contrib/cmake/external/jpeg.cmake8
-rw-r--r--tensorflow/contrib/cmake/external/jsoncpp.cmake2
-rw-r--r--tensorflow/contrib/cmake/external/protobuf.cmake2
-rw-r--r--tensorflow/contrib/cmake/setup.py2
-rw-r--r--tensorflow/contrib/cmake/tf_core_kernels.cmake10
-rw-r--r--tensorflow/contrib/cmake/tf_core_ops.cmake11
-rw-r--r--tensorflow/contrib/cmake/tf_python.cmake237
-rw-r--r--tensorflow/contrib/cmake/tf_tests.cmake4
-rw-r--r--tensorflow/contrib/factorization/kernels/wals_solver_ops.cc2
-rw-r--r--tensorflow/contrib/makefile/proto_text_cc_files.txt1
-rw-r--r--tensorflow/contrib/metrics/kernels/set_kernels.cc2
-rw-r--r--tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py20
-rw-r--r--tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py4
-rw-r--r--tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc2
-rw-r--r--tensorflow/core/BUILD5
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_allocator.cc35
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_allocator.h45
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device.cc65
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device.h47
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device_context.cc155
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device_context.h18
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device_factory.cc15
-rw-r--r--tensorflow/core/distributed_runtime/master.cc6
-rw-r--r--tensorflow/core/distributed_runtime/rpc/grpc_channel.cc2
-rw-r--r--tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc2
-rw-r--r--tensorflow/core/distributed_runtime/tensor_coding.cc2
-rw-r--r--tensorflow/core/kernels/avgpooling_op_gpu.cu.cc2
-rw-r--r--tensorflow/core/kernels/bcast_ops.cc10
-rw-r--r--tensorflow/core/kernels/constant_op.cc3
-rw-r--r--tensorflow/core/kernels/control_flow_ops.cc45
-rw-r--r--tensorflow/core/kernels/cwise_op_add_1.cc12
-rw-r--r--tensorflow/core/kernels/cwise_op_div.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_floor_div.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc26
-rw-r--r--tensorflow/core/kernels/cwise_op_isfinite.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_isinf.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_isnan.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_mul_1.cc11
-rw-r--r--tensorflow/core/kernels/cwise_op_rint.cc23
-rw-r--r--tensorflow/core/kernels/cwise_op_sub.cc10
-rw-r--r--tensorflow/core/kernels/cwise_ops.h21
-rw-r--r--tensorflow/core/kernels/cwise_ops_sycl_common.h50
-rw-r--r--tensorflow/core/kernels/cwise_ops_test.cc5
-rw-r--r--tensorflow/core/kernels/dense_update_ops.cc15
-rw-r--r--tensorflow/core/kernels/eigen_pooling.h2
-rw-r--r--tensorflow/core/kernels/eigen_spatial_convolutions.h3
-rw-r--r--tensorflow/core/kernels/gather_nd_op_gpu.cu.cc4
-rw-r--r--tensorflow/core/kernels/identity_op.cc1
-rw-r--r--tensorflow/core/kernels/matrix_inverse_op.cc2
-rw-r--r--tensorflow/core/kernels/matrix_solve_op.cc4
-rw-r--r--tensorflow/core/kernels/scatter_nd_op.cc6
-rw-r--r--tensorflow/core/kernels/scatter_nd_op_cpu_impl.h2
-rw-r--r--tensorflow/core/kernels/sendrecv_ops.cc7
-rw-r--r--tensorflow/core/kernels/variable_ops.cc11
-rw-r--r--tensorflow/core/lib/core/threadpool.cc4
-rw-r--r--tensorflow/core/ops/array_ops.cc2
-rw-r--r--tensorflow/core/ops/math_ops.cc19
-rw-r--r--tensorflow/core/ops/ops.pbtxt2
-rw-r--r--tensorflow/core/platform/default/build_config_root.bzl3
-rw-r--r--tensorflow/core/platform/setround.cc35
-rw-r--r--tensorflow/core/platform/setround.h38
-rw-r--r--tensorflow/core/platform/windows/env.cc2
-rw-r--r--tensorflow/core/platform/windows/windows_file_system.cc2
-rw-r--r--tensorflow/core/public/version.h2
-rw-r--r--tensorflow/core/util/example_proto_fast_parsing.cc2
-rw-r--r--tensorflow/core/util/tensor_format.h3
-rw-r--r--tensorflow/core/util/tensor_slice_reader_cache.cc2
-rw-r--r--tensorflow/g3doc/api_docs/python/array_ops.md2
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md2
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md4
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md2
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md2
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md3
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md4
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md2
-rw-r--r--tensorflow/g3doc/get_started/os_setup.md64
-rw-r--r--tensorflow/g3doc/how_tos/adding_an_op/index.md24
-rw-r--r--tensorflow/g3doc/index.md7
-rw-r--r--tensorflow/models/embedding/word2vec.py2
-rw-r--r--tensorflow/models/embedding/word2vec_optimized.py2
-rw-r--r--tensorflow/models/image/cifar10/cifar10.py8
-rw-r--r--tensorflow/python/client/session.py48
-rw-r--r--tensorflow/python/client/session_test.py27
-rw-r--r--tensorflow/python/kernel_tests/BUILD8
-rw-r--r--tensorflow/python/kernel_tests/basic_gpu_test.py61
-rw-r--r--tensorflow/python/kernel_tests/cwise_ops_test.py29
-rw-r--r--tensorflow/python/kernel_tests/init_ops_test.py74
-rw-r--r--tensorflow/python/ops/array_grad.py4
-rw-r--r--tensorflow/python/ops/image_grad.py8
-rw-r--r--tensorflow/python/ops/init_ops.py53
-rw-r--r--tensorflow/python/ops/math_grad.py6
-rw-r--r--tensorflow/python/ops/math_ops.py1
-rw-r--r--tensorflow/python/ops/state_ops.py1
-rw-r--r--tensorflow/python/platform/tf_logging.py1
-rw-r--r--tensorflow/tensorflow.bzl15
-rw-r--r--tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat4
-rw-r--r--tensorflow/tools/docker/Dockerfile2
-rw-r--r--tensorflow/tools/docker/Dockerfile.gpu2
-rw-r--r--tensorflow/tools/gcs_test/Dockerfile2
-rw-r--r--tensorflow/tools/pip_package/setup.py2
-rw-r--r--tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc5
-rw-r--r--tensorflow/workspace.bzl4
108 files changed, 1402 insertions, 253 deletions
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 17d72835a0..a9c426cc34 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -1612,7 +1612,7 @@ TF_Operation* TF_GraphNextOperation(TF_Graph* graph, size_t* pos) {
}
mutex_lock l(graph->mu);
- while (*pos < graph->graph.num_node_ids()) {
+ while (*pos < static_cast<size_t>(graph->graph.num_node_ids())) {
Node* node = graph->graph.FindNodeId(*pos);
// FindNodeId() returns nullptr for nodes that have been deleted.
// We aren't currently allowing nodes to be deleted, but it is safer
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index a935e31f17..b5246cb151 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.1)
# Project
project(tensorflow C CXX)
+# Set C++14 as standard for the whole project
+set(CMAKE_CXX_STANDARD 14)
+
# Actual source is the ../../.. directory
get_filename_component(tf_contrib_source_dir ${tensorflow_SOURCE_DIR} PATH)
get_filename_component(tf_tf_source_dir ${tf_contrib_source_dir} PATH)
diff --git a/tensorflow/contrib/cmake/external/farmhash.cmake b/tensorflow/contrib/cmake/external/farmhash.cmake
index b2c13a14fb..f6805a33aa 100644
--- a/tensorflow/contrib/cmake/external/farmhash.cmake
+++ b/tensorflow/contrib/cmake/external/farmhash.cmake
@@ -3,8 +3,8 @@ include (ExternalProject)
set(farmhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive/util)
set(farmhash_URL https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip)
set(farmhash_HASH SHA256=e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28)
-set(farmhash_BUILD ${CMAKE_BINARY_DIR}/farmhash/src/farmhash)
-set(farmhash_INSTALL ${CMAKE_BINARY_DIR}/farmhash/install)
+set(farmhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/farmhash/src/farmhash)
+set(farmhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/farmhash/install)
set(farmhash_INCLUDES ${farmhash_BUILD})
set(farmhash_HEADERS
"${farmhash_BUILD}/src/farmhash.h"
@@ -19,7 +19,7 @@ if(WIN32)
URL_HASH ${farmhash_HASH}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
INSTALL_DIR ${farmhash_INSTALL}
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index 024c064cf4..93a0c8d864 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -2,8 +2,8 @@ include (ExternalProject)
set(gemmlowp_URL http://github.com/google/gemmlowp/archive/a6f29d8ac48d63293f845f2253eccbf86bc28321.tar.gz)
set(gemmlowp_HASH SHA256=75d40ea8e68b0d1644f052fffe8f14a410b2a73d40ccb859a95c0578d194ec26)
-set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
-set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
+set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
+set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
ExternalProject_Add(gemmlowp
PREFIX gemmlowp
@@ -11,5 +11,5 @@ ExternalProject_Add(gemmlowp
URL_HASH ${gemmlowp_HASH}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
INSTALL_COMMAND "")
diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake
index 1c34458ec8..c33b0dd81e 100644
--- a/tensorflow/contrib/cmake/external/grpc.cmake
+++ b/tensorflow/contrib/cmake/external/grpc.cmake
@@ -24,7 +24,7 @@ ExternalProject_Add(grpc
GIT_TAG ${GRPC_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
INSTALL_COMMAND ""
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
diff --git a/tensorflow/contrib/cmake/external/highwayhash.cmake b/tensorflow/contrib/cmake/external/highwayhash.cmake
index a19ebfe24f..9f80be32cb 100644
--- a/tensorflow/contrib/cmake/external/highwayhash.cmake
+++ b/tensorflow/contrib/cmake/external/highwayhash.cmake
@@ -3,8 +3,8 @@ include (ExternalProject)
set(highwayhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/highwayhash)
set(highwayhash_URL https://github.com/google/highwayhash.git)
set(highwayhash_TAG be5edafc2e1a455768e260ccd68ae7317b6690ee)
-set(highwayhash_BUILD ${CMAKE_BINARY_DIR}/highwayhash/src/highwayhash)
-set(highwayhash_INSTALL ${CMAKE_BINARY_DIR}/highwayhash/install)
+set(highwayhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/src/highwayhash)
+set(highwayhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/install)
# put highwayhash includes in the directory where they are expected
add_custom_target(highwayhash_create_destination_dir
@@ -28,7 +28,7 @@ ExternalProject_Add(highwayhash
GIT_TAG ${highwayhash_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
INSTALL_DIR ${highwayhash_INSTALL}
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
diff --git a/tensorflow/contrib/cmake/external/jpeg.cmake b/tensorflow/contrib/cmake/external/jpeg.cmake
index a94eb65ddb..cde037949c 100644
--- a/tensorflow/contrib/cmake/external/jpeg.cmake
+++ b/tensorflow/contrib/cmake/external/jpeg.cmake
@@ -3,8 +3,8 @@ include (ExternalProject)
set(jpeg_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/jpeg_archive)
set(jpeg_URL http://www.ijg.org/files/jpegsrc.v9a.tar.gz)
set(jpeg_HASH SHA256=3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7)
-set(jpeg_BUILD ${CMAKE_BINARY_DIR}/jpeg/src/jpeg)
-set(jpeg_INSTALL ${CMAKE_BINARY_DIR}/jpeg/install)
+set(jpeg_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jpeg/src/jpeg)
+set(jpeg_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/jpeg/install)
if(WIN32)
set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.lib)
@@ -32,7 +32,7 @@ if (WIN32)
PREFIX jpeg
URL ${jpeg_URL}
URL_HASH ${jpeg_HASH}
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
INSTALL_DIR ${jpeg_INSTALL}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
CMAKE_CACHE_ARGS
@@ -42,7 +42,7 @@ if (WIN32)
)
ExternalProject_Add_Step(jpeg copy_jconfig
- COMMAND ${CMAKE_COMMAND} -E copy
+ COMMAND ${CMAKE_COMMAND} -E copy
${jpeg_BUILD}/jconfig.vc ${jpeg_BUILD}/jconfig.h
DEPENDEES patch
DEPENDERS build
diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake
index 75d5d72703..43d6e0456c 100644
--- a/tensorflow/contrib/cmake/external/jsoncpp.cmake
+++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake
@@ -4,7 +4,7 @@ set(jsoncpp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp)
#set(jsoncpp_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src)
set(jsoncpp_URL https://github.com/open-source-parsers/jsoncpp.git)
set(jsoncpp_TAG 4356d9b)
-set(jsoncpp_BUILD ${CMAKE_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
+set(jsoncpp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
set(jsoncpp_LIBRARIES ${jsoncpp_BUILD}/obj/so/libjsoncpp.so)
set(jsoncpp_INCLUDES ${jsoncpp_BUILD})
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index 2155c30185..5ee6987175 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -20,7 +20,7 @@ ExternalProject_Add(protobuf
GIT_TAG ${PROTOBUF_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- SOURCE_DIR ${CMAKE_BINARY_DIR}/protobuf/src/protobuf
+ SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf
CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/
-Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
diff --git a/tensorflow/contrib/cmake/setup.py b/tensorflow/contrib/cmake/setup.py
index 78cb546f85..b036016f5e 100644
--- a/tensorflow/contrib/cmake/setup.py
+++ b/tensorflow/contrib/cmake/setup.py
@@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution
-_VERSION = '0.11.0rc2-cmake-experimental'
+_VERSION = '0.11.0-cmake-experimental'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index d4f44803a3..99aa347f6d 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -89,8 +89,6 @@ if(WIN32)
"${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
- "${tensorflow_source_dir}/tensorflow/core/kernels/svd*.cc"
- "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op.*"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
endif(WIN32)
@@ -100,14 +98,6 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
)
-if(WIN32)
- file(GLOB_RECURSE tf_core_gpu_kernels_exclude_srcs
- # not working on windows yet
- "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc"
- )
- list(REMOVE_ITEM tf_core_gpu_kernels_srcs ${tf_core_gpu_kernels_exclude_srcs})
-endif(WIN32)
-
add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
add_dependencies(tf_core_kernels tf_core_cpu)
diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake
index a25b37c754..5523023cb7 100644
--- a/tensorflow/contrib/cmake/tf_core_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_core_ops.cmake
@@ -37,6 +37,17 @@ foreach(tf_op_lib_name ${tf_op_lib_names})
add_dependencies(tf_${tf_op_lib_name} tf_core_framework)
endforeach()
+function(GENERATE_CONTRIB_OP_LIBRARY op_lib_name cc_srcs)
+ add_library(tf_contrib_${op_lib_name}_ops OBJECT ${cc_srcs})
+ add_dependencies(tf_contrib_${op_lib_name}_ops tf_core_framework)
+endfunction()
+
+GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc")
+GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc")
+GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc")
+GENERATE_CONTRIB_OP_LIBRARY(framework_variable "${tensorflow_source_dir}/tensorflow/contrib/framework/ops/variable_ops.cc")
+
+
########################################################
# tf_user_ops library
########################################################
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 9a9aed4375..072d01200e 100644
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -48,24 +48,6 @@ endif(NOT NUMPY_INCLUDE_DIR)
# TODO(mrry): Configure this to build in a directory other than tf_python/
-# tf_python_srcs contains all static .py files
-file(GLOB_RECURSE tf_python_srcs RELATIVE ${tensorflow_source_dir}
- "${tensorflow_source_dir}/tensorflow/python/*.py"
-)
-list(APPEND tf_python_srcs "tensorflow/__init__.py")
-
-# tf_python_copy_scripts_to_destination copies all Python files
-# (including static source and generated protobuf wrappers, but *not*
-# generated TensorFlow op wrappers) into tf_python/.
-add_custom_target(tf_python_copy_scripts_to_destination)
-
-# Copy static files to tf_python/.
-foreach(script ${tf_python_srcs})
- get_filename_component(REL_DIR ${script} DIRECTORY)
- add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
-endforeach()
-
# Generates the Python protobuf wrappers.
# ROOT_DIR must be absolute; subsequent arguments are interpreted as
# paths of .proto files, and must be relative to ROOT_DIR.
@@ -129,6 +111,8 @@ endfunction()
file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
"${tensorflow_source_dir}/tensorflow/core/*.proto"
"${tensorflow_source_dir}/tensorflow/python/*.proto"
+ "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
+ "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
)
RELATIVE_PROTOBUF_GENERATE_PYTHON(
${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_protos_python_srcs}
@@ -140,18 +124,36 @@ RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
add_library(tf_python_protos_cc ${PROTO_SRCS} ${PROTO_HDRS})
+
# tf_python_touchup_modules adds empty __init__.py files to all
# directories containing Python code, so that Python will recognize
# them as modules.
-add_custom_target(tf_python_touchup_modules
- DEPENDS tf_python_copy_scripts_to_destination
-)
+add_custom_target(tf_python_touchup_modules)
+# tf_python_copy_scripts_to_destination copies all Python files
+# (including static source and generated protobuf wrappers, but *not*
+# generated TensorFlow op wrappers) into tf_python/.
+add_custom_target(tf_python_copy_scripts_to_destination DEPENDS tf_python_touchup_modules)
+
+
+# tf_python_srcs contains all static .py files
function(add_python_module MODULE_NAME)
+ set(options DONTCOPY)
+ cmake_parse_arguments(ADD_PYTHON_MODULE "${options}" "" "" ${ARGN})
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}")
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E touch "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}/__init__.py")
+ file(GLOB module_python_srcs RELATIVE ${tensorflow_source_dir}
+ "${tensorflow_source_dir}/${MODULE_NAME}/*.py"
+ )
+ if(NOT ${ADD_PYTHON_MODULE_DONTCOPY})
+ foreach(script ${module_python_srcs})
+ get_filename_component(REL_DIR ${script} DIRECTORY)
+ add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
+ endforeach()
+ endif()
endfunction()
add_python_module("tensorflow")
@@ -164,33 +166,205 @@ add_python_module("tensorflow/core/protobuf")
add_python_module("tensorflow/core/util")
add_python_module("tensorflow/python")
add_python_module("tensorflow/python/client")
+add_python_module("tensorflow/python/debug")
+add_python_module("tensorflow/python/debug/cli")
+add_python_module("tensorflow/python/debug/examples")
+add_python_module("tensorflow/python/debug/wrappers")
add_python_module("tensorflow/python/framework")
-add_python_module("tensorflow/python/ops")
add_python_module("tensorflow/python/kernel_tests")
add_python_module("tensorflow/python/lib")
add_python_module("tensorflow/python/lib/core")
-add_python_module("tensorflow/python/lib/core/io")
+add_python_module("tensorflow/python/lib/io")
+add_python_module("tensorflow/python/ops")
add_python_module("tensorflow/python/platform")
add_python_module("tensorflow/python/platform/default")
add_python_module("tensorflow/python/platform/summary")
-add_python_module("tensorflow/python/platform/summary/impl")
+add_python_module("tensorflow/python/summary")
+add_python_module("tensorflow/python/summary/impl")
+add_python_module("tensorflow/python/summary/writer")
add_python_module("tensorflow/python/tools")
add_python_module("tensorflow/python/training")
+add_python_module("tensorflow/python/user_ops")
add_python_module("tensorflow/python/util")
add_python_module("tensorflow/python/util/protobuf")
-add_python_module("tensorflow/contrib")
+
+add_python_module("tensorflow/contrib/")
+add_python_module("tensorflow/contrib/android")
+add_python_module("tensorflow/contrib/android/java")
+add_python_module("tensorflow/contrib/android/java/org")
+add_python_module("tensorflow/contrib/android/java/org/tensorflow")
+add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib")
+add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib/android")
+add_python_module("tensorflow/contrib/android/jni")
add_python_module("tensorflow/contrib/bayesflow")
+add_python_module("tensorflow/contrib/bayesflow/examples")
+add_python_module("tensorflow/contrib/bayesflow/examples/reinforce_simple")
add_python_module("tensorflow/contrib/bayesflow/python")
+add_python_module("tensorflow/contrib/bayesflow/python/kernel_tests")
add_python_module("tensorflow/contrib/bayesflow/python/ops")
-add_python_module("tensorflow/contrib/bayesflow/python/ops/bernoulli")
+add_python_module("tensorflow/contrib/copy_graph")
+add_python_module("tensorflow/contrib/copy_graph/python")
+add_python_module("tensorflow/contrib/copy_graph/python/util")
+add_python_module("tensorflow/contrib/crf")
+add_python_module("tensorflow/contrib/crf/python")
+add_python_module("tensorflow/contrib/crf/python/kernel_tests")
+add_python_module("tensorflow/contrib/crf/python/ops")
+add_python_module("tensorflow/contrib/cudnn_rnn")
+add_python_module("tensorflow/contrib/cudnn_rnn/kernels")
+add_python_module("tensorflow/contrib/cudnn_rnn/ops")
+add_python_module("tensorflow/contrib/cudnn_rnn/python")
+add_python_module("tensorflow/contrib/cudnn_rnn/python/kernel_tests")
+add_python_module("tensorflow/contrib/cudnn_rnn/python/ops")
+add_python_module("tensorflow/contrib/distributions")
+add_python_module("tensorflow/contrib/distributions/python")
+add_python_module("tensorflow/contrib/distributions/python/kernel_tests")
+add_python_module("tensorflow/contrib/distributions/python/ops")
+add_python_module("tensorflow/contrib/factorization")
+add_python_module("tensorflow/contrib/factorization/examples")
+add_python_module("tensorflow/contrib/factorization/kernels")
+add_python_module("tensorflow/contrib/factorization/ops")
+add_python_module("tensorflow/contrib/factorization/python")
+add_python_module("tensorflow/contrib/factorization/python/kernel_tests")
+add_python_module("tensorflow/contrib/factorization/python/ops")
+add_python_module("tensorflow/contrib/ffmpeg")
+add_python_module("tensorflow/contrib/ffmpeg/default")
+add_python_module("tensorflow/contrib/ffmpeg/testdata")
add_python_module("tensorflow/contrib/framework")
+add_python_module("tensorflow/contrib/framework/kernels")
+add_python_module("tensorflow/contrib/framework/ops")
add_python_module("tensorflow/contrib/framework/python")
add_python_module("tensorflow/contrib/framework/python/framework")
+add_python_module("tensorflow/contrib/framework/python/ops")
+add_python_module("tensorflow/contrib/graph_editor")
+add_python_module("tensorflow/contrib/graph_editor/examples")
+add_python_module("tensorflow/contrib/graph_editor/tests")
+add_python_module("tensorflow/contrib/grid_rnn")
+add_python_module("tensorflow/contrib/grid_rnn/python")
+add_python_module("tensorflow/contrib/grid_rnn/python/kernel_tests")
+add_python_module("tensorflow/contrib/grid_rnn/python/ops")
+add_python_module("tensorflow/contrib/integrate")
+add_python_module("tensorflow/contrib/integrate/python")
+add_python_module("tensorflow/contrib/integrate/python/ops")
+add_python_module("tensorflow/contrib/ios_examples")
+add_python_module("tensorflow/contrib/ios_examples/benchmark")
+add_python_module("tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj")
+add_python_module("tensorflow/contrib/ios_examples/benchmark/data")
+add_python_module("tensorflow/contrib/ios_examples/camera")
+add_python_module("tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj")
+add_python_module("tensorflow/contrib/ios_examples/camera/data")
+add_python_module("tensorflow/contrib/ios_examples/camera/en.lproj")
+add_python_module("tensorflow/contrib/ios_examples/simple")
+add_python_module("tensorflow/contrib/ios_examples/simple/data")
+add_python_module("tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj")
add_python_module("tensorflow/contrib/layers")
+add_python_module("tensorflow/contrib/layers/kernels")
+add_python_module("tensorflow/contrib/layers/ops")
add_python_module("tensorflow/contrib/layers/python")
+add_python_module("tensorflow/contrib/layers/python/kernel_tests")
add_python_module("tensorflow/contrib/layers/python/layers")
add_python_module("tensorflow/contrib/layers/python/ops")
-
+add_python_module("tensorflow/contrib/learn")
+add_python_module("tensorflow/contrib/learn/python")
+add_python_module("tensorflow/contrib/learn/python/learn")
+add_python_module("tensorflow/contrib/learn/python/learn/dataframe")
+add_python_module("tensorflow/contrib/learn/python/learn/dataframe/queues")
+add_python_module("tensorflow/contrib/learn/python/learn/dataframe/transforms")
+add_python_module("tensorflow/contrib/learn/python/learn/datasets")
+add_python_module("tensorflow/contrib/learn/python/learn/datasets/data")
+add_python_module("tensorflow/contrib/learn/python/learn/estimators")
+add_python_module("tensorflow/contrib/learn/python/learn/learn_io")
+add_python_module("tensorflow/contrib/learn/python/learn/ops")
+add_python_module("tensorflow/contrib/learn/python/learn/preprocessing")
+add_python_module("tensorflow/contrib/learn/python/learn/preprocessing/tests")
+add_python_module("tensorflow/contrib/learn/python/learn/tests")
+add_python_module("tensorflow/contrib/learn/python/learn/tests/dataframe")
+add_python_module("tensorflow/contrib/learn/python/learn/utils")
+add_python_module("tensorflow/contrib/linear_optimizer")
+add_python_module("tensorflow/contrib/linear_optimizer/kernels")
+add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc")
+add_python_module("tensorflow/contrib/linear_optimizer/python")
+add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests")
+add_python_module("tensorflow/contrib/linear_optimizer/python/ops")
+add_python_module("tensorflow/contrib/lookup")
+add_python_module("tensorflow/contrib/losses")
+add_python_module("tensorflow/contrib/losses/python")
+add_python_module("tensorflow/contrib/losses/python/losses")
+add_python_module("tensorflow/contrib/makefile")
+add_python_module("tensorflow/contrib/makefile/test")
+add_python_module("tensorflow/contrib/metrics")
+add_python_module("tensorflow/contrib/metrics/kernels")
+add_python_module("tensorflow/contrib/metrics/ops")
+add_python_module("tensorflow/contrib/metrics/python")
+add_python_module("tensorflow/contrib/metrics/python/kernel_tests")
+add_python_module("tensorflow/contrib/metrics/python/metrics")
+add_python_module("tensorflow/contrib/metrics/python/ops")
+add_python_module("tensorflow/contrib/ndlstm")
+add_python_module("tensorflow/contrib/ndlstm/python")
+add_python_module("tensorflow/contrib/opt")
+add_python_module("tensorflow/contrib/opt/python")
+add_python_module("tensorflow/contrib/opt/python/training")
+add_python_module("tensorflow/contrib/pi_examples")
+add_python_module("tensorflow/contrib/pi_examples/camera")
+add_python_module("tensorflow/contrib/pi_examples/label_image")
+add_python_module("tensorflow/contrib/pi_examples/label_image/data")
+add_python_module("tensorflow/contrib/quantization")
+add_python_module("tensorflow/contrib/quantization/python")
+add_python_module("tensorflow/contrib/rnn")
+add_python_module("tensorflow/contrib/rnn/kernels")
+add_python_module("tensorflow/contrib/rnn/ops")
+add_python_module("tensorflow/contrib/rnn/python")
+add_python_module("tensorflow/contrib/rnn/python/kernel_tests")
+add_python_module("tensorflow/contrib/rnn/python/ops")
+add_python_module("tensorflow/contrib/seq2seq")
+add_python_module("tensorflow/contrib/seq2seq/python")
+add_python_module("tensorflow/contrib/seq2seq/python/kernel_tests")
+add_python_module("tensorflow/contrib/seq2seq/python/ops")
+add_python_module("tensorflow/contrib/session_bundle")
+add_python_module("tensorflow/contrib/session_bundle/example")
+add_python_module("tensorflow/contrib/session_bundle/testdata")
+add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two")
+add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two/variables")
+add_python_module("tensorflow/contrib/slim")
+add_python_module("tensorflow/contrib/slim/python")
+add_python_module("tensorflow/contrib/slim/python/slim")
+add_python_module("tensorflow/contrib/slim/python/slim/data")
+add_python_module("tensorflow/contrib/slim/python/slim/nets")
+add_python_module("tensorflow/contrib/specs")
+add_python_module("tensorflow/contrib/specs/python")
+add_python_module("tensorflow/contrib/tensorboard")
+add_python_module("tensorflow/contrib/tensorboard/plugins")
+add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
+add_python_module("tensorflow/contrib/tensor_forest")
+add_python_module("tensorflow/contrib/tensor_forest/client")
+add_python_module("tensorflow/contrib/tensor_forest/core")
+add_python_module("tensorflow/contrib/tensor_forest/core/ops")
+add_python_module("tensorflow/contrib/tensor_forest/data")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/core")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/core/ops")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/kernel_tests")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/layers")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/models")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/ops")
+add_python_module("tensorflow/contrib/tensor_forest/python")
+add_python_module("tensorflow/contrib/tensor_forest/python/kernel_tests")
+add_python_module("tensorflow/contrib/tensor_forest/python/ops")
+add_python_module("tensorflow/contrib/tensorboard")
+add_python_module("tensorflow/contrib/tensorboard")
+add_python_module("tensorflow/contrib/tensorboard/plugins")
+add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
+add_python_module("tensorflow/contrib/testing")
+add_python_module("tensorflow/contrib/testing/python")
+add_python_module("tensorflow/contrib/testing/python/framework")
+add_python_module("tensorflow/contrib/tfprof" DONTCOPY) # SWIG wrapper not implemented.
+#add_python_module("tensorflow/contrib/tfprof/python")
+#add_python_module("tensorflow/contrib/tfprof/python/tools")
+#add_python_module("tensorflow/contrib/tfprof/python/tools/tfprof")
+add_python_module("tensorflow/contrib/training")
+add_python_module("tensorflow/contrib/training/python")
+add_python_module("tensorflow/contrib/training/python/training")
+add_python_module("tensorflow/contrib/util")
########################################################
@@ -280,6 +454,15 @@ GENERATE_PYTHON_OP_LIB("user_ops")
GENERATE_PYTHON_OP_LIB("training_ops"
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/training/gen_training_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops"
+ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops"
+ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_clustering_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_factorization_factorization_ops"
+ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_factorization_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_framework_variable_ops"
+ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/framework/python/ops/gen_variable_ops.py)
+
add_custom_target(tf_python_ops SOURCES ${tf_python_ops_generated_files} ${PYTHON_PROTO_GENFILES})
add_dependencies(tf_python_ops tf_python_op_gen_main)
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 658e4c343c..3a9fd639e6 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -149,12 +149,8 @@ if (tensorflow_BUILD_PYTHON_TESTS)
# issues related to windows fs
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/io_ops_test.py"
# missing kernel
- "${tensorflow_source_dir}/tensorflow/python/kernel_tests/pooling_ops_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/conv_ops_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/depthwise_conv_op_test.py"
- "${tensorflow_source_dir}/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py"
- "${tensorflow_source_dir}/tensorflow/python/kernel_tests/pool_test.py"
- "${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py"
# cuda launch failed
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/diag_op_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/trace_op_test.py"
diff --git a/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc b/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc
index 4f35337c0c..92606225d7 100644
--- a/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc
+++ b/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc
@@ -257,7 +257,7 @@ class WALSComputePartialLhsAndRhsOp : public OpKernel {
lhs_mat = lhs_symm;
counter.DecrementCount();
};
- for (int i = 1; i < shards.size(); ++i) {
+ for (size_t i = 1; i < shards.size(); ++i) {
worker_threads.workers->Schedule(std::bind(work, shards[i]));
}
// Inline execute the 1st shard.
diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt
index d4adcec550..ccc2aaa07f 100644
--- a/tensorflow/contrib/makefile/proto_text_cc_files.txt
+++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt
@@ -11,6 +11,7 @@ tensorflow/core/platform/posix/env.cc
tensorflow/core/platform/posix/load_library.cc
tensorflow/core/platform/file_system.cc
tensorflow/core/platform/env.cc
+tensorflow/core/platform/setround.cc
tensorflow/core/platform/denormal.cc
tensorflow/core/platform/default/tracing.cc
tensorflow/core/platform/default/logging.cc
diff --git a/tensorflow/contrib/metrics/kernels/set_kernels.cc b/tensorflow/contrib/metrics/kernels/set_kernels.cc
index 2d2f496da7..61fe250206 100644
--- a/tensorflow/contrib/metrics/kernels/set_kernels.cc
+++ b/tensorflow/contrib/metrics/kernels/set_kernels.cc
@@ -611,7 +611,7 @@ void SetOperationOp<T>::ComputeSparseToSparse(OpKernelContext* ctx) const {
int64 compare_groups;
CompareGroups(ctx, set1_group_indices, set2_group_indices, &compare_groups);
- const std::vector<int64>* group_indices;
+ const std::vector<int64>* group_indices = nullptr;
// Get values from set1, if applicable.
set1_group_set.clear();
diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
index b80bd7248b..b952040c30 100644
--- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
+++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
@@ -294,10 +294,7 @@ class Image(ItemHandler):
image_buffer = keys_to_tensors[self._image_key]
image_format = keys_to_tensors[self._format_key]
- image = self._decode(image_buffer, image_format)
- if self._shape is not None:
- image = array_ops.reshape(image, self._shape)
- return image
+ return self._decode(image_buffer, image_format)
def _decode(self, image_buffer, image_format):
"""Decodes the image buffer.
@@ -316,12 +313,23 @@ class Image(ItemHandler):
def decode_jpg():
return image_ops.decode_jpeg(image_buffer, self._channels)
- image = control_flow_ops.case({
+ # For RGBA images JPEG is not a valid decoder option.
+ if self._channels > 3:
+ pred_fn_pairs = {
+ math_ops.logical_or(math_ops.equal(image_format, 'raw'),
+ math_ops.equal(image_format, 'RAW')): decode_raw,
+ }
+ default_decoder = decode_png
+ else:
+ pred_fn_pairs = {
math_ops.logical_or(math_ops.equal(image_format, 'png'),
math_ops.equal(image_format, 'PNG')): decode_png,
math_ops.logical_or(math_ops.equal(image_format, 'raw'),
math_ops.equal(image_format, 'RAW')): decode_raw,
- }, default=decode_jpg, exclusive=True)
+ }
+ default_decoder = decode_jpg
+
+ image = control_flow_ops.case(pred_fn_pairs, default=default_decoder, exclusive=True)
image.set_shape([None, None, self._channels])
if self._shape is not None:
diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py
index cd75db8967..f572f9c3e9 100644
--- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py
+++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py
@@ -168,7 +168,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
self.assertEqual(tf_decoded_image.get_shape().ndims, 3)
def testDecodeExampleWithPngEncoding(self):
- test_image_channels = [1, 3]
+ test_image_channels = [1, 3, 4]
for channels in test_image_channels:
image_shape = (2, 3, channels)
image, serialized_example = self.GenerateImage(
@@ -183,7 +183,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
self.assertAllClose(image, decoded_image, atol=0)
def testDecodeExampleWithPNGEncoding(self):
- test_image_channels = [1, 3]
+ test_image_channels = [1, 3, 4]
for channels in test_image_channels:
image_shape = (2, 3, channels)
image, serialized_example = self.GenerateImage(
diff --git a/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc b/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc
index e93267c1e3..544336b1ba 100644
--- a/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc
+++ b/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc
@@ -395,7 +395,7 @@ double getDistanceFromLambda3(double lambda3, const std::vector<float>& mu1,
// x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3)
// y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3)
double dist = 0.0;
- for (int i = 0; i < mu1.size(); i++) {
+ for (size_t i = 0; i < mu1.size(); i++) {
double diff = (lambda1 + 2.0 * mu1[i]) / (2.0 - 2.0 * lambda3) - mu1[i];
dist += diff * diff;
diff = (lambda2 + 2.0 * mu2[i]) / (2.0 + 2.0 * lambda3) - mu2[i];
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index ec60e853b2..29b05cecba 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1118,6 +1118,7 @@ tf_version_info_genrule()
cc_library(
name = "version_lib",
srcs = ["util/version_info.cc"],
+ hdrs = ["public/version.h"],
copts = tf_copts(),
)
@@ -1129,7 +1130,6 @@ tf_cuda_library(
"example/**/*.cc",
"framework/**/*.h",
"framework/**/*.cc",
- "public/version.h",
"util/**/*.h",
"util/**/*.cc",
],
@@ -1142,6 +1142,7 @@ tf_cuda_library(
"framework/fake_input.*",
"util/memmapped_file_system.*",
"util/memmapped_file_system_writer.*",
+ "util/version_info.cc",
],
) + select({
"//tensorflow:windows": [],
@@ -1394,11 +1395,13 @@ tf_cuda_library(
cc_library(
name = "sycl_runtime",
srcs = if_not_windows([
+ "common_runtime/sycl/sycl_allocator.cc",
"common_runtime/sycl/sycl_device.cc",
"common_runtime/sycl/sycl_device_context.cc",
"common_runtime/sycl/sycl_device_factory.cc",
]),
hdrs = if_not_windows([
+ "common_runtime/sycl/sycl_allocator.h",
"common_runtime/sycl/sycl_device.h",
"common_runtime/sycl/sycl_device_context.h",
]),
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
new file mode 100644
index 0000000000..175b784825
--- /dev/null
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
@@ -0,0 +1,35 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef TENSORFLOW_USE_SYCL
+
+#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
+
+namespace tensorflow {
+
+SYCLAllocator::~SYCLAllocator() { }
+
+string SYCLAllocator::Name() { return "device:SYCL"; }
+
+void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
+ auto p = device_->allocate(num_bytes);
+ return p;
+}
+
+void SYCLAllocator::DeallocateRaw(void *ptr) { device_->deallocate(ptr); }
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.h b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
new file mode 100644
index 0000000000..887c727f6e
--- /dev/null
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
@@ -0,0 +1,45 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if !TENSORFLOW_USE_SYCL
+#error This file must only be included when building TensorFlow with SYCL support
+#endif
+
+#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
+#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
+
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/platform/types.h"
+#define EIGEN_USE_SYCL
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+class SYCLAllocator : public Allocator {
+public:
+ SYCLAllocator(Eigen::SyclDevice* device) : device_(device) {}
+ virtual ~SYCLAllocator() override;
+ string Name() override;
+ void *AllocateRaw(size_t alignment, size_t num_bytes) override;
+ void DeallocateRaw(void *ptr) override;
+
+private:
+ Eigen::SyclDevice *device_; // not owned
+ TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc
index dc4e8db7ba..10a037c02d 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.cc
@@ -23,25 +23,13 @@ limitations under the License.
namespace tensorflow {
-cl::sycl::gpu_selector s;
-cl::sycl::queue q(s);
-
-SYCLDevice::SYCLDevice(const SessionOptions& options, const string& name,
- Bytes memory_limit, const DeviceLocality& locality,
- const string& physical_device_desc, Allocator* allocator)
- : LocalDevice(options,
- Device::BuildDeviceAttributes(name, DEVICE_SYCL, memory_limit,
- locality, physical_device_desc),
- allocator),
- allocator_(allocator),
- device_context_(new SYCLDeviceContext()),
- device_(q) {
- set_eigen_sycl_device(&device_);
+SYCLDevice::~SYCLDevice() {
+ device_context_->Unref();
+ delete sycl_allocator_;
+ delete sycl_device_;
}
-SYCLDevice::~SYCLDevice() { device_context_->Unref(); }
-
-void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
+void SYCLDevice::Compute(OpKernel *op_kernel, OpKernelContext *context) {
assert(context);
if (port::Tracing::IsActive()) {
// TODO(pbar) We really need a useful identifier of the graph node.
@@ -52,28 +40,45 @@ void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
op_kernel->Compute(context);
}
-Allocator* SYCLDevice::GetAllocator(AllocatorAttributes attr) {
- return allocator_;
+Allocator *SYCLDevice::GetAllocator(AllocatorAttributes attr) {
+ if (attr.on_host())
+ return cpu_allocator_;
+ else
+ return sycl_allocator_;
}
-Status SYCLDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
+Status SYCLDevice::MakeTensorFromProto(const TensorProto &tensor_proto,
const AllocatorAttributes alloc_attrs,
- Tensor* tensor) {
+ Tensor *tensor) {
+ AllocatorAttributes attr;
+ attr.set_on_host(true);
+ attr.set_gpu_compatible(true);
+ Allocator *host_alloc = GetAllocator(attr);
Tensor parsed(tensor_proto.dtype());
- if (!parsed.FromProto(cpu_allocator(), tensor_proto)) {
+ if (!parsed.FromProto(host_alloc, tensor_proto)) {
return errors::InvalidArgument("Cannot parse tensor from proto: ",
- ProtoDebugString(tensor_proto));
+ tensor_proto.DebugString());
}
- *tensor = std::move(parsed);
- return Status::OK();
+ Status status;
+ if (alloc_attrs.on_host()) {
+ *tensor = parsed;
+ } else {
+ Tensor copy(GetAllocator(alloc_attrs), parsed.dtype(), parsed.shape());
+ device_context_->CopyCPUTensorToDevice(&parsed, this, &copy,
+ [&status](const Status &s) {
+ status = s;
+ });
+ *tensor = copy;
+ }
+ return status;
}
-Status SYCLDevice::FillContextMap(const Graph* graph,
- DeviceContextMap* device_context_map) {
+Status SYCLDevice::FillContextMap(const Graph *graph,
+ DeviceContextMap *device_context_map) {
// Fill in the context map. It is OK for this map to contain
// duplicate DeviceContexts so long as we increment the refcount.
device_context_map->resize(graph->num_node_ids());
- for (Node* n : graph->nodes()) {
+ for (Node *n : graph->nodes()) {
device_context_->Ref();
(*device_context_map)[n->id()] = device_context_;
}
@@ -81,6 +86,6 @@ Status SYCLDevice::FillContextMap(const Graph* graph,
return Status::OK();
}
-} // namespace tensorflow
+} // namespace tensorflow
-#endif // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h
index eaa9429b16..d3b3db2a71 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.h
@@ -24,26 +24,40 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/local_device.h"
+#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
#include "tensorflow/core/public/session_options.h"
namespace tensorflow {
+
class SYCLDevice : public LocalDevice {
- public:
- SYCLDevice(const SessionOptions& options, const string& name,
- Bytes memory_limit, const DeviceLocality& locality,
- const string& physical_device_desc, Allocator* allocator);
+public:
+ template <typename SYCLSelector>
+ SYCLDevice(const SessionOptions &options, const string &name,
+ Bytes memory_limit, const DeviceLocality &locality,
+ const string &physical_device_desc, SYCLSelector sycl_selector,
+ Allocator *cpu_allocator)
+ : LocalDevice(options, Device::BuildDeviceAttributes(
+ name, DEVICE_SYCL, memory_limit, locality,
+ physical_device_desc), nullptr),
+ cpu_allocator_(cpu_allocator),
+ sycl_device_(new Eigen::SyclDevice(sycl_selector)),
+ sycl_allocator_(new SYCLAllocator(sycl_device_)),
+ device_context_(new SYCLDeviceContext()) {
+ set_eigen_sycl_device(sycl_device_);
+ }
+
~SYCLDevice() override;
- void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
- Allocator* GetAllocator(AllocatorAttributes attr) override;
- Status MakeTensorFromProto(const TensorProto& tensor_proto,
+ void Compute(OpKernel *op_kernel, OpKernelContext *context) override;
+ Allocator *GetAllocator(AllocatorAttributes attr) override;
+ Status MakeTensorFromProto(const TensorProto &tensor_proto,
const AllocatorAttributes alloc_attrs,
- Tensor* tensor) override;
+ Tensor *tensor) override;
- Status FillContextMap(const Graph* graph,
- DeviceContextMap* device_context_map) override;
+ Status FillContextMap(const Graph *graph,
+ DeviceContextMap *device_context_map) override;
Status Sync() override { return Status::OK(); }
static string GetShortDeviceDescription(/*int device_id,
@@ -51,12 +65,13 @@ class SYCLDevice : public LocalDevice {
return strings::StrCat("device: 0, name SYCL, pci bus id: 0");
}
- private:
- Allocator* allocator_; // Not owned
- SYCLDeviceContext* device_context_;
- Eigen::SyclDevice device_;
+private:
+ Allocator *cpu_allocator_; // owned
+ Eigen::SyclDevice* sycl_device_; // owned
+ SYCLAllocator *sycl_allocator_; // owned
+ SYCLDeviceContext *device_context_;
};
-} // namespace tensorflow
+} // namespace tensorflow
-#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
+#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
index 70b1a0e93b..9dd289bebd 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
@@ -13,36 +13,171 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+#if TENSORFLOW_USE_SYCL
+
+#define EIGEN_USE_SYCL
+
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
#include "tensorflow/core/common_runtime/dma_helper.h"
+#define EIGEN_USE_SYCL
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
namespace tensorflow {
-void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
- Device* device,
- Tensor* device_tensor,
+void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor,
+ Device *device,
+ Tensor *device_tensor,
StatusCallback done) const {
const int64 total_bytes = cpu_tensor->TotalBytes();
if (total_bytes > 0) {
- const void* src_ptr = DMAHelper::base(cpu_tensor);
- void* dst_ptr = DMAHelper::base(device_tensor);
- ::memcpy(dst_ptr, src_ptr, total_bytes);
+ const void *src_ptr = DMAHelper::base(cpu_tensor);
+ void *dst_ptr = DMAHelper::base(device_tensor);
+ switch (cpu_tensor->dtype()) {
+ case DT_FLOAT:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_DOUBLE:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT32:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT64:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_HALF:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<Eigen::half *>(dst_ptr),
+ static_cast<const Eigen::half *>(src_ptr), total_bytes);
+ break;
+ case DT_COMPLEX64:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<std::complex<float> *>(dst_ptr),
+ static_cast<const std::complex<float> *>(src_ptr), total_bytes);
+ break;
+ case DT_COMPLEX128:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<std::complex<double> *>(dst_ptr),
+ static_cast<const std::complex<double> *>(src_ptr), total_bytes);
+ break;
+ case DT_INT8:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT16:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_UINT8:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_UINT16:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_BOOL:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
+ total_bytes);
+ break;
+ default:
+ assert(false && "unsupported type");
+ }
}
done(Status::OK());
}
-void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
+void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor,
StringPiece edge_name,
- Device* device,
- Tensor* cpu_tensor,
+ Device *device,
+ Tensor *cpu_tensor,
StatusCallback done) {
const int64 total_bytes = device_tensor->TotalBytes();
if (total_bytes > 0) {
+ device->eigen_sycl_device()->deallocate_all();
const void* src_ptr = DMAHelper::base(device_tensor);
void* dst_ptr = DMAHelper::base(cpu_tensor);
- ::memcpy(dst_ptr, src_ptr, total_bytes);
+ switch (device_tensor->dtype()) {
+ case DT_FLOAT:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_DOUBLE:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT32:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT64:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_HALF:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<Eigen::half *>(dst_ptr),
+ static_cast<const Eigen::half *>(src_ptr), total_bytes);
+ break;
+ case DT_COMPLEX64:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<std::complex<float> *>(dst_ptr),
+ static_cast<const std::complex<float> *>(src_ptr), total_bytes);
+ break;
+ case DT_COMPLEX128:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<std::complex<double> *>(dst_ptr),
+ static_cast<const std::complex<double> *>(src_ptr), total_bytes);
+ break;
+ case DT_INT8:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT16:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_UINT8:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_UINT16:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_BOOL:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
+ total_bytes);
+ break;
+ default:
+ assert(false && "unsupported type");
+ }
}
done(Status::OK());
}
} // namespace tensorflow
+#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.h b/tensorflow/core/common_runtime/sycl/sycl_device_context.h
index 67cfe47aa1..1f7ad543d9 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.h
@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+#if !TENSORFLOW_USE_SYCL
+#error This file must only be included when building TensorFlow with SYCL support
+#endif
+
#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
@@ -22,20 +26,20 @@ limitations under the License.
namespace tensorflow {
class SYCLDeviceContext : public DeviceContext {
- public:
+public:
SYCLDeviceContext() {}
~SYCLDeviceContext() override {}
- void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
- Tensor* device_tensor,
+ void CopyCPUTensorToDevice(const Tensor *cpu_tensor, Device *device,
+ Tensor *device_tensor,
StatusCallback done) const override;
- void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name,
- Device* device, Tensor* cpu_tensor,
+ void CopyDeviceTensorToCPU(const Tensor *device_tensor, StringPiece edge_name,
+ Device *device, Tensor *cpu_tensor,
StatusCallback done) override;
};
-} // namespace tensorflow
+} // namespace tensorflow
-#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
+#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
index 97c4c2c236..9b8770420c 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
@@ -20,9 +20,9 @@ limitations under the License.
namespace tensorflow {
class SYCLDeviceFactory : public DeviceFactory {
- public:
- Status CreateDevices(const SessionOptions& options, const string& name_prefix,
- std::vector<Device*>* devices) override {
+public:
+ Status CreateDevices(const SessionOptions &options, const string &name_prefix,
+ std::vector<Device *> *devices) override {
int n = 1;
auto iter = options.config.device_count().find("SYCL");
if (iter != options.config.device_count().end()) {
@@ -30,9 +30,10 @@ class SYCLDeviceFactory : public DeviceFactory {
}
for (int i = 0; i < n; i++) {
string name = strings::StrCat(name_prefix, "/device:SYCL:", i);
- devices->push_back(new SYCLDevice(
- options, name, Bytes(256 << 20), DeviceLocality(),
- SYCLDevice::GetShortDeviceDescription(), cpu_allocator()));
+ devices->push_back(new SYCLDevice(options, name, Bytes(256 << 20),
+ DeviceLocality(),
+ SYCLDevice::GetShortDeviceDescription(),
+ cl::sycl::gpu_selector(), cpu_allocator()));
}
return Status::OK();
}
@@ -41,4 +42,4 @@ class SYCLDeviceFactory : public DeviceFactory {
REGISTER_LOCAL_DEVICE_FACTORY("SYCL", SYCLDeviceFactory);
}
-#endif // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/distributed_runtime/master.cc b/tensorflow/core/distributed_runtime/master.cc
index 741282be31..de468f00b7 100644
--- a/tensorflow/core/distributed_runtime/master.cc
+++ b/tensorflow/core/distributed_runtime/master.cc
@@ -91,8 +91,8 @@ void Master::GC() {
std::vector<string> handles;
const int64 num_micros = static_cast<int64>(session_gc_seconds_ * 1000000);
for (const auto& entry : sessions_) {
- auto lat = entry.second->last_access_time_usec();
- if (env->NowMicros() - lat > num_micros) {
+ int64 lat = entry.second->last_access_time_usec();
+ if (static_cast<int64>(env->NowMicros()) - lat > num_micros) {
handles.push_back(entry.first);
auto* sess = entry.second;
SchedClosure([this, sess]() {
@@ -399,7 +399,7 @@ void Master::CleanupWorkers(const ResetRequest& reset) {
}
++c;
}
- for (int i = 0; i < n.size(); ++i) {
+ for (size_t i = 0; i < n.size(); ++i) {
n[i].WaitForNotification();
}
}
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
index eb188a7984..31a3404a07 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
@@ -69,7 +69,7 @@ Status ValidateHostPortPair(const string& host_port) {
Status GrpcChannelSpec::AddHostPortsJob(const string& job_id,
const std::vector<string>& host_ports) {
std::map<int, string> host_ports_map;
- for (int i = 0; i < host_ports.size(); ++i) {
+ for (size_t i = 0; i < host_ports.size(); ++i) {
host_ports_map[i] = host_ports[i];
}
return AddHostPortsJob(job_id, host_ports_map);
diff --git a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
index 3b0614cc75..c4bb37fcbe 100644
--- a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
+++ b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
@@ -156,7 +156,7 @@ class RpcRecvTensorFreeList {
public:
RpcRecvTensorFreeList() {}
~RpcRecvTensorFreeList() {
- for (int i = 0; i < objects_.size(); i++) {
+ for (size_t i = 0; i < objects_.size(); i++) {
delete objects_[i];
}
}
diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc
index b26970b606..55b7d5fe82 100644
--- a/tensorflow/core/distributed_runtime/tensor_coding.cc
+++ b/tensorflow/core/distributed_runtime/tensor_coding.cc
@@ -192,7 +192,7 @@ bool TensorResponse::ParseTensorSubmessage(
TensorShape shape(tensor_meta->tensor_shape());
Tensor t(allocator_, tensor_meta->dtype(), shape);
StringPiece buf = t.tensor_data();
- if (num_bytes != buf.size()) return false;
+ if (static_cast<size_t>(num_bytes) != buf.size()) return false;
// TODO(jeff,sanjay): Figure out a way to avoid this copy if
// the underlying ZeroCopyInputStream data is properly aligned
// and compatible with what allocator_ wants.
diff --git a/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc b/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc
index a190b2168a..2be330d142 100644
--- a/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc
@@ -72,7 +72,7 @@ __global__ void AvePoolBackwardNHWC(const int nthreads,
wstart = max(wstart, 0);
int pool_size = (hend - hstart) * (wend - wstart);
gradient +=
- top_diff_slice[(ph * pooled_width + pw) * channels] / pool_size;
+ top_diff_slice[(ph * pooled_width + pw) * channels] / dtype(pool_size);
}
}
bottom_diff[index] = gradient;
diff --git a/tensorflow/core/kernels/bcast_ops.cc b/tensorflow/core/kernels/bcast_ops.cc
index 10354cbb56..db8842a547 100644
--- a/tensorflow/core/kernels/bcast_ops.cc
+++ b/tensorflow/core/kernels/bcast_ops.cc
@@ -90,4 +90,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
.HostMemory("r1"),
BCastGradArgsOp);
+#if TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
+ .Device(DEVICE_SYCL)
+ .TypeConstraint<int32>("T")
+ .HostMemory("s0")
+ .HostMemory("s1")
+ .HostMemory("r0")
+ .HostMemory("r1"),
+ BCastGradArgsOp);
+#endif
} // end namespace tensorflow
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index c2d5af0316..4c8c9939bc 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -16,6 +16,9 @@ limitations under the License.
// See docs in ../ops/array_ops.cc.
#define EIGEN_USE_THREADS
+#if TENSORFLOW_USE_SYCL
+#define EIGEN_USE_SYCL
+#endif
#include "tensorflow/core/kernels/constant_op.h"
diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc
index 65413a09b2..1a8c17b1ef 100644
--- a/tensorflow/core/kernels/control_flow_ops.cc
+++ b/tensorflow/core/kernels/control_flow_ops.cc
@@ -112,6 +112,15 @@ REGISTER_GPU_HOST_REF_KERNEL(string);
#undef REGISTER_GPU_HOST_KERNEL
#undef REGISTER_GPU_HOST_REF_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Switch").Device(DEVICE_SYCL).TypeConstraint<type>("T"), SwitchOp)
+REGISTER_SYCL_KERNEL(bool);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
class RefSelectOp : public OpKernel {
public:
explicit RefSelectOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -209,6 +218,15 @@ REGISTER_GPU_REF_KERNEL(bool);
#undef REGISTER_GPU_KERNEL
#undef REGISTER_GPU_REF_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Merge").Device(DEVICE_SYCL).TypeConstraint<type>("T"), MergeOp)
+REGISTER_SYCL_KERNEL(bool);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
// Special GPU kernels for int32 and string.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
@@ -259,6 +277,15 @@ REGISTER_GPU_REF_KERNEL(bool);
#undef REGISTER_GPU_KERNEL
#undef REGISTER_GPU_REF_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Enter").Device(DEVICE_SYCL).TypeConstraint<type>("T"), EnterOp)
+REGISTER_SYCL_KERNEL(bool);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
// Special GPU kernels for int32 and string.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
@@ -310,6 +337,15 @@ REGISTER_GPU_KERNEL(bool);
#undef REGISTER_GPU_KERNEL
#undef REGISTER_GPU_REF_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Exit").Device(DEVICE_SYCL).TypeConstraint<type>("T"), ExitOp)
+REGISTER_SYCL_KERNEL(bool);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
// Special GPU kernels for int32 and string.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
@@ -380,6 +416,15 @@ REGISTER_GPU_HOST_KERNEL(string);
#undef REGISTER_GPU_HOST_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), NextIterationOp)
+ REGISTER_SYCL_KERNEL(bool);
+ TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
// A LoopCond op has one input and one output. The input is a boolean
// scalar representing the taken branches of the "pivot" Switch that
// determines loop termination. As a contract, any high-level front-end
diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc
index 75c6118795..44c552d18e 100644
--- a/tensorflow/core/kernels/cwise_op_add_1.cc
+++ b/tensorflow/core/kernels/cwise_op_add_1.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32,
int64);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Add") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::add<TYPE>>);
+TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double);
diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc
index 925c9e9916..c2b05a69b2 100644
--- a/tensorflow/core/kernels/cwise_op_div.cc
+++ b/tensorflow/core/kernels/cwise_op_div.cc
@@ -24,6 +24,16 @@ REGISTER5(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8, uint16, int16,
int32, int64);
REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double,
complex64, complex128);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Div") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::div<TYPE>>);
+REGISTER_SYCL_KERNEL(float)
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER9(BinaryOp, GPU, "Div", functor::div, float, Eigen::half, double, uint8,
uint16, int16, int64, complex64, complex128);
diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc
index 83b2771ed2..7930d83413 100644
--- a/tensorflow/core/kernels/cwise_op_floor_div.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_div.cc
@@ -18,6 +18,16 @@ limitations under the License.
namespace tensorflow {
REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
int16, int32, int64);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("FloorDiv") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::floor_div<TYPE>>);
+TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
int64);
diff --git a/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
new file mode 100644
index 0000000000..028b944d27
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
@@ -0,0 +1,26 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
+
+namespace tensorflow {
+namespace functor {
+DEFINE_UNARY2(rint, float, double);
+} // namespace functor
+} // namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc
index 954b5d25bd..e38b271318 100644
--- a/tensorflow/core/kernels/cwise_op_isfinite.cc
+++ b/tensorflow/core/kernels/cwise_op_isfinite.cc
@@ -18,6 +18,16 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
double);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("IsFinite") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::isfinite<TYPE>>);
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
double);
diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc
index 407dadcb69..bf056dbe0e 100644
--- a/tensorflow/core/kernels/cwise_op_isinf.cc
+++ b/tensorflow/core/kernels/cwise_op_isinf.cc
@@ -17,6 +17,16 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("IsInf") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::isinf<TYPE>>);
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc
index f150b2f3f4..d2bac23882 100644
--- a/tensorflow/core/kernels/cwise_op_isnan.cc
+++ b/tensorflow/core/kernels/cwise_op_isnan.cc
@@ -17,6 +17,16 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("IsNan") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::isnan<TYPE>>);
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_mul_1.cc b/tensorflow/core/kernels/cwise_op_mul_1.cc
index 09e9f070da..e23fe6761d 100644
--- a/tensorflow/core/kernels/cwise_op_mul_1.cc
+++ b/tensorflow/core/kernels/cwise_op_mul_1.cc
@@ -19,6 +19,17 @@ namespace tensorflow {
REGISTER5(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double,
uint8, int32);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Mul") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::mul<TYPE>>);
+REGISTER_SYCL_KERNEL(float)
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER4(BinaryOp, GPU, "Mul", functor::mul, float, Eigen::half, double,
uint8);
diff --git a/tensorflow/core/kernels/cwise_op_rint.cc b/tensorflow/core/kernels/cwise_op_rint.cc
new file mode 100644
index 0000000000..a741b3d718
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_rint.cc
@@ -0,0 +1,23 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/cwise_ops_common.h"
+
+namespace tensorflow {
+REGISTER2(UnaryOp, CPU, "Rint", functor::rint, float, double);
+#if GOOGLE_CUDA
+REGISTER2(UnaryOp, GPU, "Rint", functor::rint, float, double);
+#endif
+} // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc
index d000e54cfc..ed78ba37a8 100644
--- a/tensorflow/core/kernels/cwise_op_sub.cc
+++ b/tensorflow/core/kernels/cwise_op_sub.cc
@@ -24,6 +24,16 @@ REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32,
// int32 version of this op is needed, so explicitly include it.
REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
#endif // __ANDROID_TYPES_SLIM__
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Sub") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::sub<TYPE>>);
+TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER6(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double, int64,
complex64, complex128);
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index b038d73e17..7f35e03feb 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -521,6 +521,27 @@ struct round : base<T, Eigen::internal::scalar_round_op_google<T>> {};
template <typename T>
struct ceil : base<T, Eigen::internal::scalar_ceil_op<T>> {};
+/** this should go in Eigen
+ * \brief Template functor to compute the round to int value of a scalar
+ */
+template <typename Scalar>
+struct scalar_rint_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_rint_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
+ operator()(const Scalar& a) const {
+#if defined(__CUDACC__)
+ return ::rint(a);
+#elif defined(__ANDROID__)
+ return rint(a);
+#else
+ return std::rint(a);
+#endif
+ }
+};
+
+template <typename T>
+struct rint : base<T, scalar_rint_op<T>> {};
+
////////////////////////////////////////////////////////////////////////////////
// Binary functors
////////////////////////////////////////////////////////////////////////////////
diff --git a/tensorflow/core/kernels/cwise_ops_sycl_common.h b/tensorflow/core/kernels/cwise_ops_sycl_common.h
index c66ae42c2d..4c22cc4855 100644
--- a/tensorflow/core/kernels/cwise_ops_sycl_common.h
+++ b/tensorflow/core/kernels/cwise_ops_sycl_common.h
@@ -22,6 +22,8 @@ limitations under the License.
#define EIGEN_USE_SYCL
+#include "tensorflow/core/framework/register_types.h"
+
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/kernels/cwise_ops.h"
#include "tensorflow/core/platform/logging.h"
@@ -32,6 +34,14 @@ namespace functor {
typedef Eigen::SyclDevice SYCLDevice;
+template <typename Index, int N> Eigen::array<Index, N> GenerateArrayOfOnes() {
+ Eigen::array<Index, N> result;
+ for (int i = 0; i < N; ++i) {
+ result[i] = 1;
+ }
+ return result;
+}
+
template <typename OUT, typename RHS>
void Assign(const SYCLDevice& d, OUT out, RHS rhs) {
out.device(d) = rhs;
@@ -52,23 +62,31 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
void operator()(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tin_type in0,
typename Functor::tin_type in1, bool* error) {
- Assign(d, out, in0.binaryExpr(in1, typename Functor::func()));
+ To32Bit(out).device(d) = To32Bit(in0).binaryExpr(in1, typename Functor::func());
}
void Left(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tscalar_type scalar,
typename Functor::tin_type in, bool* error) {
- LOG(FATAL) << "BinaryFunctor::Left NOT IMPLEMENTED ! ";
+ typedef typename Functor::func Binary;
+ constexpr int NumDims = Functor::tin_type::NumDimensions;
+ typedef typename Functor::tin_type::Scalar T;
+ typedef typename Functor::tin_type::Index Index;
+ Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
+ Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
+ out.device(d) = tmp.broadcast(in.dimensions()).binaryExpr(in, Binary());
}
void Right(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tin_type in,
typename Functor::tscalar_type scalar, bool* error) {
- typedef typename Functor::out_type Tout;
- typedef typename Functor::in_type Tin;
typedef typename Functor::func Binary;
- typedef typename Eigen::internal::scalar_right<Tout, Tin, Binary> Unary;
- Assign(d, out, in.unaryExpr(Unary(scalar.data())));
+ constexpr int NumDims = Functor::tin_type::NumDimensions;
+ typedef typename Functor::tin_type::Scalar T;
+ typedef typename Functor::tin_type::Index Index;
+ Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
+ Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
+ out.device(d) = in.binaryExpr(tmp.broadcast(in.dimensions()), Binary());
}
void BCast(const SYCLDevice& d,
@@ -78,7 +96,25 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in1,
typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast1,
bool* error) {
- LOG(FATAL) << "BinaryFunctor::BCast NOT IMPLEMENTED ";
+ typedef typename Functor::in_type T;
+ typename Functor::func func;
+ if ((NDIMS == 2) && Functor::use_bcast_optimization &&
+ use_bcast_optimization<T>::value) {
+ const bool bcast0_all_one = AllOne<NDIMS>(bcast0);
+ const bool bcast1_all_one = AllOne<NDIMS>(bcast1);
+ if (bcast0_all_one && !bcast1_all_one) {
+ To32Bit(out).device(d) =
+ To32Bit(in0).binaryExpr(To32Bit(in1).broadcast(bcast1), func);
+ return;
+ }
+ if (!bcast0_all_one && bcast1_all_one) {
+ To32Bit(out).device(d) =
+ To32Bit(in0).broadcast(bcast0).binaryExpr(To32Bit(in1), func);
+ return;
+ }
+ }
+ To32Bit(out).device(d) = To32Bit(in0).broadcast(bcast0).binaryExpr(
+ To32Bit(in1).broadcast(bcast1), func);
}
};
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
index 823e7e14ed..6250928aca 100644
--- a/tensorflow/core/kernels/cwise_ops_test.cc
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -59,6 +59,11 @@ BM_UNARY(gpu, Conj, std::complex<float>, DT_COMPLEX64);
BM_UNARY(cpu, Conj, std::complex<double>, DT_COMPLEX128);
BM_UNARY(gpu, Conj, std::complex<double>, DT_COMPLEX128);
+BM_UNARY(cpu, Rint, double, DT_DOUBLE);
+BM_UNARY(gpu, Rint, double, DT_DOUBLE);
+BM_UNARY(cpu, Rint, float, DT_FLOAT);
+BM_UNARY(gpu, Rint, float, DT_FLOAT);
+
// data func scalar.
static Graph* BinaryScalar(int num, const string& func) {
Graph* g = new Graph(OpRegistry::Global());
diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc
index 025e9a8de1..baa8f83091 100644
--- a/tensorflow/core/kernels/dense_update_ops.cc
+++ b/tensorflow/core/kernels/dense_update_ops.cc
@@ -14,6 +14,9 @@ limitations under the License.
==============================================================================*/
#define EIGEN_USE_THREADS
+#if TENSORFLOW_USE_SYCL
+#define EIGEN_USE_SYCL
+#endif
#include "tensorflow/core/kernels/dense_update_ops.h"
#include "tensorflow/core/framework/op_kernel.h"
@@ -92,6 +95,18 @@ TF_CALL_ALL_TYPES(REGISTER_KERNELS);
TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS
+#if TENSORFLOW_USE_SYCL
+typedef Eigen::SyclDevice SYCLDevice;
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Assign") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<type>("T"), \
+ AssignOpT<SYCLDevice, type>);
+TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
#if GOOGLE_CUDA
// Only register 'Assign' on GPU for the subset of types also supported by
// 'Variable' (see variable_ops.cc.)
diff --git a/tensorflow/core/kernels/eigen_pooling.h b/tensorflow/core/kernels/eigen_pooling.h
index 96acbe824a..8eea1b0f9d 100644
--- a/tensorflow/core/kernels/eigen_pooling.h
+++ b/tensorflow/core/kernels/eigen_pooling.h
@@ -325,7 +325,7 @@ struct AvgPoolMeanReducer {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
eigen_assert(scalarCount_ > 0);
- return accum / scalarCount_;
+ return accum / T(scalarCount_);
}
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h
index a2a9e15301..7702f3e70a 100644
--- a/tensorflow/core/kernels/eigen_spatial_convolutions.h
+++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h
@@ -991,6 +991,9 @@ EIGEN_DEVICE_FUNC
out_width = numext::ceil(InputCols / static_cast<float>(col_stride));
break;
default:
+ // Initialize unused variables to avoid a compiler warning
+ out_height = 0;
+ out_width = 0;
eigen_assert(false && "unexpected padding");
}
diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
index dd9f83afbc..56ffe58569 100644
--- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
@@ -72,8 +72,8 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> {
Eigen::array<int64, IXDIM> batch_strides;
Eigen::array<int64, IXDIM> batch_indices;
if (IXDIM > 0) {
- batch_strides[IXDIM - 1] = s_size;
- batch_indices[IXDIM - 1] = Tparams.dimension(IXDIM - 1);
+ batch_strides[size_t(IXDIM - 1)] = s_size;
+ batch_indices[size_t(IXDIM - 1)] = Tparams.dimension(IXDIM - 1);
}
for (int i = IXDIM - 1; i > 0; --i) {
batch_indices[i - 1] = Tparams.dimension(i - 1);
diff --git a/tensorflow/core/kernels/identity_op.cc b/tensorflow/core/kernels/identity_op.cc
index 711afd1bf3..8ede544b7f 100644
--- a/tensorflow/core/kernels/identity_op.cc
+++ b/tensorflow/core/kernels/identity_op.cc
@@ -68,6 +68,7 @@ REGISTER_GPU_KERNEL(bfloat16);
#undef REGISTER_GPU_KERNEL
+
#if GOOGLE_CUDA
// A special GPU kernel for int32 and bool.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/matrix_inverse_op.cc b/tensorflow/core/kernels/matrix_inverse_op.cc
index 884e3d61a4..0572d48b3e 100644
--- a/tensorflow/core/kernels/matrix_inverse_op.cc
+++ b/tensorflow/core/kernels/matrix_inverse_op.cc
@@ -52,7 +52,7 @@ class MatrixInverseOp : public LinearAlgebraOp<Scalar> {
Eigen::PartialPivLU<Matrix> lu_decomposition;
if (adjoint_) {
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
- // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
+ // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
lu_decomposition.compute(input.adjoint());
} else {
diff --git a/tensorflow/core/kernels/matrix_solve_op.cc b/tensorflow/core/kernels/matrix_solve_op.cc
index a2dfd58bd1..e10a102871 100644
--- a/tensorflow/core/kernels/matrix_solve_op.cc
+++ b/tensorflow/core/kernels/matrix_solve_op.cc
@@ -75,7 +75,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
Eigen::PartialPivLU<Matrix> lu_decomposition(matrix.rows());
if (adjoint_) {
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
- // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
+ // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
lu_decomposition.compute(matrix.adjoint());
} else {
@@ -95,7 +95,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
// TODO(rmlarsen): Add check based on condition number estimation.
// The necessary changes to Eigen are in
- // https://bitbucket.org/eigen/eigen/pull-requests/174/ \
+ // https://bitbucket.org/eigen/eigen/pull-requests/174/
// add-matrix-condition-number-estimation/diff
outputs->at(0) = lu_decomposition.solve(rhs);
}
diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index 5aeb3d2c0e..7704c5f65a 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -317,9 +317,9 @@ class ScatterNdUpdateOp : public OpKernel {
scatter_nd_op::UpdateOp::SUB);
// TODO(simister): Find a way to reduce amount of templated generated code
// to reduce build size, then re-enable these additional operations.
-// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul", \
-// scatter_nd_op::UpdateOp::MUL); \
-// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv", \
+// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul",
+// scatter_nd_op::UpdateOp::MUL);
+// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv",
// scatter_nd_op::UpdateOp::DIV);
#define REGISTER_SCATTER_ND(type, dev) \
diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h
index 442721d37b..bbe2c6864f 100644
--- a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h
+++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h
@@ -175,7 +175,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> {
REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB);
// TODO(simister): Re-enable after identifying a way to reduce the binary size
// due to too many template instantiations.
-// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL); \
+// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL);
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::DIV);
TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE);
diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc
index 9e9cdda382..1c7d50e161 100644
--- a/tensorflow/core/kernels/sendrecv_ops.cc
+++ b/tensorflow/core/kernels/sendrecv_ops.cc
@@ -80,6 +80,8 @@ REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_GPU), SendOp);
#if TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_SYCL), SendOp);
+REGISTER_KERNEL_BUILDER(
+ Name("_HostSend").Device(DEVICE_SYCL).HostMemory("tensor"), SendOp);
#endif
REGISTER_KERNEL_BUILDER(Name("_HostSend").Device(DEVICE_CPU), SendOp);
@@ -148,4 +150,9 @@ REGISTER_KERNEL_BUILDER(Name("_HostRecv").Device(DEVICE_CPU), RecvOp);
REGISTER_KERNEL_BUILDER(
Name("_HostRecv").Device(DEVICE_GPU).HostMemory("tensor"), RecvOp);
+#if TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(
+ Name("_HostRecv").Device(DEVICE_SYCL).HostMemory("tensor"), RecvOp);
+#endif
+
} // end namespace tensorflow
diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index b97df3e00c..532825a2a8 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -31,6 +31,17 @@ REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable").Device(DEVICE_CPU),
REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
IsVariableInitializedOp);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Variable") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("dtype"), \
+ VariableOp);
+TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
#if GOOGLE_CUDA
// Only register 'Variable' on GPU for the subset of types also supported by
// 'Assign' (see dense_update_ops.cc.)
diff --git a/tensorflow/core/lib/core/threadpool.cc b/tensorflow/core/lib/core/threadpool.cc
index 534ef902fb..a2245bb28e 100644
--- a/tensorflow/core/lib/core/threadpool.cc
+++ b/tensorflow/core/lib/core/threadpool.cc
@@ -21,9 +21,11 @@ limitations under the License.
#include "tensorflow/core/platform/denormal.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/setround.h"
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
+
namespace tensorflow {
namespace thread {
@@ -50,6 +52,8 @@ struct EigenEnvironment {
return env_->StartThread(thread_options_, name_, [=]() {
// Set the processor flag to flush denormals to zero
port::ScopedFlushDenormal flush;
+ // Set the C++ rounding mode to ROUND TO NEAREST
+ port::ScopedSetRound round;
f();
});
}
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 1fd641ee36..25972252c5 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -3859,7 +3859,7 @@ strides: 1-D of length 4. How far the centers of two consecutive patches are in
rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
- `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
+ `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
padding: The type of padding algorithm to use.
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 56bd594644..f854ad288b 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -472,6 +472,25 @@ REGISTER_OP("Ceil")
Returns element-wise smallest integer in not less than x.
)doc");
+REGISTER_OP("Rint")
+ .Input("x: T")
+ .Output("y: T")
+ .Attr("T: {float, double}")
+ .SetShapeFn(shape_inference::UnchangedShape)
+ .Doc(R"doc(
+Returns element-wise integer closest to x.
+
+If the result is midway between two representable values,
+the even representable is chosen.
+For example:
+
+```
+rint(-1.5) ==> -2.0
+rint(0.5000001) ==> 1.0
+rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
+```
+)doc");
+
// Declares cwise binary operations signature: 't, 't -> 't.
#define BINARY_MORE() \
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index bf87c0a495..6d100a718d 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -6591,7 +6591,7 @@ op {
attr {
name: "rates"
type: "list(int)"
- description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by\nsubsampling them spatially by a factor of `rates`."
+ description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by\nsubsampling them spatially by a factor of `rates`."
has_minimum: true
minimum: 4
}
diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index 439bf97a2c..2fa2726bde 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -4,3 +4,6 @@
def tf_cuda_tests_tags():
return ["local"]
+
+def tf_sycl_tests_tags():
+ return ["local"]
diff --git a/tensorflow/core/platform/setround.cc b/tensorflow/core/platform/setround.cc
new file mode 100644
index 0000000000..febb47781c
--- /dev/null
+++ b/tensorflow/core/platform/setround.cc
@@ -0,0 +1,35 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/setround.h"
+
+#ifdef __STDC_IEC_559__
+#include <fenv.h> // fesetround, FE_*
+#endif
+
+namespace tensorflow {
+namespace port {
+
+ScopedSetRound::ScopedSetRound() {
+#ifdef __STDC_IEC_559__
+ std::fesetround(FE_TONEAREST);
+#endif
+}
+
+ScopedSetRound::~ScopedSetRound() {
+}
+
+} // namespace port
+} // namespace tensorflow
diff --git a/tensorflow/core/platform/setround.h b/tensorflow/core/platform/setround.h
new file mode 100644
index 0000000000..4b1b3fd497
--- /dev/null
+++ b/tensorflow/core/platform/setround.h
@@ -0,0 +1,38 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PLATFORM_SETROUND_H_
+#define TENSORFLOW_PLATFORM_SETROUND_H_
+
+#include "tensorflow/core/platform/macros.h"
+
+namespace tensorflow {
+namespace port {
+
+// While this class is active, the floating point rounding mode is set to
+// round-to-nearest (FE_TONEAREST). Note: the destructor does not restore it.
+class ScopedSetRound {
+ public:
+ ScopedSetRound();
+ ~ScopedSetRound();
+
+ private:
+ TF_DISALLOW_COPY_AND_ASSIGN(ScopedSetRound);
+};
+
+} // namespace port
+} // namespace tensorflow
+
+#endif  // TENSORFLOW_PLATFORM_SETROUND_H_
diff --git a/tensorflow/core/platform/windows/env.cc b/tensorflow/core/platform/windows/env.cc
index 41ce5d9320..904d06e2a9 100644
--- a/tensorflow/core/platform/windows/env.cc
+++ b/tensorflow/core/platform/windows/env.cc
@@ -72,7 +72,7 @@ class WindowsEnv : public Env {
}
bool MatchPath(const string& path, const string& pattern) override {
- return PathMatchSpec(path.c_str(), pattern.c_str()) == S_OK;
+ return PathMatchSpec(path.c_str(), pattern.c_str()) == TRUE;
}
uint64 NowMicros() override {
diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc
index c6c42f0150..31516bb2ee 100644
--- a/tensorflow/core/platform/windows/windows_file_system.cc
+++ b/tensorflow/core/platform/windows/windows_file_system.cc
@@ -386,7 +386,7 @@ Status WindowsFileSystem::GetChildren(const string& dir,
string pattern = translated_dir;
if (!pattern.empty() && pattern.back() != '\\' && pattern.back() != '/') {
- pattern += '\\*';
+ pattern += "\\*";
} else {
pattern += '*';
}
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 66af1897be..dd2dad417d 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -20,7 +20,7 @@ limitations under the License.
#define TF_MAJOR_VERSION 0
#define TF_MINOR_VERSION 11
-#define TF_PATCH_VERSION 0rc2
+#define TF_PATCH_VERSION head
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc
index abf8d77f86..f1885bb980 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.cc
+++ b/tensorflow/core/util/example_proto_fast_parsing.cc
@@ -87,6 +87,8 @@ class Feature {
*dtype = DT_INT64;
break;
default:
+ // Initialize variable to avoid compiler warning
+ *dtype = DT_INVALID;
return errors::InvalidArgument("Unsuported datatype.");
}
return Status::OK();
diff --git a/tensorflow/core/util/tensor_format.h b/tensorflow/core/util/tensor_format.h
index 560b6310b2..a21dee5d58 100644
--- a/tensorflow/core/util/tensor_format.h
+++ b/tensorflow/core/util/tensor_format.h
@@ -58,6 +58,7 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
return 1 + NDIMS;
default:
LOG(FATAL) << "Invalid dimension: " << dimension;
+ return -1; // Avoid compiler warning about missing return value
}
} else if (format == FORMAT_NCHW) {
switch (dimension) {
@@ -77,9 +78,11 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
return NDIMS + 1;
default:
LOG(FATAL) << "Invalid dimension: " << dimension;
+ return -1; // Avoid compiler warning about missing return value
}
} else {
LOG(FATAL) << "Invalid format: " << static_cast<int>(format);
+ return -1; // Avoid compiler warning about missing return value
}
}
diff --git a/tensorflow/core/util/tensor_slice_reader_cache.cc b/tensorflow/core/util/tensor_slice_reader_cache.cc
index 9ac44f328e..06fc9aa444 100644
--- a/tensorflow/core/util/tensor_slice_reader_cache.cc
+++ b/tensorflow/core/util/tensor_slice_reader_cache.cc
@@ -52,7 +52,7 @@ const TensorSliceReader* TensorSliceReaderCache::GetReader(
TensorSliceReader::OpenTableFunction open_function, int preferred_shard) {
mutex_lock l(mu_);
-#ifdef __GXX_RTTI
+#if defined(__GXX_RTTI) || defined(_CPPRTTI)
// Get the function pointer from the open_function value.
TensorSliceReaderCache::OpenFuncType* func_ptr =
open_function.target<TensorSliceReaderCache::OpenFuncType>();
diff --git a/tensorflow/g3doc/api_docs/python/array_ops.md b/tensorflow/g3doc/api_docs/python/array_ops.md
index 61fecfdf09..b076bd28cf 100644
--- a/tensorflow/g3doc/api_docs/python/array_ops.md
+++ b/tensorflow/g3doc/api_docs/python/array_ops.md
@@ -1428,7 +1428,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
- `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
+ `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
The type of padding algorithm to use.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md
index e79dfd2f4d..3fbb0c303a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md
@@ -3,7 +3,7 @@
Generates values in an interval.
A sequence of `num` evenly-spaced values are generated beginning at `start`.
-If `num > 1`, the values in the sequence increase by `stop - start / num - 1`,
+If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`,
so that the last one is exactly `stop`.
For example:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md
index 82f44f6f4d..e898cbaa4f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md
@@ -11,8 +11,8 @@ the full softmax loss.
At inference time, you can compute full softmax probabilities with the
expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.
-See our [Candidate Sampling Algorithms Reference]
-(../../extras/candidate_sampling.pdf)
+See our
+[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf)
Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md
index bf6f268d4f..853ce0176f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md
@@ -16,7 +16,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
- `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
+ `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
The type of padding algorithm to use.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md
index 403621dc00..0867e30876 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md
@@ -17,7 +17,7 @@ for k in 0..in_channels-1
filter[di, dj, k, q]
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
##### Args:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md
index f405aa7707..186209d3f5 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md
@@ -42,8 +42,7 @@ with an otherwise unused class.
where a sampled class equals one of the target classes. If set to
`True`, this is a "Sampled Logistic" loss instead of NCE, and we are
learning to generate log-odds instead of log probabilities. See
- our [Candidate Sampling Algorithms Reference]
- (../../extras/candidate_sampling.pdf).
+ our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf).
Default is False.
* <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant
if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md
index 4feb46302d..e7d32923a1 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md
@@ -11,8 +11,8 @@ each component is divided by the weighted, squared sum of inputs within
sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
output = input / (bias + alpha * sqr_sum) ** beta
-For details, see [Krizhevsky et al., ImageNet classification with deep
-convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+For details, see
+[Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
##### Args:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md
index a7d2cf094f..8153224ed4 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md
@@ -22,7 +22,7 @@ In detail, with the default NHWC format,
filter[di, dj, q, k]
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
##### Args:
diff --git a/tensorflow/g3doc/get_started/os_setup.md b/tensorflow/g3doc/get_started/os_setup.md
index 5ef608c7b8..431e214b30 100644
--- a/tensorflow/g3doc/get_started/os_setup.md
+++ b/tensorflow/g3doc/get_started/os_setup.md
@@ -63,37 +63,37 @@ Then, select the correct binary to install:
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
```
Install TensorFlow:
@@ -159,37 +159,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
```
Finally install TensorFlow:
@@ -298,37 +298,37 @@ select the correct binary to install:
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
```
Finally install TensorFlow:
@@ -396,7 +396,7 @@ code.
code.
We also have tags with `latest` replaced by a released version (e.g.,
-`0.11.0rc2-gpu`).
+`0.11.0-gpu`).
With Docker the installation is as follows:
@@ -781,7 +781,7 @@ $ bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_pack
$ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
# The name of the .whl file will depend on your platform.
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc2-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0-py2-none-any.whl
```
## Setting up TensorFlow for Development
diff --git a/tensorflow/g3doc/how_tos/adding_an_op/index.md b/tensorflow/g3doc/how_tos/adding_an_op/index.md
index 3be1efb13f..aa5dcfb4f2 100644
--- a/tensorflow/g3doc/how_tos/adding_an_op/index.md
+++ b/tensorflow/g3doc/how_tos/adding_an_op/index.md
@@ -44,6 +44,8 @@ add a call to the `REGISTER_OP` macro that defines the interface for such an Op:
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
+using namespace tensorflow;
+
REGISTER_OP("ZeroOut")
.Input("to_zero: int32")
.Output("zeroed: int32")
@@ -236,12 +238,26 @@ class ZeroOutTest(tf.test.TestCase):
with self.test_session():
result = zero_out_module.zero_out([5, 4, 3, 2, 1])
self.assertAllEqual(result.eval(), [5, 0, 0, 0, 0])
+
+if __name__ == "__main__":
+ tf.test.main()
+```
+
+Add a 'zero_out_op_test' target to `tensorflow/python/kernel_tests/BUILD` among the other CPU-only test targets:
+
+```
+tf_py_test(
+ name = "zero_out_op_test",
+ size = "small",
+ srcs = ["zero_out_op_test.py"],
+ additional_deps = ["//tensorflow:tensorflow_py"],
+)
```
Then run your test:
```sh
-$ bazel test tensorflow/python:zero_out_op_test
+$ bazel test //tensorflow/python/kernel_tests:zero_out_op_test
```
## Validation
@@ -895,7 +911,7 @@ For more details, see
In general, changes to specifications must be backwards-compatible: changing the
specification of an Op must not break prior serialized `GraphDef` protocol
-buffers constructed from older specfications. The details of `GraphDef`
+buffers constructed from older specifications. The details of `GraphDef`
compatibility are [described here](../../resources/versions.md#graphs).
There are several ways to preserve backwards-compatibility.
@@ -1117,7 +1133,7 @@ found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/fr
REGISTER_OP("ZeroOut")
.Input("to_zero: int32")
.Output("zeroed: int32")
- .SetShapeFn([](::tensorflow::shape_inference::UnchangedShape);
+ .SetShapeFn(::tensorflow::shape_inference::UnchangedShape);
```
A shape function can also constrain the shape of an input. For the version of
@@ -1193,7 +1209,7 @@ the following:
```
This specifies that the shape function should use the C++-implemented
-shape specfication defined in your `REGISTER_OP` declaration above. Note
+shape specification defined in your `REGISTER_OP` declaration above. Note
that TensorFlow will soon make this the default, so you only need
to define the shape function once in C++ to get shape inference for
free in Python.
diff --git a/tensorflow/g3doc/index.md b/tensorflow/g3doc/index.md
index 4d7149961a..3bfba40416 100644
--- a/tensorflow/g3doc/index.md
+++ b/tensorflow/g3doc/index.md
@@ -1,10 +1,5 @@
# TensorFlow for Googlers
-This site has TensorFlow documentation for Google engineers. The menu at the
-left lists those parts of the public TensorFlow documentation that pertain to
-Google engineers, along with some internal-only resources written specifically
-for Google engineers.
-
TensorFlowâ„¢ is an open source software library for numerical computation using
data flow graphs. Nodes in the graph represent mathematical operations, while
the graph edges represent the multidimensional data arrays (tensors) that flow
@@ -18,4 +13,4 @@ applicable in a wide variety of other domains as well. The following documents
show you how to set up and use the TensorFlow system.
## Table of Contents
-<!--#include virtual="sitemap.md" --> \ No newline at end of file
+<!--#include virtual="sitemap.md" -->
diff --git a/tensorflow/models/embedding/word2vec.py b/tensorflow/models/embedding/word2vec.py
index c5cb52bfbc..e463e300c1 100644
--- a/tensorflow/models/embedding/word2vec.py
+++ b/tensorflow/models/embedding/word2vec.py
@@ -147,6 +147,8 @@ class Options(object):
# Where to write out summaries.
self.save_path = FLAGS.save_path
+ if not os.path.exists(self.save_path):
+ os.makedirs(self.save_path)
# Eval options.
# The text file for eval.
diff --git a/tensorflow/models/embedding/word2vec_optimized.py b/tensorflow/models/embedding/word2vec_optimized.py
index a6e8f9277d..2efdf66867 100644
--- a/tensorflow/models/embedding/word2vec_optimized.py
+++ b/tensorflow/models/embedding/word2vec_optimized.py
@@ -126,6 +126,8 @@ class Options(object):
# Where to write out summaries.
self.save_path = FLAGS.save_path
+ if not os.path.exists(self.save_path):
+ os.makedirs(self.save_path)
# Eval options.
diff --git a/tensorflow/models/image/cifar10/cifar10.py b/tensorflow/models/image/cifar10/cifar10.py
index 7df2149d40..1c51b76f09 100644
--- a/tensorflow/models/image/cifar10/cifar10.py
+++ b/tensorflow/models/image/cifar10/cifar10.py
@@ -207,8 +207,8 @@ def inference(images):
wd=0.0)
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
- bias = tf.nn.bias_add(conv, biases)
- conv1 = tf.nn.relu(bias, name=scope.name)
+ pre_activation = tf.nn.bias_add(conv, biases)
+ conv1 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv1)
# pool1
@@ -226,8 +226,8 @@ def inference(images):
wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
- bias = tf.nn.bias_add(conv, biases)
- conv2 = tf.nn.relu(bias, name=scope.name)
+ pre_activation = tf.nn.bias_add(conv, biases)
+ conv2 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv2)
# norm2
diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index d626165dc0..dd55154a3a 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -122,6 +122,54 @@ _REGISTERED_EXPANSIONS = [
lambda feed: [feed])]
# pylint: enable=g-long-lambda
+def register_session_run_conversion_functions(tensor_type, fetch_function,
+ feed_function=None, feed_function_for_partial_run=None):
+ """Register fetch and feed conversion functions for `tf.Session.run()`.
+
+ This function registers a triple of conversion functions for fetching and/or
+ feeding values of user-defined types in a call to tf.Session.run().
+
+ An example
+
+ ```python
+ class SquaredTensor(object):
+ def __init__(self, tensor):
+ self.sq = tf.square(tensor)
+ #you can define conversion functions as follows:
+ fetch_function = lambda squared_tensor:([squared_tensor.sq],
+ lambda val: val[0])
+ feed_function = lambda feed, feed_val: [(feed.sq, feed_val)]
+ feed_function_for_partial_run = lambda feed: [feed.sq]
+ #then after invoking this register function, you can use as follows:
+ session.run(squared_tensor1,
+ feed_dict = {squared_tensor2 : some_numpy_array})
+ ```
+
+ Args:
+ tensor_type: The type for which you want to register a conversion function.
+ fetch_function: A callable that takes an object of type `tensor_type` and
+ returns a tuple, where the first element is a list of `tf.Tensor` objects,
+ and the second element is a callable that takes a list of ndarrays and
+ returns an object of some value type that corresponds to `tensor_type`.
+ fetch_function describes how to expand fetch into its component Tensors
+ and how to contract the fetched results back into a single return value.
+ feed_function: A callable that takes feed_key and feed_value as input, and
+ returns a list of tuples (feed_tensor, feed_val), feed_key must have type
+ `tensor_type`, and feed_tensor must have type `tf.Tensor`. Each feed
+ function describes how to unpack a single fed value and map it to feeds
+ of one or more tensors and their corresponding values.
+ feed_function_for_partial_run: A callable for specifying tensor values to
+ feed when setting up a partial run, which takes a `tensor_type` type
+ object as input, and returns a list of Tensors.
+ """
+ for conversion_function in _REGISTERED_EXPANSIONS:
+ if issubclass(conversion_function[0], tensor_type):
+ raise ValueError(
+ '%s has already been registered so ignore it.', tensor_type)
+ return
+ _REGISTERED_EXPANSIONS.insert(0,
+ (tensor_type, fetch_function, feed_function, feed_function_for_partial_run))
+
class _FetchMapper(object):
"""Definition of the interface provided by fetch mappers.
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index bf0a964867..a20376b91d 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -1554,6 +1554,33 @@ class SessionTest(test_util.TensorFlowTestCase):
sess.run(enqueue_op)
self.assertEqual(sess.run(q.size()), num_epochs * 2)
+ def testRegisterFetchAndFeedConversionFunctions(self):
+ class SquaredTensor(object):
+ def __init__(self, tensor):
+ self.sq = math_ops.square(tensor)
+
+ fetch_fn = lambda squared_tensor: ([squared_tensor.sq], lambda val: val[0])
+ feed_fn1 = lambda feed, feed_val: [(feed.sq, feed_val)]
+ feed_fn2 = lambda feed: [feed.sq]
+
+ session.register_session_run_conversion_functions(SquaredTensor, fetch_fn,
+ feed_fn1, feed_fn2)
+ with self.assertRaises(ValueError):
+ session.register_session_run_conversion_functions(SquaredTensor,
+ fetch_fn, feed_fn1, feed_fn2)
+ with self.test_session() as sess:
+ np1 = np.array([1.0, 1.5, 2.0, 2.5])
+ np2 = np.array([3.0, 3.5, 4.0, 4.5])
+ squared_tensor = SquaredTensor(np2)
+ squared_eval = sess.run(squared_tensor)
+ self.assertAllClose(np2 * np2, squared_eval)
+ squared_eval = sess.run(squared_tensor, feed_dict={
+ squared_tensor : np1 * np1})
+ self.assertAllClose(np1 * np1, squared_eval)
+ partial_run = sess.partial_run_setup([squared_tensor], [])
+ squared_eval = sess.partial_run(partial_run, squared_tensor)
+ self.assertAllClose(np2 * np2, squared_eval)
+
if __name__ == '__main__':
googletest.main()
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index dc7c72e220..8db55a9359 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -12,6 +12,7 @@ licenses(["notice"]) # Apache 2.0
load("//tensorflow:tensorflow.bzl", "tf_py_test")
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "sycl_py_test")
# CPU only tests should use tf_py_test, GPU tests use cuda_py_test
# Please avoid the py_tests and cuda_py_tests (plural) while we
@@ -1362,6 +1363,13 @@ cuda_py_test(
tags = ["nomsan"], # fails in msan from numpy calls
)
+sycl_py_test(
+ name = "basic_gpu_test",
+ size = "small",
+ srcs = ["basic_gpu_test.py"],
+ additional_deps = ["//tensorflow:tensorflow_py"],
+)
+
filegroup(
name = "all_files",
srcs = glob(
diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py
new file mode 100644
index 0000000000..541c95aa28
--- /dev/null
+++ b/tensorflow/python/kernel_tests/basic_gpu_test.py
@@ -0,0 +1,61 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for basic component wise operations using a GPU device."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+import math
+import numpy as np
+from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args
+
+class GPUBinaryOpsTest(tf.test.TestCase):
+ def _compareGPU(self, x, y, np_func, tf_func):
+ with self.test_session(use_gpu=True) as sess:
+ inx = tf.convert_to_tensor(x)
+ iny = tf.convert_to_tensor(y)
+ out = tf_func(inx, iny)
+ tf_gpu = sess.run(out)
+
+ with self.test_session(use_gpu=False) as sess:
+ inx = tf.convert_to_tensor(x)
+ iny = tf.convert_to_tensor(y)
+ out = tf_func(inx, iny)
+ tf_cpu = sess.run(out)
+
+ self.assertAllClose(tf_cpu, tf_gpu)
+
+ def testFloatBasic(self):
+ x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32)
+ y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32)
+ self._compareGPU(x, y, np.add, tf.add)
+ self._compareGPU(x, y, np.subtract, tf.sub)
+ self._compareGPU(x, y, np.multiply, tf.mul)
+ self._compareGPU(x, y + 0.1, np.true_divide, tf.truediv)
+
+ #def _GetGradientArgs(self, xs, ys):
+ #with self.test_session(use_gpu=True) as sess:
+ # return sess.run(_broadcast_gradient_args(xs, ys))
+
+ #def testBroadcast(self):
+ #r0, r1 = self._GetGradientArgs([2, 3, 5], [1])
+ #self.assertAllEqual(r0, [])
+ #self.assertAllEqual(r1, [0, 1, 2])
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index bca2030d01..3322d22b9f 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -1778,9 +1778,17 @@ class IsFiniteInfNanTest(tf.test.TestCase):
class RoundingTest(tf.test.TestCase):
- def _compare(self, x, use_gpu):
+ def _compare_values(self, x, y=None):
+ y = np.rint(x) if y is None else np.asarray(y)
+ with self.test_session() as sess:
+ tf_rint = tf.rint(x)
+ np_rint = sess.run(tf_rint)
+ self.assertAllEqual(y, np_rint)
+ self.assertShapeEqual(y, tf_rint)
+
+ def _compare(self, x):
np_floor, np_ceil = np.floor(x), np.ceil(x)
- with self.test_session(use_gpu=use_gpu) as sess:
+ with self.test_session() as sess:
inx = tf.convert_to_tensor(x)
ofloor, oceil = tf.floor(inx), tf.ceil(inx)
tf_floor, tf_ceil = sess.run([ofloor, oceil])
@@ -1790,9 +1798,20 @@ class RoundingTest(tf.test.TestCase):
self.assertShapeEqual(np_ceil, oceil)
def _testDtype(self, dtype):
- data = (np.arange(-3, 3) / 4.).reshape([1, 3, 2]).astype(dtype)
- self._compare(data, use_gpu=True)
- self._compare(data, use_gpu=True)
+ data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(dtype)
+ self._compare(data)
+ # TODO: rint op is not supported for float16
+ if dtype is np.float16:
+ return
+ self._compare_values(data)
+ x = [0.5, 0.5000001]
+ y = [0.0, 1.0]
+ self._compare_values(x, y=y)
+
+ # numpy example
+ x = [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]
+ y = [-2., -2., -0., 0., 2., 2., 2.]
+ self._compare_values(x, y=y)
def testTypes(self):
for dtype in [np.float16, np.float32, np.float64]:
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index fd39b8f4c6..1285ed1912 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -28,25 +28,27 @@ from tensorflow.python.ops import init_ops
# Returns true iff the two initializers produce the same tensor to
# within a tiny tolerance.
-def identicaltest(tc, init1, init2):
+def identicaltest(tc, init1, init2, shape=None):
"""Tests if two initializations are identical to within tiny tolerances.
Args:
tc: An instance of TensorFlowTestCase.
init1: An Initializer that generates a tensor of a given shape
init2: An Initializer that generates a tensor of a given shape
+ shape: Shape of the tensor to initialize or `None` to use a vector of length 100.
Returns:
True or False as determined by test.
"""
- num = 100
+ if shape is None:
+ shape = [100]
with tc.test_session(graph=tf.Graph()):
- t1 = init1([num]).eval()
+ t1 = init1(shape).eval()
with tc.test_session(graph=tf.Graph()):
- t2 = init2([num]).eval()
+ t2 = init2(shape).eval()
return np.allclose(t1, t2, rtol=1e-15, atol=1e-15)
-def duplicated_initializer(tc, init, graph_seed):
+def duplicated_initializer(tc, init, graph_seed, shape=None):
"""Tests duplicated random initializer within the same graph.
This test generates two random kernels from the same initializer to the same
@@ -58,14 +60,16 @@ def duplicated_initializer(tc, init, graph_seed):
tc: An instance of TensorFlowTestCase.
init: An Initializer that generates a tensor of a given shape
graph_seed: A graph-level seed to use.
+ shape: Shape of the tensor to initialize or `None` to use a vector of length 100.
Returns:
True or False as determined by test.
"""
- num = 100
+ if shape is None:
+ shape = [100]
with tc.test_session(graph=tf.Graph()):
random_seed.set_random_seed(graph_seed)
- t1 = init([num]).eval()
- t2 = init([num]).eval()
+ t1 = init(shape).eval()
+ t2 = init(shape).eval()
return np.allclose(t1, t2, rtol=1e-15, atol=1e-15)
@@ -444,5 +448,59 @@ class DeviceTest(tf.test.TestCase):
self.assertDeviceEqual("/job:ps", var.initializer.device)
+class OrthogonalInitializerTest(tf.test.TestCase):
+
+ def testInitializerIdentical(self):
+ for dtype in [tf.float32, tf.float64]:
+ init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
+ init2 = tf.orthogonal_initializer(seed=1, dtype=dtype)
+ self.assertTrue(identicaltest(self, init1, init2, (10, 10)))
+
+ def testInitializerDifferent(self):
+ for dtype in [tf.float32, tf.float64]:
+ init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
+ init2 = tf.orthogonal_initializer(seed=2, dtype=dtype)
+ self.assertFalse(identicaltest(self, init1, init2, (10, 10)))
+
+ def testDuplicatedInitializer(self):
+ init = tf.orthogonal_initializer()
+ self.assertFalse(duplicated_initializer(self, init, 1, (10, 10)))
+
+ def testInvalidDataType(self):
+ self.assertRaises(
+ ValueError,
+ tf.orthogonal_initializer, dtype=tf.string)
+
+ def testInvalidShape(self):
+ init1 = tf.orthogonal_initializer()
+ with self.test_session(graph=tf.Graph(), use_gpu=True):
+ self.assertRaises(ValueError, init1, shape=[5])
+
+ def testGain(self):
+ shape = (10, 10)
+ for dtype in [tf.float32, tf.float64]:
+ init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
+ init2 = tf.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
+ with self.test_session(graph=tf.Graph(), use_gpu=True):
+ t1 = init1(shape).eval()
+ with self.test_session(graph=tf.Graph(), use_gpu=True):
+ t2 = init2(shape).eval()
+ return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)
+
+ def testShapesValues(self):
+ for dtype in [tf.float32, tf.float64]:
+ for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]:
+ init = tf.orthogonal_initializer(dtype=dtype)
+ with self.test_session(graph=tf.Graph(), use_gpu=True):
+ # Check the shape
+ t = init(shape).eval()
+ self.assertAllEqual(shape, t.shape)
+ # Check orthogonality by computing the inner product
+ t = t.reshape((np.prod(t.shape[:-1]), t.shape[-1]))
+ if t.shape[0] > t.shape[1]:
+ self.assertAllClose(np.dot(t.T, t), np.eye(t.shape[1]))
+ else:
+ self.assertAllClose(np.dot(t, t.T), np.eye(t.shape[0]))
+
if __name__ == "__main__":
tf.test.main()
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index a037069d2f..0197e8f390 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -523,6 +523,10 @@ def _ExtractImagePatchesGrad(op, grad):
batch_size, rows_in, cols_in, channels = [
dim.value for dim in op.inputs[0].get_shape()
]
+ input_bhwc = array_ops.shape(op.inputs[0])
+ batch_size = input_bhwc[0]
+ channels = input_bhwc[3]
+
_, rows_out, cols_out, _ = [
dim.value for dim in op.outputs[0].get_shape()
]
diff --git a/tensorflow/python/ops/image_grad.py b/tensorflow/python/ops/image_grad.py
index 81f089660a..b6b61ab92c 100644
--- a/tensorflow/python/ops/image_grad.py
+++ b/tensorflow/python/ops/image_grad.py
@@ -35,10 +35,16 @@ def _ResizeNearestNeighborGrad(op, grad):
Returns:
The gradients w.r.t. the input and the output.
"""
+ image = op.inputs[0]
+ if image.get_shape()[1:3].is_fully_defined():
+ image_shape = image.get_shape()[1:3]
+ else:
+ image_shape = array_ops.shape(image)[1:3]
+
# pylint: disable=protected-access
grads = gen_image_ops._resize_nearest_neighbor_grad(
grad,
- op.inputs[0].get_shape()[1:3],
+ image_shape,
align_corners=op.get_attr("align_corners"))
# pylint: enable=protected-access
return [grads, None]
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index 94364b408a..bbe9977d4b 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -40,6 +40,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import linalg_ops
def _assert_float_dtype(dtype):
@@ -343,3 +344,55 @@ class _RandomWalkInitializer(object):
"""Generate a tensor used to initialize a variable."""
return random_ops._random_walk(shape, self._nonlinearity, dtype,
seed=self._seed)
+
+
+def orthogonal_initializer(gain=1.0, dtype=dtypes.float32, seed=None):
+ """Returns an initializer that generates an orthogonal matrix or a reshaped
+ orthogonal matrix.
+
+  If the shape of the tensor to initialize is two-dimensional, it is initialized
+ with an orthogonal matrix obtained from the singular value decomposition of a
+ matrix of uniform random numbers.
+
+ If the shape of the tensor to initialize is more than two-dimensional, a matrix
+ of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])` is initialized, where
+ `n` is the length of the shape vector. The matrix is subsequently reshaped to
+ give a tensor of the desired shape.
+
+ Args:
+ gain: multiplicative factor to apply to the orthogonal matrix
+ dtype: The type of the output.
+ seed: A Python integer. Used to create random seeds. See
+ [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
+ for behavior.
+
+ Returns:
+ An initializer that generates orthogonal tensors
+
+ Raises:
+ ValueError: if `dtype` is not a floating point type or if `shape` has fewer than two entries.
+ """
+ def _initializer(shape, dtype=_assert_float_dtype(dtype), partition_info=None):
+ # Check the shape
+ if len(shape) < 2:
+ raise ValueError('the tensor to initialize must be at least two-dimensional')
+ # Flatten the input shape with the last dimension remaining its original shape so it works for conv2d
+ num_rows = 1
+ for dim in shape[:-1]:
+ num_rows *= dim
+ num_cols = shape[-1]
+ flat_shape = (num_rows, num_cols)
+
+ # Generate a random matrix
+ a = random_ops.random_uniform(flat_shape, dtype=dtype, seed=seed)
+ # Compute the svd
+ _, u, v = linalg_ops.svd(a, full_matrices=False)
+ # Pick the appropriate singular value decomposition
+ if num_rows > num_cols:
+ q = u
+ else:
+ # Tensorflow departs from numpy conventions such that we need to transpose axes here
+ q = array_ops.transpose(v)
+ return gain * array_ops.reshape(q, shape)
+
+ return _initializer
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 8d999f0074..2e8328a98b 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -796,6 +796,12 @@ def _FloorGrad(_, unused_grad):
return [None]
+@ops.RegisterGradient("Rint")
+def _RintGrad(_, unused_grad):
+ # the gradient of Rint is zero
+ return [None]
+
+
@ops.RegisterGradient("BatchMatMul")
def _BatchMatMul(op, grad):
"""Returns the gradient of x and y given the gradient of x * y."""
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index a431b66b85..d9a723a163 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -75,6 +75,7 @@ mathematical functions to your graph.
@@zeta
@@polygamma
@@betainc
+@@rint
## Matrix Math Functions
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index 232b6e9cdf..f2a201a609 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -69,6 +69,7 @@ create variables contingent on certain conditions.
@@uniform_unit_scaling_initializer
@@zeros_initializer
@@ones_initializer
+@@orthogonal_initializer
## Variable Partitioners for Sharding
diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py
index 7af533e74f..70ecda1dda 100644
--- a/tensorflow/python/platform/tf_logging.py
+++ b/tensorflow/python/platform/tf_logging.py
@@ -171,7 +171,6 @@ def google2_log_prefix(level, timestamp=None, file_and_line=None):
"""Assemble a logline prefix using the google2 format."""
# pylint: disable=global-variable-not-assigned
global _level_names
- global _logfile_map, _logfile_map_mutex
# pylint: enable=global-variable-not-assigned
# Record current time
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 31b4461dad..47a83d51a4 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -41,6 +41,7 @@ def check_version(bazel_version):
load(
"//tensorflow/core:platform/default/build_config_root.bzl",
"tf_cuda_tests_tags",
+ "tf_sycl_tests_tags",
)
load(
"@local_config_cuda//cuda:build_defs.bzl",
@@ -886,6 +887,20 @@ def cuda_py_test(name, srcs, size="medium", data=[], main=None, args=[],
additional_deps=additional_deps,
flaky=flaky)
+def sycl_py_test(name, srcs, size="medium", data=[], main=None, args=[],
+ shard_count=1, additional_deps=[], tags=[], flaky=0):
+ test_tags = tags + tf_sycl_tests_tags()
+ tf_py_test(name=name,
+ size=size,
+ srcs=srcs,
+ data=data,
+ main=main,
+ args=args,
+ tags=test_tags,
+ shard_count=shard_count,
+ additional_deps=additional_deps,
+ flaky=flaky)
+
def py_tests(name,
srcs,
size="medium",
diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat
index c8f65402ff..45ddfaee9c 100644
--- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat
+++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat
@@ -28,10 +28,10 @@ CALL %REPO_ROOT%\tensorflow\tools\ci_build\windows\cpu\cmake\run_build.bat
SET PIP_EXE="C:\Program Files\Anaconda3\Scripts\pip.exe"
:: Uninstall tensorflow pip package, which might be a leftover from old runs.
-%PIP_EXE% uninstall tensorflow
+%PIP_EXE% uninstall -y tensorflow
:: Install the pip package.
-%PIP_EXE% install %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\tensorflow-0.11.0rc2_cmake_experimental-py3-none-any.whl
+%PIP_EXE% install --upgrade %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\tensorflow-0.11.0rc2_cmake_experimental-py3-none-any.whl
:: Run all python tests
ctest -C Release --output-on-failure
diff --git a/tensorflow/tools/docker/Dockerfile b/tensorflow/tools/docker/Dockerfile
index 39da8b2e65..82973226fc 100644
--- a/tensorflow/tools/docker/Dockerfile
+++ b/tensorflow/tools/docker/Dockerfile
@@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \
&& \
python -m ipykernel.kernelspec
-ENV TENSORFLOW_VERSION 0.11.0rc2
+ENV TENSORFLOW_VERSION 0.11.0
# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index b369bc10e7..30de920130 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \
&& \
python -m ipykernel.kernelspec
-ENV TENSORFLOW_VERSION 0.11.0rc2
+ENV TENSORFLOW_VERSION 0.11.0
# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #
diff --git a/tensorflow/tools/gcs_test/Dockerfile b/tensorflow/tools/gcs_test/Dockerfile
index 3b95e32665..a9db7ce9b0 100644
--- a/tensorflow/tools/gcs_test/Dockerfile
+++ b/tensorflow/tools/gcs_test/Dockerfile
@@ -17,7 +17,7 @@ RUN ./install_google_cloud_sdk.bash --disable-prompts --install-dir=/var/gcloud
# Install nightly TensorFlow pip
RUN pip install \
- https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+ https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Copy test files
RUN mkdir -p /gcs-smoke/python
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 84c6a06a9a..8a15e0a296 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution
-_VERSION = '0.11.0rc2'
+_VERSION = '0.11.0'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
index c31f3c4e81..c5b3ca38ba 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
@@ -154,7 +154,7 @@ class Generator {
string GetPackageReferencePrefix(const FileDescriptor* fd) {
string result = "::";
const string& package = fd->package();
- for (int i = 0; i < package.size(); ++i) {
+ for (size_t i = 0; i < package.size(); ++i) {
if (package[i] == '.') {
result += "::";
} else {
@@ -446,6 +446,7 @@ void Generator::AppendParseMessageFunction(const Descriptor& md) {
Print("StringPiece identifier;");
Print("if (!scanner->GetResult(nullptr, &identifier)) return false;");
Print("bool parsed_colon = false;");
+ Print("(void)parsed_colon;"); // Avoid "set but not used" compiler warning
Print("ProtoSpaceAndComments(scanner);");
Print("if (scanner->Peek() == ':') {");
Nest().Print("parsed_colon = true;");
@@ -675,7 +676,7 @@ void Generator::AppendMessageFunctions(const Descriptor& md) {
void Generator::AddNamespaceToCurrentSection(const string& package, bool open) {
Print();
std::vector<string> parts = {""};
- for (int i = 0; i < package.size(); ++i) {
+ for (size_t i = 0; i < package.size(); ++i) {
if (package[i] == '.') {
parts.resize(parts.size() + 1);
} else {
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 617101a306..06e16cdb04 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -17,8 +17,8 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
# These lines need to be changed when updating Eigen. They are parsed from
# this file by the cmake and make builds to determine the eigen version and
# hash.
- eigen_version = "3d41a24add9b"
- eigen_sha256 = "ce91f8db04493096c0f8bd5ebca7f5a295c88874cda99b3e9d99c9ed10154f99"
+ eigen_version = "62bdceacdafa"
+ eigen_sha256 = "c66f4693a0fd1f5c2cf009e01eb49671ce9cbb56874c3d07d3b8928ffc132cec"
native.new_http_archive(
name = "eigen_archive",