aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2016-11-17 15:37:00 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-11-17 15:45:18 -0800
commit54e5000e0b980abe905900599c4493fadae34a15 (patch)
treec1f0751e5565882a77646589360fe090bec3f3fc /tensorflow
parent8a5610cd9f0b7087c1a7e97071ba1cf9b885315a (diff)
Merge changes from github.
Change: 139516555
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/c/c_api.cc2
-rw-r--r--tensorflow/contrib/cmake/CMakeLists.txt3
-rw-r--r--tensorflow/contrib/cmake/external/farmhash.cmake6
-rw-r--r--tensorflow/contrib/cmake/external/gemmlowp.cmake6
-rw-r--r--tensorflow/contrib/cmake/external/grpc.cmake2
-rw-r--r--tensorflow/contrib/cmake/external/highwayhash.cmake6
-rw-r--r--tensorflow/contrib/cmake/external/jpeg.cmake8
-rw-r--r--tensorflow/contrib/cmake/external/jsoncpp.cmake2
-rw-r--r--tensorflow/contrib/cmake/external/protobuf.cmake2
-rw-r--r--tensorflow/contrib/cmake/setup.py2
-rw-r--r--tensorflow/contrib/cmake/tf_core_kernels.cmake10
-rw-r--r--tensorflow/contrib/cmake/tf_core_ops.cmake11
-rw-r--r--tensorflow/contrib/cmake/tf_python.cmake237
-rw-r--r--tensorflow/contrib/cmake/tf_tests.cmake4
-rw-r--r--tensorflow/contrib/factorization/kernels/wals_solver_ops.cc2
-rw-r--r--tensorflow/contrib/makefile/proto_text_cc_files.txt1
-rw-r--r--tensorflow/contrib/metrics/kernels/set_kernels.cc2
-rw-r--r--tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py20
-rw-r--r--tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py4
-rw-r--r--tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc2
-rw-r--r--tensorflow/core/BUILD5
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_allocator.cc35
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_allocator.h45
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device.cc65
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device.h47
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device_context.cc155
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device_context.h18
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device_factory.cc15
-rw-r--r--tensorflow/core/distributed_runtime/master.cc6
-rw-r--r--tensorflow/core/distributed_runtime/rpc/grpc_channel.cc2
-rw-r--r--tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc2
-rw-r--r--tensorflow/core/distributed_runtime/tensor_coding.cc2
-rw-r--r--tensorflow/core/kernels/avgpooling_op_gpu.cu.cc2
-rw-r--r--tensorflow/core/kernels/bcast_ops.cc10
-rw-r--r--tensorflow/core/kernels/constant_op.cc3
-rw-r--r--tensorflow/core/kernels/control_flow_ops.cc45
-rw-r--r--tensorflow/core/kernels/cwise_op_add_1.cc12
-rw-r--r--tensorflow/core/kernels/cwise_op_div.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_floor_div.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc26
-rw-r--r--tensorflow/core/kernels/cwise_op_isfinite.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_isinf.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_isnan.cc10
-rw-r--r--tensorflow/core/kernels/cwise_op_mul_1.cc11
-rw-r--r--tensorflow/core/kernels/cwise_op_rint.cc23
-rw-r--r--tensorflow/core/kernels/cwise_op_sub.cc10
-rw-r--r--tensorflow/core/kernels/cwise_ops.h21
-rw-r--r--tensorflow/core/kernels/cwise_ops_sycl_common.h50
-rw-r--r--tensorflow/core/kernels/cwise_ops_test.cc5
-rw-r--r--tensorflow/core/kernels/dense_update_ops.cc15
-rw-r--r--tensorflow/core/kernels/eigen_pooling.h2
-rw-r--r--tensorflow/core/kernels/eigen_spatial_convolutions.h3
-rw-r--r--tensorflow/core/kernels/gather_nd_op_gpu.cu.cc4
-rw-r--r--tensorflow/core/kernels/identity_op.cc1
-rw-r--r--tensorflow/core/kernels/matrix_inverse_op.cc2
-rw-r--r--tensorflow/core/kernels/matrix_solve_op.cc4
-rw-r--r--tensorflow/core/kernels/scatter_nd_op.cc6
-rw-r--r--tensorflow/core/kernels/scatter_nd_op_cpu_impl.h2
-rw-r--r--tensorflow/core/kernels/sendrecv_ops.cc7
-rw-r--r--tensorflow/core/kernels/variable_ops.cc11
-rw-r--r--tensorflow/core/lib/core/threadpool.cc4
-rw-r--r--tensorflow/core/ops/array_ops.cc2
-rw-r--r--tensorflow/core/ops/math_ops.cc19
-rw-r--r--tensorflow/core/ops/ops.pbtxt2
-rw-r--r--tensorflow/core/platform/default/build_config_root.bzl3
-rw-r--r--tensorflow/core/platform/setround.cc35
-rw-r--r--tensorflow/core/platform/setround.h38
-rw-r--r--tensorflow/core/platform/windows/env.cc2
-rw-r--r--tensorflow/core/platform/windows/windows_file_system.cc2
-rw-r--r--tensorflow/core/public/version.h2
-rw-r--r--tensorflow/core/util/example_proto_fast_parsing.cc2
-rw-r--r--tensorflow/core/util/tensor_format.h3
-rw-r--r--tensorflow/core/util/tensor_slice_reader_cache.cc2
-rw-r--r--tensorflow/g3doc/api_docs/python/array_ops.md2
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md2
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md4
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md2
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md2
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md3
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md4
-rw-r--r--tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md2
-rw-r--r--tensorflow/g3doc/get_started/os_setup.md64
-rw-r--r--tensorflow/g3doc/how_tos/adding_an_op/index.md24
-rw-r--r--tensorflow/g3doc/index.md7
-rw-r--r--tensorflow/models/embedding/word2vec.py2
-rw-r--r--tensorflow/models/embedding/word2vec_optimized.py2
-rw-r--r--tensorflow/models/image/cifar10/cifar10.py8
-rw-r--r--tensorflow/python/client/session.py48
-rw-r--r--tensorflow/python/client/session_test.py27
-rw-r--r--tensorflow/python/kernel_tests/BUILD8
-rw-r--r--tensorflow/python/kernel_tests/basic_gpu_test.py61
-rw-r--r--tensorflow/python/kernel_tests/cwise_ops_test.py29
-rw-r--r--tensorflow/python/kernel_tests/init_ops_test.py74
-rw-r--r--tensorflow/python/ops/array_grad.py4
-rw-r--r--tensorflow/python/ops/image_grad.py8
-rw-r--r--tensorflow/python/ops/init_ops.py53
-rw-r--r--tensorflow/python/ops/math_grad.py6
-rw-r--r--tensorflow/python/ops/math_ops.py1
-rw-r--r--tensorflow/python/ops/state_ops.py1
-rw-r--r--tensorflow/python/platform/tf_logging.py1
-rw-r--r--tensorflow/tensorflow.bzl15
-rw-r--r--tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat4
-rw-r--r--tensorflow/tools/docker/Dockerfile2
-rw-r--r--tensorflow/tools/docker/Dockerfile.gpu2
-rw-r--r--tensorflow/tools/gcs_test/Dockerfile2
-rw-r--r--tensorflow/tools/pip_package/setup.py2
-rw-r--r--tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc5
-rw-r--r--tensorflow/workspace.bzl4
108 files changed, 1402 insertions, 253 deletions
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 17d72835a0..a9c426cc34 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -1612,7 +1612,7 @@ TF_Operation* TF_GraphNextOperation(TF_Graph* graph, size_t* pos) {
}
mutex_lock l(graph->mu);
- while (*pos < graph->graph.num_node_ids()) {
+ while (*pos < static_cast<size_t>(graph->graph.num_node_ids())) {
Node* node = graph->graph.FindNodeId(*pos);
// FindNodeId() returns nullptr for nodes that have been deleted.
// We aren't currently allowing nodes to be deleted, but it is safer
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index a935e31f17..b5246cb151 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.1)
# Project
project(tensorflow C CXX)
+# Set C++14 as standard for the whole project
+set(CMAKE_CXX_STANDARD 14)
+
# Actual source is the ../../.. directory
get_filename_component(tf_contrib_source_dir ${tensorflow_SOURCE_DIR} PATH)
get_filename_component(tf_tf_source_dir ${tf_contrib_source_dir} PATH)
diff --git a/tensorflow/contrib/cmake/external/farmhash.cmake b/tensorflow/contrib/cmake/external/farmhash.cmake
index b2c13a14fb..f6805a33aa 100644
--- a/tensorflow/contrib/cmake/external/farmhash.cmake
+++ b/tensorflow/contrib/cmake/external/farmhash.cmake
@@ -3,8 +3,8 @@ include (ExternalProject)
set(farmhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive ${CMAKE_CURRENT_BINARY_DIR}/external/farmhash_archive/util)
set(farmhash_URL https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip)
set(farmhash_HASH SHA256=e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28)
-set(farmhash_BUILD ${CMAKE_BINARY_DIR}/farmhash/src/farmhash)
-set(farmhash_INSTALL ${CMAKE_BINARY_DIR}/farmhash/install)
+set(farmhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/farmhash/src/farmhash)
+set(farmhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/farmhash/install)
set(farmhash_INCLUDES ${farmhash_BUILD})
set(farmhash_HEADERS
"${farmhash_BUILD}/src/farmhash.h"
@@ -19,7 +19,7 @@ if(WIN32)
URL_HASH ${farmhash_HASH}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/farmhash/CMakeLists.txt ${farmhash_BUILD}
INSTALL_DIR ${farmhash_INSTALL}
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index 024c064cf4..93a0c8d864 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -2,8 +2,8 @@ include (ExternalProject)
set(gemmlowp_URL http://github.com/google/gemmlowp/archive/a6f29d8ac48d63293f845f2253eccbf86bc28321.tar.gz)
set(gemmlowp_HASH SHA256=75d40ea8e68b0d1644f052fffe8f14a410b2a73d40ccb859a95c0578d194ec26)
-set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
-set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
+set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
+set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
ExternalProject_Add(gemmlowp
PREFIX gemmlowp
@@ -11,5 +11,5 @@ ExternalProject_Add(gemmlowp
URL_HASH ${gemmlowp_HASH}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
INSTALL_COMMAND "")
diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake
index 1c34458ec8..c33b0dd81e 100644
--- a/tensorflow/contrib/cmake/external/grpc.cmake
+++ b/tensorflow/contrib/cmake/external/grpc.cmake
@@ -24,7 +24,7 @@ ExternalProject_Add(grpc
GIT_TAG ${GRPC_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
INSTALL_COMMAND ""
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
diff --git a/tensorflow/contrib/cmake/external/highwayhash.cmake b/tensorflow/contrib/cmake/external/highwayhash.cmake
index a19ebfe24f..9f80be32cb 100644
--- a/tensorflow/contrib/cmake/external/highwayhash.cmake
+++ b/tensorflow/contrib/cmake/external/highwayhash.cmake
@@ -3,8 +3,8 @@ include (ExternalProject)
set(highwayhash_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/highwayhash)
set(highwayhash_URL https://github.com/google/highwayhash.git)
set(highwayhash_TAG be5edafc2e1a455768e260ccd68ae7317b6690ee)
-set(highwayhash_BUILD ${CMAKE_BINARY_DIR}/highwayhash/src/highwayhash)
-set(highwayhash_INSTALL ${CMAKE_BINARY_DIR}/highwayhash/install)
+set(highwayhash_BUILD ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/src/highwayhash)
+set(highwayhash_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/highwayhash/install)
# put highwayhash includes in the directory where they are expected
add_custom_target(highwayhash_create_destination_dir
@@ -28,7 +28,7 @@ ExternalProject_Add(highwayhash
GIT_TAG ${highwayhash_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/highwayhash/CMakeLists.txt ${highwayhash_BUILD}
INSTALL_DIR ${highwayhash_INSTALL}
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
diff --git a/tensorflow/contrib/cmake/external/jpeg.cmake b/tensorflow/contrib/cmake/external/jpeg.cmake
index a94eb65ddb..cde037949c 100644
--- a/tensorflow/contrib/cmake/external/jpeg.cmake
+++ b/tensorflow/contrib/cmake/external/jpeg.cmake
@@ -3,8 +3,8 @@ include (ExternalProject)
set(jpeg_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/jpeg_archive)
set(jpeg_URL http://www.ijg.org/files/jpegsrc.v9a.tar.gz)
set(jpeg_HASH SHA256=3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7)
-set(jpeg_BUILD ${CMAKE_BINARY_DIR}/jpeg/src/jpeg)
-set(jpeg_INSTALL ${CMAKE_BINARY_DIR}/jpeg/install)
+set(jpeg_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jpeg/src/jpeg)
+set(jpeg_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/jpeg/install)
if(WIN32)
set(jpeg_STATIC_LIBRARIES ${jpeg_INSTALL}/lib/libjpeg.lib)
@@ -32,7 +32,7 @@ if (WIN32)
PREFIX jpeg
URL ${jpeg_URL}
URL_HASH ${jpeg_HASH}
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
+ PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/patches/jpeg/CMakeLists.txt ${jpeg_BUILD}
INSTALL_DIR ${jpeg_INSTALL}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
CMAKE_CACHE_ARGS
@@ -42,7 +42,7 @@ if (WIN32)
)
ExternalProject_Add_Step(jpeg copy_jconfig
- COMMAND ${CMAKE_COMMAND} -E copy
+ COMMAND ${CMAKE_COMMAND} -E copy
${jpeg_BUILD}/jconfig.vc ${jpeg_BUILD}/jconfig.h
DEPENDEES patch
DEPENDERS build
diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake
index 75d5d72703..43d6e0456c 100644
--- a/tensorflow/contrib/cmake/external/jsoncpp.cmake
+++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake
@@ -4,7 +4,7 @@ set(jsoncpp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp)
#set(jsoncpp_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src)
set(jsoncpp_URL https://github.com/open-source-parsers/jsoncpp.git)
set(jsoncpp_TAG 4356d9b)
-set(jsoncpp_BUILD ${CMAKE_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
+set(jsoncpp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jsoncpp/src/jsoncpp/src/lib_json)
set(jsoncpp_LIBRARIES ${jsoncpp_BUILD}/obj/so/libjsoncpp.so)
set(jsoncpp_INCLUDES ${jsoncpp_BUILD})
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index 2155c30185..5ee6987175 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -20,7 +20,7 @@ ExternalProject_Add(protobuf
GIT_TAG ${PROTOBUF_TAG}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
- SOURCE_DIR ${CMAKE_BINARY_DIR}/protobuf/src/protobuf
+ SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf
CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/
-Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
diff --git a/tensorflow/contrib/cmake/setup.py b/tensorflow/contrib/cmake/setup.py
index 78cb546f85..b036016f5e 100644
--- a/tensorflow/contrib/cmake/setup.py
+++ b/tensorflow/contrib/cmake/setup.py
@@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution
-_VERSION = '0.11.0rc2-cmake-experimental'
+_VERSION = '0.11.0-cmake-experimental'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index d4f44803a3..99aa347f6d 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -89,8 +89,6 @@ if(WIN32)
"${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
- "${tensorflow_source_dir}/tensorflow/core/kernels/svd*.cc"
- "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op.*"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
endif(WIN32)
@@ -100,14 +98,6 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
)
-if(WIN32)
- file(GLOB_RECURSE tf_core_gpu_kernels_exclude_srcs
- # not working on windows yet
- "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc"
- )
- list(REMOVE_ITEM tf_core_gpu_kernels_srcs ${tf_core_gpu_kernels_exclude_srcs})
-endif(WIN32)
-
add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
add_dependencies(tf_core_kernels tf_core_cpu)
diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake
index a25b37c754..5523023cb7 100644
--- a/tensorflow/contrib/cmake/tf_core_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_core_ops.cmake
@@ -37,6 +37,17 @@ foreach(tf_op_lib_name ${tf_op_lib_names})
add_dependencies(tf_${tf_op_lib_name} tf_core_framework)
endforeach()
+function(GENERATE_CONTRIB_OP_LIBRARY op_lib_name cc_srcs)
+ add_library(tf_contrib_${op_lib_name}_ops OBJECT ${cc_srcs})
+ add_dependencies(tf_contrib_${op_lib_name}_ops tf_core_framework)
+endfunction()
+
+GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc")
+GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc")
+GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc")
+GENERATE_CONTRIB_OP_LIBRARY(framework_variable "${tensorflow_source_dir}/tensorflow/contrib/framework/ops/variable_ops.cc")
+
+
########################################################
# tf_user_ops library
########################################################
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 9a9aed4375..072d01200e 100644
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -48,24 +48,6 @@ endif(NOT NUMPY_INCLUDE_DIR)
# TODO(mrry): Configure this to build in a directory other than tf_python/
-# tf_python_srcs contains all static .py files
-file(GLOB_RECURSE tf_python_srcs RELATIVE ${tensorflow_source_dir}
- "${tensorflow_source_dir}/tensorflow/python/*.py"
-)
-list(APPEND tf_python_srcs "tensorflow/__init__.py")
-
-# tf_python_copy_scripts_to_destination copies all Python files
-# (including static source and generated protobuf wrappers, but *not*
-# generated TensorFlow op wrappers) into tf_python/.
-add_custom_target(tf_python_copy_scripts_to_destination)
-
-# Copy static files to tf_python/.
-foreach(script ${tf_python_srcs})
- get_filename_component(REL_DIR ${script} DIRECTORY)
- add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
-endforeach()
-
# Generates the Python protobuf wrappers.
# ROOT_DIR must be absolute; subsequent arguments are interpreted as
# paths of .proto files, and must be relative to ROOT_DIR.
@@ -129,6 +111,8 @@ endfunction()
file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
"${tensorflow_source_dir}/tensorflow/core/*.proto"
"${tensorflow_source_dir}/tensorflow/python/*.proto"
+ "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
+ "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
)
RELATIVE_PROTOBUF_GENERATE_PYTHON(
${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_protos_python_srcs}
@@ -140,18 +124,36 @@ RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
add_library(tf_python_protos_cc ${PROTO_SRCS} ${PROTO_HDRS})
+
# tf_python_touchup_modules adds empty __init__.py files to all
# directories containing Python code, so that Python will recognize
# them as modules.
-add_custom_target(tf_python_touchup_modules
- DEPENDS tf_python_copy_scripts_to_destination
-)
+add_custom_target(tf_python_touchup_modules)
+# tf_python_copy_scripts_to_destination copies all Python files
+# (including static source and generated protobuf wrappers, but *not*
+# generated TensorFlow op wrappers) into tf_python/.
+add_custom_target(tf_python_copy_scripts_to_destination DEPENDS tf_python_touchup_modules)
+
+
+# tf_python_srcs contains all static .py files
function(add_python_module MODULE_NAME)
+ set(options DONTCOPY)
+ cmake_parse_arguments(ADD_PYTHON_MODULE "${options}" "" "" ${ARGN})
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}")
add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E touch "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}/__init__.py")
+ file(GLOB module_python_srcs RELATIVE ${tensorflow_source_dir}
+ "${tensorflow_source_dir}/${MODULE_NAME}/*.py"
+ )
+ if(NOT ${ADD_PYTHON_MODULE_DONTCOPY})
+ foreach(script ${module_python_srcs})
+ get_filename_component(REL_DIR ${script} DIRECTORY)
+ add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
+ endforeach()
+ endif()
endfunction()
add_python_module("tensorflow")
@@ -164,33 +166,205 @@ add_python_module("tensorflow/core/protobuf")
add_python_module("tensorflow/core/util")
add_python_module("tensorflow/python")
add_python_module("tensorflow/python/client")
+add_python_module("tensorflow/python/debug")
+add_python_module("tensorflow/python/debug/cli")
+add_python_module("tensorflow/python/debug/examples")
+add_python_module("tensorflow/python/debug/wrappers")
add_python_module("tensorflow/python/framework")
-add_python_module("tensorflow/python/ops")
add_python_module("tensorflow/python/kernel_tests")
add_python_module("tensorflow/python/lib")
add_python_module("tensorflow/python/lib/core")
-add_python_module("tensorflow/python/lib/core/io")
+add_python_module("tensorflow/python/lib/io")
+add_python_module("tensorflow/python/ops")
add_python_module("tensorflow/python/platform")
add_python_module("tensorflow/python/platform/default")
add_python_module("tensorflow/python/platform/summary")
-add_python_module("tensorflow/python/platform/summary/impl")
+add_python_module("tensorflow/python/summary")
+add_python_module("tensorflow/python/summary/impl")
+add_python_module("tensorflow/python/summary/writer")
add_python_module("tensorflow/python/tools")
add_python_module("tensorflow/python/training")
+add_python_module("tensorflow/python/user_ops")
add_python_module("tensorflow/python/util")
add_python_module("tensorflow/python/util/protobuf")
-add_python_module("tensorflow/contrib")
+
+add_python_module("tensorflow/contrib/")
+add_python_module("tensorflow/contrib/android")
+add_python_module("tensorflow/contrib/android/java")
+add_python_module("tensorflow/contrib/android/java/org")
+add_python_module("tensorflow/contrib/android/java/org/tensorflow")
+add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib")
+add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib/android")
+add_python_module("tensorflow/contrib/android/jni")
add_python_module("tensorflow/contrib/bayesflow")
+add_python_module("tensorflow/contrib/bayesflow/examples")
+add_python_module("tensorflow/contrib/bayesflow/examples/reinforce_simple")
add_python_module("tensorflow/contrib/bayesflow/python")
+add_python_module("tensorflow/contrib/bayesflow/python/kernel_tests")
add_python_module("tensorflow/contrib/bayesflow/python/ops")
-add_python_module("tensorflow/contrib/bayesflow/python/ops/bernoulli")
+add_python_module("tensorflow/contrib/copy_graph")
+add_python_module("tensorflow/contrib/copy_graph/python")
+add_python_module("tensorflow/contrib/copy_graph/python/util")
+add_python_module("tensorflow/contrib/crf")
+add_python_module("tensorflow/contrib/crf/python")
+add_python_module("tensorflow/contrib/crf/python/kernel_tests")
+add_python_module("tensorflow/contrib/crf/python/ops")
+add_python_module("tensorflow/contrib/cudnn_rnn")
+add_python_module("tensorflow/contrib/cudnn_rnn/kernels")
+add_python_module("tensorflow/contrib/cudnn_rnn/ops")
+add_python_module("tensorflow/contrib/cudnn_rnn/python")
+add_python_module("tensorflow/contrib/cudnn_rnn/python/kernel_tests")
+add_python_module("tensorflow/contrib/cudnn_rnn/python/ops")
+add_python_module("tensorflow/contrib/distributions")
+add_python_module("tensorflow/contrib/distributions/python")
+add_python_module("tensorflow/contrib/distributions/python/kernel_tests")
+add_python_module("tensorflow/contrib/distributions/python/ops")
+add_python_module("tensorflow/contrib/factorization")
+add_python_module("tensorflow/contrib/factorization/examples")
+add_python_module("tensorflow/contrib/factorization/kernels")
+add_python_module("tensorflow/contrib/factorization/ops")
+add_python_module("tensorflow/contrib/factorization/python")
+add_python_module("tensorflow/contrib/factorization/python/kernel_tests")
+add_python_module("tensorflow/contrib/factorization/python/ops")
+add_python_module("tensorflow/contrib/ffmpeg")
+add_python_module("tensorflow/contrib/ffmpeg/default")
+add_python_module("tensorflow/contrib/ffmpeg/testdata")
add_python_module("tensorflow/contrib/framework")
+add_python_module("tensorflow/contrib/framework/kernels")
+add_python_module("tensorflow/contrib/framework/ops")
add_python_module("tensorflow/contrib/framework/python")
add_python_module("tensorflow/contrib/framework/python/framework")
+add_python_module("tensorflow/contrib/framework/python/ops")
+add_python_module("tensorflow/contrib/graph_editor")
+add_python_module("tensorflow/contrib/graph_editor/examples")
+add_python_module("tensorflow/contrib/graph_editor/tests")
+add_python_module("tensorflow/contrib/grid_rnn")
+add_python_module("tensorflow/contrib/grid_rnn/python")
+add_python_module("tensorflow/contrib/grid_rnn/python/kernel_tests")
+add_python_module("tensorflow/contrib/grid_rnn/python/ops")
+add_python_module("tensorflow/contrib/integrate")
+add_python_module("tensorflow/contrib/integrate/python")
+add_python_module("tensorflow/contrib/integrate/python/ops")
+add_python_module("tensorflow/contrib/ios_examples")
+add_python_module("tensorflow/contrib/ios_examples/benchmark")
+add_python_module("tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj")
+add_python_module("tensorflow/contrib/ios_examples/benchmark/data")
+add_python_module("tensorflow/contrib/ios_examples/camera")
+add_python_module("tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj")
+add_python_module("tensorflow/contrib/ios_examples/camera/data")
+add_python_module("tensorflow/contrib/ios_examples/camera/en.lproj")
+add_python_module("tensorflow/contrib/ios_examples/simple")
+add_python_module("tensorflow/contrib/ios_examples/simple/data")
+add_python_module("tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj")
add_python_module("tensorflow/contrib/layers")
+add_python_module("tensorflow/contrib/layers/kernels")
+add_python_module("tensorflow/contrib/layers/ops")
add_python_module("tensorflow/contrib/layers/python")
+add_python_module("tensorflow/contrib/layers/python/kernel_tests")
add_python_module("tensorflow/contrib/layers/python/layers")
add_python_module("tensorflow/contrib/layers/python/ops")
-
+add_python_module("tensorflow/contrib/learn")
+add_python_module("tensorflow/contrib/learn/python")
+add_python_module("tensorflow/contrib/learn/python/learn")
+add_python_module("tensorflow/contrib/learn/python/learn/dataframe")
+add_python_module("tensorflow/contrib/learn/python/learn/dataframe/queues")
+add_python_module("tensorflow/contrib/learn/python/learn/dataframe/transforms")
+add_python_module("tensorflow/contrib/learn/python/learn/datasets")
+add_python_module("tensorflow/contrib/learn/python/learn/datasets/data")
+add_python_module("tensorflow/contrib/learn/python/learn/estimators")
+add_python_module("tensorflow/contrib/learn/python/learn/learn_io")
+add_python_module("tensorflow/contrib/learn/python/learn/ops")
+add_python_module("tensorflow/contrib/learn/python/learn/preprocessing")
+add_python_module("tensorflow/contrib/learn/python/learn/preprocessing/tests")
+add_python_module("tensorflow/contrib/learn/python/learn/tests")
+add_python_module("tensorflow/contrib/learn/python/learn/tests/dataframe")
+add_python_module("tensorflow/contrib/learn/python/learn/utils")
+add_python_module("tensorflow/contrib/linear_optimizer")
+add_python_module("tensorflow/contrib/linear_optimizer/kernels")
+add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc")
+add_python_module("tensorflow/contrib/linear_optimizer/python")
+add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests")
+add_python_module("tensorflow/contrib/linear_optimizer/python/ops")
+add_python_module("tensorflow/contrib/lookup")
+add_python_module("tensorflow/contrib/losses")
+add_python_module("tensorflow/contrib/losses/python")
+add_python_module("tensorflow/contrib/losses/python/losses")
+add_python_module("tensorflow/contrib/makefile")
+add_python_module("tensorflow/contrib/makefile/test")
+add_python_module("tensorflow/contrib/metrics")
+add_python_module("tensorflow/contrib/metrics/kernels")
+add_python_module("tensorflow/contrib/metrics/ops")
+add_python_module("tensorflow/contrib/metrics/python")
+add_python_module("tensorflow/contrib/metrics/python/kernel_tests")
+add_python_module("tensorflow/contrib/metrics/python/metrics")
+add_python_module("tensorflow/contrib/metrics/python/ops")
+add_python_module("tensorflow/contrib/ndlstm")
+add_python_module("tensorflow/contrib/ndlstm/python")
+add_python_module("tensorflow/contrib/opt")
+add_python_module("tensorflow/contrib/opt/python")
+add_python_module("tensorflow/contrib/opt/python/training")
+add_python_module("tensorflow/contrib/pi_examples")
+add_python_module("tensorflow/contrib/pi_examples/camera")
+add_python_module("tensorflow/contrib/pi_examples/label_image")
+add_python_module("tensorflow/contrib/pi_examples/label_image/data")
+add_python_module("tensorflow/contrib/quantization")
+add_python_module("tensorflow/contrib/quantization/python")
+add_python_module("tensorflow/contrib/rnn")
+add_python_module("tensorflow/contrib/rnn/kernels")
+add_python_module("tensorflow/contrib/rnn/ops")
+add_python_module("tensorflow/contrib/rnn/python")
+add_python_module("tensorflow/contrib/rnn/python/kernel_tests")
+add_python_module("tensorflow/contrib/rnn/python/ops")
+add_python_module("tensorflow/contrib/seq2seq")
+add_python_module("tensorflow/contrib/seq2seq/python")
+add_python_module("tensorflow/contrib/seq2seq/python/kernel_tests")
+add_python_module("tensorflow/contrib/seq2seq/python/ops")
+add_python_module("tensorflow/contrib/session_bundle")
+add_python_module("tensorflow/contrib/session_bundle/example")
+add_python_module("tensorflow/contrib/session_bundle/testdata")
+add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two")
+add_python_module("tensorflow/contrib/session_bundle/testdata/saved_model_half_plus_two/variables")
+add_python_module("tensorflow/contrib/slim")
+add_python_module("tensorflow/contrib/slim/python")
+add_python_module("tensorflow/contrib/slim/python/slim")
+add_python_module("tensorflow/contrib/slim/python/slim/data")
+add_python_module("tensorflow/contrib/slim/python/slim/nets")
+add_python_module("tensorflow/contrib/specs")
+add_python_module("tensorflow/contrib/specs/python")
+add_python_module("tensorflow/contrib/tensorboard")
+add_python_module("tensorflow/contrib/tensorboard/plugins")
+add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
+add_python_module("tensorflow/contrib/tensor_forest")
+add_python_module("tensorflow/contrib/tensor_forest/client")
+add_python_module("tensorflow/contrib/tensor_forest/core")
+add_python_module("tensorflow/contrib/tensor_forest/core/ops")
+add_python_module("tensorflow/contrib/tensor_forest/data")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/core")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/core/ops")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/kernel_tests")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/layers")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/models")
+add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/ops")
+add_python_module("tensorflow/contrib/tensor_forest/python")
+add_python_module("tensorflow/contrib/tensor_forest/python/kernel_tests")
+add_python_module("tensorflow/contrib/tensor_forest/python/ops")
+add_python_module("tensorflow/contrib/tensorboard")
+add_python_module("tensorflow/contrib/tensorboard")
+add_python_module("tensorflow/contrib/tensorboard/plugins")
+add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
+add_python_module("tensorflow/contrib/testing")
+add_python_module("tensorflow/contrib/testing/python")
+add_python_module("tensorflow/contrib/testing/python/framework")
+add_python_module("tensorflow/contrib/tfprof" DONTCOPY) # SWIG wrapper not implemented.
+#add_python_module("tensorflow/contrib/tfprof/python")
+#add_python_module("tensorflow/contrib/tfprof/python/tools")
+#add_python_module("tensorflow/contrib/tfprof/python/tools/tfprof")
+add_python_module("tensorflow/contrib/training")
+add_python_module("tensorflow/contrib/training/python")
+add_python_module("tensorflow/contrib/training/python/training")
+add_python_module("tensorflow/contrib/util")
########################################################
@@ -280,6 +454,15 @@ GENERATE_PYTHON_OP_LIB("user_ops")
GENERATE_PYTHON_OP_LIB("training_ops"
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/training/gen_training_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops"
+ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops"
+ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_clustering_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_factorization_factorization_ops"
+ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/factorization/python/ops/gen_factorization_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_framework_variable_ops"
+ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/framework/python/ops/gen_variable_ops.py)
+
add_custom_target(tf_python_ops SOURCES ${tf_python_ops_generated_files} ${PYTHON_PROTO_GENFILES})
add_dependencies(tf_python_ops tf_python_op_gen_main)
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 658e4c343c..3a9fd639e6 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -149,12 +149,8 @@ if (tensorflow_BUILD_PYTHON_TESTS)
# issues related to windows fs
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/io_ops_test.py"
# missing kernel
- "${tensorflow_source_dir}/tensorflow/python/kernel_tests/pooling_ops_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/conv_ops_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/depthwise_conv_op_test.py"
- "${tensorflow_source_dir}/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py"
- "${tensorflow_source_dir}/tensorflow/python/kernel_tests/pool_test.py"
- "${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py"
# cuda launch failed
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/diag_op_test.py"
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/trace_op_test.py"
diff --git a/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc b/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc
index 4f35337c0c..92606225d7 100644
--- a/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc
+++ b/tensorflow/contrib/factorization/kernels/wals_solver_ops.cc
@@ -257,7 +257,7 @@ class WALSComputePartialLhsAndRhsOp : public OpKernel {
lhs_mat = lhs_symm;
counter.DecrementCount();
};
- for (int i = 1; i < shards.size(); ++i) {
+ for (size_t i = 1; i < shards.size(); ++i) {
worker_threads.workers->Schedule(std::bind(work, shards[i]));
}
// Inline execute the 1st shard.
diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt
index d4adcec550..ccc2aaa07f 100644
--- a/tensorflow/contrib/makefile/proto_text_cc_files.txt
+++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt
@@ -11,6 +11,7 @@ tensorflow/core/platform/posix/env.cc
tensorflow/core/platform/posix/load_library.cc
tensorflow/core/platform/file_system.cc
tensorflow/core/platform/env.cc
+tensorflow/core/platform/setround.cc
tensorflow/core/platform/denormal.cc
tensorflow/core/platform/default/tracing.cc
tensorflow/core/platform/default/logging.cc
diff --git a/tensorflow/contrib/metrics/kernels/set_kernels.cc b/tensorflow/contrib/metrics/kernels/set_kernels.cc
index 2d2f496da7..61fe250206 100644
--- a/tensorflow/contrib/metrics/kernels/set_kernels.cc
+++ b/tensorflow/contrib/metrics/kernels/set_kernels.cc
@@ -611,7 +611,7 @@ void SetOperationOp<T>::ComputeSparseToSparse(OpKernelContext* ctx) const {
int64 compare_groups;
CompareGroups(ctx, set1_group_indices, set2_group_indices, &compare_groups);
- const std::vector<int64>* group_indices;
+ const std::vector<int64>* group_indices = nullptr;
// Get values from set1, if applicable.
set1_group_set.clear();
diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
index b80bd7248b..b952040c30 100644
--- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
+++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
@@ -294,10 +294,7 @@ class Image(ItemHandler):
image_buffer = keys_to_tensors[self._image_key]
image_format = keys_to_tensors[self._format_key]
- image = self._decode(image_buffer, image_format)
- if self._shape is not None:
- image = array_ops.reshape(image, self._shape)
- return image
+ return self._decode(image_buffer, image_format)
def _decode(self, image_buffer, image_format):
"""Decodes the image buffer.
@@ -316,12 +313,23 @@ class Image(ItemHandler):
def decode_jpg():
return image_ops.decode_jpeg(image_buffer, self._channels)
- image = control_flow_ops.case({
+ # For RGBA images JPEG is not a valid decoder option.
+ if self._channels > 3:
+ pred_fn_pairs = {
+ math_ops.logical_or(math_ops.equal(image_format, 'raw'),
+ math_ops.equal(image_format, 'RAW')): decode_raw,
+ }
+ default_decoder = decode_png
+ else:
+ pred_fn_pairs = {
math_ops.logical_or(math_ops.equal(image_format, 'png'),
math_ops.equal(image_format, 'PNG')): decode_png,
math_ops.logical_or(math_ops.equal(image_format, 'raw'),
math_ops.equal(image_format, 'RAW')): decode_raw,
- }, default=decode_jpg, exclusive=True)
+ }
+ default_decoder = decode_jpg
+
+ image = control_flow_ops.case(pred_fn_pairs, default=default_decoder, exclusive=True)
image.set_shape([None, None, self._channels])
if self._shape is not None:
diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py
index cd75db8967..f572f9c3e9 100644
--- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py
+++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py
@@ -168,7 +168,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
self.assertEqual(tf_decoded_image.get_shape().ndims, 3)
def testDecodeExampleWithPngEncoding(self):
- test_image_channels = [1, 3]
+ test_image_channels = [1, 3, 4]
for channels in test_image_channels:
image_shape = (2, 3, channels)
image, serialized_example = self.GenerateImage(
@@ -183,7 +183,7 @@ class TFExampleDecoderTest(tf.test.TestCase):
self.assertAllClose(image, decoded_image, atol=0)
def testDecodeExampleWithPNGEncoding(self):
- test_image_channels = [1, 3]
+ test_image_channels = [1, 3, 4]
for channels in test_image_channels:
image_shape = (2, 3, channels)
image, serialized_example = self.GenerateImage(
diff --git a/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc b/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc
index e93267c1e3..544336b1ba 100644
--- a/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc
+++ b/tensorflow/contrib/tensor_forest/core/ops/tree_utils.cc
@@ -395,7 +395,7 @@ double getDistanceFromLambda3(double lambda3, const std::vector<float>& mu1,
// x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3)
// y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3)
double dist = 0.0;
- for (int i = 0; i < mu1.size(); i++) {
+ for (size_t i = 0; i < mu1.size(); i++) {
double diff = (lambda1 + 2.0 * mu1[i]) / (2.0 - 2.0 * lambda3) - mu1[i];
dist += diff * diff;
diff = (lambda2 + 2.0 * mu2[i]) / (2.0 + 2.0 * lambda3) - mu2[i];
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index ec60e853b2..29b05cecba 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1118,6 +1118,7 @@ tf_version_info_genrule()
cc_library(
name = "version_lib",
srcs = ["util/version_info.cc"],
+ hdrs = ["public/version.h"],
copts = tf_copts(),
)
@@ -1129,7 +1130,6 @@ tf_cuda_library(
"example/**/*.cc",
"framework/**/*.h",
"framework/**/*.cc",
- "public/version.h",
"util/**/*.h",
"util/**/*.cc",
],
@@ -1142,6 +1142,7 @@ tf_cuda_library(
"framework/fake_input.*",
"util/memmapped_file_system.*",
"util/memmapped_file_system_writer.*",
+ "util/version_info.cc",
],
) + select({
"//tensorflow:windows": [],
@@ -1394,11 +1395,13 @@ tf_cuda_library(
cc_library(
name = "sycl_runtime",
srcs = if_not_windows([
+ "common_runtime/sycl/sycl_allocator.cc",
"common_runtime/sycl/sycl_device.cc",
"common_runtime/sycl/sycl_device_context.cc",
"common_runtime/sycl/sycl_device_factory.cc",
]),
hdrs = if_not_windows([
+ "common_runtime/sycl/sycl_allocator.h",
"common_runtime/sycl/sycl_device.h",
"common_runtime/sycl/sycl_device_context.h",
]),
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
new file mode 100644
index 0000000000..175b784825
--- /dev/null
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
@@ -0,0 +1,35 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef TENSORFLOW_USE_SYCL
+
+#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
+
+namespace tensorflow {
+
+SYCLAllocator::~SYCLAllocator() { }
+
+string SYCLAllocator::Name() { return "device:SYCL"; }
+
+void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
+ auto p = device_->allocate(num_bytes);
+ return p;
+}
+
+void SYCLAllocator::DeallocateRaw(void *ptr) { device_->deallocate(ptr); }
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.h b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
new file mode 100644
index 0000000000..887c727f6e
--- /dev/null
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
@@ -0,0 +1,45 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if !TENSORFLOW_USE_SYCL
+#error This file must only be included when building TensorFlow with SYCL support
+#endif
+
+#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
+#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
+
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/platform/types.h"
+#define EIGEN_USE_SYCL
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+class SYCLAllocator : public Allocator {
+public:
+ SYCLAllocator(Eigen::SyclDevice* device) : device_(device) {}
+ virtual ~SYCLAllocator() override;
+ string Name() override;
+ void *AllocateRaw(size_t alignment, size_t num_bytes) override;
+ void DeallocateRaw(void *ptr) override;
+
+private:
+ Eigen::SyclDevice *device_; // not owned
+ TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc
index dc4e8db7ba..10a037c02d 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.cc
@@ -23,25 +23,13 @@ limitations under the License.
namespace tensorflow {
-cl::sycl::gpu_selector s;
-cl::sycl::queue q(s);
-
-SYCLDevice::SYCLDevice(const SessionOptions& options, const string& name,
- Bytes memory_limit, const DeviceLocality& locality,
- const string& physical_device_desc, Allocator* allocator)
- : LocalDevice(options,
- Device::BuildDeviceAttributes(name, DEVICE_SYCL, memory_limit,
- locality, physical_device_desc),
- allocator),
- allocator_(allocator),
- device_context_(new SYCLDeviceContext()),
- device_(q) {
- set_eigen_sycl_device(&device_);
+SYCLDevice::~SYCLDevice() {
+ device_context_->Unref();
+ delete sycl_allocator_;
+ delete sycl_device_;
}
-SYCLDevice::~SYCLDevice() { device_context_->Unref(); }
-
-void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
+void SYCLDevice::Compute(OpKernel *op_kernel, OpKernelContext *context) {
assert(context);
if (port::Tracing::IsActive()) {
// TODO(pbar) We really need a useful identifier of the graph node.
@@ -52,28 +40,45 @@ void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
op_kernel->Compute(context);
}
-Allocator* SYCLDevice::GetAllocator(AllocatorAttributes attr) {
- return allocator_;
+Allocator *SYCLDevice::GetAllocator(AllocatorAttributes attr) {
+ if (attr.on_host())
+ return cpu_allocator_;
+ else
+ return sycl_allocator_;
}
-Status SYCLDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
+Status SYCLDevice::MakeTensorFromProto(const TensorProto &tensor_proto,
const AllocatorAttributes alloc_attrs,
- Tensor* tensor) {
+ Tensor *tensor) {
+ AllocatorAttributes attr;
+ attr.set_on_host(true);
+ attr.set_gpu_compatible(true);
+ Allocator *host_alloc = GetAllocator(attr);
Tensor parsed(tensor_proto.dtype());
- if (!parsed.FromProto(cpu_allocator(), tensor_proto)) {
+ if (!parsed.FromProto(host_alloc, tensor_proto)) {
return errors::InvalidArgument("Cannot parse tensor from proto: ",
- ProtoDebugString(tensor_proto));
+ tensor_proto.DebugString());
}
- *tensor = std::move(parsed);
- return Status::OK();
+ Status status;
+ if (alloc_attrs.on_host()) {
+ *tensor = parsed;
+ } else {
+ Tensor copy(GetAllocator(alloc_attrs), parsed.dtype(), parsed.shape());
+ device_context_->CopyCPUTensorToDevice(&parsed, this, &copy,
+ [&status](const Status &s) {
+ status = s;
+ });
+ *tensor = copy;
+ }
+ return status;
}
-Status SYCLDevice::FillContextMap(const Graph* graph,
- DeviceContextMap* device_context_map) {
+Status SYCLDevice::FillContextMap(const Graph *graph,
+ DeviceContextMap *device_context_map) {
// Fill in the context map. It is OK for this map to contain
// duplicate DeviceContexts so long as we increment the refcount.
device_context_map->resize(graph->num_node_ids());
- for (Node* n : graph->nodes()) {
+ for (Node *n : graph->nodes()) {
device_context_->Ref();
(*device_context_map)[n->id()] = device_context_;
}
@@ -81,6 +86,6 @@ Status SYCLDevice::FillContextMap(const Graph* graph,
return Status::OK();
}
-} // namespace tensorflow
+} // namespace tensorflow
-#endif // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h
index eaa9429b16..d3b3db2a71 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.h
@@ -24,26 +24,40 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/local_device.h"
+#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
#include "tensorflow/core/public/session_options.h"
namespace tensorflow {
+
class SYCLDevice : public LocalDevice {
- public:
- SYCLDevice(const SessionOptions& options, const string& name,
- Bytes memory_limit, const DeviceLocality& locality,
- const string& physical_device_desc, Allocator* allocator);
+public:
+ template <typename SYCLSelector>
+ SYCLDevice(const SessionOptions &options, const string &name,
+ Bytes memory_limit, const DeviceLocality &locality,
+ const string &physical_device_desc, SYCLSelector sycl_selector,
+ Allocator *cpu_allocator)
+ : LocalDevice(options, Device::BuildDeviceAttributes(
+ name, DEVICE_SYCL, memory_limit, locality,
+ physical_device_desc), nullptr),
+ cpu_allocator_(cpu_allocator),
+ sycl_device_(new Eigen::SyclDevice(sycl_selector)),
+ sycl_allocator_(new SYCLAllocator(sycl_device_)),
+ device_context_(new SYCLDeviceContext()) {
+ set_eigen_sycl_device(sycl_device_);
+ }
+
~SYCLDevice() override;
- void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
- Allocator* GetAllocator(AllocatorAttributes attr) override;
- Status MakeTensorFromProto(const TensorProto& tensor_proto,
+ void Compute(OpKernel *op_kernel, OpKernelContext *context) override;
+ Allocator *GetAllocator(AllocatorAttributes attr) override;
+ Status MakeTensorFromProto(const TensorProto &tensor_proto,
const AllocatorAttributes alloc_attrs,
- Tensor* tensor) override;
+ Tensor *tensor) override;
- Status FillContextMap(const Graph* graph,
- DeviceContextMap* device_context_map) override;
+ Status FillContextMap(const Graph *graph,
+ DeviceContextMap *device_context_map) override;
Status Sync() override { return Status::OK(); }
static string GetShortDeviceDescription(/*int device_id,
@@ -51,12 +65,13 @@ class SYCLDevice : public LocalDevice {
return strings::StrCat("device: 0, name SYCL, pci bus id: 0");
}
- private:
- Allocator* allocator_; // Not owned
- SYCLDeviceContext* device_context_;
- Eigen::SyclDevice device_;
+private:
+ Allocator *cpu_allocator_; // owned
+ Eigen::SyclDevice* sycl_device_; // owned
+ SYCLAllocator *sycl_allocator_; // owned
+ SYCLDeviceContext *device_context_;
};
-} // namespace tensorflow
+} // namespace tensorflow
-#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
+#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
index 70b1a0e93b..9dd289bebd 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
@@ -13,36 +13,171 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+#if TENSORFLOW_USE_SYCL
+
+#define EIGEN_USE_SYCL
+
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
#include "tensorflow/core/common_runtime/dma_helper.h"
+#define EIGEN_USE_SYCL
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
namespace tensorflow {
-void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
- Device* device,
- Tensor* device_tensor,
+void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor,
+ Device *device,
+ Tensor *device_tensor,
StatusCallback done) const {
const int64 total_bytes = cpu_tensor->TotalBytes();
if (total_bytes > 0) {
- const void* src_ptr = DMAHelper::base(cpu_tensor);
- void* dst_ptr = DMAHelper::base(device_tensor);
- ::memcpy(dst_ptr, src_ptr, total_bytes);
+ const void *src_ptr = DMAHelper::base(cpu_tensor);
+ void *dst_ptr = DMAHelper::base(device_tensor);
+ switch (cpu_tensor->dtype()) {
+ case DT_FLOAT:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_DOUBLE:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT32:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT64:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_HALF:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<Eigen::half *>(dst_ptr),
+ static_cast<const Eigen::half *>(src_ptr), total_bytes);
+ break;
+ case DT_COMPLEX64:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<std::complex<float> *>(dst_ptr),
+ static_cast<const std::complex<float> *>(src_ptr), total_bytes);
+ break;
+ case DT_COMPLEX128:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<std::complex<double> *>(dst_ptr),
+ static_cast<const std::complex<double> *>(src_ptr), total_bytes);
+ break;
+ case DT_INT8:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT16:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_UINT8:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_UINT16:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_BOOL:
+ device->eigen_sycl_device()->memcpyHostToDevice(
+ static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
+ total_bytes);
+ break;
+ default:
+ assert(false && "unsupported type");
+ }
}
done(Status::OK());
}
-void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
+void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor,
StringPiece edge_name,
- Device* device,
- Tensor* cpu_tensor,
+ Device *device,
+ Tensor *cpu_tensor,
StatusCallback done) {
const int64 total_bytes = device_tensor->TotalBytes();
if (total_bytes > 0) {
+ device->eigen_sycl_device()->deallocate_all();
const void* src_ptr = DMAHelper::base(device_tensor);
void* dst_ptr = DMAHelper::base(cpu_tensor);
- ::memcpy(dst_ptr, src_ptr, total_bytes);
+ switch (device_tensor->dtype()) {
+ case DT_FLOAT:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_DOUBLE:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<double *>(dst_ptr), static_cast<const double *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT32:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT64:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_HALF:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<Eigen::half *>(dst_ptr),
+ static_cast<const Eigen::half *>(src_ptr), total_bytes);
+ break;
+ case DT_COMPLEX64:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<std::complex<float> *>(dst_ptr),
+ static_cast<const std::complex<float> *>(src_ptr), total_bytes);
+ break;
+ case DT_COMPLEX128:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<std::complex<double> *>(dst_ptr),
+ static_cast<const std::complex<double> *>(src_ptr), total_bytes);
+ break;
+ case DT_INT8:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_INT16:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_UINT8:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_UINT16:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<uint16 *>(dst_ptr), static_cast<const uint16 *>(src_ptr),
+ total_bytes);
+ break;
+ case DT_BOOL:
+ device->eigen_sycl_device()->memcpyDeviceToHost(
+ static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
+ total_bytes);
+ break;
+ default:
+ assert(false && "unsupported type");
+ }
}
done(Status::OK());
}
} // namespace tensorflow
+#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.h b/tensorflow/core/common_runtime/sycl/sycl_device_context.h
index 67cfe47aa1..1f7ad543d9 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.h
@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+#if !TENSORFLOW_USE_SYCL
+#error This file must only be included when building TensorFlow with SYCL support
+#endif
+
#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
@@ -22,20 +26,20 @@ limitations under the License.
namespace tensorflow {
class SYCLDeviceContext : public DeviceContext {
- public:
+public:
SYCLDeviceContext() {}
~SYCLDeviceContext() override {}
- void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
- Tensor* device_tensor,
+ void CopyCPUTensorToDevice(const Tensor *cpu_tensor, Device *device,
+ Tensor *device_tensor,
StatusCallback done) const override;
- void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name,
- Device* device, Tensor* cpu_tensor,
+ void CopyDeviceTensorToCPU(const Tensor *device_tensor, StringPiece edge_name,
+ Device *device, Tensor *cpu_tensor,
StatusCallback done) override;
};
-} // namespace tensorflow
+} // namespace tensorflow
-#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
+#endif // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
index 97c4c2c236..9b8770420c 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
@@ -20,9 +20,9 @@ limitations under the License.
namespace tensorflow {
class SYCLDeviceFactory : public DeviceFactory {
- public:
- Status CreateDevices(const SessionOptions& options, const string& name_prefix,
- std::vector<Device*>* devices) override {
+public:
+ Status CreateDevices(const SessionOptions &options, const string &name_prefix,
+ std::vector<Device *> *devices) override {
int n = 1;
auto iter = options.config.device_count().find("SYCL");
if (iter != options.config.device_count().end()) {
@@ -30,9 +30,10 @@ class SYCLDeviceFactory : public DeviceFactory {
}
for (int i = 0; i < n; i++) {
string name = strings::StrCat(name_prefix, "/device:SYCL:", i);
- devices->push_back(new SYCLDevice(
- options, name, Bytes(256 << 20), DeviceLocality(),
- SYCLDevice::GetShortDeviceDescription(), cpu_allocator()));
+ devices->push_back(new SYCLDevice(options, name, Bytes(256 << 20),
+ DeviceLocality(),
+ SYCLDevice::GetShortDeviceDescription(),
+ cl::sycl::gpu_selector(), cpu_allocator()));
}
return Status::OK();
}
@@ -41,4 +42,4 @@ class SYCLDeviceFactory : public DeviceFactory {
REGISTER_LOCAL_DEVICE_FACTORY("SYCL", SYCLDeviceFactory);
}
-#endif // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/distributed_runtime/master.cc b/tensorflow/core/distributed_runtime/master.cc
index 741282be31..de468f00b7 100644
--- a/tensorflow/core/distributed_runtime/master.cc
+++ b/tensorflow/core/distributed_runtime/master.cc
@@ -91,8 +91,8 @@ void Master::GC() {
std::vector<string> handles;
const int64 num_micros = static_cast<int64>(session_gc_seconds_ * 1000000);
for (const auto& entry : sessions_) {
- auto lat = entry.second->last_access_time_usec();
- if (env->NowMicros() - lat > num_micros) {
+ int64 lat = entry.second->last_access_time_usec();
+ if (static_cast<int64>(env->NowMicros()) - lat > num_micros) {
handles.push_back(entry.first);
auto* sess = entry.second;
SchedClosure([this, sess]() {
@@ -399,7 +399,7 @@ void Master::CleanupWorkers(const ResetRequest& reset) {
}
++c;
}
- for (int i = 0; i < n.size(); ++i) {
+ for (size_t i = 0; i < n.size(); ++i) {
n[i].WaitForNotification();
}
}
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
index eb188a7984..31a3404a07 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
@@ -69,7 +69,7 @@ Status ValidateHostPortPair(const string& host_port) {
Status GrpcChannelSpec::AddHostPortsJob(const string& job_id,
const std::vector<string>& host_ports) {
std::map<int, string> host_ports_map;
- for (int i = 0; i < host_ports.size(); ++i) {
+ for (size_t i = 0; i < host_ports.size(); ++i) {
host_ports_map[i] = host_ports[i];
}
return AddHostPortsJob(job_id, host_ports_map);
diff --git a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
index 3b0614cc75..c4bb37fcbe 100644
--- a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
+++ b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
@@ -156,7 +156,7 @@ class RpcRecvTensorFreeList {
public:
RpcRecvTensorFreeList() {}
~RpcRecvTensorFreeList() {
- for (int i = 0; i < objects_.size(); i++) {
+ for (size_t i = 0; i < objects_.size(); i++) {
delete objects_[i];
}
}
diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc
index b26970b606..55b7d5fe82 100644
--- a/tensorflow/core/distributed_runtime/tensor_coding.cc
+++ b/tensorflow/core/distributed_runtime/tensor_coding.cc
@@ -192,7 +192,7 @@ bool TensorResponse::ParseTensorSubmessage(
TensorShape shape(tensor_meta->tensor_shape());
Tensor t(allocator_, tensor_meta->dtype(), shape);
StringPiece buf = t.tensor_data();
- if (num_bytes != buf.size()) return false;
+ if (static_cast<size_t>(num_bytes) != buf.size()) return false;
// TODO(jeff,sanjay): Figure out a way to avoid this copy if
// the underlying ZeroCopyInputStream data is properly aligned
// and compatible with what allocator_ wants.
diff --git a/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc b/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc
index a190b2168a..2be330d142 100644
--- a/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc
@@ -72,7 +72,7 @@ __global__ void AvePoolBackwardNHWC(const int nthreads,
wstart = max(wstart, 0);
int pool_size = (hend - hstart) * (wend - wstart);
gradient +=
- top_diff_slice[(ph * pooled_width + pw) * channels] / pool_size;
+ top_diff_slice[(ph * pooled_width + pw) * channels] / dtype(pool_size);
}
}
bottom_diff[index] = gradient;
diff --git a/tensorflow/core/kernels/bcast_ops.cc b/tensorflow/core/kernels/bcast_ops.cc
index 10354cbb56..db8842a547 100644
--- a/tensorflow/core/kernels/bcast_ops.cc
+++ b/tensorflow/core/kernels/bcast_ops.cc
@@ -90,4 +90,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
.HostMemory("r1"),
BCastGradArgsOp);
+#if TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
+ .Device(DEVICE_SYCL)
+ .TypeConstraint<int32>("T")
+ .HostMemory("s0")
+ .HostMemory("s1")
+ .HostMemory("r0")
+ .HostMemory("r1"),
+ BCastGradArgsOp);
+#endif
} // end namespace tensorflow
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index c2d5af0316..4c8c9939bc 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -16,6 +16,9 @@ limitations under the License.
// See docs in ../ops/array_ops.cc.
#define EIGEN_USE_THREADS
+#if TENSORFLOW_USE_SYCL
+#define EIGEN_USE_SYCL
+#endif
#include "tensorflow/core/kernels/constant_op.h"
diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc
index 65413a09b2..1a8c17b1ef 100644
--- a/tensorflow/core/kernels/control_flow_ops.cc
+++ b/tensorflow/core/kernels/control_flow_ops.cc
@@ -112,6 +112,15 @@ REGISTER_GPU_HOST_REF_KERNEL(string);
#undef REGISTER_GPU_HOST_KERNEL
#undef REGISTER_GPU_HOST_REF_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Switch").Device(DEVICE_SYCL).TypeConstraint<type>("T"), SwitchOp)
+REGISTER_SYCL_KERNEL(bool);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
class RefSelectOp : public OpKernel {
public:
explicit RefSelectOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -209,6 +218,15 @@ REGISTER_GPU_REF_KERNEL(bool);
#undef REGISTER_GPU_KERNEL
#undef REGISTER_GPU_REF_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Merge").Device(DEVICE_SYCL).TypeConstraint<type>("T"), MergeOp)
+REGISTER_SYCL_KERNEL(bool);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
// Special GPU kernels for int32 and string.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
@@ -259,6 +277,15 @@ REGISTER_GPU_REF_KERNEL(bool);
#undef REGISTER_GPU_KERNEL
#undef REGISTER_GPU_REF_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Enter").Device(DEVICE_SYCL).TypeConstraint<type>("T"), EnterOp)
+REGISTER_SYCL_KERNEL(bool);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
// Special GPU kernels for int32 and string.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
@@ -310,6 +337,15 @@ REGISTER_GPU_KERNEL(bool);
#undef REGISTER_GPU_KERNEL
#undef REGISTER_GPU_REF_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Exit").Device(DEVICE_SYCL).TypeConstraint<type>("T"), ExitOp)
+REGISTER_SYCL_KERNEL(bool);
+TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
// Special GPU kernels for int32 and string.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
@@ -380,6 +416,15 @@ REGISTER_GPU_HOST_KERNEL(string);
#undef REGISTER_GPU_HOST_KERNEL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), NextIterationOp)
+ REGISTER_SYCL_KERNEL(bool);
+ TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
// A LoopCond op has one input and one output. The input is a boolean
// scalar representing the taken branches of the "pivot" Switch that
// determines loop termination. As a contract, any high-level front-end
diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc
index 75c6118795..44c552d18e 100644
--- a/tensorflow/core/kernels/cwise_op_add_1.cc
+++ b/tensorflow/core/kernels/cwise_op_add_1.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32,
int64);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Add") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::add<TYPE>>);
+TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double);
diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc
index 925c9e9916..c2b05a69b2 100644
--- a/tensorflow/core/kernels/cwise_op_div.cc
+++ b/tensorflow/core/kernels/cwise_op_div.cc
@@ -24,6 +24,16 @@ REGISTER5(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8, uint16, int16,
int32, int64);
REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double,
complex64, complex128);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Div") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::div<TYPE>>);
+REGISTER_SYCL_KERNEL(float)
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER9(BinaryOp, GPU, "Div", functor::div, float, Eigen::half, double, uint8,
uint16, int16, int64, complex64, complex128);
diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc
index 83b2771ed2..7930d83413 100644
--- a/tensorflow/core/kernels/cwise_op_floor_div.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_div.cc
@@ -18,6 +18,16 @@ limitations under the License.
namespace tensorflow {
REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
int16, int32, int64);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("FloorDiv") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::floor_div<TYPE>>);
+TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
int64);
diff --git a/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
new file mode 100644
index 0000000000..028b944d27
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
@@ -0,0 +1,26 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
+
+namespace tensorflow {
+namespace functor {
+DEFINE_UNARY2(rint, float, double);
+} // namespace functor
+} // namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc
index 954b5d25bd..e38b271318 100644
--- a/tensorflow/core/kernels/cwise_op_isfinite.cc
+++ b/tensorflow/core/kernels/cwise_op_isfinite.cc
@@ -18,6 +18,16 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
double);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("IsFinite") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::isfinite<TYPE>>);
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
double);
diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc
index 407dadcb69..bf056dbe0e 100644
--- a/tensorflow/core/kernels/cwise_op_isinf.cc
+++ b/tensorflow/core/kernels/cwise_op_isinf.cc
@@ -17,6 +17,16 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("IsInf") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::isinf<TYPE>>);
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc
index f150b2f3f4..d2bac23882 100644
--- a/tensorflow/core/kernels/cwise_op_isnan.cc
+++ b/tensorflow/core/kernels/cwise_op_isnan.cc
@@ -17,6 +17,16 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("IsNan") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::isnan<TYPE>>);
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_mul_1.cc b/tensorflow/core/kernels/cwise_op_mul_1.cc
index 09e9f070da..e23fe6761d 100644
--- a/tensorflow/core/kernels/cwise_op_mul_1.cc
+++ b/tensorflow/core/kernels/cwise_op_mul_1.cc
@@ -19,6 +19,17 @@ namespace tensorflow {
REGISTER5(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double,
uint8, int32);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Mul") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::mul<TYPE>>);
+REGISTER_SYCL_KERNEL(float)
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER4(BinaryOp, GPU, "Mul", functor::mul, float, Eigen::half, double,
uint8);
diff --git a/tensorflow/core/kernels/cwise_op_rint.cc b/tensorflow/core/kernels/cwise_op_rint.cc
new file mode 100644
index 0000000000..a741b3d718
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_rint.cc
@@ -0,0 +1,23 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/cwise_ops_common.h"
+
+namespace tensorflow {
+REGISTER2(UnaryOp, CPU, "Rint", functor::rint, float, double);
+#if GOOGLE_CUDA
+REGISTER2(UnaryOp, GPU, "Rint", functor::rint, float, double);
+#endif
+} // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc
index d000e54cfc..ed78ba37a8 100644
--- a/tensorflow/core/kernels/cwise_op_sub.cc
+++ b/tensorflow/core/kernels/cwise_op_sub.cc
@@ -24,6 +24,16 @@ REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32,
// int32 version of this op is needed, so explicitly include it.
REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
#endif // __ANDROID_TYPES_SLIM__
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Sub") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::sub<TYPE>>);
+TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
REGISTER6(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double, int64,
complex64, complex128);
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index b038d73e17..7f35e03feb 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -521,6 +521,27 @@ struct round : base<T, Eigen::internal::scalar_round_op_google<T>> {};
template <typename T>
struct ceil : base<T, Eigen::internal::scalar_ceil_op<T>> {};
+/** this should go in Eigen
+ * \brief Template functor to compute the round to int value of a scalar
+ */
+template <typename Scalar>
+struct scalar_rint_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_rint_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
+ operator()(const Scalar& a) const {
+#if defined(__CUDACC__)
+ return ::rint(a);
+#elif defined(__ANDROID__)
+ return rint(a);
+#else
+ return std::rint(a);
+#endif
+ }
+};
+
+template <typename T>
+struct rint : base<T, scalar_rint_op<T>> {};
+
////////////////////////////////////////////////////////////////////////////////
// Binary functors
////////////////////////////////////////////////////////////////////////////////
diff --git a/tensorflow/core/kernels/cwise_ops_sycl_common.h b/tensorflow/core/kernels/cwise_ops_sycl_common.h
index c66ae42c2d..4c22cc4855 100644
--- a/tensorflow/core/kernels/cwise_ops_sycl_common.h
+++ b/tensorflow/core/kernels/cwise_ops_sycl_common.h
@@ -22,6 +22,8 @@ limitations under the License.
#define EIGEN_USE_SYCL
+#include "tensorflow/core/framework/register_types.h"
+
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/kernels/cwise_ops.h"
#include "tensorflow/core/platform/logging.h"
@@ -32,6 +34,14 @@ namespace functor {
typedef Eigen::SyclDevice SYCLDevice;
+template <typename Index, int N> Eigen::array<Index, N> GenerateArrayOfOnes() {
+ Eigen::array<Index, N> result;
+ for (int i = 0; i < N; ++i) {
+ result[i] = 1;
+ }
+ return result;
+}
+
template <typename OUT, typename RHS>
void Assign(const SYCLDevice& d, OUT out, RHS rhs) {
out.device(d) = rhs;
@@ -52,23 +62,31 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
void operator()(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tin_type in0,
typename Functor::tin_type in1, bool* error) {
- Assign(d, out, in0.binaryExpr(in1, typename Functor::func()));
+ To32Bit(out).device(d) = To32Bit(in0).binaryExpr(in1, typename Functor::func());
}
void Left(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tscalar_type scalar,
typename Functor::tin_type in, bool* error) {
- LOG(FATAL) << "BinaryFunctor::Left NOT IMPLEMENTED ! ";
+ typedef typename Functor::func Binary;
+ constexpr int NumDims = Functor::tin_type::NumDimensions;
+ typedef typename Functor::tin_type::Scalar T;
+ typedef typename Functor::tin_type::Index Index;
+ Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
+ Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
+ out.device(d) = tmp.broadcast(in.dimensions()).binaryExpr(in, Binary());
}
void Right(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tin_type in,
typename Functor::tscalar_type scalar, bool* error) {
- typedef typename Functor::out_type Tout;
- typedef typename Functor::in_type Tin;
typedef typename Functor::func Binary;
- typedef typename Eigen::internal::scalar_right<Tout, Tin, Binary> Unary;
- Assign(d, out, in.unaryExpr(Unary(scalar.data())));
+ constexpr int NumDims = Functor::tin_type::NumDimensions;
+ typedef typename Functor::tin_type::Scalar T;
+ typedef typename Functor::tin_type::Index Index;
+ Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
+ Eigen::TensorMap<Eigen::Tensor<T, NumDims, Eigen::RowMajor>> tmp(scalar.data(), scalar_dim);
+ out.device(d) = in.binaryExpr(tmp.broadcast(in.dimensions()), Binary());
}
void BCast(const SYCLDevice& d,
@@ -78,7 +96,25 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in1,
typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast1,
bool* error) {
- LOG(FATAL) << "BinaryFunctor::BCast NOT IMPLEMENTED ";
+ typedef typename Functor::in_type T;
+ typename Functor::func func;
+ if ((NDIMS == 2) && Functor::use_bcast_optimization &&
+ use_bcast_optimization<T>::value) {
+ const bool bcast0_all_one = AllOne<NDIMS>(bcast0);
+ const bool bcast1_all_one = AllOne<NDIMS>(bcast1);
+ if (bcast0_all_one && !bcast1_all_one) {
+ To32Bit(out).device(d) =
+ To32Bit(in0).binaryExpr(To32Bit(in1).broadcast(bcast1), func);
+ return;
+ }
+ if (!bcast0_all_one && bcast1_all_one) {
+ To32Bit(out).device(d) =
+ To32Bit(in0).broadcast(bcast0).binaryExpr(To32Bit(in1), func);
+ return;
+ }
+ }
+ To32Bit(out).device(d) = To32Bit(in0).broadcast(bcast0).binaryExpr(
+ To32Bit(in1).broadcast(bcast1), func);
}
};
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
index 823e7e14ed..6250928aca 100644
--- a/tensorflow/core/kernels/cwise_ops_test.cc
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -59,6 +59,11 @@ BM_UNARY(gpu, Conj, std::complex<float>, DT_COMPLEX64);
BM_UNARY(cpu, Conj, std::complex<double>, DT_COMPLEX128);
BM_UNARY(gpu, Conj, std::complex<double>, DT_COMPLEX128);
+BM_UNARY(cpu, Rint, double, DT_DOUBLE);
+BM_UNARY(gpu, Rint, double, DT_DOUBLE);
+BM_UNARY(cpu, Rint, float, DT_FLOAT);
+BM_UNARY(gpu, Rint, float, DT_FLOAT);
+
// data func scalar.
static Graph* BinaryScalar(int num, const string& func) {
Graph* g = new Graph(OpRegistry::Global());
diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc
index 025e9a8de1..baa8f83091 100644
--- a/tensorflow/core/kernels/dense_update_ops.cc
+++ b/tensorflow/core/kernels/dense_update_ops.cc
@@ -14,6 +14,9 @@ limitations under the License.
==============================================================================*/
#define EIGEN_USE_THREADS
+#if TENSORFLOW_USE_SYCL
+#define EIGEN_USE_SYCL
+#endif
#include "tensorflow/core/kernels/dense_update_ops.h"
#include "tensorflow/core/framework/op_kernel.h"
@@ -92,6 +95,18 @@ TF_CALL_ALL_TYPES(REGISTER_KERNELS);
TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS
+#if TENSORFLOW_USE_SYCL
+typedef Eigen::SyclDevice SYCLDevice;
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Assign") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<type>("T"), \
+ AssignOpT<SYCLDevice, type>);
+TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
#if GOOGLE_CUDA
// Only register 'Assign' on GPU for the subset of types also supported by
// 'Variable' (see variable_ops.cc.)
diff --git a/tensorflow/core/kernels/eigen_pooling.h b/tensorflow/core/kernels/eigen_pooling.h
index 96acbe824a..8eea1b0f9d 100644
--- a/tensorflow/core/kernels/eigen_pooling.h
+++ b/tensorflow/core/kernels/eigen_pooling.h
@@ -325,7 +325,7 @@ struct AvgPoolMeanReducer {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
eigen_assert(scalarCount_ > 0);
- return accum / scalarCount_;
+ return accum / T(scalarCount_);
}
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h
index a2a9e15301..7702f3e70a 100644
--- a/tensorflow/core/kernels/eigen_spatial_convolutions.h
+++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h
@@ -991,6 +991,9 @@ EIGEN_DEVICE_FUNC
out_width = numext::ceil(InputCols / static_cast<float>(col_stride));
break;
default:
+ // Initialize unused variables to avoid a compiler warning
+ out_height = 0;
+ out_width = 0;
eigen_assert(false && "unexpected padding");
}
diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
index dd9f83afbc..56ffe58569 100644
--- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
@@ -72,8 +72,8 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> {
Eigen::array<int64, IXDIM> batch_strides;
Eigen::array<int64, IXDIM> batch_indices;
if (IXDIM > 0) {
- batch_strides[IXDIM - 1] = s_size;
- batch_indices[IXDIM - 1] = Tparams.dimension(IXDIM - 1);
+ batch_strides[size_t(IXDIM - 1)] = s_size;
+ batch_indices[size_t(IXDIM - 1)] = Tparams.dimension(IXDIM - 1);
}
for (int i = IXDIM - 1; i > 0; --i) {
batch_indices[i - 1] = Tparams.dimension(i - 1);
diff --git a/tensorflow/core/kernels/identity_op.cc b/tensorflow/core/kernels/identity_op.cc
index 711afd1bf3..8ede544b7f 100644
--- a/tensorflow/core/kernels/identity_op.cc
+++ b/tensorflow/core/kernels/identity_op.cc
@@ -68,6 +68,7 @@ REGISTER_GPU_KERNEL(bfloat16);
#undef REGISTER_GPU_KERNEL
+
#if GOOGLE_CUDA
// A special GPU kernel for int32 and bool.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/matrix_inverse_op.cc b/tensorflow/core/kernels/matrix_inverse_op.cc
index 884e3d61a4..0572d48b3e 100644
--- a/tensorflow/core/kernels/matrix_inverse_op.cc
+++ b/tensorflow/core/kernels/matrix_inverse_op.cc
@@ -52,7 +52,7 @@ class MatrixInverseOp : public LinearAlgebraOp<Scalar> {
Eigen::PartialPivLU<Matrix> lu_decomposition;
if (adjoint_) {
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
- // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
+ // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
lu_decomposition.compute(input.adjoint());
} else {
diff --git a/tensorflow/core/kernels/matrix_solve_op.cc b/tensorflow/core/kernels/matrix_solve_op.cc
index a2dfd58bd1..e10a102871 100644
--- a/tensorflow/core/kernels/matrix_solve_op.cc
+++ b/tensorflow/core/kernels/matrix_solve_op.cc
@@ -75,7 +75,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
Eigen::PartialPivLU<Matrix> lu_decomposition(matrix.rows());
if (adjoint_) {
// TODO(rmlarsen): For Eigen 3.2, this creates a temporary copy.
- // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/ \
+ // Make sure to backport: https://bitbucket.org/eigen/eigen/commits/
// bd2219a74c96dfe3f6bc2c23588749e36d2d8173
lu_decomposition.compute(matrix.adjoint());
} else {
@@ -95,7 +95,7 @@ class MatrixSolveOp : public LinearAlgebraOp<Scalar> {
// TODO(rmlarsen): Add check based on condition number estimation.
// The necessary changes to Eigen are in
- // https://bitbucket.org/eigen/eigen/pull-requests/174/ \
+ // https://bitbucket.org/eigen/eigen/pull-requests/174/
// add-matrix-condition-number-estimation/diff
outputs->at(0) = lu_decomposition.solve(rhs);
}
diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index 5aeb3d2c0e..7704c5f65a 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -317,9 +317,9 @@ class ScatterNdUpdateOp : public OpKernel {
scatter_nd_op::UpdateOp::SUB);
// TODO(simister): Find a way to reduce amount of templated generated code
// to reduce build size, then re-enable these additional operations.
-// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul", \
-// scatter_nd_op::UpdateOp::MUL); \
-// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv", \
+// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdMul",
+// scatter_nd_op::UpdateOp::MUL);
+// REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdDiv",
// scatter_nd_op::UpdateOp::DIV);
#define REGISTER_SCATTER_ND(type, dev) \
diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h
index 442721d37b..bbe2c6864f 100644
--- a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h
+++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h
@@ -175,7 +175,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> {
REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB);
// TODO(simister): Re-enable after identifying a way to reduce the binary size
// due to too many template instantiations.
-// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL); \
+// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::MUL);
// REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::DIV);
TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE);
diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc
index 9e9cdda382..1c7d50e161 100644
--- a/tensorflow/core/kernels/sendrecv_ops.cc
+++ b/tensorflow/core/kernels/sendrecv_ops.cc
@@ -80,6 +80,8 @@ REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_GPU), SendOp);
#if TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_SYCL), SendOp);
+REGISTER_KERNEL_BUILDER(
+ Name("_HostSend").Device(DEVICE_SYCL).HostMemory("tensor"), SendOp);
#endif
REGISTER_KERNEL_BUILDER(Name("_HostSend").Device(DEVICE_CPU), SendOp);
@@ -148,4 +150,9 @@ REGISTER_KERNEL_BUILDER(Name("_HostRecv").Device(DEVICE_CPU), RecvOp);
REGISTER_KERNEL_BUILDER(
Name("_HostRecv").Device(DEVICE_GPU).HostMemory("tensor"), RecvOp);
+#if TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(
+ Name("_HostRecv").Device(DEVICE_SYCL).HostMemory("tensor"), RecvOp);
+#endif
+
} // end namespace tensorflow
diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index b97df3e00c..532825a2a8 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -31,6 +31,17 @@ REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable").Device(DEVICE_CPU),
REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
IsVariableInitializedOp);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Variable") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("dtype"), \
+ VariableOp);
+TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#undef REGISTER_SYCL_KERNEL
+#endif
+
#if GOOGLE_CUDA
// Only register 'Variable' on GPU for the subset of types also supported by
// 'Assign' (see dense_update_ops.cc.)
diff --git a/tensorflow/core/lib/core/threadpool.cc b/tensorflow/core/lib/core/threadpool.cc
index 534ef902fb..a2245bb28e 100644
--- a/tensorflow/core/lib/core/threadpool.cc
+++ b/tensorflow/core/lib/core/threadpool.cc
@@ -21,9 +21,11 @@ limitations under the License.
#include "tensorflow/core/platform/denormal.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/setround.h"
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
+
namespace tensorflow {
namespace thread {
@@ -50,6 +52,8 @@ struct EigenEnvironment {
return env_->StartThread(thread_options_, name_, [=]() {
// Set the processor flag to flush denormals to zero
port::ScopedFlushDenormal flush;
+ // Set the C++ rounding mode to ROUND TO NEAREST
+ port::ScopedSetRound round;
f();
});
}
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 1fd641ee36..25972252c5 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -3859,7 +3859,7 @@ strides: 1-D of length 4. How far the centers of two consecutive patches are in
rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
- `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
+ `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
padding: The type of padding algorithm to use.
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 56bd594644..f854ad288b 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -472,6 +472,25 @@ REGISTER_OP("Ceil")
Returns element-wise smallest integer in not less than x.
)doc");
+REGISTER_OP("Rint")
+ .Input("x: T")
+ .Output("y: T")
+ .Attr("T: {float, double}")
+ .SetShapeFn(shape_inference::UnchangedShape)
+ .Doc(R"doc(
+Returns element-wise integer closest to x.
+
+If the result is midway between two representable values,
+the even representable is chosen.
+For example:
+
+```
+rint(-1.5) ==> -2.0
+rint(0.5000001) ==> 1.0
+rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
+```
+)doc");
+
// Declares cwise binary operations signature: 't, 't -> 't.
#define BINARY_MORE() \
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index bf87c0a495..6d100a718d 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -6591,7 +6591,7 @@ op {
attr {
name: "rates"
type: "list(int)"
- description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by\nsubsampling them spatially by a factor of `rates`."
+ description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by\nsubsampling them spatially by a factor of `rates`."
has_minimum: true
minimum: 4
}
diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index 439bf97a2c..2fa2726bde 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -4,3 +4,6 @@
def tf_cuda_tests_tags():
return ["local"]
+
+def tf_sycl_tests_tags():
+ return ["local"]
diff --git a/tensorflow/core/platform/setround.cc b/tensorflow/core/platform/setround.cc
new file mode 100644
index 0000000000..febb47781c
--- /dev/null
+++ b/tensorflow/core/platform/setround.cc
@@ -0,0 +1,35 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/setround.h"
+
+#ifdef __STDC_IEC_559__
+#include <fenv.h> // fesetround, FE_*
+#endif
+
+namespace tensorflow {
+namespace port {
+
+ScopedSetRound::ScopedSetRound() {
+#ifdef __STDC_IEC_559__
+ std::fesetround(FE_TONEAREST);
+#endif
+}
+
+ScopedSetRound::~ScopedSetRound() {
+}
+
+} // namespace port
+} // namespace tensorflow
diff --git a/tensorflow/core/platform/setround.h b/tensorflow/core/platform/setround.h
new file mode 100644
index 0000000000..4b1b3fd497
--- /dev/null
+++ b/tensorflow/core/platform/setround.h
@@ -0,0 +1,38 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PLATFORM_SETROUND_H_
+#define TENSORFLOW_PLATFORM_SETROUND_H_
+
+#include "tensorflow/core/platform/macros.h"
+
+namespace tensorflow {
+namespace port {
+
+// While this class is active, the floating point rounding mode is set to
+// round-to-nearest (FE_TONEAREST). Note: the destructor does not restore it.
+class ScopedSetRound {
+ public:
+ ScopedSetRound();
+ ~ScopedSetRound();
+
+ private:
+ TF_DISALLOW_COPY_AND_ASSIGN(ScopedSetRound);
+};
+
+} // namespace port
+} // namespace tensorflow
+
+#endif  // TENSORFLOW_PLATFORM_SETROUND_H_
diff --git a/tensorflow/core/platform/windows/env.cc b/tensorflow/core/platform/windows/env.cc
index 41ce5d9320..904d06e2a9 100644
--- a/tensorflow/core/platform/windows/env.cc
+++ b/tensorflow/core/platform/windows/env.cc
@@ -72,7 +72,7 @@ class WindowsEnv : public Env {
}
bool MatchPath(const string& path, const string& pattern) override {
- return PathMatchSpec(path.c_str(), pattern.c_str()) == S_OK;
+ return PathMatchSpec(path.c_str(), pattern.c_str()) == TRUE;
}
uint64 NowMicros() override {
diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc
index c6c42f0150..31516bb2ee 100644
--- a/tensorflow/core/platform/windows/windows_file_system.cc
+++ b/tensorflow/core/platform/windows/windows_file_system.cc
@@ -386,7 +386,7 @@ Status WindowsFileSystem::GetChildren(const string& dir,
string pattern = translated_dir;
if (!pattern.empty() && pattern.back() != '\\' && pattern.back() != '/') {
- pattern += '\\*';
+ pattern += "\\*";
} else {
pattern += '*';
}
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 66af1897be..dd2dad417d 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -20,7 +20,7 @@ limitations under the License.
#define TF_MAJOR_VERSION 0
#define TF_MINOR_VERSION 11
-#define TF_PATCH_VERSION 0rc2
+#define TF_PATCH_VERSION head
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc
index abf8d77f86..f1885bb980 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.cc
+++ b/tensorflow/core/util/example_proto_fast_parsing.cc
@@ -87,6 +87,8 @@ class Feature {
*dtype = DT_INT64;
break;
default:
+ // Initialize variable to avoid compiler warning
+ *dtype = DT_INVALID;
return errors::InvalidArgument("Unsuported datatype.");
}
return Status::OK();
diff --git a/tensorflow/core/util/tensor_format.h b/tensorflow/core/util/tensor_format.h
index 560b6310b2..a21dee5d58 100644
--- a/tensorflow/core/util/tensor_format.h
+++ b/tensorflow/core/util/tensor_format.h
@@ -58,6 +58,7 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
return 1 + NDIMS;
default:
LOG(FATAL) << "Invalid dimension: " << dimension;
+ return -1; // Avoid compiler warning about missing return value
}
} else if (format == FORMAT_NCHW) {
switch (dimension) {
@@ -77,9 +78,11 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
return NDIMS + 1;
default:
LOG(FATAL) << "Invalid dimension: " << dimension;
+ return -1; // Avoid compiler warning about missing return value
}
} else {
LOG(FATAL) << "Invalid format: " << static_cast<int>(format);
+ return -1; // Avoid compiler warning about missing return value
}
}
diff --git a/tensorflow/core/util/tensor_slice_reader_cache.cc b/tensorflow/core/util/tensor_slice_reader_cache.cc
index 9ac44f328e..06fc9aa444 100644
--- a/tensorflow/core/util/tensor_slice_reader_cache.cc
+++ b/tensorflow/core/util/tensor_slice_reader_cache.cc
@@ -52,7 +52,7 @@ const TensorSliceReader* TensorSliceReaderCache::GetReader(
TensorSliceReader::OpenTableFunction open_function, int preferred_shard) {
mutex_lock l(mu_);
-#ifdef __GXX_RTTI
+#if defined(__GXX_RTTI) || defined(_CPPRTTI)
// Get the function pointer from the open_function value.
TensorSliceReaderCache::OpenFuncType* func_ptr =
open_function.target<TensorSliceReaderCache::OpenFuncType>();
diff --git a/tensorflow/g3doc/api_docs/python/array_ops.md b/tensorflow/g3doc/api_docs/python/array_ops.md
index 61fecfdf09..b076bd28cf 100644
--- a/tensorflow/g3doc/api_docs/python/array_ops.md
+++ b/tensorflow/g3doc/api_docs/python/array_ops.md
@@ -1428,7 +1428,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
- `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
+ `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
The type of padding algorithm to use.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md
index e79dfd2f4d..3fbb0c303a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.linspace.md
@@ -3,7 +3,7 @@
Generates values in an interval.
A sequence of `num` evenly-spaced values are generated beginning at `start`.
-If `num > 1`, the values in the sequence increase by `stop - start / num - 1`,
+If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`,
so that the last one is exactly `stop`.
For example:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md
index 82f44f6f4d..e898cbaa4f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md
@@ -11,8 +11,8 @@ the full softmax loss.
At inference time, you can compute full softmax probabilities with the
expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.
-See our [Candidate Sampling Algorithms Reference]
-(../../extras/candidate_sampling.pdf)
+See our
+[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf)
Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md
index bf6f268d4f..853ce0176f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.extract_image_patches.md
@@ -16,7 +16,7 @@ Extract `patches` from `images` and put them in the "depth" output dimension.
1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
- `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1), followed by
+ `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
* <b>`padding`</b>: A `string` from: `"SAME", "VALID"`.
The type of padding algorithm to use.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md
index 403621dc00..0867e30876 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md
@@ -17,7 +17,7 @@ for k in 0..in_channels-1
filter[di, dj, k, q]
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
##### Args:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md
index f405aa7707..186209d3f5 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md
@@ -42,8 +42,7 @@ with an otherwise unused class.
where a sampled class equals one of the target classes. If set to
`True`, this is a "Sampled Logistic" loss instead of NCE, and we are
learning to generate log-odds instead of log probabilities. See
- our [Candidate Sampling Algorithms Reference]
- (../../extras/candidate_sampling.pdf).
+ our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf).
Default is False.
* <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant
if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md
index 4feb46302d..e7d32923a1 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md
@@ -11,8 +11,8 @@ each component is divided by the weighted, squared sum of inputs within
sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
output = input / (bias + alpha * sqr_sum) ** beta
-For details, see [Krizhevsky et al., ImageNet classification with deep
-convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+For details, see
+[Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
##### Args:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md
index a7d2cf094f..8153224ed4 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md
@@ -22,7 +22,7 @@ In detail, with the default NHWC format,
filter[di, dj, q, k]
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
##### Args:
diff --git a/tensorflow/g3doc/get_started/os_setup.md b/tensorflow/g3doc/get_started/os_setup.md
index 5ef608c7b8..431e214b30 100644
--- a/tensorflow/g3doc/get_started/os_setup.md
+++ b/tensorflow/g3doc/get_started/os_setup.md
@@ -63,37 +63,37 @@ Then, select the correct binary to install:
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
```
Install TensorFlow:
@@ -159,37 +159,37 @@ Now, install TensorFlow just as you would for a regular Pip installation. First
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
```
Finally install TensorFlow:
@@ -298,37 +298,37 @@ select the correct binary to install:
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
# Requires CUDA toolkit 8.0 and CuDNN v5. For other versions, see "Installing from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc2-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc2-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0-py3-none-any.whl
```
Finally install TensorFlow:
@@ -396,7 +396,7 @@ code.
code.
We also have tags with `latest` replaced by a released version (e.g.,
-`0.11.0rc2-gpu`).
+`0.11.0-gpu`).
With Docker the installation is as follows:
@@ -781,7 +781,7 @@ $ bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_pack
$ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
# The name of the .whl file will depend on your platform.
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc2-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0-py2-none-any.whl
```
## Setting up TensorFlow for Development
diff --git a/tensorflow/g3doc/how_tos/adding_an_op/index.md b/tensorflow/g3doc/how_tos/adding_an_op/index.md
index 3be1efb13f..aa5dcfb4f2 100644
--- a/tensorflow/g3doc/how_tos/adding_an_op/index.md
+++ b/tensorflow/g3doc/how_tos/adding_an_op/index.md
@@ -44,6 +44,8 @@ add a call to the `REGISTER_OP` macro that defines the interface for such an Op:
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
+using namespace tensorflow;
+
REGISTER_OP("ZeroOut")
.Input("to_zero: int32")
.Output("zeroed: int32")
@@ -236,12 +238,26 @@ class ZeroOutTest(tf.test.TestCase):
with self.test_session():
result = zero_out_module.zero_out([5, 4, 3, 2, 1])
self.assertAllEqual(result.eval(), [5, 0, 0, 0, 0])
+
+if __name__ == "__main__":
+ tf.test.main()
+```
+
+Add a 'zero_out_op_test' target to `tensorflow/python/kernel_tests/BUILD` among the other CPU-only test targets:
+
+```
+tf_py_test(
+ name = "zero_out_op_test",
+ size = "small",
+ srcs = ["zero_out_op_test.py"],
+ additional_deps = ["//tensorflow:tensorflow_py"],
+)
```
Then run your test:
```sh
-$ bazel test tensorflow/python:zero_out_op_test
+$ bazel test //tensorflow/python/kernel_tests:zero_out_op_test
```
## Validation
@@ -895,7 +911,7 @@ For more details, see
In general, changes to specifications must be backwards-compatible: changing the
specification of an Op must not break prior serialized `GraphDef` protocol
-buffers constructed from older specfications. The details of `GraphDef`
+buffers constructed from older specifications. The details of `GraphDef`
compatibility are [described here](../../resources/versions.md#graphs).
There are several ways to preserve backwards-compatibility.
@@ -1117,7 +1133,7 @@ found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/fr
REGISTER_OP("ZeroOut")
.Input("to_zero: int32")
.Output("zeroed: int32")
- .SetShapeFn([](::tensorflow::shape_inference::UnchangedShape);
+ .SetShapeFn(::tensorflow::shape_inference::UnchangedShape);
```
A shape function can also constrain the shape of an input. For the version of
@@ -1193,7 +1209,7 @@ the following:
```
This specifies that the shape function should use the C++-implemented
-shape specfication defined in your `REGISTER_OP` declaration above. Note
+shape specification defined in your `REGISTER_OP` declaration above. Note
that TensorFlow will soon make this the default, so you only need
to define the shape function once in C++ to get shape inference for
free in Python.
diff --git a/tensorflow/g3doc/index.md b/tensorflow/g3doc/index.md
index 4d7149961a..3bfba40416 100644
--- a/tensorflow/g3doc/index.md
+++ b/tensorflow/g3doc/index.md
@@ -1,10 +1,5 @@
# TensorFlow for Googlers
-This site has TensorFlow documentation for Google engineers. The menu at the
-left lists those parts of the public TensorFlow documentation that pertain to
-Google engineers, along with some internal-only resources written specifically
-for Google engineers.
-
TensorFlowâ„¢ is an open source software library for numerical computation using
data flow graphs. Nodes in the graph represent mathematical operations, while
the graph edges represent the multidimensional data arrays (tensors) that flow
@@ -18,4 +13,4 @@ applicable in a wide variety of other domains as well. The following documents
show you how to set up and use the TensorFlow system.
## Table of Contents
-<!--#include virtual="sitemap.md" --> \ No newline at end of file
+<!--#include virtual="sitemap.md" -->
diff --git a/tensorflow/models/embedding/word2vec.py b/tensorflow/models/embedding/word2vec.py
index c5cb52bfbc..e463e300c1 100644
--- a/tensorflow/models/embedding/word2vec.py
+++ b/tensorflow/models/embedding/word2vec.py
@@ -147,6 +147,8 @@ class Options(object):
# Where to write out summaries.
self.save_path = FLAGS.save_path
+ if not os.path.exists(self.save_path):
+ os.makedirs(self.save_path)
# Eval options.
# The text file for eval.
diff --git a/tensorflow/models/embedding/word2vec_optimized.py b/tensorflow/models/embedding/word2vec_optimized.py
index a6e8f9277d..2efdf66867 100644
--- a/tensorflow/models/embedding/word2vec_optimized.py
+++ b/tensorflow/models/embedding/word2vec_optimized.py
@@ -126,6 +126,8 @@ class Options(object):
# Where to write out summaries.
self.save_path = FLAGS.save_path
+ if not os.path.exists(self.save_path):
+ os.makedirs(self.save_path)
# Eval options.
diff --git a/tensorflow/models/image/cifar10/cifar10.py b/tensorflow/models/image/cifar10/cifar10.py
index 7df2149d40..1c51b76f09 100644
--- a/tensorflow/models/image/cifar10/cifar10.py
+++ b/tensorflow/models/image/cifar10/cifar10.py
@@ -207,8 +207,8 @@ def inference(images):
wd=0.0)
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
- bias = tf.nn.bias_add(conv, biases)
- conv1 = tf.nn.relu(bias, name=scope.name)
+ pre_activation = tf.nn.bias_add(conv, biases)
+ conv1 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv1)
# pool1
@@ -226,8 +226,8 @@ def inference(images):
wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
- bias = tf.nn.bias_add(conv, biases)
- conv2 = tf.nn.relu(bias, name=scope.name)
+ pre_activation = tf.nn.bias_add(conv, biases)
+ conv2 = tf.nn.relu(pre_activation, name=scope.name)
_activation_summary(conv2)
# norm2
diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index d626165dc0..dd55154a3a 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -122,6 +122,54 @@ _REGISTERED_EXPANSIONS = [
lambda feed: [feed])]
# pylint: enable=g-long-lambda
+def register_session_run_conversion_functions(tensor_type, fetch_function,
+ feed_function=None, feed_function_for_partial_run=None):
+ """Register fetch and feed conversion functions for `tf.Session.run()`.
+
+ This function registers a triple of conversion functions for fetching and/or
+ feeding values of user-defined types in a call to tf.Session.run().
+
+ An example
+
+ ```python
+ class SquaredTensor(object):
+ def __init__(self, tensor):
+ self.sq = tf.square(tensor)
+ #you can define conversion functions as follows:
+ fetch_function = lambda squared_tensor:([squared_tensor.sq],
+ lambda val: val[0])
+ feed_function = lambda feed, feed_val: [(feed.sq, feed_val)]
+ feed_function_for_partial_run = lambda feed: [feed.sq]
+ #then after invoking this register function, you can use as follows:
+ session.run(squared_tensor1,
+ feed_dict = {squared_tensor2 : some_numpy_array})
+ ```
+
+ Args:
+ tensor_type: The type for which you want to register a conversion function.
+ fetch_function: A callable that takes an object of type `tensor_type` and
+ returns a tuple, where the first element is a list of `tf.Tensor` objects,
+ and the second element is a callable that takes a list of ndarrays and
+ returns an object of some value type that corresponds to `tensor_type`.
+ fetch_function describes how to expand fetch into its component Tensors
+ and how to contract the fetched results back into a single return value.
+ feed_function: A callable that takes feed_key and feed_value as input, and
+ returns a list of tuples (feed_tensor, feed_val), feed_key must have type
+ `tensor_type`, and feed_tensor must have type `tf.Tensor`. Each feed
+ function describes how to unpack a single fed value and map it to feeds
+ of one or more tensors and their corresponding values.
+ feed_function_for_partial_run: A callable for specifying tensor values to
+ feed when setting up a partial run, which takes a `tensor_type` type
+ object as input, and returns a list of Tensors.
+ """
+ for conversion_function in _REGISTERED_EXPANSIONS:
+ if issubclass(conversion_function[0], tensor_type):
+ raise ValueError(
+ '%s has already been registered so ignore it.', tensor_type)
+ return
+ _REGISTERED_EXPANSIONS.insert(0,
+ (tensor_type, fetch_function, feed_function, feed_function_for_partial_run))
+
class _FetchMapper(object):
"""Definition of the interface provided by fetch mappers.
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index bf0a964867..a20376b91d 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -1554,6 +1554,33 @@ class SessionTest(test_util.TensorFlowTestCase):
sess.run(enqueue_op)
self.assertEqual(sess.run(q.size()), num_epochs * 2)
+ def testRegisterFetchAndFeedConversionFunctions(self):
+ class SquaredTensor(object):
+ def __init__(self, tensor):
+ self.sq = math_ops.square(tensor)
+
+ fetch_fn = lambda squared_tensor: ([squared_tensor.sq], lambda val: val[0])
+ feed_fn1 = lambda feed, feed_val: [(feed.sq, feed_val)]
+ feed_fn2 = lambda feed: [feed.sq]
+
+ session.register_session_run_conversion_functions(SquaredTensor, fetch_fn,
+ feed_fn1, feed_fn2)
+ with self.assertRaises(ValueError):
+ session.register_session_run_conversion_functions(SquaredTensor,
+ fetch_fn, feed_fn1, feed_fn2)
+ with self.test_session() as sess:
+ np1 = np.array([1.0, 1.5, 2.0, 2.5])
+ np2 = np.array([3.0, 3.5, 4.0, 4.5])
+ squared_tensor = SquaredTensor(np2)
+ squared_eval = sess.run(squared_tensor)
+ self.assertAllClose(np2 * np2, squared_eval)
+ squared_eval = sess.run(squared_tensor, feed_dict={
+ squared_tensor : np1 * np1})
+ self.assertAllClose(np1 * np1, squared_eval)
+ partial_run = sess.partial_run_setup([squared_tensor], [])
+ squared_eval = sess.partial_run(partial_run, squared_tensor)
+ self.assertAllClose(np2 * np2, squared_eval)
+
if __name__ == '__main__':
googletest.main()
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index dc7c72e220..8db55a9359 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -12,6 +12,7 @@ licenses(["notice"]) # Apache 2.0
load("//tensorflow:tensorflow.bzl", "tf_py_test")
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "sycl_py_test")
# CPU only tests should use tf_py_test, GPU tests use cuda_py_test
# Please avoid the py_tests and cuda_py_tests (plural) while we
@@ -1362,6 +1363,13 @@ cuda_py_test(
tags = ["nomsan"], # fails in msan from numpy calls
)
+sycl_py_test(
+ name = "basic_gpu_test",
+ size = "small",
+ srcs = ["basic_gpu_test.py"],
+ additional_deps = ["//tensorflow:tensorflow_py"],
+)
+
filegroup(
name = "all_files",
srcs = glob(
diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py
new file mode 100644
index 0000000000..541c95aa28
--- /dev/null
+++ b/tensorflow/python/kernel_tests/basic_gpu_test.py
@@ -0,0 +1,61 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for basic component wise operations using a GPU device."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+import math
+import numpy as np
+from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args
+
+class GPUBinaryOpsTest(tf.test.TestCase):
+ def _compareGPU(self, x, y, np_func, tf_func):
+ with self.test_session(use_gpu=True) as sess:
+ inx = tf.convert_to_tensor(x)
+ iny = tf.convert_to_tensor(y)
+ out = tf_func(inx, iny)
+ tf_gpu = sess.run(out)
+
+ with self.test_session(use_gpu=False) as sess:
+ inx = tf.convert_to_tensor(x)
+ iny = tf.convert_to_tensor(y)
+ out = tf_func(inx, iny)
+ tf_cpu = sess.run(out)
+
+ self.assertAllClose(tf_cpu, tf_gpu)
+
+ def testFloatBasic(self):
+ x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32)
+ y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32)
+ self._compareGPU(x, y, np.add, tf.add)
+ self._compareGPU(x, y, np.subtract, tf.sub)
+ self._compareGPU(x, y, np.multiply, tf.mul)
+ self._compareGPU(x, y + 0.1, np.true_divide, tf.truediv)
+
+ #def _GetGradientArgs(self, xs, ys):
+ #with self.test_session(use_gpu=True) as sess:
+ # return sess.run(_broadcast_gradient_args(xs, ys))
+
+ #def testBroadcast(self):
+ #r0, r1 = self._GetGradientArgs([2, 3, 5], [1])
+ #self.assertAllEqual(r0, [])
+ #self.assertAllEqual(r1, [0, 1, 2])
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index bca2030d01..3322d22b9f 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -1778,9 +1778,17 @@ class IsFiniteInfNanTest(tf.test.TestCase):
class RoundingTest(tf.test.TestCase):
- def _compare(self, x, use_gpu):
+ def _compare_values(self, x, y=None):
+ y = np.rint(x) if y is None else np.asarray(y)
+ with self.test_session() as sess:
+ tf_rint = tf.rint(x)
+ np_rint = sess.run(tf_rint)
+ self.assertAllEqual(y, np_rint)
+ self.assertShapeEqual(y, tf_rint)
+
+ def _compare(self, x):
np_floor, np_ceil = np.floor(x), np.ceil(x)
- with self.test_session(use_gpu=use_gpu) as sess:
+ with self.test_session() as sess:
inx = tf.convert_to_tensor(x)
ofloor, oceil = tf.floor(inx), tf.ceil(inx)
tf_floor, tf_ceil = sess.run([ofloor, oceil])
@@ -1790,9 +1798,20 @@ class RoundingTest(tf.test.TestCase):
self.assertShapeEqual(np_ceil, oceil)
def _testDtype(self, dtype):
- data = (np.arange(-3, 3) / 4.).reshape([1, 3, 2]).astype(dtype)
- self._compare(data, use_gpu=True)
- self._compare(data, use_gpu=True)
+ data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(dtype)
+ self._compare(data)
+ # TODO: rint op is not supported for float16
+ if dtype is np.float16:
+ return
+ self._compare_values(data)
+ x = [0.5, 0.5000001]
+ y = [0.0, 1.0]
+ self._compare_values(x, y=y)
+
+ # numpy example
+ x = [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]
+ y = [-2., -2., -0., 0., 2., 2., 2.]
+ self._compare_values(x, y=y)
def testTypes(self):
for dtype in [np.float16, np.float32, np.float64]:
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index fd39b8f4c6..1285ed1912 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -28,25 +28,27 @@ from tensorflow.python.ops import init_ops
# Returns true iff the two initializers produce the same tensor to
# within a tiny tolerance.
-def identicaltest(tc, init1, init2):
+def identicaltest(tc, init1, init2, shape=None):
"""Tests if two initializations are identical to within tiny tolerances.
Args:
tc: An instance of TensorFlowTestCase.
init1: An Initializer that generates a tensor of a given shape
init2: An Initializer that generates a tensor of a given shape
+ shape: Shape of the tensor to initialize or `None` to use a vector of length 100.
Returns:
True or False as determined by test.
"""
- num = 100
+ if shape is None:
+ shape = [100]
with tc.test_session(graph=tf.Graph()):
- t1 = init1([num]).eval()
+ t1 = init1(shape).eval()
with tc.test_session(graph=tf.Graph()):
- t2 = init2([num]).eval()
+ t2 = init2(shape).eval()
return np.allclose(t1, t2, rtol=1e-15, atol=1e-15)
-def duplicated_initializer(tc, init, graph_seed):
+def duplicated_initializer(tc, init, graph_seed, shape=None):
"""Tests duplicated random initializer within the same graph.
This test generates two random kernels from the same initializer to the same
@@ -58,14 +60,16 @@ def duplicated_initializer(tc, init, graph_seed):
tc: An instance of TensorFlowTestCase.
init: An Initializer that generates a tensor of a given shape
graph_seed: A graph-level seed to use.
+ shape: Shape of the tensor to initialize or `None` to use a vector of length 100.
Returns:
True or False as determined by test.
"""
- num = 100
+ if shape is None:
+ shape = [100]
with tc.test_session(graph=tf.Graph()):
random_seed.set_random_seed(graph_seed)
- t1 = init([num]).eval()
- t2 = init([num]).eval()
+ t1 = init(shape).eval()
+ t2 = init(shape).eval()
return np.allclose(t1, t2, rtol=1e-15, atol=1e-15)
@@ -444,5 +448,59 @@ class DeviceTest(tf.test.TestCase):
self.assertDeviceEqual("/job:ps", var.initializer.device)
+class OrthogonalInitializerTest(tf.test.TestCase):
+
+ def testInitializerIdentical(self):
+ for dtype in [tf.float32, tf.float64]:
+ init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
+ init2 = tf.orthogonal_initializer(seed=1, dtype=dtype)
+ self.assertTrue(identicaltest(self, init1, init2, (10, 10)))
+
+ def testInitializerDifferent(self):
+ for dtype in [tf.float32, tf.float64]:
+ init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
+ init2 = tf.orthogonal_initializer(seed=2, dtype=dtype)
+ self.assertFalse(identicaltest(self, init1, init2, (10, 10)))
+
+ def testDuplicatedInitializer(self):
+ init = tf.orthogonal_initializer()
+ self.assertFalse(duplicated_initializer(self, init, 1, (10, 10)))
+
+ def testInvalidDataType(self):
+ self.assertRaises(
+ ValueError,
+ tf.orthogonal_initializer, dtype=tf.string)
+
+ def testInvalidShape(self):
+ init1 = tf.orthogonal_initializer()
+ with self.test_session(graph=tf.Graph(), use_gpu=True):
+ self.assertRaises(ValueError, init1, shape=[5])
+
+ def testGain(self):
+ shape = (10, 10)
+ for dtype in [tf.float32, tf.float64]:
+ init1 = tf.orthogonal_initializer(seed=1, dtype=dtype)
+ init2 = tf.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
+ with self.test_session(graph=tf.Graph(), use_gpu=True):
+ t1 = init1(shape).eval()
+ with self.test_session(graph=tf.Graph(), use_gpu=True):
+ t2 = init2(shape).eval()
+ return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)
+
+ def testShapesValues(self):
+ for dtype in [tf.float32, tf.float64]:
+ for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]:
+ init = tf.orthogonal_initializer(dtype=dtype)
+ with self.test_session(graph=tf.Graph(), use_gpu=True):
+ # Check the shape
+ t = init(shape).eval()
+ self.assertAllEqual(shape, t.shape)
+ # Check orthogonality by computing the inner product
+ t = t.reshape((np.prod(t.shape[:-1]), t.shape[-1]))
+ if t.shape[0] > t.shape[1]:
+ self.assertAllClose(np.dot(t.T, t), np.eye(t.shape[1]))
+ else:
+ self.assertAllClose(np.dot(t, t.T), np.eye(t.shape[0]))
+
if __name__ == "__main__":
tf.test.main()
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index a037069d2f..0197e8f390 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -523,6 +523,10 @@ def _ExtractImagePatchesGrad(op, grad):
batch_size, rows_in, cols_in, channels = [
dim.value for dim in op.inputs[0].get_shape()
]
+ input_bhwc = array_ops.shape(op.inputs[0])
+ batch_size = input_bhwc[0]
+ channels = input_bhwc[3]
+
_, rows_out, cols_out, _ = [
dim.value for dim in op.outputs[0].get_shape()
]
diff --git a/tensorflow/python/ops/image_grad.py b/tensorflow/python/ops/image_grad.py
index 81f089660a..b6b61ab92c 100644
--- a/tensorflow/python/ops/image_grad.py
+++ b/tensorflow/python/ops/image_grad.py
@@ -35,10 +35,16 @@ def _ResizeNearestNeighborGrad(op, grad):
Returns:
The gradients w.r.t. the input and the output.
"""
+ image = op.inputs[0]
+ if image.get_shape()[1:3].is_fully_defined():
+ image_shape = image.get_shape()[1:3]
+ else:
+ image_shape = array_ops.shape(image)[1:3]
+
# pylint: disable=protected-access
grads = gen_image_ops._resize_nearest_neighbor_grad(
grad,
- op.inputs[0].get_shape()[1:3],
+ image_shape,
align_corners=op.get_attr("align_corners"))
# pylint: enable=protected-access
return [grads, None]
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index 94364b408a..bbe9977d4b 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -40,6 +40,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import linalg_ops
def _assert_float_dtype(dtype):
@@ -343,3 +344,55 @@ class _RandomWalkInitializer(object):
"""Generate a tensor used to initialize a variable."""
return random_ops._random_walk(shape, self._nonlinearity, dtype,
seed=self._seed)
+
+
+def orthogonal_initializer(gain=1.0, dtype=dtypes.float32, seed=None):
+ """Returns an initializer that generates an orthogonal matrix or a reshaped
+ orthogonal matrix.
+
+  If the shape of the tensor to initialize is two-dimensional, it is initialized
+ with an orthogonal matrix obtained from the singular value decomposition of a
+ matrix of uniform random numbers.
+
+ If the shape of the tensor to initialize is more than two-dimensional, a matrix
+ of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])` is initialized, where
+ `n` is the length of the shape vector. The matrix is subsequently reshaped to
+ give a tensor of the desired shape.
+
+ Args:
+ gain: multiplicative factor to apply to the orthogonal matrix
+ dtype: The type of the output.
+ seed: A Python integer. Used to create random seeds. See
+ [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
+ for behavior.
+
+ Returns:
+ An initializer that generates orthogonal tensors
+
+ Raises:
+ ValueError: if `dtype` is not a floating point type or if `shape` has fewer than two entries.
+ """
+ def _initializer(shape, dtype=_assert_float_dtype(dtype), partition_info=None):
+ # Check the shape
+ if len(shape) < 2:
+ raise ValueError('the tensor to initialize must be at least two-dimensional')
+ # Flatten the input shape with the last dimension remaining its original shape so it works for conv2d
+ num_rows = 1
+ for dim in shape[:-1]:
+ num_rows *= dim
+ num_cols = shape[-1]
+ flat_shape = (num_rows, num_cols)
+
+ # Generate a random matrix
+ a = random_ops.random_uniform(flat_shape, dtype=dtype, seed=seed)
+ # Compute the svd
+ _, u, v = linalg_ops.svd(a, full_matrices=False)
+ # Pick the appropriate singular value decomposition
+ if num_rows > num_cols:
+ q = u
+ else:
+ # Tensorflow departs from numpy conventions such that we need to transpose axes here
+ q = array_ops.transpose(v)
+ return gain * array_ops.reshape(q, shape)
+
+ return _initializer
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 8d999f0074..2e8328a98b 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -796,6 +796,12 @@ def _FloorGrad(_, unused_grad):
return [None]
+@ops.RegisterGradient("Rint")
+def _RintGrad(_, unused_grad):
+ # the gradient of Rint is zero
+ return [None]
+
+
@ops.RegisterGradient("BatchMatMul")
def _BatchMatMul(op, grad):
"""Returns the gradient of x and y given the gradient of x * y."""
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index a431b66b85..d9a723a163 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -75,6 +75,7 @@ mathematical functions to your graph.
@@zeta
@@polygamma
@@betainc
+@@rint
## Matrix Math Functions
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index 232b6e9cdf..f2a201a609 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -69,6 +69,7 @@ create variables contingent on certain conditions.
@@uniform_unit_scaling_initializer
@@zeros_initializer
@@ones_initializer
+@@orthogonal_initializer
## Variable Partitioners for Sharding
diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py
index 7af533e74f..70ecda1dda 100644
--- a/tensorflow/python/platform/tf_logging.py
+++ b/tensorflow/python/platform/tf_logging.py
@@ -171,7 +171,6 @@ def google2_log_prefix(level, timestamp=None, file_and_line=None):
"""Assemble a logline prefix using the google2 format."""
# pylint: disable=global-variable-not-assigned
global _level_names
- global _logfile_map, _logfile_map_mutex
# pylint: enable=global-variable-not-assigned
# Record current time
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 31b4461dad..47a83d51a4 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -41,6 +41,7 @@ def check_version(bazel_version):
load(
"//tensorflow/core:platform/default/build_config_root.bzl",
"tf_cuda_tests_tags",
+ "tf_sycl_tests_tags",
)
load(
"@local_config_cuda//cuda:build_defs.bzl",
@@ -886,6 +887,20 @@ def cuda_py_test(name, srcs, size="medium", data=[], main=None, args=[],
additional_deps=additional_deps,
flaky=flaky)
+def sycl_py_test(name, srcs, size="medium", data=[], main=None, args=[],
+ shard_count=1, additional_deps=[], tags=[], flaky=0):
+ test_tags = tags + tf_sycl_tests_tags()
+ tf_py_test(name=name,
+ size=size,
+ srcs=srcs,
+ data=data,
+ main=main,
+ args=args,
+ tags=test_tags,
+ shard_count=shard_count,
+ additional_deps=additional_deps,
+ flaky=flaky)
+
def py_tests(name,
srcs,
size="medium",
diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat
index c8f65402ff..45ddfaee9c 100644
--- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat
+++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_py.bat
@@ -28,10 +28,10 @@ CALL %REPO_ROOT%\tensorflow\tools\ci_build\windows\cpu\cmake\run_build.bat
SET PIP_EXE="C:\Program Files\Anaconda3\Scripts\pip.exe"
:: Uninstall tensorflow pip package, which might be a leftover from old runs.
-%PIP_EXE% uninstall tensorflow
+%PIP_EXE% uninstall -y tensorflow
:: Install the pip package.
-%PIP_EXE% install %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\tensorflow-0.11.0rc2_cmake_experimental-py3-none-any.whl
+%PIP_EXE% install --upgrade %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\tensorflow-0.11.0rc2_cmake_experimental-py3-none-any.whl
:: Run all python tests
ctest -C Release --output-on-failure
diff --git a/tensorflow/tools/docker/Dockerfile b/tensorflow/tools/docker/Dockerfile
index 39da8b2e65..82973226fc 100644
--- a/tensorflow/tools/docker/Dockerfile
+++ b/tensorflow/tools/docker/Dockerfile
@@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \
&& \
python -m ipykernel.kernelspec
-ENV TENSORFLOW_VERSION 0.11.0rc2
+ENV TENSORFLOW_VERSION 0.11.0
# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index b369bc10e7..30de920130 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -33,7 +33,7 @@ RUN pip --no-cache-dir install \
&& \
python -m ipykernel.kernelspec
-ENV TENSORFLOW_VERSION 0.11.0rc2
+ENV TENSORFLOW_VERSION 0.11.0
# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #
diff --git a/tensorflow/tools/gcs_test/Dockerfile b/tensorflow/tools/gcs_test/Dockerfile
index 3b95e32665..a9db7ce9b0 100644
--- a/tensorflow/tools/gcs_test/Dockerfile
+++ b/tensorflow/tools/gcs_test/Dockerfile
@@ -17,7 +17,7 @@ RUN ./install_google_cloud_sdk.bash --disable-prompts --install-dir=/var/gcloud
# Install nightly TensorFlow pip
RUN pip install \
- https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
+ https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
# Copy test files
RUN mkdir -p /gcs-smoke/python
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 84c6a06a9a..8a15e0a296 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -26,7 +26,7 @@ from setuptools import find_packages, setup, Command
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution
-_VERSION = '0.11.0rc2'
+_VERSION = '0.11.0'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
index c31f3c4e81..c5b3ca38ba 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
@@ -154,7 +154,7 @@ class Generator {
string GetPackageReferencePrefix(const FileDescriptor* fd) {
string result = "::";
const string& package = fd->package();
- for (int i = 0; i < package.size(); ++i) {
+ for (size_t i = 0; i < package.size(); ++i) {
if (package[i] == '.') {
result += "::";
} else {
@@ -446,6 +446,7 @@ void Generator::AppendParseMessageFunction(const Descriptor& md) {
Print("StringPiece identifier;");
Print("if (!scanner->GetResult(nullptr, &identifier)) return false;");
Print("bool parsed_colon = false;");
+ Print("(void)parsed_colon;"); // Avoid "set but not used" compiler warning
Print("ProtoSpaceAndComments(scanner);");
Print("if (scanner->Peek() == ':') {");
Nest().Print("parsed_colon = true;");
@@ -675,7 +676,7 @@ void Generator::AppendMessageFunctions(const Descriptor& md) {
void Generator::AddNamespaceToCurrentSection(const string& package, bool open) {
Print();
std::vector<string> parts = {""};
- for (int i = 0; i < package.size(); ++i) {
+ for (size_t i = 0; i < package.size(); ++i) {
if (package[i] == '.') {
parts.resize(parts.size() + 1);
} else {
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 617101a306..06e16cdb04 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -17,8 +17,8 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
# These lines need to be changed when updating Eigen. They are parsed from
# this file by the cmake and make builds to determine the eigen version and
# hash.
- eigen_version = "3d41a24add9b"
- eigen_sha256 = "ce91f8db04493096c0f8bd5ebca7f5a295c88874cda99b3e9d99c9ed10154f99"
+ eigen_version = "62bdceacdafa"
+ eigen_sha256 = "c66f4693a0fd1f5c2cf009e01eb49671ce9cbb56874c3d07d3b8928ffc132cec"
native.new_http_archive(
name = "eigen_archive",