86 files changed, 792 insertions, 230 deletions
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index fe40c691c5..991fc2f29d 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -949,12 +949,12 @@ cc_library(
 # Libraries with GPU facilities that are useful for writing kernels.
 cc_library(
     name = "gpu_lib",
-    srcs = if_not_windows([
+    srcs = [
         "common_runtime/gpu/gpu_event_mgr.cc",
-    ]),
-    hdrs = if_not_windows([
+    ],
+    hdrs = [
         "common_runtime/gpu/gpu_event_mgr.h",
-    ]),
+    ],
     copts = tf_copts(),
     visibility = ["//visibility:public"],
     deps = [
@@ -964,7 +964,8 @@ cc_library(
         ":lib_internal",
         ":proto_text",
         ":protos_all_cc",
-    ] + if_not_windows([":stream_executor"]),
+        ":stream_executor",
+    ],
 )
 
 cc_library(
@@ -982,7 +983,7 @@ tf_proto_library_cc(
     name = "worker_proto",
     srcs = ["protobuf/worker.proto"],
     cc_api_version = 2,
-    cc_libs = [":protos_all_cc"],
+    protodeps = [":protos_all"],
     visibility = [
         "//tensorflow:internal",
     ],
@@ -993,8 +994,8 @@ tf_proto_library_cc(
     srcs = ["protobuf/worker_service.proto"],
     has_services = 1,
     cc_api_version = 2,
-    cc_libs = [":worker_proto_cc"],
     cc_stubby_versions = ["2"],
+    protodeps = [":worker_proto"],
     visibility = [
         "//tensorflow:internal",
     ],
@@ -1004,7 +1005,7 @@ tf_proto_library_cc(
     name = "master_proto",
     srcs = ["protobuf/master.proto"],
     cc_api_version = 2,
-    cc_libs = [":protos_all_cc"],
+    protodeps = [":protos_all"],
     visibility = [
         "//tensorflow:internal",
     ],
@@ -1015,8 +1016,8 @@ tf_proto_library_cc(
     srcs = ["protobuf/master_service.proto"],
     has_services = 1,
     cc_api_version = 2,
-    cc_libs = [":master_proto_cc"],
     cc_stubby_versions = ["2"],
+    protodeps = [":master_proto"],
     visibility = [
         "//tensorflow:internal",
     ],
@@ -1417,7 +1418,7 @@ tf_cuda_library(
 
 tf_cuda_library(
     name = "gpu_runtime",
-    srcs = if_not_windows([
+    srcs = [
         "common_runtime/gpu/gpu_bfc_allocator.cc",
         "common_runtime/gpu/gpu_debug_allocator.cc",
         "common_runtime/gpu/gpu_device.cc",
@@ -1429,8 +1430,8 @@ tf_cuda_library(
         "common_runtime/gpu/pool_allocator.cc",
         "common_runtime/gpu/process_state.cc",
         "common_runtime/gpu_device_context.h",
-    ]),
-    hdrs = if_not_windows([
+    ],
+    hdrs = [
         "common_runtime/gpu/gpu_bfc_allocator.h",
         "common_runtime/gpu/gpu_debug_allocator.h",
         "common_runtime/gpu/gpu_device.h",
@@ -1439,7 +1440,7 @@ tf_cuda_library(
         "common_runtime/gpu/gpu_util.h",
         "common_runtime/gpu/pool_allocator.h",
         "common_runtime/gpu/process_state.h",
-    ]),
+    ],
     copts = tf_copts(),
     linkstatic = 1,
     deps = [
@@ -1451,10 +1452,9 @@ tf_cuda_library(
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
-        "//third_party/eigen3",
-    ] + if_not_windows([
         ":stream_executor",
-    ]),
+        "//third_party/eigen3",
+    ],
     alwayslink = 1,
 )
 
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index 44f17d6260..4b0165bae7 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -835,7 +835,7 @@ static void TestSessionInterOpThreadsImpl(bool use_function_lib) {
   FunctionLibraryDefinition flib(OpRegistry::Global(), library_graph_def);
   Graph g(&flib);
   Tensor t(DT_FLOAT, TensorShape({}));
-  t.scalar<float>()() = {1.2};
+  t.scalar<float>()() = {1.2f};
   Node* x = test::graph::Constant(&g, t);
   Node* y;
   if (use_function_lib) {
@@ -945,7 +945,7 @@ TEST(DirectSessionTest, TestSessionInterOpThreadsWithFunctions) {
 TEST(DirectSessionTest, TestSessionInterOpThreadsInvalidOptions) {
   Graph g(OpRegistry::Global());
   Tensor t(DT_FLOAT, TensorShape({}));
-  t.scalar<float>()() = {1.2};
+  t.scalar<float>()() = {1.2f};
   Node* x = test::graph::Constant(&g, t);
   GraphDef def;
   test::graph::ToGraphDef(&g, &def);
@@ -979,7 +979,7 @@ TEST(DirectSessionTest, TestDirectSessionRunClose) {
   // Construct a graph with a variable and a single assign.
   Graph g(OpRegistry::Global());
   Tensor t(DT_FLOAT, TensorShape({}));
-  t.scalar<float>()() = {1.2};
+  t.scalar<float>()() = {1.2f};
   Node* var_val = test::graph::Constant(&g, t);
   Node* var = test::graph::Var(&g, DT_FLOAT, {});
   Node* var_assign = test::graph::Assign(&g, var, var_val);
@@ -1063,7 +1063,7 @@ TEST(DirectSessionTest, TestDirectSessionReset) {
   // Construct a graph with a variable and a single assign.
   Graph g(OpRegistry::Global());
   Tensor t(DT_FLOAT, TensorShape({}));
-  t.scalar<float>()() = {1.2};
+  t.scalar<float>()() = {1.2f};
   Node* var_val = test::graph::Constant(&g, t);
   Node* var = test::graph::Var(&g, DT_FLOAT, {});
   Node* var_assign = test::graph::Assign(&g, var, var_val);
diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h
index 8cca22fb6f..239c9666e3 100644
--- a/tensorflow/core/common_runtime/executor.h
+++ b/tensorflow/core/common_runtime/executor.h
@@ -39,7 +39,7 @@ class StepStatsCollector;
 //   Rendezvous* rendezvous = NewNaiveRendezvous();
 //   TF_CHECK_OK(rendezvous->Send("input", some_input_tensor));
 //   TF_CHECK_OK(executor->Run({ExecutorOpts, rendezvous, nullptr}));
-//   TF_CHECK_OK(rendezvous->Recv("input", &output_tensor));
+//   TF_CHECK_OK(rendezvous->Recv("output", &output_tensor));
 //   ... ...
 //
 // Multiple threads can call Executor::Run concurrently.
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
index 175b784825..699b54f345 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
@@ -19,16 +19,26 @@ limitations under the License.
 
 namespace tensorflow {
 
-SYCLAllocator::~SYCLAllocator() { }
+SYCLAllocator::~SYCLAllocator() {}
 
 string SYCLAllocator::Name() { return "device:SYCL"; }
 
 void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
+  assert(device_);
   auto p = device_->allocate(num_bytes);
   return p;
 }
 
-void SYCLAllocator::DeallocateRaw(void *ptr) { device_->deallocate(ptr); }
+void SYCLAllocator::DeallocateRaw(void *ptr) {
+  if (device_) {
+    device_->deallocate(ptr);
+  }
+}
+
+void SYCLAllocator::EnterLameDuckMode() {  // deallocate_all(), then detach
+  if (device_) device_->deallocate_all();  // guard: safe if called again
+  device_ = nullptr;  // DeallocateRaw() is a no-op once this is null
+}
 
 } // namespace tensorflow
 
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.h b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
index 887c727f6e..8558b6c873 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
@@ -29,14 +29,16 @@ namespace tensorflow {
 
 class SYCLAllocator : public Allocator {
 public:
-  SYCLAllocator(Eigen::SyclDevice* device) : device_(device) {}
+  SYCLAllocator(Eigen::QueueInterface* device) : device_(device) {}
   virtual ~SYCLAllocator() override;
   string Name() override;
   void *AllocateRaw(size_t alignment, size_t num_bytes) override;
   void DeallocateRaw(void *ptr) override;
 
+  void EnterLameDuckMode();
+  virtual bool ShouldAllocateEmptyTensors() override final { return true; }
 private:
-  Eigen::SyclDevice *device_;  // not owned
+  Eigen::QueueInterface *device_;  // not owned
   TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
 };
 
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc
index 10a037c02d..e5fe85bcf5 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.cc
@@ -25,8 +25,9 @@ namespace tensorflow {
 
 SYCLDevice::~SYCLDevice() {
   device_context_->Unref();
-  delete sycl_allocator_;
+  sycl_allocator_->EnterLameDuckMode();
   delete sycl_device_;
+  delete sycl_queue_;
 }
 
 void SYCLDevice::Compute(OpKernel *op_kernel, OpKernelContext *context) {
@@ -50,12 +51,8 @@ Allocator *SYCLDevice::GetAllocator(AllocatorAttributes attr) {
 Status SYCLDevice::MakeTensorFromProto(const TensorProto &tensor_proto,
                                        const AllocatorAttributes alloc_attrs,
                                        Tensor *tensor) {
-  AllocatorAttributes attr;
-  attr.set_on_host(true);
-  attr.set_gpu_compatible(true);
-  Allocator *host_alloc = GetAllocator(attr);
   Tensor parsed(tensor_proto.dtype());
-  if (!parsed.FromProto(host_alloc, tensor_proto)) {
+  if (!parsed.FromProto(cpu_allocator_, tensor_proto)) {
     return errors::InvalidArgument("Cannot parse tensor from proto: ",
                                    tensor_proto.DebugString());
   }
@@ -86,6 +83,12 @@ Status SYCLDevice::FillContextMap(const Graph *graph,
   return Status::OK();
 }
 
+Status SYCLDevice::Sync() {
+  sycl_device_->synchronize();
+  return Status::OK();
+}
+
+
 } // namespace tensorflow
 
 #endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h
index d3b3db2a71..2759053df5 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.h
@@ -22,7 +22,6 @@ limitations under the License.
 
 #define EIGEN_USE_SYCL
 
-#include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/local_device.h"
 #include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
 #include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
@@ -30,7 +29,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-
 class SYCLDevice : public LocalDevice {
 public:
   template <typename SYCLSelector>
@@ -42,8 +40,9 @@ public:
                     name, DEVICE_SYCL, memory_limit, locality,
                     physical_device_desc), nullptr),
         cpu_allocator_(cpu_allocator),
-        sycl_device_(new Eigen::SyclDevice(sycl_selector)),
-        sycl_allocator_(new SYCLAllocator(sycl_device_)),
+        sycl_queue_(new Eigen::QueueInterface(sycl_selector)),
+        sycl_device_(new Eigen::SyclDevice(sycl_queue_)),
+        sycl_allocator_(new SYCLAllocator(sycl_queue_)),
         device_context_(new SYCLDeviceContext()) {
     set_eigen_sycl_device(sycl_device_);
   }
@@ -59,16 +58,17 @@ public:
   Status FillContextMap(const Graph *graph,
                         DeviceContextMap *device_context_map) override;
 
-  Status Sync() override { return Status::OK(); }
+  Status Sync() override;
   static string GetShortDeviceDescription(/*int device_id,
                                           const DeviceDescription& desc*/) {
     return strings::StrCat("device: 0, name SYCL, pci bus id: 0");
   }
 
 private:
-  Allocator *cpu_allocator_;         // owned
-  Eigen::SyclDevice* sycl_device_;   // owned
-  SYCLAllocator *sycl_allocator_;    // owned
+  Allocator *cpu_allocator_;          // owned
+  Eigen::QueueInterface* sycl_queue_; // owned
+  Eigen::SyclDevice* sycl_device_;    // owned
+  SYCLAllocator *sycl_allocator_;     // owned
   SYCLDeviceContext *device_context_;
 };
 
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
index 9dd289bebd..b487d24c20 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
@@ -16,13 +16,11 @@ limitations under the License.
 #if TENSORFLOW_USE_SYCL
 
 #define EIGEN_USE_SYCL
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
 
-#define EIGEN_USE_SYCL
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
 namespace tensorflow {
 
 void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor,
@@ -108,7 +106,6 @@ void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor,
                                               StatusCallback done) {
   const int64 total_bytes = device_tensor->TotalBytes();
   if (total_bytes > 0) {
-    device->eigen_sycl_device()->deallocate_all();
     const void* src_ptr = DMAHelper::base(device_tensor);
     void* dst_ptr = DMAHelper::base(cpu_tensor);
     switch (device_tensor->dtype()) {
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
index 9b8770420c..cf9e349e01 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #if TENSORFLOW_USE_SYCL
 
+#include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/sycl/sycl_device.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD
index 2363b69390..3e4ab5bc17 100644
--- a/tensorflow/core/debug/BUILD
+++ b/tensorflow/core/debug/BUILD
@@ -36,7 +36,7 @@ tf_proto_library_cc(
     has_services = 1,
     cc_api_version = 2,
     cc_grpc_version = 1,
-    cc_libs = ["//tensorflow/core:protos_all_cc"],
+    protodeps = ["//tensorflow/core:protos_all"],
 )
 
 # Depending on this target causes a concrete DebuggerState implementation
diff --git a/tensorflow/core/debug/debug_gateway_test.cc b/tensorflow/core/debug/debug_gateway_test.cc
index 1f6e766663..963cea8419 100644
--- a/tensorflow/core/debug/debug_gateway_test.cc
+++ b/tensorflow/core/debug/debug_gateway_test.cc
@@ -372,9 +372,9 @@ TEST_F(SessionDebugMinusAXTest,
   debug_gateway.SetNodeValueCallback(
       [this, &mu, &val_callback_count, &a_debug_identity_node_name,
        &x_debug_identity_node_name, &y_debug_identity_node_name,
-       &debug_identity_tensor_vals,
-       &callbacks_done](const string& node_name, const int output_slot,
-                        const Tensor& tensor_value, const bool is_ref) {
+       &debug_identity_tensor_vals, &callbacks_done, &kConcurrentRuns](
+           const string& node_name, const int output_slot,
+           const Tensor& tensor_value, const bool is_ref) {
         mutex_lock l(mu);
 
         if (node_name == a_debug_identity_node_name && output_slot == 0) {
diff --git a/tensorflow/core/debug/debug_io_utils.cc b/tensorflow/core/debug/debug_io_utils.cc
index 41868ce8da..4b5ecaa9b6 100644
--- a/tensorflow/core/debug/debug_io_utils.cc
+++ b/tensorflow/core/debug/debug_io_utils.cc
@@ -18,6 +18,12 @@ limitations under the License.
 #include <vector>
 
 #include "grpc++/create_channel.h"
+
+#if defined(PLATFORM_WINDOWS)
+// gRPC uses winsock2.h, so Ws2_32.lib must be linked in.
+#pragma comment(lib,"Ws2_32.lib")
+#endif
+
 #include "tensorflow/core/debug/debug_service.grpc.pb.h"
 #include "tensorflow/core/framework/summary.pb.h"
 #include "tensorflow/core/lib/io/path.h"
diff --git a/tensorflow/core/debug/debug_io_utils_test.cc b/tensorflow/core/debug/debug_io_utils_test.cc
index 1ddab1689b..ab020517b0 100644
--- a/tensorflow/core/debug/debug_io_utils_test.cc
+++ b/tensorflow/core/debug/debug_io_utils_test.cc
@@ -273,7 +273,8 @@ TEST_F(DebugIOUtilsTest, PublishTensorConcurrentlyToPartiallyOverlappingPaths) {
 
   auto fn = [this, &dump_count, &done_count, &mu, &dump_root_base, &dump_roots,
              &dump_file_paths, &wall_time, &tensor_name, &debug_node_name,
-             &kNodeName, &kDebugOpName, &kConcurrentPubs, &all_done]() {
+             &kNodeName, &kDebugOpName, &kConcurrentPubs, &kOutputSlot,
+             &all_done]() {
     // "gumpy" is the shared directory part of the path.
     string dump_root;
     string debug_url;
diff --git a/tensorflow/core/framework/partial_tensor_shape_test.cc b/tensorflow/core/framework/partial_tensor_shape_test.cc
index b008a93c03..23f3d908fb 100644
--- a/tensorflow/core/framework/partial_tensor_shape_test.cc
+++ b/tensorflow/core/framework/partial_tensor_shape_test.cc
@@ -220,7 +220,7 @@ TEST(PartialTensorShapeTest, PartialShapeMergeWith) {
 
 TEST(PartialTensorShapeTest, MakePartialShapeEmpty) {
   // Empty made partial shapes should still be fully defined
-  const int64 dims[0] = {};
+  const int64 dims[1] = {};
   PartialTensorShape shape;
   EXPECT_FALSE(shape.IsFullyDefined());
   TF_ASSERT_OK(PartialTensorShape::MakePartialShape(dims, 0, &shape));
diff --git a/tensorflow/core/framework/tensor_testutil.h b/tensorflow/core/framework/tensor_testutil.h
index 73afca40ac..29b9de5c07 100644
--- a/tensorflow/core/framework/tensor_testutil.h
+++ b/tensorflow/core/framework/tensor_testutil.h
@@ -16,6 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_FRAMEWORK_TENSOR_TESTUTIL_H_
 #define TENSORFLOW_FRAMEWORK_TENSOR_TESTUTIL_H_
 
+#include <numeric>
+
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/logging.h"
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 2315c2ffb6..e99ed9dfa8 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2342,7 +2342,6 @@ cc_library(
         ":batch_norm_op",
         ":bias_op",
         ":conv_ops",
-        ":depthwise_conv_grad_op",
         ":dilation_ops",
         ":fused_batch_norm_op",
         ":in_topk_op",
@@ -2354,7 +2353,10 @@ cc_library(
         ":softsign_op",
         ":topk_op",
         ":xent_op",
-    ] + if_not_windows([":depthwise_conv_op"]),
+    ] + if_not_windows([
+        ":depthwise_conv_grad_op",
+        ":depthwise_conv_op",
+    ]),
 )
 
 NN_DEPS = [
diff --git a/tensorflow/core/kernels/adjust_contrast_op_test.cc b/tensorflow/core/kernels/adjust_contrast_op_test.cc
index b925dc6883..06fd7ca419 100644
--- a/tensorflow/core/kernels/adjust_contrast_op_test.cc
+++ b/tensorflow/core/kernels/adjust_contrast_op_test.cc
@@ -56,7 +56,7 @@ TEST_F(AdjustContrastOpTest, Simple_1223) {
   TF_EXPECT_OK(InitOp());
   AddInputFromArray<float>(TensorShape({1, 2, 2, 3}),
                            {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12});
-  AddInputFromArray<float>(TensorShape({}), {0.2});
+  AddInputFromArray<float>(TensorShape({}), {0.2f});
   TF_ASSERT_OK(RunOpKernel());
 
   Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 2, 2, 3}));
@@ -78,7 +78,7 @@ TEST_F(AdjustContrastOpTest, Big_99x99x3) {
   }
 
   AddInputFromArray<float>(TensorShape({1, 99, 99, 3}), values);
-  AddInputFromArray<float>(TensorShape({}), {0.2});
+  AddInputFromArray<float>(TensorShape({}), {0.2f});
   TF_ASSERT_OK(RunOpKernel());
 }
 
diff --git a/tensorflow/core/kernels/batch_norm_op_test.cc b/tensorflow/core/kernels/batch_norm_op_test.cc
index 746b0d46ad..c5e55346eb 100644
--- a/tensorflow/core/kernels/batch_norm_op_test.cc
+++ b/tensorflow/core/kernels/batch_norm_op_test.cc
@@ -47,15 +47,15 @@ TEST_F(BatchNormOpTest, Simple) {
   AddInputFromArray<float>(TensorShape({1, 1, 6, 2}),
                            {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6});
   AddInputFromArray<float>(TensorShape({2}), {10, 20});
-  AddInputFromArray<float>(TensorShape({2}), {0.25, 0.5});
-  AddInputFromArray<float>(TensorShape({2}), {0.1, 0.6});
-  AddInputFromArray<float>(TensorShape({2}), {0.0, 0.0});
+  AddInputFromArray<float>(TensorShape({2}), {0.25f, 0.5f});
+  AddInputFromArray<float>(TensorShape({2}), {0.1f, 0.6f});
+  AddInputFromArray<float>(TensorShape({2}), {0.0f, 0.0f});
   TF_ASSERT_OK(RunOpKernel());
 
   Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 1, 6, 2}));
   test::FillValues<float>(
-      &expected, {-17.86, -22.00, -15.87, -20.59, -13.87, -19.18, -21.86,
-                  -33.31, -23.85, -34.72, -25.85, -36.13});
+      &expected, {-17.86f, -22.00f, -15.87f, -20.59f, -13.87f, -19.18f, -21.86f,
+                  -33.31f, -23.85f, -34.72f, -25.85f, -36.13f });
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 0.01);
 }
 
diff --git a/tensorflow/core/kernels/cast_op_test.cc b/tensorflow/core/kernels/cast_op_test.cc
index ffad7fd02e..5b7529bb8a 100644
--- a/tensorflow/core/kernels/cast_op_test.cc
+++ b/tensorflow/core/kernels/cast_op_test.cc
@@ -49,17 +49,18 @@ class CastOpTest : public OpsTestBase {
     TF_EXPECT_OK(InitOp());
   }
 
-  template <typename IN, typename OUT>
+  template <typename INPUT, typename OUTPUT>
   void CheckCast() {
-    DataType in_type = DataTypeToEnum<IN>::v();
-    DataType out_type = DataTypeToEnum<OUT>::v();
+    DataType in_type = DataTypeToEnum<INPUT>::v();
+    DataType out_type = DataTypeToEnum<OUTPUT>::v();
     MakeOp(in_type, out_type);
-    AddInputFromArray<IN>(TensorShape({1, 2, 2, 1}),
-                          {IN(1), IN(2), IN(3), IN(4)});
+    AddInputFromArray<INPUT>(TensorShape({1, 2, 2, 1}),
+                             {INPUT(1), INPUT(2), INPUT(3), INPUT(4)});
     TF_ASSERT_OK(RunOpKernel());
     Tensor expected(allocator(), out_type, TensorShape({1, 2, 2, 1}));
-    test::FillValues<OUT>(&expected, {OUT(1), OUT(2), OUT(3), OUT(4)});
-    test::ExpectTensorEqual<OUT>(expected, *GetOutput(0));
+    test::FillValues<OUTPUT>(&expected,
+                             {OUTPUT(1), OUTPUT(2), OUTPUT(3), OUTPUT(4)});
+    test::ExpectTensorEqual<OUTPUT>(expected, *GetOutput(0));
   }
 };
 
diff --git a/tensorflow/core/kernels/colorspace_op_test.cc b/tensorflow/core/kernels/colorspace_op_test.cc
index 4719a59b63..943d25a975 100644
--- a/tensorflow/core/kernels/colorspace_op_test.cc
+++ b/tensorflow/core/kernels/colorspace_op_test.cc
@@ -71,7 +71,7 @@ class RGBToHSVOpTest : public OpsTestBase {
 
   void CheckRedMax(DataType data_type) {
     // Test case where red channel dominates
-    AddInputFromArray<T>(TensorShape({3}), {.8, .4, .2});
+    AddInputFromArray<T>(TensorShape({3}), {.8f, .4f, .2f});
     TF_ASSERT_OK(RunOpKernel());
 
     T expected_h = 1. / 6. * .2 / .6;
@@ -85,7 +85,7 @@ class RGBToHSVOpTest : public OpsTestBase {
 
   void CheckGreenMax(DataType data_type) {
     // Test case where green channel dominates
-    AddInputFromArray<T>(TensorShape({3}), {.2, .8, .4});
+    AddInputFromArray<T>(TensorShape({3}), {.2f, .8f, .4f});
     TF_ASSERT_OK(RunOpKernel());
 
     T expected_h = 1. / 6. * (2.0 + (.2 / .6));
@@ -99,7 +99,7 @@ class RGBToHSVOpTest : public OpsTestBase {
 
   void CheckBlueMax(DataType data_type) {
     // Test case where blue channel dominates
-    AddInputFromArray<T>(TensorShape({3}), {.4, .2, .8});
+    AddInputFromArray<T>(TensorShape({3}), {.4f, .2f, .8f});
     TF_ASSERT_OK(RunOpKernel());
 
     T expected_h = 1. / 6. * (4.0 + (.2 / .6));
@@ -112,7 +112,7 @@ class RGBToHSVOpTest : public OpsTestBase {
   }
 
   void CheckNegativeDifference(DataType data_type) {
-    AddInputFromArray<T>(TensorShape({3}), {0, .1, .2});
+    AddInputFromArray<T>(TensorShape({3}), {0, .1f, .2f});
     TF_ASSERT_OK(RunOpKernel());
 
     T expected_h = 1. / 6. * (4.0 + (-.1 / .2));
@@ -220,7 +220,7 @@ class HSVToRGBOpTest : public OpsTestBase {
     TF_ASSERT_OK(RunOpKernel());
 
     Tensor expected(allocator(), data_type, TensorShape({3}));
-    test::FillValues<T>(&expected, {0, .1, .2});
+    test::FillValues<T>(&expected, {0, .1f, .2f});
     test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
   }
 };
diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc
index e92b11efc6..b01263f288 100644
--- a/tensorflow/core/kernels/control_flow_ops.cc
+++ b/tensorflow/core/kernels/control_flow_ops.cc
@@ -113,9 +113,12 @@ REGISTER_GPU_HOST_REF_KERNEL(string);
 #undef REGISTER_GPU_HOST_REF_KERNEL
 
 #if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)  \
-  REGISTER_KERNEL_BUILDER(          \
-      Name("Switch").Device(DEVICE_SYCL).TypeConstraint<type>("T"), SwitchOp)
+#define REGISTER_SYCL_KERNEL(type)                       \
+  REGISTER_KERNEL_BUILDER(Name("Switch")                 \
+                              .Device(DEVICE_SYCL)       \
+                              .TypeConstraint<type>("T") \
+                              .HostMemory("pred"),       \
+                          SwitchOp)
 REGISTER_SYCL_KERNEL(bool);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
@@ -219,9 +222,12 @@ REGISTER_GPU_REF_KERNEL(bool);
 #undef REGISTER_GPU_REF_KERNEL
 
 #if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)  \
-  REGISTER_KERNEL_BUILDER(          \
-  Name("Merge").Device(DEVICE_SYCL).TypeConstraint<type>("T"), MergeOp)
+#define REGISTER_SYCL_KERNEL(type)                        \
+  REGISTER_KERNEL_BUILDER(Name("Merge")                   \
+                              .Device(DEVICE_SYCL)        \
+                              .TypeConstraint<type>("T")  \
+                              .HostMemory("value_index"), \
+                          MergeOp)
 REGISTER_SYCL_KERNEL(bool);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
@@ -418,8 +424,12 @@ REGISTER_GPU_HOST_KERNEL(string);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(type)  \
-  REGISTER_KERNEL_BUILDER(          \
-	Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), NextIterationOp)
+  REGISTER_KERNEL_BUILDER(Name("NextIteration")           \
+                              .Device(DEVICE_SYCL)        \
+                              .HostMemory("data")         \
+                              .HostMemory("output")       \
+                              .TypeConstraint<type>("T"), \
+                          NextIterationOp)
   REGISTER_SYCL_KERNEL(bool);
   TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
diff --git a/tensorflow/core/kernels/control_flow_ops_test.cc b/tensorflow/core/kernels/control_flow_ops_test.cc
index 97f56c392a..301609e04d 100644
--- a/tensorflow/core/kernels/control_flow_ops_test.cc
+++ b/tensorflow/core/kernels/control_flow_ops_test.cc
@@ -85,13 +85,27 @@ class AbortOpTest : public OpsTestBase {
  protected:
 };
 
+#ifdef PLATFORM_WINDOWS
+#define SIGABRT 3  // NOTE(review): may clash with <signal.h> SIGABRT - confirm
+
+class KilledBySignal {  // Windows stand-in: matches on the raw exit status
+ public:
+  explicit KilledBySignal(int signum) : signum_(signum) {}
+  bool operator()(int exit_status) const { return exit_status == signum_; }
+ private:
+  const int signum_;  // value the exit status is compared against
+};
+#else
+using ::testing::KilledBySignal;  // scoped alias; no macro token rewriting
+#endif
+
 // Pass an error message to the op.
 TEST_F(AbortOpTest, pass_error_msg) {
   TF_ASSERT_OK(NodeDefBuilder("abort_op", "Abort")
                    .Attr("error_msg", "abort_op_test")
                    .Finalize(node_def()));
   TF_ASSERT_OK(InitOp());
-  EXPECT_EXIT(RunOpKernel(), ::testing::KilledBySignal(SIGABRT),
+  EXPECT_EXIT(RunOpKernel(), KilledBySignal(SIGABRT),
               "Abort_op intentional failure; abort_op_test");
 }
 
@@ -99,7 +113,7 @@ TEST_F(AbortOpTest, pass_error_msg) {
 TEST_F(AbortOpTest, default_msg) {
   TF_ASSERT_OK(NodeDefBuilder("abort_op", "Abort").Finalize(node_def()));
   TF_ASSERT_OK(InitOp());
-  EXPECT_EXIT(RunOpKernel(), ::testing::KilledBySignal(SIGABRT),
+  EXPECT_EXIT(RunOpKernel(), KilledBySignal(SIGABRT),
               "Abort_op intentional failure; ");
 }
 
diff --git a/tensorflow/core/kernels/cwise_op_abs.cc b/tensorflow/core/kernels/cwise_op_abs.cc
index fbc23b3b6f..8cf1eac41e 100644
--- a/tensorflow/core/kernels/cwise_op_abs.cc
+++ b/tensorflow/core/kernels/cwise_op_abs.cc
@@ -21,6 +21,18 @@ REGISTER5(UnaryOp, CPU, "Abs", functor::abs, float, Eigen::half, double, int32,
 #if !defined(IS_MOBILE_PLATFORM)
 REGISTER2(UnaryOp, CPU, "ComplexAbs", functor::abs, complex64, complex128);
 #endif
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Abs")                                 \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::abs<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER4(UnaryOp, GPU, "Abs", functor::abs, float, Eigen::half, double, int64);
 REGISTER2(UnaryOp, GPU, "ComplexAbs", functor::abs, complex64, complex128);
diff --git a/tensorflow/core/kernels/cwise_op_acos.cc b/tensorflow/core/kernels/cwise_op_acos.cc
index c44c8bc6f6..1d2d815027 100644
--- a/tensorflow/core/kernels/cwise_op_acos.cc
+++ b/tensorflow/core/kernels/cwise_op_acos.cc
@@ -17,6 +17,18 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER2(UnaryOp, CPU, "Acos", functor::acos, float, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Acos")                                \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::acos<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Acos", functor::acos, float, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc
index 44c552d18e..a6bff78694 100644
--- a/tensorflow/core/kernels/cwise_op_add_1.cc
+++ b/tensorflow/core/kernels/cwise_op_add_1.cc
@@ -26,7 +26,7 @@ REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32,
                           .Device(DEVICE_SYCL)                        \
                           .TypeConstraint<TYPE>("T"),                 \
                           BinaryOp<SYCLDevice, functor::add<TYPE>>);
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+  REGISTER_SYCL_KERNEL(float);
 #undef REGISTER_SYCL_KERNEL
 #endif // TENSORFLOW_USE_SYCL
           
diff --git a/tensorflow/core/kernels/cwise_op_asin.cc b/tensorflow/core/kernels/cwise_op_asin.cc
index bba20aa6af..92a22e90c4 100644
--- a/tensorflow/core/kernels/cwise_op_asin.cc
+++ b/tensorflow/core/kernels/cwise_op_asin.cc
@@ -17,6 +17,18 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER2(UnaryOp, CPU, "Asin", functor::asin, float, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Asin")                                \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::asin<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Asin", functor::asin, float, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_atan.cc b/tensorflow/core/kernels/cwise_op_atan.cc
index 055b8289d4..825e85283f 100644
--- a/tensorflow/core/kernels/cwise_op_atan.cc
+++ b/tensorflow/core/kernels/cwise_op_atan.cc
@@ -17,6 +17,18 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER2(UnaryOp, CPU, "Atan", functor::atan, float, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Atan")                                \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::atan<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Atan", functor::atan, float, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_ceil.cc b/tensorflow/core/kernels/cwise_op_ceil.cc
index 08ac1b4194..c5a4aaf831 100644
--- a/tensorflow/core/kernels/cwise_op_ceil.cc
+++ b/tensorflow/core/kernels/cwise_op_ceil.cc
@@ -17,6 +17,18 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Ceil")                                \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::ceil<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_cos.cc b/tensorflow/core/kernels/cwise_op_cos.cc
index 2680143d65..a758da5842 100644
--- a/tensorflow/core/kernels/cwise_op_cos.cc
+++ b/tensorflow/core/kernels/cwise_op_cos.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, double,
           complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Cos")                                 \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::cos<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc
index c2b05a69b2..ef8c477e48 100644
--- a/tensorflow/core/kernels/cwise_op_div.cc
+++ b/tensorflow/core/kernels/cwise_op_div.cc
@@ -30,6 +30,11 @@ REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double,
                           Name("Div")                                 \
                           .Device(DEVICE_SYCL)                        \
                           .TypeConstraint<TYPE>("T"),                 \
+                          BinaryOp<SYCLDevice, functor::div<TYPE>>);  \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("RealDiv")                             \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
                           BinaryOp<SYCLDevice, functor::div<TYPE>>);
 REGISTER_SYCL_KERNEL(float)
 #undef REGISTER_SYCL_KERNEL
diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc
index 7ec3526282..0ee47f7dee 100644
--- a/tensorflow/core/kernels/cwise_op_exp.cc
+++ b/tensorflow/core/kernels/cwise_op_exp.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double,
           complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Exp")                                 \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::exp<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_floor.cc b/tensorflow/core/kernels/cwise_op_floor.cc
index 732087d4cb..129d754b82 100644
--- a/tensorflow/core/kernels/cwise_op_floor.cc
+++ b/tensorflow/core/kernels/cwise_op_floor.cc
@@ -17,6 +17,18 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Floor")                               \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::floor<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc
index a5767476c3..69dbb70b83 100644
--- a/tensorflow/core/kernels/cwise_op_floor_div.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_div.cc
@@ -18,6 +18,9 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
           int16, int32, int64);
+REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
+          Eigen::half, double);
+
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                    \
   REGISTER_KERNEL_BUILDER(                                            \
@@ -25,11 +28,10 @@ REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
                           .Device(DEVICE_SYCL)                        \
                           .TypeConstraint<TYPE>("T"),                 \
                           BinaryOp<SYCLDevice, functor::floor_div<TYPE>>);
-TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_KERNEL);
+REGISTER_SYCL_KERNEL(float)
 #undef REGISTER_SYCL_KERNEL
 #endif // TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
-          Eigen::half, double);
+
 #if GOOGLE_CUDA
 REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
           int64);
diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc
index e38b271318..59976141c7 100644
--- a/tensorflow/core/kernels/cwise_op_isfinite.cc
+++ b/tensorflow/core/kernels/cwise_op_isfinite.cc
@@ -18,6 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
           double);
+
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                    \
   REGISTER_KERNEL_BUILDER(                                            \
@@ -25,9 +26,10 @@ REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
                           .Device(DEVICE_SYCL)                        \
                           .TypeConstraint<TYPE>("T"),                 \
                           UnaryOp<SYCLDevice, functor::isfinite<TYPE>>);
-TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+REGISTER_SYCL_KERNEL(float);
 #undef REGISTER_SYCL_KERNEL
 #endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
           double);
diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc
index bf056dbe0e..675cb95b95 100644
--- a/tensorflow/core/kernels/cwise_op_isinf.cc
+++ b/tensorflow/core/kernels/cwise_op_isinf.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
+
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                    \
   REGISTER_KERNEL_BUILDER(                                            \
@@ -24,9 +25,10 @@ REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
                           .Device(DEVICE_SYCL)                        \
                           .TypeConstraint<TYPE>("T"),                 \
                           UnaryOp<SYCLDevice, functor::isinf<TYPE>>);
-TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+REGISTER_SYCL_KERNEL(float);
 #undef REGISTER_SYCL_KERNEL
 #endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc
index d2bac23882..c394087ed8 100644
--- a/tensorflow/core/kernels/cwise_op_isnan.cc
+++ b/tensorflow/core/kernels/cwise_op_isnan.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double);
+
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                    \
   REGISTER_KERNEL_BUILDER(                                            \
@@ -24,9 +25,10 @@ REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double);
                           .Device(DEVICE_SYCL)                        \
                           .TypeConstraint<TYPE>("T"),                 \
                           UnaryOp<SYCLDevice, functor::isnan<TYPE>>);
-TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+REGISTER_SYCL_KERNEL(float);
 #undef REGISTER_SYCL_KERNEL
 #endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_log.cc b/tensorflow/core/kernels/cwise_op_log.cc
index be184f03de..71c4588b3d 100644
--- a/tensorflow/core/kernels/cwise_op_log.cc
+++ b/tensorflow/core/kernels/cwise_op_log.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Log", functor::log, float, Eigen::half, double,
           complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Log")                                 \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::log<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Log", functor::log, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_log1p.cc b/tensorflow/core/kernels/cwise_op_log1p.cc
index 91a14989e6..03ea3a0a89 100644
--- a/tensorflow/core/kernels/cwise_op_log1p.cc
+++ b/tensorflow/core/kernels/cwise_op_log1p.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, double,
           complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Log1p")                               \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::log1p<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Log1p", functor::log1p, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_neg.cc b/tensorflow/core/kernels/cwise_op_neg.cc
index 67b088e110..4221fc0710 100644
--- a/tensorflow/core/kernels/cwise_op_neg.cc
+++ b/tensorflow/core/kernels/cwise_op_neg.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER7(UnaryOp, CPU, "Neg", functor::neg, float, Eigen::half, double, int32,
           complex64, int64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Neg")                                 \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::neg<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER4(UnaryOp, GPU, "Neg", functor::neg, float, Eigen::half, double, int64);
 
diff --git a/tensorflow/core/kernels/cwise_op_pow.cc b/tensorflow/core/kernels/cwise_op_pow.cc
index dd28b36519..8eeba6ab14 100644
--- a/tensorflow/core/kernels/cwise_op_pow.cc
+++ b/tensorflow/core/kernels/cwise_op_pow.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER7(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, double, int32,
           int64, complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Pow")                                 \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          BinaryOp<SYCLDevice, functor::pow<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER4(BinaryOp, GPU, "Pow", functor::pow, float, Eigen::half, double,
           int64);
diff --git a/tensorflow/core/kernels/cwise_op_rsqrt.cc b/tensorflow/core/kernels/cwise_op_rsqrt.cc
index 3207166e94..7dc96d47a6 100644
--- a/tensorflow/core/kernels/cwise_op_rsqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_rsqrt.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double,
           complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Rsqrt")                               \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::rsqrt<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_sin.cc b/tensorflow/core/kernels/cwise_op_sin.cc
index 1e3880beb1..8d0c0959f7 100644
--- a/tensorflow/core/kernels/cwise_op_sin.cc
+++ b/tensorflow/core/kernels/cwise_op_sin.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, double,
           complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Sin")                                 \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::sin<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Sin", functor::sin, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_sqrt.cc b/tensorflow/core/kernels/cwise_op_sqrt.cc
index aecffda4ba..710001517b 100644
--- a/tensorflow/core/kernels/cwise_op_sqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_sqrt.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Sqrt", functor::sqrt, float, Eigen::half, double,
           complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Sqrt")                                \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::sqrt<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Sqrt", functor::sqrt, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_square.cc b/tensorflow/core/kernels/cwise_op_square.cc
index 0ce4473d83..f867f127a7 100644
--- a/tensorflow/core/kernels/cwise_op_square.cc
+++ b/tensorflow/core/kernels/cwise_op_square.cc
@@ -18,6 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER7(UnaryOp, CPU, "Square", functor::square, float, Eigen::half, double,
           int32, int64, complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Square")                              \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::square<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER4(UnaryOp, GPU, "Square", functor::square, float, Eigen::half, double,
           int64);
diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc
index ed78ba37a8..e1326dbed1 100644
--- a/tensorflow/core/kernels/cwise_op_sub.cc
+++ b/tensorflow/core/kernels/cwise_op_sub.cc
@@ -31,7 +31,7 @@ REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
                           .Device(DEVICE_SYCL)                        \
                           .TypeConstraint<TYPE>("T"),                 \
                           BinaryOp<SYCLDevice, functor::sub<TYPE>>);
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+  REGISTER_SYCL_KERNEL(float);
 #undef REGISTER_SYCL_KERNEL
 #endif // TENSORFLOW_USE_SYCL
 #if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/cwise_op_tan.cc b/tensorflow/core/kernels/cwise_op_tan.cc
index fca1addfa4..ac49cad88f 100644
--- a/tensorflow/core/kernels/cwise_op_tan.cc
+++ b/tensorflow/core/kernels/cwise_op_tan.cc
@@ -17,6 +17,18 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER2(UnaryOp, CPU, "Tan", functor::tan, float, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Tan")                                 \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::tan<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Tan", functor::tan, float, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_tanh.cc b/tensorflow/core/kernels/cwise_op_tanh.cc
index a4c4aad053..ae2c473e20 100644
--- a/tensorflow/core/kernels/cwise_op_tanh.cc
+++ b/tensorflow/core/kernels/cwise_op_tanh.cc
@@ -19,6 +19,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, double,
           complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Tanh")                                \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::tanh<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER3(UnaryOp, GPU, "Tanh", functor::tanh, float, Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_ops_sycl_common.h b/tensorflow/core/kernels/cwise_ops_sycl_common.h
index 4c22cc4855..3fcf0759d4 100644
--- a/tensorflow/core/kernels/cwise_ops_sycl_common.h
+++ b/tensorflow/core/kernels/cwise_ops_sycl_common.h
@@ -21,12 +21,10 @@ limitations under the License.
 #define TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_
 
 #define EIGEN_USE_SYCL
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #include "tensorflow/core/framework/register_types.h"
-
-#include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/kernels/cwise_ops.h"
-#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -62,14 +60,14 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
   void operator()(const SYCLDevice& d, typename Functor::tout_type out,
                   typename Functor::tin_type in0,
                   typename Functor::tin_type in1, bool* error) {
-    To32Bit(out).device(d) = To32Bit(in0).binaryExpr(in1, typename Functor::func());
+    To32Bit(out).device(d) = To32Bit(in0).binaryExpr(To32Bit(in1), typename Functor::func());
   }
 
   void Left(const SYCLDevice& d, typename Functor::tout_type out,
             typename Functor::tscalar_type scalar,
             typename Functor::tin_type in, bool* error) {
     typedef typename Functor::func Binary;
-    constexpr int NumDims = Functor::tin_type::NumDimensions; 
+    constexpr int NumDims = Functor::tin_type::NumDimensions;
     typedef typename Functor::tin_type::Scalar T;
     typedef typename Functor::tin_type::Index Index;
     Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
diff --git a/tensorflow/core/kernels/debug_ops.cc b/tensorflow/core/kernels/debug_ops.cc
index 1a4d70c36b..78d386a5af 100644
--- a/tensorflow/core/kernels/debug_ops.cc
+++ b/tensorflow/core/kernels/debug_ops.cc
@@ -28,6 +28,16 @@ REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_CPU), CopyOp);
 
 REGISTER_KERNEL_BUILDER(Name("CopyHost").Device(DEVICE_CPU), CopyOp);
 
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_SYCL), CopyOp);
+
+REGISTER_KERNEL_BUILDER(Name("CopyHost")
+                            .Device(DEVICE_SYCL)
+                            .HostMemory("input")
+                            .HostMemory("output"),
+                        CopyOp);
+#endif // TENSORFLOW_USE_SYCL
+
 #if GOOGLE_CUDA
 REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_GPU), CopyOp);
 
@@ -50,6 +60,14 @@ REGISTER_KERNEL_BUILDER(Name("DebugIdentity")
                         DebugIdentityOp);
 #endif
 
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(Name("DebugIdentity")
+                            .Device(DEVICE_SYCL)
+                            .HostMemory("input")
+                            .HostMemory("output"),
+                        DebugIdentityOp);
+#endif // TENSORFLOW_USE_SYCL
+
 // Register debug NaN-counter (non-ref and ref) ops.
 #define REGISTER_DEBUG_NAN_COUNT(type)                                    \
   REGISTER_KERNEL_BUILDER(                                                \
@@ -70,4 +88,15 @@ REGISTER_GPU_DEBUG_NAN_COUNT(float);
 REGISTER_GPU_DEBUG_NAN_COUNT(double);
 #endif
 
+#ifdef TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_DEBUG_NAN_COUNT(type)               \
+  REGISTER_KERNEL_BUILDER(Name("DebugNanCount")           \
+                              .Device(DEVICE_SYCL)        \
+                              .HostMemory("input")        \
+                              .HostMemory("output")       \
+                              .TypeConstraint<type>("T"), \
+                          DebugNanCountOp<type>);
+REGISTER_SYCL_DEBUG_NAN_COUNT(float);  // only float is registered for SYCL
+#endif // TENSORFLOW_USE_SYCL
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc
index baa8f83091..5216a4b5d0 100644
--- a/tensorflow/core/kernels/dense_update_ops.cc
+++ b/tensorflow/core/kernels/dense_update_ops.cc
@@ -97,13 +97,20 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
 
 #if TENSORFLOW_USE_SYCL
 typedef Eigen::SyclDevice SYCLDevice;
-#define REGISTER_SYCL_KERNEL(type)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-                          Name("Assign")                              \
-                          .Device(DEVICE_SYCL)                        \
-                          .TypeConstraint<type>("T"),                 \
-                          AssignOpT<SYCLDevice, type>);
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#define REGISTER_SYCL_KERNEL(type)                                     \
+  REGISTER_KERNEL_BUILDER(                                             \
+                          Name("Assign")                               \
+                          .Device(DEVICE_SYCL)                         \
+                          .TypeConstraint<type>("T"),                  \
+                          AssignOpT<SYCLDevice, type>);                \
+  REGISTER_KERNEL_BUILDER(                                             \
+      Name("AssignAdd").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
+      DenseUpdateOp<SYCLDevice, type, DenseUpdateType::ADD>);          \
+  REGISTER_KERNEL_BUILDER(                                             \
+      Name("AssignSub").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
+      DenseUpdateOp<SYCLDevice, type, DenseUpdateType::SUB>);
+
+REGISTER_SYCL_KERNEL(float);
 #undef REGISTER_SYCL_KERNEL
 #endif
 
diff --git a/tensorflow/core/kernels/fact_op.cc b/tensorflow/core/kernels/fact_op.cc
index 52ad2d0c1f..f1ab4c4a4d 100644
--- a/tensorflow/core/kernels/fact_op.cc
+++ b/tensorflow/core/kernels/fact_op.cc
@@ -73,25 +73,46 @@ static void E(string* s) {
   }
 }
 
-template <const char* const FACTS[], uint64 N>
 class FactOpKernel : public OpKernel {
  public:
   explicit FactOpKernel(OpKernelConstruction* context) : OpKernel(context) {}
 
-  void Compute(OpKernelContext* context) override {
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  void Compute(OpKernelContext* context, const char* const facts[],
+               uint64 count) {
     Tensor* output_tensor = NULL;
     OP_REQUIRES_OK(
         context, context->allocate_output(0, TensorShape({}), &output_tensor));
     auto output = output_tensor->template scalar<string>();
 
-    string coded = FACTS[context->env()->NowMicros() % N];
+    string coded = facts[context->env()->NowMicros() % count];
     E(&coded);
     output() = coded;
   }
 };
 
+class FactOpKernel1 : public FactOpKernel {
+ public:
+  FactOpKernel1(OpKernelConstruction* context) : FactOpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    FactOpKernel::Compute(context, kFacts1, kNum1);
+  }
+};
+
+class FactOpKernel2 : public FactOpKernel {
+ public:
+  FactOpKernel2(OpKernelConstruction* context) : FactOpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    FactOpKernel::Compute(context, kFacts2, kNum2);
+  }
+};
+
 REGISTER_KERNEL_BUILDER(Name("Fact").Device(DEVICE_GPU).HostMemory("fact"),
-                        FactOpKernel<kFacts1, kNum1>);
+                        FactOpKernel1);
 
 static string D(const char* s) {
   string ret(s);
@@ -102,10 +123,10 @@ static string D(const char* s) {
 REGISTER_KERNEL_BUILDER(Name("Fact")
                             .Device(DEVICE_CPU)
                             .Label(D("Yoxmos").c_str()),
-                        FactOpKernel<kFacts2, kNum2>);
+                        FactOpKernel2);
 REGISTER_KERNEL_BUILDER(Name("Fact")
                             .Device(DEVICE_CPU)
                             .Label(D("yoxmos").c_str()),
-                        FactOpKernel<kFacts2, kNum2>);
+                        FactOpKernel2);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/fused_batch_norm_op_test.cc b/tensorflow/core/kernels/fused_batch_norm_op_test.cc
index c4b942c56f..a3f760b746 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op_test.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op_test.cc
@@ -79,7 +79,7 @@ TEST_F(FusedBatchNormOpTest, Inference) {
   AddInputFromArray<float>(TensorShape({2}), {4.0, 4.0});
   AddInputFromArray<float>(TensorShape({2}), {2.0, 2.0});
   AddInputFromArray<float>(TensorShape({2}), {10, 10});
-  AddInputFromArray<float>(TensorShape({2}), {11.67, 11.67});
+  AddInputFromArray<float>(TensorShape({2}), {11.67f, 11.67f});
 
   TF_ASSERT_OK(RunOpKernel());
 
@@ -106,8 +106,8 @@ TEST_F(FusedBatchNormGradOpTest, Simple) {
   AddInputFromArray<float>(TensorShape({1, 1, 6, 2}),
                            {1, 1, 7, 7, 4, 4, -3, -3, -11, -11, 13, 13});
   AddInputFromArray<float>(TensorShape({2}), {4, 4});
-  AddInputFromArray<float>(TensorShape({2}), {1.833, 1.833});
-  AddInputFromArray<float>(TensorShape({2}), {57.472, 57.472});
+  AddInputFromArray<float>(TensorShape({2}), {1.833f, 1.833f});
+  AddInputFromArray<float>(TensorShape({2}), {57.472f, 57.472f});
 
   TF_ASSERT_OK(RunOpKernel());
 
diff --git a/tensorflow/core/kernels/non_max_suppression_op_test.cc b/tensorflow/core/kernels/non_max_suppression_op_test.cc
index 070dd49aef..72e368db77 100644
--- a/tensorflow/core/kernels/non_max_suppression_op_test.cc
+++ b/tensorflow/core/kernels/non_max_suppression_op_test.cc
@@ -45,9 +45,9 @@ class NonMaxSuppressionOpTest : public OpsTestBase {
 TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClusters) {
   MakeOp(.5);
   AddInputFromArray<float>(TensorShape({6, 4}),
-                           {0, 0,  1, 1,  0, 0.1,  1, 1.1,  0, -0.1, 1, 0.9,
-                            0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100,  1, 101});
-  AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3});
+                           {0, 0,  1, 1,  0, 0.1f,  1, 1.1f,  0, -0.1f, 1, 0.9f,
+                            0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100,  1, 101});
+  AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f});
   AddInputFromArray<int>(TensorShape({}), {3});
   TF_ASSERT_OK(RunOpKernel());
 
@@ -59,9 +59,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClusters) {
 TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClustersFlippedCoordinates) {
   MakeOp(.5);
   AddInputFromArray<float>(TensorShape({6, 4}),
-                           {1, 1,  0, 0,  0, 0.1,  1, 1.1,  0, .9,  1, -0.1,
-                            0, 10, 1, 11, 1, 10.1, 0, 11.1, 1, 101, 0, 100});
-  AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3});
+                           {1, 1,  0, 0,  0, 0.1f,  1, 1.1f,  0, .9f,  1, -0.1f,
+                            0, 10, 1, 11, 1, 10.1f, 0, 11.1f, 1, 101, 0, 100});
+  AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f});
   AddInputFromArray<int>(TensorShape({}), {3});
   TF_ASSERT_OK(RunOpKernel());
 
@@ -73,9 +73,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClustersFlippedCoordinates) {
 TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostTwoBoxesFromThreeClusters) {
   MakeOp(.5);
   AddInputFromArray<float>(TensorShape({6, 4}),
-                           {0, 0,  1, 1,  0, 0.1,  1, 1.1,  0, -0.1, 1, 0.9,
-                            0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100,  1, 101});
-  AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3});
+                           {0, 0,  1, 1,  0, 0.1f,  1, 1.1f,  0, -0.1f, 1, 0.9f,
+                            0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100,  1, 101});
+  AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f});
   AddInputFromArray<int>(TensorShape({}), {2});
   TF_ASSERT_OK(RunOpKernel());
 
@@ -87,9 +87,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostTwoBoxesFromThreeClusters) {
 TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostThirtyBoxesFromThreeClusters) {
   MakeOp(.5);
   AddInputFromArray<float>(TensorShape({6, 4}),
-                           {0, 0,  1, 1,  0, 0.1,  1, 1.1,  0, -0.1, 1, 0.9,
-                            0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100,  1, 101});
-  AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3});
+                           {0, 0,  1, 1,  0, 0.1f,  1, 1.1f,  0, -0.1f, 1, 0.9f,
+                            0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100,  1, 101});
+  AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f});
   AddInputFromArray<int>(TensorShape({}), {30});
   TF_ASSERT_OK(RunOpKernel());
 
@@ -101,7 +101,7 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostThirtyBoxesFromThreeClusters) {
 TEST_F(NonMaxSuppressionOpTest, TestSelectSingleBox) {
   MakeOp(.5);
   AddInputFromArray<float>(TensorShape({1, 4}), {0, 0, 1, 1});
-  AddInputFromArray<float>(TensorShape({1}), {.9});
+  AddInputFromArray<float>(TensorShape({1}), {.9f});
   AddInputFromArray<int>(TensorShape({}), {3});
   TF_ASSERT_OK(RunOpKernel());
 
@@ -136,9 +136,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectFromTenIdenticalBoxes) {
 TEST_F(NonMaxSuppressionOpTest, TestInconsistentBoxAndScoreShapes) {
   MakeOp(.5);
   AddInputFromArray<float>(TensorShape({6, 4}),
-                           {0, 0,  1, 1,  0, 0.1,  1, 1.1,  0, -0.1, 1, 0.9,
-                            0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100,  1, 101});
-  AddInputFromArray<float>(TensorShape({5}), {.9, .75, .6, .95, .5});
+                           {0, 0,  1, 1,  0, 0.1f,  1, 1.1f,  0, -0.1f, 1, 0.9f,
+                            0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100,  1, 101});
+  AddInputFromArray<float>(TensorShape({5}), {.9f, .75f, .6f, .95f, .5f});
   AddInputFromArray<int>(TensorShape({}), {30});
   Status s = RunOpKernel();
 
@@ -151,7 +151,7 @@ TEST_F(NonMaxSuppressionOpTest, TestInconsistentBoxAndScoreShapes) {
 TEST_F(NonMaxSuppressionOpTest, TestInvalidIOUThreshold) {
   MakeOp(1.2);
   AddInputFromArray<float>(TensorShape({1, 4}), {0, 0, 1, 1});
-  AddInputFromArray<float>(TensorShape({1}), {.9});
+  AddInputFromArray<float>(TensorShape({1}), {.9f});
   AddInputFromArray<int>(TensorShape({}), {3});
   Status s = RunOpKernel();
 
diff --git a/tensorflow/core/kernels/resize_bilinear_op_test.cc b/tensorflow/core/kernels/resize_bilinear_op_test.cc
index deb36849e7..66836ff788 100644
--- a/tensorflow/core/kernels/resize_bilinear_op_test.cc
+++ b/tensorflow/core/kernels/resize_bilinear_op_test.cc
@@ -95,9 +95,10 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3) {
 
   // clang-format off
   test::FillValues<float>(&expected,
-    {1,     5.0/3,   2,
-     7.0/3, 3,       10.0/3,
-     3,     11.0/3,  4});
+    {1,        5.0f / 3,  2,
+     7.0f / 3, 3,         10.0f / 3,
+     3,        11.0f / 3, 4});
+
 
   // clang-format on
   test::ExpectTensorEqual<float>(expected, *GetOutput(0));
@@ -206,9 +207,9 @@ TEST_F(ResizeBilinearOpTest, TestBilinear4x4To3x3) {
 
   // clang-format off
   test::FillValues<float>(&expected,
-    {1,       7.0/3, 11.0/3,
-     19.0/3, 23.0/3, 27.0/3,
-     35.0/3, 39.0/3, 43.0/3});
+    {1,        7.0f/3, 11.0f/3,
+     19.0f/3, 23.0f/3, 27.0f/3,
+     35.0f/3, 39.0f/3, 43.0f/3});
 
   // clang-format on
   test::ExpectTensorEqual<float>(expected, *GetOutput(0));
@@ -251,8 +252,8 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3Batch2) {
   Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3, 3, 1}));
   // clang-format off
   test::FillValues<float>(&expected,
-    {1, 5.0/3, 2, 7.0/3, 3, 10.0/3, 3, 11.0/3, 4,
-     1, 5.0/3, 2, 7.0/3, 3, 10.0/3, 3, 11.0/3, 4
+    {1, 5.0f/3, 2, 7.0f/3, 3, 10.0f/3, 3, 11.0f/3, 4,
+     1, 5.0f/3, 2, 7.0f/3, 3, 10.0f/3, 3, 11.0f/3, 4
     });
   // clang-format on
   test::ExpectTensorEqual<float>(expected, *GetOutput(0));
@@ -268,15 +269,15 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2x2To3x3x2) {
   // clang-format off
   test::FillValues<float>(&expected,
     {
-      1,      -1,
-      5.0/3,  -5.0/3,
-      2,      -2,
-      7.0/3,  -7.0/3,
-      3,      -3,
-      10.0/3, -10.0/3,
-      3,      -3,
-      11.0/3, -11.0/3,
-      4,      -4
+      1,       -1,
+      5.0f/3,  -5.0f/3,
+      2,       -2,
+      7.0f/3,  -7.0f/3,
+      3,       -3,
+      10.0f/3, -10.0f/3,
+      3,       -3,
+      11.0f/3, -11.0f/3,
+      4,       -4
     });
   // clang-format on
   test::ExpectTensorEqual<float>(expected, *GetOutput(0));
diff --git a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
index a9a9bd46b7..a8c4b3746a 100644
--- a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
+++ b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
@@ -54,6 +54,8 @@ static Graph* ConstructSpaceToBatchGraph(
   return g;
 }
 
+// The BM_Expand macro is needed for this to build with VC++.
+#define BM_Expand(x) x
 #define BM_SpaceToBatchDev(OP, DEVICE, DTYPE, B, H, W, D, BS, P00, P01, P10,                            \
                            P11)                                                                         \
   static void                                                                                           \
@@ -69,10 +71,10 @@ static Graph* ConstructSpaceToBatchGraph(
   BENCHMARK(                                                                                            \
       BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11);
 #define BM_SpaceToBatch(OP, ...)                      \
-  BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__); \
-  BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__); \
-  BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__);  \
-  BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__);
+  BM_Expand(BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__)); \
+  BM_Expand(BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__)); \
+  BM_Expand(BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__));  \
+  BM_Expand(BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__));
 
 BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 64, 2, 0, 0, 0, 0);
 BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 1, 2, 0, 0, 0, 0);
diff --git a/tensorflow/core/kernels/sparse_add_op_test.cc b/tensorflow/core/kernels/sparse_add_op_test.cc
index 7baf27c1d0..4cad02bbee 100644
--- a/tensorflow/core/kernels/sparse_add_op_test.cc
+++ b/tensorflow/core/kernels/sparse_add_op_test.cc
@@ -61,8 +61,10 @@ TEST_F(SparseAddOpTest, TwoD_AddSparseTensorWithSelf) {
   // [3   4]
 
   const auto indices_shape = TensorShape({4, 2});
-  const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
-  const gtl::ArraySlice<int64> shape = {3, 2};
+  std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+  const gtl::ArraySlice<int64> indices(in);
+  std::initializer_list<int64> sh{ 3, 2 };
+  const gtl::ArraySlice<int64> shape(sh);
 
 #define ADD_TENSOR_INPUT()                                  \
   AddInputFromArray<int64>(indices_shape, indices);         \
@@ -99,8 +101,10 @@ TEST_F(SparseAddOpTest, TwoD_AddSparseTensorWithSelf) {
     DataType val_dtype = tensorflow::DataTypeToEnum<VALTYPE>::value;        \
                                                                             \
     const auto indices_shape = TensorShape({4, 2});                         \
-    const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};        \
-    const gtl::ArraySlice<int64> shape = {3, 2};                            \
+    std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1};                \
+    const gtl::ArraySlice<int64> indices(in);                               \
+    std::initializer_list<int64> sh{3, 2};                                  \
+    const gtl::ArraySlice<int64> shape(sh);                                 \
                                                                             \
     AddInputFromArray<int64>(indices_shape, indices);                       \
     AddInputFromArray<VALTYPE>(TensorShape({4}), {1, 2, 3, 4});             \
@@ -154,8 +158,10 @@ RUN_TEST(complex128);
     MakeOp<VALTYPE>();                                                   \
     DataType val_dtype = tensorflow::DataTypeToEnum<VALTYPE>::value;     \
     const auto indices_shape = TensorShape({4, 2});                      \
-    const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};     \
-    const gtl::ArraySlice<int64> shape = {3, 2};                         \
+    std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1};             \
+    const gtl::ArraySlice<int64> indices(in);                            \
+    std::initializer_list<int64> sh{3, 2};                               \
+    const gtl::ArraySlice<int64> shape(sh);                              \
                                                                          \
     auto AddSparseTensor = [indices, indices_shape, shape,               \
                             this](bool negate) {                         \
@@ -192,10 +198,10 @@ RUN_TEST(complex128);
   }
 
 RUN_TEST(int64, 1);
-RUN_TEST(float, 1e-3);
-RUN_TEST(double, 1e-3);
-RUN_TEST(complex64, 1e-3);
-RUN_TEST(complex128, 1e-3);
+RUN_TEST(float, 1e-3f);
+RUN_TEST(double, 1e-3f);
+RUN_TEST(complex64, 1e-3f);
+RUN_TEST(complex128, 1e-3f);
 #undef RUN_TEST
 
 }  // namespace
diff --git a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
index 7ef3070d06..eaf1884243 100644
--- a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
+++ b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
@@ -96,8 +96,10 @@ TEST_F(SparseDenseCDivTest, SameShape) {
   // [2    ]  cdiv [dense: same shape, all 1's]
   // [3   4]
   const auto indices_shape = TensorShape({4, 2});
-  const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
-  const gtl::ArraySlice<int64> shape = {3, 2};
+  std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+  const gtl::ArraySlice<int64> indices(in);
+  std::initializer_list<int64> sh{ 3, 2 };
+  const gtl::ArraySlice<int64> shape(sh);
 
   // Tensor dense(DT_FLOAT, TensorShape({3, 1}));
   Tensor dense(DT_FLOAT, TensorShape(shape));
@@ -123,8 +125,10 @@ TEST_F(SparseDenseCDivTest, BroadcastDenseSameDims) {
   // [2    ]  cdiv [dense: shape [3,1], all 1's]
   // [3   4]
   const auto indices_shape = TensorShape({4, 2});
-  const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
-  const gtl::ArraySlice<int64> shape = {3, 2};
+  std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+  const gtl::ArraySlice<int64> indices(in);
+  std::initializer_list<int64> sh{ 3, 2 };
+  const gtl::ArraySlice<int64> shape(sh);
 
   Tensor dense(DT_FLOAT, TensorShape({3, 1}));
   auto dense_flat = dense.flat<float>();
@@ -148,8 +152,10 @@ TEST_F(SparseDenseCDivTest, BroadcastDenseFewerDims) {
   // [2    ]  cdiv [dense: shape [2]]
   // [3   4]
   const auto indices_shape = TensorShape({4, 2});
-  const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
-  const gtl::ArraySlice<int64> shape = {3, 2};
+  std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+  const gtl::ArraySlice<int64> indices(in);
+  std::initializer_list<int64> sh{ 3, 2 };
+  const gtl::ArraySlice<int64> shape(sh);
 
   Tensor dense(DT_FLOAT, TensorShape({2}));
   auto dense_flat = dense.flat<float>();
@@ -178,8 +184,10 @@ TEST_F(SparseDenseCMulTest, BroadcastDense) {
   // [1   ?]  where ? remains implicitly zero.
   // [1.5 0]
   const auto indices_shape = TensorShape({4, 2});
-  const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
-  const gtl::ArraySlice<int64> shape = {3, 2};
+  std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+  const gtl::ArraySlice<int64> indices(in);
+  std::initializer_list<int64> sh{ 3, 2 };
+  const gtl::ArraySlice<int64> shape(sh);
 
   Tensor dense(DT_FLOAT, TensorShape({2}));
   auto dense_flat = dense.flat<float>();
diff --git a/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc b/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc
index 2fb78a2a21..110376be42 100644
--- a/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc
+++ b/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc
@@ -51,8 +51,10 @@ TEST_F(SparseReduceSumOpTest, SimpleReduce) {
   // [3   4]
 
   const auto indices_shape = TensorShape({4, 2});
-  const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
-  const gtl::ArraySlice<int64> shape = {3, 2};
+  std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+  const gtl::ArraySlice<int64> indices(in);
+  std::initializer_list<int64> sh{ 3, 2 };
+  const gtl::ArraySlice<int64> shape(sh);
 
   AddInputFromArray<int64>(indices_shape, indices);
   AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4});
@@ -91,8 +93,10 @@ TEST_F(SparseReduceSumSparseOpTest, SimpleReduce) {
   // [3   4]
 
   const auto indices_shape = TensorShape({4, 2});
-  const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
-  const gtl::ArraySlice<int64> shape = {3, 2};
+  std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+  const gtl::ArraySlice<int64> indices(in);
+  std::initializer_list<int64> sh{ 3, 2 };
+  const gtl::ArraySlice<int64> shape(sh);
 
   AddInputFromArray<int64>(indices_shape, indices);
   AddInputFromArray<float>(TensorShape({4}), {2, 2, 3, 4});
diff --git a/tensorflow/core/kernels/summary_image_op_test.cc b/tensorflow/core/kernels/summary_image_op_test.cc
index 96a4d4183f..f936276925 100644
--- a/tensorflow/core/kernels/summary_image_op_test.cc
+++ b/tensorflow/core/kernels/summary_image_op_test.cc
@@ -126,16 +126,16 @@ TEST_F(SummaryImageOpTest, OneColorImage4dInput) {
   AddInputFromArray<float>(
       TensorShape({1 /*batch*/, 5 /*rows*/, 2 /*columns*/, 3 /*depth*/}),
       {
-          /* r0, c0, RGB */ 1.0, 0.1, 0.2,
-          /* r0, c1, RGB */ 1.0, 0.3, 0.4,
-          /* r1, c0, RGB */ 0.0, 1.0, 0.0,
-          /* r1, c1, RGB */ 0.0, 1.0, 0.0,
-          /* r2, c0, RGB */ 0.0, 0.0, 1.0,
-          /* r2, c1, RGB */ 0.0, 0.0, 1.0,
-          /* r3, c0, RGB */ 1.0, 1.0, 0.0,
-          /* r3, c1, RGB */ 1.0, 0.0, 1.0,
-          /* r4, c0, RGB */ 1.0, 1.0, 0.0,
-          /* r4, c1, RGB */ 1.0, 0.0, 1.0,
+          /* r0, c0, RGB */ 1.0f, 0.1f, 0.2f,
+          /* r0, c1, RGB */ 1.0f, 0.3f, 0.4f,
+          /* r1, c0, RGB */ 0.0f, 1.0f, 0.0f,
+          /* r1, c1, RGB */ 0.0f, 1.0f, 0.0f,
+          /* r2, c0, RGB */ 0.0f, 0.0f, 1.0f,
+          /* r2, c1, RGB */ 0.0f, 0.0f, 1.0f,
+          /* r3, c0, RGB */ 1.0f, 1.0f, 0.0f,
+          /* r3, c1, RGB */ 1.0f, 0.0f, 1.0f,
+          /* r4, c0, RGB */ 1.0f, 1.0f, 0.0f,
+          /* r4, c1, RGB */ 1.0f, 0.0f, 1.0f,
       });
   TF_ASSERT_OK(RunOpKernel());
 
diff --git a/tensorflow/core/kernels/summary_op_test.cc b/tensorflow/core/kernels/summary_op_test.cc
index 9fd2bd2b5e..05b1687e5f 100644
--- a/tensorflow/core/kernels/summary_op_test.cc
+++ b/tensorflow/core/kernels/summary_op_test.cc
@@ -61,7 +61,7 @@ TEST_F(SummaryScalarOpTest, SimpleFloat) {
 
   // Feed and run
   AddInputFromArray<string>(TensorShape({3}), {"tag1", "tag2", "tag3"});
-  AddInputFromArray<float>(TensorShape({3}), {1.0, -0.73, 10000.0});
+  AddInputFromArray<float>(TensorShape({3}), {1.0f, -0.73f, 10000.0f});
   TF_ASSERT_OK(RunOpKernel());
 
   // Check the output size.
@@ -121,7 +121,7 @@ TEST_F(SummaryScalarOpTest, Error_MismatchedSize) {
 
   // Feed and run
   AddInputFromArray<string>(TensorShape({2}), {"tag1", "tag2"});
-  AddInputFromArray<float>(TensorShape({3}), {1.0, -0.73, 10000.0});
+  AddInputFromArray<float>(TensorShape({3}), {1.0f, -0.73f, 10000.0f});
   Status s = RunOpKernel();
   EXPECT_TRUE(StringPiece(s.ToString()).contains("not the same shape")) << s;
 }
@@ -131,7 +131,7 @@ TEST_F(SummaryScalarOpTest, Error_WrongDimsTags) {
 
   // Feed and run
   AddInputFromArray<string>(TensorShape({2, 1}), {"tag1", "tag2"});
-  AddInputFromArray<float>(TensorShape({2}), {1.0, -0.73});
+  AddInputFromArray<float>(TensorShape({2}), {1.0f, -0.73f});
   Status s = RunOpKernel();
   EXPECT_TRUE(
       StringPiece(s.ToString()).contains("tags and values not the same shape"))
@@ -143,7 +143,7 @@ TEST_F(SummaryScalarOpTest, Error_WrongDimsValues) {
 
   // Feed and run
   AddInputFromArray<string>(TensorShape({2}), {"tag1", "tag2"});
-  AddInputFromArray<float>(TensorShape({2, 1}), {1.0, -0.73});
+  AddInputFromArray<float>(TensorShape({2, 1}), {1.0f, -0.73f});
   Status s = RunOpKernel();
   EXPECT_TRUE(
       StringPiece(s.ToString()).contains("tags and values not the same shape"))
@@ -169,7 +169,8 @@ TEST_F(SummaryHistoOpTest, SimpleFloat) {
 
   // Feed and run
   AddInputFromArray<string>(TensorShape({}), {"taghisto"});
-  AddInputFromArray<float>(TensorShape({3, 2}), {0.1, -0.7, 4.1, 4., 5., 4.});
+  AddInputFromArray<float>(TensorShape({3, 2}),
+                           {0.1f, -0.7f, 4.1f, 4.f, 5.f, 4.f});
   TF_ASSERT_OK(RunOpKernel());
 
   // Check the output size.
@@ -254,7 +255,7 @@ TEST_F(SummaryHistoOpTest, Error_WrongDimsTags) {
 
   // Feed and run
   AddInputFromArray<string>(TensorShape({2, 1}), {"tag1", "tag2"});
-  AddInputFromArray<float>(TensorShape({2}), {1.0, -0.73});
+  AddInputFromArray<float>(TensorShape({2}), {1.0f, -0.73f});
   Status s = RunOpKernel();
   EXPECT_TRUE(StringPiece(s.ToString()).contains("tags must be scalar")) << s;
 }
@@ -264,7 +265,7 @@ TEST_F(SummaryHistoOpTest, Error_TooManyTagValues) {
 
   // Feed and run
   AddInputFromArray<string>(TensorShape({2}), {"tag1", "tag2"});
-  AddInputFromArray<float>(TensorShape({2, 1}), {1.0, -0.73});
+  AddInputFromArray<float>(TensorShape({2, 1}), {1.0f, -0.73f});
   Status s = RunOpKernel();
   EXPECT_TRUE(StringPiece(s.ToString()).contains("tags must be scalar")) << s;
 }
diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc
index 733278e440..f6acdf2422 100644
--- a/tensorflow/core/kernels/training_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc
@@ -64,7 +64,7 @@ struct ApplyAdadelta<GPUDevice, T> {
     bcast[0] = grad.dimension(0);
     Eigen::Sizes<1> single;
 
-    accum.device(d) = accum_update * rho.reshape(single).broadcast(bcast) +
+    accum.device(d) = accum * rho.reshape(single).broadcast(bcast) +
                       grad.square() * (grad.constant(T(1)) -
                                        rho.reshape(single).broadcast(bcast));
     const auto update =
diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index 1a9aa4d903..34e227156d 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -33,14 +33,31 @@ REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
                         IsVariableInitializedOp);
 
 #if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                          \
-  REGISTER_KERNEL_BUILDER(                                                  \
-      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<TYPE>("dtype"),   \
-      VariableOp);                                                          \
-  REGISTER_KERNEL_BUILDER(                                                  \
-      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<TYPE>("dtype"), \
-      VariableOp);
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#define REGISTER_SYCL_KERNEL(TYPE)                                      \
+  REGISTER_KERNEL_BUILDER(                                              \
+                          Name("Variable")                              \
+                          .Device(DEVICE_SYCL)                          \
+                          .TypeConstraint<TYPE>("dtype"),               \
+                          VariableOp);                                  \
+  REGISTER_KERNEL_BUILDER(Name("VariableV2")                            \
+                          .Device(DEVICE_SYCL)                          \
+                          .TypeConstraint<TYPE>("dtype"),               \
+                          VariableOp);                                  \
+  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                     \
+                          .Device(DEVICE_SYCL)                          \
+                          .TypeConstraint<TYPE>("dtype"),               \
+                          TemporaryVariableOp);                         \
+  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")              \
+                          .Device(DEVICE_SYCL)                          \
+                          .TypeConstraint<TYPE>("T"),                   \
+                          DestroyTemporaryVariableOp);                  \
+  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                 \
+                          .Device(DEVICE_SYCL)                          \
+                          .TypeConstraint<TYPE>("dtype")                \
+                          .HostMemory("is_initialized"),                \
+                          IsVariableInitializedOp);
+
+REGISTER_SYCL_KERNEL(float);
 #undef REGISTER_SYCL_KERNEL
 #endif
 
diff --git a/tensorflow/core/lib/core/notification_test.cc b/tensorflow/core/lib/core/notification_test.cc
index 8cb1c895ad..9d96708b6f 100644
--- a/tensorflow/core/lib/core/notification_test.cc
+++ b/tensorflow/core/lib/core/notification_test.cc
@@ -67,7 +67,9 @@ TEST(NotificationTest, TestMultipleThreadsWaitingOnNotification) {
       ++counter;
     });
   }
-  sleep(1);
+
+  // Sleep 1 second.
+  Env::Default()->SleepForMicroseconds(1 * 1000 * 1000);
 
   EXPECT_EQ(0, counter);
 
diff --git a/tensorflow/core/lib/gtl/cleanup.h b/tensorflow/core/lib/gtl/cleanup.h
index 230cdb624b..6053e98640 100644
--- a/tensorflow/core/lib/gtl/cleanup.h
+++ b/tensorflow/core/lib/gtl/cleanup.h
@@ -96,7 +96,7 @@ class Cleanup {
   bool is_released() const { return released_; }
 
  private:
-  static_assert(!std::is_reference<F>(), "F must not be a reference");
+  static_assert(!std::is_reference<F>::value, "F must not be a reference");
 
   bool released_ = false;
   F f_;
diff --git a/tensorflow/core/lib/gtl/edit_distance_test.cc b/tensorflow/core/lib/gtl/edit_distance_test.cc
index 02968b6ae8..18a400713f 100644
--- a/tensorflow/core/lib/gtl/edit_distance_test.cc
+++ b/tensorflow/core/lib/gtl/edit_distance_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/edit_distance.h"
 
+#include <cctype>
 #include <vector>
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
diff --git a/tensorflow/core/lib/strings/strcat_test.cc b/tensorflow/core/lib/strings/strcat_test.cc
index 25561f1bd1..c556b1f676 100644
--- a/tensorflow/core/lib/strings/strcat_test.cc
+++ b/tensorflow/core/lib/strings/strcat_test.cc
@@ -22,6 +22,11 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
 
+#ifdef _MSC_VER
+// ssize_t is not a standard C++ type.
+typedef ptrdiff_t ssize_t;
+#endif
+
 namespace tensorflow {
 namespace strings {
 
diff --git a/tensorflow/core/ops/nn_ops_test.cc b/tensorflow/core/ops/nn_ops_test.cc
index 3618769dc0..974d7aa87b 100644
--- a/tensorflow/core/ops/nn_ops_test.cc
+++ b/tensorflow/core/ops/nn_ops_test.cc
@@ -507,7 +507,7 @@ TEST(NNOpsTest, FractionalPool_ShapeFn) {
                        .Finalize(&op.node_def));
     };
 
-    set_op(std::vector<float>{2.0, 1, 1 / 1.5, 1 / 2.0});
+    set_op(std::vector<float>{2.0f, 1, 1 / 1.5f, 1 / 2.0f});
 
     // Rank check.
     INFER_ERROR("must be rank 4", op, "[?,?,?]");
diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc
index d1f63589ea..8370e57b88 100644
--- a/tensorflow/core/ops/state_ops.cc
+++ b/tensorflow/core/ops/state_ops.cc
@@ -295,7 +295,7 @@ This operation outputs `ref` after the update is done.
 This makes it easier to chain operations that need to use the reset value.
 
 If values in `ref` is to be updated more than once, because there are
-duplicate entires in `indices`, the order at which the updates happen
+duplicate entries in `indices`, the order at which the updates happen
 for each value is undefined.
 
 Requires `updates.shape = indices.shape + ref.shape[1:]`.
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 64a6ab0c7a..83a2a17d48 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -25,7 +25,7 @@ def tf_deps(deps, suffix):
   return tf_deps
 
 def tf_proto_library_cc(name, srcs = [], has_services = None,
-                        deps = [], visibility = [], testonly = 0,
+                        protodeps = [], visibility = [], testonly = 0,
                         cc_libs = [],
                         cc_stubby_versions = None,
                         cc_grpc_version = None,
@@ -34,7 +34,7 @@ def tf_proto_library_cc(name, srcs = [], has_services = None,
                         js_api_version = 2, js_codegen = "jspb"):
   native.filegroup(
       name = name + "_proto_srcs",
-      srcs = srcs + tf_deps(deps, "_proto_srcs"),
+      srcs = srcs + tf_deps(protodeps, "_proto_srcs"),
       testonly = testonly,
   )
 
@@ -43,10 +43,14 @@ def tf_proto_library_cc(name, srcs = [], has_services = None,
     use_grpc_plugin = True
   cc_proto_library(
       name = name + "_cc",
-      srcs = srcs + tf_deps(deps, "_proto_srcs"),
-      deps = deps + ["@protobuf//:cc_wkt_protos"],
+      srcs = srcs,
+      deps = tf_deps(protodeps, "_cc") + ["@protobuf//:cc_wkt_protos"],
       cc_libs = cc_libs + ["@protobuf//:protobuf"],
-      copts = ["-Wno-unused-but-set-variable", "-Wno-sign-compare"],
+      copts = [
+          "-Wno-unknown-warning-option",
+          "-Wno-unused-but-set-variable",
+          "-Wno-sign-compare",
+      ],
       protoc = "@protobuf//:protoc",
       default_runtime = "@protobuf//:protobuf",
       use_grpc_plugin = use_grpc_plugin,
@@ -54,13 +58,14 @@ def tf_proto_library_cc(name, srcs = [], has_services = None,
       visibility = visibility,
   )
 
-def tf_proto_library_py(name, srcs=[], deps=[], visibility=[], testonly=0,
+def tf_proto_library_py(name, srcs=[], protodeps=[], deps=[], visibility=[],
+                        testonly=0,
                         srcs_version="PY2AND3"):
   py_proto_library(
       name = name + "_py",
       srcs = srcs,
       srcs_version = srcs_version,
-      deps = deps,
+      deps = deps + tf_deps(protodeps, "_py") + ["@protobuf//:protobuf_python"],
       protoc = "@protobuf//:protoc",
       default_runtime = "@protobuf//:protobuf_python",
       visibility = visibility,
@@ -68,15 +73,16 @@ def tf_proto_library_py(name, srcs=[], deps=[], visibility=[], testonly=0,
   )
 
 def tf_proto_library(name, srcs = [], has_services = None,
-                     deps = [], visibility = [], testonly = 0,
+                     protodeps = [], visibility = [], testonly = 0,
                      cc_libs = [],
                      cc_api_version = 2, go_api_version = 2,
                      java_api_version = 2, py_api_version = 2,
                      js_api_version = 2, js_codegen = "jspb"):
+  """Make a proto library, possibly depending on other proto libraries."""
   tf_proto_library_cc(
       name = name,
-      srcs = srcs + tf_deps(deps, "_proto_srcs"),
-      deps = deps,
+      srcs = srcs,
+      protodeps = protodeps,
       cc_libs = cc_libs,
       testonly = testonly,
       visibility = visibility,
@@ -84,9 +90,9 @@ def tf_proto_library(name, srcs = [], has_services = None,
 
   tf_proto_library_py(
       name = name,
-      srcs = srcs + tf_deps(deps, "_proto_srcs"),
+      srcs = srcs,
+      protodeps = protodeps,
       srcs_version = "PY2AND3",
-      deps = deps + ["@protobuf//:protobuf_python"],
       testonly = testonly,
       visibility = visibility,
   )
@@ -155,7 +161,16 @@ def tf_additional_test_deps():
   return []
 
 def tf_additional_test_srcs():
-  return ["platform/default/test_benchmark.cc", "platform/posix/test.cc"]
+  return [
+      "platform/default/test_benchmark.cc",
+  ] + select({
+      "//tensorflow:windows" : [
+          "platform/windows/test.cc"
+        ],
+      "//conditions:default" : [
+          "platform/posix/test.cc",
+        ],
+    })
 
 def tf_kernel_tests_linkstatic():
   return 0
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index 4ef795edcc..0857010f7c 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -10,7 +10,6 @@ exports_files(["LICENSE"])
 load("//tensorflow:tensorflow.bzl", "if_cuda")
 load("//tensorflow:tensorflow.bzl", "tf_copts")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
-load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path")
 load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path")
 
 cc_library(
@@ -138,7 +137,7 @@ filegroup(
 cc_library(
     name = "cuda",
     data = [
-        "@local_config_cuda//cuda:{}".format(cuda_library_path("cudart")),
+        "@local_config_cuda//cuda:cudart",
     ],
     linkopts = select({
         "@local_config_cuda//cuda:darwin": [
diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc
index e7808ca08d..1d03725c78 100644
--- a/tensorflow/core/platform/default/logging.cc
+++ b/tensorflow/core/platform/default/logging.cc
@@ -81,7 +81,41 @@ void LogMessage::GenerateLogMessage() {
 }
 #endif
 
-LogMessage::~LogMessage() { GenerateLogMessage(); }
+
+namespace {
+
+int64 MinLogLevel() {
+  const char* tf_env_var_val = getenv("TF_CPP_MIN_LOG_LEVEL");
+  if (tf_env_var_val == nullptr) {
+    return 0;
+  }
+
+  // Ideally we would use env_var / safe_strto64, but it is
+  // hard to use here without pulling in a lot of dependencies,
+  // so we do a poor-man's parsing.
+  string min_log_level(tf_env_var_val);
+  if (min_log_level == "1") {
+    // Maps to WARNING
+    return 1;
+  } else if (min_log_level == "2") {
+    // Maps to ERROR
+    return 2;
+  } else if (min_log_level == "3") {
+    // Maps to FATAL
+    return 3;
+  } else {
+    // Maps to INFO (the default).
+    return 0;
+  }
+}
+
+}  // namespace
+
+LogMessage::~LogMessage() {
+  // Read the min log level once during the first call to logging.
+  static int64 min_log_level = MinLogLevel();
+  if (TF_PREDICT_TRUE(severity_ >= min_log_level)) GenerateLogMessage();
+}
 
 LogMessageFatal::LogMessageFatal(const char* file, int line)
     : LogMessage(file, line, FATAL) {}
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 787ebe654b..428a45576f 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -208,12 +208,10 @@ class Env {
   // TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or
   // provide a routine to get the absolute time.
 
-  /// \brief Returns the number of micro-seconds since some fixed point in
-  /// time. Only useful for computing deltas of time.
+  /// \brief Returns the number of micro-seconds since the Unix epoch.
   virtual uint64 NowMicros() = 0;
 
-  /// \brief Returns the number of seconds since some fixed point in
-  /// time. Only useful for computing deltas of time.
+  /// \brief Returns the number of seconds since the Unix epoch.
   virtual uint64 NowSeconds() { return NowMicros() / 1000000L; }
 
   /// Sleeps/delays the thread for the prescribed number of micro-seconds.
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
index 3de3b17517..b0f0cbe3f1 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
@@ -112,6 +112,11 @@ class LibHDFS {
     }
     string path = io::JoinPath(hdfs_home, "lib", "native", "libhdfs.so");
     status_ = TryLoadAndBind(path.c_str(), &handle_);
+    if (!status_.ok()) {
+      // Fall back to the dynamic loader's search path in case libhdfs.so is
+      // installed in a non-standard location.
+      status_ = TryLoadAndBind("libhdfs.so", &handle_);
+    }
     return;
   }
 
diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc
index 78d000bff8..402c718e4f 100644
--- a/tensorflow/core/platform/port_test.cc
+++ b/tensorflow/core/platform/port_test.cc
@@ -36,8 +36,14 @@ TEST(ConditionVariable, WaitForMilliseconds_Timeout) {
   mutex m;
   mutex_lock l(m);
   condition_variable cv;
+  ConditionResult result = kCond_MaybeNotified;
   time_t start = time(NULL);
-  EXPECT_EQ(WaitForMilliseconds(&l, &cv, 3000), kCond_Timeout);
+  // Condition variables are subject to spurious wakeups on some platforms,
+  // so we need to check for a timeout within a loop.
+  while (result == kCond_MaybeNotified) {
+    result = WaitForMilliseconds(&l, &cv, 3000);
+  }
+  EXPECT_EQ(result, kCond_Timeout);
   time_t finish = time(NULL);
   EXPECT_GE(finish - start, 3);
 }
@@ -51,7 +57,7 @@ TEST(ConditionVariable, WaitForMilliseconds_Signalled) {
   // Sleep for just 1 second then notify.  We have a timeout of 3 secs,
   // so the condition variable will notice the cv signal before the timeout.
   pool.Schedule([&m, &cv]() {
-    sleep(1);
+    Env::Default()->SleepForMicroseconds(1 * 1000 * 1000);
     mutex_lock l(m);
     cv.notify_all();
   });
diff --git a/tensorflow/core/platform/subprocess.h b/tensorflow/core/platform/subprocess.h
index 7dfd38688d..dfdcf82173 100644
--- a/tensorflow/core/platform/subprocess.h
+++ b/tensorflow/core/platform/subprocess.h
@@ -53,7 +53,7 @@ class SubProcess;
     defined(PLATFORM_GOOGLE_ANDROID)
 #include "tensorflow/core/platform/posix/subprocess.h"
 #elif defined(PLATFORM_WINDOWS)
-#error SubProcess not yet implemented for Windows
+#include "tensorflow/core/platform/windows/subprocess.h"
 #else
 #error Define the appropriate PLATFORM_<foo> macro for this platform
 #endif
diff --git a/tensorflow/core/platform/windows/subprocess.h b/tensorflow/core/platform/windows/subprocess.h
new file mode 100644
index 0000000000..b65313363e
--- /dev/null
+++ b/tensorflow/core/platform/windows/subprocess.h
@@ -0,0 +1,27 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_
+#define TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_
+
+namespace tensorflow {
+
+// SubProcess is not yet implemented for Windows; this empty class only
+// provides the type name.
+class SubProcess {};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_
diff --git a/tensorflow/core/platform/windows/test.cc b/tensorflow/core/platform/windows/test.cc
new file mode 100644
index 0000000000..0ffd02ff14
--- /dev/null
+++ b/tensorflow/core/platform/windows/test.cc
@@ -0,0 +1,51 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/net.h"
+#include "tensorflow/core/platform/test.h"
+
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace testing {
+
+std::unique_ptr<SubProcess> CreateSubProcess(const std::vector<string>& argv) {
+  LOG(FATAL) << "CreateSubProcess NOT IMPLEMENTED for Windows yet ! ";
+  return nullptr;  // Stub: `argv` is ignored and no process is created.
+}
+
+int PickUnusedPortOrDie() { return internal::PickUnusedPortOrDie(); }
+
+string TensorFlowSrcRoot() {
+  // TEST_SRCDIR is set by 'bazel test' and cmake; TEST_WORKSPACE is only
+  // set by newer versions of bazel.
+  const char* src_dir = getenv("TEST_SRCDIR");
+  const char* workspace_name = getenv("TEST_WORKSPACE");
+
+  // Without a usable TEST_SRCDIR, fall back to a path relative to $PWD.
+  if (src_dir == nullptr || src_dir[0] == '\0') {
+    LOG(WARNING) << "TEST_SRCDIR environment variable not set: "
+                 << "using $PWD/tensorflow as TensorFlowSrcRoot() for tests.";
+    return "tensorflow";
+  }
+  if (workspace_name == nullptr || workspace_name[0] == '\0') {
+    return strings::StrCat(src_dir, "/tensorflow");
+  }
+  return strings::StrCat(src_dir, "/", workspace_name, "/tensorflow");
+}
+
+}  // namespace testing
+}  // namespace tensorflow
diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc
index 31516bb2ee..670abf3fdf 100644
--- a/tensorflow/core/platform/windows/windows_file_system.cc
+++ b/tensorflow/core/platform/windows/windows_file_system.cc
@@ -467,6 +467,23 @@ Status WindowsFileSystem::RenameFile(const string& src, const string& target) {
   return result;
 }
 
+Status WindowsFileSystem::GetMatchingPaths(const string& pattern,
+                                           std::vector<string>* results) {
+  // NOTE(mrry): FileSystem::GetMatchingPaths() does not correctly handle
+  // Windows paths that contain backslashes. Windows APIs accept forward
+  // slashes and backslashes equivalently, so the pattern is matched using
+  // forward slashes only and every match is converted back to backslashes.
+  // This is not ideal, since the API expects backslash as an escape
+  // character, but no code appears to rely on that behavior.
+  string slash_pattern = pattern;
+  std::replace(slash_pattern.begin(), slash_pattern.end(), '\\', '/');
+  TF_RETURN_IF_ERROR(FileSystem::GetMatchingPaths(slash_pattern, results));
+  for (string& match : *results) {
+    std::replace(match.begin(), match.end(), '/', '\\');
+  }
+  return Status::OK();
+}
+
 Status WindowsFileSystem::Stat(const string& fname, FileStatistics* stat) {
   Status result;
   struct _stat sbuf;
diff --git a/tensorflow/core/platform/windows/windows_file_system.h b/tensorflow/core/platform/windows/windows_file_system.h
index dd83a27caf..507290e9e6 100644
--- a/tensorflow/core/platform/windows/windows_file_system.h
+++ b/tensorflow/core/platform/windows/windows_file_system.h
@@ -48,6 +48,9 @@ class WindowsFileSystem : public FileSystem {
 
   Status GetChildren(const string& dir, std::vector<string>* result) override;
 
+  Status GetMatchingPaths(const string& pattern,
+                          std::vector<string>* result) override;
+
   Status Stat(const string& fname, FileStatistics* stat) override;
 
   Status DeleteFile(const string& fname) override;
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 1de976fb3d..34673be216 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -19,7 +19,7 @@ limitations under the License.
 // TensorFlow uses semantic versioning, see http://semver.org/.
 
 #define TF_MAJOR_VERSION 0
-#define TF_MINOR_VERSION 11
+#define TF_MINOR_VERSION 12
 #define TF_PATCH_VERSION head
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
diff --git a/tensorflow/core/util/memmapped_file_system.cc b/tensorflow/core/util/memmapped_file_system.cc
index d67f948f1d..e077e94cf8 100644
--- a/tensorflow/core/util/memmapped_file_system.cc
+++ b/tensorflow/core/util/memmapped_file_system.cc
@@ -177,8 +177,13 @@ const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const {
   return reinterpret_cast<const uint8*>(mapped_memory_->data()) + offset;
 }
 
+#if defined(COMPILER_MSVC)
+constexpr char* MemmappedFileSystem::kMemmappedPackagePrefix;
+constexpr char* MemmappedFileSystem::kMemmappedPackageDefaultGraphDef;
+#else
 constexpr char MemmappedFileSystem::kMemmappedPackagePrefix[];
 constexpr char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[];
+#endif
 
 Status MemmappedFileSystem::InitializeFromFile(Env* env,
                                                const string& filename) {
diff --git a/tensorflow/core/util/memmapped_file_system.h b/tensorflow/core/util/memmapped_file_system.h
index d64c4a765c..541587aeab 100644
--- a/tensorflow/core/util/memmapped_file_system.h
+++ b/tensorflow/core/util/memmapped_file_system.h
@@ -53,9 +53,19 @@ class MemmappedFileSystem : public FileSystem {
  public:
   // Memmapped regions use this prefix to distinguish from
   // the filesystem.
-  static constexpr char kMemmappedPackagePrefix[] = "memmapped_package://";
-  // The default graphdef in the package.
+#if defined(COMPILER_MSVC)
+  static constexpr char* kMemmappedPackagePrefix =
+#else
+  static constexpr char kMemmappedPackagePrefix[] =
+#endif
+      "memmapped_package://";
+
+// The default graphdef in the package.
+#if defined(COMPILER_MSVC)
+  static constexpr char* kMemmappedPackageDefaultGraphDef =
+#else
   static constexpr char kMemmappedPackageDefaultGraphDef[] =
+#endif
       "memmapped_package://.";
 
   MemmappedFileSystem();
diff --git a/tensorflow/core/util/memmapped_file_system_test.cc b/tensorflow/core/util/memmapped_file_system_test.cc
index c7d919041a..179c72c1f5 100644
--- a/tensorflow/core/util/memmapped_file_system_test.cc
+++ b/tensorflow/core/util/memmapped_file_system_test.cc
@@ -137,8 +137,15 @@ TEST(MemmappedFileSystemTest, ProxyToDefault) {
   const string dir = testing::TmpDir();
   const string filename = io::JoinPath(dir, "test_file");
   // Check that we can create write and read ordinary file.
-  std::unique_ptr<WritableFile> writable_file;
-  TF_ASSERT_OK(memmapped_env.NewAppendableFile(filename, &writable_file));
+  std::unique_ptr<WritableFile> writable_file_temp;
+  TF_ASSERT_OK(memmapped_env.NewAppendableFile(filename, &writable_file_temp));
+  // Make sure the file is deleted once the writer is destroyed.
+  const auto adh = [&memmapped_env, &filename](WritableFile* f) {
+      delete f;
+      memmapped_env.DeleteFile(filename);
+  };
+  std::unique_ptr<WritableFile, decltype(adh)> writable_file(
+      writable_file_temp.release(), adh);
   const string test_string = "bla-bla-bla";
   TF_ASSERT_OK(writable_file->Append(test_string));
   TF_ASSERT_OK(writable_file->Close());
diff --git a/tensorflow/core/util/semver_test.cc b/tensorflow/core/util/semver_test.cc
index 75994a658e..0647f670c7 100644
--- a/tensorflow/core/util/semver_test.cc
+++ b/tensorflow/core/util/semver_test.cc
@@ -63,6 +63,10 @@ TEST(SemverTest, VersionStringFollowsSemver) {
   if (major == 0 && minor <= 11) {
     return;
   }
+  if (str_util::ConsumePrefix(&semver, "head")) {
+    ASSERT_TRUE(semver.empty());
+    return;
+  }
   ASSERT_TRUE(str_util::ConsumeLeadingDigits(&semver, &patch));
   if (semver.empty()) return;
   if (semver[0] == '-') {
diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h
index a575d98da3..9d6f9e8bb5 100644
--- a/tensorflow/core/util/sparse/sparse_tensor.h
+++ b/tensorflow/core/util/sparse/sparse_tensor.h
@@ -17,8 +17,9 @@ limitations under the License.
 #define TENSORFLOW_UTIL_SPARSE_SPARSE_TENSOR_H_
 
 #include <limits>
-
+#include <numeric>
 #include <vector>
+
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_types.h"
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc
index 0b675eaac9..6bd3d9c780 100644
--- a/tensorflow/core/util/stat_summarizer.cc
+++ b/tensorflow/core/util/stat_summarizer.cc
@@ -340,10 +340,10 @@ std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions(
 
 std::string StatSummarizer::GetOutputString() const {
   std::stringstream stream;
-  stream << "Total time (us): " << run_total_micros_;
+  stream << "Total time (us): " << run_total_micros_ << std::endl;
   stream << GetTimingStatsByRunOrder();
   stream << GetTimingStatsByTopDurations();
-  stream << "Total Memory (bytes): " << memory_;
+  stream << "Total Memory (bytes): " << memory_ << std::endl;
   stream << GetMemoryStatsByRunOrder();
   stream << GetMemoryStatsByUsage();
   return stream.str();