diff options
Diffstat (limited to 'tensorflow/core')
86 files changed, 792 insertions, 230 deletions
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index fe40c691c5..991fc2f29d 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -949,12 +949,12 @@ cc_library( # Libraries with GPU facilities that are useful for writing kernels. cc_library( name = "gpu_lib", - srcs = if_not_windows([ + srcs = [ "common_runtime/gpu/gpu_event_mgr.cc", - ]), - hdrs = if_not_windows([ + ], + hdrs = [ "common_runtime/gpu/gpu_event_mgr.h", - ]), + ], copts = tf_copts(), visibility = ["//visibility:public"], deps = [ @@ -964,7 +964,8 @@ cc_library( ":lib_internal", ":proto_text", ":protos_all_cc", - ] + if_not_windows([":stream_executor"]), + ":stream_executor", + ], ) cc_library( @@ -982,7 +983,7 @@ tf_proto_library_cc( name = "worker_proto", srcs = ["protobuf/worker.proto"], cc_api_version = 2, - cc_libs = [":protos_all_cc"], + protodeps = [":protos_all"], visibility = [ "//tensorflow:internal", ], @@ -993,8 +994,8 @@ tf_proto_library_cc( srcs = ["protobuf/worker_service.proto"], has_services = 1, cc_api_version = 2, - cc_libs = [":worker_proto_cc"], cc_stubby_versions = ["2"], + protodeps = [":worker_proto"], visibility = [ "//tensorflow:internal", ], @@ -1004,7 +1005,7 @@ tf_proto_library_cc( name = "master_proto", srcs = ["protobuf/master.proto"], cc_api_version = 2, - cc_libs = [":protos_all_cc"], + protodeps = [":protos_all"], visibility = [ "//tensorflow:internal", ], @@ -1015,8 +1016,8 @@ tf_proto_library_cc( srcs = ["protobuf/master_service.proto"], has_services = 1, cc_api_version = 2, - cc_libs = [":master_proto_cc"], cc_stubby_versions = ["2"], + protodeps = [":master_proto"], visibility = [ "//tensorflow:internal", ], @@ -1417,7 +1418,7 @@ tf_cuda_library( tf_cuda_library( name = "gpu_runtime", - srcs = if_not_windows([ + srcs = [ "common_runtime/gpu/gpu_bfc_allocator.cc", "common_runtime/gpu/gpu_debug_allocator.cc", "common_runtime/gpu/gpu_device.cc", @@ -1429,8 +1430,8 @@ tf_cuda_library( "common_runtime/gpu/pool_allocator.cc", 
"common_runtime/gpu/process_state.cc", "common_runtime/gpu_device_context.h", - ]), - hdrs = if_not_windows([ + ], + hdrs = [ "common_runtime/gpu/gpu_bfc_allocator.h", "common_runtime/gpu/gpu_debug_allocator.h", "common_runtime/gpu/gpu_device.h", @@ -1439,7 +1440,7 @@ tf_cuda_library( "common_runtime/gpu/gpu_util.h", "common_runtime/gpu/pool_allocator.h", "common_runtime/gpu/process_state.h", - ]), + ], copts = tf_copts(), linkstatic = 1, deps = [ @@ -1451,10 +1452,9 @@ tf_cuda_library( ":lib", ":lib_internal", ":protos_all_cc", - "//third_party/eigen3", - ] + if_not_windows([ ":stream_executor", - ]), + "//third_party/eigen3", + ], alwayslink = 1, ) diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc index 44f17d6260..4b0165bae7 100644 --- a/tensorflow/core/common_runtime/direct_session_test.cc +++ b/tensorflow/core/common_runtime/direct_session_test.cc @@ -835,7 +835,7 @@ static void TestSessionInterOpThreadsImpl(bool use_function_lib) { FunctionLibraryDefinition flib(OpRegistry::Global(), library_graph_def); Graph g(&flib); Tensor t(DT_FLOAT, TensorShape({})); - t.scalar<float>()() = {1.2}; + t.scalar<float>()() = {1.2f}; Node* x = test::graph::Constant(&g, t); Node* y; if (use_function_lib) { @@ -945,7 +945,7 @@ TEST(DirectSessionTest, TestSessionInterOpThreadsWithFunctions) { TEST(DirectSessionTest, TestSessionInterOpThreadsInvalidOptions) { Graph g(OpRegistry::Global()); Tensor t(DT_FLOAT, TensorShape({})); - t.scalar<float>()() = {1.2}; + t.scalar<float>()() = {1.2f}; Node* x = test::graph::Constant(&g, t); GraphDef def; test::graph::ToGraphDef(&g, &def); @@ -979,7 +979,7 @@ TEST(DirectSessionTest, TestDirectSessionRunClose) { // Construct a graph with a variable and a single assign. 
Graph g(OpRegistry::Global()); Tensor t(DT_FLOAT, TensorShape({})); - t.scalar<float>()() = {1.2}; + t.scalar<float>()() = {1.2f}; Node* var_val = test::graph::Constant(&g, t); Node* var = test::graph::Var(&g, DT_FLOAT, {}); Node* var_assign = test::graph::Assign(&g, var, var_val); @@ -1063,7 +1063,7 @@ TEST(DirectSessionTest, TestDirectSessionReset) { // Construct a graph with a variable and a single assign. Graph g(OpRegistry::Global()); Tensor t(DT_FLOAT, TensorShape({})); - t.scalar<float>()() = {1.2}; + t.scalar<float>()() = {1.2f}; Node* var_val = test::graph::Constant(&g, t); Node* var = test::graph::Var(&g, DT_FLOAT, {}); Node* var_assign = test::graph::Assign(&g, var, var_val); diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h index 8cca22fb6f..239c9666e3 100644 --- a/tensorflow/core/common_runtime/executor.h +++ b/tensorflow/core/common_runtime/executor.h @@ -39,7 +39,7 @@ class StepStatsCollector; // Rendezvous* rendezvous = NewNaiveRendezvous(); // TF_CHECK_OK(rendezvous->Send("input", some_input_tensor)); // TF_CHECK_OK(executor->Run({ExecutorOpts, rendezvous, nullptr})); -// TF_CHECK_OK(rendezvous->Recv("input", &output_tensor)); +// TF_CHECK_OK(rendezvous->Recv("output", &output_tensor)); // ... ... // // Multiple threads can call Executor::Run concurrently. diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc index 175b784825..699b54f345 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc +++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc @@ -19,16 +19,26 @@ limitations under the License. 
namespace tensorflow { -SYCLAllocator::~SYCLAllocator() { } +SYCLAllocator::~SYCLAllocator() {} string SYCLAllocator::Name() { return "device:SYCL"; } void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { + assert(device_); auto p = device_->allocate(num_bytes); return p; } -void SYCLAllocator::DeallocateRaw(void *ptr) { device_->deallocate(ptr); } +void SYCLAllocator::DeallocateRaw(void *ptr) { + if (device_) { + device_->deallocate(ptr); + } +} + +void SYCLAllocator::EnterLameDuckMode() { + device_->deallocate_all(); + device_ = nullptr; +} } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.h b/tensorflow/core/common_runtime/sycl/sycl_allocator.h index 887c727f6e..8558b6c873 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_allocator.h +++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.h @@ -29,14 +29,16 @@ namespace tensorflow { class SYCLAllocator : public Allocator { public: - SYCLAllocator(Eigen::SyclDevice* device) : device_(device) {} + SYCLAllocator(Eigen::QueueInterface* device) : device_(device) {} virtual ~SYCLAllocator() override; string Name() override; void *AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void *ptr) override; + void EnterLameDuckMode(); + virtual bool ShouldAllocateEmptyTensors() override final { return true; } private: - Eigen::SyclDevice *device_; // not owned + Eigen::QueueInterface *device_; // not owned TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator); }; diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc index 10a037c02d..e5fe85bcf5 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device.cc +++ b/tensorflow/core/common_runtime/sycl/sycl_device.cc @@ -25,8 +25,9 @@ namespace tensorflow { SYCLDevice::~SYCLDevice() { device_context_->Unref(); - delete sycl_allocator_; + sycl_allocator_->EnterLameDuckMode(); delete sycl_device_; + delete sycl_queue_; } void 
SYCLDevice::Compute(OpKernel *op_kernel, OpKernelContext *context) { @@ -50,12 +51,8 @@ Allocator *SYCLDevice::GetAllocator(AllocatorAttributes attr) { Status SYCLDevice::MakeTensorFromProto(const TensorProto &tensor_proto, const AllocatorAttributes alloc_attrs, Tensor *tensor) { - AllocatorAttributes attr; - attr.set_on_host(true); - attr.set_gpu_compatible(true); - Allocator *host_alloc = GetAllocator(attr); Tensor parsed(tensor_proto.dtype()); - if (!parsed.FromProto(host_alloc, tensor_proto)) { + if (!parsed.FromProto(cpu_allocator_, tensor_proto)) { return errors::InvalidArgument("Cannot parse tensor from proto: ", tensor_proto.DebugString()); } @@ -86,6 +83,12 @@ Status SYCLDevice::FillContextMap(const Graph *graph, return Status::OK(); } +Status SYCLDevice::Sync() { + sycl_device_->synchronize(); + return Status::OK(); +} + + } // namespace tensorflow #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h index d3b3db2a71..2759053df5 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device.h +++ b/tensorflow/core/common_runtime/sycl/sycl_device.h @@ -22,7 +22,6 @@ limitations under the License. #define EIGEN_USE_SYCL -#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/local_device.h" #include "tensorflow/core/common_runtime/sycl/sycl_allocator.h" #include "tensorflow/core/common_runtime/sycl/sycl_device_context.h" @@ -30,7 +29,6 @@ limitations under the License. 
namespace tensorflow { - class SYCLDevice : public LocalDevice { public: template <typename SYCLSelector> @@ -42,8 +40,9 @@ public: name, DEVICE_SYCL, memory_limit, locality, physical_device_desc), nullptr), cpu_allocator_(cpu_allocator), - sycl_device_(new Eigen::SyclDevice(sycl_selector)), - sycl_allocator_(new SYCLAllocator(sycl_device_)), + sycl_queue_(new Eigen::QueueInterface(sycl_selector)), + sycl_device_(new Eigen::SyclDevice(sycl_queue_)), + sycl_allocator_(new SYCLAllocator(sycl_queue_)), device_context_(new SYCLDeviceContext()) { set_eigen_sycl_device(sycl_device_); } @@ -59,16 +58,17 @@ public: Status FillContextMap(const Graph *graph, DeviceContextMap *device_context_map) override; - Status Sync() override { return Status::OK(); } + Status Sync() override; static string GetShortDeviceDescription(/*int device_id, const DeviceDescription& desc*/) { return strings::StrCat("device: 0, name SYCL, pci bus id: 0"); } private: - Allocator *cpu_allocator_; // owned - Eigen::SyclDevice* sycl_device_; // owned - SYCLAllocator *sycl_allocator_; // owned + Allocator *cpu_allocator_; // owned + Eigen::QueueInterface* sycl_queue_; // owned + Eigen::SyclDevice* sycl_device_; // owned + SYCLAllocator *sycl_allocator_; // owned SYCLDeviceContext *device_context_; }; diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc index 9dd289bebd..b487d24c20 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc +++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc @@ -16,13 +16,11 @@ limitations under the License. 
#if TENSORFLOW_USE_SYCL #define EIGEN_USE_SYCL +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/sycl/sycl_device_context.h" #include "tensorflow/core/common_runtime/dma_helper.h" -#define EIGEN_USE_SYCL -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - namespace tensorflow { void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor, @@ -108,7 +106,6 @@ void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor, StatusCallback done) { const int64 total_bytes = device_tensor->TotalBytes(); if (total_bytes > 0) { - device->eigen_sycl_device()->deallocate_all(); const void* src_ptr = DMAHelper::base(device_tensor); void* dst_ptr = DMAHelper::base(cpu_tensor); switch (device_tensor->dtype()) { diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc index 9b8770420c..cf9e349e01 100644 --- a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc +++ b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc @@ -15,6 +15,7 @@ limitations under the License. 
#if TENSORFLOW_USE_SYCL +#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/sycl/sycl_device.h" namespace tensorflow { diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD index 2363b69390..3e4ab5bc17 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -36,7 +36,7 @@ tf_proto_library_cc( has_services = 1, cc_api_version = 2, cc_grpc_version = 1, - cc_libs = ["//tensorflow/core:protos_all_cc"], + protodeps = ["//tensorflow/core:protos_all"], ) # Depending on this target causes a concrete DebuggerState implementation diff --git a/tensorflow/core/debug/debug_gateway_test.cc b/tensorflow/core/debug/debug_gateway_test.cc index 1f6e766663..963cea8419 100644 --- a/tensorflow/core/debug/debug_gateway_test.cc +++ b/tensorflow/core/debug/debug_gateway_test.cc @@ -372,9 +372,9 @@ TEST_F(SessionDebugMinusAXTest, debug_gateway.SetNodeValueCallback( [this, &mu, &val_callback_count, &a_debug_identity_node_name, &x_debug_identity_node_name, &y_debug_identity_node_name, - &debug_identity_tensor_vals, - &callbacks_done](const string& node_name, const int output_slot, - const Tensor& tensor_value, const bool is_ref) { + &debug_identity_tensor_vals, &callbacks_done, &kConcurrentRuns]( + const string& node_name, const int output_slot, + const Tensor& tensor_value, const bool is_ref) { mutex_lock l(mu); if (node_name == a_debug_identity_node_name && output_slot == 0) { diff --git a/tensorflow/core/debug/debug_io_utils.cc b/tensorflow/core/debug/debug_io_utils.cc index 41868ce8da..4b5ecaa9b6 100644 --- a/tensorflow/core/debug/debug_io_utils.cc +++ b/tensorflow/core/debug/debug_io_utils.cc @@ -18,6 +18,12 @@ limitations under the License. 
#include <vector> #include "grpc++/create_channel.h" + +#if defined(PLATFORM_WINDOWS) +// winsock2.h is used in grpc, so Ws2_32.lib is needed +#pragma comment(lib,"Ws2_32.lib") +#endif + #include "tensorflow/core/debug/debug_service.grpc.pb.h" #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/io/path.h" diff --git a/tensorflow/core/debug/debug_io_utils_test.cc b/tensorflow/core/debug/debug_io_utils_test.cc index 1ddab1689b..ab020517b0 100644 --- a/tensorflow/core/debug/debug_io_utils_test.cc +++ b/tensorflow/core/debug/debug_io_utils_test.cc @@ -273,7 +273,8 @@ TEST_F(DebugIOUtilsTest, PublishTensorConcurrentlyToPartiallyOverlappingPaths) { auto fn = [this, &dump_count, &done_count, &mu, &dump_root_base, &dump_roots, &dump_file_paths, &wall_time, &tensor_name, &debug_node_name, - &kNodeName, &kDebugOpName, &kConcurrentPubs, &all_done]() { + &kNodeName, &kDebugOpName, &kConcurrentPubs, &kOutputSlot, + &all_done]() { // "gumpy" is the shared directory part of the path. string dump_root; string debug_url; diff --git a/tensorflow/core/framework/partial_tensor_shape_test.cc b/tensorflow/core/framework/partial_tensor_shape_test.cc index b008a93c03..23f3d908fb 100644 --- a/tensorflow/core/framework/partial_tensor_shape_test.cc +++ b/tensorflow/core/framework/partial_tensor_shape_test.cc @@ -220,7 +220,7 @@ TEST(PartialTensorShapeTest, PartialShapeMergeWith) { TEST(PartialTensorShapeTest, MakePartialShapeEmpty) { // Empty made partial shapes should still be fully defined - const int64 dims[0] = {}; + const int64 dims[1] = {}; PartialTensorShape shape; EXPECT_FALSE(shape.IsFullyDefined()); TF_ASSERT_OK(PartialTensorShape::MakePartialShape(dims, 0, &shape)); diff --git a/tensorflow/core/framework/tensor_testutil.h b/tensorflow/core/framework/tensor_testutil.h index 73afca40ac..29b9de5c07 100644 --- a/tensorflow/core/framework/tensor_testutil.h +++ b/tensorflow/core/framework/tensor_testutil.h @@ -16,6 +16,8 @@ limitations under the License. 
#ifndef TENSORFLOW_FRAMEWORK_TENSOR_TESTUTIL_H_ #define TENSORFLOW_FRAMEWORK_TENSOR_TESTUTIL_H_ +#include <numeric> + #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 2315c2ffb6..e99ed9dfa8 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2342,7 +2342,6 @@ cc_library( ":batch_norm_op", ":bias_op", ":conv_ops", - ":depthwise_conv_grad_op", ":dilation_ops", ":fused_batch_norm_op", ":in_topk_op", @@ -2354,7 +2353,10 @@ cc_library( ":softsign_op", ":topk_op", ":xent_op", - ] + if_not_windows([":depthwise_conv_op"]), + ] + if_not_windows([ + ":depthwise_conv_grad_op", + ":depthwise_conv_op", + ]), ) NN_DEPS = [ diff --git a/tensorflow/core/kernels/adjust_contrast_op_test.cc b/tensorflow/core/kernels/adjust_contrast_op_test.cc index b925dc6883..06fd7ca419 100644 --- a/tensorflow/core/kernels/adjust_contrast_op_test.cc +++ b/tensorflow/core/kernels/adjust_contrast_op_test.cc @@ -56,7 +56,7 @@ TEST_F(AdjustContrastOpTest, Simple_1223) { TF_EXPECT_OK(InitOp()); AddInputFromArray<float>(TensorShape({1, 2, 2, 3}), {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12}); - AddInputFromArray<float>(TensorShape({}), {0.2}); + AddInputFromArray<float>(TensorShape({}), {0.2f}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 2, 2, 3})); @@ -78,7 +78,7 @@ TEST_F(AdjustContrastOpTest, Big_99x99x3) { } AddInputFromArray<float>(TensorShape({1, 99, 99, 3}), values); - AddInputFromArray<float>(TensorShape({}), {0.2}); + AddInputFromArray<float>(TensorShape({}), {0.2f}); TF_ASSERT_OK(RunOpKernel()); } diff --git a/tensorflow/core/kernels/batch_norm_op_test.cc b/tensorflow/core/kernels/batch_norm_op_test.cc index 746b0d46ad..c5e55346eb 100644 --- a/tensorflow/core/kernels/batch_norm_op_test.cc +++ b/tensorflow/core/kernels/batch_norm_op_test.cc @@ -47,15 +47,15 @@ 
TEST_F(BatchNormOpTest, Simple) { AddInputFromArray<float>(TensorShape({1, 1, 6, 2}), {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6}); AddInputFromArray<float>(TensorShape({2}), {10, 20}); - AddInputFromArray<float>(TensorShape({2}), {0.25, 0.5}); - AddInputFromArray<float>(TensorShape({2}), {0.1, 0.6}); - AddInputFromArray<float>(TensorShape({2}), {0.0, 0.0}); + AddInputFromArray<float>(TensorShape({2}), {0.25f, 0.5f}); + AddInputFromArray<float>(TensorShape({2}), {0.1f, 0.6f}); + AddInputFromArray<float>(TensorShape({2}), {0.0f, 0.0f}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 1, 6, 2})); test::FillValues<float>( - &expected, {-17.86, -22.00, -15.87, -20.59, -13.87, -19.18, -21.86, - -33.31, -23.85, -34.72, -25.85, -36.13}); + &expected, {-17.86f, -22.00f, -15.87f, -20.59f, -13.87f, -19.18f, -21.86f, + -33.31f, -23.85f, -34.72f, -25.85f, -36.13f }); test::ExpectTensorNear<float>(expected, *GetOutput(0), 0.01); } diff --git a/tensorflow/core/kernels/cast_op_test.cc b/tensorflow/core/kernels/cast_op_test.cc index ffad7fd02e..5b7529bb8a 100644 --- a/tensorflow/core/kernels/cast_op_test.cc +++ b/tensorflow/core/kernels/cast_op_test.cc @@ -49,17 +49,18 @@ class CastOpTest : public OpsTestBase { TF_EXPECT_OK(InitOp()); } - template <typename IN, typename OUT> + template <typename INPUT, typename OUTPUT> void CheckCast() { - DataType in_type = DataTypeToEnum<IN>::v(); - DataType out_type = DataTypeToEnum<OUT>::v(); + DataType in_type = DataTypeToEnum<INPUT>::v(); + DataType out_type = DataTypeToEnum<OUTPUT>::v(); MakeOp(in_type, out_type); - AddInputFromArray<IN>(TensorShape({1, 2, 2, 1}), - {IN(1), IN(2), IN(3), IN(4)}); + AddInputFromArray<INPUT>(TensorShape({1, 2, 2, 1}), + {INPUT(1), INPUT(2), INPUT(3), INPUT(4)}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), out_type, TensorShape({1, 2, 2, 1})); - test::FillValues<OUT>(&expected, {OUT(1), OUT(2), OUT(3), OUT(4)}); - test::ExpectTensorEqual<OUT>(expected, 
*GetOutput(0)); + test::FillValues<OUTPUT>(&expected, + {OUTPUT(1), OUTPUT(2), OUTPUT(3), OUTPUT(4)}); + test::ExpectTensorEqual<OUTPUT>(expected, *GetOutput(0)); } }; diff --git a/tensorflow/core/kernels/colorspace_op_test.cc b/tensorflow/core/kernels/colorspace_op_test.cc index 4719a59b63..943d25a975 100644 --- a/tensorflow/core/kernels/colorspace_op_test.cc +++ b/tensorflow/core/kernels/colorspace_op_test.cc @@ -71,7 +71,7 @@ class RGBToHSVOpTest : public OpsTestBase { void CheckRedMax(DataType data_type) { // Test case where red channel dominates - AddInputFromArray<T>(TensorShape({3}), {.8, .4, .2}); + AddInputFromArray<T>(TensorShape({3}), {.8f, .4f, .2f}); TF_ASSERT_OK(RunOpKernel()); T expected_h = 1. / 6. * .2 / .6; @@ -85,7 +85,7 @@ class RGBToHSVOpTest : public OpsTestBase { void CheckGreenMax(DataType data_type) { // Test case where green channel dominates - AddInputFromArray<T>(TensorShape({3}), {.2, .8, .4}); + AddInputFromArray<T>(TensorShape({3}), {.2f, .8f, .4f}); TF_ASSERT_OK(RunOpKernel()); T expected_h = 1. / 6. * (2.0 + (.2 / .6)); @@ -99,7 +99,7 @@ class RGBToHSVOpTest : public OpsTestBase { void CheckBlueMax(DataType data_type) { // Test case where blue channel dominates - AddInputFromArray<T>(TensorShape({3}), {.4, .2, .8}); + AddInputFromArray<T>(TensorShape({3}), {.4f, .2f, .8f}); TF_ASSERT_OK(RunOpKernel()); T expected_h = 1. / 6. * (4.0 + (.2 / .6)); @@ -112,7 +112,7 @@ class RGBToHSVOpTest : public OpsTestBase { } void CheckNegativeDifference(DataType data_type) { - AddInputFromArray<T>(TensorShape({3}), {0, .1, .2}); + AddInputFromArray<T>(TensorShape({3}), {0, .1f, .2f}); TF_ASSERT_OK(RunOpKernel()); T expected_h = 1. / 6. 
* (4.0 + (-.1 / .2)); @@ -220,7 +220,7 @@ class HSVToRGBOpTest : public OpsTestBase { TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), data_type, TensorShape({3})); - test::FillValues<T>(&expected, {0, .1, .2}); + test::FillValues<T>(&expected, {0, .1f, .2f}); test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6); } }; diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc index e92b11efc6..b01263f288 100644 --- a/tensorflow/core/kernels/control_flow_ops.cc +++ b/tensorflow/core/kernels/control_flow_ops.cc @@ -113,9 +113,12 @@ REGISTER_GPU_HOST_REF_KERNEL(string); #undef REGISTER_GPU_HOST_REF_KERNEL #if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Switch").Device(DEVICE_SYCL).TypeConstraint<type>("T"), SwitchOp) +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER(Name("Switch") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<type>("T") \ + .HostMemory("pred"), \ + SwitchOp) REGISTER_SYCL_KERNEL(bool); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); #undef REGISTER_SYCL_KERNEL @@ -219,9 +222,12 @@ REGISTER_GPU_REF_KERNEL(bool); #undef REGISTER_GPU_REF_KERNEL #if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Merge").Device(DEVICE_SYCL).TypeConstraint<type>("T"), MergeOp) +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER(Name("Merge") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<type>("T") \ + .HostMemory("value_index"), \ + MergeOp) REGISTER_SYCL_KERNEL(bool); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); #undef REGISTER_SYCL_KERNEL @@ -418,8 +424,12 @@ REGISTER_GPU_HOST_KERNEL(string); #if TENSORFLOW_USE_SYCL #define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), NextIterationOp) + REGISTER_KERNEL_BUILDER(Name("NextIteration") \ + .Device(DEVICE_SYCL) \ + .HostMemory("data") \ + 
.HostMemory("output") \ + .TypeConstraint<type>("T"), \ + NextIterationOp) REGISTER_SYCL_KERNEL(bool); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); #undef REGISTER_SYCL_KERNEL diff --git a/tensorflow/core/kernels/control_flow_ops_test.cc b/tensorflow/core/kernels/control_flow_ops_test.cc index 97f56c392a..301609e04d 100644 --- a/tensorflow/core/kernels/control_flow_ops_test.cc +++ b/tensorflow/core/kernels/control_flow_ops_test.cc @@ -85,13 +85,27 @@ class AbortOpTest : public OpsTestBase { protected: }; +#ifdef PLATFORM_WINDOWS +#define SIGABRT 3 + +class KilledBySignal { + public: + explicit KilledBySignal(int signum) : signum_(signum) {} + bool operator()(int exit_status) const { return exit_status == signum_; } + private: + const int signum_; +}; +#else +#define KilledBySignal ::testing::KilledBySignal +#endif + // Pass an error message to the op. TEST_F(AbortOpTest, pass_error_msg) { TF_ASSERT_OK(NodeDefBuilder("abort_op", "Abort") .Attr("error_msg", "abort_op_test") .Finalize(node_def())); TF_ASSERT_OK(InitOp()); - EXPECT_EXIT(RunOpKernel(), ::testing::KilledBySignal(SIGABRT), + EXPECT_EXIT(RunOpKernel(), KilledBySignal(SIGABRT), "Abort_op intentional failure; abort_op_test"); } @@ -99,7 +113,7 @@ TEST_F(AbortOpTest, pass_error_msg) { TEST_F(AbortOpTest, default_msg) { TF_ASSERT_OK(NodeDefBuilder("abort_op", "Abort").Finalize(node_def())); TF_ASSERT_OK(InitOp()); - EXPECT_EXIT(RunOpKernel(), ::testing::KilledBySignal(SIGABRT), + EXPECT_EXIT(RunOpKernel(), KilledBySignal(SIGABRT), "Abort_op intentional failure; "); } diff --git a/tensorflow/core/kernels/cwise_op_abs.cc b/tensorflow/core/kernels/cwise_op_abs.cc index fbc23b3b6f..8cf1eac41e 100644 --- a/tensorflow/core/kernels/cwise_op_abs.cc +++ b/tensorflow/core/kernels/cwise_op_abs.cc @@ -21,6 +21,18 @@ REGISTER5(UnaryOp, CPU, "Abs", functor::abs, float, Eigen::half, double, int32, #if !defined(IS_MOBILE_PLATFORM) REGISTER2(UnaryOp, CPU, "ComplexAbs", functor::abs, complex64, complex128); #endif + 
+#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Abs") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::abs<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER4(UnaryOp, GPU, "Abs", functor::abs, float, Eigen::half, double, int64); REGISTER2(UnaryOp, GPU, "ComplexAbs", functor::abs, complex64, complex128); diff --git a/tensorflow/core/kernels/cwise_op_acos.cc b/tensorflow/core/kernels/cwise_op_acos.cc index c44c8bc6f6..1d2d815027 100644 --- a/tensorflow/core/kernels/cwise_op_acos.cc +++ b/tensorflow/core/kernels/cwise_op_acos.cc @@ -17,6 +17,18 @@ limitations under the License. namespace tensorflow { REGISTER2(UnaryOp, CPU, "Acos", functor::acos, float, double); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Acos") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::acos<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Acos", functor::acos, float, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc index 44c552d18e..a6bff78694 100644 --- a/tensorflow/core/kernels/cwise_op_add_1.cc +++ b/tensorflow/core/kernels/cwise_op_add_1.cc @@ -26,7 +26,7 @@ REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32, .Device(DEVICE_SYCL) \ .TypeConstraint<TYPE>("T"), \ BinaryOp<SYCLDevice, functor::add<TYPE>>); -TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); + REGISTER_SYCL_KERNEL(float); #undef REGISTER_SYCL_KERNEL #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/cwise_op_asin.cc b/tensorflow/core/kernels/cwise_op_asin.cc index bba20aa6af..92a22e90c4 100644 --- a/tensorflow/core/kernels/cwise_op_asin.cc +++ 
b/tensorflow/core/kernels/cwise_op_asin.cc @@ -17,6 +17,18 @@ limitations under the License. namespace tensorflow { REGISTER2(UnaryOp, CPU, "Asin", functor::asin, float, double); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Asin") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::asin<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Asin", functor::asin, float, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_atan.cc b/tensorflow/core/kernels/cwise_op_atan.cc index 055b8289d4..825e85283f 100644 --- a/tensorflow/core/kernels/cwise_op_atan.cc +++ b/tensorflow/core/kernels/cwise_op_atan.cc @@ -17,6 +17,18 @@ limitations under the License. namespace tensorflow { REGISTER2(UnaryOp, CPU, "Atan", functor::atan, float, double); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Atan") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::atan<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Atan", functor::atan, float, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_ceil.cc b/tensorflow/core/kernels/cwise_op_ceil.cc index 08ac1b4194..c5a4aaf831 100644 --- a/tensorflow/core/kernels/cwise_op_ceil.cc +++ b/tensorflow/core/kernels/cwise_op_ceil.cc @@ -17,6 +17,18 @@ limitations under the License. 
namespace tensorflow { REGISTER3(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, double); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Ceil") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::ceil<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_cos.cc b/tensorflow/core/kernels/cwise_op_cos.cc index 2680143d65..a758da5842 100644 --- a/tensorflow/core/kernels/cwise_op_cos.cc +++ b/tensorflow/core/kernels/cwise_op_cos.cc @@ -18,6 +18,18 @@ limitations under the License. namespace tensorflow { REGISTER5(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, double, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Cos") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::cos<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc index c2b05a69b2..ef8c477e48 100644 --- a/tensorflow/core/kernels/cwise_op_div.cc +++ b/tensorflow/core/kernels/cwise_op_div.cc @@ -30,6 +30,11 @@ REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double, Name("Div") \ .Device(DEVICE_SYCL) \ .TypeConstraint<TYPE>("T"), \ + BinaryOp<SYCLDevice, functor::div<TYPE>>); \ + REGISTER_KERNEL_BUILDER( \ + Name("RealDiv") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ BinaryOp<SYCLDevice, functor::div<TYPE>>); REGISTER_SYCL_KERNEL(float) #undef REGISTER_SYCL_KERNEL diff --git a/tensorflow/core/kernels/cwise_op_exp.cc 
b/tensorflow/core/kernels/cwise_op_exp.cc index 7ec3526282..0ee47f7dee 100644 --- a/tensorflow/core/kernels/cwise_op_exp.cc +++ b/tensorflow/core/kernels/cwise_op_exp.cc @@ -18,6 +18,18 @@ limitations under the License. namespace tensorflow { REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Exp") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::exp<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_floor.cc b/tensorflow/core/kernels/cwise_op_floor.cc index 732087d4cb..129d754b82 100644 --- a/tensorflow/core/kernels/cwise_op_floor.cc +++ b/tensorflow/core/kernels/cwise_op_floor.cc @@ -17,6 +17,18 @@ limitations under the License. namespace tensorflow { REGISTER3(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, double); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Floor") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::floor<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc index a5767476c3..69dbb70b83 100644 --- a/tensorflow/core/kernels/cwise_op_floor_div.cc +++ b/tensorflow/core/kernels/cwise_op_floor_div.cc @@ -18,6 +18,9 @@ limitations under the License. 
namespace tensorflow { REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16, int16, int32, int64); +REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float, + Eigen::half, double); + #if TENSORFLOW_USE_SYCL #define REGISTER_SYCL_KERNEL(TYPE) \ REGISTER_KERNEL_BUILDER( \ @@ -25,11 +28,10 @@ REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16, .Device(DEVICE_SYCL) \ .TypeConstraint<TYPE>("T"), \ BinaryOp<SYCLDevice, functor::floor_div<TYPE>>); -TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_KERNEL); +REGISTER_SYCL_KERNEL(float) #undef REGISTER_SYCL_KERNEL #endif // TENSORFLOW_USE_SYCL -REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float, - Eigen::half, double); + #if GOOGLE_CUDA REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16, int64); diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc index e38b271318..59976141c7 100644 --- a/tensorflow/core/kernels/cwise_op_isfinite.cc +++ b/tensorflow/core/kernels/cwise_op_isfinite.cc @@ -18,6 +18,7 @@ limitations under the License. 
namespace tensorflow { REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half, double); + #if TENSORFLOW_USE_SYCL #define REGISTER_SYCL_KERNEL(TYPE) \ REGISTER_KERNEL_BUILDER( \ @@ -25,9 +26,10 @@ REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half, .Device(DEVICE_SYCL) \ .TypeConstraint<TYPE>("T"), \ UnaryOp<SYCLDevice, functor::isfinite<TYPE>>); -TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +REGISTER_SYCL_KERNEL(float); #undef REGISTER_SYCL_KERNEL #endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc index bf056dbe0e..675cb95b95 100644 --- a/tensorflow/core/kernels/cwise_op_isinf.cc +++ b/tensorflow/core/kernels/cwise_op_isinf.cc @@ -17,6 +17,7 @@ limitations under the License. namespace tensorflow { REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double); + #if TENSORFLOW_USE_SYCL #define REGISTER_SYCL_KERNEL(TYPE) \ REGISTER_KERNEL_BUILDER( \ @@ -24,9 +25,10 @@ REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double); .Device(DEVICE_SYCL) \ .TypeConstraint<TYPE>("T"), \ UnaryOp<SYCLDevice, functor::isinf<TYPE>>); -TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +REGISTER_SYCL_KERNEL(float); #undef REGISTER_SYCL_KERNEL #endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc index d2bac23882..c394087ed8 100644 --- a/tensorflow/core/kernels/cwise_op_isnan.cc +++ b/tensorflow/core/kernels/cwise_op_isnan.cc @@ -17,6 +17,7 @@ limitations under the License. 
namespace tensorflow { REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double); + #if TENSORFLOW_USE_SYCL #define REGISTER_SYCL_KERNEL(TYPE) \ REGISTER_KERNEL_BUILDER( \ @@ -24,9 +25,10 @@ REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double); .Device(DEVICE_SYCL) \ .TypeConstraint<TYPE>("T"), \ UnaryOp<SYCLDevice, functor::isnan<TYPE>>); -TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +REGISTER_SYCL_KERNEL(float); #undef REGISTER_SYCL_KERNEL #endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_log.cc b/tensorflow/core/kernels/cwise_op_log.cc index be184f03de..71c4588b3d 100644 --- a/tensorflow/core/kernels/cwise_op_log.cc +++ b/tensorflow/core/kernels/cwise_op_log.cc @@ -18,6 +18,18 @@ limitations under the License. namespace tensorflow { REGISTER5(UnaryOp, CPU, "Log", functor::log, float, Eigen::half, double, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Log") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::log<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Log", functor::log, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_log1p.cc b/tensorflow/core/kernels/cwise_op_log1p.cc index 91a14989e6..03ea3a0a89 100644 --- a/tensorflow/core/kernels/cwise_op_log1p.cc +++ b/tensorflow/core/kernels/cwise_op_log1p.cc @@ -18,6 +18,18 @@ limitations under the License. 
namespace tensorflow { REGISTER5(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, double, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Log1p") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::log1p<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Log1p", functor::log1p, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_neg.cc b/tensorflow/core/kernels/cwise_op_neg.cc index 67b088e110..4221fc0710 100644 --- a/tensorflow/core/kernels/cwise_op_neg.cc +++ b/tensorflow/core/kernels/cwise_op_neg.cc @@ -18,6 +18,18 @@ limitations under the License. namespace tensorflow { REGISTER7(UnaryOp, CPU, "Neg", functor::neg, float, Eigen::half, double, int32, complex64, int64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Neg") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::neg<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER4(UnaryOp, GPU, "Neg", functor::neg, float, Eigen::half, double, int64); diff --git a/tensorflow/core/kernels/cwise_op_pow.cc b/tensorflow/core/kernels/cwise_op_pow.cc index dd28b36519..8eeba6ab14 100644 --- a/tensorflow/core/kernels/cwise_op_pow.cc +++ b/tensorflow/core/kernels/cwise_op_pow.cc @@ -18,6 +18,18 @@ limitations under the License. 
namespace tensorflow { REGISTER7(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, double, int32, int64, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Pow") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + BinaryOp<SYCLDevice, functor::pow<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER4(BinaryOp, GPU, "Pow", functor::pow, float, Eigen::half, double, int64); diff --git a/tensorflow/core/kernels/cwise_op_rsqrt.cc b/tensorflow/core/kernels/cwise_op_rsqrt.cc index 3207166e94..7dc96d47a6 100644 --- a/tensorflow/core/kernels/cwise_op_rsqrt.cc +++ b/tensorflow/core/kernels/cwise_op_rsqrt.cc @@ -18,6 +18,18 @@ limitations under the License. namespace tensorflow { REGISTER5(UnaryOp, CPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Rsqrt") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::rsqrt<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_sin.cc b/tensorflow/core/kernels/cwise_op_sin.cc index 1e3880beb1..8d0c0959f7 100644 --- a/tensorflow/core/kernels/cwise_op_sin.cc +++ b/tensorflow/core/kernels/cwise_op_sin.cc @@ -18,6 +18,18 @@ limitations under the License. 
namespace tensorflow { REGISTER5(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, double, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Sin") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::sin<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Sin", functor::sin, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_sqrt.cc b/tensorflow/core/kernels/cwise_op_sqrt.cc index aecffda4ba..710001517b 100644 --- a/tensorflow/core/kernels/cwise_op_sqrt.cc +++ b/tensorflow/core/kernels/cwise_op_sqrt.cc @@ -18,6 +18,18 @@ limitations under the License. namespace tensorflow { REGISTER5(UnaryOp, CPU, "Sqrt", functor::sqrt, float, Eigen::half, double, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Sqrt") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::sqrt<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Sqrt", functor::sqrt, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_square.cc b/tensorflow/core/kernels/cwise_op_square.cc index 0ce4473d83..f867f127a7 100644 --- a/tensorflow/core/kernels/cwise_op_square.cc +++ b/tensorflow/core/kernels/cwise_op_square.cc @@ -18,6 +18,18 @@ limitations under the License. 
namespace tensorflow { REGISTER7(UnaryOp, CPU, "Square", functor::square, float, Eigen::half, double, int32, int64, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Square") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::square<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER4(UnaryOp, GPU, "Square", functor::square, float, Eigen::half, double, int64); diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc index ed78ba37a8..e1326dbed1 100644 --- a/tensorflow/core/kernels/cwise_op_sub.cc +++ b/tensorflow/core/kernels/cwise_op_sub.cc @@ -31,7 +31,7 @@ REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32); .Device(DEVICE_SYCL) \ .TypeConstraint<TYPE>("T"), \ BinaryOp<SYCLDevice, functor::sub<TYPE>>); -TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); + REGISTER_SYCL_KERNEL(float); #undef REGISTER_SYCL_KERNEL #endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/cwise_op_tan.cc b/tensorflow/core/kernels/cwise_op_tan.cc index fca1addfa4..ac49cad88f 100644 --- a/tensorflow/core/kernels/cwise_op_tan.cc +++ b/tensorflow/core/kernels/cwise_op_tan.cc @@ -17,6 +17,18 @@ limitations under the License. 
namespace tensorflow { REGISTER2(UnaryOp, CPU, "Tan", functor::tan, float, double); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Tan") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::tan<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Tan", functor::tan, float, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_tanh.cc b/tensorflow/core/kernels/cwise_op_tanh.cc index a4c4aad053..ae2c473e20 100644 --- a/tensorflow/core/kernels/cwise_op_tanh.cc +++ b/tensorflow/core/kernels/cwise_op_tanh.cc @@ -19,6 +19,18 @@ limitations under the License. namespace tensorflow { REGISTER5(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, double, complex64, complex128); + +#if TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Tanh") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + UnaryOp<SYCLDevice, functor::tanh<TYPE>>); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Tanh", functor::tanh, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_ops_sycl_common.h b/tensorflow/core/kernels/cwise_ops_sycl_common.h index 4c22cc4855..3fcf0759d4 100644 --- a/tensorflow/core/kernels/cwise_ops_sycl_common.h +++ b/tensorflow/core/kernels/cwise_ops_sycl_common.h @@ -21,12 +21,10 @@ limitations under the License. 
#define TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_ #define EIGEN_USE_SYCL +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/register_types.h" - -#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/cwise_ops.h" -#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { @@ -62,14 +60,14 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> { void operator()(const SYCLDevice& d, typename Functor::tout_type out, typename Functor::tin_type in0, typename Functor::tin_type in1, bool* error) { - To32Bit(out).device(d) = To32Bit(in0).binaryExpr(in1, typename Functor::func()); + To32Bit(out).device(d) = To32Bit(in0).binaryExpr(To32Bit(in1), typename Functor::func()); } void Left(const SYCLDevice& d, typename Functor::tout_type out, typename Functor::tscalar_type scalar, typename Functor::tin_type in, bool* error) { typedef typename Functor::func Binary; - constexpr int NumDims = Functor::tin_type::NumDimensions; + constexpr int NumDims = Functor::tin_type::NumDimensions; typedef typename Functor::tin_type::Scalar T; typedef typename Functor::tin_type::Index Index; Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>(); diff --git a/tensorflow/core/kernels/debug_ops.cc b/tensorflow/core/kernels/debug_ops.cc index 1a4d70c36b..78d386a5af 100644 --- a/tensorflow/core/kernels/debug_ops.cc +++ b/tensorflow/core/kernels/debug_ops.cc @@ -28,6 +28,16 @@ REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_CPU), CopyOp); REGISTER_KERNEL_BUILDER(Name("CopyHost").Device(DEVICE_CPU), CopyOp); +#ifdef TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_SYCL), CopyOp); + +REGISTER_KERNEL_BUILDER(Name("CopyHost") + .Device(DEVICE_SYCL) + .HostMemory("input") + .HostMemory("output"), + CopyOp); +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_GPU), 
CopyOp); @@ -50,6 +60,14 @@ REGISTER_KERNEL_BUILDER(Name("DebugIdentity") DebugIdentityOp); #endif +#ifdef TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER(Name("DebugIdentity") + .Device(DEVICE_SYCL) + .HostMemory("input") + .HostMemory("output"), + DebugIdentityOp); +#endif // TENSORFLOW_USE_SYCL + // Register debug NaN-counter (non-ref and ref) ops. #define REGISTER_DEBUG_NAN_COUNT(type) \ REGISTER_KERNEL_BUILDER( \ @@ -70,4 +88,15 @@ REGISTER_GPU_DEBUG_NAN_COUNT(float); REGISTER_GPU_DEBUG_NAN_COUNT(double); #endif +#ifdef TENSORFLOW_USE_SYCL +#define REGISTER_GPU_DEBUG_NAN_COUNT(type) \ + REGISTER_KERNEL_BUILDER(Name("DebugNanCount") \ + .Device(DEVICE_SYCL) \ + .HostMemory("input") \ + .HostMemory("output") \ + .TypeConstraint<type>("T"), \ + DebugNanCountOp<type>); +REGISTER_GPU_DEBUG_NAN_COUNT(float); +#endif // TENSORFLOW_USE_SYCL + } // namespace tensorflow diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc index baa8f83091..5216a4b5d0 100644 --- a/tensorflow/core/kernels/dense_update_ops.cc +++ b/tensorflow/core/kernels/dense_update_ops.cc @@ -97,13 +97,20 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS); #if TENSORFLOW_USE_SYCL typedef Eigen::SyclDevice SYCLDevice; -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Assign") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint<type>("T"), \ - AssignOpT<SYCLDevice, type>); -TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Assign") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<type>("T"), \ + AssignOpT<SYCLDevice, type>); \ + REGISTER_KERNEL_BUILDER( \ + Name("AssignAdd").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ + DenseUpdateOp<SYCLDevice, type, DenseUpdateType::ADD>); \ + REGISTER_KERNEL_BUILDER( \ + Name("AssignSub").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ + DenseUpdateOp<SYCLDevice, type, DenseUpdateType::SUB>); + +REGISTER_SYCL_KERNEL(float); #undef 
REGISTER_SYCL_KERNEL #endif diff --git a/tensorflow/core/kernels/fact_op.cc b/tensorflow/core/kernels/fact_op.cc index 52ad2d0c1f..f1ab4c4a4d 100644 --- a/tensorflow/core/kernels/fact_op.cc +++ b/tensorflow/core/kernels/fact_op.cc @@ -73,25 +73,46 @@ static void E(string* s) { } } -template <const char* const FACTS[], uint64 N> class FactOpKernel : public OpKernel { public: explicit FactOpKernel(OpKernelConstruction* context) : OpKernel(context) {} - void Compute(OpKernelContext* context) override { + void Compute(OpKernelContext* context) override = 0; + + protected: + void Compute(OpKernelContext* context, const char* const facts[], + uint64 count) { Tensor* output_tensor = NULL; OP_REQUIRES_OK( context, context->allocate_output(0, TensorShape({}), &output_tensor)); auto output = output_tensor->template scalar<string>(); - string coded = FACTS[context->env()->NowMicros() % N]; + string coded = facts[context->env()->NowMicros() % count]; E(&coded); output() = coded; } }; +class FactOpKernel1 : public FactOpKernel { + public: + FactOpKernel1(OpKernelConstruction* context) : FactOpKernel(context) {} + + void Compute(OpKernelContext* context) override { + FactOpKernel::Compute(context, kFacts1, kNum1); + } +}; + +class FactOpKernel2 : public FactOpKernel { + public: + FactOpKernel2(OpKernelConstruction* context) : FactOpKernel(context) {} + + void Compute(OpKernelContext* context) override { + FactOpKernel::Compute(context, kFacts2, kNum2); + } +}; + REGISTER_KERNEL_BUILDER(Name("Fact").Device(DEVICE_GPU).HostMemory("fact"), - FactOpKernel<kFacts1, kNum1>); + FactOpKernel1); static string D(const char* s) { string ret(s); @@ -102,10 +123,10 @@ static string D(const char* s) { REGISTER_KERNEL_BUILDER(Name("Fact") .Device(DEVICE_CPU) .Label(D("Yoxmos").c_str()), - FactOpKernel<kFacts2, kNum2>); + FactOpKernel2); REGISTER_KERNEL_BUILDER(Name("Fact") .Device(DEVICE_CPU) .Label(D("yoxmos").c_str()), - FactOpKernel<kFacts2, kNum2>); + FactOpKernel2); } // namespace 
tensorflow diff --git a/tensorflow/core/kernels/fused_batch_norm_op_test.cc b/tensorflow/core/kernels/fused_batch_norm_op_test.cc index c4b942c56f..a3f760b746 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op_test.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op_test.cc @@ -79,7 +79,7 @@ TEST_F(FusedBatchNormOpTest, Inference) { AddInputFromArray<float>(TensorShape({2}), {4.0, 4.0}); AddInputFromArray<float>(TensorShape({2}), {2.0, 2.0}); AddInputFromArray<float>(TensorShape({2}), {10, 10}); - AddInputFromArray<float>(TensorShape({2}), {11.67, 11.67}); + AddInputFromArray<float>(TensorShape({2}), {11.67f, 11.67f}); TF_ASSERT_OK(RunOpKernel()); @@ -106,8 +106,8 @@ TEST_F(FusedBatchNormGradOpTest, Simple) { AddInputFromArray<float>(TensorShape({1, 1, 6, 2}), {1, 1, 7, 7, 4, 4, -3, -3, -11, -11, 13, 13}); AddInputFromArray<float>(TensorShape({2}), {4, 4}); - AddInputFromArray<float>(TensorShape({2}), {1.833, 1.833}); - AddInputFromArray<float>(TensorShape({2}), {57.472, 57.472}); + AddInputFromArray<float>(TensorShape({2}), {1.833f, 1.833f}); + AddInputFromArray<float>(TensorShape({2}), {57.472f, 57.472f}); TF_ASSERT_OK(RunOpKernel()); diff --git a/tensorflow/core/kernels/non_max_suppression_op_test.cc b/tensorflow/core/kernels/non_max_suppression_op_test.cc index 070dd49aef..72e368db77 100644 --- a/tensorflow/core/kernels/non_max_suppression_op_test.cc +++ b/tensorflow/core/kernels/non_max_suppression_op_test.cc @@ -45,9 +45,9 @@ class NonMaxSuppressionOpTest : public OpsTestBase { TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClusters) { MakeOp(.5); AddInputFromArray<float>(TensorShape({6, 4}), - {0, 0, 1, 1, 0, 0.1, 1, 1.1, 0, -0.1, 1, 0.9, - 0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100, 1, 101}); - AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3}); + {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f, + 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101}); + AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f}); 
AddInputFromArray<int>(TensorShape({}), {3}); TF_ASSERT_OK(RunOpKernel()); @@ -59,9 +59,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClusters) { TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClustersFlippedCoordinates) { MakeOp(.5); AddInputFromArray<float>(TensorShape({6, 4}), - {1, 1, 0, 0, 0, 0.1, 1, 1.1, 0, .9, 1, -0.1, - 0, 10, 1, 11, 1, 10.1, 0, 11.1, 1, 101, 0, 100}); - AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3}); + {1, 1, 0, 0, 0, 0.1f, 1, 1.1f, 0, .9f, 1, -0.1f, + 0, 10, 1, 11, 1, 10.1f, 0, 11.1f, 1, 101, 0, 100}); + AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f}); AddInputFromArray<int>(TensorShape({}), {3}); TF_ASSERT_OK(RunOpKernel()); @@ -73,9 +73,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClustersFlippedCoordinates) { TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostTwoBoxesFromThreeClusters) { MakeOp(.5); AddInputFromArray<float>(TensorShape({6, 4}), - {0, 0, 1, 1, 0, 0.1, 1, 1.1, 0, -0.1, 1, 0.9, - 0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100, 1, 101}); - AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3}); + {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f, + 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101}); + AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f}); AddInputFromArray<int>(TensorShape({}), {2}); TF_ASSERT_OK(RunOpKernel()); @@ -87,9 +87,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostTwoBoxesFromThreeClusters) { TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostThirtyBoxesFromThreeClusters) { MakeOp(.5); AddInputFromArray<float>(TensorShape({6, 4}), - {0, 0, 1, 1, 0, 0.1, 1, 1.1, 0, -0.1, 1, 0.9, - 0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100, 1, 101}); - AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3}); + {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f, + 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101}); + AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f}); 
AddInputFromArray<int>(TensorShape({}), {30}); TF_ASSERT_OK(RunOpKernel()); @@ -101,7 +101,7 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostThirtyBoxesFromThreeClusters) { TEST_F(NonMaxSuppressionOpTest, TestSelectSingleBox) { MakeOp(.5); AddInputFromArray<float>(TensorShape({1, 4}), {0, 0, 1, 1}); - AddInputFromArray<float>(TensorShape({1}), {.9}); + AddInputFromArray<float>(TensorShape({1}), {.9f}); AddInputFromArray<int>(TensorShape({}), {3}); TF_ASSERT_OK(RunOpKernel()); @@ -136,9 +136,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectFromTenIdenticalBoxes) { TEST_F(NonMaxSuppressionOpTest, TestInconsistentBoxAndScoreShapes) { MakeOp(.5); AddInputFromArray<float>(TensorShape({6, 4}), - {0, 0, 1, 1, 0, 0.1, 1, 1.1, 0, -0.1, 1, 0.9, - 0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100, 1, 101}); - AddInputFromArray<float>(TensorShape({5}), {.9, .75, .6, .95, .5}); + {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f, + 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101}); + AddInputFromArray<float>(TensorShape({5}), {.9f, .75f, .6f, .95f, .5f}); AddInputFromArray<int>(TensorShape({}), {30}); Status s = RunOpKernel(); @@ -151,7 +151,7 @@ TEST_F(NonMaxSuppressionOpTest, TestInconsistentBoxAndScoreShapes) { TEST_F(NonMaxSuppressionOpTest, TestInvalidIOUThreshold) { MakeOp(1.2); AddInputFromArray<float>(TensorShape({1, 4}), {0, 0, 1, 1}); - AddInputFromArray<float>(TensorShape({1}), {.9}); + AddInputFromArray<float>(TensorShape({1}), {.9f}); AddInputFromArray<int>(TensorShape({}), {3}); Status s = RunOpKernel(); diff --git a/tensorflow/core/kernels/resize_bilinear_op_test.cc b/tensorflow/core/kernels/resize_bilinear_op_test.cc index deb36849e7..66836ff788 100644 --- a/tensorflow/core/kernels/resize_bilinear_op_test.cc +++ b/tensorflow/core/kernels/resize_bilinear_op_test.cc @@ -95,9 +95,10 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3) { // clang-format off test::FillValues<float>(&expected, - {1, 5.0/3, 2, - 7.0/3, 3, 10.0/3, - 3, 11.0/3, 4}); + {1, 5.0f / 3, 2, + 7.0f / 3, 
3, 10.0f / 3, + 3, 11.0f / 3, 4}); + // clang-format on test::ExpectTensorEqual<float>(expected, *GetOutput(0)); @@ -206,9 +207,9 @@ TEST_F(ResizeBilinearOpTest, TestBilinear4x4To3x3) { // clang-format off test::FillValues<float>(&expected, - {1, 7.0/3, 11.0/3, - 19.0/3, 23.0/3, 27.0/3, - 35.0/3, 39.0/3, 43.0/3}); + {1, 7.0f/3, 11.0f/3, + 19.0f/3, 23.0f/3, 27.0f/3, + 35.0f/3, 39.0f/3, 43.0f/3}); // clang-format on test::ExpectTensorEqual<float>(expected, *GetOutput(0)); @@ -251,8 +252,8 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3Batch2) { Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3, 3, 1})); // clang-format off test::FillValues<float>(&expected, - {1, 5.0/3, 2, 7.0/3, 3, 10.0/3, 3, 11.0/3, 4, - 1, 5.0/3, 2, 7.0/3, 3, 10.0/3, 3, 11.0/3, 4 + {1, 5.0f/3, 2, 7.0f/3, 3, 10.0f/3, 3, 11.0f/3, 4, + 1, 5.0f/3, 2, 7.0f/3, 3, 10.0f/3, 3, 11.0f/3, 4 }); // clang-format on test::ExpectTensorEqual<float>(expected, *GetOutput(0)); @@ -268,15 +269,15 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2x2To3x3x2) { // clang-format off test::FillValues<float>(&expected, { - 1, -1, - 5.0/3, -5.0/3, - 2, -2, - 7.0/3, -7.0/3, - 3, -3, - 10.0/3, -10.0/3, - 3, -3, - 11.0/3, -11.0/3, - 4, -4 + 1, -1, + 5.0f/3, -5.0f/3, + 2, -2, + 7.0f/3, -7.0f/3, + 3, -3, + 10.0f/3, -10.0f/3, + 3, -3, + 11.0f/3, -11.0f/3, + 4, -4 }); // clang-format on test::ExpectTensorEqual<float>(expected, *GetOutput(0)); diff --git a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc index a9a9bd46b7..a8c4b3746a 100644 --- a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc +++ b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc @@ -54,6 +54,8 @@ static Graph* ConstructSpaceToBatchGraph( return g; } +// The BM_Expand macro is needed for this to build with VC++. 
+#define BM_Expand(x) x #define BM_SpaceToBatchDev(OP, DEVICE, DTYPE, B, H, W, D, BS, P00, P01, P10, \ P11) \ static void \ @@ -69,10 +71,10 @@ static Graph* ConstructSpaceToBatchGraph( BENCHMARK( \ BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11); #define BM_SpaceToBatch(OP, ...) \ - BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__); \ - BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__); \ - BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__); \ - BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__); + BM_Expand(BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__)); \ + BM_Expand(BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__)); \ + BM_Expand(BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__)); \ + BM_Expand(BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__)); BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 64, 2, 0, 0, 0, 0); BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 1, 2, 0, 0, 0, 0); diff --git a/tensorflow/core/kernels/sparse_add_op_test.cc b/tensorflow/core/kernels/sparse_add_op_test.cc index 7baf27c1d0..4cad02bbee 100644 --- a/tensorflow/core/kernels/sparse_add_op_test.cc +++ b/tensorflow/core/kernels/sparse_add_op_test.cc @@ -61,8 +61,10 @@ TEST_F(SparseAddOpTest, TwoD_AddSparseTensorWithSelf) { // [3 4] const auto indices_shape = TensorShape({4, 2}); - const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; - const gtl::ArraySlice<int64> shape = {3, 2}; + std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 }; + const gtl::ArraySlice<int64> indices(in); + std::initializer_list<int64> sh{ 3, 2 }; + const gtl::ArraySlice<int64> shape(sh); #define ADD_TENSOR_INPUT() \ AddInputFromArray<int64>(indices_shape, indices); \ @@ -99,8 +101,10 @@ TEST_F(SparseAddOpTest, TwoD_AddSparseTensorWithSelf) { DataType val_dtype = tensorflow::DataTypeToEnum<VALTYPE>::value; \ \ const auto indices_shape = TensorShape({4, 2}); \ - const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; \ - const 
gtl::ArraySlice<int64> shape = {3, 2}; \ + std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1}; \ + const gtl::ArraySlice<int64> indices(in); \ + std::initializer_list<int64> sh{3, 2}; \ + const gtl::ArraySlice<int64> shape(sh); \ \ AddInputFromArray<int64>(indices_shape, indices); \ AddInputFromArray<VALTYPE>(TensorShape({4}), {1, 2, 3, 4}); \ @@ -154,8 +158,10 @@ RUN_TEST(complex128); MakeOp<VALTYPE>(); \ DataType val_dtype = tensorflow::DataTypeToEnum<VALTYPE>::value; \ const auto indices_shape = TensorShape({4, 2}); \ - const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; \ - const gtl::ArraySlice<int64> shape = {3, 2}; \ + std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1}; \ + const gtl::ArraySlice<int64> indices(in); \ + std::initializer_list<int64> sh{3, 2}; \ + const gtl::ArraySlice<int64> shape(sh); \ \ auto AddSparseTensor = [indices, indices_shape, shape, \ this](bool negate) { \ @@ -192,10 +198,10 @@ RUN_TEST(complex128); } RUN_TEST(int64, 1); -RUN_TEST(float, 1e-3); -RUN_TEST(double, 1e-3); -RUN_TEST(complex64, 1e-3); -RUN_TEST(complex128, 1e-3); +RUN_TEST(float, 1e-3f); +RUN_TEST(double, 1e-3f); +RUN_TEST(complex64, 1e-3f); +RUN_TEST(complex128, 1e-3f); #undef RUN_TEST } // namespace diff --git a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc index 7ef3070d06..eaf1884243 100644 --- a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc +++ b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc @@ -96,8 +96,10 @@ TEST_F(SparseDenseCDivTest, SameShape) { // [2 ] cdiv [dense: same shape, all 1's] // [3 4] const auto indices_shape = TensorShape({4, 2}); - const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; - const gtl::ArraySlice<int64> shape = {3, 2}; + std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 }; + const gtl::ArraySlice<int64> indices(in); + std::initializer_list<int64> sh{ 3, 2 }; + const 
gtl::ArraySlice<int64> shape(sh); // Tensor dense(DT_FLOAT, TensorShape({3, 1})); Tensor dense(DT_FLOAT, TensorShape(shape)); @@ -123,8 +125,10 @@ TEST_F(SparseDenseCDivTest, BroadcastDenseSameDims) { // [2 ] cdiv [dense: shape [3,1], all 1's] // [3 4] const auto indices_shape = TensorShape({4, 2}); - const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; - const gtl::ArraySlice<int64> shape = {3, 2}; + std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 }; + const gtl::ArraySlice<int64> indices(in); + std::initializer_list<int64> sh{ 3, 2 }; + const gtl::ArraySlice<int64> shape(sh); Tensor dense(DT_FLOAT, TensorShape({3, 1})); auto dense_flat = dense.flat<float>(); @@ -148,8 +152,10 @@ TEST_F(SparseDenseCDivTest, BroadcastDenseFewerDims) { // [2 ] cdiv [dense: shape [2]] // [3 4] const auto indices_shape = TensorShape({4, 2}); - const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; - const gtl::ArraySlice<int64> shape = {3, 2}; + std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 }; + const gtl::ArraySlice<int64> indices(in); + std::initializer_list<int64> sh{ 3, 2 }; + const gtl::ArraySlice<int64> shape(sh); Tensor dense(DT_FLOAT, TensorShape({2})); auto dense_flat = dense.flat<float>(); @@ -178,8 +184,10 @@ TEST_F(SparseDenseCMulTest, BroadcastDense) { // [1 ?] where ? remains implicitly zero. 
// [1.5 0] const auto indices_shape = TensorShape({4, 2}); - const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; - const gtl::ArraySlice<int64> shape = {3, 2}; + std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 }; + const gtl::ArraySlice<int64> indices(in); + std::initializer_list<int64> sh{ 3, 2 }; + const gtl::ArraySlice<int64> shape(sh); Tensor dense(DT_FLOAT, TensorShape({2})); auto dense_flat = dense.flat<float>(); diff --git a/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc b/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc index 2fb78a2a21..110376be42 100644 --- a/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc +++ b/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc @@ -51,8 +51,10 @@ TEST_F(SparseReduceSumOpTest, SimpleReduce) { // [3 4] const auto indices_shape = TensorShape({4, 2}); - const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; - const gtl::ArraySlice<int64> shape = {3, 2}; + std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 }; + const gtl::ArraySlice<int64> indices(in); + std::initializer_list<int64> sh{ 3, 2 }; + const gtl::ArraySlice<int64> shape(sh); AddInputFromArray<int64>(indices_shape, indices); AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4}); @@ -91,8 +93,10 @@ TEST_F(SparseReduceSumSparseOpTest, SimpleReduce) { // [3 4] const auto indices_shape = TensorShape({4, 2}); - const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; - const gtl::ArraySlice<int64> shape = {3, 2}; + std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 }; + const gtl::ArraySlice<int64> indices(in); + std::initializer_list<int64> sh{ 3, 2 }; + const gtl::ArraySlice<int64> shape(sh); AddInputFromArray<int64>(indices_shape, indices); AddInputFromArray<float>(TensorShape({4}), {2, 2, 3, 4}); diff --git a/tensorflow/core/kernels/summary_image_op_test.cc b/tensorflow/core/kernels/summary_image_op_test.cc index 96a4d4183f..f936276925 100644 --- a/tensorflow/core/kernels/summary_image_op_test.cc 
+++ b/tensorflow/core/kernels/summary_image_op_test.cc @@ -126,16 +126,16 @@ TEST_F(SummaryImageOpTest, OneColorImage4dInput) { AddInputFromArray<float>( TensorShape({1 /*batch*/, 5 /*rows*/, 2 /*columns*/, 3 /*depth*/}), { - /* r0, c0, RGB */ 1.0, 0.1, 0.2, - /* r0, c1, RGB */ 1.0, 0.3, 0.4, - /* r1, c0, RGB */ 0.0, 1.0, 0.0, - /* r1, c1, RGB */ 0.0, 1.0, 0.0, - /* r2, c0, RGB */ 0.0, 0.0, 1.0, - /* r2, c1, RGB */ 0.0, 0.0, 1.0, - /* r3, c0, RGB */ 1.0, 1.0, 0.0, - /* r3, c1, RGB */ 1.0, 0.0, 1.0, - /* r4, c0, RGB */ 1.0, 1.0, 0.0, - /* r4, c1, RGB */ 1.0, 0.0, 1.0, + /* r0, c0, RGB */ 1.0f, 0.1f, 0.2f, + /* r0, c1, RGB */ 1.0f, 0.3f, 0.4f, + /* r1, c0, RGB */ 0.0f, 1.0f, 0.0f, + /* r1, c1, RGB */ 0.0f, 1.0f, 0.0f, + /* r2, c0, RGB */ 0.0f, 0.0f, 1.0f, + /* r2, c1, RGB */ 0.0f, 0.0f, 1.0f, + /* r3, c0, RGB */ 1.0f, 1.0f, 0.0f, + /* r3, c1, RGB */ 1.0f, 0.0f, 1.0f, + /* r4, c0, RGB */ 1.0f, 1.0f, 0.0f, + /* r4, c1, RGB */ 1.0f, 0.0f, 1.0f, }); TF_ASSERT_OK(RunOpKernel()); diff --git a/tensorflow/core/kernels/summary_op_test.cc b/tensorflow/core/kernels/summary_op_test.cc index 9fd2bd2b5e..05b1687e5f 100644 --- a/tensorflow/core/kernels/summary_op_test.cc +++ b/tensorflow/core/kernels/summary_op_test.cc @@ -61,7 +61,7 @@ TEST_F(SummaryScalarOpTest, SimpleFloat) { // Feed and run AddInputFromArray<string>(TensorShape({3}), {"tag1", "tag2", "tag3"}); - AddInputFromArray<float>(TensorShape({3}), {1.0, -0.73, 10000.0}); + AddInputFromArray<float>(TensorShape({3}), {1.0f, -0.73f, 10000.0f}); TF_ASSERT_OK(RunOpKernel()); // Check the output size. 
@@ -121,7 +121,7 @@ TEST_F(SummaryScalarOpTest, Error_MismatchedSize) { // Feed and run AddInputFromArray<string>(TensorShape({2}), {"tag1", "tag2"}); - AddInputFromArray<float>(TensorShape({3}), {1.0, -0.73, 10000.0}); + AddInputFromArray<float>(TensorShape({3}), {1.0f, -0.73f, 10000.0f}); Status s = RunOpKernel(); EXPECT_TRUE(StringPiece(s.ToString()).contains("not the same shape")) << s; } @@ -131,7 +131,7 @@ TEST_F(SummaryScalarOpTest, Error_WrongDimsTags) { // Feed and run AddInputFromArray<string>(TensorShape({2, 1}), {"tag1", "tag2"}); - AddInputFromArray<float>(TensorShape({2}), {1.0, -0.73}); + AddInputFromArray<float>(TensorShape({2}), {1.0f, -0.73f}); Status s = RunOpKernel(); EXPECT_TRUE( StringPiece(s.ToString()).contains("tags and values not the same shape")) @@ -143,7 +143,7 @@ TEST_F(SummaryScalarOpTest, Error_WrongDimsValues) { // Feed and run AddInputFromArray<string>(TensorShape({2}), {"tag1", "tag2"}); - AddInputFromArray<float>(TensorShape({2, 1}), {1.0, -0.73}); + AddInputFromArray<float>(TensorShape({2, 1}), {1.0f, -0.73f}); Status s = RunOpKernel(); EXPECT_TRUE( StringPiece(s.ToString()).contains("tags and values not the same shape")) @@ -169,7 +169,8 @@ TEST_F(SummaryHistoOpTest, SimpleFloat) { // Feed and run AddInputFromArray<string>(TensorShape({}), {"taghisto"}); - AddInputFromArray<float>(TensorShape({3, 2}), {0.1, -0.7, 4.1, 4., 5., 4.}); + AddInputFromArray<float>(TensorShape({3, 2}), + {0.1f, -0.7f, 4.1f, 4., 5.f, 4.f}); TF_ASSERT_OK(RunOpKernel()); // Check the output size. 
@@ -254,7 +255,7 @@ TEST_F(SummaryHistoOpTest, Error_WrongDimsTags) { // Feed and run AddInputFromArray<string>(TensorShape({2, 1}), {"tag1", "tag2"}); - AddInputFromArray<float>(TensorShape({2}), {1.0, -0.73}); + AddInputFromArray<float>(TensorShape({2}), {1.0f, -0.73f}); Status s = RunOpKernel(); EXPECT_TRUE(StringPiece(s.ToString()).contains("tags must be scalar")) << s; } @@ -264,7 +265,7 @@ TEST_F(SummaryHistoOpTest, Error_TooManyTagValues) { // Feed and run AddInputFromArray<string>(TensorShape({2}), {"tag1", "tag2"}); - AddInputFromArray<float>(TensorShape({2, 1}), {1.0, -0.73}); + AddInputFromArray<float>(TensorShape({2, 1}), {1.0f, -0.73f}); Status s = RunOpKernel(); EXPECT_TRUE(StringPiece(s.ToString()).contains("tags must be scalar")) << s; } diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 733278e440..f6acdf2422 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -64,7 +64,7 @@ struct ApplyAdadelta<GPUDevice, T> { bcast[0] = grad.dimension(0); Eigen::Sizes<1> single; - accum.device(d) = accum_update * rho.reshape(single).broadcast(bcast) + + accum.device(d) = accum * rho.reshape(single).broadcast(bcast) + grad.square() * (grad.constant(T(1)) - rho.reshape(single).broadcast(bcast)); const auto update = diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc index 1a9aa4d903..34e227156d 100644 --- a/tensorflow/core/kernels/variable_ops.cc +++ b/tensorflow/core/kernels/variable_ops.cc @@ -33,14 +33,31 @@ REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU), IsVariableInitializedOp); #if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Variable").Device(DEVICE_SYCL).TypeConstraint<TYPE>("dtype"), \ - VariableOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<TYPE>("dtype"), \ - 
VariableOp); -TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); +#define REGISTER_SYCL_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Variable") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("dtype"), \ + VariableOp); \ + REGISTER_KERNEL_BUILDER(Name("VariableV2") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("dtype"), \ + VariableOp); \ + REGISTER_KERNEL_BUILDER(Name("TemporaryVariable") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("dtype"), \ + TemporaryVariableOp); \ + REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("T"), \ + DestroyTemporaryVariableOp); \ + REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<TYPE>("dtype") \ + .HostMemory("is_initialized"), \ + IsVariableInitializedOp); + +REGISTER_SYCL_KERNEL(float); #undef REGISTER_SYCL_KERNEL #endif diff --git a/tensorflow/core/lib/core/notification_test.cc b/tensorflow/core/lib/core/notification_test.cc index 8cb1c895ad..9d96708b6f 100644 --- a/tensorflow/core/lib/core/notification_test.cc +++ b/tensorflow/core/lib/core/notification_test.cc @@ -67,7 +67,9 @@ TEST(NotificationTest, TestMultipleThreadsWaitingOnNotification) { ++counter; }); } - sleep(1); + + // Sleep 1 second. 
+ Env::Default()->SleepForMicroseconds(1 * 1000 * 1000); EXPECT_EQ(0, counter); diff --git a/tensorflow/core/lib/gtl/cleanup.h b/tensorflow/core/lib/gtl/cleanup.h index 230cdb624b..6053e98640 100644 --- a/tensorflow/core/lib/gtl/cleanup.h +++ b/tensorflow/core/lib/gtl/cleanup.h @@ -96,7 +96,7 @@ class Cleanup { bool is_released() const { return released_; } private: - static_assert(!std::is_reference<F>(), "F must not be a reference"); + static_assert(!std::is_reference<F>::value, "F must not be a reference"); bool released_ = false; F f_; diff --git a/tensorflow/core/lib/gtl/edit_distance_test.cc b/tensorflow/core/lib/gtl/edit_distance_test.cc index 02968b6ae8..18a400713f 100644 --- a/tensorflow/core/lib/gtl/edit_distance_test.cc +++ b/tensorflow/core/lib/gtl/edit_distance_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/edit_distance.h" +#include <cctype> #include <vector> #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" diff --git a/tensorflow/core/lib/strings/strcat_test.cc b/tensorflow/core/lib/strings/strcat_test.cc index 25561f1bd1..c556b1f676 100644 --- a/tensorflow/core/lib/strings/strcat_test.cc +++ b/tensorflow/core/lib/strings/strcat_test.cc @@ -22,6 +22,11 @@ limitations under the License. #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" +#ifdef _MSC_VER +// ssize_t is not a standard C++ type. +typedef ptrdiff_t ssize_t; +#endif + namespace tensorflow { namespace strings { diff --git a/tensorflow/core/ops/nn_ops_test.cc b/tensorflow/core/ops/nn_ops_test.cc index 3618769dc0..974d7aa87b 100644 --- a/tensorflow/core/ops/nn_ops_test.cc +++ b/tensorflow/core/ops/nn_ops_test.cc @@ -507,7 +507,7 @@ TEST(NNOpsTest, FractionalPool_ShapeFn) { .Finalize(&op.node_def)); }; - set_op(std::vector<float>{2.0, 1, 1 / 1.5, 1 / 2.0}); + set_op(std::vector<float>{2.0f, 1, 1 / 1.5f, 1 / 2.0f}); // Rank check. 
INFER_ERROR("must be rank 4", op, "[?,?,?]"); diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc index d1f63589ea..8370e57b88 100644 --- a/tensorflow/core/ops/state_ops.cc +++ b/tensorflow/core/ops/state_ops.cc @@ -295,7 +295,7 @@ This operation outputs `ref` after the update is done. This makes it easier to chain operations that need to use the reset value. If values in `ref` is to be updated more than once, because there are -duplicate entires in `indices`, the order at which the updates happen +duplicate entries in `indices`, the order at which the updates happen for each value is undefined. Requires `updates.shape = indices.shape + ref.shape[1:]`. diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 64a6ab0c7a..83a2a17d48 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -25,7 +25,7 @@ def tf_deps(deps, suffix): return tf_deps def tf_proto_library_cc(name, srcs = [], has_services = None, - deps = [], visibility = [], testonly = 0, + protodeps = [], visibility = [], testonly = 0, cc_libs = [], cc_stubby_versions = None, cc_grpc_version = None, @@ -34,7 +34,7 @@ def tf_proto_library_cc(name, srcs = [], has_services = None, js_api_version = 2, js_codegen = "jspb"): native.filegroup( name = name + "_proto_srcs", - srcs = srcs + tf_deps(deps, "_proto_srcs"), + srcs = srcs + tf_deps(protodeps, "_proto_srcs"), testonly = testonly, ) @@ -43,10 +43,14 @@ def tf_proto_library_cc(name, srcs = [], has_services = None, use_grpc_plugin = True cc_proto_library( name = name + "_cc", - srcs = srcs + tf_deps(deps, "_proto_srcs"), - deps = deps + ["@protobuf//:cc_wkt_protos"], + srcs = srcs, + deps = tf_deps(protodeps, "_cc") + ["@protobuf//:cc_wkt_protos"], cc_libs = cc_libs + ["@protobuf//:protobuf"], - copts = ["-Wno-unused-but-set-variable", "-Wno-sign-compare"], + copts = [ + "-Wno-unknown-warning-option", + 
"-Wno-unused-but-set-variable", + "-Wno-sign-compare", + ], protoc = "@protobuf//:protoc", default_runtime = "@protobuf//:protobuf", use_grpc_plugin = use_grpc_plugin, @@ -54,13 +58,14 @@ def tf_proto_library_cc(name, srcs = [], has_services = None, visibility = visibility, ) -def tf_proto_library_py(name, srcs=[], deps=[], visibility=[], testonly=0, +def tf_proto_library_py(name, srcs=[], protodeps=[], deps=[], visibility=[], + testonly=0, srcs_version="PY2AND3"): py_proto_library( name = name + "_py", srcs = srcs, srcs_version = srcs_version, - deps = deps, + deps = deps + tf_deps(protodeps, "_py") + ["@protobuf//:protobuf_python"], protoc = "@protobuf//:protoc", default_runtime = "@protobuf//:protobuf_python", visibility = visibility, @@ -68,15 +73,16 @@ def tf_proto_library_py(name, srcs=[], deps=[], visibility=[], testonly=0, ) def tf_proto_library(name, srcs = [], has_services = None, - deps = [], visibility = [], testonly = 0, + protodeps = [], visibility = [], testonly = 0, cc_libs = [], cc_api_version = 2, go_api_version = 2, java_api_version = 2, py_api_version = 2, js_api_version = 2, js_codegen = "jspb"): + """Make a proto library, possibly depending on other proto libraries.""" tf_proto_library_cc( name = name, - srcs = srcs + tf_deps(deps, "_proto_srcs"), - deps = deps, + srcs = srcs, + protodeps = protodeps, cc_libs = cc_libs, testonly = testonly, visibility = visibility, @@ -84,9 +90,9 @@ def tf_proto_library(name, srcs = [], has_services = None, tf_proto_library_py( name = name, - srcs = srcs + tf_deps(deps, "_proto_srcs"), + srcs = srcs, + protodeps = protodeps, srcs_version = "PY2AND3", - deps = deps + ["@protobuf//:protobuf_python"], testonly = testonly, visibility = visibility, ) @@ -155,7 +161,16 @@ def tf_additional_test_deps(): return [] def tf_additional_test_srcs(): - return ["platform/default/test_benchmark.cc", "platform/posix/test.cc"] + return [ + "platform/default/test_benchmark.cc", + ] + select({ + "//tensorflow:windows" : [ + 
"platform/windows/test.cc" + ], + "//conditions:default" : [ + "platform/posix/test.cc", + ], + }) def tf_kernel_tests_linkstatic(): return 0 diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index 4ef795edcc..0857010f7c 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -10,7 +10,6 @@ exports_files(["LICENSE"]) load("//tensorflow:tensorflow.bzl", "if_cuda") load("//tensorflow:tensorflow.bzl", "tf_copts") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") -load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path") load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path") cc_library( @@ -138,7 +137,7 @@ filegroup( cc_library( name = "cuda", data = [ - "@local_config_cuda//cuda:{}".format(cuda_library_path("cudart")), + "@local_config_cuda//cuda:cudart", ], linkopts = select({ "@local_config_cuda//cuda:darwin": [ diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc index e7808ca08d..1d03725c78 100644 --- a/tensorflow/core/platform/default/logging.cc +++ b/tensorflow/core/platform/default/logging.cc @@ -81,7 +81,41 @@ void LogMessage::GenerateLogMessage() { } #endif -LogMessage::~LogMessage() { GenerateLogMessage(); } + +namespace { + +int64 MinLogLevel() { + const char* tf_env_var_val = getenv("TF_CPP_MIN_LOG_LEVEL"); + if (tf_env_var_val == nullptr) { + return 0; + } + + // Ideally we would use env_var / safe_strto64, but it is + // hard to use here without pulling in a lot of dependencies, + // so we do a poor-man's parsing. + string min_log_level(tf_env_var_val); + if (min_log_level == "1") { + // Maps to WARNING + return 1; + } else if (min_log_level == "2") { + // Maps to ERROR + return 2; + } else if (min_log_level == "3") { + // Maps to FATAL + return 3; + } else { + // Maps to INFO (the default). 
+ return 0; + } +} + +} // namespace + +LogMessage::~LogMessage() { + // Read the min log level once during the first call to logging. + static int64 min_log_level = MinLogLevel(); + if (TF_PREDICT_TRUE(severity_ >= min_log_level)) GenerateLogMessage(); +} LogMessageFatal::LogMessageFatal(const char* file, int line) : LogMessage(file, line, FATAL) {} diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h index 787ebe654b..428a45576f 100644 --- a/tensorflow/core/platform/env.h +++ b/tensorflow/core/platform/env.h @@ -208,12 +208,10 @@ class Env { // TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or // provide a routine to get the absolute time. - /// \brief Returns the number of micro-seconds since some fixed point in - /// time. Only useful for computing deltas of time. + /// \brief Returns the number of micro-seconds since the Unix epoch. virtual uint64 NowMicros() = 0; - /// \brief Returns the number of seconds since some fixed point in - /// time. Only useful for computing deltas of time. + /// \brief Returns the number of seconds since the Unix epoch. virtual uint64 NowSeconds() { return NowMicros() / 1000000L; } /// Sleeps/delays the thread for the prescribed number of micro-seconds. 
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index 3de3b17517..b0f0cbe3f1 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -112,6 +112,11 @@ class LibHDFS { } string path = io::JoinPath(hdfs_home, "lib", "native", "libhdfs.so"); status_ = TryLoadAndBind(path.c_str(), &handle_); + if (!status_.ok()) { + // try load libhdfs.so using dynamic loader's search path in case libhdfs.so + // is installed in non-standard location + status_ = TryLoadAndBind("libhdfs.so", &handle_); + } return; } diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc index 78d000bff8..402c718e4f 100644 --- a/tensorflow/core/platform/port_test.cc +++ b/tensorflow/core/platform/port_test.cc @@ -36,8 +36,14 @@ TEST(ConditionVariable, WaitForMilliseconds_Timeout) { mutex m; mutex_lock l(m); condition_variable cv; + ConditionResult result = kCond_MaybeNotified; time_t start = time(NULL); - EXPECT_EQ(WaitForMilliseconds(&l, &cv, 3000), kCond_Timeout); + // Condition variables are subject to spurious wakeups on some platforms, + // so need to check for a timeout within a loop. + while (result == kCond_MaybeNotified) { + result = WaitForMilliseconds(&l, &cv, 3000); + } + EXPECT_EQ(result, kCond_Timeout); time_t finish = time(NULL); EXPECT_GE(finish - start, 3); } @@ -51,7 +57,7 @@ TEST(ConditionVariable, WaitForMilliseconds_Signalled) { // Sleep for just 1 second then notify. We have a timeout of 3 secs, // so the condition variable will notice the cv signal before the timeout. 
pool.Schedule([&m, &cv]() { - sleep(1); + Env::Default()->SleepForMicroseconds(1 * 1000 * 1000); mutex_lock l(m); cv.notify_all(); }); diff --git a/tensorflow/core/platform/subprocess.h b/tensorflow/core/platform/subprocess.h index 7dfd38688d..dfdcf82173 100644 --- a/tensorflow/core/platform/subprocess.h +++ b/tensorflow/core/platform/subprocess.h @@ -53,7 +53,7 @@ class SubProcess; defined(PLATFORM_GOOGLE_ANDROID) #include "tensorflow/core/platform/posix/subprocess.h" #elif defined(PLATFORM_WINDOWS) -#error SubProcess not yet implemented for Windows +#include "tensorflow/core/platform/windows/subprocess.h" #else #error Define the appropriate PLATFORM_<foo> macro for this platform #endif diff --git a/tensorflow/core/platform/windows/subprocess.h b/tensorflow/core/platform/windows/subprocess.h new file mode 100644 index 0000000000..b65313363e --- /dev/null +++ b/tensorflow/core/platform/windows/subprocess.h @@ -0,0 +1,27 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_ +#define TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_ + +namespace tensorflow { + +// SubProcess is not yet implemented for Windows. 
+class SubProcess { +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_ diff --git a/tensorflow/core/platform/windows/test.cc b/tensorflow/core/platform/windows/test.cc new file mode 100644 index 0000000000..0ffd02ff14 --- /dev/null +++ b/tensorflow/core/platform/windows/test.cc @@ -0,0 +1,51 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/net.h" +#include "tensorflow/core/platform/test.h" + +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace testing { + +std::unique_ptr<SubProcess> CreateSubProcess(const std::vector<string>& argv) { + LOG(FATAL) << "CreateSubProcess NOT IMPLEMENTED for Windows yet ! "; + return nullptr; +} + +int PickUnusedPortOrDie() { return internal::PickUnusedPortOrDie(); } + +string TensorFlowSrcRoot() { + // 'bazel test' and cmake set TEST_SRCDIR. + // New versions of bazel also set TEST_WORKSPACE. 
+ const char* env = getenv("TEST_SRCDIR"); + const char* workspace = getenv("TEST_WORKSPACE"); + if (env && env[0] != '\0') { + if (workspace && workspace[0] != '\0') { + return strings::StrCat(env, "/", workspace, "/tensorflow"); + } else { + return strings::StrCat(env, "/tensorflow"); + } + } else { + LOG(WARNING) << "TEST_SRCDIR environment variable not set: " + << "using $PWD/tensorflow as TensorFlowSrcRoot() for tests."; + return "tensorflow"; + } +} + +} // namespace testing +} // namespace tensorflow diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc index 31516bb2ee..670abf3fdf 100644 --- a/tensorflow/core/platform/windows/windows_file_system.cc +++ b/tensorflow/core/platform/windows/windows_file_system.cc @@ -467,6 +467,23 @@ Status WindowsFileSystem::RenameFile(const string& src, const string& target) { return result; } +Status WindowsFileSystem::GetMatchingPaths(const string& pattern, + std::vector<string>* results) { + // NOTE(mrry): The existing implementation of FileSystem::GetMatchingPaths() + // does not handle Windows paths containing backslashes correctly. Since + // Windows APIs will accept forward and backslashes equivalently, we + // convert the pattern to use forward slashes exclusively. Note that this + // is not ideal, since the API expects backslash as an escape character, + // but no code appears to rely on this behavior. 
+ string converted_pattern(pattern); + std::replace(converted_pattern.begin(), converted_pattern.end(), '\\', '/'); + TF_RETURN_IF_ERROR(FileSystem::GetMatchingPaths(converted_pattern, results)); + for (string& result : *results) { + std::replace(result.begin(), result.end(), '/', '\\'); + } + return Status::OK(); +} + Status WindowsFileSystem::Stat(const string& fname, FileStatistics* stat) { Status result; struct _stat sbuf; diff --git a/tensorflow/core/platform/windows/windows_file_system.h b/tensorflow/core/platform/windows/windows_file_system.h index dd83a27caf..507290e9e6 100644 --- a/tensorflow/core/platform/windows/windows_file_system.h +++ b/tensorflow/core/platform/windows/windows_file_system.h @@ -48,6 +48,9 @@ class WindowsFileSystem : public FileSystem { Status GetChildren(const string& dir, std::vector<string>* result) override; + Status GetMatchingPaths(const string& pattern, + std::vector<string>* result) override; + Status Stat(const string& fname, FileStatistics* stat) override; Status DeleteFile(const string& fname) override; diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 1de976fb3d..34673be216 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,7 +19,7 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 0 -#define TF_MINOR_VERSION 11 +#define TF_MINOR_VERSION 12 #define TF_PATCH_VERSION head // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. 
"-alpha", "-alpha.1", diff --git a/tensorflow/core/util/memmapped_file_system.cc b/tensorflow/core/util/memmapped_file_system.cc index d67f948f1d..e077e94cf8 100644 --- a/tensorflow/core/util/memmapped_file_system.cc +++ b/tensorflow/core/util/memmapped_file_system.cc @@ -177,8 +177,13 @@ const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const { return reinterpret_cast<const uint8*>(mapped_memory_->data()) + offset; } +#if defined(COMPILER_MSVC) +constexpr char* MemmappedFileSystem::kMemmappedPackagePrefix; +constexpr char* MemmappedFileSystem::kMemmappedPackageDefaultGraphDef; +#else constexpr char MemmappedFileSystem::kMemmappedPackagePrefix[]; constexpr char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[]; +#endif Status MemmappedFileSystem::InitializeFromFile(Env* env, const string& filename) { diff --git a/tensorflow/core/util/memmapped_file_system.h b/tensorflow/core/util/memmapped_file_system.h index d64c4a765c..541587aeab 100644 --- a/tensorflow/core/util/memmapped_file_system.h +++ b/tensorflow/core/util/memmapped_file_system.h @@ -53,9 +53,19 @@ class MemmappedFileSystem : public FileSystem { public: // Memmapped regions use this prefix to distinguish from // the filesystem. - static constexpr char kMemmappedPackagePrefix[] = "memmapped_package://"; - // The default graphdef in the package. +#if defined(COMPILER_MSVC) + static constexpr char* kMemmappedPackagePrefix = +#else + static constexpr char kMemmappedPackagePrefix[] = +#endif + "memmapped_package://"; + +// The default graphdef in the package. 
+#if defined(COMPILER_MSVC) + static constexpr char* kMemmappedPackageDefaultGraphDef = +#else static constexpr char kMemmappedPackageDefaultGraphDef[] = +#endif "memmapped_package://."; MemmappedFileSystem(); diff --git a/tensorflow/core/util/memmapped_file_system_test.cc b/tensorflow/core/util/memmapped_file_system_test.cc index c7d919041a..179c72c1f5 100644 --- a/tensorflow/core/util/memmapped_file_system_test.cc +++ b/tensorflow/core/util/memmapped_file_system_test.cc @@ -137,8 +137,15 @@ TEST(MemmappedFileSystemTest, ProxyToDefault) { const string dir = testing::TmpDir(); const string filename = io::JoinPath(dir, "test_file"); // Check that we can create write and read ordinary file. - std::unique_ptr<WritableFile> writable_file; - TF_ASSERT_OK(memmapped_env.NewAppendableFile(filename, &writable_file)); + std::unique_ptr<WritableFile> writable_file_temp; + TF_ASSERT_OK(memmapped_env.NewAppendableFile(filename, &writable_file_temp)); + // Making sure to clean up after the test finishes. 
+ const auto adh = [&memmapped_env, &filename](WritableFile* f) { + delete f; + memmapped_env.DeleteFile(filename); + }; + std::unique_ptr<WritableFile, decltype(adh)> writable_file( + writable_file_temp.release(), adh); const string test_string = "bla-bla-bla"; TF_ASSERT_OK(writable_file->Append(test_string)); TF_ASSERT_OK(writable_file->Close()); diff --git a/tensorflow/core/util/semver_test.cc b/tensorflow/core/util/semver_test.cc index 75994a658e..0647f670c7 100644 --- a/tensorflow/core/util/semver_test.cc +++ b/tensorflow/core/util/semver_test.cc @@ -63,6 +63,10 @@ TEST(SemverTest, VersionStringFollowsSemver) { if (major == 0 && minor <= 11) { return; } + if (str_util::ConsumePrefix(&semver, "head")) { + ASSERT_TRUE(semver.empty()); + return; + } ASSERT_TRUE(str_util::ConsumeLeadingDigits(&semver, &patch)); if (semver.empty()) return; if (semver[0] == '-') { diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h index a575d98da3..9d6f9e8bb5 100644 --- a/tensorflow/core/util/sparse/sparse_tensor.h +++ b/tensorflow/core/util/sparse/sparse_tensor.h @@ -17,8 +17,9 @@ limitations under the License. 
#define TENSORFLOW_UTIL_SPARSE_SPARSE_TENSOR_H_ #include <limits> - +#include <numeric> #include <vector> + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc index 0b675eaac9..6bd3d9c780 100644 --- a/tensorflow/core/util/stat_summarizer.cc +++ b/tensorflow/core/util/stat_summarizer.cc @@ -340,10 +340,10 @@ std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions( std::string StatSummarizer::GetOutputString() const { std::stringstream stream; - stream << "Total time (us): " << run_total_micros_; + stream << "Total time (us): " << run_total_micros_ << std::endl; stream << GetTimingStatsByRunOrder(); stream << GetTimingStatsByTopDurations(); - stream << "Total Memory (bytes): " << memory_; + stream << "Total Memory (bytes): " << memory_ << std::endl; stream << GetMemoryStatsByRunOrder(); stream << GetMemoryStatsByUsage(); return stream.str(); |