Diffstat (limited to 'tensorflow/core')
-rw-r--r--  tensorflow/core/BUILD  | 32
-rw-r--r--  tensorflow/core/common_runtime/direct_session_test.cc  | 8
-rw-r--r--  tensorflow/core/common_runtime/executor.h  | 2
-rw-r--r--  tensorflow/core/common_runtime/sycl/sycl_allocator.cc  | 14
-rw-r--r--  tensorflow/core/common_runtime/sycl/sycl_allocator.h  | 6
-rw-r--r--  tensorflow/core/common_runtime/sycl/sycl_device.cc  | 15
-rw-r--r--  tensorflow/core/common_runtime/sycl/sycl_device.h  | 16
-rw-r--r--  tensorflow/core/common_runtime/sycl/sycl_device_context.cc  | 5
-rw-r--r--  tensorflow/core/common_runtime/sycl/sycl_device_factory.cc  | 1
-rw-r--r--  tensorflow/core/debug/BUILD  | 2
-rw-r--r--  tensorflow/core/debug/debug_gateway_test.cc  | 6
-rw-r--r--  tensorflow/core/debug/debug_io_utils.cc  | 6
-rw-r--r--  tensorflow/core/debug/debug_io_utils_test.cc  | 3
-rw-r--r--  tensorflow/core/framework/partial_tensor_shape_test.cc  | 2
-rw-r--r--  tensorflow/core/framework/tensor_testutil.h  | 2
-rw-r--r--  tensorflow/core/kernels/BUILD  | 6
-rw-r--r--  tensorflow/core/kernels/adjust_contrast_op_test.cc  | 4
-rw-r--r--  tensorflow/core/kernels/batch_norm_op_test.cc  | 10
-rw-r--r--  tensorflow/core/kernels/cast_op_test.cc  | 15
-rw-r--r--  tensorflow/core/kernels/colorspace_op_test.cc  | 10
-rw-r--r--  tensorflow/core/kernels/control_flow_ops.cc  | 26
-rw-r--r--  tensorflow/core/kernels/control_flow_ops_test.cc  | 18
-rw-r--r--  tensorflow/core/kernels/cwise_op_abs.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_acos.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_add_1.cc  | 2
-rw-r--r--  tensorflow/core/kernels/cwise_op_asin.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_atan.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_ceil.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_cos.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_div.cc  | 5
-rw-r--r--  tensorflow/core/kernels/cwise_op_exp.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_floor.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_floor_div.cc  | 8
-rw-r--r--  tensorflow/core/kernels/cwise_op_isfinite.cc  | 4
-rw-r--r--  tensorflow/core/kernels/cwise_op_isinf.cc  | 4
-rw-r--r--  tensorflow/core/kernels/cwise_op_isnan.cc  | 4
-rw-r--r--  tensorflow/core/kernels/cwise_op_log.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_log1p.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_neg.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_pow.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_rsqrt.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_sin.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_sqrt.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_square.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_sub.cc  | 2
-rw-r--r--  tensorflow/core/kernels/cwise_op_tan.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_op_tanh.cc  | 12
-rw-r--r--  tensorflow/core/kernels/cwise_ops_sycl_common.h  | 8
-rw-r--r--  tensorflow/core/kernels/debug_ops.cc  | 29
-rw-r--r--  tensorflow/core/kernels/dense_update_ops.cc  | 21
-rw-r--r--  tensorflow/core/kernels/fact_op.cc  | 33
-rw-r--r--  tensorflow/core/kernels/fused_batch_norm_op_test.cc  | 6
-rw-r--r--  tensorflow/core/kernels/non_max_suppression_op_test.cc  | 34
-rw-r--r--  tensorflow/core/kernels/resize_bilinear_op_test.cc  | 35
-rw-r--r--  tensorflow/core/kernels/spacetobatch_benchmark_test.cc  | 10
-rw-r--r--  tensorflow/core/kernels/sparse_add_op_test.cc  | 26
-rw-r--r--  tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc  | 24
-rw-r--r--  tensorflow/core/kernels/sparse_reduce_sum_op_test.cc  | 12
-rw-r--r--  tensorflow/core/kernels/summary_image_op_test.cc  | 20
-rw-r--r--  tensorflow/core/kernels/summary_op_test.cc  | 15
-rw-r--r--  tensorflow/core/kernels/training_ops_gpu.cu.cc  | 2
-rw-r--r--  tensorflow/core/kernels/variable_ops.cc  | 33
-rw-r--r--  tensorflow/core/lib/core/notification_test.cc  | 4
-rw-r--r--  tensorflow/core/lib/gtl/cleanup.h  | 2
-rw-r--r--  tensorflow/core/lib/gtl/edit_distance_test.cc  | 1
-rw-r--r--  tensorflow/core/lib/strings/strcat_test.cc  | 5
-rw-r--r--  tensorflow/core/ops/nn_ops_test.cc  | 2
-rw-r--r--  tensorflow/core/ops/state_ops.cc  | 2
-rw-r--r--  tensorflow/core/platform/default/build_config.bzl  | 41
-rw-r--r--  tensorflow/core/platform/default/build_config/BUILD  | 3
-rw-r--r--  tensorflow/core/platform/default/logging.cc  | 36
-rw-r--r--  tensorflow/core/platform/env.h  | 6
-rw-r--r--  tensorflow/core/platform/hadoop/hadoop_file_system.cc  | 5
-rw-r--r--  tensorflow/core/platform/port_test.cc  | 10
-rw-r--r--  tensorflow/core/platform/subprocess.h  | 2
-rw-r--r--  tensorflow/core/platform/windows/subprocess.h  | 27
-rw-r--r--  tensorflow/core/platform/windows/test.cc  | 51
-rw-r--r--  tensorflow/core/platform/windows/windows_file_system.cc  | 17
-rw-r--r--  tensorflow/core/platform/windows/windows_file_system.h  | 3
-rw-r--r--  tensorflow/core/public/version.h  | 2
-rw-r--r--  tensorflow/core/util/memmapped_file_system.cc  | 5
-rw-r--r--  tensorflow/core/util/memmapped_file_system.h  | 14
-rw-r--r--  tensorflow/core/util/memmapped_file_system_test.cc  | 11
-rw-r--r--  tensorflow/core/util/semver_test.cc  | 4
-rw-r--r--  tensorflow/core/util/sparse/sparse_tensor.h  | 3
-rw-r--r--  tensorflow/core/util/stat_summarizer.cc  | 4
86 files changed, 792 insertions, 230 deletions
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index fe40c691c5..991fc2f29d 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -949,12 +949,12 @@ cc_library(
# Libraries with GPU facilities that are useful for writing kernels.
cc_library(
name = "gpu_lib",
- srcs = if_not_windows([
+ srcs = [
"common_runtime/gpu/gpu_event_mgr.cc",
- ]),
- hdrs = if_not_windows([
+ ],
+ hdrs = [
"common_runtime/gpu/gpu_event_mgr.h",
- ]),
+ ],
copts = tf_copts(),
visibility = ["//visibility:public"],
deps = [
@@ -964,7 +964,8 @@ cc_library(
":lib_internal",
":proto_text",
":protos_all_cc",
- ] + if_not_windows([":stream_executor"]),
+ ":stream_executor",
+ ],
)
cc_library(
@@ -982,7 +983,7 @@ tf_proto_library_cc(
name = "worker_proto",
srcs = ["protobuf/worker.proto"],
cc_api_version = 2,
- cc_libs = [":protos_all_cc"],
+ protodeps = [":protos_all"],
visibility = [
"//tensorflow:internal",
],
@@ -993,8 +994,8 @@ tf_proto_library_cc(
srcs = ["protobuf/worker_service.proto"],
has_services = 1,
cc_api_version = 2,
- cc_libs = [":worker_proto_cc"],
cc_stubby_versions = ["2"],
+ protodeps = [":worker_proto"],
visibility = [
"//tensorflow:internal",
],
@@ -1004,7 +1005,7 @@ tf_proto_library_cc(
name = "master_proto",
srcs = ["protobuf/master.proto"],
cc_api_version = 2,
- cc_libs = [":protos_all_cc"],
+ protodeps = [":protos_all"],
visibility = [
"//tensorflow:internal",
],
@@ -1015,8 +1016,8 @@ tf_proto_library_cc(
srcs = ["protobuf/master_service.proto"],
has_services = 1,
cc_api_version = 2,
- cc_libs = [":master_proto_cc"],
cc_stubby_versions = ["2"],
+ protodeps = [":master_proto"],
visibility = [
"//tensorflow:internal",
],
@@ -1417,7 +1418,7 @@ tf_cuda_library(
tf_cuda_library(
name = "gpu_runtime",
- srcs = if_not_windows([
+ srcs = [
"common_runtime/gpu/gpu_bfc_allocator.cc",
"common_runtime/gpu/gpu_debug_allocator.cc",
"common_runtime/gpu/gpu_device.cc",
@@ -1429,8 +1430,8 @@ tf_cuda_library(
"common_runtime/gpu/pool_allocator.cc",
"common_runtime/gpu/process_state.cc",
"common_runtime/gpu_device_context.h",
- ]),
- hdrs = if_not_windows([
+ ],
+ hdrs = [
"common_runtime/gpu/gpu_bfc_allocator.h",
"common_runtime/gpu/gpu_debug_allocator.h",
"common_runtime/gpu/gpu_device.h",
@@ -1439,7 +1440,7 @@ tf_cuda_library(
"common_runtime/gpu/gpu_util.h",
"common_runtime/gpu/pool_allocator.h",
"common_runtime/gpu/process_state.h",
- ]),
+ ],
copts = tf_copts(),
linkstatic = 1,
deps = [
@@ -1451,10 +1452,9 @@ tf_cuda_library(
":lib",
":lib_internal",
":protos_all_cc",
- "//third_party/eigen3",
- ] + if_not_windows([
":stream_executor",
- ]),
+ "//third_party/eigen3",
+ ],
alwayslink = 1,
)
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index 44f17d6260..4b0165bae7 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -835,7 +835,7 @@ static void TestSessionInterOpThreadsImpl(bool use_function_lib) {
FunctionLibraryDefinition flib(OpRegistry::Global(), library_graph_def);
Graph g(&flib);
Tensor t(DT_FLOAT, TensorShape({}));
- t.scalar<float>()() = {1.2};
+ t.scalar<float>()() = {1.2f};
Node* x = test::graph::Constant(&g, t);
Node* y;
if (use_function_lib) {
@@ -945,7 +945,7 @@ TEST(DirectSessionTest, TestSessionInterOpThreadsWithFunctions) {
TEST(DirectSessionTest, TestSessionInterOpThreadsInvalidOptions) {
Graph g(OpRegistry::Global());
Tensor t(DT_FLOAT, TensorShape({}));
- t.scalar<float>()() = {1.2};
+ t.scalar<float>()() = {1.2f};
Node* x = test::graph::Constant(&g, t);
GraphDef def;
test::graph::ToGraphDef(&g, &def);
@@ -979,7 +979,7 @@ TEST(DirectSessionTest, TestDirectSessionRunClose) {
// Construct a graph with a variable and a single assign.
Graph g(OpRegistry::Global());
Tensor t(DT_FLOAT, TensorShape({}));
- t.scalar<float>()() = {1.2};
+ t.scalar<float>()() = {1.2f};
Node* var_val = test::graph::Constant(&g, t);
Node* var = test::graph::Var(&g, DT_FLOAT, {});
Node* var_assign = test::graph::Assign(&g, var, var_val);
@@ -1063,7 +1063,7 @@ TEST(DirectSessionTest, TestDirectSessionReset) {
// Construct a graph with a variable and a single assign.
Graph g(OpRegistry::Global());
Tensor t(DT_FLOAT, TensorShape({}));
- t.scalar<float>()() = {1.2};
+ t.scalar<float>()() = {1.2f};
Node* var_val = test::graph::Constant(&g, t);
Node* var = test::graph::Var(&g, DT_FLOAT, {});
Node* var_assign = test::graph::Assign(&g, var, var_val);
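
Note: the {1.2} -> {1.2f} edits in this test address C++11 list-initialization narrowing: a double literal inside braces cannot be silently narrowed to float, which MSVC rejects outright. A minimal stand-alone illustration (hypothetical file name, not part of the commit):

// narrowing_demo.cc -- illustration only, not from the commit: why the test
// constants gained an 'f' suffix.
int main() {
  float ok{1.2f};     // fine: float literal, no conversion needed
  // float bad{1.2};  // ill-formed: double -> float narrowing in list-init
  //                  // (1.2 is not exactly representable as a float)
  return ok > 0 ? 0 : 1;
}
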
diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h
index 8cca22fb6f..239c9666e3 100644
--- a/tensorflow/core/common_runtime/executor.h
+++ b/tensorflow/core/common_runtime/executor.h
@@ -39,7 +39,7 @@ class StepStatsCollector;
// Rendezvous* rendezvous = NewNaiveRendezvous();
// TF_CHECK_OK(rendezvous->Send("input", some_input_tensor));
// TF_CHECK_OK(executor->Run({ExecutorOpts, rendezvous, nullptr}));
-// TF_CHECK_OK(rendezvous->Recv("input", &output_tensor));
+// TF_CHECK_OK(rendezvous->Recv("output", &output_tensor));
// ... ...
//
// Multiple threads can call Executor::Run concurrently.
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
index 175b784825..699b54f345 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
@@ -19,16 +19,26 @@ limitations under the License.
namespace tensorflow {
-SYCLAllocator::~SYCLAllocator() { }
+SYCLAllocator::~SYCLAllocator() {}
string SYCLAllocator::Name() { return "device:SYCL"; }
void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
+ assert(device_);
auto p = device_->allocate(num_bytes);
return p;
}
-void SYCLAllocator::DeallocateRaw(void *ptr) { device_->deallocate(ptr); }
+void SYCLAllocator::DeallocateRaw(void *ptr) {
+ if (device_) {
+ device_->deallocate(ptr);
+ }
+}
+
+void SYCLAllocator::EnterLameDuckMode() {
+ device_->deallocate_all();
+ device_ = nullptr;
+}
} // namespace tensorflow
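
Note: the new EnterLameDuckMode() lets the device destructor release every outstanding buffer and then clear the queue pointer, so DeallocateRaw calls that arrive during teardown become no-ops instead of touching a dead queue. A simplified stand-alone sketch of the pattern (Backend stands in for Eigen::QueueInterface; all names below are illustrative, not from the commit):

// lame_duck_allocator.cc -- simplified sketch of the shutdown pattern above.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <vector>

struct Backend {                         // stand-in for the SYCL queue
  std::vector<void*> live;
  void* allocate(std::size_t n) { live.push_back(std::malloc(n)); return live.back(); }
  void deallocate(void* p) {
    live.erase(std::remove(live.begin(), live.end(), p), live.end());
    std::free(p);
  }
  void deallocate_all() {                // free every buffer still outstanding
    for (void* p : live) std::free(p);
    live.clear();
  }
};

class LameDuckAllocator {
 public:
  explicit LameDuckAllocator(Backend* b) : backend_(b) {}
  void* AllocateRaw(std::size_t n) {
    assert(backend_);                    // allocating after shutdown is a bug
    return backend_->allocate(n);
  }
  void DeallocateRaw(void* p) {
    if (backend_) backend_->deallocate(p);  // no-op once in lame-duck mode
  }
  void EnterLameDuckMode() {
    backend_->deallocate_all();          // drop everything still outstanding
    backend_ = nullptr;                  // later frees become harmless no-ops
  }
 private:
  Backend* backend_;                     // not owned
};

int main() {
  Backend queue;
  LameDuckAllocator alloc(&queue);
  void* p = alloc.AllocateRaw(64);
  alloc.EnterLameDuckMode();             // device teardown
  alloc.DeallocateRaw(p);                // safe: silently ignored
}
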
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.h b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
index 887c727f6e..8558b6c873 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
@@ -29,14 +29,16 @@ namespace tensorflow {
class SYCLAllocator : public Allocator {
public:
- SYCLAllocator(Eigen::SyclDevice* device) : device_(device) {}
+ SYCLAllocator(Eigen::QueueInterface* device) : device_(device) {}
virtual ~SYCLAllocator() override;
string Name() override;
void *AllocateRaw(size_t alignment, size_t num_bytes) override;
void DeallocateRaw(void *ptr) override;
+ void EnterLameDuckMode();
+ virtual bool ShouldAllocateEmptyTensors() override final { return true; }
private:
- Eigen::SyclDevice *device_; // not owned
+ Eigen::QueueInterface *device_; // not owned
TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
};
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc
index 10a037c02d..e5fe85bcf5 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.cc
@@ -25,8 +25,9 @@ namespace tensorflow {
SYCLDevice::~SYCLDevice() {
device_context_->Unref();
- delete sycl_allocator_;
+ sycl_allocator_->EnterLameDuckMode();
delete sycl_device_;
+ delete sycl_queue_;
}
void SYCLDevice::Compute(OpKernel *op_kernel, OpKernelContext *context) {
@@ -50,12 +51,8 @@ Allocator *SYCLDevice::GetAllocator(AllocatorAttributes attr) {
Status SYCLDevice::MakeTensorFromProto(const TensorProto &tensor_proto,
const AllocatorAttributes alloc_attrs,
Tensor *tensor) {
- AllocatorAttributes attr;
- attr.set_on_host(true);
- attr.set_gpu_compatible(true);
- Allocator *host_alloc = GetAllocator(attr);
Tensor parsed(tensor_proto.dtype());
- if (!parsed.FromProto(host_alloc, tensor_proto)) {
+ if (!parsed.FromProto(cpu_allocator_, tensor_proto)) {
return errors::InvalidArgument("Cannot parse tensor from proto: ",
tensor_proto.DebugString());
}
@@ -86,6 +83,12 @@ Status SYCLDevice::FillContextMap(const Graph *graph,
return Status::OK();
}
+Status SYCLDevice::Sync() {
+ sycl_device_->synchronize();
+ return Status::OK();
+}
+
+
} // namespace tensorflow
#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h
index d3b3db2a71..2759053df5 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.h
@@ -22,7 +22,6 @@ limitations under the License.
#define EIGEN_USE_SYCL
-#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/local_device.h"
#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
@@ -30,7 +29,6 @@ limitations under the License.
namespace tensorflow {
-
class SYCLDevice : public LocalDevice {
public:
template <typename SYCLSelector>
@@ -42,8 +40,9 @@ public:
name, DEVICE_SYCL, memory_limit, locality,
physical_device_desc), nullptr),
cpu_allocator_(cpu_allocator),
- sycl_device_(new Eigen::SyclDevice(sycl_selector)),
- sycl_allocator_(new SYCLAllocator(sycl_device_)),
+ sycl_queue_(new Eigen::QueueInterface(sycl_selector)),
+ sycl_device_(new Eigen::SyclDevice(sycl_queue_)),
+ sycl_allocator_(new SYCLAllocator(sycl_queue_)),
device_context_(new SYCLDeviceContext()) {
set_eigen_sycl_device(sycl_device_);
}
@@ -59,16 +58,17 @@ public:
Status FillContextMap(const Graph *graph,
DeviceContextMap *device_context_map) override;
- Status Sync() override { return Status::OK(); }
+ Status Sync() override;
static string GetShortDeviceDescription(/*int device_id,
const DeviceDescription& desc*/) {
return strings::StrCat("device: 0, name SYCL, pci bus id: 0");
}
private:
- Allocator *cpu_allocator_; // owned
- Eigen::SyclDevice* sycl_device_; // owned
- SYCLAllocator *sycl_allocator_; // owned
+ Allocator *cpu_allocator_; // owned
+ Eigen::QueueInterface* sycl_queue_; // owned
+ Eigen::SyclDevice* sycl_device_; // owned
+ SYCLAllocator *sycl_allocator_; // owned
SYCLDeviceContext *device_context_;
};
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
index 9dd289bebd..b487d24c20 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
@@ -16,13 +16,11 @@ limitations under the License.
#if TENSORFLOW_USE_SYCL
#define EIGEN_USE_SYCL
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
#include "tensorflow/core/common_runtime/dma_helper.h"
-#define EIGEN_USE_SYCL
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
namespace tensorflow {
void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor,
@@ -108,7 +106,6 @@ void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor,
StatusCallback done) {
const int64 total_bytes = device_tensor->TotalBytes();
if (total_bytes > 0) {
- device->eigen_sycl_device()->deallocate_all();
const void* src_ptr = DMAHelper::base(device_tensor);
void* dst_ptr = DMAHelper::base(cpu_tensor);
switch (device_tensor->dtype()) {
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
index 9b8770420c..cf9e349e01 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
@@ -15,6 +15,7 @@ limitations under the License.
#if TENSORFLOW_USE_SYCL
+#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/sycl/sycl_device.h"
namespace tensorflow {
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD
index 2363b69390..3e4ab5bc17 100644
--- a/tensorflow/core/debug/BUILD
+++ b/tensorflow/core/debug/BUILD
@@ -36,7 +36,7 @@ tf_proto_library_cc(
has_services = 1,
cc_api_version = 2,
cc_grpc_version = 1,
- cc_libs = ["//tensorflow/core:protos_all_cc"],
+ protodeps = ["//tensorflow/core:protos_all"],
)
# Depending on this target causes a concrete DebuggerState implementation
diff --git a/tensorflow/core/debug/debug_gateway_test.cc b/tensorflow/core/debug/debug_gateway_test.cc
index 1f6e766663..963cea8419 100644
--- a/tensorflow/core/debug/debug_gateway_test.cc
+++ b/tensorflow/core/debug/debug_gateway_test.cc
@@ -372,9 +372,9 @@ TEST_F(SessionDebugMinusAXTest,
debug_gateway.SetNodeValueCallback(
[this, &mu, &val_callback_count, &a_debug_identity_node_name,
&x_debug_identity_node_name, &y_debug_identity_node_name,
- &debug_identity_tensor_vals,
- &callbacks_done](const string& node_name, const int output_slot,
- const Tensor& tensor_value, const bool is_ref) {
+ &debug_identity_tensor_vals, &callbacks_done, &kConcurrentRuns](
+ const string& node_name, const int output_slot,
+ const Tensor& tensor_value, const bool is_ref) {
mutex_lock l(mu);
if (node_name == a_debug_identity_node_name && output_slot == 0) {
diff --git a/tensorflow/core/debug/debug_io_utils.cc b/tensorflow/core/debug/debug_io_utils.cc
index 41868ce8da..4b5ecaa9b6 100644
--- a/tensorflow/core/debug/debug_io_utils.cc
+++ b/tensorflow/core/debug/debug_io_utils.cc
@@ -18,6 +18,12 @@ limitations under the License.
#include <vector>
#include "grpc++/create_channel.h"
+
+#if defined(PLATFORM_WINDOWS)
+// winsock2.h is used in grpc, so Ws2_32.lib is needed
+#pragma comment(lib,"Ws2_32.lib")
+#endif
+
#include "tensorflow/core/debug/debug_service.grpc.pb.h"
#include "tensorflow/core/framework/summary.pb.h"
#include "tensorflow/core/lib/io/path.h"
diff --git a/tensorflow/core/debug/debug_io_utils_test.cc b/tensorflow/core/debug/debug_io_utils_test.cc
index 1ddab1689b..ab020517b0 100644
--- a/tensorflow/core/debug/debug_io_utils_test.cc
+++ b/tensorflow/core/debug/debug_io_utils_test.cc
@@ -273,7 +273,8 @@ TEST_F(DebugIOUtilsTest, PublishTensorConcurrentlyToPartiallyOverlappingPaths) {
auto fn = [this, &dump_count, &done_count, &mu, &dump_root_base, &dump_roots,
&dump_file_paths, &wall_time, &tensor_name, &debug_node_name,
- &kNodeName, &kDebugOpName, &kConcurrentPubs, &all_done]() {
+ &kNodeName, &kDebugOpName, &kConcurrentPubs, &kOutputSlot,
+ &all_done]() {
// "gumpy" is the shared directory part of the path.
string dump_root;
string debug_url;
diff --git a/tensorflow/core/framework/partial_tensor_shape_test.cc b/tensorflow/core/framework/partial_tensor_shape_test.cc
index b008a93c03..23f3d908fb 100644
--- a/tensorflow/core/framework/partial_tensor_shape_test.cc
+++ b/tensorflow/core/framework/partial_tensor_shape_test.cc
@@ -220,7 +220,7 @@ TEST(PartialTensorShapeTest, PartialShapeMergeWith) {
TEST(PartialTensorShapeTest, MakePartialShapeEmpty) {
// Empty made partial shapes should still be fully defined
- const int64 dims[0] = {};
+ const int64 dims[1] = {};
PartialTensorShape shape;
EXPECT_FALSE(shape.IsFullyDefined());
TF_ASSERT_OK(PartialTensorShape::MakePartialShape(dims, 0, &shape));
diff --git a/tensorflow/core/framework/tensor_testutil.h b/tensorflow/core/framework/tensor_testutil.h
index 73afca40ac..29b9de5c07 100644
--- a/tensorflow/core/framework/tensor_testutil.h
+++ b/tensorflow/core/framework/tensor_testutil.h
@@ -16,6 +16,8 @@ limitations under the License.
#ifndef TENSORFLOW_FRAMEWORK_TENSOR_TESTUTIL_H_
#define TENSORFLOW_FRAMEWORK_TENSOR_TESTUTIL_H_
+#include <numeric>
+
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/logging.h"
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 2315c2ffb6..e99ed9dfa8 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2342,7 +2342,6 @@ cc_library(
":batch_norm_op",
":bias_op",
":conv_ops",
- ":depthwise_conv_grad_op",
":dilation_ops",
":fused_batch_norm_op",
":in_topk_op",
@@ -2354,7 +2353,10 @@ cc_library(
":softsign_op",
":topk_op",
":xent_op",
- ] + if_not_windows([":depthwise_conv_op"]),
+ ] + if_not_windows([
+ ":depthwise_conv_grad_op",
+ ":depthwise_conv_op",
+ ]),
)
NN_DEPS = [
diff --git a/tensorflow/core/kernels/adjust_contrast_op_test.cc b/tensorflow/core/kernels/adjust_contrast_op_test.cc
index b925dc6883..06fd7ca419 100644
--- a/tensorflow/core/kernels/adjust_contrast_op_test.cc
+++ b/tensorflow/core/kernels/adjust_contrast_op_test.cc
@@ -56,7 +56,7 @@ TEST_F(AdjustContrastOpTest, Simple_1223) {
TF_EXPECT_OK(InitOp());
AddInputFromArray<float>(TensorShape({1, 2, 2, 3}),
{1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12});
- AddInputFromArray<float>(TensorShape({}), {0.2});
+ AddInputFromArray<float>(TensorShape({}), {0.2f});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 2, 2, 3}));
@@ -78,7 +78,7 @@ TEST_F(AdjustContrastOpTest, Big_99x99x3) {
}
AddInputFromArray<float>(TensorShape({1, 99, 99, 3}), values);
- AddInputFromArray<float>(TensorShape({}), {0.2});
+ AddInputFromArray<float>(TensorShape({}), {0.2f});
TF_ASSERT_OK(RunOpKernel());
}
diff --git a/tensorflow/core/kernels/batch_norm_op_test.cc b/tensorflow/core/kernels/batch_norm_op_test.cc
index 746b0d46ad..c5e55346eb 100644
--- a/tensorflow/core/kernels/batch_norm_op_test.cc
+++ b/tensorflow/core/kernels/batch_norm_op_test.cc
@@ -47,15 +47,15 @@ TEST_F(BatchNormOpTest, Simple) {
AddInputFromArray<float>(TensorShape({1, 1, 6, 2}),
{1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6});
AddInputFromArray<float>(TensorShape({2}), {10, 20});
- AddInputFromArray<float>(TensorShape({2}), {0.25, 0.5});
- AddInputFromArray<float>(TensorShape({2}), {0.1, 0.6});
- AddInputFromArray<float>(TensorShape({2}), {0.0, 0.0});
+ AddInputFromArray<float>(TensorShape({2}), {0.25f, 0.5f});
+ AddInputFromArray<float>(TensorShape({2}), {0.1f, 0.6f});
+ AddInputFromArray<float>(TensorShape({2}), {0.0f, 0.0f});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 1, 6, 2}));
test::FillValues<float>(
- &expected, {-17.86, -22.00, -15.87, -20.59, -13.87, -19.18, -21.86,
- -33.31, -23.85, -34.72, -25.85, -36.13});
+ &expected, {-17.86f, -22.00f, -15.87f, -20.59f, -13.87f, -19.18f, -21.86f,
+ -33.31f, -23.85f, -34.72f, -25.85f, -36.13f });
test::ExpectTensorNear<float>(expected, *GetOutput(0), 0.01);
}
diff --git a/tensorflow/core/kernels/cast_op_test.cc b/tensorflow/core/kernels/cast_op_test.cc
index ffad7fd02e..5b7529bb8a 100644
--- a/tensorflow/core/kernels/cast_op_test.cc
+++ b/tensorflow/core/kernels/cast_op_test.cc
@@ -49,17 +49,18 @@ class CastOpTest : public OpsTestBase {
TF_EXPECT_OK(InitOp());
}
- template <typename IN, typename OUT>
+ template <typename INPUT, typename OUTPUT>
void CheckCast() {
- DataType in_type = DataTypeToEnum<IN>::v();
- DataType out_type = DataTypeToEnum<OUT>::v();
+ DataType in_type = DataTypeToEnum<INPUT>::v();
+ DataType out_type = DataTypeToEnum<OUTPUT>::v();
MakeOp(in_type, out_type);
- AddInputFromArray<IN>(TensorShape({1, 2, 2, 1}),
- {IN(1), IN(2), IN(3), IN(4)});
+ AddInputFromArray<INPUT>(TensorShape({1, 2, 2, 1}),
+ {INPUT(1), INPUT(2), INPUT(3), INPUT(4)});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), out_type, TensorShape({1, 2, 2, 1}));
- test::FillValues<OUT>(&expected, {OUT(1), OUT(2), OUT(3), OUT(4)});
- test::ExpectTensorEqual<OUT>(expected, *GetOutput(0));
+ test::FillValues<OUTPUT>(&expected,
+ {OUTPUT(1), OUTPUT(2), OUTPUT(3), OUTPUT(4)});
+ test::ExpectTensorEqual<OUTPUT>(expected, *GetOutput(0));
}
};
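
Note: the IN/OUT -> INPUT/OUTPUT rename above is most likely a Windows portability fix: the platform headers define IN and OUT as empty annotation macros, so template parameters with those names vanish during preprocessing. Illustration only (the #define below mimics what the Windows headers do; nothing here is from the commit):

// in_out_macro_demo.cc -- illustration only.
#define IN                    // what the Windows/SAL headers effectively do
// template <typename IN>     // would expand to "template <typename >": ill-formed
// IN Identity(IN x);
template <typename INPUT>     // renamed parameter sidesteps the macro
INPUT Identity(INPUT x) { return x; }

int main() { return Identity(0); }
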
diff --git a/tensorflow/core/kernels/colorspace_op_test.cc b/tensorflow/core/kernels/colorspace_op_test.cc
index 4719a59b63..943d25a975 100644
--- a/tensorflow/core/kernels/colorspace_op_test.cc
+++ b/tensorflow/core/kernels/colorspace_op_test.cc
@@ -71,7 +71,7 @@ class RGBToHSVOpTest : public OpsTestBase {
void CheckRedMax(DataType data_type) {
// Test case where red channel dominates
- AddInputFromArray<T>(TensorShape({3}), {.8, .4, .2});
+ AddInputFromArray<T>(TensorShape({3}), {.8f, .4f, .2f});
TF_ASSERT_OK(RunOpKernel());
T expected_h = 1. / 6. * .2 / .6;
@@ -85,7 +85,7 @@ class RGBToHSVOpTest : public OpsTestBase {
void CheckGreenMax(DataType data_type) {
// Test case where green channel dominates
- AddInputFromArray<T>(TensorShape({3}), {.2, .8, .4});
+ AddInputFromArray<T>(TensorShape({3}), {.2f, .8f, .4f});
TF_ASSERT_OK(RunOpKernel());
T expected_h = 1. / 6. * (2.0 + (.2 / .6));
@@ -99,7 +99,7 @@ class RGBToHSVOpTest : public OpsTestBase {
void CheckBlueMax(DataType data_type) {
// Test case where blue channel dominates
- AddInputFromArray<T>(TensorShape({3}), {.4, .2, .8});
+ AddInputFromArray<T>(TensorShape({3}), {.4f, .2f, .8f});
TF_ASSERT_OK(RunOpKernel());
T expected_h = 1. / 6. * (4.0 + (.2 / .6));
@@ -112,7 +112,7 @@ class RGBToHSVOpTest : public OpsTestBase {
}
void CheckNegativeDifference(DataType data_type) {
- AddInputFromArray<T>(TensorShape({3}), {0, .1, .2});
+ AddInputFromArray<T>(TensorShape({3}), {0, .1f, .2f});
TF_ASSERT_OK(RunOpKernel());
T expected_h = 1. / 6. * (4.0 + (-.1 / .2));
@@ -220,7 +220,7 @@ class HSVToRGBOpTest : public OpsTestBase {
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
- test::FillValues<T>(&expected, {0, .1, .2});
+ test::FillValues<T>(&expected, {0, .1f, .2f});
test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
}
};
diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc
index e92b11efc6..b01263f288 100644
--- a/tensorflow/core/kernels/control_flow_ops.cc
+++ b/tensorflow/core/kernels/control_flow_ops.cc
@@ -113,9 +113,12 @@ REGISTER_GPU_HOST_REF_KERNEL(string);
#undef REGISTER_GPU_HOST_REF_KERNEL
#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type) \
- REGISTER_KERNEL_BUILDER( \
- Name("Switch").Device(DEVICE_SYCL).TypeConstraint<type>("T"), SwitchOp)
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER(Name("Switch") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<type>("T") \
+ .HostMemory("pred"), \
+ SwitchOp)
REGISTER_SYCL_KERNEL(bool);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
@@ -219,9 +222,12 @@ REGISTER_GPU_REF_KERNEL(bool);
#undef REGISTER_GPU_REF_KERNEL
#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type) \
- REGISTER_KERNEL_BUILDER( \
- Name("Merge").Device(DEVICE_SYCL).TypeConstraint<type>("T"), MergeOp)
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER(Name("Merge") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<type>("T") \
+ .HostMemory("value_index"), \
+ MergeOp)
REGISTER_SYCL_KERNEL(bool);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
@@ -418,8 +424,12 @@ REGISTER_GPU_HOST_KERNEL(string);
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(type) \
- REGISTER_KERNEL_BUILDER( \
- Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), NextIterationOp)
+ REGISTER_KERNEL_BUILDER(Name("NextIteration") \
+ .Device(DEVICE_SYCL) \
+ .HostMemory("data") \
+ .HostMemory("output") \
+ .TypeConstraint<type>("T"), \
+ NextIterationOp)
REGISTER_SYCL_KERNEL(bool);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
#undef REGISTER_SYCL_KERNEL
diff --git a/tensorflow/core/kernels/control_flow_ops_test.cc b/tensorflow/core/kernels/control_flow_ops_test.cc
index 97f56c392a..301609e04d 100644
--- a/tensorflow/core/kernels/control_flow_ops_test.cc
+++ b/tensorflow/core/kernels/control_flow_ops_test.cc
@@ -85,13 +85,27 @@ class AbortOpTest : public OpsTestBase {
protected:
};
+#ifdef PLATFORM_WINDOWS
+#define SIGABRT 3
+
+class KilledBySignal {
+ public:
+ explicit KilledBySignal(int signum) : signum_(signum) {}
+ bool operator()(int exit_status) const { return exit_status == signum_; }
+ private:
+ const int signum_;
+};
+#else
+#define KilledBySignal ::testing::KilledBySignal
+#endif
+
// Pass an error message to the op.
TEST_F(AbortOpTest, pass_error_msg) {
TF_ASSERT_OK(NodeDefBuilder("abort_op", "Abort")
.Attr("error_msg", "abort_op_test")
.Finalize(node_def()));
TF_ASSERT_OK(InitOp());
- EXPECT_EXIT(RunOpKernel(), ::testing::KilledBySignal(SIGABRT),
+ EXPECT_EXIT(RunOpKernel(), KilledBySignal(SIGABRT),
"Abort_op intentional failure; abort_op_test");
}
@@ -99,7 +113,7 @@ TEST_F(AbortOpTest, pass_error_msg) {
TEST_F(AbortOpTest, default_msg) {
TF_ASSERT_OK(NodeDefBuilder("abort_op", "Abort").Finalize(node_def()));
TF_ASSERT_OK(InitOp());
- EXPECT_EXIT(RunOpKernel(), ::testing::KilledBySignal(SIGABRT),
+ EXPECT_EXIT(RunOpKernel(), KilledBySignal(SIGABRT),
"Abort_op intentional failure; ");
}
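
Note: the Windows branch above substitutes a small functor for ::testing::KilledBySignal, which is unavailable there; EXPECT_EXIT accepts any callable that maps the child's exit status to bool, and on Windows abort() terminates the process with exit code 3. A portable stand-alone sketch of the same mechanism, using gtest's stock predicate of that shape (illustrative, not from the commit):

// death_test_sketch.cc -- assumes linking against gtest_main.
#include <cstdlib>
#include "gtest/gtest.h"

TEST(DeathTestSketch, ExitStatusPredicate) {
  // ::testing::ExitedWithCode has the same callable shape as the
  // KilledBySignal shim above: bool operator()(int exit_status) const.
  EXPECT_EXIT(std::exit(3), ::testing::ExitedWithCode(3), "");
}
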
diff --git a/tensorflow/core/kernels/cwise_op_abs.cc b/tensorflow/core/kernels/cwise_op_abs.cc
index fbc23b3b6f..8cf1eac41e 100644
--- a/tensorflow/core/kernels/cwise_op_abs.cc
+++ b/tensorflow/core/kernels/cwise_op_abs.cc
@@ -21,6 +21,18 @@ REGISTER5(UnaryOp, CPU, "Abs", functor::abs, float, Eigen::half, double, int32,
#if !defined(IS_MOBILE_PLATFORM)
REGISTER2(UnaryOp, CPU, "ComplexAbs", functor::abs, complex64, complex128);
#endif
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Abs") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::abs<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER4(UnaryOp, GPU, "Abs", functor::abs, float, Eigen::half, double, int64);
REGISTER2(UnaryOp, GPU, "ComplexAbs", functor::abs, complex64, complex128);
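
Note: the float-only SYCL registration block added here recurs, with only the op name and functor changed, in each of the cwise_op_* files that follow. A stand-alone sketch of the underlying technique, a macro that stamps out one registration per element type (all names below are invented; the real mechanism is REGISTER_KERNEL_BUILDER):

// registration_sketch.cc -- illustration only, not TensorFlow code.
#include <cstdio>
#include <functional>
#include <map>
#include <string>

static std::map<std::string, std::function<float(float)>>& Registry() {
  static std::map<std::string, std::function<float(float)>> r;
  return r;
}

// One macro invocation per type; the SYCL build above stamps out float only.
#define REGISTER_UNARY(NAME, FUNC) \
  static const bool registered_##FUNC = (Registry()[NAME] = FUNC, true)

float AbsF(float x) { return x < 0 ? -x : x; }
REGISTER_UNARY("Abs", AbsF);   // analogous to REGISTER_SYCL_KERNEL(float)

int main() {
  std::printf("%f\n", Registry()["Abs"](-3.5f));
  return 0;
}
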
diff --git a/tensorflow/core/kernels/cwise_op_acos.cc b/tensorflow/core/kernels/cwise_op_acos.cc
index c44c8bc6f6..1d2d815027 100644
--- a/tensorflow/core/kernels/cwise_op_acos.cc
+++ b/tensorflow/core/kernels/cwise_op_acos.cc
@@ -17,6 +17,18 @@ limitations under the License.
namespace tensorflow {
REGISTER2(UnaryOp, CPU, "Acos", functor::acos, float, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Acos") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::acos<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER2(UnaryOp, GPU, "Acos", functor::acos, float, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc
index 44c552d18e..a6bff78694 100644
--- a/tensorflow/core/kernels/cwise_op_add_1.cc
+++ b/tensorflow/core/kernels/cwise_op_add_1.cc
@@ -26,7 +26,7 @@ REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32,
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
BinaryOp<SYCLDevice, functor::add<TYPE>>);
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+ REGISTER_SYCL_KERNEL(float);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/cwise_op_asin.cc b/tensorflow/core/kernels/cwise_op_asin.cc
index bba20aa6af..92a22e90c4 100644
--- a/tensorflow/core/kernels/cwise_op_asin.cc
+++ b/tensorflow/core/kernels/cwise_op_asin.cc
@@ -17,6 +17,18 @@ limitations under the License.
namespace tensorflow {
REGISTER2(UnaryOp, CPU, "Asin", functor::asin, float, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Asin") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::asin<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER2(UnaryOp, GPU, "Asin", functor::asin, float, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_atan.cc b/tensorflow/core/kernels/cwise_op_atan.cc
index 055b8289d4..825e85283f 100644
--- a/tensorflow/core/kernels/cwise_op_atan.cc
+++ b/tensorflow/core/kernels/cwise_op_atan.cc
@@ -17,6 +17,18 @@ limitations under the License.
namespace tensorflow {
REGISTER2(UnaryOp, CPU, "Atan", functor::atan, float, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Atan") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::atan<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER2(UnaryOp, GPU, "Atan", functor::atan, float, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_ceil.cc b/tensorflow/core/kernels/cwise_op_ceil.cc
index 08ac1b4194..c5a4aaf831 100644
--- a/tensorflow/core/kernels/cwise_op_ceil.cc
+++ b/tensorflow/core/kernels/cwise_op_ceil.cc
@@ -17,6 +17,18 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Ceil") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::ceil<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_cos.cc b/tensorflow/core/kernels/cwise_op_cos.cc
index 2680143d65..a758da5842 100644
--- a/tensorflow/core/kernels/cwise_op_cos.cc
+++ b/tensorflow/core/kernels/cwise_op_cos.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, double,
complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Cos") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::cos<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc
index c2b05a69b2..ef8c477e48 100644
--- a/tensorflow/core/kernels/cwise_op_div.cc
+++ b/tensorflow/core/kernels/cwise_op_div.cc
@@ -30,6 +30,11 @@ REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double,
Name("Div") \
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::div<TYPE>>); \
+ REGISTER_KERNEL_BUILDER( \
+ Name("RealDiv") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
BinaryOp<SYCLDevice, functor::div<TYPE>>);
REGISTER_SYCL_KERNEL(float)
#undef REGISTER_SYCL_KERNEL
diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc
index 7ec3526282..0ee47f7dee 100644
--- a/tensorflow/core/kernels/cwise_op_exp.cc
+++ b/tensorflow/core/kernels/cwise_op_exp.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double,
complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Exp") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::exp<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_floor.cc b/tensorflow/core/kernels/cwise_op_floor.cc
index 732087d4cb..129d754b82 100644
--- a/tensorflow/core/kernels/cwise_op_floor.cc
+++ b/tensorflow/core/kernels/cwise_op_floor.cc
@@ -17,6 +17,18 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Floor") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::floor<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc
index a5767476c3..69dbb70b83 100644
--- a/tensorflow/core/kernels/cwise_op_floor_div.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_div.cc
@@ -18,6 +18,9 @@ limitations under the License.
namespace tensorflow {
REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
int16, int32, int64);
+REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
+ Eigen::half, double);
+
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
@@ -25,11 +28,10 @@ REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
BinaryOp<SYCLDevice, functor::floor_div<TYPE>>);
-TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_KERNEL);
+REGISTER_SYCL_KERNEL(float)
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
- Eigen::half, double);
+
#if GOOGLE_CUDA
REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
int64);
diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc
index e38b271318..59976141c7 100644
--- a/tensorflow/core/kernels/cwise_op_isfinite.cc
+++ b/tensorflow/core/kernels/cwise_op_isfinite.cc
@@ -18,6 +18,7 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
double);
+
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
@@ -25,9 +26,10 @@ REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
UnaryOp<SYCLDevice, functor::isfinite<TYPE>>);
-TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+REGISTER_SYCL_KERNEL(float);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
double);
diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc
index bf056dbe0e..675cb95b95 100644
--- a/tensorflow/core/kernels/cwise_op_isinf.cc
+++ b/tensorflow/core/kernels/cwise_op_isinf.cc
@@ -17,6 +17,7 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
+
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
@@ -24,9 +25,10 @@ REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
UnaryOp<SYCLDevice, functor::isinf<TYPE>>);
-TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+REGISTER_SYCL_KERNEL(float);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc
index d2bac23882..c394087ed8 100644
--- a/tensorflow/core/kernels/cwise_op_isnan.cc
+++ b/tensorflow/core/kernels/cwise_op_isnan.cc
@@ -17,6 +17,7 @@ limitations under the License.
namespace tensorflow {
REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double);
+
#if TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(TYPE) \
REGISTER_KERNEL_BUILDER( \
@@ -24,9 +25,10 @@ REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double);
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
UnaryOp<SYCLDevice, functor::isnan<TYPE>>);
-TF_CALL_REAL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+REGISTER_SYCL_KERNEL(float);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_log.cc b/tensorflow/core/kernels/cwise_op_log.cc
index be184f03de..71c4588b3d 100644
--- a/tensorflow/core/kernels/cwise_op_log.cc
+++ b/tensorflow/core/kernels/cwise_op_log.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Log", functor::log, float, Eigen::half, double,
complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Log") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::log<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Log", functor::log, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_log1p.cc b/tensorflow/core/kernels/cwise_op_log1p.cc
index 91a14989e6..03ea3a0a89 100644
--- a/tensorflow/core/kernels/cwise_op_log1p.cc
+++ b/tensorflow/core/kernels/cwise_op_log1p.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, double,
complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Log1p") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::log1p<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Log1p", functor::log1p, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_neg.cc b/tensorflow/core/kernels/cwise_op_neg.cc
index 67b088e110..4221fc0710 100644
--- a/tensorflow/core/kernels/cwise_op_neg.cc
+++ b/tensorflow/core/kernels/cwise_op_neg.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER7(UnaryOp, CPU, "Neg", functor::neg, float, Eigen::half, double, int32,
complex64, int64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Neg") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::neg<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER4(UnaryOp, GPU, "Neg", functor::neg, float, Eigen::half, double, int64);
diff --git a/tensorflow/core/kernels/cwise_op_pow.cc b/tensorflow/core/kernels/cwise_op_pow.cc
index dd28b36519..8eeba6ab14 100644
--- a/tensorflow/core/kernels/cwise_op_pow.cc
+++ b/tensorflow/core/kernels/cwise_op_pow.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER7(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, double, int32,
int64, complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Pow") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ BinaryOp<SYCLDevice, functor::pow<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER4(BinaryOp, GPU, "Pow", functor::pow, float, Eigen::half, double,
int64);
diff --git a/tensorflow/core/kernels/cwise_op_rsqrt.cc b/tensorflow/core/kernels/cwise_op_rsqrt.cc
index 3207166e94..7dc96d47a6 100644
--- a/tensorflow/core/kernels/cwise_op_rsqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_rsqrt.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double,
complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Rsqrt") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::rsqrt<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_sin.cc b/tensorflow/core/kernels/cwise_op_sin.cc
index 1e3880beb1..8d0c0959f7 100644
--- a/tensorflow/core/kernels/cwise_op_sin.cc
+++ b/tensorflow/core/kernels/cwise_op_sin.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, double,
complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Sin") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::sin<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Sin", functor::sin, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_sqrt.cc b/tensorflow/core/kernels/cwise_op_sqrt.cc
index aecffda4ba..710001517b 100644
--- a/tensorflow/core/kernels/cwise_op_sqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_sqrt.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Sqrt", functor::sqrt, float, Eigen::half, double,
complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Sqrt") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::sqrt<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Sqrt", functor::sqrt, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_square.cc b/tensorflow/core/kernels/cwise_op_square.cc
index 0ce4473d83..f867f127a7 100644
--- a/tensorflow/core/kernels/cwise_op_square.cc
+++ b/tensorflow/core/kernels/cwise_op_square.cc
@@ -18,6 +18,18 @@ limitations under the License.
namespace tensorflow {
REGISTER7(UnaryOp, CPU, "Square", functor::square, float, Eigen::half, double,
int32, int64, complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Square") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::square<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER4(UnaryOp, GPU, "Square", functor::square, float, Eigen::half, double,
int64);
diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc
index ed78ba37a8..e1326dbed1 100644
--- a/tensorflow/core/kernels/cwise_op_sub.cc
+++ b/tensorflow/core/kernels/cwise_op_sub.cc
@@ -31,7 +31,7 @@ REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
.Device(DEVICE_SYCL) \
.TypeConstraint<TYPE>("T"), \
BinaryOp<SYCLDevice, functor::sub<TYPE>>);
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+ REGISTER_SYCL_KERNEL(float);
#undef REGISTER_SYCL_KERNEL
#endif // TENSORFLOW_USE_SYCL
#if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/cwise_op_tan.cc b/tensorflow/core/kernels/cwise_op_tan.cc
index fca1addfa4..ac49cad88f 100644
--- a/tensorflow/core/kernels/cwise_op_tan.cc
+++ b/tensorflow/core/kernels/cwise_op_tan.cc
@@ -17,6 +17,18 @@ limitations under the License.
namespace tensorflow {
REGISTER2(UnaryOp, CPU, "Tan", functor::tan, float, double);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Tan") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::tan<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER2(UnaryOp, GPU, "Tan", functor::tan, float, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_op_tanh.cc b/tensorflow/core/kernels/cwise_op_tanh.cc
index a4c4aad053..ae2c473e20 100644
--- a/tensorflow/core/kernels/cwise_op_tanh.cc
+++ b/tensorflow/core/kernels/cwise_op_tanh.cc
@@ -19,6 +19,18 @@ limitations under the License.
namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, double,
complex64, complex128);
+
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Tanh") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ UnaryOp<SYCLDevice, functor::tanh<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER3(UnaryOp, GPU, "Tanh", functor::tanh, float, Eigen::half, double);
#endif
diff --git a/tensorflow/core/kernels/cwise_ops_sycl_common.h b/tensorflow/core/kernels/cwise_ops_sycl_common.h
index 4c22cc4855..3fcf0759d4 100644
--- a/tensorflow/core/kernels/cwise_ops_sycl_common.h
+++ b/tensorflow/core/kernels/cwise_ops_sycl_common.h
@@ -21,12 +21,10 @@ limitations under the License.
#define TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_
#define EIGEN_USE_SYCL
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/register_types.h"
-
-#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/kernels/cwise_ops.h"
-#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
@@ -62,14 +60,14 @@ struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
void operator()(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tin_type in0,
typename Functor::tin_type in1, bool* error) {
- To32Bit(out).device(d) = To32Bit(in0).binaryExpr(in1, typename Functor::func());
+ To32Bit(out).device(d) = To32Bit(in0).binaryExpr(To32Bit(in1), typename Functor::func());
}
void Left(const SYCLDevice& d, typename Functor::tout_type out,
typename Functor::tscalar_type scalar,
typename Functor::tin_type in, bool* error) {
typedef typename Functor::func Binary;
- constexpr int NumDims = Functor::tin_type::NumDimensions;
+ constexpr int NumDims = Functor::tin_type::NumDimensions;
typedef typename Functor::tin_type::Scalar T;
typedef typename Functor::tin_type::Index Index;
Eigen::array<Index, NumDims> scalar_dim = GenerateArrayOfOnes<Index, NumDims>();
diff --git a/tensorflow/core/kernels/debug_ops.cc b/tensorflow/core/kernels/debug_ops.cc
index 1a4d70c36b..78d386a5af 100644
--- a/tensorflow/core/kernels/debug_ops.cc
+++ b/tensorflow/core/kernels/debug_ops.cc
@@ -28,6 +28,16 @@ REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_CPU), CopyOp);
REGISTER_KERNEL_BUILDER(Name("CopyHost").Device(DEVICE_CPU), CopyOp);
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_SYCL), CopyOp);
+
+REGISTER_KERNEL_BUILDER(Name("CopyHost")
+ .Device(DEVICE_SYCL)
+ .HostMemory("input")
+ .HostMemory("output"),
+ CopyOp);
+#endif // TENSORFLOW_USE_SYCL
+
#if GOOGLE_CUDA
REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_GPU), CopyOp);
@@ -50,6 +60,14 @@ REGISTER_KERNEL_BUILDER(Name("DebugIdentity")
DebugIdentityOp);
#endif
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(Name("DebugIdentity")
+ .Device(DEVICE_SYCL)
+ .HostMemory("input")
+ .HostMemory("output"),
+ DebugIdentityOp);
+#endif // TENSORFLOW_USE_SYCL
+
// Register debug NaN-counter (non-ref and ref) ops.
#define REGISTER_DEBUG_NAN_COUNT(type) \
REGISTER_KERNEL_BUILDER( \
@@ -70,4 +88,15 @@ REGISTER_GPU_DEBUG_NAN_COUNT(float);
REGISTER_GPU_DEBUG_NAN_COUNT(double);
#endif
+#ifdef TENSORFLOW_USE_SYCL
+#define REGISTER_GPU_DEBUG_NAN_COUNT(type) \
+ REGISTER_KERNEL_BUILDER(Name("DebugNanCount") \
+ .Device(DEVICE_SYCL) \
+ .HostMemory("input") \
+ .HostMemory("output") \
+ .TypeConstraint<type>("T"), \
+ DebugNanCountOp<type>);
+REGISTER_GPU_DEBUG_NAN_COUNT(float);
+#endif // TENSORFLOW_USE_SYCL
+
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc
index baa8f83091..5216a4b5d0 100644
--- a/tensorflow/core/kernels/dense_update_ops.cc
+++ b/tensorflow/core/kernels/dense_update_ops.cc
@@ -97,13 +97,20 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
#if TENSORFLOW_USE_SYCL
typedef Eigen::SyclDevice SYCLDevice;
-#define REGISTER_SYCL_KERNEL(type) \
- REGISTER_KERNEL_BUILDER( \
- Name("Assign") \
- .Device(DEVICE_SYCL) \
- .TypeConstraint<type>("T"), \
- AssignOpT<SYCLDevice, type>);
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#define REGISTER_SYCL_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Assign") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<type>("T"), \
+ AssignOpT<SYCLDevice, type>); \
+ REGISTER_KERNEL_BUILDER( \
+ Name("AssignAdd").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
+ DenseUpdateOp<SYCLDevice, type, DenseUpdateType::ADD>); \
+ REGISTER_KERNEL_BUILDER( \
+ Name("AssignSub").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
+ DenseUpdateOp<SYCLDevice, type, DenseUpdateType::SUB>);
+
+REGISTER_SYCL_KERNEL(float);
#undef REGISTER_SYCL_KERNEL
#endif
diff --git a/tensorflow/core/kernels/fact_op.cc b/tensorflow/core/kernels/fact_op.cc
index 52ad2d0c1f..f1ab4c4a4d 100644
--- a/tensorflow/core/kernels/fact_op.cc
+++ b/tensorflow/core/kernels/fact_op.cc
@@ -73,25 +73,46 @@ static void E(string* s) {
}
}
-template <const char* const FACTS[], uint64 N>
class FactOpKernel : public OpKernel {
public:
explicit FactOpKernel(OpKernelConstruction* context) : OpKernel(context) {}
- void Compute(OpKernelContext* context) override {
+ void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+ void Compute(OpKernelContext* context, const char* const facts[],
+ uint64 count) {
Tensor* output_tensor = NULL;
OP_REQUIRES_OK(
context, context->allocate_output(0, TensorShape({}), &output_tensor));
auto output = output_tensor->template scalar<string>();
- string coded = FACTS[context->env()->NowMicros() % N];
+ string coded = facts[context->env()->NowMicros() % count];
E(&coded);
output() = coded;
}
};
+class FactOpKernel1 : public FactOpKernel {
+ public:
+ FactOpKernel1(OpKernelConstruction* context) : FactOpKernel(context) {}
+
+ void Compute(OpKernelContext* context) override {
+ FactOpKernel::Compute(context, kFacts1, kNum1);
+ }
+};
+
+class FactOpKernel2 : public FactOpKernel {
+ public:
+ FactOpKernel2(OpKernelConstruction* context) : FactOpKernel(context) {}
+
+ void Compute(OpKernelContext* context) override {
+ FactOpKernel::Compute(context, kFacts2, kNum2);
+ }
+};
+
REGISTER_KERNEL_BUILDER(Name("Fact").Device(DEVICE_GPU).HostMemory("fact"),
- FactOpKernel<kFacts1, kNum1>);
+ FactOpKernel1);
static string D(const char* s) {
string ret(s);
@@ -102,10 +123,10 @@ static string D(const char* s) {
REGISTER_KERNEL_BUILDER(Name("Fact")
.Device(DEVICE_CPU)
.Label(D("Yoxmos").c_str()),
- FactOpKernel<kFacts2, kNum2>);
+ FactOpKernel2);
REGISTER_KERNEL_BUILDER(Name("Fact")
.Device(DEVICE_CPU)
.Label(D("yoxmos").c_str()),
- FactOpKernel<kFacts2, kNum2>);
+ FactOpKernel2);
} // namespace tensorflow
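
Note: the FactOpKernel change above swaps a kernel templated on an array of string constants for a small class hierarchy, presumably to avoid array non-type template parameters that some toolchains (notably MSVC at the time) handle poorly. A stand-alone toy version of the same shape of refactor (all names invented for illustration):

// toy_refactor.cc -- illustration only, not from the commit.
#include <cstdio>

static const char* const kGreetings[] = {"hi", "hello"};
static const int kNumGreetings = 2;

// Before: behavior selected by template arguments, e.g.
//   template <const char* const WORDS[], int N> class Printer { ... };
// After: a shared base does the work; tiny subclasses pick the data.
class PrinterBase {
 public:
  virtual void Run() = 0;
  virtual ~PrinterBase() = default;
 protected:
  void Print(const char* const words[], int count) {
    for (int i = 0; i < count; ++i) std::puts(words[i]);
  }
};

class GreetingPrinter : public PrinterBase {
 public:
  void Run() override { Print(kGreetings, kNumGreetings); }
};

int main() {
  GreetingPrinter p;
  p.Run();
}
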
diff --git a/tensorflow/core/kernels/fused_batch_norm_op_test.cc b/tensorflow/core/kernels/fused_batch_norm_op_test.cc
index c4b942c56f..a3f760b746 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op_test.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op_test.cc
@@ -79,7 +79,7 @@ TEST_F(FusedBatchNormOpTest, Inference) {
AddInputFromArray<float>(TensorShape({2}), {4.0, 4.0});
AddInputFromArray<float>(TensorShape({2}), {2.0, 2.0});
AddInputFromArray<float>(TensorShape({2}), {10, 10});
- AddInputFromArray<float>(TensorShape({2}), {11.67, 11.67});
+ AddInputFromArray<float>(TensorShape({2}), {11.67f, 11.67f});
TF_ASSERT_OK(RunOpKernel());
@@ -106,8 +106,8 @@ TEST_F(FusedBatchNormGradOpTest, Simple) {
AddInputFromArray<float>(TensorShape({1, 1, 6, 2}),
{1, 1, 7, 7, 4, 4, -3, -3, -11, -11, 13, 13});
AddInputFromArray<float>(TensorShape({2}), {4, 4});
- AddInputFromArray<float>(TensorShape({2}), {1.833, 1.833});
- AddInputFromArray<float>(TensorShape({2}), {57.472, 57.472});
+ AddInputFromArray<float>(TensorShape({2}), {1.833f, 1.833f});
+ AddInputFromArray<float>(TensorShape({2}), {57.472f, 57.472f});
TF_ASSERT_OK(RunOpKernel());
diff --git a/tensorflow/core/kernels/non_max_suppression_op_test.cc b/tensorflow/core/kernels/non_max_suppression_op_test.cc
index 070dd49aef..72e368db77 100644
--- a/tensorflow/core/kernels/non_max_suppression_op_test.cc
+++ b/tensorflow/core/kernels/non_max_suppression_op_test.cc
@@ -45,9 +45,9 @@ class NonMaxSuppressionOpTest : public OpsTestBase {
TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClusters) {
MakeOp(.5);
AddInputFromArray<float>(TensorShape({6, 4}),
- {0, 0, 1, 1, 0, 0.1, 1, 1.1, 0, -0.1, 1, 0.9,
- 0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100, 1, 101});
- AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3});
+ {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f,
+ 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101});
+ AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f});
AddInputFromArray<int>(TensorShape({}), {3});
TF_ASSERT_OK(RunOpKernel());
@@ -59,9 +59,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClusters) {
TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClustersFlippedCoordinates) {
MakeOp(.5);
AddInputFromArray<float>(TensorShape({6, 4}),
- {1, 1, 0, 0, 0, 0.1, 1, 1.1, 0, .9, 1, -0.1,
- 0, 10, 1, 11, 1, 10.1, 0, 11.1, 1, 101, 0, 100});
- AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3});
+ {1, 1, 0, 0, 0, 0.1f, 1, 1.1f, 0, .9f, 1, -0.1f,
+ 0, 10, 1, 11, 1, 10.1f, 0, 11.1f, 1, 101, 0, 100});
+ AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f});
AddInputFromArray<int>(TensorShape({}), {3});
TF_ASSERT_OK(RunOpKernel());
@@ -73,9 +73,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectFromThreeClustersFlippedCoordinates) {
TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostTwoBoxesFromThreeClusters) {
MakeOp(.5);
AddInputFromArray<float>(TensorShape({6, 4}),
- {0, 0, 1, 1, 0, 0.1, 1, 1.1, 0, -0.1, 1, 0.9,
- 0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100, 1, 101});
- AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3});
+ {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f,
+ 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101});
+ AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f});
AddInputFromArray<int>(TensorShape({}), {2});
TF_ASSERT_OK(RunOpKernel());
@@ -87,9 +87,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostTwoBoxesFromThreeClusters) {
TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostThirtyBoxesFromThreeClusters) {
MakeOp(.5);
AddInputFromArray<float>(TensorShape({6, 4}),
- {0, 0, 1, 1, 0, 0.1, 1, 1.1, 0, -0.1, 1, 0.9,
- 0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100, 1, 101});
- AddInputFromArray<float>(TensorShape({6}), {.9, .75, .6, .95, .5, .3});
+ {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f,
+ 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101});
+ AddInputFromArray<float>(TensorShape({6}), {.9f, .75f, .6f, .95f, .5f, .3f});
AddInputFromArray<int>(TensorShape({}), {30});
TF_ASSERT_OK(RunOpKernel());
@@ -101,7 +101,7 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostThirtyBoxesFromThreeClusters) {
TEST_F(NonMaxSuppressionOpTest, TestSelectSingleBox) {
MakeOp(.5);
AddInputFromArray<float>(TensorShape({1, 4}), {0, 0, 1, 1});
- AddInputFromArray<float>(TensorShape({1}), {.9});
+ AddInputFromArray<float>(TensorShape({1}), {.9f});
AddInputFromArray<int>(TensorShape({}), {3});
TF_ASSERT_OK(RunOpKernel());
@@ -136,9 +136,9 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectFromTenIdenticalBoxes) {
TEST_F(NonMaxSuppressionOpTest, TestInconsistentBoxAndScoreShapes) {
MakeOp(.5);
AddInputFromArray<float>(TensorShape({6, 4}),
- {0, 0, 1, 1, 0, 0.1, 1, 1.1, 0, -0.1, 1, 0.9,
- 0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100, 1, 101});
- AddInputFromArray<float>(TensorShape({5}), {.9, .75, .6, .95, .5});
+ {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f,
+ 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101});
+ AddInputFromArray<float>(TensorShape({5}), {.9f, .75f, .6f, .95f, .5f});
AddInputFromArray<int>(TensorShape({}), {30});
Status s = RunOpKernel();
@@ -151,7 +151,7 @@ TEST_F(NonMaxSuppressionOpTest, TestInconsistentBoxAndScoreShapes) {
TEST_F(NonMaxSuppressionOpTest, TestInvalidIOUThreshold) {
MakeOp(1.2);
AddInputFromArray<float>(TensorShape({1, 4}), {0, 0, 1, 1});
- AddInputFromArray<float>(TensorShape({1}), {.9});
+ AddInputFromArray<float>(TensorShape({1}), {.9f});
AddInputFromArray<int>(TensorShape({}), {3});
Status s = RunOpKernel();
diff --git a/tensorflow/core/kernels/resize_bilinear_op_test.cc b/tensorflow/core/kernels/resize_bilinear_op_test.cc
index deb36849e7..66836ff788 100644
--- a/tensorflow/core/kernels/resize_bilinear_op_test.cc
+++ b/tensorflow/core/kernels/resize_bilinear_op_test.cc
@@ -95,9 +95,10 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3) {
// clang-format off
test::FillValues<float>(&expected,
- {1, 5.0/3, 2,
- 7.0/3, 3, 10.0/3,
- 3, 11.0/3, 4});
+ {1, 5.0f / 3, 2,
+ 7.0f / 3, 3, 10.0f / 3,
+ 3, 11.0f / 3, 4});
+
// clang-format on
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
@@ -206,9 +207,9 @@ TEST_F(ResizeBilinearOpTest, TestBilinear4x4To3x3) {
// clang-format off
test::FillValues<float>(&expected,
- {1, 7.0/3, 11.0/3,
- 19.0/3, 23.0/3, 27.0/3,
- 35.0/3, 39.0/3, 43.0/3});
+ {1, 7.0f/3, 11.0f/3,
+ 19.0f/3, 23.0f/3, 27.0f/3,
+ 35.0f/3, 39.0f/3, 43.0f/3});
// clang-format on
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
@@ -251,8 +252,8 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3Batch2) {
Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3, 3, 1}));
// clang-format off
test::FillValues<float>(&expected,
- {1, 5.0/3, 2, 7.0/3, 3, 10.0/3, 3, 11.0/3, 4,
- 1, 5.0/3, 2, 7.0/3, 3, 10.0/3, 3, 11.0/3, 4
+ {1, 5.0f/3, 2, 7.0f/3, 3, 10.0f/3, 3, 11.0f/3, 4,
+ 1, 5.0f/3, 2, 7.0f/3, 3, 10.0f/3, 3, 11.0f/3, 4
});
// clang-format on
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
@@ -268,15 +269,15 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2x2To3x3x2) {
// clang-format off
test::FillValues<float>(&expected,
{
- 1, -1,
- 5.0/3, -5.0/3,
- 2, -2,
- 7.0/3, -7.0/3,
- 3, -3,
- 10.0/3, -10.0/3,
- 3, -3,
- 11.0/3, -11.0/3,
- 4, -4
+ 1, -1,
+ 5.0f/3, -5.0f/3,
+ 2, -2,
+ 7.0f/3, -7.0f/3,
+ 3, -3,
+ 10.0f/3, -10.0f/3,
+ 3, -3,
+ 11.0f/3, -11.0f/3,
+ 4, -4
});
// clang-format on
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
diff --git a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
index a9a9bd46b7..a8c4b3746a 100644
--- a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
+++ b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
@@ -54,6 +54,8 @@ static Graph* ConstructSpaceToBatchGraph(
return g;
}
+// The BM_Expand macro is needed for this to build with VC++.
+#define BM_Expand(x) x
#define BM_SpaceToBatchDev(OP, DEVICE, DTYPE, B, H, W, D, BS, P00, P01, P10, \
P11) \
static void \
@@ -69,10 +71,10 @@ static Graph* ConstructSpaceToBatchGraph(
BENCHMARK( \
BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11);
#define BM_SpaceToBatch(OP, ...) \
- BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__); \
- BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__); \
- BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__); \
- BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__);
+ BM_Expand(BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__)); \
+ BM_Expand(BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__)); \
+ BM_Expand(BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__)); \
+ BM_Expand(BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__));
BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 64, 2, 0, 0, 0, 0);
BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 1, 2, 0, 0, 0, 0);
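
The BM_Expand comment above points at a known quirk of MSVC's traditional preprocessor: when __VA_ARGS__ is forwarded to another macro it is treated as a single argument unless it is run through an extra expansion pass. A self-contained sketch of the failure mode and the workaround (PRINT3, EXPAND and FORWARD are illustrative names):

#include <iostream>

// Takes three separate arguments.
#define PRINT3(a, b, c) std::cout << (a) << ' ' << (b) << ' ' << (c) << '\n'

// Extra expansion pass: forces __VA_ARGS__ to be re-scanned as separate
// arguments before PRINT3 is expanded (needed for MSVC's legacy preprocessor).
#define EXPAND(x) x

#define FORWARD(...) EXPAND(PRINT3(__VA_ARGS__))

int main() {
  FORWARD(1, 2, 3);  // prints "1 2 3"
}

Without the EXPAND pass, MSVC would hand PRINT3 the single argument "1, 2, 3" and report a wrong argument count; conforming preprocessors accept both forms.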
diff --git a/tensorflow/core/kernels/sparse_add_op_test.cc b/tensorflow/core/kernels/sparse_add_op_test.cc
index 7baf27c1d0..4cad02bbee 100644
--- a/tensorflow/core/kernels/sparse_add_op_test.cc
+++ b/tensorflow/core/kernels/sparse_add_op_test.cc
@@ -61,8 +61,10 @@ TEST_F(SparseAddOpTest, TwoD_AddSparseTensorWithSelf) {
// [3 4]
const auto indices_shape = TensorShape({4, 2});
- const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
- const gtl::ArraySlice<int64> shape = {3, 2};
+ std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+ const gtl::ArraySlice<int64> indices(in);
+ std::initializer_list<int64> sh{ 3, 2 };
+ const gtl::ArraySlice<int64> shape(sh);
#define ADD_TENSOR_INPUT() \
AddInputFromArray<int64>(indices_shape, indices); \
@@ -99,8 +101,10 @@ TEST_F(SparseAddOpTest, TwoD_AddSparseTensorWithSelf) {
DataType val_dtype = tensorflow::DataTypeToEnum<VALTYPE>::value; \
\
const auto indices_shape = TensorShape({4, 2}); \
- const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; \
- const gtl::ArraySlice<int64> shape = {3, 2}; \
+ std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1}; \
+ const gtl::ArraySlice<int64> indices(in); \
+ std::initializer_list<int64> sh{3, 2}; \
+ const gtl::ArraySlice<int64> shape(sh); \
\
AddInputFromArray<int64>(indices_shape, indices); \
AddInputFromArray<VALTYPE>(TensorShape({4}), {1, 2, 3, 4}); \
@@ -154,8 +158,10 @@ RUN_TEST(complex128);
MakeOp<VALTYPE>(); \
DataType val_dtype = tensorflow::DataTypeToEnum<VALTYPE>::value; \
const auto indices_shape = TensorShape({4, 2}); \
- const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1}; \
- const gtl::ArraySlice<int64> shape = {3, 2}; \
+ std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1}; \
+ const gtl::ArraySlice<int64> indices(in); \
+ std::initializer_list<int64> sh{3, 2}; \
+ const gtl::ArraySlice<int64> shape(sh); \
\
auto AddSparseTensor = [indices, indices_shape, shape, \
this](bool negate) { \
@@ -192,10 +198,10 @@ RUN_TEST(complex128);
}
RUN_TEST(int64, 1);
-RUN_TEST(float, 1e-3);
-RUN_TEST(double, 1e-3);
-RUN_TEST(complex64, 1e-3);
-RUN_TEST(complex128, 1e-3);
+RUN_TEST(float, 1e-3f);
+RUN_TEST(double, 1e-3f);
+RUN_TEST(complex64, 1e-3f);
+RUN_TEST(complex128, 1e-3f);
#undef RUN_TEST
} // namespace
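
The sparse test edits above (and the matching ones in the files that follow) stop constructing gtl::ArraySlice directly from a braced list and instead keep the std::initializer_list in a named local, presumably so the non-owning slice never points at a temporary's backing array. A small sketch of that lifetime discipline with a hand-rolled view type (SpanView is illustrative, not a TensorFlow class):

#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <iostream>

// A tiny non-owning view, standing in for gtl::ArraySlice in this sketch.
struct SpanView {
  const int64_t* data;
  size_t size;
  SpanView(const std::initializer_list<int64_t>& list)
      : data(list.begin()), size(list.size()) {}
};

int main() {
  // Keep the initializer_list in a named variable so its backing array
  // stays alive for as long as the view is used.
  std::initializer_list<int64_t> indices_data{0, 1, 1, 0, 2, 0, 2, 1};
  SpanView indices(indices_data);

  for (size_t i = 0; i < indices.size; ++i) {
    std::cout << indices.data[i] << ' ';
  }
  std::cout << '\n';  // prints: 0 1 1 0 2 0 2 1
}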
diff --git a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
index 7ef3070d06..eaf1884243 100644
--- a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
+++ b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
@@ -96,8 +96,10 @@ TEST_F(SparseDenseCDivTest, SameShape) {
// [2 ] cdiv [dense: same shape, all 1's]
// [3 4]
const auto indices_shape = TensorShape({4, 2});
- const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
- const gtl::ArraySlice<int64> shape = {3, 2};
+ std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+ const gtl::ArraySlice<int64> indices(in);
+ std::initializer_list<int64> sh{ 3, 2 };
+ const gtl::ArraySlice<int64> shape(sh);
// Tensor dense(DT_FLOAT, TensorShape({3, 1}));
Tensor dense(DT_FLOAT, TensorShape(shape));
@@ -123,8 +125,10 @@ TEST_F(SparseDenseCDivTest, BroadcastDenseSameDims) {
// [2 ] cdiv [dense: shape [3,1], all 1's]
// [3 4]
const auto indices_shape = TensorShape({4, 2});
- const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
- const gtl::ArraySlice<int64> shape = {3, 2};
+ std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+ const gtl::ArraySlice<int64> indices(in);
+ std::initializer_list<int64> sh{ 3, 2 };
+ const gtl::ArraySlice<int64> shape(sh);
Tensor dense(DT_FLOAT, TensorShape({3, 1}));
auto dense_flat = dense.flat<float>();
@@ -148,8 +152,10 @@ TEST_F(SparseDenseCDivTest, BroadcastDenseFewerDims) {
// [2 ] cdiv [dense: shape [2]]
// [3 4]
const auto indices_shape = TensorShape({4, 2});
- const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
- const gtl::ArraySlice<int64> shape = {3, 2};
+ std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+ const gtl::ArraySlice<int64> indices(in);
+ std::initializer_list<int64> sh{ 3, 2 };
+ const gtl::ArraySlice<int64> shape(sh);
Tensor dense(DT_FLOAT, TensorShape({2}));
auto dense_flat = dense.flat<float>();
@@ -178,8 +184,10 @@ TEST_F(SparseDenseCMulTest, BroadcastDense) {
// [1 ?] where ? remains implicitly zero.
// [1.5 0]
const auto indices_shape = TensorShape({4, 2});
- const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
- const gtl::ArraySlice<int64> shape = {3, 2};
+ std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+ const gtl::ArraySlice<int64> indices(in);
+ std::initializer_list<int64> sh{ 3, 2 };
+ const gtl::ArraySlice<int64> shape(sh);
Tensor dense(DT_FLOAT, TensorShape({2}));
auto dense_flat = dense.flat<float>();
diff --git a/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc b/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc
index 2fb78a2a21..110376be42 100644
--- a/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc
+++ b/tensorflow/core/kernels/sparse_reduce_sum_op_test.cc
@@ -51,8 +51,10 @@ TEST_F(SparseReduceSumOpTest, SimpleReduce) {
// [3 4]
const auto indices_shape = TensorShape({4, 2});
- const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
- const gtl::ArraySlice<int64> shape = {3, 2};
+ std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+ const gtl::ArraySlice<int64> indices(in);
+ std::initializer_list<int64> sh{ 3, 2 };
+ const gtl::ArraySlice<int64> shape(sh);
AddInputFromArray<int64>(indices_shape, indices);
AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4});
@@ -91,8 +93,10 @@ TEST_F(SparseReduceSumSparseOpTest, SimpleReduce) {
// [3 4]
const auto indices_shape = TensorShape({4, 2});
- const gtl::ArraySlice<int64> indices = {0, 1, 1, 0, 2, 0, 2, 1};
- const gtl::ArraySlice<int64> shape = {3, 2};
+ std::initializer_list<int64> in{ 0, 1, 1, 0, 2, 0, 2, 1 };
+ const gtl::ArraySlice<int64> indices(in);
+ std::initializer_list<int64> sh{ 3, 2 };
+ const gtl::ArraySlice<int64> shape(sh);
AddInputFromArray<int64>(indices_shape, indices);
AddInputFromArray<float>(TensorShape({4}), {2, 2, 3, 4});
diff --git a/tensorflow/core/kernels/summary_image_op_test.cc b/tensorflow/core/kernels/summary_image_op_test.cc
index 96a4d4183f..f936276925 100644
--- a/tensorflow/core/kernels/summary_image_op_test.cc
+++ b/tensorflow/core/kernels/summary_image_op_test.cc
@@ -126,16 +126,16 @@ TEST_F(SummaryImageOpTest, OneColorImage4dInput) {
AddInputFromArray<float>(
TensorShape({1 /*batch*/, 5 /*rows*/, 2 /*columns*/, 3 /*depth*/}),
{
- /* r0, c0, RGB */ 1.0, 0.1, 0.2,
- /* r0, c1, RGB */ 1.0, 0.3, 0.4,
- /* r1, c0, RGB */ 0.0, 1.0, 0.0,
- /* r1, c1, RGB */ 0.0, 1.0, 0.0,
- /* r2, c0, RGB */ 0.0, 0.0, 1.0,
- /* r2, c1, RGB */ 0.0, 0.0, 1.0,
- /* r3, c0, RGB */ 1.0, 1.0, 0.0,
- /* r3, c1, RGB */ 1.0, 0.0, 1.0,
- /* r4, c0, RGB */ 1.0, 1.0, 0.0,
- /* r4, c1, RGB */ 1.0, 0.0, 1.0,
+ /* r0, c0, RGB */ 1.0f, 0.1f, 0.2f,
+ /* r0, c1, RGB */ 1.0f, 0.3f, 0.4f,
+ /* r1, c0, RGB */ 0.0f, 1.0f, 0.0f,
+ /* r1, c1, RGB */ 0.0f, 1.0f, 0.0f,
+ /* r2, c0, RGB */ 0.0f, 0.0f, 1.0f,
+ /* r2, c1, RGB */ 0.0f, 0.0f, 1.0f,
+ /* r3, c0, RGB */ 1.0f, 1.0f, 0.0f,
+ /* r3, c1, RGB */ 1.0f, 0.0f, 1.0f,
+ /* r4, c0, RGB */ 1.0f, 1.0f, 0.0f,
+ /* r4, c1, RGB */ 1.0f, 0.0f, 1.0f,
});
TF_ASSERT_OK(RunOpKernel());
diff --git a/tensorflow/core/kernels/summary_op_test.cc b/tensorflow/core/kernels/summary_op_test.cc
index 9fd2bd2b5e..05b1687e5f 100644
--- a/tensorflow/core/kernels/summary_op_test.cc
+++ b/tensorflow/core/kernels/summary_op_test.cc
@@ -61,7 +61,7 @@ TEST_F(SummaryScalarOpTest, SimpleFloat) {
// Feed and run
AddInputFromArray<string>(TensorShape({3}), {"tag1", "tag2", "tag3"});
- AddInputFromArray<float>(TensorShape({3}), {1.0, -0.73, 10000.0});
+ AddInputFromArray<float>(TensorShape({3}), {1.0f, -0.73f, 10000.0f});
TF_ASSERT_OK(RunOpKernel());
// Check the output size.
@@ -121,7 +121,7 @@ TEST_F(SummaryScalarOpTest, Error_MismatchedSize) {
// Feed and run
AddInputFromArray<string>(TensorShape({2}), {"tag1", "tag2"});
- AddInputFromArray<float>(TensorShape({3}), {1.0, -0.73, 10000.0});
+ AddInputFromArray<float>(TensorShape({3}), {1.0f, -0.73f, 10000.0f});
Status s = RunOpKernel();
EXPECT_TRUE(StringPiece(s.ToString()).contains("not the same shape")) << s;
}
@@ -131,7 +131,7 @@ TEST_F(SummaryScalarOpTest, Error_WrongDimsTags) {
// Feed and run
AddInputFromArray<string>(TensorShape({2, 1}), {"tag1", "tag2"});
- AddInputFromArray<float>(TensorShape({2}), {1.0, -0.73});
+ AddInputFromArray<float>(TensorShape({2}), {1.0f, -0.73f});
Status s = RunOpKernel();
EXPECT_TRUE(
StringPiece(s.ToString()).contains("tags and values not the same shape"))
@@ -143,7 +143,7 @@ TEST_F(SummaryScalarOpTest, Error_WrongDimsValues) {
// Feed and run
AddInputFromArray<string>(TensorShape({2}), {"tag1", "tag2"});
- AddInputFromArray<float>(TensorShape({2, 1}), {1.0, -0.73});
+ AddInputFromArray<float>(TensorShape({2, 1}), {1.0f, -0.73f});
Status s = RunOpKernel();
EXPECT_TRUE(
StringPiece(s.ToString()).contains("tags and values not the same shape"))
@@ -169,7 +169,8 @@ TEST_F(SummaryHistoOpTest, SimpleFloat) {
// Feed and run
AddInputFromArray<string>(TensorShape({}), {"taghisto"});
- AddInputFromArray<float>(TensorShape({3, 2}), {0.1, -0.7, 4.1, 4., 5., 4.});
+ AddInputFromArray<float>(TensorShape({3, 2}),
+                          {0.1f, -0.7f, 4.1f, 4.f, 5.f, 4.f});
TF_ASSERT_OK(RunOpKernel());
// Check the output size.
@@ -254,7 +255,7 @@ TEST_F(SummaryHistoOpTest, Error_WrongDimsTags) {
// Feed and run
AddInputFromArray<string>(TensorShape({2, 1}), {"tag1", "tag2"});
- AddInputFromArray<float>(TensorShape({2}), {1.0, -0.73});
+ AddInputFromArray<float>(TensorShape({2}), {1.0f, -0.73f});
Status s = RunOpKernel();
EXPECT_TRUE(StringPiece(s.ToString()).contains("tags must be scalar")) << s;
}
@@ -264,7 +265,7 @@ TEST_F(SummaryHistoOpTest, Error_TooManyTagValues) {
// Feed and run
AddInputFromArray<string>(TensorShape({2}), {"tag1", "tag2"});
- AddInputFromArray<float>(TensorShape({2, 1}), {1.0, -0.73});
+ AddInputFromArray<float>(TensorShape({2, 1}), {1.0f, -0.73f});
Status s = RunOpKernel();
EXPECT_TRUE(StringPiece(s.ToString()).contains("tags must be scalar")) << s;
}
diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc
index 733278e440..f6acdf2422 100644
--- a/tensorflow/core/kernels/training_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc
@@ -64,7 +64,7 @@ struct ApplyAdadelta<GPUDevice, T> {
bcast[0] = grad.dimension(0);
Eigen::Sizes<1> single;
- accum.device(d) = accum_update * rho.reshape(single).broadcast(bcast) +
+ accum.device(d) = accum * rho.reshape(single).broadcast(bcast) +
grad.square() * (grad.constant(T(1)) -
rho.reshape(single).broadcast(bcast));
const auto update =
diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index 1a9aa4d903..34e227156d 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -33,14 +33,31 @@ REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
IsVariableInitializedOp);
#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE) \
- REGISTER_KERNEL_BUILDER( \
- Name("Variable").Device(DEVICE_SYCL).TypeConstraint<TYPE>("dtype"), \
- VariableOp); \
- REGISTER_KERNEL_BUILDER( \
- Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<TYPE>("dtype"), \
- VariableOp);
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
+#define REGISTER_SYCL_KERNEL(TYPE) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Variable") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("dtype"), \
+ VariableOp); \
+ REGISTER_KERNEL_BUILDER(Name("VariableV2") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("dtype"), \
+ VariableOp); \
+ REGISTER_KERNEL_BUILDER(Name("TemporaryVariable") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("dtype"), \
+ TemporaryVariableOp); \
+ REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("T"), \
+ DestroyTemporaryVariableOp); \
+ REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<TYPE>("dtype") \
+ .HostMemory("is_initialized"), \
+ IsVariableInitializedOp);
+
+REGISTER_SYCL_KERNEL(float);
#undef REGISTER_SYCL_KERNEL
#endif
diff --git a/tensorflow/core/lib/core/notification_test.cc b/tensorflow/core/lib/core/notification_test.cc
index 8cb1c895ad..9d96708b6f 100644
--- a/tensorflow/core/lib/core/notification_test.cc
+++ b/tensorflow/core/lib/core/notification_test.cc
@@ -67,7 +67,9 @@ TEST(NotificationTest, TestMultipleThreadsWaitingOnNotification) {
++counter;
});
}
- sleep(1);
+
+ // Sleep 1 second.
+ Env::Default()->SleepForMicroseconds(1 * 1000 * 1000);
EXPECT_EQ(0, counter);
diff --git a/tensorflow/core/lib/gtl/cleanup.h b/tensorflow/core/lib/gtl/cleanup.h
index 230cdb624b..6053e98640 100644
--- a/tensorflow/core/lib/gtl/cleanup.h
+++ b/tensorflow/core/lib/gtl/cleanup.h
@@ -96,7 +96,7 @@ class Cleanup {
bool is_released() const { return released_; }
private:
- static_assert(!std::is_reference<F>(), "F must not be a reference");
+ static_assert(!std::is_reference<F>::value, "F must not be a reference");
bool released_ = false;
F f_;
diff --git a/tensorflow/core/lib/gtl/edit_distance_test.cc b/tensorflow/core/lib/gtl/edit_distance_test.cc
index 02968b6ae8..18a400713f 100644
--- a/tensorflow/core/lib/gtl/edit_distance_test.cc
+++ b/tensorflow/core/lib/gtl/edit_distance_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/edit_distance.h"
+#include <cctype>
#include <vector>
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/test.h"
diff --git a/tensorflow/core/lib/strings/strcat_test.cc b/tensorflow/core/lib/strings/strcat_test.cc
index 25561f1bd1..c556b1f676 100644
--- a/tensorflow/core/lib/strings/strcat_test.cc
+++ b/tensorflow/core/lib/strings/strcat_test.cc
@@ -22,6 +22,11 @@ limitations under the License.
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/types.h"
+#ifdef _MSC_VER
+// ssize_t is not a standard C++ type.
+typedef ptrdiff_t ssize_t;
+#endif
+
namespace tensorflow {
namespace strings {
diff --git a/tensorflow/core/ops/nn_ops_test.cc b/tensorflow/core/ops/nn_ops_test.cc
index 3618769dc0..974d7aa87b 100644
--- a/tensorflow/core/ops/nn_ops_test.cc
+++ b/tensorflow/core/ops/nn_ops_test.cc
@@ -507,7 +507,7 @@ TEST(NNOpsTest, FractionalPool_ShapeFn) {
.Finalize(&op.node_def));
};
- set_op(std::vector<float>{2.0, 1, 1 / 1.5, 1 / 2.0});
+ set_op(std::vector<float>{2.0f, 1, 1 / 1.5f, 1 / 2.0f});
// Rank check.
INFER_ERROR("must be rank 4", op, "[?,?,?]");
diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc
index d1f63589ea..8370e57b88 100644
--- a/tensorflow/core/ops/state_ops.cc
+++ b/tensorflow/core/ops/state_ops.cc
@@ -295,7 +295,7 @@ This operation outputs `ref` after the update is done.
This makes it easier to chain operations that need to use the reset value.
If values in `ref` are to be updated more than once, because there are
-duplicate entires in `indices`, the order at which the updates happen
+duplicate entries in `indices`, the order at which the updates happen
for each value is undefined.
Requires `updates.shape = indices.shape + ref.shape[1:]`.
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 64a6ab0c7a..83a2a17d48 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -25,7 +25,7 @@ def tf_deps(deps, suffix):
return tf_deps
def tf_proto_library_cc(name, srcs = [], has_services = None,
- deps = [], visibility = [], testonly = 0,
+ protodeps = [], visibility = [], testonly = 0,
cc_libs = [],
cc_stubby_versions = None,
cc_grpc_version = None,
@@ -34,7 +34,7 @@ def tf_proto_library_cc(name, srcs = [], has_services = None,
js_api_version = 2, js_codegen = "jspb"):
native.filegroup(
name = name + "_proto_srcs",
- srcs = srcs + tf_deps(deps, "_proto_srcs"),
+ srcs = srcs + tf_deps(protodeps, "_proto_srcs"),
testonly = testonly,
)
@@ -43,10 +43,14 @@ def tf_proto_library_cc(name, srcs = [], has_services = None,
use_grpc_plugin = True
cc_proto_library(
name = name + "_cc",
- srcs = srcs + tf_deps(deps, "_proto_srcs"),
- deps = deps + ["@protobuf//:cc_wkt_protos"],
+ srcs = srcs,
+ deps = tf_deps(protodeps, "_cc") + ["@protobuf//:cc_wkt_protos"],
cc_libs = cc_libs + ["@protobuf//:protobuf"],
- copts = ["-Wno-unused-but-set-variable", "-Wno-sign-compare"],
+ copts = [
+ "-Wno-unknown-warning-option",
+ "-Wno-unused-but-set-variable",
+ "-Wno-sign-compare",
+ ],
protoc = "@protobuf//:protoc",
default_runtime = "@protobuf//:protobuf",
use_grpc_plugin = use_grpc_plugin,
@@ -54,13 +58,14 @@ def tf_proto_library_cc(name, srcs = [], has_services = None,
visibility = visibility,
)
-def tf_proto_library_py(name, srcs=[], deps=[], visibility=[], testonly=0,
+def tf_proto_library_py(name, srcs=[], protodeps=[], deps=[], visibility=[],
+ testonly=0,
srcs_version="PY2AND3"):
py_proto_library(
name = name + "_py",
srcs = srcs,
srcs_version = srcs_version,
- deps = deps,
+ deps = deps + tf_deps(protodeps, "_py") + ["@protobuf//:protobuf_python"],
protoc = "@protobuf//:protoc",
default_runtime = "@protobuf//:protobuf_python",
visibility = visibility,
@@ -68,15 +73,16 @@ def tf_proto_library_py(name, srcs=[], deps=[], visibility=[], testonly=0,
)
def tf_proto_library(name, srcs = [], has_services = None,
- deps = [], visibility = [], testonly = 0,
+ protodeps = [], visibility = [], testonly = 0,
cc_libs = [],
cc_api_version = 2, go_api_version = 2,
java_api_version = 2, py_api_version = 2,
js_api_version = 2, js_codegen = "jspb"):
+ """Make a proto library, possibly depending on other proto libraries."""
tf_proto_library_cc(
name = name,
- srcs = srcs + tf_deps(deps, "_proto_srcs"),
- deps = deps,
+ srcs = srcs,
+ protodeps = protodeps,
cc_libs = cc_libs,
testonly = testonly,
visibility = visibility,
@@ -84,9 +90,9 @@ def tf_proto_library(name, srcs = [], has_services = None,
tf_proto_library_py(
name = name,
- srcs = srcs + tf_deps(deps, "_proto_srcs"),
+ srcs = srcs,
+ protodeps = protodeps,
srcs_version = "PY2AND3",
- deps = deps + ["@protobuf//:protobuf_python"],
testonly = testonly,
visibility = visibility,
)
@@ -155,7 +161,16 @@ def tf_additional_test_deps():
return []
def tf_additional_test_srcs():
- return ["platform/default/test_benchmark.cc", "platform/posix/test.cc"]
+ return [
+ "platform/default/test_benchmark.cc",
+ ] + select({
+ "//tensorflow:windows" : [
+ "platform/windows/test.cc"
+ ],
+ "//conditions:default" : [
+ "platform/posix/test.cc",
+ ],
+ })
def tf_kernel_tests_linkstatic():
return 0
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index 4ef795edcc..0857010f7c 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -10,7 +10,6 @@ exports_files(["LICENSE"])
load("//tensorflow:tensorflow.bzl", "if_cuda")
load("//tensorflow:tensorflow.bzl", "tf_copts")
load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
-load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path")
load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path")
cc_library(
@@ -138,7 +137,7 @@ filegroup(
cc_library(
name = "cuda",
data = [
- "@local_config_cuda//cuda:{}".format(cuda_library_path("cudart")),
+ "@local_config_cuda//cuda:cudart",
],
linkopts = select({
"@local_config_cuda//cuda:darwin": [
diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc
index e7808ca08d..1d03725c78 100644
--- a/tensorflow/core/platform/default/logging.cc
+++ b/tensorflow/core/platform/default/logging.cc
@@ -81,7 +81,41 @@ void LogMessage::GenerateLogMessage() {
}
#endif
-LogMessage::~LogMessage() { GenerateLogMessage(); }
+
+namespace {
+
+int64 MinLogLevel() {
+ const char* tf_env_var_val = getenv("TF_CPP_MIN_LOG_LEVEL");
+ if (tf_env_var_val == nullptr) {
+ return 0;
+ }
+
+ // Ideally we would use env_var / safe_strto64, but it is
+ // hard to use here without pulling in a lot of dependencies,
+ // so we do a poor-man's parsing.
+ string min_log_level(tf_env_var_val);
+ if (min_log_level == "1") {
+ // Maps to WARNING
+ return 1;
+ } else if (min_log_level == "2") {
+ // Maps to ERROR
+ return 2;
+ } else if (min_log_level == "3") {
+ // Maps to FATAL
+ return 3;
+ } else {
+ // Maps to INFO (the default).
+ return 0;
+ }
+}
+
+} // namespace
+
+LogMessage::~LogMessage() {
+ // Read the min log level once during the first call to logging.
+ static int64 min_log_level = MinLogLevel();
+ if (TF_PREDICT_TRUE(severity_ >= min_log_level)) GenerateLogMessage();
+}
LogMessageFatal::LogMessageFatal(const char* file, int line)
: LogMessage(file, line, FATAL) {}
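
The new destructor reads TF_CPP_MIN_LOG_LEVEL exactly once, through a function-local static, so the threshold is fixed at the first log message and costs a single comparison afterwards. A standalone sketch of the same gating pattern (MY_MIN_LOG_LEVEL and Log are illustrative, not the TensorFlow API):

#include <cstdlib>
#include <iostream>
#include <string>

// Parse an environment variable into a numeric log threshold, defaulting to 0
// (INFO). Values 1/2/3 map to WARNING/ERROR/FATAL, mirroring the change above.
static int MinLevelFromEnv() {
  const char* val = std::getenv("MY_MIN_LOG_LEVEL");  // illustrative variable name
  if (val == nullptr) return 0;
  const std::string s(val);
  if (s == "1") return 1;
  if (s == "2") return 2;
  if (s == "3") return 3;
  return 0;
}

void Log(int severity, const std::string& msg) {
  // C++11 guarantees this initialization runs exactly once, even with
  // concurrent callers, so the environment is read only on the first call.
  static const int min_level = MinLevelFromEnv();
  if (severity >= min_level) std::cerr << msg << std::endl;
}

int main() {
  Log(0, "info: shown unless MY_MIN_LOG_LEVEL >= 1");
  Log(2, "error: shown unless MY_MIN_LOG_LEVEL >= 3");
}

A consequence of the static is that changing the environment variable after the first log message has no effect for the rest of the process.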
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 787ebe654b..428a45576f 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -208,12 +208,10 @@ class Env {
// TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or
// provide a routine to get the absolute time.
- /// \brief Returns the number of micro-seconds since some fixed point in
- /// time. Only useful for computing deltas of time.
+ /// \brief Returns the number of micro-seconds since the Unix epoch.
virtual uint64 NowMicros() = 0;
- /// \brief Returns the number of seconds since some fixed point in
- /// time. Only useful for computing deltas of time.
+ /// \brief Returns the number of seconds since the Unix epoch.
virtual uint64 NowSeconds() { return NowMicros() / 1000000L; }
/// Sleeps/delays the thread for the prescribed number of micro-seconds.
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
index 3de3b17517..b0f0cbe3f1 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
@@ -112,6 +112,11 @@ class LibHDFS {
}
string path = io::JoinPath(hdfs_home, "lib", "native", "libhdfs.so");
status_ = TryLoadAndBind(path.c_str(), &handle_);
+ if (!status_.ok()) {
+    // Try to load libhdfs.so using the dynamic loader's search path, in case
+    // libhdfs.so is installed in a non-standard location.
+ status_ = TryLoadAndBind("libhdfs.so", &handle_);
+ }
return;
}
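
The fallback above retries with the bare soname so the dynamic loader can consult its own search path (LD_LIBRARY_PATH, the ldconfig cache) when the HADOOP_HDFS_HOME layout does not contain the library. A minimal POSIX sketch of the same "explicit path first, then soname" pattern using dlopen (the paths here are illustrative):

#include <dlfcn.h>

#include <iostream>
#include <string>

// Try an explicit path first; if that fails, fall back to the bare soname so
// the dynamic loader's own search path is used.
static void* LoadSharedLibrary(const std::string& full_path, const char* soname) {
  void* handle = dlopen(full_path.c_str(), RTLD_NOW | RTLD_LOCAL);
  if (handle == nullptr) {
    handle = dlopen(soname, RTLD_NOW | RTLD_LOCAL);
  }
  return handle;
}

int main() {
  void* h = LoadSharedLibrary("/opt/hadoop/lib/native/libhdfs.so", "libhdfs.so");
  if (h == nullptr) {
    std::cerr << "could not load libhdfs.so: " << dlerror() << std::endl;
    return 1;
  }
  dlclose(h);
}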
diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc
index 78d000bff8..402c718e4f 100644
--- a/tensorflow/core/platform/port_test.cc
+++ b/tensorflow/core/platform/port_test.cc
@@ -36,8 +36,14 @@ TEST(ConditionVariable, WaitForMilliseconds_Timeout) {
mutex m;
mutex_lock l(m);
condition_variable cv;
+ ConditionResult result = kCond_MaybeNotified;
time_t start = time(NULL);
- EXPECT_EQ(WaitForMilliseconds(&l, &cv, 3000), kCond_Timeout);
+ // Condition variables are subject to spurious wakeups on some platforms,
+  // so we need to check for a timeout within a loop.
+ while (result == kCond_MaybeNotified) {
+ result = WaitForMilliseconds(&l, &cv, 3000);
+ }
+ EXPECT_EQ(result, kCond_Timeout);
time_t finish = time(NULL);
EXPECT_GE(finish - start, 3);
}
@@ -51,7 +57,7 @@ TEST(ConditionVariable, WaitForMilliseconds_Signalled) {
// Sleep for just 1 second then notify. We have a timeout of 3 secs,
// so the condition variable will notice the cv signal before the timeout.
pool.Schedule([&m, &cv]() {
- sleep(1);
+ Env::Default()->SleepForMicroseconds(1 * 1000 * 1000);
mutex_lock l(m);
cv.notify_all();
});
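
Both port_test.cc changes deal with timing; the first loops because a condition-variable wait may return spuriously before the timeout, so a single call cannot be asserted to time out. The same loop with std::condition_variable looks like this (a sketch, not the tensorflow mutex/condition_variable wrappers):

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>

int main() {
  std::mutex m;
  std::condition_variable cv;
  std::unique_lock<std::mutex> lock(m);

  const auto deadline =
      std::chrono::steady_clock::now() + std::chrono::seconds(3);

  // wait_until may wake spuriously before the deadline, so keep waiting
  // until it actually reports std::cv_status::timeout.
  while (cv.wait_until(lock, deadline) != std::cv_status::timeout) {
    // Spurious wakeup: nothing signalled us, go back to waiting.
  }
  std::cout << "timed out as expected" << std::endl;
}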
diff --git a/tensorflow/core/platform/subprocess.h b/tensorflow/core/platform/subprocess.h
index 7dfd38688d..dfdcf82173 100644
--- a/tensorflow/core/platform/subprocess.h
+++ b/tensorflow/core/platform/subprocess.h
@@ -53,7 +53,7 @@ class SubProcess;
defined(PLATFORM_GOOGLE_ANDROID)
#include "tensorflow/core/platform/posix/subprocess.h"
#elif defined(PLATFORM_WINDOWS)
-#error SubProcess not yet implemented for Windows
+#include "tensorflow/core/platform/windows/subprocess.h"
#else
#error Define the appropriate PLATFORM_<foo> macro for this platform
#endif
diff --git a/tensorflow/core/platform/windows/subprocess.h b/tensorflow/core/platform/windows/subprocess.h
new file mode 100644
index 0000000000..b65313363e
--- /dev/null
+++ b/tensorflow/core/platform/windows/subprocess.h
@@ -0,0 +1,27 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_
+#define TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_
+
+namespace tensorflow {
+
+// SubProcess is not yet implemented for Windows.
+class SubProcess {
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_
diff --git a/tensorflow/core/platform/windows/test.cc b/tensorflow/core/platform/windows/test.cc
new file mode 100644
index 0000000000..0ffd02ff14
--- /dev/null
+++ b/tensorflow/core/platform/windows/test.cc
@@ -0,0 +1,51 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/net.h"
+#include "tensorflow/core/platform/test.h"
+
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace testing {
+
+std::unique_ptr<SubProcess> CreateSubProcess(const std::vector<string>& argv) {
+  LOG(FATAL) << "CreateSubProcess is not yet implemented on Windows.";
+ return nullptr;
+}
+
+int PickUnusedPortOrDie() { return internal::PickUnusedPortOrDie(); }
+
+string TensorFlowSrcRoot() {
+ // 'bazel test' and cmake set TEST_SRCDIR.
+ // New versions of bazel also set TEST_WORKSPACE.
+ const char* env = getenv("TEST_SRCDIR");
+ const char* workspace = getenv("TEST_WORKSPACE");
+ if (env && env[0] != '\0') {
+ if (workspace && workspace[0] != '\0') {
+ return strings::StrCat(env, "/", workspace, "/tensorflow");
+ } else {
+ return strings::StrCat(env, "/tensorflow");
+ }
+ } else {
+ LOG(WARNING) << "TEST_SRCDIR environment variable not set: "
+ << "using $PWD/tensorflow as TensorFlowSrcRoot() for tests.";
+ return "tensorflow";
+ }
+}
+
+} // namespace testing
+} // namespace tensorflow
diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc
index 31516bb2ee..670abf3fdf 100644
--- a/tensorflow/core/platform/windows/windows_file_system.cc
+++ b/tensorflow/core/platform/windows/windows_file_system.cc
@@ -467,6 +467,23 @@ Status WindowsFileSystem::RenameFile(const string& src, const string& target) {
return result;
}
+Status WindowsFileSystem::GetMatchingPaths(const string& pattern,
+ std::vector<string>* results) {
+ // NOTE(mrry): The existing implementation of FileSystem::GetMatchingPaths()
+ // does not handle Windows paths containing backslashes correctly. Since
+ // Windows APIs will accept forward and backslashes equivalently, we
+ // convert the pattern to use forward slashes exclusively. Note that this
+ // is not ideal, since the API expects backslash as an escape character,
+ // but no code appears to rely on this behavior.
+ string converted_pattern(pattern);
+ std::replace(converted_pattern.begin(), converted_pattern.end(), '\\', '/');
+ TF_RETURN_IF_ERROR(FileSystem::GetMatchingPaths(converted_pattern, results));
+ for (string& result : *results) {
+ std::replace(result.begin(), result.end(), '/', '\\');
+ }
+ return Status::OK();
+}
+
Status WindowsFileSystem::Stat(const string& fname, FileStatistics* stat) {
Status result;
struct _stat sbuf;
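
The new GetMatchingPaths override normalizes the pattern to forward slashes, delegates to the generic matcher, then flips the results back to backslashes. The round-trip itself is just two std::replace passes, sketched standalone below (the pattern and result strings are made up):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Incoming Windows-style pattern.
  std::string pattern = R"(C:\data\logs\*.txt)";

  // Normalize to forward slashes before handing it to a matcher that only
  // understands '/' as a separator.
  std::string converted(pattern);
  std::replace(converted.begin(), converted.end(), '\\', '/');

  // Pretend these came back from the matcher.
  std::vector<std::string> results = {"C:/data/logs/a.txt", "C:/data/logs/b.txt"};

  // Convert results back to the platform's preferred separator.
  for (std::string& r : results) {
    std::replace(r.begin(), r.end(), '/', '\\');
    std::cout << r << std::endl;
  }
}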
diff --git a/tensorflow/core/platform/windows/windows_file_system.h b/tensorflow/core/platform/windows/windows_file_system.h
index dd83a27caf..507290e9e6 100644
--- a/tensorflow/core/platform/windows/windows_file_system.h
+++ b/tensorflow/core/platform/windows/windows_file_system.h
@@ -48,6 +48,9 @@ class WindowsFileSystem : public FileSystem {
Status GetChildren(const string& dir, std::vector<string>* result) override;
+ Status GetMatchingPaths(const string& pattern,
+ std::vector<string>* result) override;
+
Status Stat(const string& fname, FileStatistics* stat) override;
Status DeleteFile(const string& fname) override;
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 1de976fb3d..34673be216 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -19,7 +19,7 @@ limitations under the License.
// TensorFlow uses semantic versioning, see http://semver.org/.
#define TF_MAJOR_VERSION 0
-#define TF_MINOR_VERSION 11
+#define TF_MINOR_VERSION 12
#define TF_PATCH_VERSION head
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
diff --git a/tensorflow/core/util/memmapped_file_system.cc b/tensorflow/core/util/memmapped_file_system.cc
index d67f948f1d..e077e94cf8 100644
--- a/tensorflow/core/util/memmapped_file_system.cc
+++ b/tensorflow/core/util/memmapped_file_system.cc
@@ -177,8 +177,13 @@ const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const {
return reinterpret_cast<const uint8*>(mapped_memory_->data()) + offset;
}
+#if defined(COMPILER_MSVC)
+constexpr char* MemmappedFileSystem::kMemmappedPackagePrefix;
+constexpr char* MemmappedFileSystem::kMemmappedPackageDefaultGraphDef;
+#else
constexpr char MemmappedFileSystem::kMemmappedPackagePrefix[];
constexpr char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[];
+#endif
Status MemmappedFileSystem::InitializeFromFile(Env* env,
const string& filename) {
diff --git a/tensorflow/core/util/memmapped_file_system.h b/tensorflow/core/util/memmapped_file_system.h
index d64c4a765c..541587aeab 100644
--- a/tensorflow/core/util/memmapped_file_system.h
+++ b/tensorflow/core/util/memmapped_file_system.h
@@ -53,9 +53,19 @@ class MemmappedFileSystem : public FileSystem {
public:
// Memmapped regions use this prefix to distinguish from
// the filesystem.
- static constexpr char kMemmappedPackagePrefix[] = "memmapped_package://";
- // The default graphdef in the package.
+#if defined(COMPILER_MSVC)
+ static constexpr char* kMemmappedPackagePrefix =
+#else
+ static constexpr char kMemmappedPackagePrefix[] =
+#endif
+ "memmapped_package://";
+
+// The default graphdef in the package.
+#if defined(COMPILER_MSVC)
+ static constexpr char* kMemmappedPackageDefaultGraphDef =
+#else
static constexpr char kMemmappedPackageDefaultGraphDef[] =
+#endif
"memmapped_package://.";
MemmappedFileSystem();
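
The #if COMPILER_MSVC branches above exist because MSVC of that era rejected static constexpr char arrays; on conforming compilers the array form is kept, and (pre-C++17) an odr-used static constexpr member still needs the out-of-class definition added in the .cc file. A standalone sketch of the portable form (Package is an illustrative name):

#include <iostream>

struct Package {
  // In-class declaration with initializer.
  static constexpr char kPrefix[] = "memmapped_package://";
};

// Pre-C++17, this out-of-class definition is required whenever kPrefix is
// odr-used (e.g. bound to a reference); in C++17 constexpr statics are
// implicitly inline and the line becomes redundant but harmless.
constexpr char Package::kPrefix[];

int main() {
  std::cout << Package::kPrefix << std::endl;
}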
diff --git a/tensorflow/core/util/memmapped_file_system_test.cc b/tensorflow/core/util/memmapped_file_system_test.cc
index c7d919041a..179c72c1f5 100644
--- a/tensorflow/core/util/memmapped_file_system_test.cc
+++ b/tensorflow/core/util/memmapped_file_system_test.cc
@@ -137,8 +137,15 @@ TEST(MemmappedFileSystemTest, ProxyToDefault) {
const string dir = testing::TmpDir();
const string filename = io::JoinPath(dir, "test_file");
// Check that we can create write and read ordinary file.
- std::unique_ptr<WritableFile> writable_file;
- TF_ASSERT_OK(memmapped_env.NewAppendableFile(filename, &writable_file));
+ std::unique_ptr<WritableFile> writable_file_temp;
+ TF_ASSERT_OK(memmapped_env.NewAppendableFile(filename, &writable_file_temp));
+  // Make sure the temporary file is cleaned up after the test finishes.
+ const auto adh = [&memmapped_env, &filename](WritableFile* f) {
+ delete f;
+ memmapped_env.DeleteFile(filename);
+ };
+ std::unique_ptr<WritableFile, decltype(adh)> writable_file(
+ writable_file_temp.release(), adh);
const string test_string = "bla-bla-bla";
TF_ASSERT_OK(writable_file->Append(test_string));
TF_ASSERT_OK(writable_file->Close());
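
The test now hands the writable file to a std::unique_ptr with a custom deleter, so the temporary file is removed even when a later assertion ends the test early. A generic sketch of the same RAII cleanup (std::ofstream and std::remove stand in for WritableFile and memmapped_env.DeleteFile):

#include <cstdio>
#include <fstream>
#include <memory>
#include <string>

int main() {
  const std::string filename = "test_file.tmp";

  // Custom deleter: destroy the file object, then delete the file on disk.
  auto cleanup = [&filename](std::ofstream* f) {
    delete f;
    std::remove(filename.c_str());  // stands in for DeleteFile in the test
  };
  std::unique_ptr<std::ofstream, decltype(cleanup)> file(
      new std::ofstream(filename), cleanup);

  *file << "bla-bla-bla";
  // When `file` goes out of scope, the deleter runs and the temp file is gone.
}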
diff --git a/tensorflow/core/util/semver_test.cc b/tensorflow/core/util/semver_test.cc
index 75994a658e..0647f670c7 100644
--- a/tensorflow/core/util/semver_test.cc
+++ b/tensorflow/core/util/semver_test.cc
@@ -63,6 +63,10 @@ TEST(SemverTest, VersionStringFollowsSemver) {
if (major == 0 && minor <= 11) {
return;
}
+ if (str_util::ConsumePrefix(&semver, "head")) {
+ ASSERT_TRUE(semver.empty());
+ return;
+ }
ASSERT_TRUE(str_util::ConsumeLeadingDigits(&semver, &patch));
if (semver.empty()) return;
if (semver[0] == '-') {
diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h
index a575d98da3..9d6f9e8bb5 100644
--- a/tensorflow/core/util/sparse/sparse_tensor.h
+++ b/tensorflow/core/util/sparse/sparse_tensor.h
@@ -17,8 +17,9 @@ limitations under the License.
#define TENSORFLOW_UTIL_SPARSE_SPARSE_TENSOR_H_
#include <limits>
-
+#include <numeric>
#include <vector>
+
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_types.h"
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc
index 0b675eaac9..6bd3d9c780 100644
--- a/tensorflow/core/util/stat_summarizer.cc
+++ b/tensorflow/core/util/stat_summarizer.cc
@@ -340,10 +340,10 @@ std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions(
std::string StatSummarizer::GetOutputString() const {
std::stringstream stream;
- stream << "Total time (us): " << run_total_micros_;
+ stream << "Total time (us): " << run_total_micros_ << std::endl;
stream << GetTimingStatsByRunOrder();
stream << GetTimingStatsByTopDurations();
- stream << "Total Memory (bytes): " << memory_;
+ stream << "Total Memory (bytes): " << memory_ << std::endl;
stream << GetMemoryStatsByRunOrder();
stream << GetMemoryStatsByUsage();
return stream.str();