From 8acc9eb46e9b2aa88d90dac77f37cfc98a4a739c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Oct 2016 12:28:12 -0800
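Subject: Fixes https://github.com/tensorflow/tensorflow/issues/4673 Change:
 135397388

Register the GPU SpaceToBatch and BatchToSpace kernels, and instantiate the
GPU functor, via TF_CALL_GPU_NUMBER_TYPES instead of
TF_CALL_GPU_NUMBER_TYPES_NO_HALF. Extend the SpaceToBatch benchmark so that
every case runs with both DT_FLOAT and DT_HALF inputs on cpu and gpu.
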
---
 tensorflow/core/kernels/batchtospace_op.cc         |  2 +-
 .../core/kernels/spacetobatch_benchmark_test.cc    | 61 +++++++++++++---------
 .../core/kernels/spacetobatch_functor_gpu.cu.cc    |  2 +-
 tensorflow/core/kernels/spacetobatch_op.cc         |  2 +-
 4 files changed, 40 insertions(+), 27 deletions(-)
diff --git a/tensorflow/core/kernels/batchtospace_op.cc b/tensorflow/core/kernels/batchtospace_op.cc
index 8a2c5e21ac..277f90cdad 100644
--- a/tensorflow/core/kernels/batchtospace_op.cc
+++ b/tensorflow/core/kernels/batchtospace_op.cc
@@ -275,7 +275,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
                               .HostMemory("crops"),        \
                           BatchToSpaceOp<GPUDevice, T>);
 
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER);
 #undef REGISTER
 #endif  // GOOGLE_CUDA
 
diff --git a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
index 1730d85fac..a9a9bd46b7 100644
--- a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
+++ b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
@@ -23,12 +23,10 @@ namespace tensorflow {
 
 static Graph* ConstructSpaceToBatchGraph(
     const char* op_name, const TensorShape& input_shape, const int block_size,
-    const std::vector<std::pair<int, int>>& paddings) {
+    DataType dtype, const std::vector<std::pair<int, int>>& paddings) {
   const int num_block_dims = 2;
   CHECK_EQ(num_block_dims, paddings.size());
   Graph* g = new Graph(OpRegistry::Global());
-  Tensor input(DT_FLOAT, input_shape);
-  input.flat<float>().setRandom();
   Tensor paddings_tensor(DT_INT32, TensorShape({num_block_dims, 2}));
   auto paddings_eigen_tensor = paddings_tensor.matrix<int32>();
   for (int block_dim = 0; block_dim < num_block_dims; ++block_dim) {
@@ -36,30 +34,45 @@ static Graph* ConstructSpaceToBatchGraph(
     paddings_eigen_tensor(block_dim, 1) = paddings[block_dim].second;
   }
   Node* ret;
-  NodeBuilder(g->NewName("n"), op_name)
-      .Input(test::graph::Constant(g, input))
-      .Input(test::graph::Constant(g, paddings_tensor))
-      .Attr("block_size", block_size)
-      .Finalize(g, &ret);
+  // Fill the input with random values of the requested element type. Only the
+  // dtypes handled here are supported; any other dtype aborts rather than
+  // silently yielding a graph without the op node.
+  Tensor input(dtype, input_shape);
+  if (dtype == DT_FLOAT) {
+    input.flat<float>().setRandom();
+  } else if (dtype == DT_HALF) {
+    input.flat<Eigen::half>().setRandom();
+  } else {
+    LOG(FATAL) << "Unsupported benchmark dtype: " << DataTypeString(dtype);
+  }
+  // Abort if the node cannot be built instead of benchmarking an empty graph.
+  TF_CHECK_OK(NodeBuilder(g->NewName("n"), op_name)
+                  .Input(test::graph::Constant(g, input))
+                  .Input(test::graph::Constant(g, paddings_tensor))
+                  .Attr("block_size", block_size)
+                  .Finalize(g, &ret));
   return g;
 }
 
-#define BM_SpaceToBatchDev(OP, DEVICE, B, H, W, D, BS, P00, P01, P10, P11)                    \
-  static void                                                                                 \
-      BM_##OP##_##DEVICE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11( \
-          int iters) {                                                                        \
-    testing::ItemsProcessed(static_cast<int64>(iters) * B * (H + P00 + P01) *                 \
-                            (W + P10 + P11) * D);                                             \
-    test::Benchmark(#DEVICE,                                                                  \
-                    ConstructSpaceToBatchGraph(#OP, TensorShape({B, H, W, D}),                \
-                                               BS, {{P00, P01}, {P10, P11}}))                 \
-        .Run(iters);                                                                          \
-  }                                                                                           \
-  BENCHMARK(                                                                                  \
-      BM_##OP##_##DEVICE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11);
-#define BM_SpaceToBatch(OP, ...)            \
-  BM_SpaceToBatchDev(OP, cpu, __VA_ARGS__); \
-  BM_SpaceToBatchDev(OP, gpu, __VA_ARGS__);
+#define BM_SpaceToBatchDev(OP, DEVICE, DTYPE, B, H, W, D, BS, P00, P01, P10,                            \
+                           P11)                                                                         \
+  static void                                                                                           \
+      BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11( \
+          int iters) {                                                                                  \
+    testing::ItemsProcessed(static_cast<int64>(iters) * B * (H + P00 + P01) *                           \
+                            (W + P10 + P11) * D);                                                       \
+    test::Benchmark(#DEVICE, ConstructSpaceToBatchGraph(                                                \
+                                 #OP, TensorShape({B, H, W, D}), BS, DTYPE,                             \
+                                 {{P00, P01}, {P10, P11}}))                                             \
+        .Run(iters);                                                                                    \
+  }                                                                                                     \
+  BENCHMARK(                                                                                            \
+      BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11);
+#define BM_SpaceToBatch(OP, ...)                      \
+  BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__); \
+  BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__); \
+  BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__);  \
+  BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__);
 
 BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 64, 2, 0, 0, 0, 0);
 BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 1, 2, 0, 0, 0, 0);
diff --git a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc
index e7848be2e3..db8d419c38 100644
--- a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc
@@ -158,7 +158,7 @@ struct SpaceToBatchFunctor<GPUDevice, T, NUM_BLOCK_DIMS, B2S> {
 #define INSTANTIATE_FOR_T(T) \
   TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS(INSTANTIATE, T)
 
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(INSTANTIATE_FOR_T)
+TF_CALL_GPU_NUMBER_TYPES(INSTANTIATE_FOR_T)
 
 #undef INSTANTIATE_FOR_T
 #undef INSTANTIATE
diff --git a/tensorflow/core/kernels/spacetobatch_op.cc b/tensorflow/core/kernels/spacetobatch_op.cc
index a22c4e8f53..e2f34a3b54 100644
--- a/tensorflow/core/kernels/spacetobatch_op.cc
+++ b/tensorflow/core/kernels/spacetobatch_op.cc
@@ -274,7 +274,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
                               .HostMemory("paddings"),     \
                           SpaceToBatchOp<GPUDevice, T>);
 
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER);
 #undef REGISTER
 #endif  // GOOGLE_CUDA
 
-- 
cgit v1.2.3