author    A. Unique TensorFlower <gardener@tensorflow.org>   2016-10-06 12:28:12 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>    2016-10-06 13:33:13 -0700
commit    8acc9eb46e9b2aa88d90dac77f37cfc98a4a739c
tree      92599649e4b0bd79e08f02bf32e789375214a949
parent    9e16b3048c534fadb0f791c0cbdfa756ade6490a
Change: 135397388
-rw-r--r--  tensorflow/core/kernels/batchtospace_op.cc               2
-rw-r--r--  tensorflow/core/kernels/spacetobatch_benchmark_test.cc  61
-rw-r--r--  tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc   2
-rw-r--r--  tensorflow/core/kernels/spacetobatch_op.cc               2
4 files changed, 40 insertions, 27 deletions
diff --git a/tensorflow/core/kernels/batchtospace_op.cc b/tensorflow/core/kernels/batchtospace_op.cc
index 8a2c5e21ac..277f90cdad 100644
--- a/tensorflow/core/kernels/batchtospace_op.cc
+++ b/tensorflow/core/kernels/batchtospace_op.cc
@@ -275,7 +275,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
.HostMemory("crops"), \
BatchToSpaceOp<GPUDevice, T>);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER);
#undef REGISTER
#endif // GOOGLE_CUDA
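The only change in this file swaps the type list used to stamp out GPU kernel registrations: TF_CALL_GPU_NUMBER_TYPES_NO_HALF omits Eigen::half, while TF_CALL_GPU_NUMBER_TYPES includes it, so BatchToSpaceOp now also gets a half-precision GPU kernel. Below is a simplified, self-contained sketch of this X-macro fan-out pattern; the macro names mirror TensorFlow's register_types.h, but the expansions are illustrative stand-ins rather than the real definitions (the real GPU list may also include double, depending on the build).

// Illustrative sketch only: simplified stand-ins for the TF_CALL_* helpers.
#include <cstdio>

struct half_placeholder {};  // stand-in for Eigen::half in this sketch

#define TF_CALL_float(m) m(float)
#define TF_CALL_half(m) m(half_placeholder)

// Old list: no half, so REGISTER is only stamped out for float here.
#define TF_CALL_GPU_NUMBER_TYPES_NO_HALF(m) TF_CALL_float(m)
// New list: float and half, so REGISTER is also stamped out for half.
#define TF_CALL_GPU_NUMBER_TYPES(m) TF_CALL_float(m) TF_CALL_half(m)

// A registration-style macro in the spirit of the kernels' REGISTER macro.
#define REGISTER(T) \
  void RegisterKernelFor_##T() { std::printf("registered %s\n", #T); }

TF_CALL_GPU_NUMBER_TYPES(REGISTER)
#undef REGISTER

int main() {
  RegisterKernelFor_float();             // generated by TF_CALL_float
  RegisterKernelFor_half_placeholder();  // generated by TF_CALL_half
  return 0;
}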
diff --git a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
index 1730d85fac..a9a9bd46b7 100644
--- a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
+++ b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
@@ -23,12 +23,10 @@ namespace tensorflow {
static Graph* ConstructSpaceToBatchGraph(
const char* op_name, const TensorShape& input_shape, const int block_size,
- const std::vector<std::pair<int, int>>& paddings) {
+ DataType dtype, const std::vector<std::pair<int, int>>& paddings) {
const int num_block_dims = 2;
CHECK_EQ(num_block_dims, paddings.size());
Graph* g = new Graph(OpRegistry::Global());
- Tensor input(DT_FLOAT, input_shape);
- input.flat<float>().setRandom();
Tensor paddings_tensor(DT_INT32, TensorShape({num_block_dims, 2}));
auto paddings_eigen_tensor = paddings_tensor.matrix<int32>();
for (int block_dim = 0; block_dim < num_block_dims; ++block_dim) {
@@ -36,30 +34,45 @@ static Graph* ConstructSpaceToBatchGraph(
paddings_eigen_tensor(block_dim, 1) = paddings[block_dim].second;
}
Node* ret;
- NodeBuilder(g->NewName("n"), op_name)
- .Input(test::graph::Constant(g, input))
- .Input(test::graph::Constant(g, paddings_tensor))
- .Attr("block_size", block_size)
- .Finalize(g, &ret);
+ if (dtype == DT_FLOAT) {
+ Tensor input(DT_FLOAT, input_shape);
+ input.flat<float>().setRandom();
+ NodeBuilder(g->NewName("n"), op_name)
+ .Input(test::graph::Constant(g, input))
+ .Input(test::graph::Constant(g, paddings_tensor))
+ .Attr("block_size", block_size)
+ .Finalize(g, &ret);
+ } else if (dtype == DT_HALF) {
+ Tensor input(DT_HALF, input_shape);
+ input.flat<Eigen::half>().setRandom();
+ NodeBuilder(g->NewName("n"), op_name)
+ .Input(test::graph::Constant(g, input))
+ .Input(test::graph::Constant(g, paddings_tensor))
+ .Attr("block_size", block_size)
+ .Finalize(g, &ret);
+ }
return g;
}
-#define BM_SpaceToBatchDev(OP, DEVICE, B, H, W, D, BS, P00, P01, P10, P11) \
- static void \
- BM_##OP##_##DEVICE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11( \
- int iters) { \
- testing::ItemsProcessed(static_cast<int64>(iters) * B * (H + P00 + P01) * \
- (W + P10 + P11) * D); \
- test::Benchmark(#DEVICE, \
- ConstructSpaceToBatchGraph(#OP, TensorShape({B, H, W, D}), \
- BS, {{P00, P01}, {P10, P11}})) \
- .Run(iters); \
- } \
- BENCHMARK( \
- BM_##OP##_##DEVICE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11);
-#define BM_SpaceToBatch(OP, ...) \
- BM_SpaceToBatchDev(OP, cpu, __VA_ARGS__); \
- BM_SpaceToBatchDev(OP, gpu, __VA_ARGS__);
+#define BM_SpaceToBatchDev(OP, DEVICE, DTYPE, B, H, W, D, BS, P00, P01, P10, \
+ P11) \
+ static void \
+ BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11( \
+ int iters) { \
+ testing::ItemsProcessed(static_cast<int64>(iters) * B * (H + P00 + P01) * \
+ (W + P10 + P11) * D); \
+ test::Benchmark(#DEVICE, ConstructSpaceToBatchGraph( \
+ #OP, TensorShape({B, H, W, D}), BS, DTYPE, \
+ {{P00, P01}, {P10, P11}})) \
+ .Run(iters); \
+ } \
+ BENCHMARK( \
+ BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11);
+#define BM_SpaceToBatch(OP, ...) \
+ BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__); \
+ BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__); \
+ BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__); \
+ BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__);
BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 64, 2, 0, 0, 0, 0);
BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 1, 2, 0, 0, 0, 0);
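One observation on the benchmark change above: the DT_FLOAT and DT_HALF branches in ConstructSpaceToBatchGraph differ only in the tensor element type, which duplicates the NodeBuilder call. A hypothetical alternative (not part of this commit) would hoist the shared construction into a small templated helper; the sketch below reuses only the APIs already visible in this diff (Tensor, NodeBuilder, test::graph::Constant), and AddSpaceToBatchNode is an invented name.

// Hypothetical refactoring sketch; assumes the same headers as the test above.
template <typename T>
static void AddSpaceToBatchNode(Graph* g, const char* op_name, DataType dtype,
                                const TensorShape& input_shape,
                                const Tensor& paddings_tensor,
                                const int block_size, Node** ret) {
  Tensor input(dtype, input_shape);
  input.flat<T>().setRandom();  // same random initialization as the benchmark
  NodeBuilder(g->NewName("n"), op_name)
      .Input(test::graph::Constant(g, input))
      .Input(test::graph::Constant(g, paddings_tensor))
      .Attr("block_size", block_size)
      .Finalize(g, ret);
}

// Usage inside ConstructSpaceToBatchGraph:
//   if (dtype == DT_FLOAT) {
//     AddSpaceToBatchNode<float>(g, op_name, dtype, input_shape,
//                                paddings_tensor, block_size, &ret);
//   } else if (dtype == DT_HALF) {
//     AddSpaceToBatchNode<Eigen::half>(g, op_name, dtype, input_shape,
//                                      paddings_tensor, block_size, &ret);
//   }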
diff --git a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc
index e7848be2e3..db8d419c38 100644
--- a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc
@@ -158,7 +158,7 @@ struct SpaceToBatchFunctor<GPUDevice, T, NUM_BLOCK_DIMS, B2S> {
#define INSTANTIATE_FOR_T(T) \
TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS(INSTANTIATE, T)
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(INSTANTIATE_FOR_T)
+TF_CALL_GPU_NUMBER_TYPES(INSTANTIATE_FOR_T)
#undef INSTANTIATE_FOR_T
#undef INSTANTIATE
diff --git a/tensorflow/core/kernels/spacetobatch_op.cc b/tensorflow/core/kernels/spacetobatch_op.cc
index a22c4e8f53..e2f34a3b54 100644
--- a/tensorflow/core/kernels/spacetobatch_op.cc
+++ b/tensorflow/core/kernels/spacetobatch_op.cc
@@ -274,7 +274,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
.HostMemory("paddings"), \
SpaceToBatchOp<GPUDevice, T>);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER);
#undef REGISTER
#endif // GOOGLE_CUDA