From 8acc9eb46e9b2aa88d90dac77f37cfc98a4a739c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Oct 2016 12:28:12 -0800 Subject: Fixes https://github.com/tensorflow/tensorflow/issues/4673 Change: 135397388 --- tensorflow/core/kernels/batchtospace_op.cc | 2 +- .../core/kernels/spacetobatch_benchmark_test.cc | 61 +++++++++++++--------- .../core/kernels/spacetobatch_functor_gpu.cu.cc | 2 +- tensorflow/core/kernels/spacetobatch_op.cc | 2 +- 4 files changed, 40 insertions(+), 27 deletions(-) diff --git a/tensorflow/core/kernels/batchtospace_op.cc b/tensorflow/core/kernels/batchtospace_op.cc index 8a2c5e21ac..277f90cdad 100644 --- a/tensorflow/core/kernels/batchtospace_op.cc +++ b/tensorflow/core/kernels/batchtospace_op.cc @@ -275,7 +275,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER); .HostMemory("crops"), \ BatchToSpaceOp); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER); +TF_CALL_GPU_NUMBER_TYPES(REGISTER); #undef REGISTER #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc index 1730d85fac..a9a9bd46b7 100644 --- a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc +++ b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc @@ -23,12 +23,10 @@ namespace tensorflow { static Graph* ConstructSpaceToBatchGraph( const char* op_name, const TensorShape& input_shape, const int block_size, - const std::vector<std::pair<int, int>>& paddings) { + DataType dtype, const std::vector<std::pair<int, int>>& paddings) { const int num_block_dims = 2; CHECK_EQ(num_block_dims, paddings.size()); Graph* g = new Graph(OpRegistry::Global()); - Tensor input(DT_FLOAT, input_shape); - input.flat<float>().setRandom(); Tensor paddings_tensor(DT_INT32, TensorShape({num_block_dims, 2})); auto paddings_eigen_tensor = paddings_tensor.matrix<int32>(); for (int block_dim = 0; block_dim < num_block_dims; ++block_dim) { @@ -36,30 +34,45 @@ static Graph* ConstructSpaceToBatchGraph( paddings_eigen_tensor(block_dim, 1) = paddings[block_dim].second; } 
Node* ret; - NodeBuilder(g->NewName("n"), op_name) - .Input(test::graph::Constant(g, input)) - .Input(test::graph::Constant(g, paddings_tensor)) - .Attr("block_size", block_size) - .Finalize(g, &ret); + if (dtype == DT_FLOAT) { + Tensor input(DT_FLOAT, input_shape); + input.flat<float>().setRandom(); + NodeBuilder(g->NewName("n"), op_name) + .Input(test::graph::Constant(g, input)) + .Input(test::graph::Constant(g, paddings_tensor)) + .Attr("block_size", block_size) + .Finalize(g, &ret); + } else if (dtype == DT_HALF) { + Tensor input(DT_HALF, input_shape); + input.flat<Eigen::half>().setRandom(); + NodeBuilder(g->NewName("n"), op_name) + .Input(test::graph::Constant(g, input)) + .Input(test::graph::Constant(g, paddings_tensor)) + .Attr("block_size", block_size) + .Finalize(g, &ret); + } return g; } -#define BM_SpaceToBatchDev(OP, DEVICE, B, H, W, D, BS, P00, P01, P10, P11) \ - static void \ - BM_##OP##_##DEVICE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11( \ - int iters) { \ - testing::ItemsProcessed(static_cast<int64>(iters) * B * (H + P00 + P01) * \ - (W + P10 + P11) * D); \ - test::Benchmark(#DEVICE, \ - ConstructSpaceToBatchGraph(#OP, TensorShape({B, H, W, D}), \ - BS, {{P00, P01}, {P10, P11}})) \ - .Run(iters); \ - } \ - BENCHMARK( \ - BM_##OP##_##DEVICE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11); -#define BM_SpaceToBatch(OP, ...) 
\ - BM_SpaceToBatchDev(OP, cpu, __VA_ARGS__); \ - BM_SpaceToBatchDev(OP, gpu, __VA_ARGS__); +#define BM_SpaceToBatchDev(OP, DEVICE, DTYPE, B, H, W, D, BS, P00, P01, P10, \ + P11) \ + static void \ + BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11( \ + int iters) { \ + testing::ItemsProcessed(static_cast<int64>(iters) * B * (H + P00 + P01) * \ + (W + P10 + P11) * D); \ + test::Benchmark(#DEVICE, ConstructSpaceToBatchGraph( \ + #OP, TensorShape({B, H, W, D}), BS, DTYPE, \ + {{P00, P01}, {P10, P11}})) \ + .Run(iters); \ + } \ + BENCHMARK( \ + BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11); +#define BM_SpaceToBatch(OP, ...) \ + BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__); \ + BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__); \ + BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__); \ + BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__); BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 64, 2, 0, 0, 0, 0); BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 1, 2, 0, 0, 0, 0); diff --git a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc index e7848be2e3..db8d419c38 100644 --- a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc @@ -158,7 +158,7 @@ struct SpaceToBatchFunctor { #define INSTANTIATE_FOR_T(T) \ TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS(INSTANTIATE, T) -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(INSTANTIATE_FOR_T) +TF_CALL_GPU_NUMBER_TYPES(INSTANTIATE_FOR_T) #undef INSTANTIATE_FOR_T #undef INSTANTIATE diff --git a/tensorflow/core/kernels/spacetobatch_op.cc b/tensorflow/core/kernels/spacetobatch_op.cc index a22c4e8f53..e2f34a3b54 100644 --- a/tensorflow/core/kernels/spacetobatch_op.cc +++ b/tensorflow/core/kernels/spacetobatch_op.cc @@ -274,7 +274,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER); .HostMemory("paddings"), \ SpaceToBatchOp); 
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER); +TF_CALL_GPU_NUMBER_TYPES(REGISTER); #undef REGISTER #endif // GOOGLE_CUDA -- cgit v1.2.3