diff options
author | 2016-10-06 12:28:12 -0800 | |
---|---|---|
committer | 2016-10-06 13:33:13 -0700 | |
commit | 8acc9eb46e9b2aa88d90dac77f37cfc98a4a739c (patch) | |
tree | 92599649e4b0bd79e08f02bf32e789375214a949 | |
parent | 9e16b3048c534fadb0f791c0cbdfa756ade6490a (diff) |
Change: 135397388
-rw-r--r-- | tensorflow/core/kernels/batchtospace_op.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/kernels/spacetobatch_benchmark_test.cc | 61 | ||||
-rw-r--r-- | tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/kernels/spacetobatch_op.cc | 2 |
4 files changed, 40 insertions, 27 deletions
```diff
diff --git a/tensorflow/core/kernels/batchtospace_op.cc b/tensorflow/core/kernels/batchtospace_op.cc
index 8a2c5e21ac..277f90cdad 100644
--- a/tensorflow/core/kernels/batchtospace_op.cc
+++ b/tensorflow/core/kernels/batchtospace_op.cc
@@ -275,7 +275,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
                               .HostMemory("crops"), \
                           BatchToSpaceOp<GPUDevice, T>);
 
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER);
 #undef REGISTER
 #endif // GOOGLE_CUDA
 
diff --git a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
index 1730d85fac..a9a9bd46b7 100644
--- a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
+++ b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
@@ -23,12 +23,10 @@ namespace tensorflow {
 
 static Graph* ConstructSpaceToBatchGraph(
     const char* op_name, const TensorShape& input_shape, const int block_size,
-    const std::vector<std::pair<int, int>>& paddings) {
+    DataType dtype, const std::vector<std::pair<int, int>>& paddings) {
   const int num_block_dims = 2;
   CHECK_EQ(num_block_dims, paddings.size());
   Graph* g = new Graph(OpRegistry::Global());
-  Tensor input(DT_FLOAT, input_shape);
-  input.flat<float>().setRandom();
   Tensor paddings_tensor(DT_INT32, TensorShape({num_block_dims, 2}));
   auto paddings_eigen_tensor = paddings_tensor.matrix<int32>();
   for (int block_dim = 0; block_dim < num_block_dims; ++block_dim) {
@@ -36,30 +34,45 @@ static Graph* ConstructSpaceToBatchGraph(
     paddings_eigen_tensor(block_dim, 1) = paddings[block_dim].second;
   }
   Node* ret;
-  NodeBuilder(g->NewName("n"), op_name)
-      .Input(test::graph::Constant(g, input))
-      .Input(test::graph::Constant(g, paddings_tensor))
-      .Attr("block_size", block_size)
-      .Finalize(g, &ret);
+  if (dtype == DT_FLOAT) {
+    Tensor input(DT_FLOAT, input_shape);
+    input.flat<float>().setRandom();
+    NodeBuilder(g->NewName("n"), op_name)
+        .Input(test::graph::Constant(g, input))
+        .Input(test::graph::Constant(g, paddings_tensor))
+        .Attr("block_size", block_size)
+        .Finalize(g, &ret);
+  } else if (dtype == DT_HALF) {
+    Tensor input(DT_HALF, input_shape);
+    input.flat<Eigen::half>().setRandom();
+    NodeBuilder(g->NewName("n"), op_name)
+        .Input(test::graph::Constant(g, input))
+        .Input(test::graph::Constant(g, paddings_tensor))
+        .Attr("block_size", block_size)
+        .Finalize(g, &ret);
+  }
   return g;
 }
 
-#define BM_SpaceToBatchDev(OP, DEVICE, B, H, W, D, BS, P00, P01, P10, P11) \
-  static void \
-  BM_##OP##_##DEVICE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11( \
-      int iters) { \
-    testing::ItemsProcessed(static_cast<int64>(iters) * B * (H + P00 + P01) * \
-                            (W + P10 + P11) * D); \
-    test::Benchmark(#DEVICE, \
-                    ConstructSpaceToBatchGraph(#OP, TensorShape({B, H, W, D}), \
-                                               BS, {{P00, P01}, {P10, P11}})) \
-        .Run(iters); \
-  } \
-  BENCHMARK( \
-      BM_##OP##_##DEVICE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11);
-#define BM_SpaceToBatch(OP, ...) \
-  BM_SpaceToBatchDev(OP, cpu, __VA_ARGS__); \
-  BM_SpaceToBatchDev(OP, gpu, __VA_ARGS__);
+#define BM_SpaceToBatchDev(OP, DEVICE, DTYPE, B, H, W, D, BS, P00, P01, P10, \
+                           P11) \
+  static void \
+  BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11( \
+      int iters) { \
+    testing::ItemsProcessed(static_cast<int64>(iters) * B * (H + P00 + P01) * \
+                            (W + P10 + P11) * D); \
+    test::Benchmark(#DEVICE, ConstructSpaceToBatchGraph( \
+                                 #OP, TensorShape({B, H, W, D}), BS, DTYPE, \
+                                 {{P00, P01}, {P10, P11}})) \
+        .Run(iters); \
+  } \
+  BENCHMARK( \
+      BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11);
+#define BM_SpaceToBatch(OP, ...) \
+  BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__); \
+  BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__); \
+  BM_SpaceToBatchDev(OP, cpu, DT_HALF, __VA_ARGS__); \
+  BM_SpaceToBatchDev(OP, gpu, DT_HALF, __VA_ARGS__);
 
 BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 64, 2, 0, 0, 0, 0);
 BM_SpaceToBatch(SpaceToBatch, 64, 100, 100, 1, 2, 0, 0, 0, 0);
diff --git a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc
index e7848be2e3..db8d419c38 100644
--- a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc
@@ -158,7 +158,7 @@ struct SpaceToBatchFunctor<GPUDevice, T, NUM_BLOCK_DIMS, B2S> {
 #define INSTANTIATE_FOR_T(T) \
   TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS(INSTANTIATE, T)
 
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(INSTANTIATE_FOR_T)
+TF_CALL_GPU_NUMBER_TYPES(INSTANTIATE_FOR_T)
 
 #undef INSTANTIATE_FOR_T
 #undef INSTANTIATE
diff --git a/tensorflow/core/kernels/spacetobatch_op.cc b/tensorflow/core/kernels/spacetobatch_op.cc
index a22c4e8f53..e2f34a3b54 100644
--- a/tensorflow/core/kernels/spacetobatch_op.cc
+++ b/tensorflow/core/kernels/spacetobatch_op.cc
@@ -274,7 +274,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
                               .HostMemory("paddings"), \
                           SpaceToBatchOp<GPUDevice, T>);
 
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER);
 #undef REGISTER
 #endif // GOOGLE_CUDA
 
```