diff options
author | 2018-02-20 12:25:33 -0800 | |
---|---|---|
committer | 2018-02-20 12:29:53 -0800 | |
commit | f0dff20a242f74c98706680fd41a80c9b5437191 (patch) | |
tree | 205764802eac834af926542327147a355e31b6b0 /tensorflow/core/kernels/cwise_ops_test.cc | |
parent | 53700ca21a4521ad62904fc596cf5f14c4cc46d1 (diff) |
More BcastAdd benchmarks in cwise_ops_test.cc
PiperOrigin-RevId: 186344120
Diffstat (limited to 'tensorflow/core/kernels/cwise_ops_test.cc')
-rw-r--r-- | tensorflow/core/kernels/cwise_ops_test.cc | 72 |
1 file changed, 67 insertions, 5 deletions
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
index 39f497e716..696d5840e8 100644
--- a/tensorflow/core/kernels/cwise_ops_test.cc
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -231,14 +231,22 @@ BM_BIAS_ADD_GRAD_ALL(gpu, NHWC, half, DT_HALF);
 
 Graph* BcastAdd(int rows, int cols, int dim) {
   Graph* g = new Graph(OpRegistry::Global());
-  Tensor lhs(DT_FLOAT, TensorShape({rows, cols}));
-  lhs.flat<float>().setRandom();
-  TensorShape rhs_shape;
-  if (dim == 0) {
+  TensorShape lhs_shape, rhs_shape;
+  if (dim == 0) {  // row
+    lhs_shape = TensorShape({rows, cols});
     rhs_shape = TensorShape({rows, 1});
-  } else {
+  } else if (dim == 1) {  // col
+    lhs_shape = TensorShape({rows, cols});
     rhs_shape = TensorShape({cols});
+  } else if (dim == 2) {  // cross_rc
+    lhs_shape = TensorShape({rows, 1});
+    rhs_shape = TensorShape({1, cols});
+  } else {  // cross_cr
+    lhs_shape = TensorShape({1, cols});
+    rhs_shape = TensorShape({rows, 1});
   }
+  Tensor lhs(DT_FLOAT, lhs_shape);
+  lhs.flat<float>().setRandom();
   Tensor rhs(DT_FLOAT, rhs_shape);
   rhs.flat<float>().setRandom();
   test::graph::Binary(g, "Add", test::graph::Constant(g, lhs),
@@ -298,5 +306,59 @@ BM_BCAST_ADD_COL_ALL(sycl);
 #undef BM_BCAST_ADD_COL_ALL
 #undef BM_BCAST_ADD_COL
 
+#define BM_BCAST_ADD_CROSS_RC(DEVICE, R, C) \
+  void BM_##DEVICE##_BcastAddCrossRC_R##R##_C##C(int iters, int arg) { \
+    const int rows = RowsFromArg(arg); \
+    const int cols = ColsFromArg(arg); \
+    const int64 tot = static_cast<int64>(iters) * rows * cols; \
+    testing::ItemsProcessed(tot); \
+    testing::BytesProcessed(tot * sizeof(float)); \
+    test::Benchmark(#DEVICE, BcastAdd(rows, cols, 2)).Run(iters); \
+  } \
+  BENCHMARK(BM_##DEVICE##_BcastAddCrossRC_R##R##_C##C) \
+      ->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_CROSS_RC_ALL(DEVICE) \
+  BM_BCAST_ADD_CROSS_RC(DEVICE, 512, 2048); \
+  BM_BCAST_ADD_CROSS_RC(DEVICE, 512, 4096); \
+  BM_BCAST_ADD_CROSS_RC(DEVICE, 2048, 512); \
+  BM_BCAST_ADD_CROSS_RC(DEVICE, 4096, 512);
+BM_BCAST_ADD_CROSS_RC_ALL(cpu);
+#if GOOGLE_CUDA
+BM_BCAST_ADD_CROSS_RC_ALL(gpu);
+#endif  // GOOGLE_CUDA
+#ifdef TENSORFLOW_USE_SYCL
+BM_BCAST_ADD_CROSS_RC_ALL(sycl);
+#endif  // TENSORFLOW_USE_SYCL
+#undef BM_BCAST_ADD_CROSS_RC_ALL
+#undef BM_BCAST_ADD_CROSS_RC
+
+#define BM_BCAST_ADD_CROSS_CR(DEVICE, R, C) \
+  void BM_##DEVICE##_BcastAddCrossCR_R##R##_C##C(int iters, int arg) { \
+    const int rows = RowsFromArg(arg); \
+    const int cols = ColsFromArg(arg); \
+    const int64 tot = static_cast<int64>(iters) * rows * cols; \
+    testing::ItemsProcessed(tot); \
+    testing::BytesProcessed(tot * sizeof(float)); \
+    test::Benchmark(#DEVICE, BcastAdd(rows, cols, 3)).Run(iters); \
+  } \
+  BENCHMARK(BM_##DEVICE##_BcastAddCrossCR_R##R##_C##C) \
+      ->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_CROSS_CR_ALL(DEVICE) \
+  BM_BCAST_ADD_CROSS_CR(DEVICE, 512, 2048); \
+  BM_BCAST_ADD_CROSS_CR(DEVICE, 512, 4096); \
+  BM_BCAST_ADD_CROSS_CR(DEVICE, 2048, 512); \
+  BM_BCAST_ADD_CROSS_CR(DEVICE, 4096, 512);
+BM_BCAST_ADD_CROSS_CR_ALL(cpu);
+#if GOOGLE_CUDA
+BM_BCAST_ADD_CROSS_CR_ALL(gpu);
+#endif  // GOOGLE_CUDA
+#ifdef TENSORFLOW_USE_SYCL
+BM_BCAST_ADD_CROSS_CR_ALL(sycl);
+#endif  // TENSORFLOW_USE_SYCL
+#undef BM_BCAST_ADD_CROSS_CR_ALL
+#undef BM_BCAST_ADD_CROSS_CR
+
 }  // namespace
 }  // namespace tensorflow