More BcastAdd benchmarks in cwise_ops_test.cc

PiperOrigin-RevId: 186344120
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-02-20 12:25:33 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-02-20 12:29:53 -0800
commit: f0dff20a242f74c98706680fd41a80c9b5437191 (patch)
tree: 205764802eac834af926542327147a355e31b6b0 /tensorflow/core/kernels/cwise_ops_test.cc
parent: 53700ca21a4521ad62904fc596cf5f14c4cc46d1 (diff)
1 files changed, 67 insertions, 5 deletions
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
index 39f497e716..696d5840e8 100644
--- a/tensorflow/core/kernels/cwise_ops_test.cc
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -231,14 +231,22 @@ BM_BIAS_ADD_GRAD_ALL(gpu, NHWC, half, DT_HALF);
 
 Graph* BcastAdd(int rows, int cols, int dim) {
   Graph* g = new Graph(OpRegistry::Global());
-  Tensor lhs(DT_FLOAT, TensorShape({rows, cols}));
-  lhs.flat<float>().setRandom();
-  TensorShape rhs_shape;
-  if (dim == 0) {
+  TensorShape lhs_shape, rhs_shape;
+  if (dim == 0) {  // row: rhs is {rows, 1}, one value per row
+    lhs_shape = TensorShape({rows, cols});
     rhs_shape = TensorShape({rows, 1});
-  } else {
+  } else if (dim == 1) {  // col: rhs is {cols}, one value per column
+    lhs_shape = TensorShape({rows, cols});
     rhs_shape = TensorShape({cols});
+  } else if (dim == 2) {  // cross_rc: {rows, 1} + {1, cols}
+    lhs_shape = TensorShape({rows, 1});
+    rhs_shape = TensorShape({1, cols});
+  } else {  // cross_cr: {1, cols} + {rows, 1}; also taken for any other dim
+    lhs_shape = TensorShape({1, cols});
+    rhs_shape = TensorShape({rows, 1});
   }
+  Tensor lhs(DT_FLOAT, lhs_shape);
+  lhs.flat<float>().setRandom();
   Tensor rhs(DT_FLOAT, rhs_shape);
   rhs.flat<float>().setRandom();
   test::graph::Binary(g, "Add", test::graph::Constant(g, lhs),
@@ -298,5 +306,59 @@ BM_BCAST_ADD_COL_ALL(sycl);
 #undef BM_BCAST_ADD_COL_ALL
 #undef BM_BCAST_ADD_COL
 
+#define BM_BCAST_ADD_CROSS_RC(DEVICE, R, C)                            \
+  void BM_##DEVICE##_BcastAddCrossRC_R##R##_C##C(int iters, int arg) { \
+    const int rows = RowsFromArg(arg);                                 \
+    const int cols = ColsFromArg(arg);                                 \
+    const int64 tot = static_cast<int64>(iters) * rows * cols;         \
+    testing::ItemsProcessed(tot);                                      \
+    testing::BytesProcessed(tot * sizeof(float));                      \
+    test::Benchmark(#DEVICE, BcastAdd(rows, cols, 2)).Run(iters);      \
+  }                                                                    \
+  BENCHMARK(BM_##DEVICE##_BcastAddCrossRC_R##R##_C##C)                 \
+      ->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_CROSS_RC_ALL(DEVICE)   \
+  BM_BCAST_ADD_CROSS_RC(DEVICE, 512, 2048); \
+  BM_BCAST_ADD_CROSS_RC(DEVICE, 512, 4096); \
+  BM_BCAST_ADD_CROSS_RC(DEVICE, 2048, 512); \
+  BM_BCAST_ADD_CROSS_RC(DEVICE, 4096, 512);
+BM_BCAST_ADD_CROSS_RC_ALL(cpu);
+#if GOOGLE_CUDA
+BM_BCAST_ADD_CROSS_RC_ALL(gpu);
+#endif  // GOOGLE_CUDA
+#ifdef TENSORFLOW_USE_SYCL
+BM_BCAST_ADD_CROSS_RC_ALL(sycl);
+#endif  // TENSORFLOW_USE_SYCL
+#undef BM_BCAST_ADD_CROSS_RC_ALL
+#undef BM_BCAST_ADD_CROSS_RC
+
+#define BM_BCAST_ADD_CROSS_CR(DEVICE, R, C)                            \
+  void BM_##DEVICE##_BcastAddCrossCR_R##R##_C##C(int iters, int arg) { \
+    const int rows = RowsFromArg(arg);                                 \
+    const int cols = ColsFromArg(arg);                                 \
+    const int64 tot = static_cast<int64>(iters) * rows * cols;         \
+    testing::ItemsProcessed(tot);                                      \
+    testing::BytesProcessed(tot * sizeof(float));                      \
+    test::Benchmark(#DEVICE, BcastAdd(rows, cols, 3)).Run(iters);      \
+  }                                                                    \
+  BENCHMARK(BM_##DEVICE##_BcastAddCrossCR_R##R##_C##C)                 \
+      ->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_CROSS_CR_ALL(DEVICE)   \
+  BM_BCAST_ADD_CROSS_CR(DEVICE, 512, 2048); \
+  BM_BCAST_ADD_CROSS_CR(DEVICE, 512, 4096); \
+  BM_BCAST_ADD_CROSS_CR(DEVICE, 2048, 512); \
+  BM_BCAST_ADD_CROSS_CR(DEVICE, 4096, 512);
+BM_BCAST_ADD_CROSS_CR_ALL(cpu);
+#if GOOGLE_CUDA
+BM_BCAST_ADD_CROSS_CR_ALL(gpu);
+#endif  // GOOGLE_CUDA
+#ifdef TENSORFLOW_USE_SYCL
+BM_BCAST_ADD_CROSS_CR_ALL(sycl);
+#endif  // TENSORFLOW_USE_SYCL
+#undef BM_BCAST_ADD_CROSS_CR_ALL
+#undef BM_BCAST_ADD_CROSS_CR
+
 }  // namespace
 }  // namespace tensorflow
author	A. Unique TensorFlower <gardener@tensorflow.org>	2018-02-20 12:25:33 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-02-20 12:29:53 -0800
commit	f0dff20a242f74c98706680fd41a80c9b5437191 (patch)
tree	205764802eac834af926542327147a355e31b6b0 /tensorflow/core/kernels/cwise_ops_test.cc
parent	53700ca21a4521ad62904fc596cf5f14c4cc46d1 (diff)