about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/kernels/cwise_ops_test.cc
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-02-20 12:25:33 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-02-20 12:29:53 -0800
commit: f0dff20a242f74c98706680fd41a80c9b5437191 (patch)
tree: 205764802eac834af926542327147a355e31b6b0 /tensorflow/core/kernels/cwise_ops_test.cc
parent: 53700ca21a4521ad62904fc596cf5f14c4cc46d1 (diff)
More BcastAdd benchmarks in cwise_ops_test.cc
PiperOrigin-RevId: 186344120
Diffstat (limited to 'tensorflow/core/kernels/cwise_ops_test.cc')
-rw-r--r-- tensorflow/core/kernels/cwise_ops_test.cc | 72
1 files changed, 67 insertions, 5 deletions
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
index 39f497e716..696d5840e8 100644
--- a/tensorflow/core/kernels/cwise_ops_test.cc
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -231,14 +231,22 @@ BM_BIAS_ADD_GRAD_ALL(gpu, NHWC, half, DT_HALF);
Graph* BcastAdd(int rows, int cols, int dim) {
Graph* g = new Graph(OpRegistry::Global());
- Tensor lhs(DT_FLOAT, TensorShape({rows, cols}));
- lhs.flat<float>().setRandom();
- TensorShape rhs_shape;
- if (dim == 0) {
+ TensorShape lhs_shape, rhs_shape;
+ if (dim == 0) { // row
+ lhs_shape = TensorShape({rows, cols});
rhs_shape = TensorShape({rows, 1});
- } else {
+ } else if (dim == 1) { // col
+ lhs_shape = TensorShape({rows, cols});
rhs_shape = TensorShape({cols});
+ } else if (dim == 2) { // cross_rc
+ lhs_shape = TensorShape({rows, 1});
+ rhs_shape = TensorShape({1, cols});
+ } else { // cross_cr
+ lhs_shape = TensorShape({1, cols});
+ rhs_shape = TensorShape({rows, 1});
}
+ Tensor lhs(DT_FLOAT, lhs_shape);
+ lhs.flat<float>().setRandom();
Tensor rhs(DT_FLOAT, rhs_shape);
rhs.flat<float>().setRandom();
test::graph::Binary(g, "Add", test::graph::Constant(g, lhs),
@@ -298,5 +306,59 @@ BM_BCAST_ADD_COL_ALL(sycl);
#undef BM_BCAST_ADD_COL_ALL
#undef BM_BCAST_ADD_COL
+#define BM_BCAST_ADD_CROSS_RC(DEVICE, R, C) \
+ void BM_##DEVICE##_BcastAddCrossRC_R##R##_C##C(int iters, int arg) { \
+ const int rows = RowsFromArg(arg); \
+ const int cols = ColsFromArg(arg); \
+ const int64 tot = static_cast<int64>(iters) * rows * cols; \
+ testing::ItemsProcessed(tot); \
+ testing::BytesProcessed(tot * sizeof(float)); \
+ test::Benchmark(#DEVICE, BcastAdd(rows, cols, 2)).Run(iters); \
+ } \
+ BENCHMARK(BM_##DEVICE##_BcastAddCrossRC_R##R##_C##C) \
+ ->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_CROSS_RC_ALL(DEVICE) \
+ BM_BCAST_ADD_CROSS_RC(DEVICE, 512, 2048); \
+ BM_BCAST_ADD_CROSS_RC(DEVICE, 512, 4096); \
+ BM_BCAST_ADD_CROSS_RC(DEVICE, 2048, 512); \
+ BM_BCAST_ADD_CROSS_RC(DEVICE, 4096, 512);
+BM_BCAST_ADD_CROSS_RC_ALL(cpu);
+#if GOOGLE_CUDA
+BM_BCAST_ADD_CROSS_RC_ALL(gpu);
+#endif // GOOGLE_CUDA
+#ifdef TENSORFLOW_USE_SYCL
+BM_BCAST_ADD_CROSS_RC_ALL(sycl);
+#endif // TENSORFLOW_USE_SYCL
+#undef BM_BCAST_ADD_CROSS_RC_ALL
+#undef BM_BCAST_ADD_CROSS_RC
+
+#define BM_BCAST_ADD_CROSS_CR(DEVICE, R, C) \
+ void BM_##DEVICE##_BcastAddCrossCR_R##R##_C##C(int iters, int arg) { \
+ const int rows = RowsFromArg(arg); \
+ const int cols = ColsFromArg(arg); \
+ const int64 tot = static_cast<int64>(iters) * rows * cols; \
+ testing::ItemsProcessed(tot); \
+ testing::BytesProcessed(tot * sizeof(float)); \
+ test::Benchmark(#DEVICE, BcastAdd(rows, cols, 3)).Run(iters); \
+ } \
+ BENCHMARK(BM_##DEVICE##_BcastAddCrossCR_R##R##_C##C) \
+ ->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_CROSS_CR_ALL(DEVICE) \
+ BM_BCAST_ADD_CROSS_CR(DEVICE, 512, 2048); \
+ BM_BCAST_ADD_CROSS_CR(DEVICE, 512, 4096); \
+ BM_BCAST_ADD_CROSS_CR(DEVICE, 2048, 512); \
+ BM_BCAST_ADD_CROSS_CR(DEVICE, 4096, 512);
+BM_BCAST_ADD_CROSS_CR_ALL(cpu);
+#if GOOGLE_CUDA
+BM_BCAST_ADD_CROSS_CR_ALL(gpu);
+#endif // GOOGLE_CUDA
+#ifdef TENSORFLOW_USE_SYCL
+BM_BCAST_ADD_CROSS_CR_ALL(sycl);
+#endif // TENSORFLOW_USE_SYCL
+#undef BM_BCAST_ADD_CROSS_CR_ALL
+#undef BM_BCAST_ADD_CROSS_CR
+
} // namespace
} // namespace tensorflow