aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/cwise_ops_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/cwise_ops_test.cc')
-rw-r--r--tensorflow/core/kernels/cwise_ops_test.cc167
1 files changed, 167 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
new file mode 100644
index 0000000000..56af248117
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -0,0 +1,167 @@
+#include "tensorflow/core/public/tensor.h"
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+
+// Creates a Graph which applies a unary "func" on a 3D float tensor
+// of "num" elements.
+static Graph* Unary(const string& func, int num) {
+ RequireDefaultOps();
+ Graph* g = new Graph(OpRegistry::Global());
+ Tensor data(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)}));
+ CHECK_GT(data.NumElements(), 0);
+ data.flat<float>().setRandom();
+ test::graph::Unary(g, func, test::graph::Constant(g, data), 0);
+ return g;
+}
+
// Encodes a (rows, cols) pair into a single benchmark Arg() value and
// decodes it back. kRows must exceed every cols value used below.
// Made const: the original was a mutable file-scope global even though
// it is never written after initialization.
static const int kRows = 100000;

// Packs rows and cols into one int: arg = rows * kRows + cols.
static int RowsAndColsArg(int r, int c) { return r * kRows + c; }
// Recovers the rows component of a packed arg.
static int RowsFromArg(int arg) { return (arg / kRows); }
// Recovers the cols component of a packed arg.
static int ColsFromArg(int arg) { return (arg % kRows); }
+
+#define BM_UNARY(DEVICE, FUNC) \
+ static void BM_##DEVICE##_##FUNC(int iters, int num) { \
+ const int64 tot = static_cast<int64>(iters) * num; \
+ testing::ItemsProcessed(tot); \
+ testing::BytesProcessed(tot * sizeof(float)); \
+ test::Benchmark(#DEVICE, Unary(#FUNC, num)).Run(iters); \
+ } \
+ BENCHMARK(BM_##DEVICE##_##FUNC)->Range(4 << 10, 1 << 20);
+
+BM_UNARY(cpu, Floor);
+BM_UNARY(gpu, Floor);
+
+// data func scalar.
+static Graph* BinaryScalar(int num, const string& func) {
+ RequireDefaultOps();
+ Graph* g = new Graph(OpRegistry::Global());
+ Tensor lhs(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)}));
+ lhs.flat<float>().setRandom();
+ Tensor rhs(DT_FLOAT, TensorShape({}));
+ rhs.flat<float>().setRandom();
+ test::graph::Binary(g, func, test::graph::Constant(g, lhs),
+ test::graph::Constant(g, rhs));
+ return g;
+}
+
+#define BM_BINARY_SCALAR(DEVICE, FUNC) \
+ static void BM_##DEVICE##_##FUNC##_scalar(int iters, int num) { \
+ const int64 tot = static_cast<int64>(iters) * num; \
+ testing::ItemsProcessed(tot); \
+ testing::BytesProcessed(tot * sizeof(float)); \
+ test::Benchmark(#DEVICE, BinaryScalar(num, #FUNC)).Run(iters); \
+ } \
+ BENCHMARK(BM_##DEVICE##_##FUNC##_scalar) \
+ ->Arg(4096) /* must >= 4096 */ \
+ ->Arg(32768) \
+ ->Arg(131072) \
+ ->Arg(1048576);
+
+BM_BINARY_SCALAR(cpu, Less);
+BM_BINARY_SCALAR(gpu, Less);
+BM_BINARY_SCALAR(cpu, Add);
+BM_BINARY_SCALAR(gpu, Add);
+#undef BM_BINARY_SCALAR
+
+static Graph* BiasAdd(int rows, int cols) {
+ RequireDefaultOps();
+ Graph* g = new Graph(OpRegistry::Global());
+ Tensor lhs(DT_FLOAT, TensorShape({rows, cols}));
+ lhs.flat<float>().setRandom();
+ TensorShape rhs_shape;
+ rhs_shape = TensorShape({cols});
+ Tensor rhs(DT_FLOAT, rhs_shape);
+ rhs.flat<float>().setRandom();
+ test::graph::Binary(g, "BiasAdd", test::graph::Constant(g, lhs),
+ test::graph::Constant(g, rhs));
+ return g;
+}
+
+#define BM_BIAS_ADD(DEVICE, R, C) \
+ static void BM_##DEVICE##_BiasAdd_R##R##_C##C(int iters, int arg) { \
+ const int rows = RowsFromArg(arg); \
+ const int cols = ColsFromArg(arg); \
+ const int64 tot = static_cast<int64>(iters) * rows * cols; \
+ testing::ItemsProcessed(tot); \
+ testing::BytesProcessed(tot * sizeof(float)); \
+ test::Benchmark(#DEVICE, BiasAdd(rows, cols)).Run(iters); \
+ } \
+ BENCHMARK(BM_##DEVICE##_BiasAdd_R##R##_C##C)->Arg(RowsAndColsArg(R, C));
+
+#define BM_BIAS_ADD_ALL(DEVICE) \
+ BM_BIAS_ADD(DEVICE, 512, 2048); \
+ BM_BIAS_ADD(DEVICE, 512, 4096); \
+ BM_BIAS_ADD(DEVICE, 2048, 512); \
+ BM_BIAS_ADD(DEVICE, 4096, 512);
+
+BM_BIAS_ADD_ALL(cpu);
+BM_BIAS_ADD_ALL(gpu);
+#undef BM_BIAS_ADD_ALL
+#undef BM_BIAS_ADD
+
+static Graph* BcastAdd(int rows, int cols, int dim) {
+ RequireDefaultOps();
+ Graph* g = new Graph(OpRegistry::Global());
+ Tensor lhs(DT_FLOAT, TensorShape({rows, cols}));
+ lhs.flat<float>().setRandom();
+ TensorShape rhs_shape;
+ if (dim == 0) {
+ rhs_shape = TensorShape({rows, 1});
+ } else {
+ rhs_shape = TensorShape({cols});
+ }
+ Tensor rhs(DT_FLOAT, rhs_shape);
+ rhs.flat<float>().setRandom();
+ test::graph::Binary(g, "Add", test::graph::Constant(g, lhs),
+ test::graph::Constant(g, rhs));
+ return g;
+}
+
+#define BM_BCAST_ADD_ROW(DEVICE, R, C) \
+ static void BM_##DEVICE##_BcastAddRow_R##R##_C##C(int iters, int arg) { \
+ const int rows = RowsFromArg(arg); \
+ const int cols = ColsFromArg(arg); \
+ const int64 tot = static_cast<int64>(iters) * rows * cols; \
+ testing::ItemsProcessed(tot); \
+ testing::BytesProcessed(tot * sizeof(float)); \
+ test::Benchmark(#DEVICE, BcastAdd(rows, cols, 0)).Run(iters); \
+ } \
+ BENCHMARK(BM_##DEVICE##_BcastAddRow_R##R##_C##C)->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_ROW_ALL(DEVICE) \
+ BM_BCAST_ADD_ROW(DEVICE, 512, 2048); \
+ BM_BCAST_ADD_ROW(DEVICE, 512, 4096); \
+ BM_BCAST_ADD_ROW(DEVICE, 2048, 512); \
+ BM_BCAST_ADD_ROW(DEVICE, 4096, 512);
+BM_BCAST_ADD_ROW_ALL(cpu);
+BM_BCAST_ADD_ROW_ALL(gpu);
+#undef BM_BCAST_ADD_ROW_ALL
+#undef BM_BCAST_ADD_ROW
+
+#define BM_BCAST_ADD_COL(DEVICE, R, C) \
+ static void BM_##DEVICE##_BcastAddCol_R##R##_C##C(int iters, int arg) { \
+ const int rows = RowsFromArg(arg); \
+ const int cols = ColsFromArg(arg); \
+ const int64 tot = static_cast<int64>(iters) * rows * cols; \
+ testing::ItemsProcessed(tot); \
+ testing::BytesProcessed(tot * sizeof(float)); \
+ test::Benchmark(#DEVICE, BcastAdd(rows, cols, 1)).Run(iters); \
+ } \
+ BENCHMARK(BM_##DEVICE##_BcastAddCol_R##R##_C##C)->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_COL_ALL(DEVICE) \
+ BM_BCAST_ADD_COL(DEVICE, 512, 2048); \
+ BM_BCAST_ADD_COL(DEVICE, 512, 4096); \
+ BM_BCAST_ADD_COL(DEVICE, 2048, 512); \
+ BM_BCAST_ADD_COL(DEVICE, 4096, 512);
+BM_BCAST_ADD_COL_ALL(cpu);
+BM_BCAST_ADD_COL_ALL(gpu);
+#undef BM_BCAST_ADD_COL_ALL
+#undef BM_BCAST_ADD_COL
+
+} // end namespace tensorflow