diff options
Diffstat (limited to 'tensorflow/core/kernels/cwise_ops_test.cc')
-rw-r--r-- | tensorflow/core/kernels/cwise_ops_test.cc | 167 |
1 file changed, 167 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
new file mode 100644
index 0000000000..56af248117
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -0,0 +1,167 @@
+#include "tensorflow/core/public/tensor.h"
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+
+// Creates a Graph which applies a unary "func" on a 3D float tensor
+// of "num" elements.
+static Graph* Unary(const string& func, int num) {
+  RequireDefaultOps();
+  Graph* g = new Graph(OpRegistry::Global());
+  Tensor data(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)}));
+  CHECK_GT(data.NumElements(), 0);
+  data.flat<float>().setRandom();
+  test::graph::Unary(g, func, test::graph::Constant(g, data), 0);
+  return g;
+}
+
+static int kRows = 100000;
+
+static int RowsAndColsArg(int r, int c) { return r * kRows + c; }
+static int RowsFromArg(int arg) { return (arg / kRows); }
+static int ColsFromArg(int arg) { return (arg % kRows); }
+
+#define BM_UNARY(DEVICE, FUNC)                              \
+  static void BM_##DEVICE##_##FUNC(int iters, int num) {    \
+    const int64 tot = static_cast<int64>(iters) * num;      \
+    testing::ItemsProcessed(tot);                           \
+    testing::BytesProcessed(tot * sizeof(float));           \
+    test::Benchmark(#DEVICE, Unary(#FUNC, num)).Run(iters); \
+  }                                                         \
+  BENCHMARK(BM_##DEVICE##_##FUNC)->Range(4 << 10, 1 << 20);
+
+BM_UNARY(cpu, Floor);
+BM_UNARY(gpu, Floor);
+
+// data func scalar.
+static Graph* BinaryScalar(int num, const string& func) {
+  RequireDefaultOps();
+  Graph* g = new Graph(OpRegistry::Global());
+  Tensor lhs(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)}));
+  lhs.flat<float>().setRandom();
+  Tensor rhs(DT_FLOAT, TensorShape({}));
+  rhs.flat<float>().setRandom();
+  test::graph::Binary(g, func, test::graph::Constant(g, lhs),
+                      test::graph::Constant(g, rhs));
+  return g;
+}
+
+#define BM_BINARY_SCALAR(DEVICE, FUNC)                             \
+  static void BM_##DEVICE##_##FUNC##_scalar(int iters, int num) {  \
+    const int64 tot = static_cast<int64>(iters) * num;             \
+    testing::ItemsProcessed(tot);                                  \
+    testing::BytesProcessed(tot * sizeof(float));                  \
+    test::Benchmark(#DEVICE, BinaryScalar(num, #FUNC)).Run(iters); \
+  }                                                                \
+  BENCHMARK(BM_##DEVICE##_##FUNC##_scalar)                         \
+      ->Arg(4096) /* must >= 4096 */                               \
+      ->Arg(32768)                                                 \
+      ->Arg(131072)                                                \
+      ->Arg(1048576);
+
+BM_BINARY_SCALAR(cpu, Less);
+BM_BINARY_SCALAR(gpu, Less);
+BM_BINARY_SCALAR(cpu, Add);
+BM_BINARY_SCALAR(gpu, Add);
+#undef BM_BINARY_SCALAR
+
+static Graph* BiasAdd(int rows, int cols) {
+  RequireDefaultOps();
+  Graph* g = new Graph(OpRegistry::Global());
+  Tensor lhs(DT_FLOAT, TensorShape({rows, cols}));
+  lhs.flat<float>().setRandom();
+  TensorShape rhs_shape;
+  rhs_shape = TensorShape({cols});
+  Tensor rhs(DT_FLOAT, rhs_shape);
+  rhs.flat<float>().setRandom();
+  test::graph::Binary(g, "BiasAdd", test::graph::Constant(g, lhs),
+                      test::graph::Constant(g, rhs));
+  return g;
+}
+
+#define BM_BIAS_ADD(DEVICE, R, C)                                     \
+  static void BM_##DEVICE##_BiasAdd_R##R##_C##C(int iters, int arg) { \
+    const int rows = RowsFromArg(arg);                                \
+    const int cols = ColsFromArg(arg);                                \
+    const int64 tot = static_cast<int64>(iters) * rows * cols;        \
+    testing::ItemsProcessed(tot);                                     \
+    testing::BytesProcessed(tot * sizeof(float));                     \
+    test::Benchmark(#DEVICE, BiasAdd(rows, cols)).Run(iters);         \
+  }                                                                   \
+  BENCHMARK(BM_##DEVICE##_BiasAdd_R##R##_C##C)->Arg(RowsAndColsArg(R, C));
+
+#define BM_BIAS_ADD_ALL(DEVICE)   \
+  BM_BIAS_ADD(DEVICE, 512, 2048); \
+  BM_BIAS_ADD(DEVICE, 512, 4096); \
+  BM_BIAS_ADD(DEVICE, 2048, 512); \
+  BM_BIAS_ADD(DEVICE, 4096, 512);
+
+BM_BIAS_ADD_ALL(cpu);
+BM_BIAS_ADD_ALL(gpu);
+#undef BM_BIAS_ADD_ALL
+#undef BM_BIAS_ADD
+
+static Graph* BcastAdd(int rows, int cols, int dim) {
+  RequireDefaultOps();
+  Graph* g = new Graph(OpRegistry::Global());
+  Tensor lhs(DT_FLOAT, TensorShape({rows, cols}));
+  lhs.flat<float>().setRandom();
+  TensorShape rhs_shape;
+  if (dim == 0) {
+    rhs_shape = TensorShape({rows, 1});
+  } else {
+    rhs_shape = TensorShape({cols});
+  }
+  Tensor rhs(DT_FLOAT, rhs_shape);
+  rhs.flat<float>().setRandom();
+  test::graph::Binary(g, "Add", test::graph::Constant(g, lhs),
+                      test::graph::Constant(g, rhs));
+  return g;
+}
+
+#define BM_BCAST_ADD_ROW(DEVICE, R, C)                                    \
+  static void BM_##DEVICE##_BcastAddRow_R##R##_C##C(int iters, int arg) { \
+    const int rows = RowsFromArg(arg);                                    \
+    const int cols = ColsFromArg(arg);                                    \
+    const int64 tot = static_cast<int64>(iters) * rows * cols;            \
+    testing::ItemsProcessed(tot);                                         \
+    testing::BytesProcessed(tot * sizeof(float));                         \
+    test::Benchmark(#DEVICE, BcastAdd(rows, cols, 0)).Run(iters);         \
+  }                                                                       \
+  BENCHMARK(BM_##DEVICE##_BcastAddRow_R##R##_C##C)->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_ROW_ALL(DEVICE)   \
+  BM_BCAST_ADD_ROW(DEVICE, 512, 2048); \
+  BM_BCAST_ADD_ROW(DEVICE, 512, 4096); \
+  BM_BCAST_ADD_ROW(DEVICE, 2048, 512); \
+  BM_BCAST_ADD_ROW(DEVICE, 4096, 512);
+BM_BCAST_ADD_ROW_ALL(cpu);
+BM_BCAST_ADD_ROW_ALL(gpu);
+#undef BM_BCAST_ADD_ROW_ALL
+#undef BM_BCAST_ADD_ROW
+
+#define BM_BCAST_ADD_COL(DEVICE, R, C)                                    \
+  static void BM_##DEVICE##_BcastAddCol_R##R##_C##C(int iters, int arg) { \
+    const int rows = RowsFromArg(arg);                                    \
+    const int cols = ColsFromArg(arg);                                    \
+    const int64 tot = static_cast<int64>(iters) * rows * cols;            \
+    testing::ItemsProcessed(tot);                                         \
+    testing::BytesProcessed(tot * sizeof(float));                         \
+    test::Benchmark(#DEVICE, BcastAdd(rows, cols, 1)).Run(iters);         \
+  }                                                                       \
+  BENCHMARK(BM_##DEVICE##_BcastAddCol_R##R##_C##C)->Arg(RowsAndColsArg(R, C));
+
+#define BM_BCAST_ADD_COL_ALL(DEVICE)   \
+  BM_BCAST_ADD_COL(DEVICE, 512, 2048); \
+  BM_BCAST_ADD_COL(DEVICE, 512, 4096); \
+  BM_BCAST_ADD_COL(DEVICE, 2048, 512); \
+  BM_BCAST_ADD_COL(DEVICE, 4096, 512);
+BM_BCAST_ADD_COL_ALL(cpu);
+BM_BCAST_ADD_COL_ALL(gpu);
+#undef BM_BCAST_ADD_COL_ALL
+#undef BM_BCAST_ADD_COL
+
+}  // end namespace tensorflow