about summary refs log tree commit diff homepage
path: root/tensorflow/core/kernels/matmul_op_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/matmul_op_test.cc')
-rw-r--r-- tensorflow/core/kernels/matmul_op_test.cc 56
1 files changed, 56 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/matmul_op_test.cc b/tensorflow/core/kernels/matmul_op_test.cc
new file mode 100644
index 0000000000..b2b8f3d905
--- /dev/null
+++ b/tensorflow/core/kernels/matmul_op_test.cc
@@ -0,0 +1,56 @@
+#include "tensorflow/core/public/tensor.h"
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+
+// Builds the graph that a single matmul benchmark runs.
+//
+// Two DT_FLOAT tensors are created and filled with setRandom():
+//   * first operand:  shape {m, k}, or {k, m} when transpose_a is true
+//   * second operand: shape {k, n}, or {n, k} when transpose_b is true
+// Each tensor is added to the graph through test::graph::Constant, and the two
+// resulting nodes are passed, together with the transpose flags, to
+// test::graph::Matmul.
+//
+// Returns a Graph allocated with `new`. Nothing in this function releases it,
+// so the caller is responsible for it (presumably test::Benchmark takes
+// ownership -- confirm against kernel_benchmark_testlib.h).
+static Graph* Matmul(int m, int k, int n, bool transpose_a, bool transpose_b) {
+ Graph* graph = new Graph(OpRegistry::Global());
+
+ // First operand; stored pre-transposed when transpose_a is set.
+ const TensorShape lhs_shape =
+     transpose_a ? TensorShape({k, m}) : TensorShape({m, k});
+ Tensor lhs(DT_FLOAT, lhs_shape);
+ lhs.flat<float>().setRandom();
+
+ // Second operand; stored pre-transposed when transpose_b is set.
+ const TensorShape rhs_shape =
+     transpose_b ? TensorShape({n, k}) : TensorShape({k, n});
+ Tensor rhs(DT_FLOAT, rhs_shape);
+ rhs.flat<float>().setRandom();
+
+ test::graph::Matmul(graph, test::graph::Constant(graph, lhs),
+                     test::graph::Constant(graph, rhs), transpose_a,
+                     transpose_b);
+ return graph;
+}
+
+// Defines and registers one benchmark that runs the Matmul(M, K, N, TA, TB)
+// graph on DEVICE.
+//
+// M, K, N, TA, TB and DEVICE are pasted into the generated function name
+// (e.g. BM_Matmul_8_512_512_false_false_cpu), so they are expected to be
+// plain tokens such as literals or identifiers. DEVICE is also stringified
+// and handed to test::Benchmark as the device name.
+//
+// The item count reported to testing::ItemsProcessed is iters * M * K * N * 2;
+// iters is widened to int64 first so the whole product is computed in 64 bits.
+// (The factor 2 presumably counts one multiply and one add per term.)
+#define BM_MatmulDev(M, K, N, TA, TB, DEVICE) \
+ static void BM_Matmul##_##M##_##K##_##N##_##TA##_##TB##_##DEVICE( \
+ int iters) { \
+ const int64 items = static_cast<int64>(iters) * M * K * N * 2; \
+ testing::ItemsProcessed(items); \
+ Graph* graph = Matmul(M, K, N, TA, TB); \
+ test::Benchmark(#DEVICE, graph).Run(iters); \
+ } \
+ BENCHMARK(BM_Matmul##_##M##_##K##_##N##_##TA##_##TB##_##DEVICE);
+
+// Registers one problem size on both devices by expanding BM_MatmulDev once
+// with `cpu` and once with `gpu`, in that order.
+#define BM_Matmul(M, K, N, TRANS_A, TRANS_B) \
+ BM_MatmulDev(M, K, N, TRANS_A, TRANS_B, cpu); \
+ BM_MatmulDev(M, K, N, TRANS_A, TRANS_B, gpu);
+
+// Benchmark instances. Arguments are (M, K, N, transpose_a, transpose_b);
+// each BM_Matmul line expands to one cpu and one gpu benchmark.
+//
+// Typical fully connected layers: M in {8, 16, 128} (presumably the batch
+// size) with K = N = 512, no transposes.
+BM_Matmul(8, 512, 512, false, false);
+BM_Matmul(16, 512, 512, false, false);
+BM_Matmul(128, 512, 512, false, false);
+
+// Same M values with K = N = 1024, plus one large 4096 x 4096 x 4096 case.
+BM_Matmul(8, 1024, 1024, false, false);
+BM_Matmul(16, 1024, 1024, false, false);
+BM_Matmul(128, 1024, 1024, false, false);
+BM_Matmul(4096, 4096, 4096, false, false);
+
+// Backward for fully connected layers: the K = N = 1024 sizes again, with the
+// second operand transposed (transpose_b = true).
+BM_Matmul(8, 1024, 1024, false, true);
+BM_Matmul(16, 1024, 1024, false, true);
+BM_Matmul(128, 1024, 1024, false, true);
+
+// Forward softmax with large output size: K = 200, N = 10000 or 20000.
+BM_Matmul(8, 200, 10000, false, false);
+BM_Matmul(20, 200, 10000, false, false);
+BM_Matmul(20, 200, 20000, false, false);
+
+// Backward softmax with large output size: K and N swapped relative to the
+// forward cases above, with the second operand transposed.
+BM_Matmul(8, 10000, 200, false, true);
+BM_Matmul(20, 10000, 200, false, true);
+BM_Matmul(20, 20000, 200, false, true);
+
+} // end namespace tensorflow