about summary refs log tree commit diff homepage
path: root/tensorflow/core/kernels/sparse_matmul_op_test.cc
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-02-27 11:32:44 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-02-27 12:09:10 -0800
commitdb8ea4ff07ad75cf5f0220428fbe4b84fcf68f4a (patch)
treefb6288c5b6dcc6d28167c745fa5b7688417cbaf8 /tensorflow/core/kernels/sparse_matmul_op_test.cc
parent332ee5051fc38babb53cc8cbf3c3120e5651f4e8 (diff)
- Upgraded libxsmm to 1.7.1.
- Applied LLVM optimization patch to libxsmm (https://github.com/hfp/libxsmm/commit/0e412d5d2769a8754cace64e56e26e14093f887d.patch).
- Limited outstanding libxsmm sparse matrix multiply handle counts to limit memory usage for temporary space.
- Added extra logging to libxsmm handle management in TensorFlow.
- Added support for running multiple sparse matrix multiplies simultaneously in the performance benchmark to match some practical use cases.
- Added more size combinations to the sparse matrix multiply benchmark.
- Fixed dependencies for xsmm_conv2d_test.

Change: 148672973
Diffstat (limited to 'tensorflow/core/kernels/sparse_matmul_op_test.cc')
-rw-r--r-- tensorflow/core/kernels/sparse_matmul_op_test.cc | 105
1 file changed, 91 insertions(+), 14 deletions(-)
diff --git a/tensorflow/core/kernels/sparse_matmul_op_test.cc b/tensorflow/core/kernels/sparse_matmul_op_test.cc
index 42fdde23dd..b5c69466f8 100644
--- a/tensorflow/core/kernels/sparse_matmul_op_test.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op_test.cc
@@ -94,6 +94,16 @@ static Graph* SparseMatMul(int m, int n, int d, float sparsity_a,
transpose_a, transpose_b);
}
+static Graph* ReplicatedSparseMatMul(int m, int n, int d, float sparsity_1,
+ float sparsity_2, int copies) {
+ Graph* g = new Graph(OpRegistry::Global());
+ for (int i = 0; i < copies; ++i) {
+ SparseMatMulHelper<float, float>(g, m, n, d, sparsity_1, sparsity_2, false,
+ false);
+ }
+ return g;
+}
+
#define BM_SPARSE(M, K, N, S1, S2, TRA, TRB, TA, TB) \
static void \
BM_Sparse##_##M##_##K##_##N##_##S1##_##S2##_##TRA##_##TRB##_##TA##_##TB( \
@@ -112,6 +122,23 @@ static Graph* SparseMatMul(int m, int n, int d, float sparsity_a,
BENCHMARK( \
BM_Sparse##_##M##_##K##_##N##_##S1##_##S2##_##TRA##_##TRB##_##TA##_##TB);
+#define BM_SPARSE_REPLICATED(M, K, N, S1, S2, Copies) \
+ static void BM_Sparse_replicated##_##M##_##K##_##N##_##S1##_##S2##_##Copies( \
+ int iters) { \
+ testing::StopTiming(); \
+ testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * Copies * \
+ 2); \
+ std::string label = strings::Printf("copies: %d sp_a: %0.2f sp_b: %0.2f", \
+ (Copies), S1 / 100.0, S2 / 100.0); \
+ testing::SetLabel(label); \
+ testing::UseRealTime(); \
+ auto g = \
+ ReplicatedSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0, (Copies)); \
+ testing::StartTiming(); \
+ test::Benchmark("cpu", g).Run(iters); \
+ } \
+ BENCHMARK(BM_Sparse_replicated##_##M##_##K##_##N##_##S1##_##S2##_##Copies);
+
#define BM_SPARSE_FLOAT(M, K, N, S1, S2, TRA, TRB) \
BM_SPARSE(M, K, N, S1, S2, TRA, TRB, float, float)
#define BM_SPARSE_BFLOAT16(M, K, N, S1, S2, TRA, TRB) \
@@ -144,6 +171,33 @@ BM_SPARSE_FLOAT(1024, 1024, 1024, 1, 0, false, false);
BM_SPARSE_FLOAT(1024, 1024, 1024, 85, 0, false, false);
BM_SPARSE_FLOAT(256, 256, 256, 1, 0, false, false);
BM_SPARSE_FLOAT(512, 512, 512, 1, 0, false, false);
+BM_SPARSE_FLOAT(2560, 400, 1024, 85, 0, false, false);
+BM_SPARSE_FLOAT(2560, 400, 1024, 85, 0, true, false);
+
+BM_SPARSE_FLOAT(400, 800, 2560, 85, 0, false, false);
+BM_SPARSE_FLOAT(400, 2560, 1024, 85, 0, false, false);
+BM_SPARSE_FLOAT(400, 1024, 256, 85, 0, false, false);
+BM_SPARSE_FLOAT(400, 256, 1, 85, 0, false, false);
+
+BM_SPARSE_REPLICATED(400, 800, 2560, 85, 0, 6);
+BM_SPARSE_REPLICATED(400, 2560, 1024, 85, 0, 6);
+BM_SPARSE_REPLICATED(400, 1024, 256, 85, 0, 6);
+BM_SPARSE_REPLICATED(400, 256, 1, 85, 0, 6);
+
+BM_SPARSE_FLOAT(2048, 1792, 1024, 85, 0, false, false);
+BM_SPARSE_FLOAT(2048, 1024, 768, 85, 0, false, false);
+BM_SPARSE_FLOAT(2048, 768, 512, 85, 0, false, false);
+BM_SPARSE_FLOAT(2048, 512, 256, 85, 0, false, false);
+
+BM_SPARSE_FLOAT(2049, 1792, 1024, 85, 0, false, false);
+BM_SPARSE_FLOAT(2049, 1024, 768, 85, 0, false, false);
+BM_SPARSE_FLOAT(2049, 768, 512, 85, 0, false, false);
+BM_SPARSE_FLOAT(2049, 512, 256, 85, 0, false, false);
+
+BM_SPARSE_REPLICATED(2048, 1792, 1024, 85, 0, 6);
+BM_SPARSE_REPLICATED(2048, 1024, 768, 85, 0, 6);
+BM_SPARSE_REPLICATED(2048, 768, 512, 85, 0, 6);
+BM_SPARSE_REPLICATED(2048, 512, 256, 85, 0, 6);
// Test bfloat16
BM_SPARSE_BFLOAT16(2048, 2048, 2048, 0, 0, false, false);
@@ -156,30 +210,53 @@ BM_SPARSE_FLOAT_BFLOAT16(2048, 2048, 2048, 85, 0, false, false);
BM_SPARSE_FLOAT_BFLOAT16(2048, 2048, 2048, 99, 0, false, false);
static Graph* MultiSparseMatMul(int m, int n, int d, float sparsity_1,
- float sparsity_2) {
+ float sparsity_2, int copies) {
Graph* g = new Graph(OpRegistry::Global());
- SparseMatMulHelper<float, float>(g, d, n, m, sparsity_1, sparsity_2, true,
- false);
- SparseMatMulHelper<float, float>(g, m, d, n, sparsity_2, 0, false, true);
+ for (int i = 0; i < copies; ++i) {
+ SparseMatMulHelper<float, float>(g, d, n, m, sparsity_1, sparsity_2, true,
+ false);
+ SparseMatMulHelper<float, float>(g, m, d, n, sparsity_2, 0, false, true);
+ }
return g;
}
-#define BM_SPARSE_MULTI(M, K, N, S1, S2) \
- static void BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2(int iters) { \
+#define BM_SPARSE_MULTI(M, K, N, S1, S2, Copies) \
+ static void BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies( \
+ int iters) { \
testing::StopTiming(); \
- testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * 2 * 3); \
- std::string label = strings::Printf("%d_%d_%d_%0.2f_%0.2f", M, K, N, \
- S1 / 100.0, S2 / 100.0); \
+ testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * 2 * 2 * \
+ Copies); \
+ std::string label = strings::Printf("%d_%d_%d_%d_%0.2f_%0.2f", M, K, N, \
+ Copies, S1 / 100.0, S2 / 100.0); \
testing::SetLabel(label); \
testing::UseRealTime(); \
- auto g = MultiSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0); \
+ auto g = MultiSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0, Copies); \
testing::StartTiming(); \
test::Benchmark("cpu", g).Run(iters); \
} \
- BENCHMARK(BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2);
-
-BM_SPARSE_MULTI(1024, 2140, 4096, 0, 82);
-BM_SPARSE_MULTI(1024, 4096, 2048, 83, 83);
+ BENCHMARK(BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies);
+
+BM_SPARSE_MULTI(1024, 2140, 4096, 0, 82, 1);
+BM_SPARSE_MULTI(1024, 4096, 2048, 83, 83, 1);
+BM_SPARSE_MULTI(400, 800, 2560, 85, 85, 1);
+BM_SPARSE_MULTI(400, 2560, 1024, 85, 85, 1);
+BM_SPARSE_MULTI(400, 1024, 256, 85, 85, 1);
+BM_SPARSE_MULTI(400, 256, 1, 85, 85, 1);
+
+BM_SPARSE_MULTI(2048, 1792, 1024, 85, 85, 1);
+BM_SPARSE_MULTI(2048, 1024, 768, 85, 85, 1);
+BM_SPARSE_MULTI(2048, 768, 512, 85, 85, 1);
+BM_SPARSE_MULTI(2048, 512, 256, 85, 85, 1);
+
+BM_SPARSE_MULTI(2048, 1792, 1024, 85, 85, 3);
+BM_SPARSE_MULTI(2048, 1024, 768, 85, 85, 3);
+BM_SPARSE_MULTI(2048, 768, 512, 85, 85, 3);
+BM_SPARSE_MULTI(2048, 512, 256, 85, 85, 3);
+
+BM_SPARSE_MULTI(2048, 1792, 1024, 85, 85, 6);
+BM_SPARSE_MULTI(2048, 1024, 768, 85, 85, 6);
+BM_SPARSE_MULTI(2048, 768, 512, 85, 85, 6);
+BM_SPARSE_MULTI(2048, 512, 256, 85, 85, 6);
} // end namespace tensorflow