diff options
author | 2017-02-27 11:32:44 -0800 | |
---|---|---|
committer | 2017-02-27 12:09:10 -0800 | |
commit | db8ea4ff07ad75cf5f0220428fbe4b84fcf68f4a (patch) | |
tree | fb6288c5b6dcc6d28167c745fa5b7688417cbaf8 /tensorflow/core/kernels/sparse_matmul_op_test.cc | |
parent | 332ee5051fc38babb53cc8cbf3c3120e5651f4e8 (diff) |
- Upgraded libxsmm to 1.7.1.
- Applied LLVM optimization patch to libxsmm
(https://github.com/hfp/libxsmm/commit/0e412d5d2769a8754cace64e56e26e14093f887d.patch).
- Capped the number of outstanding libxsmm sparse matrix multiply handles to
bound the memory used for temporary space.
- Added extra logging to libxsmm handle management in TensorFlow.
- Added support for running multiple sparse matrix multiplies simultaneously in
the performance benchmark to match some practical use cases.
- Added more size combinations to the sparse matrix multiply benchmark.
- Fixed dependencies for xsmm_conv2d_test.
Change: 148672973
Diffstat (limited to 'tensorflow/core/kernels/sparse_matmul_op_test.cc')
-rw-r--r-- | tensorflow/core/kernels/sparse_matmul_op_test.cc | 105 |
1 files changed, 91 insertions, 14 deletions
diff --git a/tensorflow/core/kernels/sparse_matmul_op_test.cc b/tensorflow/core/kernels/sparse_matmul_op_test.cc index 42fdde23dd..b5c69466f8 100644 --- a/tensorflow/core/kernels/sparse_matmul_op_test.cc +++ b/tensorflow/core/kernels/sparse_matmul_op_test.cc @@ -94,6 +94,16 @@ static Graph* SparseMatMul(int m, int n, int d, float sparsity_a, transpose_a, transpose_b); } +static Graph* ReplicatedSparseMatMul(int m, int n, int d, float sparsity_1, + float sparsity_2, int copies) { + Graph* g = new Graph(OpRegistry::Global()); + for (int i = 0; i < copies; ++i) { + SparseMatMulHelper<float, float>(g, m, n, d, sparsity_1, sparsity_2, false, + false); + } + return g; +} + #define BM_SPARSE(M, K, N, S1, S2, TRA, TRB, TA, TB) \ static void \ BM_Sparse##_##M##_##K##_##N##_##S1##_##S2##_##TRA##_##TRB##_##TA##_##TB( \ @@ -112,6 +122,23 @@ static Graph* SparseMatMul(int m, int n, int d, float sparsity_a, BENCHMARK( \ BM_Sparse##_##M##_##K##_##N##_##S1##_##S2##_##TRA##_##TRB##_##TA##_##TB); +#define BM_SPARSE_REPLICATED(M, K, N, S1, S2, Copies) \ + static void BM_Sparse_replicated##_##M##_##K##_##N##_##S1##_##S2##_##Copies( \ + int iters) { \ + testing::StopTiming(); \ + testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * Copies * \ + 2); \ + std::string label = strings::Printf("copies: %d sp_a: %0.2f sp_b: %0.2f", \ + (Copies), S1 / 100.0, S2 / 100.0); \ + testing::SetLabel(label); \ + testing::UseRealTime(); \ + auto g = \ + ReplicatedSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0, (Copies)); \ + testing::StartTiming(); \ + test::Benchmark("cpu", g).Run(iters); \ + } \ + BENCHMARK(BM_Sparse_replicated##_##M##_##K##_##N##_##S1##_##S2##_##Copies); + #define BM_SPARSE_FLOAT(M, K, N, S1, S2, TRA, TRB) \ BM_SPARSE(M, K, N, S1, S2, TRA, TRB, float, float) #define BM_SPARSE_BFLOAT16(M, K, N, S1, S2, TRA, TRB) \ @@ -144,6 +171,33 @@ BM_SPARSE_FLOAT(1024, 1024, 1024, 1, 0, false, false); BM_SPARSE_FLOAT(1024, 1024, 1024, 85, 0, false, false); BM_SPARSE_FLOAT(256, 256, 256, 
1, 0, false, false); BM_SPARSE_FLOAT(512, 512, 512, 1, 0, false, false); +BM_SPARSE_FLOAT(2560, 400, 1024, 85, 0, false, false); +BM_SPARSE_FLOAT(2560, 400, 1024, 85, 0, true, false); + +BM_SPARSE_FLOAT(400, 800, 2560, 85, 0, false, false); +BM_SPARSE_FLOAT(400, 2560, 1024, 85, 0, false, false); +BM_SPARSE_FLOAT(400, 1024, 256, 85, 0, false, false); +BM_SPARSE_FLOAT(400, 256, 1, 85, 0, false, false); + +BM_SPARSE_REPLICATED(400, 800, 2560, 85, 0, 6); +BM_SPARSE_REPLICATED(400, 2560, 1024, 85, 0, 6); +BM_SPARSE_REPLICATED(400, 1024, 256, 85, 0, 6); +BM_SPARSE_REPLICATED(400, 256, 1, 85, 0, 6); + +BM_SPARSE_FLOAT(2048, 1792, 1024, 85, 0, false, false); +BM_SPARSE_FLOAT(2048, 1024, 768, 85, 0, false, false); +BM_SPARSE_FLOAT(2048, 768, 512, 85, 0, false, false); +BM_SPARSE_FLOAT(2048, 512, 256, 85, 0, false, false); + +BM_SPARSE_FLOAT(2049, 1792, 1024, 85, 0, false, false); +BM_SPARSE_FLOAT(2049, 1024, 768, 85, 0, false, false); +BM_SPARSE_FLOAT(2049, 768, 512, 85, 0, false, false); +BM_SPARSE_FLOAT(2049, 512, 256, 85, 0, false, false); + +BM_SPARSE_REPLICATED(2048, 1792, 1024, 85, 0, 6); +BM_SPARSE_REPLICATED(2048, 1024, 768, 85, 0, 6); +BM_SPARSE_REPLICATED(2048, 768, 512, 85, 0, 6); +BM_SPARSE_REPLICATED(2048, 512, 256, 85, 0, 6); // Test bfloat16 BM_SPARSE_BFLOAT16(2048, 2048, 2048, 0, 0, false, false); @@ -156,30 +210,53 @@ BM_SPARSE_FLOAT_BFLOAT16(2048, 2048, 2048, 85, 0, false, false); BM_SPARSE_FLOAT_BFLOAT16(2048, 2048, 2048, 99, 0, false, false); static Graph* MultiSparseMatMul(int m, int n, int d, float sparsity_1, - float sparsity_2) { + float sparsity_2, int copies) { Graph* g = new Graph(OpRegistry::Global()); - SparseMatMulHelper<float, float>(g, d, n, m, sparsity_1, sparsity_2, true, - false); - SparseMatMulHelper<float, float>(g, m, d, n, sparsity_2, 0, false, true); + for (int i = 0; i < copies; ++i) { + SparseMatMulHelper<float, float>(g, d, n, m, sparsity_1, sparsity_2, true, + false); + SparseMatMulHelper<float, float>(g, m, d, n, sparsity_2, 0, 
false, true); + } return g; } -#define BM_SPARSE_MULTI(M, K, N, S1, S2) \ - static void BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2(int iters) { \ +#define BM_SPARSE_MULTI(M, K, N, S1, S2, Copies) \ + static void BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies( \ + int iters) { \ testing::StopTiming(); \ - testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * 2 * 3); \ - std::string label = strings::Printf("%d_%d_%d_%0.2f_%0.2f", M, K, N, \ - S1 / 100.0, S2 / 100.0); \ + testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * 2 * 2 * \ + Copies); \ + std::string label = strings::Printf("%d_%d_%d_%d_%0.2f_%0.2f", M, K, N, \ + Copies, S1 / 100.0, S2 / 100.0); \ testing::SetLabel(label); \ testing::UseRealTime(); \ - auto g = MultiSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0); \ + auto g = MultiSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0, Copies); \ testing::StartTiming(); \ test::Benchmark("cpu", g).Run(iters); \ } \ - BENCHMARK(BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2); - -BM_SPARSE_MULTI(1024, 2140, 4096, 0, 82); -BM_SPARSE_MULTI(1024, 4096, 2048, 83, 83); + BENCHMARK(BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies); + +BM_SPARSE_MULTI(1024, 2140, 4096, 0, 82, 1); +BM_SPARSE_MULTI(1024, 4096, 2048, 83, 83, 1); +BM_SPARSE_MULTI(400, 800, 2560, 85, 85, 1); +BM_SPARSE_MULTI(400, 2560, 1024, 85, 85, 1); +BM_SPARSE_MULTI(400, 1024, 256, 85, 85, 1); +BM_SPARSE_MULTI(400, 256, 1, 85, 85, 1); + +BM_SPARSE_MULTI(2048, 1792, 1024, 85, 85, 1); +BM_SPARSE_MULTI(2048, 1024, 768, 85, 85, 1); +BM_SPARSE_MULTI(2048, 768, 512, 85, 85, 1); +BM_SPARSE_MULTI(2048, 512, 256, 85, 85, 1); + +BM_SPARSE_MULTI(2048, 1792, 1024, 85, 85, 3); +BM_SPARSE_MULTI(2048, 1024, 768, 85, 85, 3); +BM_SPARSE_MULTI(2048, 768, 512, 85, 85, 3); +BM_SPARSE_MULTI(2048, 512, 256, 85, 85, 3); + +BM_SPARSE_MULTI(2048, 1792, 1024, 85, 85, 6); +BM_SPARSE_MULTI(2048, 1024, 768, 85, 85, 6); +BM_SPARSE_MULTI(2048, 768, 512, 85, 85, 6); +BM_SPARSE_MULTI(2048, 512, 
256, 85, 85, 6); } // end namespace tensorflow |