- Upgraded libxsmm to 1.7.1.

- Applied LLVM optimization patch to libxsmm (https://github.com/hfp/libxsmm/commit/0e412d5d2769a8754cace64e56e26e14093f887d.patch).
- Limited the number of outstanding libxsmm sparse matrix multiply handles to bound the memory used for temporary space.
- Added extra logging to libxsmm handle management in TensorFlow.
- Added support for running multiple sparse matrix multiplies simultaneously in the performance benchmark, to match some practical use cases.
- Added more size combinations to the sparse matrix multiply benchmark.
- Fixed dependencies for xsmm_conv2d_test.

Change: 148672973
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-02-27 11:32:44 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-02-27 12:09:10 -0800
commit: db8ea4ff07ad75cf5f0220428fbe4b84fcf68f4a (patch)
tree: fb6288c5b6dcc6d28167c745fa5b7688417cbaf8 /tensorflow/core/kernels/sparse_matmul_op_test.cc
parent: 332ee5051fc38babb53cc8cbf3c3120e5651f4e8 (diff)
1 files changed, 91 insertions, 14 deletions
diff --git a/tensorflow/core/kernels/sparse_matmul_op_test.cc b/tensorflow/core/kernels/sparse_matmul_op_test.cc
index 42fdde23dd..b5c69466f8 100644
--- a/tensorflow/core/kernels/sparse_matmul_op_test.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op_test.cc
@@ -94,6 +94,16 @@ static Graph* SparseMatMul(int m, int n, int d, float sparsity_a,
                                     transpose_a, transpose_b);
 }
 
+static Graph* ReplicatedSparseMatMul(int m, int n, int d, float sparsity_1,
+                                     float sparsity_2, int copies) {
+  Graph* g = new Graph(OpRegistry::Global());
+  for (int i = 0; i < copies; ++i) {
+    SparseMatMulHelper<float, float>(g, m, n, d, sparsity_1, sparsity_2, false,
+                                     false);
+  }
+  return g;
+}
+
 #define BM_SPARSE(M, K, N, S1, S2, TRA, TRB, TA, TB)                           \
   static void                                                                  \
       BM_Sparse##_##M##_##K##_##N##_##S1##_##S2##_##TRA##_##TRB##_##TA##_##TB( \
@@ -112,6 +122,23 @@ static Graph* SparseMatMul(int m, int n, int d, float sparsity_a,
   BENCHMARK(                                                                   \
       BM_Sparse##_##M##_##K##_##N##_##S1##_##S2##_##TRA##_##TRB##_##TA##_##TB);
 
+#define BM_SPARSE_REPLICATED(M, K, N, S1, S2, Copies)                          \
+  static void BM_Sparse_replicated##_##M##_##K##_##N##_##S1##_##S2##_##Copies( \
+      int iters) {                                                             \
+    testing::StopTiming();                                                     \
+    testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * Copies *   \
+                            2);                                                \
+    std::string label = strings::Printf("copies: %d sp_a: %0.2f sp_b: %0.2f",  \
+                                        (Copies), S1 / 100.0, S2 / 100.0);     \
+    testing::SetLabel(label);                                                  \
+    testing::UseRealTime();                                                    \
+    auto g =                                                                   \
+        ReplicatedSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0, (Copies));     \
+    testing::StartTiming();                                                    \
+    test::Benchmark("cpu", g).Run(iters);                                      \
+  }                                                                            \
+  BENCHMARK(BM_Sparse_replicated##_##M##_##K##_##N##_##S1##_##S2##_##Copies);
+
 #define BM_SPARSE_FLOAT(M, K, N, S1, S2, TRA, TRB) \
   BM_SPARSE(M, K, N, S1, S2, TRA, TRB, float, float)
 #define BM_SPARSE_BFLOAT16(M, K, N, S1, S2, TRA, TRB) \
@@ -144,6 +171,33 @@ BM_SPARSE_FLOAT(1024, 1024, 1024, 1, 0, false, false);
 BM_SPARSE_FLOAT(1024, 1024, 1024, 85, 0, false, false);
 BM_SPARSE_FLOAT(256, 256, 256, 1, 0, false, false);
 BM_SPARSE_FLOAT(512, 512, 512, 1, 0, false, false);
+BM_SPARSE_FLOAT(2560, 400, 1024, 85, 0, false, false);
+BM_SPARSE_FLOAT(2560, 400, 1024, 85, 0, true, false);
+
+BM_SPARSE_FLOAT(400, 800, 2560, 85, 0, false, false);
+BM_SPARSE_FLOAT(400, 2560, 1024, 85, 0, false, false);
+BM_SPARSE_FLOAT(400, 1024, 256, 85, 0, false, false);
+BM_SPARSE_FLOAT(400, 256, 1, 85, 0, false, false);
+
+BM_SPARSE_REPLICATED(400, 800, 2560, 85, 0, 6);
+BM_SPARSE_REPLICATED(400, 2560, 1024, 85, 0, 6);
+BM_SPARSE_REPLICATED(400, 1024, 256, 85, 0, 6);
+BM_SPARSE_REPLICATED(400, 256, 1, 85, 0, 6);
+
+BM_SPARSE_FLOAT(2048, 1792, 1024, 85, 0, false, false);
+BM_SPARSE_FLOAT(2048, 1024, 768, 85, 0, false, false);
+BM_SPARSE_FLOAT(2048, 768, 512, 85, 0, false, false);
+BM_SPARSE_FLOAT(2048, 512, 256, 85, 0, false, false);
+
+BM_SPARSE_FLOAT(2049, 1792, 1024, 85, 0, false, false);
+BM_SPARSE_FLOAT(2049, 1024, 768, 85, 0, false, false);
+BM_SPARSE_FLOAT(2049, 768, 512, 85, 0, false, false);
+BM_SPARSE_FLOAT(2049, 512, 256, 85, 0, false, false);
+
+BM_SPARSE_REPLICATED(2048, 1792, 1024, 85, 0, 6);
+BM_SPARSE_REPLICATED(2048, 1024, 768, 85, 0, 6);
+BM_SPARSE_REPLICATED(2048, 768, 512, 85, 0, 6);
+BM_SPARSE_REPLICATED(2048, 512, 256, 85, 0, 6);
 
 // Test bfloat16
 BM_SPARSE_BFLOAT16(2048, 2048, 2048, 0, 0, false, false);
@@ -156,30 +210,53 @@ BM_SPARSE_FLOAT_BFLOAT16(2048, 2048, 2048, 85, 0, false, false);
 BM_SPARSE_FLOAT_BFLOAT16(2048, 2048, 2048, 99, 0, false, false);
 
 static Graph* MultiSparseMatMul(int m, int n, int d, float sparsity_1,
-                                float sparsity_2) {
+                                float sparsity_2, int copies) {
   Graph* g = new Graph(OpRegistry::Global());
-  SparseMatMulHelper<float, float>(g, d, n, m, sparsity_1, sparsity_2, true,
-                                   false);
-  SparseMatMulHelper<float, float>(g, m, d, n, sparsity_2, 0, false, true);
+  for (int i = 0; i < copies; ++i) {
+    SparseMatMulHelper<float, float>(g, d, n, m, sparsity_1, sparsity_2, true,
+                                     false);
+    SparseMatMulHelper<float, float>(g, m, d, n, sparsity_2, 0, false, true);
+  }
   return g;
 }
 
-#define BM_SPARSE_MULTI(M, K, N, S1, S2)                                    \
-  static void BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2(int iters) {  \
+#define BM_SPARSE_MULTI(M, K, N, S1, S2, Copies)                            \
+  static void BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies(   \
+      int iters) {                                                          \
     testing::StopTiming();                                                  \
-    testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * 2 * 3); \
-    std::string label = strings::Printf("%d_%d_%d_%0.2f_%0.2f", M, K, N,    \
-                                        S1 / 100.0, S2 / 100.0);            \
+    testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * 2 * 2 * \
+                            Copies);                                        \
+    std::string label = strings::Printf("%d_%d_%d_%d_%0.2f_%0.2f", M, K, N, \
+                                        Copies, S1 / 100.0, S2 / 100.0);    \
     testing::SetLabel(label);                                               \
     testing::UseRealTime();                                                 \
-    auto g = MultiSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0);            \
+    auto g = MultiSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0, Copies);    \
     testing::StartTiming();                                                 \
     test::Benchmark("cpu", g).Run(iters);                                   \
   }                                                                         \
-  BENCHMARK(BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2);
-
-BM_SPARSE_MULTI(1024, 2140, 4096, 0, 82);
-BM_SPARSE_MULTI(1024, 4096, 2048, 83, 83);
+  BENCHMARK(BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies);
+
+BM_SPARSE_MULTI(1024, 2140, 4096, 0, 82, 1);
+BM_SPARSE_MULTI(1024, 4096, 2048, 83, 83, 1);
+BM_SPARSE_MULTI(400, 800, 2560, 85, 85, 1);
+BM_SPARSE_MULTI(400, 2560, 1024, 85, 85, 1);
+BM_SPARSE_MULTI(400, 1024, 256, 85, 85, 1);
+BM_SPARSE_MULTI(400, 256, 1, 85, 85, 1);
+
+BM_SPARSE_MULTI(2048, 1792, 1024, 85, 85, 1);
+BM_SPARSE_MULTI(2048, 1024, 768, 85, 85, 1);
+BM_SPARSE_MULTI(2048, 768, 512, 85, 85, 1);
+BM_SPARSE_MULTI(2048, 512, 256, 85, 85, 1);
+
+BM_SPARSE_MULTI(2048, 1792, 1024, 85, 85, 3);
+BM_SPARSE_MULTI(2048, 1024, 768, 85, 85, 3);
+BM_SPARSE_MULTI(2048, 768, 512, 85, 85, 3);
+BM_SPARSE_MULTI(2048, 512, 256, 85, 85, 3);
+
+BM_SPARSE_MULTI(2048, 1792, 1024, 85, 85, 6);
+BM_SPARSE_MULTI(2048, 1024, 768, 85, 85, 6);
+BM_SPARSE_MULTI(2048, 768, 512, 85, 85, 6);
+BM_SPARSE_MULTI(2048, 512, 256, 85, 85, 6);
 
 }  // end namespace tensorflow
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-02-27 11:32:44 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-02-27 12:09:10 -0800
commit	db8ea4ff07ad75cf5f0220428fbe4b84fcf68f4a (patch)
tree	fb6288c5b6dcc6d28167c745fa5b7688417cbaf8 /tensorflow/core/kernels/sparse_matmul_op_test.cc
parent	332ee5051fc38babb53cc8cbf3c3120e5651f4e8 (diff)