Make sure that the block size computation is tested by our unit test.

author: Gael Guennebaud <g.gael@free.fr> 2015-02-26 17:00:36 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2015-02-26 17:00:36 +0100
commit: 4ec3f04b3ab42a595b99bb2902758f7d748ab80c (patch)
tree: 308f2fcb1079f77a46f05af857af289c59477bf4 /Eigen/src
parent: 2e9cb06a87213138618487e62d7893ad3a9b4546 (diff)
1 file changed, 13 insertions, 7 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 65310e637..06c7f362e 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -153,8 +153,13 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
   }
   else {
     // In unit tests we do not want to use extra large matrices,
-    // so we reduce the block size to check the blocking strategy is not flawed
-#ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
+    // so we reduce the cache size to check the blocking strategy is not flawed
+#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
+    l1 = 4*1024;
+    l2 = 32*1024;
+    l3 = 512*1024;
+#endif
+    
     // Early return for small problems because the computation below are time consuming for small problems.
     // Perhaps it would make more sense to consider k*n*m??
     // Note that for very tiny problem, this function should be bypassed anyway
@@ -195,7 +200,13 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
     //      actual_l2 = max(l2, l3/nb_core_sharing_l3)
     // The number below is quite conservative: it is better to underestimate the cache size rather than overestimating it)
     // For instance, it corresponds to 6MB of L3 shared among 4 cores.
+    #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
+    const Index actual_l2 = l3;
+    #else
     const Index actual_l2 = 1572864; // == 1.5 MB
+    #endif
+    
+    
     
     // Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2.
     // The second half is implicitly reserved to access the result and lhs coefficients.
@@ -240,11 +251,6 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
       m = (m%mc)==0 ? mc
                     : (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1))));
     }
-#else
-    k = std::min<Index>(k,24);
-    n = std::min<Index>(n,384/sizeof(RhsScalar));
-    m = std::min<Index>(m,384/sizeof(RhsScalar));
-#endif
   }
 }
author	Gael Guennebaud <g.gael@free.fr>	2015-02-26 17:00:36 +0100
committer	Gael Guennebaud <g.gael@free.fr>	2015-02-26 17:00:36 +0100
commit	4ec3f04b3ab42a595b99bb2902758f7d748ab80c (patch)
tree	308f2fcb1079f77a46f05af857af289c59477bf4 /Eigen/src
parent	2e9cb06a87213138618487e62d7893ad3a9b4546 (diff)