aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <benoitjacob@google.com>2015-02-18 09:43:55 -0500
committerGravatar Benoit Jacob <benoitjacob@google.com>2015-02-18 09:43:55 -0500
commit4a3e6c8be1d4752b3172a7e26631c4669e28dde7 (patch)
tree2013d0d26b109ccd401920450c849a4dfffd9934 /Eigen
parentc7bb1e8ea8dfc984788d0cb77b82a90468393c2e (diff)
bug #958 - Allow testing specific blocking sizes
This is only a debugging/testing patch. It allows testing specific product blocking sizes, typically to study the impact on performance.

Example usage:

    int testk, testm, testn;
    #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
    #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K testk
    #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M testm
    #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N testn
    #include <Eigen/Core>
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h16
1 files changed, 16 insertions, 0 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index ce5494182..f3fede441 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -84,6 +84,22 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
{
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES // testing override: use caller-specified blocking sizes and return early
+ EIGEN_UNUSED_VARIABLE(num_threads); // num_threads is not consulted on this path
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ enum {
+ kr = 16, // k is rounded down to a multiple of this value
+ mr = Traits::mr, // m is rounded down to a multiple of this value
+ nr = Traits::nr // n is rounded down to a multiple of this value
+ };
+ k = std::min<SizeType>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); // never exceed the incoming k
+ if (k > kr) k -= k % kr; // values <= kr are left as they are
+ m = std::min<SizeType>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); // fix: clamp m by m itself (was clamped by n)
+ if (m > mr) m -= m % mr;
+ n = std::min<SizeType>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); // fix: clamp n by n itself (was clamped by the already-reduced k)
+ if (n > nr) n -= n % nr;
+ return; // the remainder of the function is skipped when the override is active
+#endif
// Explanations:
// Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed