diff options
author | 2015-02-18 09:43:55 -0500 | |
---|---|---|
committer | 2015-02-18 09:43:55 -0500 | |
commit | 4a3e6c8be1d4752b3172a7e26631c4669e28dde7 (patch) | |
tree | 2013d0d26b109ccd401920450c849a4dfffd9934 /Eigen | |
parent | c7bb1e8ea8dfc984788d0cb77b82a90468393c2e (diff) |
bug #958 - Allow testing specific blocking sizes
This is only a debugging/testing patch. It allows testing specific
product blocking sizes, typically to study the impact on performance.
Example usage:
int testk, testm, testn;
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K testk
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M testm
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N testn
#include <Eigen/Core>
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 16 |
1 file changed, 16 insertions, 0 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index ce5494182..f3fede441 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -84,6 +84,22 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
 template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
 void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
 {
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+  EIGEN_UNUSED_VARIABLE(num_threads);
+  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+  enum {
+    kr = 16,
+    mr = Traits::mr,
+    nr = Traits::nr
+  };
+  k = std::min<SizeType>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
+  if (k > kr) k -= k % kr;
+  m = std::min<SizeType>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
+  if (m > mr) m -= m % mr;
+  n = std::min<SizeType>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
+  if (n > nr) n -= n % nr;
+  return;
+#endif
   // Explanations:
   // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
   // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed