aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products/GeneralBlockPanelKernel.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2010-06-24 11:50:28 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2010-06-24 11:50:28 +0200
commitd44fce501bf299692d578349b92c899c3f0d79cd (patch)
tree7c8061b30cace79ccf503f4faa4823e0e80e2290 /Eigen/src/Core/products/GeneralBlockPanelKernel.h
parent0068d3ccf62c3e2960e3f9eb4a31562c0f5025d7 (diff)
fix computation of blocking sizes for small triangular matrices
Diffstat (limited to 'Eigen/src/Core/products/GeneralBlockPanelKernel.h')
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h10
1 files changed, 8 insertions, 2 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index cf48ca2f4..7e42eed6e 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -101,7 +101,7 @@ inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
-template<typename LhsScalar, typename RhsScalar>
+template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
{
// Explanations:
@@ -114,7 +114,7 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
std::ptrdiff_t l1, l2;
enum {
- kdiv = 2 * ei_product_blocking_traits<RhsScalar>::nr
+ kdiv = KcFactor * 2 * ei_product_blocking_traits<RhsScalar>::nr
* ei_packet_traits<RhsScalar>::size * sizeof(RhsScalar),
mr = ei_product_blocking_traits<LhsScalar>::mr,
mr_mask = (0xffffffff/mr)*mr
@@ -127,6 +127,12 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
n = n;
}
+template<typename LhsScalar, typename RhsScalar>
+inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+{
+ computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
+}
+
#ifdef EIGEN_HAS_FUSE_CJMADD
#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C);
#else