diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-04-11 17:20:17 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-04-11 17:20:17 -0700 |
commit | d6e596174d09446236b3f398d8ec39148c638ed9 (patch) | |
tree | ccb4116b05dc11d7931bac0129fd1394abe1e0b0 /Eigen/src/Core/products/TriangularMatrixMatrix.h | |
parent | 3ca1ae2bb761d7738bcdad885639f422a6b7c914 (diff) | |
parent | 833efb39bfe4957934982112fe435ab30a0c3b4f (diff) |
Pull latest updates from upstream
Diffstat (limited to 'Eigen/src/Core/products/TriangularMatrixMatrix.h')
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixMatrix.h | 8 |
1 files changed, 6 insertions, 2 deletions
```diff
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 39ab87df8..8a2f7cd78 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -126,6 +126,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
     Index kc = blocking.kc(); // cache block size along the K direction
     Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
+    // The small panel size must not be larger than blocking size.
+    // Usually this should never be the case because SmallPanelWidth^2 is very small
+    // compared to L2 cache size, but let's be safe:
+    Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
     std::size_t sizeA = kc*mc;
     std::size_t sizeB = kc*cols;
@@ -169,9 +173,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
       if(IsLower || actual_k2<rows)
       {
         // for each small vertical panels of lhs
-        for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
+        for (Index k1=0; k1<actual_kc; k1+=panelWidth)
         {
-          Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
+          Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
           Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
           Index startBlock = actual_k2+k1;
           Index blockBOffset = k1;
```