diff options
author | Gael Guennebaud <g.gael@free.fr> | 2015-02-26 17:01:33 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2015-02-26 17:01:33 +0100 |
commit | bcf9bb5c1f1834487f10721abc909be4a62574d1 (patch) | |
tree | 904721ecdd01211da1405e96a4f768dd017844d4 /Eigen/src/Core/products | |
parent | 4ec3f04b3ab42a595b99bb2902758f7d748ab80c (diff) |
Avoid packing rhs multiple times when blocking on the lhs only.
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index c38c12c31..c76f48154 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -164,6 +164,8 @@ static void run(Index rows, Index cols, Index depth, ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB()); + + const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols; // For each horizontal panel of the rhs, and corresponding panel of the lhs... for(Index i2=0; i2<rows; i2+=mc) @@ -188,7 +190,8 @@ static void run(Index rows, Index cols, Index depth, // We pack the rhs's block into a sequential chunk of memory (L2 caching) // Note that this block will be read a very high number of times, which is equal to the number of // micro horizontal panel of the large rhs's panel (e.g., rows/12 times). - pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc); + if((!pack_rhs_once) || i2==0) + pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc); // Everything is packed, we can now call the panel * block kernel: gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha); |