diff options
author | Gael Guennebaud <g.gael@free.fr> | 2014-06-20 15:55:44 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2014-06-20 15:55:44 +0200 |
commit | b29b81a1f46ad3b7340c9bbb8d1e23685e5ca756 (patch) | |
tree | ec31545094cba7c9d72c9132963fa3fecd448726 /Eigen/src/Core/products/TriangularMatrixMatrix.h | |
parent | 47585c8ab238f6a49b8097e221fa4b30763ef942 (diff) | |
parent | 963d338922e9ef1addcd29c1b43e9b66243207c0 (diff) |
merge with default branch
Diffstat (limited to 'Eigen/src/Core/products/TriangularMatrixMatrix.h')
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixMatrix.h | 18 |
1 files changed, 7 insertions, 11 deletions
```diff
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 3f0618410..d93277cb8 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -125,11 +125,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
 
     std::size_t sizeA = kc*mc;
     std::size_t sizeB = kc*cols;
-    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
 
     ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
     ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
 
     Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
     triangularBuffer.setZero();
@@ -187,7 +185,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
             pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
 
             gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
-                        actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
+                        actualPanelWidth, actual_kc, 0, blockBOffset);
 
             // GEBP with remaining micro panel
             if (lengthTarget>0)
@@ -197,7 +195,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
               pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
 
               gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
-                          actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
+                          actualPanelWidth, actual_kc, 0, blockBOffset);
             }
           }
         }
@@ -211,7 +209,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
             gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
               (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
 
-            gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
+            gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0);
           }
         }
       }
@@ -265,12 +263,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
     Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
 
     std::size_t sizeA = kc*mc;
-    std::size_t sizeB = kc*cols;
-    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
+    std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
 
     ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
     ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
 
     Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
     triangularBuffer.setZero();
@@ -304,6 +300,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
       Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
 
       Scalar* geb = blockB+ts*ts;
+      geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
 
       pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
 
@@ -357,14 +354,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                         actual_mc, panelLength, actualPanelWidth,
                         alpha,
                         actual_kc, actual_kc, // strides
-                        blockOffset, blockOffset,// offsets
-                        blockW); // workspace
+                        blockOffset, blockOffset);// offsets
           }
         }
         gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
                     blockA, geb, actual_mc, actual_kc, rs,
                     alpha,
-                    -1, -1, 0, 0, blockW);
+                    -1, -1, 0, 0);
       }
     }
   }
```