From c06e6fd115d747c42a2b2ea029c53bbdf41276d6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 10 Sep 2019 23:29:52 +0200 Subject: bug #1741: fix SelfAdjointView::rankUpdate and product to triangular part for destination with non-trivial inner stride --- .../Core/products/GeneralMatrixMatrixTriangular.h | 64 ++++++++++++---------- Eigen/src/Core/products/SelfadjointProduct.h | 4 +- 2 files changed, 37 insertions(+), 31 deletions(-) (limited to 'Eigen/src/Core/products') diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index ec2825bf0..6ba0d9bdb 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -25,51 +25,54 @@ namespace internal { **********************************************************************/ // forward declarations (defined at the end of this file) -template +template struct tribb_kernel; /* Optimized matrix-matrix product evaluating only one triangular half */ template + int ResStorageOrder, int ResInnerStride, int UpLo, int Version = Specialized> struct general_matrix_matrix_triangular_product; // as usual if the result is row major => we transpose the product template -struct general_matrix_matrix_triangular_product + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int UpLo, int Version> +struct general_matrix_matrix_triangular_product { typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, - const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, + const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resIncr, Index resStride, const ResScalar& alpha, level3_blocking& blocking) { general_matrix_matrix_triangular_product - ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking); + ColMajor, ResInnerStride, UpLo==Lower?Upper:Lower> + ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking); } }; template -struct general_matrix_matrix_triangular_product + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int UpLo, int Version> +struct general_matrix_matrix_triangular_product { typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, - const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, + const RhsScalar* _rhs, Index rhsStride, + ResScalar* _res, Index resIncr, Index resStride, const ResScalar& alpha, level3_blocking& blocking) { typedef gebp_traits Traits; typedef const_blas_data_mapper LhsMapper; typedef const_blas_data_mapper RhsMapper; - typedef blas_data_mapper ResMapper; + typedef blas_data_mapper ResMapper; LhsMapper lhs(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); Index mc = (std::min)(size,blocking.mc()); @@ -87,7 +90,7 @@ struct general_matrix_matrix_triangular_product pack_lhs; gemm_pack_rhs pack_rhs; gebp_kernel gebp; - tribb_kernel sybb; + tribb_kernel sybb; for(Index k2=0; k2 +template struct tribb_kernel { typedef gebp_traits Traits; @@ -141,11 +144,13 @@ struct tribb_kernel enum { BlockSize = meta_least_common_multiple::ret }; - void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) + void operator()(ResScalar* _res, Index resIncr, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) { - typedef blas_data_mapper ResMapper; - ResMapper res(_res, resStride); - gebp_kernel gebp_kernel; + typedef blas_data_mapper ResMapper; + typedef blas_data_mapper BufferMapper; + ResMapper res(_res, resStride, resIncr); + gebp_kernel gebp_kernel1; + gebp_kernel gebp_kernel2; Matrix buffer((internal::constructor_without_unaligned_array_assert())); @@ -157,32 +162,32 @@ struct tribb_kernel const RhsScalar* actual_b = blockB+j*depth; if(UpLo==Upper) - gebp_kernel(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha, - -1, -1, 0, 0); + gebp_kernel1(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha, + -1, -1, 0, 0); // selfadjoint micro block { Index i = j; buffer.setZero(); // 1 - apply the kernel on the temporary buffer - gebp_kernel(ResMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, - -1, -1, 0, 0); + gebp_kernel2(BufferMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, + -1, -1, 0, 0); // 2 - triangular accumulation for(Index j1=0; j1 internal::general_matrix_matrix_triangular_product + IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo&(Lower|Upper)> ::run(size, depth, &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), - mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); + mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? mat.innerStride() : mat.outerStride() ) : 0), + mat.innerStride(), mat.outerStride(), actualAlpha, blocking); } }; diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 39c5b59ff..61e8894e7 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -109,10 +109,10 @@ struct selfadjoint_product_selector internal::general_matrix_matrix_triangular_product::IsComplex, Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits::IsComplex, - IsRowMajor ? RowMajor : ColMajor, UpLo> + IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo> ::run(size, depth, &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(), - mat.data(), mat.outerStride(), actualAlpha, blocking); + mat.data(), mat.innerStride(), mat.outerStride(), actualAlpha, blocking); } }; -- cgit v1.2.3