From 031f17117d93d38d7078ef02892afdba549a265c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 11 Sep 2019 15:04:25 +0200 Subject: bug #1741: fix self-adjoint*matrix, triangular*matrix, and triangular^1*matrix with a destination having a non-trivial inner-stride --- Eigen/src/Core/SolveTriangular.h | 6 +- Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 54 +++++---- .../Core/products/SelfadjointMatrixMatrix_BLAS.h | 24 ++-- Eigen/src/Core/products/TriangularMatrixMatrix.h | 54 +++++---- .../Core/products/TriangularMatrixMatrix_BLAS.h | 18 +-- Eigen/src/Core/products/TriangularSolverMatrix.h | 62 +++++----- .../Core/products/TriangularSolverMatrix_BLAS.h | 12 +- blas/level3_impl.h | 132 ++++++++++----------- test/product_symm.cpp | 20 +++- test/product_trmm.cpp | 12 +- test/product_trsolve.cpp | 13 ++ 11 files changed, 235 insertions(+), 172 deletions(-) diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 2e061caab..813fef0db 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -19,7 +19,7 @@ namespace internal { template struct triangular_solve_vector; -template +template struct triangular_solve_matrix; // small helper struct extracting some traits on the underlying solver operation @@ -98,8 +98,8 @@ struct triangular_solver_selector BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false); triangular_solve_matrix - ::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride(), blocking); + (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor, Rhs::InnerStrideAtCompileTime> + ::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.innerStride(), rhs.outerStride(), blocking); } }; diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index 673073601..33ecf10f6 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -294,20 +294,21 @@ struct symm_pack_rhs template + int ResStorageOrder, int ResInnerStride> struct product_selfadjoint_matrix; template -struct product_selfadjoint_matrix + int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs, + int ResInnerStride> +struct product_selfadjoint_matrix { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking) { product_selfadjoint_matrix::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs), EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, LhsSelfAdjoint, NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), - ColMajor> - ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); + ColMajor,ResInnerStride> + ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking); } }; template -struct product_selfadjoint_matrix + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +struct product_selfadjoint_matrix { static EIGEN_DONT_INLINE void run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking); }; template -EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking) { Index size = rows; @@ -351,11 +354,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix LhsMapper; typedef const_blas_data_mapper LhsTransposeMapper; typedef const_blas_data_mapper RhsMapper; - typedef blas_data_mapper ResMapper; + typedef blas_data_mapper ResMapper; LhsMapper lhs(_lhs,lhsStride); LhsTransposeMapper lhs_transpose(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -415,26 +418,28 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix -struct product_selfadjoint_matrix + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +struct product_selfadjoint_matrix { static EIGEN_DONT_INLINE void run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking); }; template -EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride> +EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking) { Index size = cols; @@ -442,9 +447,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix Traits; typedef const_blas_data_mapper LhsMapper; - typedef blas_data_mapper ResMapper; + typedef blas_data_mapper ResMapper; LhsMapper lhs(_lhs,lhsStride); - ResMapper res(_res,resStride); + ResMapper res(_res,resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -520,12 +525,13 @@ struct selfadjoint_product_impl NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, NumTraits::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), - internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor> + internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor, + Dest::InnerStrideAtCompileTime> ::run( lhs.rows(), rhs.cols(), // sizes &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info - &dst.coeffRef(0,0), dst.outerStride(), // result info + &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info actualAlpha, blocking // alpha ); } diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h index 9a5318507..61396dbdf 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h @@ -44,16 +44,18 @@ namespace internal { template \ -struct product_selfadjoint_matrix \ +struct product_selfadjoint_matrix \ {\ \ static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ + EIGTYPE* res, Index resIncr, Index resStride, \ EIGTYPE alpha, level3_blocking& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ char side='L', uplo='L'; \ BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ @@ -91,15 +93,17 @@ struct product_selfadjoint_matrix \ -struct product_selfadjoint_matrix \ +struct product_selfadjoint_matrix \ {\ static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ + EIGTYPE* res, Index resIncr, Index resStride, \ EIGTYPE alpha, level3_blocking& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ char side='L', uplo='L'; \ BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ @@ -167,16 +171,18 @@ EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_) template \ -struct product_selfadjoint_matrix \ +struct product_selfadjoint_matrix \ {\ \ static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ + EIGTYPE* res, Index resIncr, Index resStride, \ EIGTYPE alpha, level3_blocking& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ char side='R', uplo='L'; \ BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ @@ -213,15 +219,17 @@ struct product_selfadjoint_matrix \ -struct product_selfadjoint_matrix \ +struct product_selfadjoint_matrix \ {\ static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ + EIGTYPE* res, Index resIncr, Index resStride, \ EIGTYPE alpha, level3_blocking& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ char side='R', uplo='L'; \ BlasIndex m, n, lda, ldb, ldc; \ const EIGTYPE *a, *b; \ diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 85fd744b9..f0c60507a 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -45,22 +45,24 @@ template + int ResStorageOrder, int ResInnerStride, + int Version = Specialized> struct product_triangular_matrix_matrix; template + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> struct product_triangular_matrix_matrix + RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version> { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking) { product_triangular_matrix_matrix - ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); + ColMajor, ResInnerStride> + ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking); } }; // implements col-major += alpha * op(triangular) * op(general) template + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> struct product_triangular_matrix_matrix + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version> { typedef gebp_traits Traits; @@ -95,20 +98,21 @@ struct product_triangular_matrix_matrix& blocking); }; template + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> EIGEN_DONT_INLINE void product_triangular_matrix_matrix::run( + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run( Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking) { // strip zeros @@ -119,10 +123,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix LhsMapper; typedef const_blas_data_mapper RhsMapper; - typedef blas_data_mapper ResMapper; + typedef blas_data_mapper ResMapper; LhsMapper lhs(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -235,10 +239,11 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> struct product_triangular_matrix_matrix + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version> { typedef gebp_traits Traits; enum { @@ -251,20 +256,21 @@ struct product_triangular_matrix_matrix& blocking); }; template + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> EIGEN_DONT_INLINE void product_triangular_matrix_matrix::run( + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run( Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking& blocking) { const Index PacketBytes = packet_traits::size*sizeof(Scalar); @@ -276,10 +282,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix LhsMapper; typedef const_blas_data_mapper RhsMapper; - typedef blas_data_mapper ResMapper; + typedef blas_data_mapper ResMapper; LhsMapper lhs(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -433,12 +439,12 @@ struct triangular_product_impl Mode, LhsIsTriangular, (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor> + (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime> ::run( stripedRows, stripedCols, stripedDepth, // sizes &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info - &dst.coeffRef(0,0), dst.outerStride(), // result info + &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info actualAlpha, blocking ); diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h index 6620965a7..a98d12e4a 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h @@ -46,7 +46,7 @@ template {}; + RhsStorageOrder, ConjugateRhs, ResStorageOrder, 1, BuiltIn> {}; // try to go to BLAS specialization @@ -55,13 +55,15 @@ template \ struct product_triangular_matrix_matrix { \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,1,Specialized> { \ static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\ - const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking& blocking) { \ + const Scalar* _rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking& blocking) { \ + EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \ + eigen_assert(resIncr == 1); \ product_triangular_matrix_matrix_trmm::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ } \ }; @@ -115,8 +117,8 @@ struct product_triangular_matrix_matrix_trmm::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \ /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \ } else { \ /* Make sense to call GEMM */ \ @@ -232,8 +234,8 @@ struct product_triangular_matrix_matrix_trmm::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \ /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \ } else { \ /* Make sense to call GEMM */ \ diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index 8ff2e9d9d..0dcf3bb52 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -15,48 +15,48 @@ namespace Eigen { namespace internal { // if the rhs is row major, let's transpose the product -template -struct triangular_solve_matrix +template +struct triangular_solve_matrix { static void run( Index size, Index cols, const Scalar* tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking& blocking) { triangular_solve_matrix< Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft, (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper), NumTraits::IsComplex && Conjugate, - TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor> - ::run(size, cols, tri, triStride, _other, otherStride, blocking); + TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride> + ::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking); } }; /* Optimized triangular solver with multiple right hand side and the triangular matrix on the left */ -template -struct triangular_solve_matrix +template +struct triangular_solve_matrix { static EIGEN_DONT_INLINE void run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking& blocking); }; -template -EIGEN_DONT_INLINE void triangular_solve_matrix::run( +template +EIGEN_DONT_INLINE void triangular_solve_matrix::run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking& blocking) { Index cols = otherSize; typedef const_blas_data_mapper TriMapper; - typedef blas_data_mapper OtherMapper; + typedef blas_data_mapper OtherMapper; TriMapper tri(_tri, triStride); - OtherMapper other(_other, otherStride); + OtherMapper other(_other, otherStride, otherIncr); typedef gebp_traits Traits; @@ -128,19 +128,19 @@ EIGEN_DONT_INLINE void triangular_solve_matrix -struct triangular_solve_matrix +template +struct triangular_solve_matrix { static EIGEN_DONT_INLINE void run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking& blocking); }; -template -EIGEN_DONT_INLINE void triangular_solve_matrix::run( +template +EIGEN_DONT_INLINE void triangular_solve_matrix::run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride, + Scalar* _other, Index otherIncr, Index otherStride, level3_blocking& blocking) { Index rows = otherSize; typedef typename NumTraits::Real RealScalar; - typedef blas_data_mapper LhsMapper; + typedef blas_data_mapper LhsMapper; typedef const_blas_data_mapper RhsMapper; - LhsMapper lhs(_other, otherStride); + LhsMapper lhs(_other, otherStride, otherIncr); RhsMapper rhs(_tri, triStride); typedef gebp_traits Traits; @@ -297,24 +297,24 @@ EIGEN_DONT_INLINE void triangular_solve_matrix \ -struct triangular_solve_matrix \ +struct triangular_solve_matrix \ { \ enum { \ IsLower = (Mode&Lower) == Lower, \ @@ -51,8 +51,10 @@ struct triangular_solve_matrix& /*blocking*/) \ + EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \ + eigen_assert(otherIncr == 1); \ BlasIndex m = convert_index(size), n = convert_index(otherSize), lda, ldb; \ char side = 'L', uplo, diag='N', transa; \ /* Set alpha_ */ \ @@ -99,7 +101,7 @@ EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_) // implements RightSide general * op(triangular)^-1 #define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \ template \ -struct triangular_solve_matrix \ +struct triangular_solve_matrix \ { \ enum { \ IsLower = (Mode&Lower) == Lower, \ @@ -110,8 +112,10 @@ struct triangular_solve_matrix& /*blocking*/) \ + EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking& /*blocking*/) \ { \ + EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \ + eigen_assert(otherIncr == 1); \ BlasIndex m = convert_index(otherSize), n = convert_index(size), lda, ldb; \ char side = 'R', uplo, diag='N', transa; \ /* Set alpha_ */ \ diff --git a/blas/level3_impl.h b/blas/level3_impl.h index 1b8ad7b26..6dd6338b4 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -79,63 +79,63 @@ int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, c const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n"; - typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking&); + typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, internal::level3_blocking&); static const functype func[32] = { // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run),\ + (internal::triangular_solve_matrix::run),\ 0, // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), 0, // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), 0, // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), 0, // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), 0, // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), 0, // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), 0, // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4) - (internal::triangular_solve_matrix::run), + (internal::triangular_solve_matrix::run), 0 }; @@ -163,12 +163,12 @@ int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, c if(SIDE(*side)==LEFT) { internal::gemm_blocking_space blocking(*m,*n,*m,1,false); - func[code](*m, *n, a, *lda, b, *ldb, blocking); + func[code](*m, *n, a, *lda, b, 1, *ldb, blocking); } else { internal::gemm_blocking_space blocking(*m,*n,*n,1,false); - func[code](*n, *m, a, *lda, b, *ldb, blocking); + func[code](*n, *m, a, *lda, b, 1, *ldb, blocking); } if(alpha!=Scalar(1)) @@ -184,63 +184,63 @@ int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, c const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n"; - typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); + typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, const Scalar&, internal::level3_blocking&); static const functype func[32] = { // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), 0, // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), 0, // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), 0, // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), 0, // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), 0, // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), 0, // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), 0, // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4) - (internal::product_triangular_matrix_matrix::run), + (internal::product_triangular_matrix_matrix::run), 0 }; @@ -272,12 +272,12 @@ int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, c if(SIDE(*side)==LEFT) { internal::gemm_blocking_space blocking(*m,*n,*m,1,false); - func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha, blocking); + func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, 1, *ldb, alpha, blocking); } else { internal::gemm_blocking_space blocking(*m,*n,*n,1,false); - func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha, blocking); + func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, 1, *ldb, alpha, blocking); } return 1; } @@ -338,12 +338,12 @@ int EIGEN_BLAS_FUNC(symm)(const char *side, const char *uplo, const int *m, cons internal::gemm_blocking_space blocking(*m,*n,size,1,false); if(SIDE(*side)==LEFT) - if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); + if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking); + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking); else return 0; else if(SIDE(*side)==RIGHT) - if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); + if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking); + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking); else return 0; else return 0; @@ -537,18 +537,18 @@ int EIGEN_BLAS_FUNC(hemm)(const char *side, const char *uplo, const int *m, cons if(SIDE(*side)==LEFT) { - if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix - ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix - ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); + if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix + ::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking); + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix + ::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking); else return 0; } else if(SIDE(*side)==RIGHT) { - if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView();/*internal::product_selfadjoint_matrix - ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);*/ - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix - ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); + if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView();/*internal::product_selfadjoint_matrix + ::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);*/ + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix + ::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking); else return 0; } else diff --git a/test/product_symm.cpp b/test/product_symm.cpp index 7d786467d..ea8d4d5cf 100644 --- a/test/product_symm.cpp +++ b/test/product_symm.cpp @@ -75,12 +75,12 @@ template void symm(int size = Size, in rhs13 = (s1*m1.adjoint()) * (s2*rhs2.adjoint())); // test row major = <...> - m2 = m1.template triangularView(); rhs12.setRandom(); rhs13 = rhs12; - VERIFY_IS_APPROX(rhs12 -= (s1*m2).template selfadjointView() * (s2*rhs3), + m2 = m1.template triangularView(); rhs32.setRandom(); rhs13 = rhs32; + VERIFY_IS_APPROX(rhs32.noalias() -= (s1*m2).template selfadjointView() * (s2*rhs3), rhs13 -= (s1*m1) * (s2 * rhs3)); m2 = m1.template triangularView(); - VERIFY_IS_APPROX(rhs12 = (s1*m2.adjoint()).template selfadjointView() * (s2*rhs3).conjugate(), + VERIFY_IS_APPROX(rhs32.noalias() = (s1*m2.adjoint()).template selfadjointView() * (s2*rhs3).conjugate(), rhs13 = (s1*m1.adjoint()) * (s2*rhs3).conjugate()); @@ -92,6 +92,20 @@ template void symm(int size = Size, in VERIFY_IS_APPROX(rhs22 = (rhs2) * (m2).template selfadjointView(), rhs23 = (rhs2) * (m1)); VERIFY_IS_APPROX(rhs22 = (s2*rhs2) * (s1*m2).template selfadjointView(), rhs23 = (s2*rhs2) * (s1*m1)); + // destination with a non-default inner-stride + // see bug 1741 + { + typedef Matrix MatrixX; + MatrixX buffer(2*cols,2*othersize); + Map > map1(buffer.data(),cols,othersize,Stride(2*rows,2)); + buffer.setZero(); + VERIFY_IS_APPROX( map1.noalias() = (s1*m2).template selfadjointView() * (s2*rhs1), + rhs13 = (s1*m1) * (s2*rhs1)); + + Map > map2(buffer.data(),rhs22.rows(),rhs22.cols(),Stride(2*rhs22.outerStride(),2)); + buffer.setZero(); + VERIFY_IS_APPROX(map2 = (rhs2) * (m2).template selfadjointView(), rhs23 = (rhs2) * (m1)); + } } EIGEN_DECLARE_TEST(product_symm) diff --git a/test/product_trmm.cpp b/test/product_trmm.cpp index c7594e512..2bb4b9e47 100644 --- a/test/product_trmm.cpp +++ b/test/product_trmm.cpp @@ -76,8 +76,18 @@ void trmm(int rows=get_random_size(), VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint()); VERIFY_IS_APPROX( ge_xs = (s1*mat).transpose().template triangularView() * ge_left.adjoint(), s1triTr * ge_left.adjoint()); - // TODO check with sub-matrix expressions ? + + // destination with a non-default inner-stride + // see bug 1741 + { + VERIFY_IS_APPROX( ge_xs.noalias() = mat.template triangularView() * ge_right, tri * ge_right); + typedef Matrix MatrixX; + MatrixX buffer(2*ge_xs.rows(),2*ge_xs.cols()); + Map > map1(buffer.data(),ge_xs.rows(),ge_xs.cols(),Stride(2*ge_xs.outerStride(),2)); + buffer.setZero(); + VERIFY_IS_APPROX( map1.noalias() = mat.template triangularView() * ge_right, tri * ge_right); + } } template diff --git a/test/product_trsolve.cpp b/test/product_trsolve.cpp index c927cb635..c59748c5b 100644 --- a/test/product_trsolve.cpp +++ b/test/product_trsolve.cpp @@ -72,6 +72,19 @@ template void trsolve(int size=Size,int cols VERIFY_TRSM(rmLhs.template triangularView(), rmRhs.col(c)); VERIFY_TRSM(cmLhs.template triangularView(), rmRhs.col(c)); + // destination with a non-default inner-stride + // see bug 1741 + { + typedef Matrix MatrixX; + MatrixX buffer(2*cmRhs.rows(),2*cmRhs.cols()); + Map,0,Stride > map1(buffer.data(),cmRhs.rows(),cmRhs.cols(),Stride(2*cmRhs.outerStride(),2)); + Map,0,Stride > map2(buffer.data(),rmRhs.rows(),rmRhs.cols(),Stride(2*rmRhs.outerStride(),2)); + buffer.setZero(); + VERIFY_TRSM(cmLhs.conjugate().template triangularView(), map1); + buffer.setZero(); + VERIFY_TRSM(cmLhs .template triangularView(), map2); + } + if(Size==Dynamic) { cmLhs.resize(0,0); -- cgit v1.2.3