diff options
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 44 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointProduct.h | 24 | ||||
-rw-r--r-- | test/nomalloc.cpp | 9 |
3 files changed, 57 insertions, 20 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index a36eb2fe0..f80f3b410 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -42,13 +42,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, - const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) + const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, + const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking) { general_matrix_matrix_triangular_product<Index, RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, ColMajor, UpLo==Lower?Upper:Lower> - ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); + ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking); } }; @@ -58,7 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, - const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha) + const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, + const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking) { typedef gebp_traits<LhsScalar,RhsScalar> Traits; @@ -73,12 +75,20 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, Index mc = size; // cache block size along the M direction Index nc = size; // cache block size along the N direction computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc, 1); + + kc = blocking.kc(); + mc = (std::min)(size,blocking.mc()); + nc = (std::min)(size,blocking.nc()); + // !!! mc must be a multiple of nr: if(mc > Traits::nr) mc = (mc/Traits::nr)*Traits::nr; - ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0); + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*size; + + ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB()); gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs; @@ -256,13 +266,27 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); + enum { + IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, + LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, + RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 + }; + + Index size = mat.cols(); + Index depth = actualLhs.cols(); + + typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar, + MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType; + + BlockingType blocking(size, size, depth, 1, false); + internal::general_matrix_matrix_triangular_product<Index, - typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, - typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> - ::run(mat.cols(), actualLhs.cols(), + typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, + typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, + IsRowMajor ? RowMajor : ColMajor, UpLo> + ::run(size, depth, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), - mat.data(), mat.outerStride(), actualAlpha); + mat.data(), mat.outerStride(), actualAlpha, blocking); } }; diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 2af00058d..f038d686f 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -92,15 +92,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false> Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); - enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; + enum { + IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, + OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0 + }; + + Index size = mat.cols(); + Index depth = actualOther.cols(); + + typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar, + MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType; + + BlockingType blocking(size, size, depth, 1, false); + internal::general_matrix_matrix_triangular_product<Index, - Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, - Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, - MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> - ::run(mat.cols(), actualOther.cols(), + Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, + Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, + IsRowMajor ? RowMajor : ColMajor, UpLo> + ::run(size, depth, &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(), - mat.data(), mat.outerStride(), actualAlpha); + mat.data(), mat.outerStride(), actualAlpha, blocking); } }; diff --git a/test/nomalloc.cpp b/test/nomalloc.cpp index 060276a20..d85e9e5bc 100644 --- a/test/nomalloc.cpp +++ b/test/nomalloc.cpp @@ -81,11 +81,12 @@ template<typename MatrixType> void nomalloc(const MatrixType& m) m2.template selfadjointView<Lower>().rankUpdate(m1.row(0),-1); // The following fancy matrix-matrix products are not safe yet regarding static allocation -// m1 += m1.template triangularView<Upper>() * m2.col(; -// m1.template selfadjointView<Lower>().rankUpdate(m2); -// m1 += m1.template triangularView<Upper>() * m2; +// m1.col(1) += m1.template triangularView<Upper>() * m2.col(0); + m2.template selfadjointView<Lower>().rankUpdate(m1); + m2 += m2.template triangularView<Upper>() * m1; + m2.template triangularView<Upper>() = m2 * m2; // m1 += m1.template selfadjointView<Lower>() * m2; -// VERIFY_IS_APPROX(m1,m1); + VERIFY_IS_APPROX(m2,m2); } template<typename Scalar> |