diff options
author | Gael Guennebaud <g.gael@free.fr> | 2010-11-10 18:58:55 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2010-11-10 18:58:55 +0100 |
commit | 2577ef90c027d8bad4dd632022fa65cec6313159 (patch) | |
tree | 69b63c743bcc79daa4af6ab7df29c3dac6e1e034 | |
parent | c810d14d4d595f6a9e748f625ddb419bb91b8262 (diff) |
generalize our internal rank K update routine to support more general A*B product while evaluating only one triangular part and make it available via, e.g.:
R.triangularView<Lower>() += s * A * B;
-rw-r--r-- | Eigen/Core | 1 | ||||
-rw-r--r-- | Eigen/src/Core/TriangularMatrix.h | 50 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 225 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointProduct.h | 176 | ||||
-rw-r--r-- | test/CMakeLists.txt | 1 | ||||
-rw-r--r-- | test/product_mmtr.cpp | 80 |
6 files changed, 360 insertions, 173 deletions
diff --git a/Eigen/Core b/Eigen/Core index 5e81d10d1..48a5cb2c1 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -308,6 +308,7 @@ using std::size_t; #include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/GeneralMatrixVector.h" #include "src/Core/products/GeneralMatrixMatrix.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular.h" #include "src/Core/products/SelfadjointMatrixVector.h" #include "src/Core/products/SelfadjointMatrixMatrix.h" #include "src/Core/products/SelfadjointProduct.h" diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index e361af643..ba0f2741e 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -189,9 +189,9 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView inline Index innerStride() const { return m_matrix.innerStride(); } /** \sa MatrixBase::operator+=() */ - template<typename Other> TriangularView& operator+=(const Other& other) { return *this = m_matrix + other; } + template<typename Other> TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); } /** \sa MatrixBase::operator-=() */ - template<typename Other> TriangularView& operator-=(const Other& other) { return *this = m_matrix - other; } + template<typename Other> TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); } /** \sa MatrixBase::operator*=() */ TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; } /** \sa MatrixBase::operator/=() */ @@ -292,7 +292,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView (lhs.derived(),rhs.m_matrix); } - template<int Side, typename OtherDerived> typename internal::plain_matrix_type_column_major<OtherDerived>::type solve(const MatrixBase<OtherDerived>& other) const; @@ -341,8 +340,51 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView else return m_matrix.diagonal().prod(); } - + + // TODO simplify the following: + template<typename ProductDerived, typename Lhs, typename Rhs> + EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other) + { + setZero(); + return assignProduct(other,1); + } + + template<typename ProductDerived, typename Lhs, typename Rhs> + EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other) + { + return assignProduct(other,1); + } + + template<typename ProductDerived, typename Lhs, typename Rhs> + EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other) + { + return assignProduct(other,-1); + } + + + template<typename ProductDerived> + EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other) + { + setZero(); + return assignProduct(other,other.alpha()); + } + + template<typename ProductDerived> + EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other) + { + return assignProduct(other,other.alpha()); + } + + template<typename ProductDerived> + EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other) + { + return assignProduct(other,-other.alpha()); + } + protected: + + template<typename ProductDerived, typename Lhs, typename Rhs> + EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha); const MatrixTypeNested m_matrix; }; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h new file mode 100644 index 000000000..c29ea90fd --- /dev/null +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -0,0 +1,225 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H +#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H + +namespace internal { + +/********************************************************************** +* This file implements a general A * B product while +* evaluating only one triangular part of the product. +* This is more general version of self adjoint product (C += A A^T) +* as the level 3 SYRK Blas routine. +**********************************************************************/ + +// forward declarations (defined at the end of this file) +template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> +struct tribb_kernel; + +/* Optimized matrix-matrix product evaluating only one triangular half */ +template <typename Index, + typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, + int ResStorageOrder, int UpLo> +struct general_matrix_matrix_triangular_product; + +// as usual if the result is row major => we transpose the product +template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo> +struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo> +{ + typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha) + { + general_matrix_matrix_triangular_product<Index, + RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, + LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, + ColMajor, UpLo==Lower?Upper:Lower> + ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); + } +}; + +template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo> +struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo> +{ + typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha) + { + const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); + const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); + + typedef gebp_traits<LhsScalar,RhsScalar> Traits; + + Index kc = depth; // cache block size along the K direction + Index mc = size; // cache block size along the M direction + Index nc = size; // cache block size along the N direction + computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc); + // !!! mc must be a multiple of nr: + if(mc > Traits::nr) + mc = (mc/Traits::nr)*Traits::nr; + + LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc); + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + std::size_t sizeB = sizeW + kc*size; + RhsScalar* allocatedBlockB = ei_aligned_stack_new(RhsScalar, sizeB); + RhsScalar* blockB = allocatedBlockB + sizeW; + + gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs; + gebp_kernel <LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp; + tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb; + + for(Index k2=0; k2<depth; k2+=kc) + { + const Index actual_kc = std::min(k2+kc,depth)-k2; + + // note that the actual rhs is the transpose/adjoint of mat + pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size); + + for(Index i2=0; i2<size; i2+=mc) + { + const Index actual_mc = std::min(i2+mc,size)-i2; + + pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); + + // the selected actual_mc * size panel of res is split into three different part: + // 1 - before the diagonal => processed with gebp or skipped + // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel + // 3 - after the diagonal => processed with gebp or skipped + if (UpLo==Lower) + gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha, + -1, -1, 0, 0, allocatedBlockB); + + sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB); + + if (UpLo==Upper) + { + Index j2 = i2+actual_mc; + gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha, + -1, -1, 0, 0, allocatedBlockB); + } + } + } + ei_aligned_stack_delete(LhsScalar, blockA, kc*mc); + ei_aligned_stack_delete(RhsScalar, allocatedBlockB, sizeB); + } +}; + +// Optimized packed Block * packed Block product kernel evaluating only one given triangular part +// This kernel is built on top of the gebp kernel: +// - the current destination block is processed per panel of actual_mc x BlockSize +// where BlockSize is set to the minimal value allowing gebp to be as fast as possible +// - then, as usual, each panel is split into three parts along the diagonal, +// the sub blocks above and below the diagonal are processed as usual, +// while the triangular block overlapping the diagonal is evaluated into a +// small temporary buffer which is then accumulated into the result using a +// triangular traversal. +template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> +struct tribb_kernel +{ + typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits; + typedef typename Traits::ResScalar ResScalar; + + enum { + BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) + }; + void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace) + { + gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel; + Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer; + + // let's process the block per panel of actual_mc x BlockSize, + // again, each is split into three parts, etc. + for (Index j=0; j<size; j+=BlockSize) + { + Index actualBlockSize = std::min<Index>(BlockSize,size - j); + const RhsScalar* actual_b = blockB+j*depth; + + if(UpLo==Upper) + gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha, + -1, -1, 0, 0, workspace); + + // selfadjoint micro block + { + Index i = j; + buffer.setZero(); + // 1 - apply the kernel on the temporary buffer + gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, + -1, -1, 0, 0, workspace); + // 2 - triangular accumulation + for(Index j1=0; j1<actualBlockSize; ++j1) + { + ResScalar* r = res + (j+j1)*resStride + i; + for(Index i1=UpLo==Lower ? j1 : 0; + UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1) + r[i1] += buffer(i1,j1); + } + } + + if(UpLo==Lower) + { + Index i = j+actualBlockSize; + gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha, + -1, -1, 0, 0, workspace); + } + } + } +}; + +} // end namespace internal + +// high level API + +template<typename MatrixType, unsigned int UpLo> +template<typename ProductDerived, typename Lhs, typename Rhs> +TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha) +{ + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; + typedef typename internal::remove_all<ActualLhs>::type _ActualLhs; + const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs()); + + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; + typedef typename internal::remove_all<ActualRhs>::type _ActualRhs; + const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs()); + + typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); + + internal::general_matrix_matrix_triangular_product<Index, + typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, + typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, + MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> + ::run(m_matrix.cols(), actualLhs.cols(), + &actualLhs.coeff(0,0), actualLhs.outerStride(), &actualRhs.coeff(0,0), actualRhs.outerStride(), + const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha); + + return *this; +} + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 16320fa17..c6a5287b5 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -25,175 +25,12 @@ #ifndef EIGEN_SELFADJOINT_PRODUCT_H #define EIGEN_SELFADJOINT_PRODUCT_H -namespace internal { - /********************************************************************** * This file implements a self adjoint product: C += A A^T updating only * half of the selfadjoint matrix C. * It corresponds to the level 3 SYRK Blas routine. **********************************************************************/ -// forward declarations (defined at the end of this file) -template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> -struct sybb_kernel; - -/* Optimized selfadjoint product (_SYRK) */ -template <typename Scalar, typename Index, - int RhsStorageOrder, - int ResStorageOrder, bool AAT, int UpLo> -struct selfadjoint_product; - -// as usual if the result is row major => we transpose the product -template <typename Scalar, typename Index, int MatStorageOrder, bool AAT, int UpLo> -struct selfadjoint_product<Scalar, Index, MatStorageOrder, RowMajor, AAT, UpLo> -{ - static EIGEN_STRONG_INLINE void run(Index size, Index depth, const Scalar* mat, Index matStride, Scalar* res, Index resStride, Scalar alpha) - { - selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, !AAT, UpLo==Lower?Upper:Lower> - ::run(size, depth, mat, matStride, res, resStride, alpha); - } -}; - -template <typename Scalar, typename Index, - int MatStorageOrder, bool AAT, int UpLo> -struct selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpLo> -{ - - static EIGEN_DONT_INLINE void run( - Index size, Index depth, - const Scalar* _mat, Index matStride, - Scalar* res, Index resStride, - Scalar alpha) - { - const_blas_data_mapper<Scalar, Index, MatStorageOrder> mat(_mat,matStride); - -// if(AAT) -// alpha = conj(alpha); - - typedef gebp_traits<Scalar,Scalar> Traits; - - Index kc = depth; // cache block size along the K direction - Index mc = size; // cache block size along the M direction - Index nc = size; // cache block size along the N direction - computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc); - // !!! mc must be a multiple of nr: - if(mc>Traits::nr) - mc = (mc/Traits::nr)*Traits::nr; - - Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); - std::size_t sizeW = kc*Traits::WorkSpaceFactor; - std::size_t sizeB = sizeW + kc*size; - Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); - Scalar* blockB = allocatedBlockB + sizeW; - - // note that the actual rhs is the transpose/adjoint of mat - enum { - ConjLhs = NumTraits<Scalar>::IsComplex && !AAT, - ConjRhs = NumTraits<Scalar>::IsComplex && AAT - }; - - gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs> gebp_kernel; - gemm_pack_rhs<Scalar, Index, Traits::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs; - gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, MatStorageOrder, false> pack_lhs; - sybb_kernel<Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs, UpLo> sybb; - - for(Index k2=0; k2<depth; k2+=kc) - { - const Index actual_kc = std::min(k2+kc,depth)-k2; - - // note that the actual rhs is the transpose/adjoint of mat - pack_rhs(blockB, &mat(0,k2), matStride, actual_kc, size); - - for(Index i2=0; i2<size; i2+=mc) - { - const Index actual_mc = std::min(i2+mc,size)-i2; - - pack_lhs(blockA, &mat(i2, k2), matStride, actual_kc, actual_mc); - - // the selected actual_mc * size panel of res is split into three different part: - // 1 - before the diagonal => processed with gebp or skipped - // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel - // 3 - after the diagonal => processed with gebp or skipped - if (UpLo==Lower) - gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha, - -1, -1, 0, 0, allocatedBlockB); - - sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB); - - if (UpLo==Upper) - { - Index j2 = i2+actual_mc; - gebp_kernel(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha, - -1, -1, 0, 0, allocatedBlockB); - } - } - } - ei_aligned_stack_delete(Scalar, blockA, kc*mc); - ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB); - } -}; - -// Optimized SYmmetric packed Block * packed Block product kernel. -// This kernel is built on top of the gebp kernel: -// - the current selfadjoint block (res) is processed per panel of actual_mc x BlockSize -// where BlockSize is set to the minimal value allowing gebp to be as fast as possible -// - then, as usual, each panel is split into three parts along the diagonal, -// the sub blocks above and below the diagonal are processed as usual, -// while the selfadjoint block overlapping the diagonal is evaluated into a -// small temporary buffer which is then accumulated into the result using a -// triangular traversal. -template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> -struct sybb_kernel -{ - enum { - PacketSize = packet_traits<Scalar>::size, - BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) - }; - void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar alpha, Scalar* workspace) - { - gebp_kernel<Scalar, Scalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel; - Matrix<Scalar,BlockSize,BlockSize,ColMajor> buffer; - - // let's process the block per panel of actual_mc x BlockSize, - // again, each is split into three parts, etc. - for (Index j=0; j<size; j+=BlockSize) - { - Index actualBlockSize = std::min<Index>(BlockSize,size - j); - const Scalar* actual_b = blockB+j*depth; - - if(UpLo==Upper) - gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha, - -1, -1, 0, 0, workspace); - - // selfadjoint micro block - { - Index i = j; - buffer.setZero(); - // 1 - apply the kernel on the temporary buffer - gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, - -1, -1, 0, 0, workspace); - // 2 - triangular accumulation - for(Index j1=0; j1<actualBlockSize; ++j1) - { - Scalar* r = res + (j+j1)*resStride + i; - for(Index i1=UpLo==Lower ? j1 : 0; - UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1) - r[i1] += buffer(i1,j1); - } - } - - if(UpLo==Lower) - { - Index i = j+actualBlockSize; - gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha, - -1, -1, 0, 0, workspace); - } - } - } -}; - -} // end namespace internal - // high level API template<typename MatrixType, unsigned int UpLo> @@ -209,12 +46,13 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived()); enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; - - internal::selfadjoint_product<Scalar, Index, - _ActualUType::Flags&RowMajorBit ? RowMajor : ColMajor, - MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, - !UBlasTraits::NeedToConjugate, UpLo> - ::run(_expression().cols(), actualU.cols(), &actualU.coeff(0,0), actualU.outerStride(), + + internal::general_matrix_matrix_triangular_product<Index, + Scalar, _ActualUType::Flags&RowMajorBit ? RowMajor : ColMajor, UBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, + Scalar, _ActualUType::Flags&RowMajorBit ? ColMajor : RowMajor, (!UBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, + MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> + ::run(_expression().cols(), actualU.cols(), + &actualU.coeff(0,0), actualU.outerStride(), &actualU.coeff(0,0), actualU.outerStride(), const_cast<Scalar*>(_expression().data()), _expression().outerStride(), actualAlpha); return *this; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 507eef31d..5f6e50a08 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -73,6 +73,7 @@ ei_add_test(product_syrk) ei_add_test(product_trmv) ei_add_test(product_trmm) ei_add_test(product_trsolve) +ei_add_test(product_mmtr) ei_add_test(product_notemporary) ei_add_test(stable_norm) ei_add_test(bandmatrix) diff --git a/test/product_mmtr.cpp b/test/product_mmtr.cpp new file mode 100644 index 000000000..1048a894d --- /dev/null +++ b/test/product_mmtr.cpp @@ -0,0 +1,80 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#include "main.h" + +#define CHECK_MMTR(DEST, TRI, OP) { \ + ref2 = ref1 = DEST; \ + DEST.template triangularView<TRI>() OP; \ + ref1 OP; \ + ref2.template triangularView<TRI>() = ref1; \ + VERIFY_IS_APPROX(DEST,ref2); \ + } + +template<typename Scalar> void mmtr(int size) +{ + typedef typename NumTraits<Scalar>::Real RealScalar; + + typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> MatrixColMaj; + typedef Matrix<Scalar,Dynamic,Dynamic,RowMajor> MatrixRowMaj; + + DenseIndex othersize = internal::random<DenseIndex>(1,200); + + MatrixColMaj matc(size, size); + MatrixRowMaj matr(size, size); + MatrixColMaj ref1(size, size), ref2(size, size); + + MatrixColMaj soc(size,othersize); soc.setRandom(); + MatrixColMaj osc(othersize,size); osc.setRandom(); + MatrixRowMaj sor(size,othersize); sor.setRandom(); + MatrixRowMaj osr(othersize,size); osr.setRandom(); + + Scalar s = internal::random<Scalar>(); + + CHECK_MMTR(matc, Lower, = s*soc*sor.adjoint()); + CHECK_MMTR(matc, Upper, = s*(soc*soc.adjoint())); + CHECK_MMTR(matr, Lower, = s*soc*soc.adjoint()); + CHECK_MMTR(matr, Upper, = soc*(s*sor.adjoint())); + + CHECK_MMTR(matc, Lower, += s*soc*soc.adjoint()); + CHECK_MMTR(matc, Upper, += s*(soc*sor.transpose())); + CHECK_MMTR(matr, Lower, += s*sor*soc.adjoint()); + CHECK_MMTR(matr, Upper, += soc*(s*soc.adjoint())); + + CHECK_MMTR(matc, Lower, -= s*soc*soc.adjoint()); + CHECK_MMTR(matc, Upper, -= s*(osc.transpose()*osc.conjugate())); + CHECK_MMTR(matr, Lower, -= s*soc*soc.adjoint()); + CHECK_MMTR(matr, Upper, -= soc*(s*soc.adjoint())); +} + +void test_product_mmtr() +{ + for(int i = 0; i < g_repeat ; i++) + { + CALL_SUBTEST_1((mmtr<float>(internal::random<int>(1,320)))); + CALL_SUBTEST_2((mmtr<double>(internal::random<int>(1,320)))); + CALL_SUBTEST_3((mmtr<std::complex<float> >(internal::random<int>(1,200)))); + CALL_SUBTEST_4((mmtr<std::complex<double> >(internal::random<int>(1,200)))); + } +} |