diff options
author | Gael Guennebaud <g.gael@free.fr> | 2009-07-11 21:14:59 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2009-07-11 21:14:59 +0200 |
commit | a2087cd7a3674c3d3ef74a474e417a3ea1f1e82b (patch) | |
tree | 601492688aa27c6e69ddf93b51892bfc676a2121 /Eigen/src | |
parent | b47dea8b7aeab10cf584f2d3275192d90d8df2ed (diff) |
Add an efficient rank2 update function (like the level2 blas xSYR2 routine).
Note that it is already used in Tridiagonalization.
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/Product.h | 58 | ||||
-rw-r--r-- | Eigen/src/Core/SelfAdjointView.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/SolveTriangular.h | 17 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixVector.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointMatrixVector.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/products/SelfadjointRank2Update.h | 96 | ||||
-rw-r--r-- | Eigen/src/QR/Tridiagonalization.h | 6 |
8 files changed, 150 insertions, 43 deletions
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index bd99bfee8..44e3f606e 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -76,7 +76,7 @@ struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct> /* Helper class to analyze the factors of a Product expression. * In particular it allows to pop out operator-, scalar multiples, * and conjugate */ -template<typename XprType> struct ei_product_factor_traits +template<typename XprType> struct ei_blas_traits { typedef typename ei_traits<XprType>::Scalar Scalar; typedef XprType ActualXprType; @@ -85,15 +85,19 @@ template<typename XprType> struct ei_product_factor_traits NeedToConjugate = false, ActualAccess = int(ei_traits<XprType>::Flags)&DirectAccessBit ? HasDirectAccess : NoDirectAccess }; + typedef typename ei_meta_if<int(ActualAccess)==HasDirectAccess, + const ActualXprType&, + typename ActualXprType::PlainMatrixType + >::ret DirectLinearAccessType; static inline const ActualXprType& extract(const XprType& x) { return x; } static inline Scalar extractScalarFactor(const XprType&) { return Scalar(1); } }; // pop conjugate -template<typename Scalar, typename NestedXpr> struct ei_product_factor_traits<CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, NestedXpr> > - : ei_product_factor_traits<NestedXpr> +template<typename Scalar, typename NestedXpr> struct ei_blas_traits<CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, NestedXpr> > + : ei_blas_traits<NestedXpr> { - typedef ei_product_factor_traits<NestedXpr> Base; + typedef ei_blas_traits<NestedXpr> Base; typedef CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; @@ -106,10 +110,10 @@ template<typename Scalar, typename NestedXpr> struct ei_product_factor_traits<Cw }; // pop scalar multiple -template<typename Scalar, typename NestedXpr> struct ei_product_factor_traits<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, NestedXpr> > - : ei_product_factor_traits<NestedXpr> +template<typename Scalar, typename NestedXpr> struct ei_blas_traits<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, NestedXpr> > + : ei_blas_traits<NestedXpr> { - typedef ei_product_factor_traits<NestedXpr> Base; + typedef ei_blas_traits<NestedXpr> Base; typedef CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); } @@ -118,10 +122,10 @@ template<typename Scalar, typename NestedXpr> struct ei_product_factor_traits<Cw }; // pop opposite -template<typename Scalar, typename NestedXpr> struct ei_product_factor_traits<CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, NestedXpr> > - : ei_product_factor_traits<NestedXpr> +template<typename Scalar, typename NestedXpr> struct ei_blas_traits<CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, NestedXpr> > + : ei_blas_traits<NestedXpr> { - typedef ei_product_factor_traits<NestedXpr> Base; + typedef ei_blas_traits<NestedXpr> Base; typedef CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); } @@ -130,11 +134,11 @@ template<typename Scalar, typename NestedXpr> struct ei_product_factor_traits<Cw }; // pop opposite -template<typename NestedXpr> struct ei_product_factor_traits<NestByValue<NestedXpr> > - : ei_product_factor_traits<NestedXpr> +template<typename NestedXpr> struct ei_blas_traits<NestByValue<NestedXpr> > + : ei_blas_traits<NestedXpr> { typedef typename NestedXpr::Scalar Scalar; - typedef ei_product_factor_traits<NestedXpr> Base; + typedef ei_blas_traits<NestedXpr> Base; typedef NestByValue<NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(static_cast<const NestedXpr&>(x)); } @@ -148,8 +152,8 @@ template<typename NestedXpr> struct ei_product_factor_traits<NestByValue<NestedX */ template<typename Lhs, typename Rhs> struct ei_product_mode { - typedef typename ei_product_factor_traits<Lhs>::ActualXprType ActualLhs; - typedef typename ei_product_factor_traits<Rhs>::ActualXprType ActualRhs; + typedef typename ei_blas_traits<Lhs>::ActualXprType ActualLhs; + typedef typename ei_blas_traits<Rhs>::ActualXprType ActualRhs; enum{ value = Lhs::MaxColsAtCompileTime == Dynamic @@ -600,10 +604,10 @@ static void ei_cache_friendly_product_rowmajor_times_vector( template<typename ProductType, int LhsRows = ei_traits<ProductType>::RowsAtCompileTime, int LhsOrder = int(ei_traits<ProductType>::LhsFlags)&RowMajorBit ? RowMajor : ColMajor, - int LhsHasDirectAccess = ei_product_factor_traits<typename ei_traits<ProductType>::_LhsNested>::ActualAccess, + int LhsHasDirectAccess = ei_blas_traits<typename ei_traits<ProductType>::_LhsNested>::ActualAccess, int RhsCols = ei_traits<ProductType>::ColsAtCompileTime, int RhsOrder = int(ei_traits<ProductType>::RhsFlags)&RowMajorBit ? RowMajor : ColMajor, - int RhsHasDirectAccess = ei_product_factor_traits<typename ei_traits<ProductType>::_RhsNested>::ActualAccess> + int RhsHasDirectAccess = ei_blas_traits<typename ei_traits<ProductType>::_RhsNested>::ActualAccess> struct ei_cache_friendly_product_selector { template<typename DestDerived> @@ -633,8 +637,8 @@ template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess> struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirectAccess,1,RhsOrder,RhsAccess> { typedef typename ProductType::Scalar Scalar; - typedef ei_product_factor_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits; + typedef ei_blas_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits; + typedef ei_blas_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; @@ -694,8 +698,8 @@ template<typename ProductType, int LhsOrder, int LhsAccess, int RhsCols> struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCols,RowMajor,HasDirectAccess> { typedef typename ProductType::Scalar Scalar; - typedef ei_product_factor_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits; + typedef ei_blas_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits; + typedef ei_blas_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; @@ -740,8 +744,8 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect { typedef typename ProductType::Scalar Scalar; - typedef ei_product_factor_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits; + typedef ei_blas_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits; + typedef ei_blas_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; @@ -783,8 +787,8 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo { typedef typename ProductType::Scalar Scalar; - typedef ei_product_factor_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits; + typedef ei_blas_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits; + typedef ei_blas_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; @@ -903,8 +907,8 @@ template<typename Lhs, typename Rhs, int ProductMode> template<typename DestDerived> inline void Product<Lhs,Rhs,ProductMode>::_cacheFriendlyEvalAndAdd(DestDerived& res, Scalar alpha) const { - typedef ei_product_factor_traits<_LhsNested> LhsProductTraits; - typedef ei_product_factor_traits<_RhsNested> RhsProductTraits; + typedef ei_blas_traits<_LhsNested> LhsProductTraits; + typedef ei_blas_traits<_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 2f66cfa45..28f44cbbc 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -106,6 +106,16 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView return ei_selfadjoint_vector_product_returntype<SelfAdjointView,OtherDerived>(*this, rhs.derived()); } + /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this: + * \f$ this = this + \alpha ( u v^* + v u^*) \f$ + * + * The vectors \a u and \c v \b must be column vectors, however they can be + * a adjoint expression without any overhead. Only the meaningful triangular + * part of the matrix is updated, the rest is left unchanged. + */ + template<typename DerivedU, typename DerivedV> + void rank2update(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha = Scalar(1)); + /////////// Cholesky module /////////// const LLT<PlainMatrixType, UpLo> llt() const; diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 3a65a8b27..200b4a325 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -33,12 +33,12 @@ template<typename Lhs, typename Rhs, > struct ei_triangular_solver_selector; -// forward substitution, row-major +// forward and backward substitution, row-major template<typename Lhs, typename Rhs, int Mode> struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,RowMajor> { typedef typename Rhs::Scalar Scalar; - typedef ei_product_factor_traits<Lhs> LhsProductTraits; + typedef ei_blas_traits<Lhs> LhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; enum { IsLowerTriangular = ((Mode&LowerTriangularBit)==LowerTriangularBit) @@ -60,6 +60,9 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,RowMajor> int r = IsLowerTriangular ? pi : size - pi; // remaining size if (r > 0) { + // let's directly call the low level product function because: + // 1 - it is faster to compile + // 2 - it is slighlty faster at runtime int startRow = IsLowerTriangular ? pi : pi-actualPanelWidth; int startCol = IsLowerTriangular ? 0 : pi; Block<Rhs,Dynamic,1> target(other,startRow,c,actualPanelWidth,1); @@ -86,17 +89,13 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,RowMajor> } }; -// Implements the following configurations: -// - inv(LowerTriangular, ColMajor) * Column vectors -// - inv(LowerTriangular,UnitDiag,ColMajor) * Column vectors -// - inv(UpperTriangular, ColMajor) * Column vectors -// - inv(UpperTriangular,UnitDiag,ColMajor) * Column vectors +// forward and backward substitution, column-major template<typename Lhs, typename Rhs, int Mode> struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,ColMajor> { typedef typename Rhs::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type Packet; - typedef ei_product_factor_traits<Lhs> LhsProductTraits; + typedef ei_blas_traits<Lhs> LhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; enum { PacketSize = ei_packet_traits<Scalar>::size, @@ -136,7 +135,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,ColMajor> int r = IsLowerTriangular ? size - endBlock : startBlock; // remaining size if (r > 0) { - // let's directly call this function because: + // let's directly call the low level product function because: // 1 - it is faster to compile // 2 - it is slighlty faster at runtime ei_cache_friendly_product_colmajor_times_vector<LhsProductTraits::NeedToConjugate,false>( diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 0036fe390..fe3e877e1 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index ccaafb8bd..57875035a 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index fbdeb148f..aa3187a07 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h new file mode 100644 index 000000000..edb57ecd5 --- /dev/null +++ b/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -0,0 +1,96 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_SELFADJOINTRANK2UPTADE_H +#define EIGEN_SELFADJOINTRANK2UPTADE_H + +/* Optimized selfadjoint matrix += alpha * uv' + vu' + * It corresponds to the Level2 syr2 BLAS routine + */ + +template<typename Scalar, typename UType, typename VType, int UpLo> +struct ei_selfadjoint_rank2_update_selector; + +template<typename Scalar, typename UType, typename VType> +struct ei_selfadjoint_rank2_update_selector<Scalar,UType,VType,LowerTriangular> +{ + static void run(Scalar* mat, int stride, const UType& u, const VType& v, Scalar alpha) + { + const int size = u.size(); +// std::cerr << "lower \n" << u.transpose() << "\n" << v.transpose() << "\n\n"; + for (int i=0; i<size; ++i) + { +// std::cerr << + Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) += + (alpha * ei_conj(u.coeff(i))) * v.end(size-i) + + (alpha * ei_conj(v.coeff(i))) * u.end(size-i); + } + } +}; + +template<typename Scalar, typename UType, typename VType> +struct ei_selfadjoint_rank2_update_selector<Scalar,UType,VType,UpperTriangular> +{ + static void run(Scalar* mat, int stride, const UType& u, const VType& v, Scalar alpha) + { + const int size = u.size(); + for (int i=0; i<size; ++i) + Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) += + (alpha * ei_conj(u.coeff(i))) * v.start(i+1) + + (alpha * ei_conj(v.coeff(i))) * u.start(i+1); + } +}; + +template<bool Cond, typename T> struct ei_conj_expr_if + : ei_meta_if<!Cond, T, + CwiseUnaryOp<ei_scalar_conjugate_op<typename ei_traits<T>::Scalar>,T> > {}; + + +template<typename MatrixType, unsigned int UpLo> +template<typename DerivedU, typename DerivedV> +void SelfAdjointView<MatrixType,UpLo> +::rank2update(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha) +{ + typedef ei_blas_traits<DerivedU> UBlasTraits; + typedef typename UBlasTraits::DirectLinearAccessType ActualUType; + typedef typename ei_cleantype<ActualUType>::type _ActualUType; + const ActualUType actualU = UBlasTraits::extract(u.derived()); + + typedef ei_blas_traits<DerivedV> VBlasTraits; + typedef typename VBlasTraits::DirectLinearAccessType ActualVType; + typedef typename ei_cleantype<ActualVType>::type _ActualVType; + const ActualVType actualV = VBlasTraits::extract(v.derived()); + + Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived()) + * VBlasTraits::extractScalarFactor(v.derived()); + + enum { IsRowMajor = (ei_traits<MatrixType>::Flags&RowMajorBit)?1:0 }; + ei_selfadjoint_rank2_update_selector<Scalar, + typename ei_conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::ret, + typename ei_conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::ret, + (IsRowMajor ? (UpLo==UpperTriangular ? LowerTriangular : UpperTriangular) : UpLo)> + ::run(const_cast<Scalar*>(_expression().data()),_expression().stride(),actualU,actualV,actualAlpha); +} + +#endif // EIGEN_SELFADJOINTRANK2UPTADE_H diff --git a/Eigen/src/QR/Tridiagonalization.h b/Eigen/src/QR/Tridiagonalization.h index bd8ff4fe3..4808b69ce 100644 --- a/Eigen/src/QR/Tridiagonalization.h +++ b/Eigen/src/QR/Tridiagonalization.h @@ -236,10 +236,8 @@ void Tridiagonalization<MatrixType>::_compute(MatrixType& matA, CoeffVectorType& + (h*ei_conj(h)*Scalar(-0.5)*(matA.col(i).end(n-i-1).dot(hCoeffs.end(n-i-1)))) * matA.col(i).end(n-i-1); - // symmetric rank-2 update - for (int j1=i+1; j1<n; ++j1) - matA.col(j1).end(n-j1) -= matA.col(i).end(n-j1) * ei_conj(hCoeffs.coeff(j1-1)) - + hCoeffs.end(n-j1) * ei_conj(matA.coeff(j1,i)); + matA.corner(BottomRight, n-i-1, n-i-1).template selfadjointView<LowerTriangular>() + .rank2update(matA.col(i).end(n-i-1), hCoeffs.end(n-i-1), -1); // note: at that point matA(i+1,i+1) is the (i+1)-th element of the final diagonal // note: the sequence of the beta values leads to the subdiagonal entries |