diff options
author | Gael Guennebaud <g.gael@free.fr> | 2008-08-18 22:17:42 +0000 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2008-08-18 22:17:42 +0000 |
commit | 95dd09bea62010ba3aba36d4833a3cd1594a2372 (patch) | |
tree | 40c7e490d0f1cd4b733011d9a4d4fb22fc42decd /Eigen/src/Core/SolveTriangular.h | |
parent | e778ae2559a37b65e9bdd6df687032e33c505cc4 (diff) |
* revert the previous interface change in solveTriangular (pointer vs reference)
* remove the cast operators in the Geometry module: they are replaced by constructors
and new operator= in Matrix
* extended the operations supported by Rotation2D
* rewrite in solveTriangular:
- merge the Upper and Lower specializations
- big optimization of the path for row-major triangular matrices
Diffstat (limited to 'Eigen/src/Core/SolveTriangular.h')
-rwxr-xr-x | Eigen/src/Core/SolveTriangular.h | 198 |
1 files changed, 98 insertions, 100 deletions
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 5f6cf7c28..ccb13991c 100755 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -29,19 +29,19 @@ template<typename XprType> struct ei_is_part { enum {value=false}; }; template<typename XprType, unsigned int Mode> struct ei_is_part<Part<XprType,Mode> > { enum {value=true}; }; template<typename Lhs, typename Rhs, - int TriangularPart = ei_is_part<Lhs>::value ? -1 // this is to solve ambiguous specializations - : (int(Lhs::Flags) & LowerTriangularBit) + int TriangularPart = (int(Lhs::Flags) & LowerTriangularBit) ? Lower : (int(Lhs::Flags) & UpperTriangularBit) ? Upper : -1, - int StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor + int StorageOrder = ei_is_part<Lhs>::value ? -1 // this is to solve ambiguous specializations + : int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor > struct ei_solve_triangular_selector; // transform a Part xpr to a Flagged xpr -template<typename Lhs, unsigned int LhsMode, typename Rhs, int TriangularPart, int StorageOrder> -struct ei_solve_triangular_selector<Part<Lhs,LhsMode>,Rhs,TriangularPart,StorageOrder> +template<typename Lhs, unsigned int LhsMode, typename Rhs, int UpLo, int StorageOrder> +struct ei_solve_triangular_selector<Part<Lhs,LhsMode>,Rhs,UpLo,StorageOrder> { static void run(const Part<Lhs,LhsMode>& lhs, Rhs& other) { @@ -50,88 +50,129 @@ struct ei_solve_triangular_selector<Part<Lhs,LhsMode>,Rhs,TriangularPart,Storage }; // forward substitution, row-major -template<typename Lhs, typename Rhs> -struct ei_solve_triangular_selector<Lhs,Rhs,Lower,RowMajor> +template<typename Lhs, typename Rhs, int UpLo> +struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor> { typedef typename Rhs::Scalar Scalar; static void run(const Lhs& lhs, Rhs& other) { + const bool IsLower = (UpLo==Lower); + const int size = lhs.cols(); + /* We perform the inverse product per block of 4 rows such that we perfectly match + * our optimized matrix * vector product. blockyStart represents the number of rows + * we have process first using the non-block version. + */ + int blockyStart = (std::max(size-5,0)/4)*4; + if (IsLower) + blockyStart = size - blockyStart; + else + blockyStart -= 1; for(int c=0 ; c<other.cols() ; ++c) { + // process first rows using the non block version if(!(Lhs::Flags & UnitDiagBit)) - other.coeffRef(0,c) = other.coeff(0,c)/lhs.coeff(0, 0); - for(int i=1; i<lhs.rows(); ++i) + if (IsLower) + other.coeffRef(0,c) = other.coeff(0,c)/lhs.coeff(0, 0); + else + other.coeffRef(size-1,c) = other.coeff(size-1, c)/lhs.coeff(size-1, size-1); + for(int i=(IsLower ? 1 : size-2); IsLower ? i<blockyStart : i>blockyStart; i += (IsLower ? 1 : -1) ) { - Scalar tmp = other.coeff(i,c) - ((lhs.row(i).start(i)) * other.col(c).start(i)).coeff(0,0); + Scalar tmp = other.coeff(i,c) + - (IsLower ? ((lhs.row(i).start(i)) * other.col(c).start(i)).coeff(0,0) + : ((lhs.row(i).end(size-i-1)) * other.col(c).end(size-i-1)).coeff(0,0)); if (Lhs::Flags & UnitDiagBit) other.coeffRef(i,c) = tmp; else other.coeffRef(i,c) = tmp/lhs.coeff(i,i); } - } - } -}; -// backward substitution, row-major -template<typename Lhs, typename Rhs> -struct ei_solve_triangular_selector<Lhs,Rhs,Upper,RowMajor> -{ - typedef typename Rhs::Scalar Scalar; - static void run(const Lhs& lhs, Rhs& other) - { - const int size = lhs.cols(); - for(int c=0 ; c<other.cols() ; ++c) - { - if(!(Lhs::Flags & UnitDiagBit)) - other.coeffRef(size-1,c) = other.coeff(size-1, c)/lhs.coeff(size-1, size-1); - for(int i=size-2 ; i>=0 ; --i) + // now let process the remaining rows 4 at once + for(int i=blockyStart; IsLower ? i<size : i>0; ) { - Scalar tmp = other.coeff(i,c) - - ((lhs.row(i).end(size-i-1)) * other.col(c).end(size-i-1)).coeff(0,0); - if (Lhs::Flags & UnitDiagBit) - other.coeffRef(i,c) = tmp; + int startBlock = i; + int endBlock = startBlock + (IsLower ? 4 : -4); + + /* Process the i cols times 4 rows block, and keep the result in a temporary vector */ + Matrix<Scalar,4,1> btmp; + if (IsLower) + btmp = lhs.block(startBlock,0,4,i) * other.col(c).start(i); else - other.coeffRef(i,c) = tmp/lhs.coeff(i,i); + btmp = lhs.block(i-3,i+1,4,size-1-i) * other.col(c).end(size-1-i); + + /* Let's process the 4x4 sub-matrix as usual. + * btmp stores the diagonal coefficients used to update the remaining part of the result. + */ + { + Scalar tmp = other.coeff(startBlock,c)-btmp.coeff(IsLower?0:3); + if (Lhs::Flags & UnitDiagBit) + other.coeffRef(i,c) = tmp; + else + other.coeffRef(i,c) = tmp/lhs.coeff(i,i); + } + + i += IsLower ? 1 : -1; + for (;IsLower ? i<endBlock : i>endBlock; i += IsLower ? 1 : -1) + { + int remainingSize = IsLower ? i-startBlock : startBlock-i; + Scalar tmp = other.coeff(i,c) + - btmp.coeff(IsLower ? remainingSize : 3-remainingSize) + - ( lhs.row(i).block(IsLower ? startBlock : i+1, remainingSize) + * other.col(c).block(IsLower ? startBlock : i+1, remainingSize)).coeff(0,0); + + if (Lhs::Flags & UnitDiagBit) + other.coeffRef(i,c) = tmp; + else + other.coeffRef(i,c) = tmp/lhs.coeff(i,i); + } } } } }; -// forward substitution, col-major -// FIXME the Lower and Upper specialization could be merged using a small helper class -// performing reflexions on the coordinates... -template<typename Lhs, typename Rhs> -struct ei_solve_triangular_selector<Lhs,Rhs,Lower,ColMajor> +// Implements the following configurations: +// - inv(Lower, ColMajor) * Column vector +// - inv(Lower,UnitDiag,ColMajor) * Column vector +// - inv(Upper, ColMajor) * Column vector +// - inv(Upper,UnitDiag,ColMajor) * Column vector +template<typename Lhs, typename Rhs, int UpLo> +struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor> { typedef typename Rhs::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type Packet; - enum {PacketSize = ei_packet_traits<Scalar>::size}; + enum { PacketSize = ei_packet_traits<Scalar>::size }; static void run(const Lhs& lhs, Rhs& other) { + static const bool IsLower = (UpLo==Lower); const int size = lhs.cols(); for(int c=0 ; c<other.cols() ; ++c) { /* let's perform the inverse product per block of 4 columns such that we perfectly match - * our optimized matrix * vector product. + * our optimized matrix * vector product. blockyEnd represents the number of rows + * we can process using the block version. */ int blockyEnd = (std::max(size-5,0)/4)*4; - for(int i=0; i<blockyEnd;) + if (!IsLower) + blockyEnd = size-1 - blockyEnd; + for(int i=IsLower ? 0 : size-1; IsLower ? i<blockyEnd : i>blockyEnd;) { /* Let's process the 4x4 sub-matrix as usual. * btmp stores the diagonal coefficients used to update the remaining part of the result. */ int startBlock = i; - int endBlock = startBlock+4; + int endBlock = startBlock + (IsLower ? 4 : -4); Matrix<Scalar,4,1> btmp; - for (;i<endBlock;++i) + for (;IsLower ? i<endBlock : i>endBlock; + i += IsLower ? 1 : -1) { if(!(Lhs::Flags & UnitDiagBit)) other.coeffRef(i,c) /= lhs.coeff(i,i); - int remainingSize = endBlock-i-1; + int remainingSize = IsLower ? endBlock-i-1 : i-endBlock-1; if (remainingSize>0) - other.col(c).block(i+1,remainingSize) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1, i, remainingSize, 1); - btmp.coeffRef(i-startBlock) = -other.coeffRef(i,c); + other.col(c).block((IsLower ? i : endBlock) + 1, remainingSize) -= + other.coeffRef(i,c) + * Block<Lhs,Dynamic,1>(lhs, (IsLower ? i : endBlock) + 1, i, remainingSize, 1); + btmp.coeffRef(IsLower ? i-startBlock : remainingSize) = -other.coeffRef(i,c); } /* Now we can efficiently update the remaining part of the result as a matrix * vector product. @@ -143,13 +184,15 @@ struct ei_solve_triangular_selector<Lhs,Rhs,Lower,ColMajor> // FIXME this is cool but what about conjugate/adjoint expressions ? do we want to evaluate them ? // this is a more general problem though. ei_cache_friendly_product_colmajor_times_vector( - size-endBlock, &(lhs.const_cast_derived().coeffRef(endBlock,startBlock)), lhs.stride(), - btmp, &(other.coeffRef(endBlock,c))); + IsLower ? size-endBlock : endBlock+1, + &(lhs.const_cast_derived().coeffRef(IsLower ? endBlock : 0, IsLower ? startBlock : endBlock+1)), + lhs.stride(), + btmp, &(other.coeffRef(IsLower ? endBlock : 0, c))); } /* Now we have to process the remaining part as usual */ int i; - for(i=blockyEnd; i<size-1; ++i) + for(i=blockyEnd; IsLower ? i<size-1 : i>0; i += (IsLower ? 1 : -1) ) { if(!(Lhs::Flags & UnitDiagBit)) other.coeffRef(i,c) /= lhs.coeff(i,i); @@ -157,7 +200,10 @@ struct ei_solve_triangular_selector<Lhs,Rhs,Lower,ColMajor> /* NOTE we cannot use lhs.col(i).end(size-i-1) because Part::coeffRef gets called by .col() to * get the address of the start of the row */ - other.col(c).end(size-i-1) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1,i, size-i-1,1); + if(IsLower) + other.col(c).end(size-i-1) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1,i, size-i-1,1); + else + other.col(c).start(i) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, 0,i, i, 1); } if(!(Lhs::Flags & UnitDiagBit)) other.coeffRef(i,c) /= lhs.coeff(i,i); @@ -165,68 +211,20 @@ struct ei_solve_triangular_selector<Lhs,Rhs,Lower,ColMajor> } }; -// backward substitution, col-major -// see the previous specialization for details on the algorithm -template<typename Lhs, typename Rhs> -struct ei_solve_triangular_selector<Lhs,Rhs,Upper,ColMajor> -{ - typedef typename Rhs::Scalar Scalar; - static void run(const Lhs& lhs, Rhs& other) - { - const int size = lhs.cols(); - for(int c=0 ; c<other.cols() ; ++c) - { - int blockyEnd = size-1 - (std::max(size-5,0)/4)*4; - for(int i=size-1; i>blockyEnd;) - { - int startBlock = i; - int endBlock = startBlock-4; - Matrix<Scalar,4,1> btmp; - /* Let's process the 4x4 sub-matrix as usual. - * btmp stores the diagonal coefficients used to update the remaining part of the result. - */ - for (; i>endBlock; --i) - { - if(!(Lhs::Flags & UnitDiagBit)) - other.coeffRef(i,c) /= lhs.coeff(i,i); - int remainingSize = i-endBlock-1; - if (remainingSize>0) - other.col(c).block(endBlock+1,remainingSize) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, endBlock+1, i, remainingSize, 1); - btmp.coeffRef(remainingSize) = -other.coeffRef(i,c); - } - - ei_cache_friendly_product_colmajor_times_vector( - endBlock+1, &(lhs.const_cast_derived().coeffRef(0,endBlock+1)), lhs.stride(), - btmp, &(other.coeffRef(0,c))); - } - - for(int i=blockyEnd; i>0; --i) - { - if(!(Lhs::Flags & UnitDiagBit)) - other.coeffRef(i,c) /= lhs.coeff(i,i); - other.col(c).start(i) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, 0,i, i, 1); - } - if(!(Lhs::Flags & UnitDiagBit)) - other.coeffRef(0,c) /= lhs.coeff(0,0); - } - } -}; - /** "in-place" version of MatrixBase::solveTriangular() where the result is written in \a other * * See MatrixBase:solveTriangular() for the details. */ template<typename Derived> template<typename OtherDerived> -void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>* p_other) const +void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other) const { - ei_assert(p_other!=0); ei_assert(derived().cols() == derived().rows()); - ei_assert(derived().cols() == p_other->rows()); + ei_assert(derived().cols() == other.rows()); ei_assert(!(Flags & ZeroDiagBit)); ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit)); - ei_solve_triangular_selector<Derived, OtherDerived>::run(derived(), p_other->derived()); + ei_solve_triangular_selector<Derived, OtherDerived>::run(derived(), other.derived()); } /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular. @@ -265,7 +263,7 @@ template<typename OtherDerived> typename OtherDerived::Eval MatrixBase<Derived>::solveTriangular(const MatrixBase<OtherDerived>& other) const { typename OtherDerived::Eval res(other); - solveTriangularInPlace(&res); + solveTriangularInPlace(res); return res; } |