aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/SolveTriangular.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2008-08-18 22:17:42 +0000
committerGravatar Gael Guennebaud <g.gael@free.fr>2008-08-18 22:17:42 +0000
commit95dd09bea62010ba3aba36d4833a3cd1594a2372 (patch)
tree40c7e490d0f1cd4b733011d9a4d4fb22fc42decd /Eigen/src/Core/SolveTriangular.h
parente778ae2559a37b65e9bdd6df687032e33c505cc4 (diff)
* revert the previous interface change in solveTriangular (pointer vs reference)
* remove the cast operators in the Geometry module: they are replaced by constructors and new operator= in Matrix * extended the operations supported by Rotation2D * rewrite in solveTriangular: - merge the Upper and Lower specializations - big optimization of the path for row-major triangular matrices
Diffstat (limited to 'Eigen/src/Core/SolveTriangular.h')
-rwxr-xr-xEigen/src/Core/SolveTriangular.h198
1 files changed, 98 insertions, 100 deletions
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index 5f6cf7c28..ccb13991c 100755
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -29,19 +29,19 @@ template<typename XprType> struct ei_is_part { enum {value=false}; };
template<typename XprType, unsigned int Mode> struct ei_is_part<Part<XprType,Mode> > { enum {value=true}; };
template<typename Lhs, typename Rhs,
- int TriangularPart = ei_is_part<Lhs>::value ? -1 // this is to solve ambiguous specializations
- : (int(Lhs::Flags) & LowerTriangularBit)
+ int TriangularPart = (int(Lhs::Flags) & LowerTriangularBit)
? Lower
: (int(Lhs::Flags) & UpperTriangularBit)
? Upper
: -1,
- int StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+ int StorageOrder = ei_is_part<Lhs>::value ? -1 // this is to solve ambiguous specializations
+ : int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor
>
struct ei_solve_triangular_selector;
// transform a Part xpr to a Flagged xpr
-template<typename Lhs, unsigned int LhsMode, typename Rhs, int TriangularPart, int StorageOrder>
-struct ei_solve_triangular_selector<Part<Lhs,LhsMode>,Rhs,TriangularPart,StorageOrder>
+template<typename Lhs, unsigned int LhsMode, typename Rhs, int UpLo, int StorageOrder>
+struct ei_solve_triangular_selector<Part<Lhs,LhsMode>,Rhs,UpLo,StorageOrder>
{
static void run(const Part<Lhs,LhsMode>& lhs, Rhs& other)
{
@@ -50,88 +50,129 @@ struct ei_solve_triangular_selector<Part<Lhs,LhsMode>,Rhs,TriangularPart,Storage
};
// forward substitution, row-major
-template<typename Lhs, typename Rhs>
-struct ei_solve_triangular_selector<Lhs,Rhs,Lower,RowMajor>
+template<typename Lhs, typename Rhs, int UpLo>
+struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor>
{
typedef typename Rhs::Scalar Scalar;
static void run(const Lhs& lhs, Rhs& other)
{
+ const bool IsLower = (UpLo==Lower);
+ const int size = lhs.cols();
+ /* We perform the inverse product per block of 4 rows such that we perfectly match
+ * our optimized matrix * vector product. blockyStart represents the number of rows
+ * we have process first using the non-block version.
+ */
+ int blockyStart = (std::max(size-5,0)/4)*4;
+ if (IsLower)
+ blockyStart = size - blockyStart;
+ else
+ blockyStart -= 1;
for(int c=0 ; c<other.cols() ; ++c)
{
+ // process first rows using the non block version
if(!(Lhs::Flags & UnitDiagBit))
- other.coeffRef(0,c) = other.coeff(0,c)/lhs.coeff(0, 0);
- for(int i=1; i<lhs.rows(); ++i)
+ if (IsLower)
+ other.coeffRef(0,c) = other.coeff(0,c)/lhs.coeff(0, 0);
+ else
+ other.coeffRef(size-1,c) = other.coeff(size-1, c)/lhs.coeff(size-1, size-1);
+ for(int i=(IsLower ? 1 : size-2); IsLower ? i<blockyStart : i>blockyStart; i += (IsLower ? 1 : -1) )
{
- Scalar tmp = other.coeff(i,c) - ((lhs.row(i).start(i)) * other.col(c).start(i)).coeff(0,0);
+ Scalar tmp = other.coeff(i,c)
+ - (IsLower ? ((lhs.row(i).start(i)) * other.col(c).start(i)).coeff(0,0)
+ : ((lhs.row(i).end(size-i-1)) * other.col(c).end(size-i-1)).coeff(0,0));
if (Lhs::Flags & UnitDiagBit)
other.coeffRef(i,c) = tmp;
else
other.coeffRef(i,c) = tmp/lhs.coeff(i,i);
}
- }
- }
-};
-// backward substitution, row-major
-template<typename Lhs, typename Rhs>
-struct ei_solve_triangular_selector<Lhs,Rhs,Upper,RowMajor>
-{
- typedef typename Rhs::Scalar Scalar;
- static void run(const Lhs& lhs, Rhs& other)
- {
- const int size = lhs.cols();
- for(int c=0 ; c<other.cols() ; ++c)
- {
- if(!(Lhs::Flags & UnitDiagBit))
- other.coeffRef(size-1,c) = other.coeff(size-1, c)/lhs.coeff(size-1, size-1);
- for(int i=size-2 ; i>=0 ; --i)
+ // now let process the remaining rows 4 at once
+ for(int i=blockyStart; IsLower ? i<size : i>0; )
{
- Scalar tmp = other.coeff(i,c)
- - ((lhs.row(i).end(size-i-1)) * other.col(c).end(size-i-1)).coeff(0,0);
- if (Lhs::Flags & UnitDiagBit)
- other.coeffRef(i,c) = tmp;
+ int startBlock = i;
+ int endBlock = startBlock + (IsLower ? 4 : -4);
+
+ /* Process the i cols times 4 rows block, and keep the result in a temporary vector */
+ Matrix<Scalar,4,1> btmp;
+ if (IsLower)
+ btmp = lhs.block(startBlock,0,4,i) * other.col(c).start(i);
else
- other.coeffRef(i,c) = tmp/lhs.coeff(i,i);
+ btmp = lhs.block(i-3,i+1,4,size-1-i) * other.col(c).end(size-1-i);
+
+ /* Let's process the 4x4 sub-matrix as usual.
+ * btmp stores the diagonal coefficients used to update the remaining part of the result.
+ */
+ {
+ Scalar tmp = other.coeff(startBlock,c)-btmp.coeff(IsLower?0:3);
+ if (Lhs::Flags & UnitDiagBit)
+ other.coeffRef(i,c) = tmp;
+ else
+ other.coeffRef(i,c) = tmp/lhs.coeff(i,i);
+ }
+
+ i += IsLower ? 1 : -1;
+ for (;IsLower ? i<endBlock : i>endBlock; i += IsLower ? 1 : -1)
+ {
+ int remainingSize = IsLower ? i-startBlock : startBlock-i;
+ Scalar tmp = other.coeff(i,c)
+ - btmp.coeff(IsLower ? remainingSize : 3-remainingSize)
+ - ( lhs.row(i).block(IsLower ? startBlock : i+1, remainingSize)
+ * other.col(c).block(IsLower ? startBlock : i+1, remainingSize)).coeff(0,0);
+
+ if (Lhs::Flags & UnitDiagBit)
+ other.coeffRef(i,c) = tmp;
+ else
+ other.coeffRef(i,c) = tmp/lhs.coeff(i,i);
+ }
}
}
}
};
-// forward substitution, col-major
-// FIXME the Lower and Upper specialization could be merged using a small helper class
-// performing reflexions on the coordinates...
-template<typename Lhs, typename Rhs>
-struct ei_solve_triangular_selector<Lhs,Rhs,Lower,ColMajor>
+// Implements the following configurations:
+// - inv(Lower, ColMajor) * Column vector
+// - inv(Lower,UnitDiag,ColMajor) * Column vector
+// - inv(Upper, ColMajor) * Column vector
+// - inv(Upper,UnitDiag,ColMajor) * Column vector
+template<typename Lhs, typename Rhs, int UpLo>
+struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor>
{
typedef typename Rhs::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
- enum {PacketSize = ei_packet_traits<Scalar>::size};
+ enum { PacketSize = ei_packet_traits<Scalar>::size };
static void run(const Lhs& lhs, Rhs& other)
{
+ static const bool IsLower = (UpLo==Lower);
const int size = lhs.cols();
for(int c=0 ; c<other.cols() ; ++c)
{
/* let's perform the inverse product per block of 4 columns such that we perfectly match
- * our optimized matrix * vector product.
+ * our optimized matrix * vector product. blockyEnd represents the number of rows
+ * we can process using the block version.
*/
int blockyEnd = (std::max(size-5,0)/4)*4;
- for(int i=0; i<blockyEnd;)
+ if (!IsLower)
+ blockyEnd = size-1 - blockyEnd;
+ for(int i=IsLower ? 0 : size-1; IsLower ? i<blockyEnd : i>blockyEnd;)
{
/* Let's process the 4x4 sub-matrix as usual.
* btmp stores the diagonal coefficients used to update the remaining part of the result.
*/
int startBlock = i;
- int endBlock = startBlock+4;
+ int endBlock = startBlock + (IsLower ? 4 : -4);
Matrix<Scalar,4,1> btmp;
- for (;i<endBlock;++i)
+ for (;IsLower ? i<endBlock : i>endBlock;
+ i += IsLower ? 1 : -1)
{
if(!(Lhs::Flags & UnitDiagBit))
other.coeffRef(i,c) /= lhs.coeff(i,i);
- int remainingSize = endBlock-i-1;
+ int remainingSize = IsLower ? endBlock-i-1 : i-endBlock-1;
if (remainingSize>0)
- other.col(c).block(i+1,remainingSize) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1, i, remainingSize, 1);
- btmp.coeffRef(i-startBlock) = -other.coeffRef(i,c);
+ other.col(c).block((IsLower ? i : endBlock) + 1, remainingSize) -=
+ other.coeffRef(i,c)
+ * Block<Lhs,Dynamic,1>(lhs, (IsLower ? i : endBlock) + 1, i, remainingSize, 1);
+ btmp.coeffRef(IsLower ? i-startBlock : remainingSize) = -other.coeffRef(i,c);
}
/* Now we can efficiently update the remaining part of the result as a matrix * vector product.
@@ -143,13 +184,15 @@ struct ei_solve_triangular_selector<Lhs,Rhs,Lower,ColMajor>
// FIXME this is cool but what about conjugate/adjoint expressions ? do we want to evaluate them ?
// this is a more general problem though.
ei_cache_friendly_product_colmajor_times_vector(
- size-endBlock, &(lhs.const_cast_derived().coeffRef(endBlock,startBlock)), lhs.stride(),
- btmp, &(other.coeffRef(endBlock,c)));
+ IsLower ? size-endBlock : endBlock+1,
+ &(lhs.const_cast_derived().coeffRef(IsLower ? endBlock : 0, IsLower ? startBlock : endBlock+1)),
+ lhs.stride(),
+ btmp, &(other.coeffRef(IsLower ? endBlock : 0, c)));
}
/* Now we have to process the remaining part as usual */
int i;
- for(i=blockyEnd; i<size-1; ++i)
+ for(i=blockyEnd; IsLower ? i<size-1 : i>0; i += (IsLower ? 1 : -1) )
{
if(!(Lhs::Flags & UnitDiagBit))
other.coeffRef(i,c) /= lhs.coeff(i,i);
@@ -157,7 +200,10 @@ struct ei_solve_triangular_selector<Lhs,Rhs,Lower,ColMajor>
/* NOTE we cannot use lhs.col(i).end(size-i-1) because Part::coeffRef gets called by .col() to
* get the address of the start of the row
*/
- other.col(c).end(size-i-1) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1,i, size-i-1,1);
+ if(IsLower)
+ other.col(c).end(size-i-1) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1,i, size-i-1,1);
+ else
+ other.col(c).start(i) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, 0,i, i, 1);
}
if(!(Lhs::Flags & UnitDiagBit))
other.coeffRef(i,c) /= lhs.coeff(i,i);
@@ -165,68 +211,20 @@ struct ei_solve_triangular_selector<Lhs,Rhs,Lower,ColMajor>
}
};
-// backward substitution, col-major
-// see the previous specialization for details on the algorithm
-template<typename Lhs, typename Rhs>
-struct ei_solve_triangular_selector<Lhs,Rhs,Upper,ColMajor>
-{
- typedef typename Rhs::Scalar Scalar;
- static void run(const Lhs& lhs, Rhs& other)
- {
- const int size = lhs.cols();
- for(int c=0 ; c<other.cols() ; ++c)
- {
- int blockyEnd = size-1 - (std::max(size-5,0)/4)*4;
- for(int i=size-1; i>blockyEnd;)
- {
- int startBlock = i;
- int endBlock = startBlock-4;
- Matrix<Scalar,4,1> btmp;
- /* Let's process the 4x4 sub-matrix as usual.
- * btmp stores the diagonal coefficients used to update the remaining part of the result.
- */
- for (; i>endBlock; --i)
- {
- if(!(Lhs::Flags & UnitDiagBit))
- other.coeffRef(i,c) /= lhs.coeff(i,i);
- int remainingSize = i-endBlock-1;
- if (remainingSize>0)
- other.col(c).block(endBlock+1,remainingSize) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, endBlock+1, i, remainingSize, 1);
- btmp.coeffRef(remainingSize) = -other.coeffRef(i,c);
- }
-
- ei_cache_friendly_product_colmajor_times_vector(
- endBlock+1, &(lhs.const_cast_derived().coeffRef(0,endBlock+1)), lhs.stride(),
- btmp, &(other.coeffRef(0,c)));
- }
-
- for(int i=blockyEnd; i>0; --i)
- {
- if(!(Lhs::Flags & UnitDiagBit))
- other.coeffRef(i,c) /= lhs.coeff(i,i);
- other.col(c).start(i) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, 0,i, i, 1);
- }
- if(!(Lhs::Flags & UnitDiagBit))
- other.coeffRef(0,c) /= lhs.coeff(0,0);
- }
- }
-};
-
/** "in-place" version of MatrixBase::solveTriangular() where the result is written in \a other
*
* See MatrixBase:solveTriangular() for the details.
*/
template<typename Derived>
template<typename OtherDerived>
-void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>* p_other) const
+void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other) const
{
- ei_assert(p_other!=0);
ei_assert(derived().cols() == derived().rows());
- ei_assert(derived().cols() == p_other->rows());
+ ei_assert(derived().cols() == other.rows());
ei_assert(!(Flags & ZeroDiagBit));
ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));
- ei_solve_triangular_selector<Derived, OtherDerived>::run(derived(), p_other->derived());
+ ei_solve_triangular_selector<Derived, OtherDerived>::run(derived(), other.derived());
}
/** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
@@ -265,7 +263,7 @@ template<typename OtherDerived>
typename OtherDerived::Eval MatrixBase<Derived>::solveTriangular(const MatrixBase<OtherDerived>& other) const
{
typename OtherDerived::Eval res(other);
- solveTriangularInPlace(&res);
+ solveTriangularInPlace(res);
return res;
}