aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2008-07-26 20:40:29 +0000
committerGravatar Gael Guennebaud <g.gael@free.fr>2008-07-26 20:40:29 +0000
commite77ccf29288a8536e11dc5ea4fadcf775e8a2b8a (patch)
tree23710b6b882d17c2939562c700c1299af0f26ff3 /Eigen
parent2940617e6f0abaf1d09b3f054687a0adac788505 (diff)
* Rewrite the triangular solver so that we can take advantage of our efficient matrix-vector products:
=> up to 6 times faster! * Added DirectAccessBit to Part * Added an example of a cwise operator * Renamed perpendicular() => someOrthogonal() (geometry module) * Fix a weird bug in ei_constant_functor: the default copy constructor did not copy the imaginary part when the single member of the class is a complex...
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/Core 2
-rw-r--r-- Eigen/src/Core/CwiseBinaryOp.h 5
-rw-r--r-- Eigen/src/Core/CwiseNullaryOp.h 2
-rw-r--r-- Eigen/src/Core/Functors.h 1
-rwxr-xr-x Eigen/src/Core/InverseProduct.h 191
-rw-r--r-- Eigen/src/Core/Part.h 7
-rw-r--r-- Eigen/src/Geometry/OrthoMethods.h (renamed from Eigen/src/Geometry/Cross.h) 2
-rw-r--r-- Eigen/src/Sparse/TriangularSolver.h 15
8 files changed, 183 insertions, 42 deletions
diff --git a/Eigen/Core b/Eigen/Core
index a233222e0..af3b4de98 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -40,10 +40,10 @@ namespace Eigen {
#include "src/Core/CwiseBinaryOp.h"
#include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h"
-#include "src/Core/InverseProduct.h"
#include "src/Core/Dot.h"
#include "src/Core/Product.h"
#include "src/Core/DiagonalProduct.h"
+#include "src/Core/InverseProduct.h"
#include "src/Core/Block.h"
#include "src/Core/Minor.h"
#include "src/Core/Transpose.h"
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 8df4097c3..ac5440c22 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -177,6 +177,11 @@ MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
/** \returns an expression of the Schur product (coefficient wise product) of *this and \a other
*
+ * \addexample CwiseProduct \label How to perform a component wise product of two matrices.
+ *
+ * Example: \include Cwise_product.cpp
+ * Output: \verbinclude Cwise_product.out
+ *
* \sa class CwiseBinaryOp
*/
template<typename ExpressionType>
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 343be79a1..a7957a426 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -450,7 +450,7 @@ Derived& MatrixBase<Derived>::setOnes()
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so Identity() should be used
* instead.
- *
+ *
* \addexample Identity \label How to get an identity matrix
*
* Example: \include MatrixBase_identity_int_int.cpp
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index cb14585f6..cfbc7affb 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -318,6 +318,7 @@ struct ei_scalar_constant_op<Scalar,true> {
};
template<typename Scalar>
struct ei_scalar_constant_op<Scalar,false> {
+ inline ei_scalar_constant_op(const ei_scalar_constant_op& other) : m_other(other.m_other) { }
inline ei_scalar_constant_op(const Scalar& other) : m_other(other) { }
inline const Scalar operator() (int, int = 0) const { return m_other; }
const Scalar m_other;
diff --git a/Eigen/src/Core/InverseProduct.h b/Eigen/src/Core/InverseProduct.h
index 0ee54a3fb..87f426af5 100755
--- a/Eigen/src/Core/InverseProduct.h
+++ b/Eigen/src/Core/InverseProduct.h
@@ -25,51 +25,186 @@
#ifndef EIGEN_INVERSEPRODUCT_H
#define EIGEN_INVERSEPRODUCT_H
+template<typename Lhs, typename Rhs,
+ int TriangularPart = (int(Lhs::Flags) & LowerTriangularBit)
+ ? Lower
+ : (int(Lhs::Flags) & UpperTriangularBit)
+ ? Upper
+ : -1,
+ int StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+ >
+struct ei_trisolve_selector;
-/** "in-place" version of MatrixBase::inverseProduct() where the result is written in \a other
- *
- * \sa inverseProduct()
- */
-template<typename Derived>
-template<typename OtherDerived>
-void MatrixBase<Derived>::inverseProductInPlace(MatrixBase<OtherDerived>& other) const
+// forward substitution, row-major
+template<typename Lhs, typename Rhs>
+struct ei_trisolve_selector<Lhs,Rhs,Lower,RowMajor>
{
- ei_assert(cols() == other.rows());
- ei_assert(!(Flags & ZeroDiagBit));
- ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));
-
- for(int c=0 ; c<other.cols() ; ++c)
+ typedef typename Rhs::Scalar Scalar;
+ static void run(const Lhs& lhs, Rhs& other)
{
- if(Flags & LowerTriangularBit)
+ for(int c=0 ; c<other.cols() ; ++c)
{
- // forward substitution
- if(!(Flags & UnitDiagBit))
- other.coeffRef(0,c) = other.coeff(0,c)/coeff(0, 0);
- for(int i=1; i<rows(); ++i)
+ if(!(Lhs::Flags & UnitDiagBit))
+ other.coeffRef(0,c) = other.coeff(0,c)/lhs.coeff(0, 0);
+ for(int i=1; i<lhs.rows(); ++i)
{
- Scalar tmp = other.coeff(i,c) - ((this->row(i).start(i)) * other.col(c).start(i)).coeff(0,0);
- if (Flags & UnitDiagBit)
+ Scalar tmp = other.coeff(i,c) - ((lhs.row(i).start(i)) * other.col(c).start(i)).coeff(0,0);
+ if (Lhs::Flags & UnitDiagBit)
other.coeffRef(i,c) = tmp;
else
- other.coeffRef(i,c) = tmp/coeff(i,i);
+ other.coeffRef(i,c) = tmp/lhs.coeff(i,i);
}
}
- else
+ }
+};
+
+// backward substitution, row-major
+template<typename Lhs, typename Rhs>
+struct ei_trisolve_selector<Lhs,Rhs,Upper,RowMajor>
+{
+ typedef typename Rhs::Scalar Scalar;
+ static void run(const Lhs& lhs, Rhs& other)
+ {
+ const int size = lhs.cols();
+ for(int c=0 ; c<other.cols() ; ++c)
{
- // backward substitution
- if(!(Flags & UnitDiagBit))
- other.coeffRef(cols()-1,c) = other.coeff(cols()-1, c)/coeff(rows()-1, cols()-1);
- for(int i=rows()-2 ; i>=0 ; --i)
+ if(!(Lhs::Flags & UnitDiagBit))
+ other.coeffRef(size-1,c) = other.coeff(size-1, c)/lhs.coeff(size-1, size-1);
+ for(int i=size-2 ; i>=0 ; --i)
{
Scalar tmp = other.coeff(i,c)
- - ((this->row(i).end(cols()-i-1)) * other.col(c).end(cols()-i-1)).coeff(0,0);
- if (Flags & UnitDiagBit)
+ - ((lhs.row(i).end(size-i-1)) * other.col(c).end(size-i-1)).coeff(0,0);
+ if (Lhs::Flags & UnitDiagBit)
other.coeffRef(i,c) = tmp;
else
- other.coeffRef(i,c) = tmp/coeff(i,i);
+ other.coeffRef(i,c) = tmp/lhs.coeff(i,i);
}
}
}
+};
+
+// forward substitution, col-major
+template<typename Lhs, typename Rhs>
+struct ei_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
+{
+ typedef typename Rhs::Scalar Scalar;
+ typedef typename ei_packet_traits<Scalar>::type Packet;
+ enum {PacketSize = ei_packet_traits<Scalar>::size};
+
+ static void run(const Lhs& lhs, Rhs& other)
+ {
+ const int size = lhs.cols();
+ for(int c=0 ; c<other.cols() ; ++c)
+ {
+ /* let's perform the inverse product per block of 4 columns such that we perfectly match
+ * our optimized matrix * vector product.
+ */
+ int blockyEnd = (std::max(size-5,0)/4)*4;
+ for(int i=0; i<blockyEnd;)
+ {
+ int startBlock = i;
+ int endBlock = startBlock+4;
+ Matrix<Scalar,4,1> btmp;
+ /* Let's process the 4x4 sub-matrix as usual.
+ * btmp stores the diagonal coefficients used to update the remaining part of the result.
+ */
+ for (;i<endBlock;++i)
+ {
+ if(!(Lhs::Flags & UnitDiagBit))
+ other.coeffRef(i,c) /= lhs.coeff(i,i);
+ int remainingSize = endBlock-i-1;
+ if (remainingSize>0)
+ other.col(c).block(i+1,remainingSize) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1, i, remainingSize, 1);
+ btmp.coeffRef(i-startBlock) = -other.coeffRef(i,c);
+ }
+
+ /* Now we can efficiently update the remaining part of the result as a matrix * vector product.
+ * NOTE in order to reduce both compilation time and binary size, let's directly call
+ * the fast product implementation. It is equivalent to the following code:
+ * other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock)
+ * * other.col(c).block(startBlock,endBlock-startBlock)).lazy();
+ */
+ ei_cache_friendly_product_colmajor_times_vector(
+ size-endBlock, &(lhs.const_cast_derived().coeffRef(endBlock,startBlock)), lhs.stride(),
+ btmp, &(other.coeffRef(endBlock,c)));
+ }
+
+ /* Now we have to process the remaining part as usual */
+ int i;
+ for(i=blockyEnd; i<size-1; ++i)
+ {
+ if(!(Lhs::Flags & UnitDiagBit))
+ other.coeffRef(i,c) /= lhs.coeff(i,i);
+ // NOTE we cannot use lhs.col(i).end(size-i-1) because Part::coeffRef gets called by .col() to
+ // get the address of the start of the row
+ other.col(c).end(size-i-1) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1,i, size-i-1,1);
+ }
+ if(!(Lhs::Flags & UnitDiagBit))
+ other.coeffRef(i,c) /= lhs.coeff(i,i);
+ }
+ }
+};
+
+// backward substitution, col-major
+template<typename Lhs, typename Rhs>
+struct ei_trisolve_selector<Lhs,Rhs,Upper,ColMajor>
+{
+ typedef typename Rhs::Scalar Scalar;
+ static void run(const Lhs& lhs, Rhs& other)
+ {
+ const int size = lhs.cols();
+ for(int c=0 ; c<other.cols() ; ++c)
+ {
+ int blockyEnd = size-1 - (std::max(size-5,0)/4)*4;
+ for(int i=size-1; i>blockyEnd;)
+ {
+ int startBlock = i;
+ int endBlock = startBlock-4;
+ Matrix<Scalar,4,1> btmp;
+ /* Let's process the 4x4 sub-matrix as usual.
+ * btmp stores the diagonal coefficients used to update the remaining part of the result.
+ */
+ for (; i>endBlock; --i)
+ {
+ if(!(Lhs::Flags & UnitDiagBit))
+ other.coeffRef(i,c) /= lhs.coeff(i,i);
+ int remainingSize = i-endBlock-1;
+ if (remainingSize>0)
+ other.col(c).block(endBlock+1,remainingSize) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, endBlock+1, i, remainingSize, 1);
+ btmp.coeffRef(remainingSize) = -other.coeffRef(i,c);
+ }
+
+ ei_cache_friendly_product_colmajor_times_vector(
+ endBlock+1, &(lhs.const_cast_derived().coeffRef(0,endBlock+1)), lhs.stride(),
+ btmp, &(other.coeffRef(0,c)));
+ }
+
+ for(int i=blockyEnd; i>0; --i)
+ {
+ if(!(Lhs::Flags & UnitDiagBit))
+ other.coeffRef(i,c) /= lhs.coeff(i,i);
+ other.col(c).start(i) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, 0,i, i, 1);
+ }
+ if(!(Lhs::Flags & UnitDiagBit))
+ other.coeffRef(0,c) /= lhs.coeff(0,0);
+ }
+ }
+};
+
+/** "in-place" version of MatrixBase::inverseProduct() where the result is written in \a other
+ *
+ * \sa inverseProduct()
+ */
+template<typename Derived>
+template<typename OtherDerived>
+void MatrixBase<Derived>::inverseProductInPlace(MatrixBase<OtherDerived>& other) const
+{
+ ei_assert(derived().cols() == derived().rows());
+ ei_assert(derived().cols() == other.rows());
+ ei_assert(!(Flags & ZeroDiagBit));
+ ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));
+
+ ei_trisolve_selector<Derived, OtherDerived>::run(derived(), other.derived());
}
/** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
diff --git a/Eigen/src/Core/Part.h b/Eigen/src/Core/Part.h
index cd349855d..1a7c7f82a 100644
--- a/Eigen/src/Core/Part.h
+++ b/Eigen/src/Core/Part.h
@@ -53,7 +53,7 @@ struct ei_traits<Part<MatrixType, Mode> >
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
- Flags = (_MatrixTypeNested::Flags & ~(PacketAccessBit | LinearAccessBit | DirectAccessBit)) | Mode,
+ Flags = (_MatrixTypeNested::Flags & (HereditaryBits | DirectAccessBit) & (~(PacketAccessBit | LinearAccessBit))) | Mode,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
};
};
@@ -84,6 +84,7 @@ template<typename MatrixType, unsigned int Mode> class Part
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
+ inline int stride() const { return m_matrix.stride(); }
inline Scalar coeff(int row, int col) const
{
@@ -97,7 +98,7 @@ template<typename MatrixType, unsigned int Mode> class Part
return m_matrix.coeff(row, col);
}
- inline Scalar coeffRef(int row, int col) const
+ inline Scalar& coeffRef(int row, int col)
{
EIGEN_STATIC_ASSERT(!(Flags & UnitDiagBit), writting_to_triangular_part_with_unit_diag_is_not_supported);
EIGEN_STATIC_ASSERT(!(Flags & SelfAdjointBit), default_writting_to_selfadjoint_not_supported);
@@ -105,7 +106,7 @@ template<typename MatrixType, unsigned int Mode> class Part
|| (Mode==Lower && col<=row)
|| (Mode==StrictlyUpper && col>row)
|| (Mode==StrictlyLower && col<row));
- return m_matrix.coeffRef(row, col);
+ return m_matrix.const_cast_derived().coeffRef(row, col);
}
/** discard any writes to a row */
diff --git a/Eigen/src/Geometry/Cross.h b/Eigen/src/Geometry/OrthoMethods.h
index a9d9493bc..5955ce223 100644
--- a/Eigen/src/Geometry/Cross.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -101,7 +101,7 @@ struct ei_perpendicular_selector<Derived,2>
*/
template<typename Derived>
typename ei_eval<Derived>::type
-MatrixBase<Derived>::perpendicular() const
+MatrixBase<Derived>::someOrthogonal() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
return ei_perpendicular_selector<Derived>::run(derived());
diff --git a/Eigen/src/Sparse/TriangularSolver.h b/Eigen/src/Sparse/TriangularSolver.h
index 8634e114c..41361a471 100644
--- a/Eigen/src/Sparse/TriangularSolver.h
+++ b/Eigen/src/Sparse/TriangularSolver.h
@@ -33,11 +33,11 @@ template<typename Lhs, typename Rhs,
: -1,
int StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor
>
-struct ei_inverse_product_selector;
+struct ei_sparse_trisolve_selector;
// forward substitution, row-major
template<typename Lhs, typename Rhs>
-struct ei_inverse_product_selector<Lhs,Rhs,Lower,RowMajor>
+struct ei_sparse_trisolve_selector<Lhs,Rhs,Lower,RowMajor>
{
typedef typename Rhs::Scalar Scalar;
static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
@@ -69,7 +69,7 @@ struct ei_inverse_product_selector<Lhs,Rhs,Lower,RowMajor>
// backward substitution, row-major
template<typename Lhs, typename Rhs>
-struct ei_inverse_product_selector<Lhs,Rhs,Upper,RowMajor>
+struct ei_sparse_trisolve_selector<Lhs,Rhs,Upper,RowMajor>
{
typedef typename Rhs::Scalar Scalar;
static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
@@ -100,7 +100,7 @@ struct ei_inverse_product_selector<Lhs,Rhs,Upper,RowMajor>
// forward substitution, col-major
template<typename Lhs, typename Rhs>
-struct ei_inverse_product_selector<Lhs,Rhs,Lower,ColMajor>
+struct ei_sparse_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
{
typedef typename Rhs::Scalar Scalar;
static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
@@ -127,7 +127,7 @@ struct ei_inverse_product_selector<Lhs,Rhs,Lower,ColMajor>
// backward substitution, col-major
template<typename Lhs, typename Rhs>
-struct ei_inverse_product_selector<Lhs,Rhs,Upper,ColMajor>
+struct ei_sparse_trisolve_selector<Lhs,Rhs,Upper,ColMajor>
{
typedef typename Rhs::Scalar Scalar;
static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
@@ -155,15 +155,14 @@ struct ei_inverse_product_selector<Lhs,Rhs,Upper,ColMajor>
template<typename Derived>
template<typename OtherDerived>
-OtherDerived
-SparseMatrixBase<Derived>::inverseProduct(const MatrixBase<OtherDerived>& other) const
+OtherDerived SparseMatrixBase<Derived>::inverseProduct(const MatrixBase<OtherDerived>& other) const
{
ei_assert(derived().cols() == other.rows());
ei_assert(!(Flags & ZeroDiagBit));
ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));
OtherDerived res(other.rows(), other.cols());
- ei_inverse_product_selector<Derived, OtherDerived>::run(derived(), other.derived(), res);
+ ei_sparse_trisolve_selector<Derived, OtherDerived>::run(derived(), other.derived(), res);
return res;
}