author     Benoit Jacob <jacob.benoit.1@gmail.com>  2008-05-28 04:38:16 +0000
committer  Benoit Jacob <jacob.benoit.1@gmail.com>  2008-05-28 04:38:16 +0000
commit     aebecae510dd29f5573d3f86dfed526e6d8be9a8 (patch)
tree       b9bb9d2d15011409fe5f66e1fbb56f1998f5cc91
parent     559233c73e86474d67f5730f9b20e46ccea93b7f (diff)
* find the proper way of nesting the expression in Flagged:
finally that's more subtle than just using ei_nested, because when flagging with NestByValueBit we want to store the expression by value already, regardless of whether it already had the NestByValueBit set. * rename temporary() ----> nestByValue() * move the old Product.h to disabled/, replace by what was ProductWIP.h * tweak -O and -g flags for tests and examples * reorder the tests -- basic things go first * simplifications, e.g. in many methoeds return derived() and count on implicit casting to the actual return type. * strip some not-really-useful stuff from the heaviest tests
-rw-r--r--  CMakeLists.txt                                                 |   2
-rw-r--r--  Eigen/Core                                                     |   5
-rw-r--r--  Eigen/src/Core/CwiseUnaryOp.h                                  |  22
-rw-r--r--  Eigen/src/Core/DiagonalMatrix.h                                |   2
-rw-r--r--  Eigen/src/Core/Flagged.h                                       |  10
-rw-r--r--  Eigen/src/Core/MatrixBase.h                                    |   2
-rw-r--r--  Eigen/src/Core/Product.h                                       | 322
-rw-r--r--  Eigen/src/Core/Transpose.h                                     |   6
-rw-r--r--  Eigen/src/Core/util/Constants.h                                |   6
-rw-r--r--  disabled/Product.h (renamed from Eigen/src/Core/ProductWIP.h)  | 324
-rw-r--r--  doc/CMakeLists.txt                                             |   6
-rw-r--r--  test/CMakeLists.txt                                            |  14
-rw-r--r--  test/adjoint.cpp                                               |  14
-rw-r--r--  test/linearstructure.cpp                                       |   4
-rw-r--r--  test/product.cpp                                               |  20
15 files changed, 377 insertions, 382 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b533e3b66..95a8a2311 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,7 @@ SET(CMAKE_INCLUDE_CURRENT_DIR ON)
IF(CMAKE_COMPILER_IS_GNUCXX)
IF(CMAKE_SYSTEM_NAME MATCHES Linux)
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -g -pedantic -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
IF(TEST_OPENMP)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
MESSAGE("Enabling OpenMP in tests/examples")
diff --git a/Eigen/Core b/Eigen/Core
index 3e1b5184d..c81341103 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -35,7 +35,9 @@ namespace Eigen {
#include "src/Core/CwiseBinaryOp.h"
#include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h"
-#include "src/Core/ProductWIP.h"
+#include "src/Core/InverseProduct.h"
+#include "src/Core/CacheFriendlyProduct.h"
+#include "src/Core/Product.h"
#include "src/Core/Block.h"
#include "src/Core/Minor.h"
#include "src/Core/Transpose.h"
@@ -51,7 +53,6 @@ namespace Eigen {
#include "src/Core/CommaInitializer.h"
#include "src/Core/Extract.h"
#include "src/Core/Part.h"
-#include "src/Core/InverseProduct.h"
} // namespace Eigen
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index 26197b369..8d2737e12 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -118,7 +118,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_opposite_op<typename ei_traits<Derived>::Scalar>,Derived>
MatrixBase<Derived>::operator-() const
{
- return CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, Derived>(derived());
+ return derived();
}
/** \returns an expression of the coefficient-wise absolute value of \c *this
@@ -127,7 +127,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_abs_op<typename ei_traits<Derived>::Scalar>,Derived>
MatrixBase<Derived>::cwiseAbs() const
{
- return CwiseUnaryOp<ei_scalar_abs_op<Scalar>,Derived>(derived());
+ return derived();
}
/** \returns an expression of the coefficient-wise squared absolute value of \c *this
@@ -136,7 +136,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_abs2_op<typename ei_traits<Derived>::Scalar>,Derived>
MatrixBase<Derived>::cwiseAbs2() const
{
- return CwiseUnaryOp<ei_scalar_abs2_op<Scalar>,Derived>(derived());
+ return derived();
}
/** \returns an expression of the complex conjugate of *this.
@@ -146,7 +146,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_conjugate_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::conjugate() const
{
- return CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Derived>(derived());
+ return derived();
}
/** \returns an expression of *this with the \a Scalar type casted to
@@ -161,7 +161,7 @@ template<typename NewType>
inline const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived>
MatrixBase<Derived>::cast() const
{
- return CwiseUnaryOp<ei_scalar_cast_op<Scalar, NewType>, Derived>(derived());
+ return derived();
}
/** \relates MatrixBase */
@@ -201,7 +201,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_sqrt_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseSqrt() const
{
- return CwiseUnaryOp<ei_scalar_sqrt_op<Scalar>, Derived>(derived());
+ return derived();
}
/** \returns an expression of the coefficient-wise exponential of *this. */
@@ -209,7 +209,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_exp_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseExp() const
{
- return CwiseUnaryOp<ei_scalar_exp_op<Scalar>, Derived>(derived());
+ return derived();
}
/** \returns an expression of the coefficient-wise logarithm of *this. */
@@ -217,7 +217,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_log_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseLog() const
{
- return CwiseUnaryOp<ei_scalar_log_op<Scalar>, Derived>(derived());
+ return derived();
}
/** \returns an expression of the coefficient-wise cosine of *this. */
@@ -225,7 +225,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_cos_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseCos() const
{
- return CwiseUnaryOp<ei_scalar_cos_op<Scalar>, Derived>(derived());
+ return derived();
}
/** \returns an expression of the coefficient-wise sine of *this. */
@@ -233,10 +233,10 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_sin_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseSin() const
{
- return CwiseUnaryOp<ei_scalar_sin_op<Scalar>, Derived>(derived());
+ return derived();
}
-/** \relates MatrixBase */
+/** \returns an expression of the coefficient-wise power of *this to the given exponent. */
template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_pow_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwisePow(const Scalar& exponent) const
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index bd420511f..0581c669c 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -95,7 +95,7 @@ template<typename Derived>
inline const DiagonalMatrix<Derived>
MatrixBase<Derived>::asDiagonal() const
{
- return DiagonalMatrix<Derived>(derived());
+ return derived();
}
/** \returns true if *this is approximately equal to a diagonal matrix,
diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h
index 9167c8a97..f287abee4 100644
--- a/Eigen/src/Core/Flagged.h
+++ b/Eigen/src/Core/Flagged.h
@@ -33,7 +33,7 @@
* \param Added the flags added to the expression
* \param Removed the flags removed from the expression (has priority over Added).
*
- * This class represents an expression whose flags have been modified
+ * This class represents an expression whose flags have been modified.
* It is the return type of MatrixBase::flagged()
* and most of the time this is the only way it is used.
*
@@ -94,7 +94,11 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
}
protected:
- const ExpressionType m_matrix;
+ const typename ei_meta_if<
+ Added & ~Removed & NestByValueBit,
+ ExpressionType,
+ typename ExpressionType::Nested
+ >::ret m_matrix;
};
/** \returns an expression of *this with added flags
@@ -121,7 +125,7 @@ MatrixBase<Derived>::lazy() const
*/
template<typename Derived>
inline const Flagged<Derived, NestByValueBit, 0>
-MatrixBase<Derived>::temporary() const
+MatrixBase<Derived>::nestByValue() const
{
return derived();
}
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 18525a5d1..1a634ce37 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -452,7 +452,7 @@ template<typename Derived> class MatrixBase
template<unsigned int Added>
const Flagged<Derived, Added, 0> marked() const;
const Flagged<Derived, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit> lazy() const;
- const Flagged<Derived, NestByValueBit, 0> temporary() const;
+ const Flagged<Derived, NestByValueBit, 0> nestByValue() const;
/** \returns number of elements to skip to pass from one row (resp. column) to another
* for a row-major (resp. column-major) matrix.
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 5d3e99281..15867d704 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -41,7 +41,7 @@ template<int Size, typename Lhs, typename Rhs>
struct ei_product_unroller<0, Size, Lhs, Rhs>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
- typename Lhs::Scalar &res)
+ typename Lhs::Scalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@@ -60,12 +60,6 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
};
-template<typename Lhs, typename Rhs>
-struct ei_product_unroller<0, Dynamic, Lhs, Rhs>
-{
- static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
-};
-
template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
struct ei_packet_product_unroller;
@@ -119,12 +113,6 @@ struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
};
-template<typename Lhs, typename Rhs, typename PacketScalar>
-struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar>
-{
- static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
-};
-
template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl {
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
{ return product._packetCoeffRowMajor(row,col); }
@@ -153,18 +141,74 @@ template<typename Lhs, typename Rhs> struct ei_product_eval_mode
{
enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- && (!( (Lhs::Flags&RowMajorBit) && ((Rhs::Flags&RowMajorBit) ^ RowMajorBit)))
+ && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
? CacheFriendlyProduct : NormalProduct };
};
+template<typename T> class ei_product_eval_to_column_major
+{
+ typedef typename ei_traits<T>::Scalar _Scalar;
+ enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
+ _MaxCols = ei_traits<T>::MaxColsAtCompileTime,
+ _Flags = ei_traits<T>::Flags
+ };
+
+ public:
+ typedef Matrix<_Scalar,
+ ei_traits<T>::RowsAtCompileTime,
+ ei_traits<T>::ColsAtCompileTime,
+ ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit,
+ ei_traits<T>::MaxRowsAtCompileTime,
+ ei_traits<T>::MaxColsAtCompileTime> type;
+};
+
+template<typename T, int n=1> struct ei_product_nested_rhs
+{
+ typedef typename ei_meta_if<
+ (ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit),
+ T,
+ typename ei_meta_if<
+ ((ei_traits<T>::Flags & EvalBeforeNestingBit)
+ || (ei_traits<T>::Flags & RowMajorBit)
+ || (!(ei_traits<T>::Flags & DirectAccessBit))
+ || (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
+ typename ei_product_eval_to_column_major<T>::type,
+ const T&
+ >::ret
+ >::ret type;
+};
+
+template<typename T, int n=1> struct ei_product_nested_lhs
+{
+ typedef typename ei_meta_if<
+ ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit),
+ T,
+ typename ei_meta_if<
+ int(ei_traits<T>::Flags) & EvalBeforeNestingBit
+ || (!(int(ei_traits<T>::Flags) & DirectAccessBit))
+ || (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
+ typename ei_eval<T>::type,
+ const T&
+ >::ret
+ >::ret type;
+};
+
template<typename Lhs, typename Rhs, int EvalMode>
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
{
typedef typename Lhs::Scalar Scalar;
- typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
- typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
- typedef typename ei_unref<LhsNested>::type _LhsNested;
- typedef typename ei_unref<RhsNested>::type _RhsNested;
+ // the cache friendly product evals lhs once only
+ // FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ?
+ typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
+ typename ei_product_nested_lhs<Lhs,1>::type,
+ typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested;
+
+ // NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation
+ typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
+ typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type,
+ typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
+ typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
+ typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
@@ -174,6 +218,8 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
ColsAtCompileTime = Rhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
+ // the vectorization flags are only used by the normal product,
+ // the other one is always vectorized !
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
@@ -207,6 +253,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
+ enum {
+ PacketSize = ei_packet_traits<Scalar>::size
+ };
+
inline Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
@@ -214,12 +264,12 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
}
/** \internal */
- template<typename DestDerived, int AlignedMode>
- void _cacheOptimalEval(DestDerived& res, ei_meta_false) const;
- #ifdef EIGEN_VECTORIZE
- template<typename DestDerived, int AlignedMode>
- void _cacheOptimalEval(DestDerived& res, ei_meta_true) const;
- #endif
+ template<typename DestDerived>
+ void _cacheFriendlyEval(DestDerived& res) const;
+
+ /** \internal */
+ template<typename DestDerived>
+ void _cacheFriendlyEvalAndAdd(DestDerived& res) const;
private:
@@ -252,7 +302,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
{
PacketScalar res;
- ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1,
+ ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1,
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
? Lhs::ColsAtCompileTime : Dynamic,
_LhsNested, _RhsNested, PacketScalar>
@@ -279,16 +329,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
for(int i = 1; i < m_lhs.cols(); i++)
res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
return res;
-// const PacketScalar tmp[4];
-// ei_punpack(m_rhs.packetCoeff(0,col), tmp);
-//
-// return
-// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0],
-// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1],
-// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2]
-// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3]))));
}
+ template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_>
+ friend struct ei_cache_friendly_selector;
protected:
const LhsNested m_lhs;
@@ -297,9 +341,6 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
/** \returns the matrix product of \c *this and \a other.
*
- * \note This function causes an immediate evaluation. If you want to perform a matrix product
- * without immediate evaluation, call .lazy() on one of the matrices before taking the product.
- *
* \sa lazy(), operator*=(const MatrixBase&)
*/
template<typename Derived>
@@ -322,168 +363,107 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
return *this = *this * other;
}
+/** \internal */
+template<typename Derived>
+template<typename Lhs,typename Rhs>
+inline Derived&
+MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
+{
+ other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
+ return derived();
+}
+
template<typename Derived>
template<typename Lhs, typename Rhs>
inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product)
{
- product.template _cacheOptimalEval<Derived, Aligned>(derived(),
- #ifdef EIGEN_VECTORIZE
- typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret()
- #else
- ei_meta_false()
- #endif
- );
+ product._cacheFriendlyEval(derived());
return derived();
}
-template<typename Lhs, typename Rhs, int EvalMode>
-template<typename DestDerived, int AlignedMode>
-void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const
+template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess>
+struct ei_cache_friendly_selector
{
- res.setZero();
- const int cols4 = m_lhs.cols() & 0xfffffffC;
- if (Lhs::Flags&RowMajorBit)
+ typedef Product<Lhs,Rhs,EvalMode> Prod;
+ typedef typename Prod::_LhsNested _LhsNested;
+ typedef typename Prod::_RhsNested _RhsNested;
+ typedef typename Prod::Scalar Scalar;
+ static inline void eval(const Prod& product, DestDerived& res)
{
-// std::cout << "opt rhs\n";
- int j=0;
- for(; j<cols4; j+=4)
+ if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ )
{
- for(int k=0; k<this->rows(); ++k)
- {
- const Scalar tmp0 = m_lhs.coeff(k,j );
- const Scalar tmp1 = m_lhs.coeff(k,j+1);
- const Scalar tmp2 = m_lhs.coeff(k,j+2);
- const Scalar tmp3 = m_lhs.coeff(k,j+3);
- for (int i=0; i<this->cols(); ++i)
- res.coeffRef(k,i) += tmp0 * m_rhs.coeff(j+0,i) + tmp1 * m_rhs.coeff(j+1,i)
- + tmp2 * m_rhs.coeff(j+2,i) + tmp3 * m_rhs.coeff(j+3,i);
- }
+ res.setZero();
+ ei_cache_friendly_product<Scalar>(
+ product._rows(), product._cols(), product.m_lhs.cols(),
+ _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
+ _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
+ Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
+ );
}
- for(; j<m_lhs.cols(); ++j)
+ else
{
- for(int k=0; k<this->rows(); ++k)
- {
- const Scalar tmp = m_rhs.coeff(k,j);
- for (int i=0; i<this->cols(); ++i)
- res.coeffRef(k,i) += tmp * m_lhs.coeff(j,i);
- }
+ res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
}
- else
+
+ static inline void eval_and_add(const Prod& product, DestDerived& res)
{
-// std::cout << "opt lhs\n";
- int j = 0;
- for(; j<cols4; j+=4)
+ if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ )
{
- for(int k=0; k<this->cols(); ++k)
- {
- const Scalar tmp0 = m_rhs.coeff(j ,k);
- const Scalar tmp1 = m_rhs.coeff(j+1,k);
- const Scalar tmp2 = m_rhs.coeff(j+2,k);
- const Scalar tmp3 = m_rhs.coeff(j+3,k);
- for (int i=0; i<this->rows(); ++i)
- res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j+0) + tmp1 * m_lhs.coeff(i,j+1)
- + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
- }
+ ei_cache_friendly_product<Scalar>(
+ product._rows(), product._cols(), product.m_lhs.cols(),
+ _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
+ _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
+ Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
+ );
}
- for(; j<m_lhs.cols(); ++j)
+ else
{
- for(int k=0; k<this->cols(); ++k)
- {
- const Scalar tmp = m_rhs.coeff(j,k);
- for (int i=0; i<this->rows(); ++i)
- res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
- }
+ res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
}
-}
+};
-#ifdef EIGEN_VECTORIZE
-template<typename Lhs, typename Rhs, int EvalMode>
-template<typename DestDerived, int AlignedMode>
-void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const
+template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived>
+struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false>
{
-
- if (((Lhs::Flags&RowMajorBit) && (_cols() % ei_packet_traits<Scalar>::size != 0))
- || (_rows() % ei_packet_traits<Scalar>::size != 0))
+ typedef Product<Lhs,Rhs,EvalMode> Prod;
+ typedef typename Prod::_LhsNested _LhsNested;
+ typedef typename Prod::_RhsNested _RhsNested;
+ typedef typename Prod::Scalar Scalar;
+ static inline void eval(const Prod& product, DestDerived& res)
{
- return _cacheOptimalEval<DestDerived, AlignedMode>(res, ei_meta_false());
+ res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
- res.setZero();
- const int cols4 = m_lhs.cols() & 0xfffffffC;
- if (Lhs::Flags&RowMajorBit)
- {
-// std::cout << "packet rhs\n";
- int j=0;
- for(; j<cols4; j+=4)
- {
- for(int k=0; k<this->rows(); k++)
- {
- const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_lhs.coeff(k,j+0));
- const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_lhs.coeff(k,j+1));
- const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_lhs.coeff(k,j+2));
- const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3));
- for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
- {
- res.template writePacketCoeff<AlignedMode>(k,i,
- ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i),
- ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i),
- ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i),
- ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i),
- res.template packetCoeff<AlignedMode>(k,i)))))
- );
- }
- }
- }
- for(; j<m_lhs.cols(); ++j)
- {
- for(int k=0; k<this->rows(); k++)
- {
- const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j));
- for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
- res.template writePacketCoeff<AlignedMode>(k,i,
- ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i)));
- }
- }
- }
- else
+ static inline void eval_and_add(const Prod& product, DestDerived& res)
{
-// std::cout << "packet lhs\n";
- int k=0;
- for(; k<cols4; k+=4)
- {
- for(int j=0; j<this->cols(); j+=1)
- {
- const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(k+0,j));
- const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(k+1,j));
- const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(k+2,j));
- const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(k+3,j));
-
- for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
- {
- res.template writePacketCoeff<AlignedMode>(i,j,
- ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k),
- ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1),
- ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2),
- ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3),
- res.template packetCoeff<AlignedMode>(i,j)))))
- );
- }
- }
- }
- for(; k<m_lhs.cols(); ++k)
- {
- for(int j=0; j<this->cols(); j++)
- {
- const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j));
- for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
- res.template writePacketCoeff<AlignedMode>(k,j,
- ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j)));
- }
- }
+ res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
+};
+
+template<typename Lhs, typename Rhs, int EvalMode>
+template<typename DestDerived>
+inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
+{
+ ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
+ _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
+ ::eval(*this, res);
+}
+
+template<typename Lhs, typename Rhs, int EvalMode>
+template<typename DestDerived>
+inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
+{
+ ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
+ _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
+ ::eval_and_add(*this, res);
}
-#endif // EIGEN_VECTORIZE
#endif // EIGEN_PRODUCT_H
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index fd28f4bab..c536a4608 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -107,7 +107,7 @@ template<typename Derived>
inline Transpose<Derived>
MatrixBase<Derived>::transpose()
{
- return Transpose<Derived>(derived());
+ return derived();
}
/** This is the const version of transpose(). \sa adjoint() */
@@ -115,7 +115,7 @@ template<typename Derived>
inline const Transpose<Derived>
MatrixBase<Derived>::transpose() const
{
- return Transpose<Derived>(derived());
+ return derived();
}
/** \returns an expression of the adjoint (i.e. conjugate transpose) of *this.
@@ -130,7 +130,7 @@ inline const Transpose<
, NestByValueBit, 0> >
MatrixBase<Derived>::adjoint() const
{
- return conjugate().temporary().transpose();
+ return conjugate().nestByValue();
}
#endif // EIGEN_TRANSPOSE_H
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index e599d8a3d..0e6ed4b21 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -53,16 +53,16 @@ const unsigned int HereditaryBits = RowMajorBit
| EvalBeforeAssigningBit
| LargeBit;
-// Possible values for the PartType parameter of part() and the ExtractType parameter of extract()
+// Possible values for the Mode parameter of part() and of extract()
const unsigned int Upper = UpperTriangularBit;
const unsigned int StrictlyUpper = UpperTriangularBit | ZeroDiagBit;
const unsigned int Lower = LowerTriangularBit;
const unsigned int StrictlyLower = LowerTriangularBit | ZeroDiagBit;
-// additional possible values for the PartType parameter of part()
+// additional possible values for the Mode parameter of part()
const unsigned int SelfAdjoint = SelfAdjointBit;
-// additional possible values for the ExtractType parameter of extract()
+// additional possible values for the Mode parameter of extract()
const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit;
const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit;
diff --git a/Eigen/src/Core/ProductWIP.h b/disabled/Product.h
index d1bc86a13..5d3e99281 100644
--- a/Eigen/src/Core/ProductWIP.h
+++ b/disabled/Product.h
@@ -26,8 +26,6 @@
#ifndef EIGEN_PRODUCT_H
#define EIGEN_PRODUCT_H
-#include "CacheFriendlyProduct.h"
-
template<int Index, int Size, typename Lhs, typename Rhs>
struct ei_product_unroller
{
@@ -43,7 +41,7 @@ template<int Size, typename Lhs, typename Rhs>
struct ei_product_unroller<0, Size, Lhs, Rhs>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
- typename Lhs::Scalar &res)
+ typename Lhs::Scalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@@ -62,6 +60,12 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
};
+template<typename Lhs, typename Rhs>
+struct ei_product_unroller<0, Dynamic, Lhs, Rhs>
+{
+ static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
+};
+
template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
struct ei_packet_product_unroller;
@@ -115,6 +119,12 @@ struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
};
+template<typename Lhs, typename Rhs, typename PacketScalar>
+struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar>
+{
+ static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
+};
+
template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl {
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
{ return product._packetCoeffRowMajor(row,col); }
@@ -143,74 +153,18 @@ template<typename Lhs, typename Rhs> struct ei_product_eval_mode
{
enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+ && (!( (Lhs::Flags&RowMajorBit) && ((Rhs::Flags&RowMajorBit) ^ RowMajorBit)))
? CacheFriendlyProduct : NormalProduct };
};
-template<typename T> class ei_product_eval_to_column_major
-{
- typedef typename ei_traits<T>::Scalar _Scalar;
- enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
- _MaxCols = ei_traits<T>::MaxColsAtCompileTime,
- _Flags = ei_traits<T>::Flags
- };
-
- public:
- typedef Matrix<_Scalar,
- ei_traits<T>::RowsAtCompileTime,
- ei_traits<T>::ColsAtCompileTime,
- ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit,
- ei_traits<T>::MaxRowsAtCompileTime,
- ei_traits<T>::MaxColsAtCompileTime> type;
-};
-
-template<typename T, int n=1> struct ei_product_nested_rhs
-{
- typedef typename ei_meta_if<
- (ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit),
- T,
- typename ei_meta_if<
- ((ei_traits<T>::Flags & EvalBeforeNestingBit)
- || (ei_traits<T>::Flags & RowMajorBit)
- || (!(ei_traits<T>::Flags & DirectAccessBit))
- || (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
- typename ei_product_eval_to_column_major<T>::type,
- const T&
- >::ret
- >::ret type;
-};
-
-template<typename T, int n=1> struct ei_product_nested_lhs
-{
- typedef typename ei_meta_if<
- ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit),
- T,
- typename ei_meta_if<
- int(ei_traits<T>::Flags) & EvalBeforeNestingBit
- || (!(int(ei_traits<T>::Flags) & DirectAccessBit))
- || (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
- typename ei_eval<T>::type,
- const T&
- >::ret
- >::ret type;
-};
-
template<typename Lhs, typename Rhs, int EvalMode>
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
{
typedef typename Lhs::Scalar Scalar;
- // the cache friendly product evals lhs once only
- // FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ?
- typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
- typename ei_product_nested_lhs<Lhs,1>::type,
- typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested;
-
- // NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation
- typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
- typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type,
- typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
- typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
- typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
+ typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+ typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+ typedef typename ei_unref<LhsNested>::type _LhsNested;
+ typedef typename ei_unref<RhsNested>::type _RhsNested;
enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
@@ -220,8 +174,6 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
ColsAtCompileTime = Rhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
- // the vectorization flags are only used by the normal product,
- // the other one is always vectorized !
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
@@ -255,10 +207,6 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
- enum {
- PacketSize = ei_packet_traits<Scalar>::size
- };
-
inline Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
@@ -266,12 +214,12 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
}
/** \internal */
- template<typename DestDerived>
- void _cacheFriendlyEval(DestDerived& res) const;
-
- /** \internal */
- template<typename DestDerived>
- void _cacheFriendlyEvalAndAdd(DestDerived& res) const;
+ template<typename DestDerived, int AlignedMode>
+ void _cacheOptimalEval(DestDerived& res, ei_meta_false) const;
+ #ifdef EIGEN_VECTORIZE
+ template<typename DestDerived, int AlignedMode>
+ void _cacheOptimalEval(DestDerived& res, ei_meta_true) const;
+ #endif
private:
@@ -304,7 +252,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
{
PacketScalar res;
- ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1,
+ ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1,
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
? Lhs::ColsAtCompileTime : Dynamic,
_LhsNested, _RhsNested, PacketScalar>
@@ -331,10 +279,16 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
for(int i = 1; i < m_lhs.cols(); i++)
res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
return res;
+// const PacketScalar tmp[4];
+// ei_punpack(m_rhs.packetCoeff(0,col), tmp);
+//
+// return
+// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0],
+// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1],
+// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2]
+// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3]))));
}
- template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_>
- friend struct ei_cache_friendly_selector;
protected:
const LhsNested m_lhs;
@@ -343,6 +297,9 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
/** \returns the matrix product of \c *this and \a other.
*
+ * \note This function causes an immediate evaluation. If you want to perform a matrix product
+ * without immediate evaluation, call .lazy() on one of the matrices before taking the product.
+ *
* \sa lazy(), operator*=(const MatrixBase&)
*/
template<typename Derived>
@@ -365,107 +322,168 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
return *this = *this * other;
}
-/** \internal */
-template<typename Derived>
-template<typename Lhs,typename Rhs>
-inline Derived&
-MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
-{
- other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
- return derived();
-}
-
template<typename Derived>
template<typename Lhs, typename Rhs>
inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product)
{
- product._cacheFriendlyEval(derived());
+ product.template _cacheOptimalEval<Derived, Aligned>(derived(),
+ #ifdef EIGEN_VECTORIZE
+ typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret()
+ #else
+ ei_meta_false()
+ #endif
+ );
return derived();
}
-template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess>
-struct ei_cache_friendly_selector
+template<typename Lhs, typename Rhs, int EvalMode>
+template<typename DestDerived, int AlignedMode>
+void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const
{
- typedef Product<Lhs,Rhs,EvalMode> Prod;
- typedef typename Prod::_LhsNested _LhsNested;
- typedef typename Prod::_RhsNested _RhsNested;
- typedef typename Prod::Scalar Scalar;
- static inline void eval(const Prod& product, DestDerived& res)
+ res.setZero();
+ const int cols4 = m_lhs.cols() & 0xfffffffC;
+ if (Lhs::Flags&RowMajorBit)
{
- if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- )
+// std::cout << "opt rhs\n";
+ int j=0;
+ for(; j<cols4; j+=4)
{
- res.setZero();
- ei_cache_friendly_product<Scalar>(
- product._rows(), product._cols(), product.m_lhs.cols(),
- _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
- _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
- Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
- );
+ for(int k=0; k<this->rows(); ++k)
+ {
+ const Scalar tmp0 = m_lhs.coeff(k,j );
+ const Scalar tmp1 = m_lhs.coeff(k,j+1);
+ const Scalar tmp2 = m_lhs.coeff(k,j+2);
+ const Scalar tmp3 = m_lhs.coeff(k,j+3);
+ for (int i=0; i<this->cols(); ++i)
+ res.coeffRef(k,i) += tmp0 * m_rhs.coeff(j+0,i) + tmp1 * m_rhs.coeff(j+1,i)
+ + tmp2 * m_rhs.coeff(j+2,i) + tmp3 * m_rhs.coeff(j+3,i);
+ }
}
- else
+ for(; j<m_lhs.cols(); ++j)
{
- res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
+ for(int k=0; k<this->rows(); ++k)
+ {
+ const Scalar tmp = m_rhs.coeff(k,j);
+ for (int i=0; i<this->cols(); ++i)
+ res.coeffRef(k,i) += tmp * m_lhs.coeff(j,i);
+ }
}
}
-
- static inline void eval_and_add(const Prod& product, DestDerived& res)
+ else
{
- if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
- )
+// std::cout << "opt lhs\n";
+ int j = 0;
+ for(; j<cols4; j+=4)
{
- ei_cache_friendly_product<Scalar>(
- product._rows(), product._cols(), product.m_lhs.cols(),
- _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
- _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
- Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
- );
+ for(int k=0; k<this->cols(); ++k)
+ {
+ const Scalar tmp0 = m_rhs.coeff(j ,k);
+ const Scalar tmp1 = m_rhs.coeff(j+1,k);
+ const Scalar tmp2 = m_rhs.coeff(j+2,k);
+ const Scalar tmp3 = m_rhs.coeff(j+3,k);
+ for (int i=0; i<this->rows(); ++i)
+ res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j+0) + tmp1 * m_lhs.coeff(i,j+1)
+ + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
+ }
}
- else
+ for(; j<m_lhs.cols(); ++j)
{
- res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
+ for(int k=0; k<this->cols(); ++k)
+ {
+ const Scalar tmp = m_rhs.coeff(j,k);
+ for (int i=0; i<this->rows(); ++i)
+ res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
+ }
}
}
-};
+}
-template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived>
-struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false>
+#ifdef EIGEN_VECTORIZE
+template<typename Lhs, typename Rhs, int EvalMode>
+template<typename DestDerived, int AlignedMode>
+void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const
{
- typedef Product<Lhs,Rhs,EvalMode> Prod;
- typedef typename Prod::_LhsNested _LhsNested;
- typedef typename Prod::_RhsNested _RhsNested;
- typedef typename Prod::Scalar Scalar;
- static inline void eval(const Prod& product, DestDerived& res)
+
+ if (((Lhs::Flags&RowMajorBit) && (_cols() % ei_packet_traits<Scalar>::size != 0))
+ || (_rows() % ei_packet_traits<Scalar>::size != 0))
{
- res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
+ return _cacheOptimalEval<DestDerived, AlignedMode>(res, ei_meta_false());
}
- static inline void eval_and_add(const Prod& product, DestDerived& res)
+ res.setZero();
+ const int cols4 = m_lhs.cols() & 0xfffffffC;
+ if (Lhs::Flags&RowMajorBit)
{
- res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
+// std::cout << "packet rhs\n";
+ int j=0;
+ for(; j<cols4; j+=4)
+ {
+ for(int k=0; k<this->rows(); k++)
+ {
+ const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_lhs.coeff(k,j+0));
+ const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_lhs.coeff(k,j+1));
+ const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_lhs.coeff(k,j+2));
+ const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3));
+ for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
+ {
+ res.template writePacketCoeff<AlignedMode>(k,i,
+ ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i),
+ ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i),
+ ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i),
+ ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i),
+ res.template packetCoeff<AlignedMode>(k,i)))))
+ );
+ }
+ }
+ }
+ for(; j<m_lhs.cols(); ++j)
+ {
+ for(int k=0; k<this->rows(); k++)
+ {
+ const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j));
+ for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
+ res.template writePacketCoeff<AlignedMode>(k,i,
+ ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i)));
+ }
+ }
+ }
+ else
+ {
+// std::cout << "packet lhs\n";
+ int k=0;
+ for(; k<cols4; k+=4)
+ {
+ for(int j=0; j<this->cols(); j+=1)
+ {
+ const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(k+0,j));
+ const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(k+1,j));
+ const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(k+2,j));
+ const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(k+3,j));
+
+ for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
+ {
+ res.template writePacketCoeff<AlignedMode>(i,j,
+ ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k),
+ ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1),
+ ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2),
+ ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3),
+ res.template packetCoeff<AlignedMode>(i,j)))))
+ );
+ }
+ }
+ }
+ for(; k<m_lhs.cols(); ++k)
+ {
+ for(int j=0; j<this->cols(); j++)
+ {
+ const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j));
+ for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
+ res.template writePacketCoeff<AlignedMode>(k,j,
+ ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j)));
+ }
+ }
}
-};
-
-template<typename Lhs, typename Rhs, int EvalMode>
-template<typename DestDerived>
-inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
-{
- ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
- _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
- ::eval(*this, res);
-}
-
-template<typename Lhs, typename Rhs, int EvalMode>
-template<typename DestDerived>
-inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
-{
- ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
- _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
- ::eval_and_add(*this, res);
}
+#endif // EIGEN_VECTORIZE
#endif // EIGEN_PRODUCT_H
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index 39af38afe..4e1a7696d 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -1,5 +1,11 @@
IF(BUILD_DOC)
+IF(CMAKE_COMPILER_IS_GNUCXX)
+ IF(CMAKE_SYSTEM_NAME MATCHES Linux)
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -g1")
+ ENDIF(CMAKE_SYSTEM_NAME MATCHES Linux)
+ENDIF(CMAKE_COMPILER_IS_GNUCXX)
+
CONFIGURE_FILE(
${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 6cd98800a..238006bdf 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,5 +1,11 @@
IF(BUILD_TESTS)
+IF(CMAKE_COMPILER_IS_GNUCXX)
+ IF(CMAKE_SYSTEM_NAME MATCHES Linux)
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -g2")
+ ENDIF(CMAKE_SYSTEM_NAME MATCHES Linux)
+ENDIF(CMAKE_COMPILER_IS_GNUCXX)
+
OPTION(EIGEN_NO_ASSERTION_CHECKING "Disable checking of assertions" OFF)
# similar to SET_TARGET_PROPERTIES but append the property instead of overwritting it
@@ -64,14 +70,14 @@ FIND_PACKAGE(Qt4 REQUIRED)
INCLUDE_DIRECTORIES( ${QT_INCLUDE_DIR} )
EI_ADD_TEST(basicstuff)
-EI_ADD_TEST(miscmatrices)
+EI_ADD_TEST(linearstructure)
+EI_ADD_TEST(cwiseop)
+EI_ADD_TEST(product)
EI_ADD_TEST(adjoint)
EI_ADD_TEST(submatrices)
+EI_ADD_TEST(miscmatrices)
EI_ADD_TEST(smallvectors)
-EI_ADD_TEST(cwiseop)
EI_ADD_TEST(map)
-EI_ADD_TEST(linearstructure)
-EI_ADD_TEST(product)
EI_ADD_TEST(triangular)
EI_ADD_TEST(cholesky)
# EI_ADD_TEST(determinant)
diff --git a/test/adjoint.cpp b/test/adjoint.cpp
index ae1b002e5..9a15c4986 100644
--- a/test/adjoint.cpp
+++ b/test/adjoint.cpp
@@ -51,23 +51,15 @@ template<typename MatrixType> void adjoint(const MatrixType& m)
Scalar s1 = ei_random<Scalar>(),
s2 = ei_random<Scalar>();
- // check involutivity of adjoint, transpose, conjugate
- VERIFY_IS_APPROX(m1.transpose().transpose(), m1);
- VERIFY_IS_APPROX(m1.conjugate().conjugate(), m1);
- VERIFY_IS_APPROX(m1.adjoint().adjoint(), m1);
-
// check basic compatibility of adjoint, transpose, conjugate
VERIFY_IS_APPROX(m1.transpose().conjugate().adjoint(), m1);
VERIFY_IS_APPROX(m1.adjoint().conjugate().transpose(), m1);
- if(!NumTraits<Scalar>::IsComplex)
- VERIFY_IS_APPROX(m1.adjoint().transpose(), m1);
// check multiplicative behavior
- VERIFY_IS_APPROX((m1.transpose() * m2).transpose(), m2.transpose() * m1);
+ std::cout << (m1.adjoint() * m2).adjoint() << std::endl;
+ std::cout << "------------------------------" << std::endl;
+ std::cout << m2.adjoint() * m1 << std::endl;
VERIFY_IS_APPROX((m1.adjoint() * m2).adjoint(), m2.adjoint() * m1);
- VERIFY_IS_APPROX((m1.transpose() * m2).conjugate(), m1.adjoint() * m2.conjugate());
- VERIFY_IS_APPROX((s1 * m1).transpose(), s1 * m1.transpose());
- VERIFY_IS_APPROX((s1 * m1).conjugate(), ei_conj(s1) * m1.conjugate());
VERIFY_IS_APPROX((s1 * m1).adjoint(), ei_conj(s1) * m1.adjoint());
// check basic properties of dot, norm, norm2
diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp
index a7b058d69..8beb33925 100644
--- a/test/linearstructure.cpp
+++ b/test/linearstructure.cpp
@@ -62,11 +62,7 @@ template<typename MatrixType> void linearStructure(const MatrixType& m)
VERIFY_IS_APPROX(-m2+m1+m2, m1);
VERIFY_IS_APPROX(m1*s1, s1*m1);
VERIFY_IS_APPROX((m1+m2)*s1, s1*m1+s1*m2);
- VERIFY_IS_APPROX((s1+s2)*m1, m1*s1+m1*s2);
- VERIFY_IS_APPROX((m1-m2)*s1, s1*m1-s1*m2);
- VERIFY_IS_APPROX((s1-s2)*m1, m1*s1-m1*s2);
VERIFY_IS_APPROX((-m1+m2)*s1, -s1*m1+s1*m2);
- VERIFY_IS_APPROX((-s1+s2)*m1, -m1*s1+m1*s2);
m3 = m2; m3 += m1;
VERIFY_IS_APPROX(m3, m1+m2);
m3 = m2; m3 -= m1;
diff --git a/test/product.cpp b/test/product.cpp
index 70b41212a..a89497763 100644
--- a/test/product.cpp
+++ b/test/product.cpp
@@ -73,14 +73,10 @@ template<typename MatrixType> void product(const MatrixType& m)
VERIFY_IS_APPROX(s1*(square*m1), (s1*square)*m1);
VERIFY_IS_APPROX(s1*(square*m1), square*(m1*s1));
- // continue testing Product.h: lazy product
- VERIFY_IS_APPROX(square.lazy() * m1, square*m1);
- VERIFY_IS_APPROX(square * m1.lazy(), square*m1);
// again, test operator() to check const-qualification
s1 += (square.lazy() * m1)(r,c);
// test Product.h together with Identity.h
- VERIFY_IS_APPROX(m1, identity*m1);
VERIFY_IS_APPROX(v1, identity*v1);
// again, test operator() to check const-qualification
VERIFY_IS_APPROX(MatrixType::identity(rows, cols)(r,c), static_cast<Scalar>(r==c));
@@ -92,18 +88,14 @@ template<typename MatrixType> void product(const MatrixType& m)
void test_product()
{
for(int i = 0; i < g_repeat; i++) {
- CALL_SUBTEST( product(Matrix<float, 1, 1>()) );
- CALL_SUBTEST( product(Matrix<float, 3, 3>()) );
- CALL_SUBTEST( product(Matrix<float, 4, 2>()) );
+ CALL_SUBTEST( product(Matrix3i()) );
+ CALL_SUBTEST( product(Matrix<float, 3, 2>()) );
CALL_SUBTEST( product(Matrix4d()) );
}
for(int i = 0; i < g_repeat; i++) {
- int rows = ei_random<int>(1,320);
- int cols = ei_random<int>(1,320);
- CALL_SUBTEST( product(MatrixXf(rows, cols)) );
- CALL_SUBTEST( product(MatrixXd(rows, cols)) );
- CALL_SUBTEST( product(MatrixXi(rows, cols)) );
- CALL_SUBTEST( product(MatrixXcf(rows, cols)) );
- CALL_SUBTEST( product(MatrixXcd(rows, cols)) );
+ CALL_SUBTEST( product(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) );
+ CALL_SUBTEST( product(MatrixXd(ei_random<int>(1,320), ei_random<int>(1,320))) );
+ CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,320), ei_random<int>(1,320))) );
+ CALL_SUBTEST( product(MatrixXcf(ei_random<int>(1,50), ei_random<int>(1,50))) );
}
}