diff options
author | Benoit Jacob <jacob.benoit.1@gmail.com> | 2008-05-28 04:38:16 +0000 |
---|---|---|
committer | Benoit Jacob <jacob.benoit.1@gmail.com> | 2008-05-28 04:38:16 +0000 |
commit | aebecae510dd29f5573d3f86dfed526e6d8be9a8 (patch) | |
tree | b9bb9d2d15011409fe5f66e1fbb56f1998f5cc91 | |
parent | 559233c73e86474d67f5730f9b20e46ccea93b7f (diff) |
* find the proper way of nesting the expression in Flagged:
finally that's more subtle than just using ei_nested, because when
flagging with NestByValueBit we want to store the expression by value
already, regardless of whether it already had the NestByValueBit set.
* rename temporary() ----> nestByValue()
* move the old Product.h to disabled/, replace by what was ProductWIP.h
* tweak -O and -g flags for tests and examples
* reorder the tests -- basic things go first
* simplifications, e.g. in many methoeds return derived() and count on
implicit casting to the actual return type.
* strip some not-really-useful stuff from the heaviest tests
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | Eigen/Core | 5 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseUnaryOp.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/DiagonalMatrix.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Flagged.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/MatrixBase.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 322 | ||||
-rw-r--r-- | Eigen/src/Core/Transpose.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/util/Constants.h | 6 | ||||
-rw-r--r-- | disabled/Product.h (renamed from Eigen/src/Core/ProductWIP.h) | 324 | ||||
-rw-r--r-- | doc/CMakeLists.txt | 6 | ||||
-rw-r--r-- | test/CMakeLists.txt | 14 | ||||
-rw-r--r-- | test/adjoint.cpp | 14 | ||||
-rw-r--r-- | test/linearstructure.cpp | 4 | ||||
-rw-r--r-- | test/product.cpp | 20 |
15 files changed, 377 insertions, 382 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index b533e3b66..95a8a2311 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ SET(CMAKE_INCLUDE_CURRENT_DIR ON) IF(CMAKE_COMPILER_IS_GNUCXX) IF(CMAKE_SYSTEM_NAME MATCHES Linux) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -g -pedantic -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing") IF(TEST_OPENMP) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") MESSAGE("Enabling OpenMP in tests/examples") diff --git a/Eigen/Core b/Eigen/Core index 3e1b5184d..c81341103 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -35,7 +35,9 @@ namespace Eigen { #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseNullaryOp.h" -#include "src/Core/ProductWIP.h" +#include "src/Core/InverseProduct.h" +#include "src/Core/CacheFriendlyProduct.h" +#include "src/Core/Product.h" #include "src/Core/Block.h" #include "src/Core/Minor.h" #include "src/Core/Transpose.h" @@ -51,7 +53,6 @@ namespace Eigen { #include "src/Core/CommaInitializer.h" #include "src/Core/Extract.h" #include "src/Core/Part.h" -#include "src/Core/InverseProduct.h" } // namespace Eigen diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index 26197b369..8d2737e12 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -118,7 +118,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_opposite_op<typename ei_traits<Derived>::Scalar>,Derived> MatrixBase<Derived>::operator-() const { - return CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise absolute value of \c *this @@ -127,7 +127,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_abs_op<typename ei_traits<Derived>::Scalar>,Derived> MatrixBase<Derived>::cwiseAbs() const { - return CwiseUnaryOp<ei_scalar_abs_op<Scalar>,Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise squared absolute value of \c *this @@ -136,7 +136,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_abs2_op<typename ei_traits<Derived>::Scalar>,Derived> MatrixBase<Derived>::cwiseAbs2() const { - return CwiseUnaryOp<ei_scalar_abs2_op<Scalar>,Derived>(derived()); + return derived(); } /** \returns an expression of the complex conjugate of *this. @@ -146,7 +146,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_conjugate_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::conjugate() const { - return CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of *this with the \a Scalar type casted to @@ -161,7 +161,7 @@ template<typename NewType> inline const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived> MatrixBase<Derived>::cast() const { - return CwiseUnaryOp<ei_scalar_cast_op<Scalar, NewType>, Derived>(derived()); + return derived(); } /** \relates MatrixBase */ @@ -201,7 +201,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_sqrt_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseSqrt() const { - return CwiseUnaryOp<ei_scalar_sqrt_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise exponential of *this. */ @@ -209,7 +209,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_exp_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseExp() const { - return CwiseUnaryOp<ei_scalar_exp_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise logarithm of *this. */ @@ -217,7 +217,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_log_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseLog() const { - return CwiseUnaryOp<ei_scalar_log_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise cosine of *this. */ @@ -225,7 +225,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_cos_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseCos() const { - return CwiseUnaryOp<ei_scalar_cos_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise sine of *this. */ @@ -233,10 +233,10 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_sin_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseSin() const { - return CwiseUnaryOp<ei_scalar_sin_op<Scalar>, Derived>(derived()); + return derived(); } -/** \relates MatrixBase */ +/** \returns an expression of the coefficient-wise power of *this to the given exponent. */ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_pow_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwisePow(const Scalar& exponent) const diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index bd420511f..0581c669c 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -95,7 +95,7 @@ template<typename Derived> inline const DiagonalMatrix<Derived> MatrixBase<Derived>::asDiagonal() const { - return DiagonalMatrix<Derived>(derived()); + return derived(); } /** \returns true if *this is approximately equal to a diagonal matrix, diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index 9167c8a97..f287abee4 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -33,7 +33,7 @@ * \param Added the flags added to the expression * \param Removed the flags removed from the expression (has priority over Added). * - * This class represents an expression whose flags have been modified + * This class represents an expression whose flags have been modified. * It is the return type of MatrixBase::flagged() * and most of the time this is the only way it is used. * @@ -94,7 +94,11 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas } protected: - const ExpressionType m_matrix; + const typename ei_meta_if< + Added & ~Removed & NestByValueBit, + ExpressionType, + typename ExpressionType::Nested + >::ret m_matrix; }; /** \returns an expression of *this with added flags @@ -121,7 +125,7 @@ MatrixBase<Derived>::lazy() const */ template<typename Derived> inline const Flagged<Derived, NestByValueBit, 0> -MatrixBase<Derived>::temporary() const +MatrixBase<Derived>::nestByValue() const { return derived(); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 18525a5d1..1a634ce37 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -452,7 +452,7 @@ template<typename Derived> class MatrixBase template<unsigned int Added> const Flagged<Derived, Added, 0> marked() const; const Flagged<Derived, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit> lazy() const; - const Flagged<Derived, NestByValueBit, 0> temporary() const; + const Flagged<Derived, NestByValueBit, 0> nestByValue() const; /** \returns number of elements to skip to pass from one row (resp. column) to another * for a row-major (resp. column-major) matrix. diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 5d3e99281..15867d704 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -41,7 +41,7 @@ template<int Size, typename Lhs, typename Rhs> struct ei_product_unroller<0, Size, Lhs, Rhs> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, - typename Lhs::Scalar &res) + typename Lhs::Scalar &res) { res = lhs.coeff(row, 0) * rhs.coeff(0, col); } @@ -60,12 +60,6 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs> inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} }; -template<typename Lhs, typename Rhs> -struct ei_product_unroller<0, Dynamic, Lhs, Rhs> -{ - static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} -}; - template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar> struct ei_packet_product_unroller; @@ -119,12 +113,6 @@ struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar> inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} }; -template<typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar> -{ - static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} -}; - template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl { inline static typename Product::PacketScalar execute(const Product& product, int row, int col) { return product._packetCoeffRowMajor(row,col); } @@ -153,18 +141,74 @@ template<typename Lhs, typename Rhs> struct ei_product_eval_mode { enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD && Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && (!( (Lhs::Flags&RowMajorBit) && ((Rhs::Flags&RowMajorBit) ^ RowMajorBit))) + && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? CacheFriendlyProduct : NormalProduct }; }; +template<typename T> class ei_product_eval_to_column_major +{ + typedef typename ei_traits<T>::Scalar _Scalar; + enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime, + _MaxCols = ei_traits<T>::MaxColsAtCompileTime, + _Flags = ei_traits<T>::Flags + }; + + public: + typedef Matrix<_Scalar, + ei_traits<T>::RowsAtCompileTime, + ei_traits<T>::ColsAtCompileTime, + ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit, + ei_traits<T>::MaxRowsAtCompileTime, + ei_traits<T>::MaxColsAtCompileTime> type; +}; + +template<typename T, int n=1> struct ei_product_nested_rhs +{ + typedef typename ei_meta_if< + (ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit), + T, + typename ei_meta_if< + ((ei_traits<T>::Flags & EvalBeforeNestingBit) + || (ei_traits<T>::Flags & RowMajorBit) + || (!(ei_traits<T>::Flags & DirectAccessBit)) + || (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost), + typename ei_product_eval_to_column_major<T>::type, + const T& + >::ret + >::ret type; +}; + +template<typename T, int n=1> struct ei_product_nested_lhs +{ + typedef typename ei_meta_if< + ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit), + T, + typename ei_meta_if< + int(ei_traits<T>::Flags) & EvalBeforeNestingBit + || (!(int(ei_traits<T>::Flags) & DirectAccessBit)) + || (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost), + typename ei_eval<T>::type, + const T& + >::ret + >::ret type; +}; + template<typename Lhs, typename Rhs, int EvalMode> struct ei_traits<Product<Lhs, Rhs, EvalMode> > { typedef typename Lhs::Scalar Scalar; - typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; - typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; - typedef typename ei_unref<LhsNested>::type _LhsNested; - typedef typename ei_unref<RhsNested>::type _RhsNested; + // the cache friendly product evals lhs once only + // FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ? + typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct, + typename ei_product_nested_lhs<Lhs,1>::type, + typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested; + + // NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation + typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct, + typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type, + typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested; + typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested; + typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested; enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, @@ -174,6 +218,8 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> > ColsAtCompileTime = Rhs::ColsAtCompileTime, MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime, + // the vectorization flags are only used by the normal product, + // the other one is always vectorized ! _RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0), _LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0), _Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0, @@ -207,6 +253,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm typedef typename ei_traits<Product>::_LhsNested _LhsNested; typedef typename ei_traits<Product>::_RhsNested _RhsNested; + enum { + PacketSize = ei_packet_traits<Scalar>::size + }; + inline Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { @@ -214,12 +264,12 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm } /** \internal */ - template<typename DestDerived, int AlignedMode> - void _cacheOptimalEval(DestDerived& res, ei_meta_false) const; - #ifdef EIGEN_VECTORIZE - template<typename DestDerived, int AlignedMode> - void _cacheOptimalEval(DestDerived& res, ei_meta_true) const; - #endif + template<typename DestDerived> + void _cacheFriendlyEval(DestDerived& res) const; + + /** \internal */ + template<typename DestDerived> + void _cacheFriendlyEvalAndAdd(DestDerived& res) const; private: @@ -252,7 +302,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT) { PacketScalar res; - ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1, + ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1, Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT ? Lhs::ColsAtCompileTime : Dynamic, _LhsNested, _RhsNested, PacketScalar> @@ -279,16 +329,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm for(int i = 1; i < m_lhs.cols(); i++) res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res); return res; -// const PacketScalar tmp[4]; -// ei_punpack(m_rhs.packetCoeff(0,col), tmp); -// -// return -// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0], -// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1], -// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2] -// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3])))); } + template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_> + friend struct ei_cache_friendly_selector; protected: const LhsNested m_lhs; @@ -297,9 +341,6 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm /** \returns the matrix product of \c *this and \a other. * - * \note This function causes an immediate evaluation. If you want to perform a matrix product - * without immediate evaluation, call .lazy() on one of the matrices before taking the product. - * * \sa lazy(), operator*=(const MatrixBase&) */ template<typename Derived> @@ -322,168 +363,107 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other) return *this = *this * other; } +/** \internal */ +template<typename Derived> +template<typename Lhs,typename Rhs> +inline Derived& +MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) +{ + other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived()); + return derived(); +} + template<typename Derived> template<typename Lhs, typename Rhs> inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product) { - product.template _cacheOptimalEval<Derived, Aligned>(derived(), - #ifdef EIGEN_VECTORIZE - typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret() - #else - ei_meta_false() - #endif - ); + product._cacheFriendlyEval(derived()); return derived(); } -template<typename Lhs, typename Rhs, int EvalMode> -template<typename DestDerived, int AlignedMode> -void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const +template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess> +struct ei_cache_friendly_selector { - res.setZero(); - const int cols4 = m_lhs.cols() & 0xfffffffC; - if (Lhs::Flags&RowMajorBit) + typedef Product<Lhs,Rhs,EvalMode> Prod; + typedef typename Prod::_LhsNested _LhsNested; + typedef typename Prod::_RhsNested _RhsNested; + typedef typename Prod::Scalar Scalar; + static inline void eval(const Prod& product, DestDerived& res) { -// std::cout << "opt rhs\n"; - int j=0; - for(; j<cols4; j+=4) + if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + ) { - for(int k=0; k<this->rows(); ++k) - { - const Scalar tmp0 = m_lhs.coeff(k,j ); - const Scalar tmp1 = m_lhs.coeff(k,j+1); - const Scalar tmp2 = m_lhs.coeff(k,j+2); - const Scalar tmp3 = m_lhs.coeff(k,j+3); - for (int i=0; i<this->cols(); ++i) - res.coeffRef(k,i) += tmp0 * m_rhs.coeff(j+0,i) + tmp1 * m_rhs.coeff(j+1,i) - + tmp2 * m_rhs.coeff(j+2,i) + tmp3 * m_rhs.coeff(j+3,i); - } + res.setZero(); + ei_cache_friendly_product<Scalar>( + product._rows(), product._cols(), product.m_lhs.cols(), + _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(), + _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(), + Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() + ); } - for(; j<m_lhs.cols(); ++j) + else { - for(int k=0; k<this->rows(); ++k) - { - const Scalar tmp = m_rhs.coeff(k,j); - for (int i=0; i<this->cols(); ++i) - res.coeffRef(k,i) += tmp * m_lhs.coeff(j,i); - } + res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); } } - else + + static inline void eval_and_add(const Prod& product, DestDerived& res) { -// std::cout << "opt lhs\n"; - int j = 0; - for(; j<cols4; j+=4) + if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + ) { - for(int k=0; k<this->cols(); ++k) - { - const Scalar tmp0 = m_rhs.coeff(j ,k); - const Scalar tmp1 = m_rhs.coeff(j+1,k); - const Scalar tmp2 = m_rhs.coeff(j+2,k); - const Scalar tmp3 = m_rhs.coeff(j+3,k); - for (int i=0; i<this->rows(); ++i) - res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j+0) + tmp1 * m_lhs.coeff(i,j+1) - + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); - } + ei_cache_friendly_product<Scalar>( + product._rows(), product._cols(), product.m_lhs.cols(), + _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(), + _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(), + Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() + ); } - for(; j<m_lhs.cols(); ++j) + else { - for(int k=0; k<this->cols(); ++k) - { - const Scalar tmp = m_rhs.coeff(j,k); - for (int i=0; i<this->rows(); ++i) - res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); - } + res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); } } -} +}; -#ifdef EIGEN_VECTORIZE -template<typename Lhs, typename Rhs, int EvalMode> -template<typename DestDerived, int AlignedMode> -void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const +template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived> +struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false> { - - if (((Lhs::Flags&RowMajorBit) && (_cols() % ei_packet_traits<Scalar>::size != 0)) - || (_rows() % ei_packet_traits<Scalar>::size != 0)) + typedef Product<Lhs,Rhs,EvalMode> Prod; + typedef typename Prod::_LhsNested _LhsNested; + typedef typename Prod::_RhsNested _RhsNested; + typedef typename Prod::Scalar Scalar; + static inline void eval(const Prod& product, DestDerived& res) { - return _cacheOptimalEval<DestDerived, AlignedMode>(res, ei_meta_false()); + res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); } - res.setZero(); - const int cols4 = m_lhs.cols() & 0xfffffffC; - if (Lhs::Flags&RowMajorBit) - { -// std::cout << "packet rhs\n"; - int j=0; - for(; j<cols4; j+=4) - { - for(int k=0; k<this->rows(); k++) - { - const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_lhs.coeff(k,j+0)); - const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_lhs.coeff(k,j+1)); - const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_lhs.coeff(k,j+2)); - const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3)); - for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size) - { - res.template writePacketCoeff<AlignedMode>(k,i, - ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i), - ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i), - ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i), - ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i), - res.template packetCoeff<AlignedMode>(k,i))))) - ); - } - } - } - for(; j<m_lhs.cols(); ++j) - { - for(int k=0; k<this->rows(); k++) - { - const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j)); - for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size) - res.template writePacketCoeff<AlignedMode>(k,i, - ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i))); - } - } - } - else + static inline void eval_and_add(const Prod& product, DestDerived& res) { -// std::cout << "packet lhs\n"; - int k=0; - for(; k<cols4; k+=4) - { - for(int j=0; j<this->cols(); j+=1) - { - const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(k+0,j)); - const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(k+1,j)); - const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(k+2,j)); - const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(k+3,j)); - - for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) - { - res.template writePacketCoeff<AlignedMode>(i,j, - ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k), - ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1), - ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2), - ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3), - res.template packetCoeff<AlignedMode>(i,j))))) - ); - } - } - } - for(; k<m_lhs.cols(); ++k) - { - for(int j=0; j<this->cols(); j++) - { - const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j)); - for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) - res.template writePacketCoeff<AlignedMode>(k,j, - ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j))); - } - } + res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); } +}; + +template<typename Lhs, typename Rhs, int EvalMode> +template<typename DestDerived> +inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const +{ + ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived, + _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit> + ::eval(*this, res); +} + +template<typename Lhs, typename Rhs, int EvalMode> +template<typename DestDerived> +inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const +{ + ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived, + _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit> + ::eval_and_add(*this, res); } -#endif // EIGEN_VECTORIZE #endif // EIGEN_PRODUCT_H diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index fd28f4bab..c536a4608 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -107,7 +107,7 @@ template<typename Derived> inline Transpose<Derived> MatrixBase<Derived>::transpose() { - return Transpose<Derived>(derived()); + return derived(); } /** This is the const version of transpose(). \sa adjoint() */ @@ -115,7 +115,7 @@ template<typename Derived> inline const Transpose<Derived> MatrixBase<Derived>::transpose() const { - return Transpose<Derived>(derived()); + return derived(); } /** \returns an expression of the adjoint (i.e. conjugate transpose) of *this. @@ -130,7 +130,7 @@ inline const Transpose< , NestByValueBit, 0> > MatrixBase<Derived>::adjoint() const { - return conjugate().temporary().transpose(); + return conjugate().nestByValue(); } #endif // EIGEN_TRANSPOSE_H diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index e599d8a3d..0e6ed4b21 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -53,16 +53,16 @@ const unsigned int HereditaryBits = RowMajorBit | EvalBeforeAssigningBit | LargeBit; -// Possible values for the PartType parameter of part() and the ExtractType parameter of extract() +// Possible values for the Mode parameter of part() and of extract() const unsigned int Upper = UpperTriangularBit; const unsigned int StrictlyUpper = UpperTriangularBit | ZeroDiagBit; const unsigned int Lower = LowerTriangularBit; const unsigned int StrictlyLower = LowerTriangularBit | ZeroDiagBit; -// additional possible values for the PartType parameter of part() +// additional possible values for the Mode parameter of part() const unsigned int SelfAdjoint = SelfAdjointBit; -// additional possible values for the ExtractType parameter of extract() +// additional possible values for the Mode parameter of extract() const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit; const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit; diff --git a/Eigen/src/Core/ProductWIP.h b/disabled/Product.h index d1bc86a13..5d3e99281 100644 --- a/Eigen/src/Core/ProductWIP.h +++ b/disabled/Product.h @@ -26,8 +26,6 @@ #ifndef EIGEN_PRODUCT_H #define EIGEN_PRODUCT_H -#include "CacheFriendlyProduct.h" - template<int Index, int Size, typename Lhs, typename Rhs> struct ei_product_unroller { @@ -43,7 +41,7 @@ template<int Size, typename Lhs, typename Rhs> struct ei_product_unroller<0, Size, Lhs, Rhs> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, - typename Lhs::Scalar &res) + typename Lhs::Scalar &res) { res = lhs.coeff(row, 0) * rhs.coeff(0, col); } @@ -62,6 +60,12 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs> inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} }; +template<typename Lhs, typename Rhs> +struct ei_product_unroller<0, Dynamic, Lhs, Rhs> +{ + static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} +}; + template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar> struct ei_packet_product_unroller; @@ -115,6 +119,12 @@ struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar> inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} }; +template<typename Lhs, typename Rhs, typename PacketScalar> +struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar> +{ + static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} +}; + template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl { inline static typename Product::PacketScalar execute(const Product& product, int row, int col) { return product._packetCoeffRowMajor(row,col); } @@ -143,74 +153,18 @@ template<typename Lhs, typename Rhs> struct ei_product_eval_mode { enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD && Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && (!( (Lhs::Flags&RowMajorBit) && ((Rhs::Flags&RowMajorBit) ^ RowMajorBit))) ? CacheFriendlyProduct : NormalProduct }; }; -template<typename T> class ei_product_eval_to_column_major -{ - typedef typename ei_traits<T>::Scalar _Scalar; - enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime, - _MaxCols = ei_traits<T>::MaxColsAtCompileTime, - _Flags = ei_traits<T>::Flags - }; - - public: - typedef Matrix<_Scalar, - ei_traits<T>::RowsAtCompileTime, - ei_traits<T>::ColsAtCompileTime, - ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit, - ei_traits<T>::MaxRowsAtCompileTime, - ei_traits<T>::MaxColsAtCompileTime> type; -}; - -template<typename T, int n=1> struct ei_product_nested_rhs -{ - typedef typename ei_meta_if< - (ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit), - T, - typename ei_meta_if< - ((ei_traits<T>::Flags & EvalBeforeNestingBit) - || (ei_traits<T>::Flags & RowMajorBit) - || (!(ei_traits<T>::Flags & DirectAccessBit)) - || (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost), - typename ei_product_eval_to_column_major<T>::type, - const T& - >::ret - >::ret type; -}; - -template<typename T, int n=1> struct ei_product_nested_lhs -{ - typedef typename ei_meta_if< - ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit), - T, - typename ei_meta_if< - int(ei_traits<T>::Flags) & EvalBeforeNestingBit - || (!(int(ei_traits<T>::Flags) & DirectAccessBit)) - || (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost), - typename ei_eval<T>::type, - const T& - >::ret - >::ret type; -}; - template<typename Lhs, typename Rhs, int EvalMode> struct ei_traits<Product<Lhs, Rhs, EvalMode> > { typedef typename Lhs::Scalar Scalar; - // the cache friendly product evals lhs once only - // FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ? - typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct, - typename ei_product_nested_lhs<Lhs,1>::type, - typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested; - - // NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation - typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct, - typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type, - typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested; - typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested; - typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested; + typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; + typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; + typedef typename ei_unref<LhsNested>::type _LhsNested; + typedef typename ei_unref<RhsNested>::type _RhsNested; enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, @@ -220,8 +174,6 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> > ColsAtCompileTime = Rhs::ColsAtCompileTime, MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime, - // the vectorization flags are only used by the normal product, - // the other one is always vectorized ! _RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0), _LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0), _Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0, @@ -255,10 +207,6 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm typedef typename ei_traits<Product>::_LhsNested _LhsNested; typedef typename ei_traits<Product>::_RhsNested _RhsNested; - enum { - PacketSize = ei_packet_traits<Scalar>::size - }; - inline Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { @@ -266,12 +214,12 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm } /** \internal */ - template<typename DestDerived> - void _cacheFriendlyEval(DestDerived& res) const; - - /** \internal */ - template<typename DestDerived> - void _cacheFriendlyEvalAndAdd(DestDerived& res) const; + template<typename DestDerived, int AlignedMode> + void _cacheOptimalEval(DestDerived& res, ei_meta_false) const; + #ifdef EIGEN_VECTORIZE + template<typename DestDerived, int AlignedMode> + void _cacheOptimalEval(DestDerived& res, ei_meta_true) const; + #endif private: @@ -304,7 +252,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT) { PacketScalar res; - ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1, + ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1, Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT ? Lhs::ColsAtCompileTime : Dynamic, _LhsNested, _RhsNested, PacketScalar> @@ -331,10 +279,16 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm for(int i = 1; i < m_lhs.cols(); i++) res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res); return res; +// const PacketScalar tmp[4]; +// ei_punpack(m_rhs.packetCoeff(0,col), tmp); +// +// return +// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0], +// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1], +// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2] +// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3])))); } - template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_> - friend struct ei_cache_friendly_selector; protected: const LhsNested m_lhs; @@ -343,6 +297,9 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm /** \returns the matrix product of \c *this and \a other. * + * \note This function causes an immediate evaluation. If you want to perform a matrix product + * without immediate evaluation, call .lazy() on one of the matrices before taking the product. + * * \sa lazy(), operator*=(const MatrixBase&) */ template<typename Derived> @@ -365,107 +322,168 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other) return *this = *this * other; } -/** \internal */ -template<typename Derived> -template<typename Lhs,typename Rhs> -inline Derived& -MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) -{ - other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived()); - return derived(); -} - template<typename Derived> template<typename Lhs, typename Rhs> inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product) { - product._cacheFriendlyEval(derived()); + product.template _cacheOptimalEval<Derived, Aligned>(derived(), + #ifdef EIGEN_VECTORIZE + typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret() + #else + ei_meta_false() + #endif + ); return derived(); } -template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess> -struct ei_cache_friendly_selector +template<typename Lhs, typename Rhs, int EvalMode> +template<typename DestDerived, int AlignedMode> +void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const { - typedef Product<Lhs,Rhs,EvalMode> Prod; - typedef typename Prod::_LhsNested _LhsNested; - typedef typename Prod::_RhsNested _RhsNested; - typedef typename Prod::Scalar Scalar; - static inline void eval(const Prod& product, DestDerived& res) + res.setZero(); + const int cols4 = m_lhs.cols() & 0xfffffffC; + if (Lhs::Flags&RowMajorBit) { - if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - ) +// std::cout << "opt rhs\n"; + int j=0; + for(; j<cols4; j+=4) { - res.setZero(); - ei_cache_friendly_product<Scalar>( - product._rows(), product._cols(), product.m_lhs.cols(), - _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(), - _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(), - Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() - ); + for(int k=0; k<this->rows(); ++k) + { + const Scalar tmp0 = m_lhs.coeff(k,j ); + const Scalar tmp1 = m_lhs.coeff(k,j+1); + const Scalar tmp2 = m_lhs.coeff(k,j+2); + const Scalar tmp3 = m_lhs.coeff(k,j+3); + for (int i=0; i<this->cols(); ++i) + res.coeffRef(k,i) += tmp0 * m_rhs.coeff(j+0,i) + tmp1 * m_rhs.coeff(j+1,i) + + tmp2 * m_rhs.coeff(j+2,i) + tmp3 * m_rhs.coeff(j+3,i); + } } - else + for(; j<m_lhs.cols(); ++j) { - res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); + for(int k=0; k<this->rows(); ++k) + { + const Scalar tmp = m_rhs.coeff(k,j); + for (int i=0; i<this->cols(); ++i) + res.coeffRef(k,i) += tmp * m_lhs.coeff(j,i); + } } } - - static inline void eval_and_add(const Prod& product, DestDerived& res) + else { - if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - ) +// std::cout << "opt lhs\n"; + int j = 0; + for(; j<cols4; j+=4) { - ei_cache_friendly_product<Scalar>( - product._rows(), product._cols(), product.m_lhs.cols(), - _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(), - _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(), - Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() - ); + for(int k=0; k<this->cols(); ++k) + { + const Scalar tmp0 = m_rhs.coeff(j ,k); + const Scalar tmp1 = m_rhs.coeff(j+1,k); + const Scalar tmp2 = m_rhs.coeff(j+2,k); + const Scalar tmp3 = m_rhs.coeff(j+3,k); + for (int i=0; i<this->rows(); ++i) + res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j+0) + tmp1 * m_lhs.coeff(i,j+1) + + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); + } } - else + for(; j<m_lhs.cols(); ++j) { - res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); + for(int k=0; k<this->cols(); ++k) + { + const Scalar tmp = m_rhs.coeff(j,k); + for (int i=0; i<this->rows(); ++i) + res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); + } } } -}; +} -template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived> -struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false> +#ifdef EIGEN_VECTORIZE +template<typename Lhs, typename Rhs, int EvalMode> +template<typename DestDerived, int AlignedMode> +void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const { - typedef Product<Lhs,Rhs,EvalMode> Prod; - typedef typename Prod::_LhsNested _LhsNested; - typedef typename Prod::_RhsNested _RhsNested; - typedef typename Prod::Scalar Scalar; - static inline void eval(const Prod& product, DestDerived& res) + + if (((Lhs::Flags&RowMajorBit) && (_cols() % ei_packet_traits<Scalar>::size != 0)) + || (_rows() % ei_packet_traits<Scalar>::size != 0)) { - res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); + return _cacheOptimalEval<DestDerived, AlignedMode>(res, ei_meta_false()); } - static inline void eval_and_add(const Prod& product, DestDerived& res) + res.setZero(); + const int cols4 = m_lhs.cols() & 0xfffffffC; + if (Lhs::Flags&RowMajorBit) { - res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); +// std::cout << "packet rhs\n"; + int j=0; + for(; j<cols4; j+=4) + { + for(int k=0; k<this->rows(); k++) + { + const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_lhs.coeff(k,j+0)); + const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_lhs.coeff(k,j+1)); + const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_lhs.coeff(k,j+2)); + const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3)); + for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size) + { + res.template writePacketCoeff<AlignedMode>(k,i, + ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i), + ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i), + ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i), + ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i), + res.template packetCoeff<AlignedMode>(k,i))))) + ); + } + } + } + for(; j<m_lhs.cols(); ++j) + { + for(int k=0; k<this->rows(); k++) + { + const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j)); + for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size) + res.template writePacketCoeff<AlignedMode>(k,i, + ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i))); + } + } + } + else + { +// std::cout << "packet lhs\n"; + int k=0; + for(; k<cols4; k+=4) + { + for(int j=0; j<this->cols(); j+=1) + { + const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(k+0,j)); + const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(k+1,j)); + const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(k+2,j)); + const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(k+3,j)); + + for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) + { + res.template writePacketCoeff<AlignedMode>(i,j, + ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k), + ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1), + ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2), + ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3), + res.template packetCoeff<AlignedMode>(i,j))))) + ); + } + } + } + for(; k<m_lhs.cols(); ++k) + { + for(int j=0; j<this->cols(); j++) + { + const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j)); + for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) + res.template writePacketCoeff<AlignedMode>(k,j, + ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j))); + } + } } -}; - -template<typename Lhs, typename Rhs, int EvalMode> -template<typename DestDerived> -inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const -{ - ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived, - _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit> - ::eval(*this, res); -} - -template<typename Lhs, typename Rhs, int EvalMode> -template<typename DestDerived> -inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const -{ - ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived, - _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit> - ::eval_and_add(*this, res); } +#endif // EIGEN_VECTORIZE #endif // EIGEN_PRODUCT_H diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 39af38afe..4e1a7696d 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -1,5 +1,11 @@ IF(BUILD_DOC) +IF(CMAKE_COMPILER_IS_GNUCXX) + IF(CMAKE_SYSTEM_NAME MATCHES Linux) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -g1") + ENDIF(CMAKE_SYSTEM_NAME MATCHES Linux) +ENDIF(CMAKE_COMPILER_IS_GNUCXX) + CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6cd98800a..238006bdf 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,11 @@ IF(BUILD_TESTS) +IF(CMAKE_COMPILER_IS_GNUCXX) + IF(CMAKE_SYSTEM_NAME MATCHES Linux) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -g2") + ENDIF(CMAKE_SYSTEM_NAME MATCHES Linux) +ENDIF(CMAKE_COMPILER_IS_GNUCXX) + OPTION(EIGEN_NO_ASSERTION_CHECKING "Disable checking of assertions" OFF) # similar to SET_TARGET_PROPERTIES but append the property instead of overwritting it @@ -64,14 +70,14 @@ FIND_PACKAGE(Qt4 REQUIRED) INCLUDE_DIRECTORIES( ${QT_INCLUDE_DIR} ) EI_ADD_TEST(basicstuff) -EI_ADD_TEST(miscmatrices) +EI_ADD_TEST(linearstructure) +EI_ADD_TEST(cwiseop) +EI_ADD_TEST(product) EI_ADD_TEST(adjoint) EI_ADD_TEST(submatrices) +EI_ADD_TEST(miscmatrices) EI_ADD_TEST(smallvectors) -EI_ADD_TEST(cwiseop) EI_ADD_TEST(map) -EI_ADD_TEST(linearstructure) -EI_ADD_TEST(product) EI_ADD_TEST(triangular) EI_ADD_TEST(cholesky) # EI_ADD_TEST(determinant) diff --git a/test/adjoint.cpp b/test/adjoint.cpp index ae1b002e5..9a15c4986 100644 --- a/test/adjoint.cpp +++ b/test/adjoint.cpp @@ -51,23 +51,15 @@ template<typename MatrixType> void adjoint(const MatrixType& m) Scalar s1 = ei_random<Scalar>(), s2 = ei_random<Scalar>(); - // check involutivity of adjoint, transpose, conjugate - VERIFY_IS_APPROX(m1.transpose().transpose(), m1); - VERIFY_IS_APPROX(m1.conjugate().conjugate(), m1); - VERIFY_IS_APPROX(m1.adjoint().adjoint(), m1); - // check basic compatibility of adjoint, transpose, conjugate VERIFY_IS_APPROX(m1.transpose().conjugate().adjoint(), m1); VERIFY_IS_APPROX(m1.adjoint().conjugate().transpose(), m1); - if(!NumTraits<Scalar>::IsComplex) - VERIFY_IS_APPROX(m1.adjoint().transpose(), m1); // check multiplicative behavior - VERIFY_IS_APPROX((m1.transpose() * m2).transpose(), m2.transpose() * m1); + std::cout << (m1.adjoint() * m2).adjoint() << std::endl; + std::cout << "------------------------------" << std::endl; + std::cout << m2.adjoint() * m1 << std::endl; VERIFY_IS_APPROX((m1.adjoint() * m2).adjoint(), m2.adjoint() * m1); - VERIFY_IS_APPROX((m1.transpose() * m2).conjugate(), m1.adjoint() * m2.conjugate()); - VERIFY_IS_APPROX((s1 * m1).transpose(), s1 * m1.transpose()); - VERIFY_IS_APPROX((s1 * m1).conjugate(), ei_conj(s1) * m1.conjugate()); VERIFY_IS_APPROX((s1 * m1).adjoint(), ei_conj(s1) * m1.adjoint()); // check basic properties of dot, norm, norm2 diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index a7b058d69..8beb33925 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -62,11 +62,7 @@ template<typename MatrixType> void linearStructure(const MatrixType& m) VERIFY_IS_APPROX(-m2+m1+m2, m1); VERIFY_IS_APPROX(m1*s1, s1*m1); VERIFY_IS_APPROX((m1+m2)*s1, s1*m1+s1*m2); - VERIFY_IS_APPROX((s1+s2)*m1, m1*s1+m1*s2); - VERIFY_IS_APPROX((m1-m2)*s1, s1*m1-s1*m2); - VERIFY_IS_APPROX((s1-s2)*m1, m1*s1-m1*s2); VERIFY_IS_APPROX((-m1+m2)*s1, -s1*m1+s1*m2); - VERIFY_IS_APPROX((-s1+s2)*m1, -m1*s1+m1*s2); m3 = m2; m3 += m1; VERIFY_IS_APPROX(m3, m1+m2); m3 = m2; m3 -= m1; diff --git a/test/product.cpp b/test/product.cpp index 70b41212a..a89497763 100644 --- a/test/product.cpp +++ b/test/product.cpp @@ -73,14 +73,10 @@ template<typename MatrixType> void product(const MatrixType& m) VERIFY_IS_APPROX(s1*(square*m1), (s1*square)*m1); VERIFY_IS_APPROX(s1*(square*m1), square*(m1*s1)); - // continue testing Product.h: lazy product - VERIFY_IS_APPROX(square.lazy() * m1, square*m1); - VERIFY_IS_APPROX(square * m1.lazy(), square*m1); // again, test operator() to check const-qualification s1 += (square.lazy() * m1)(r,c); // test Product.h together with Identity.h - VERIFY_IS_APPROX(m1, identity*m1); VERIFY_IS_APPROX(v1, identity*v1); // again, test operator() to check const-qualification VERIFY_IS_APPROX(MatrixType::identity(rows, cols)(r,c), static_cast<Scalar>(r==c)); @@ -92,18 +88,14 @@ template<typename MatrixType> void product(const MatrixType& m) void test_product() { for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST( product(Matrix<float, 1, 1>()) ); - CALL_SUBTEST( product(Matrix<float, 3, 3>()) ); - CALL_SUBTEST( product(Matrix<float, 4, 2>()) ); + CALL_SUBTEST( product(Matrix3i()) ); + CALL_SUBTEST( product(Matrix<float, 3, 2>()) ); CALL_SUBTEST( product(Matrix4d()) ); } for(int i = 0; i < g_repeat; i++) { - int rows = ei_random<int>(1,320); - int cols = ei_random<int>(1,320); - CALL_SUBTEST( product(MatrixXf(rows, cols)) ); - CALL_SUBTEST( product(MatrixXd(rows, cols)) ); - CALL_SUBTEST( product(MatrixXi(rows, cols)) ); - CALL_SUBTEST( product(MatrixXcf(rows, cols)) ); - CALL_SUBTEST( product(MatrixXcd(rows, cols)) ); + CALL_SUBTEST( product(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) ); + CALL_SUBTEST( product(MatrixXd(ei_random<int>(1,320), ei_random<int>(1,320))) ); + CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,320), ei_random<int>(1,320))) ); + CALL_SUBTEST( product(MatrixXcf(ei_random<int>(1,50), ei_random<int>(1,50))) ); } } |