diff options
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/Core | 5 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseUnaryOp.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/DiagonalMatrix.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Flagged.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/MatrixBase.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 322 | ||||
-rw-r--r-- | Eigen/src/Core/ProductWIP.h | 471 | ||||
-rw-r--r-- | Eigen/src/Core/Transpose.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/util/Constants.h | 6 |
9 files changed, 180 insertions, 666 deletions
diff --git a/Eigen/Core b/Eigen/Core index 3e1b5184d..c81341103 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -35,7 +35,9 @@ namespace Eigen { #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseNullaryOp.h" -#include "src/Core/ProductWIP.h" +#include "src/Core/InverseProduct.h" +#include "src/Core/CacheFriendlyProduct.h" +#include "src/Core/Product.h" #include "src/Core/Block.h" #include "src/Core/Minor.h" #include "src/Core/Transpose.h" @@ -51,7 +53,6 @@ namespace Eigen { #include "src/Core/CommaInitializer.h" #include "src/Core/Extract.h" #include "src/Core/Part.h" -#include "src/Core/InverseProduct.h" } // namespace Eigen diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index 26197b369..8d2737e12 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -118,7 +118,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_opposite_op<typename ei_traits<Derived>::Scalar>,Derived> MatrixBase<Derived>::operator-() const { - return CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise absolute value of \c *this @@ -127,7 +127,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_abs_op<typename ei_traits<Derived>::Scalar>,Derived> MatrixBase<Derived>::cwiseAbs() const { - return CwiseUnaryOp<ei_scalar_abs_op<Scalar>,Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise squared absolute value of \c *this @@ -136,7 +136,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_abs2_op<typename ei_traits<Derived>::Scalar>,Derived> MatrixBase<Derived>::cwiseAbs2() const { - return CwiseUnaryOp<ei_scalar_abs2_op<Scalar>,Derived>(derived()); + return derived(); } /** \returns an expression of the complex conjugate of *this. @@ -146,7 +146,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_conjugate_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::conjugate() const { - return CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of *this with the \a Scalar type casted to @@ -161,7 +161,7 @@ template<typename NewType> inline const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived> MatrixBase<Derived>::cast() const { - return CwiseUnaryOp<ei_scalar_cast_op<Scalar, NewType>, Derived>(derived()); + return derived(); } /** \relates MatrixBase */ @@ -201,7 +201,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_sqrt_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseSqrt() const { - return CwiseUnaryOp<ei_scalar_sqrt_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise exponential of *this. */ @@ -209,7 +209,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_exp_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseExp() const { - return CwiseUnaryOp<ei_scalar_exp_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise logarithm of *this. */ @@ -217,7 +217,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_log_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseLog() const { - return CwiseUnaryOp<ei_scalar_log_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise cosine of *this. */ @@ -225,7 +225,7 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_cos_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseCos() const { - return CwiseUnaryOp<ei_scalar_cos_op<Scalar>, Derived>(derived()); + return derived(); } /** \returns an expression of the coefficient-wise sine of *this. */ @@ -233,10 +233,10 @@ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_sin_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwiseSin() const { - return CwiseUnaryOp<ei_scalar_sin_op<Scalar>, Derived>(derived()); + return derived(); } -/** \relates MatrixBase */ +/** \returns an expression of the coefficient-wise power of *this to the given exponent. */ template<typename Derived> inline const CwiseUnaryOp<ei_scalar_pow_op<typename ei_traits<Derived>::Scalar>, Derived> MatrixBase<Derived>::cwisePow(const Scalar& exponent) const diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index bd420511f..0581c669c 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -95,7 +95,7 @@ template<typename Derived> inline const DiagonalMatrix<Derived> MatrixBase<Derived>::asDiagonal() const { - return DiagonalMatrix<Derived>(derived()); + return derived(); } /** \returns true if *this is approximately equal to a diagonal matrix, diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index 9167c8a97..f287abee4 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -33,7 +33,7 @@ * \param Added the flags added to the expression * \param Removed the flags removed from the expression (has priority over Added). * - * This class represents an expression whose flags have been modified + * This class represents an expression whose flags have been modified. * It is the return type of MatrixBase::flagged() * and most of the time this is the only way it is used. * @@ -94,7 +94,11 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas } protected: - const ExpressionType m_matrix; + const typename ei_meta_if< + Added & ~Removed & NestByValueBit, + ExpressionType, + typename ExpressionType::Nested + >::ret m_matrix; }; /** \returns an expression of *this with added flags @@ -121,7 +125,7 @@ MatrixBase<Derived>::lazy() const */ template<typename Derived> inline const Flagged<Derived, NestByValueBit, 0> -MatrixBase<Derived>::temporary() const +MatrixBase<Derived>::nestByValue() const { return derived(); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 18525a5d1..1a634ce37 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -452,7 +452,7 @@ template<typename Derived> class MatrixBase template<unsigned int Added> const Flagged<Derived, Added, 0> marked() const; const Flagged<Derived, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit> lazy() const; - const Flagged<Derived, NestByValueBit, 0> temporary() const; + const Flagged<Derived, NestByValueBit, 0> nestByValue() const; /** \returns number of elements to skip to pass from one row (resp. column) to another * for a row-major (resp. column-major) matrix. diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 5d3e99281..15867d704 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -41,7 +41,7 @@ template<int Size, typename Lhs, typename Rhs> struct ei_product_unroller<0, Size, Lhs, Rhs> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, - typename Lhs::Scalar &res) + typename Lhs::Scalar &res) { res = lhs.coeff(row, 0) * rhs.coeff(0, col); } @@ -60,12 +60,6 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs> inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} }; -template<typename Lhs, typename Rhs> -struct ei_product_unroller<0, Dynamic, Lhs, Rhs> -{ - static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} -}; - template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar> struct ei_packet_product_unroller; @@ -119,12 +113,6 @@ struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar> inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} }; -template<typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar> -{ - static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} -}; - template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl { inline static typename Product::PacketScalar execute(const Product& product, int row, int col) { return product._packetCoeffRowMajor(row,col); } @@ -153,18 +141,74 @@ template<typename Lhs, typename Rhs> struct ei_product_eval_mode { enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD && Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && (!( (Lhs::Flags&RowMajorBit) && ((Rhs::Flags&RowMajorBit) ^ RowMajorBit))) + && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? CacheFriendlyProduct : NormalProduct }; }; +template<typename T> class ei_product_eval_to_column_major +{ + typedef typename ei_traits<T>::Scalar _Scalar; + enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime, + _MaxCols = ei_traits<T>::MaxColsAtCompileTime, + _Flags = ei_traits<T>::Flags + }; + + public: + typedef Matrix<_Scalar, + ei_traits<T>::RowsAtCompileTime, + ei_traits<T>::ColsAtCompileTime, + ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit, + ei_traits<T>::MaxRowsAtCompileTime, + ei_traits<T>::MaxColsAtCompileTime> type; +}; + +template<typename T, int n=1> struct ei_product_nested_rhs +{ + typedef typename ei_meta_if< + (ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit), + T, + typename ei_meta_if< + ((ei_traits<T>::Flags & EvalBeforeNestingBit) + || (ei_traits<T>::Flags & RowMajorBit) + || (!(ei_traits<T>::Flags & DirectAccessBit)) + || (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost), + typename ei_product_eval_to_column_major<T>::type, + const T& + >::ret + >::ret type; +}; + +template<typename T, int n=1> struct ei_product_nested_lhs +{ + typedef typename ei_meta_if< + ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit), + T, + typename ei_meta_if< + int(ei_traits<T>::Flags) & EvalBeforeNestingBit + || (!(int(ei_traits<T>::Flags) & DirectAccessBit)) + || (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost), + typename ei_eval<T>::type, + const T& + >::ret + >::ret type; +}; + template<typename Lhs, typename Rhs, int EvalMode> struct ei_traits<Product<Lhs, Rhs, EvalMode> > { typedef typename Lhs::Scalar Scalar; - typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; - typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; - typedef typename ei_unref<LhsNested>::type _LhsNested; - typedef typename ei_unref<RhsNested>::type _RhsNested; + // the cache friendly product evals lhs once only + // FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ? + typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct, + typename ei_product_nested_lhs<Lhs,1>::type, + typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested; + + // NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation + typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct, + typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type, + typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested; + typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested; + typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested; enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, @@ -174,6 +218,8 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> > ColsAtCompileTime = Rhs::ColsAtCompileTime, MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime, + // the vectorization flags are only used by the normal product, + // the other one is always vectorized ! _RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0), _LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0), _Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0, @@ -207,6 +253,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm typedef typename ei_traits<Product>::_LhsNested _LhsNested; typedef typename ei_traits<Product>::_RhsNested _RhsNested; + enum { + PacketSize = ei_packet_traits<Scalar>::size + }; + inline Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { @@ -214,12 +264,12 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm } /** \internal */ - template<typename DestDerived, int AlignedMode> - void _cacheOptimalEval(DestDerived& res, ei_meta_false) const; - #ifdef EIGEN_VECTORIZE - template<typename DestDerived, int AlignedMode> - void _cacheOptimalEval(DestDerived& res, ei_meta_true) const; - #endif + template<typename DestDerived> + void _cacheFriendlyEval(DestDerived& res) const; + + /** \internal */ + template<typename DestDerived> + void _cacheFriendlyEvalAndAdd(DestDerived& res) const; private: @@ -252,7 +302,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT) { PacketScalar res; - ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1, + ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1, Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT ? Lhs::ColsAtCompileTime : Dynamic, _LhsNested, _RhsNested, PacketScalar> @@ -279,16 +329,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm for(int i = 1; i < m_lhs.cols(); i++) res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res); return res; -// const PacketScalar tmp[4]; -// ei_punpack(m_rhs.packetCoeff(0,col), tmp); -// -// return -// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0], -// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1], -// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2] -// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3])))); } + template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_> + friend struct ei_cache_friendly_selector; protected: const LhsNested m_lhs; @@ -297,9 +341,6 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm /** \returns the matrix product of \c *this and \a other. * - * \note This function causes an immediate evaluation. If you want to perform a matrix product - * without immediate evaluation, call .lazy() on one of the matrices before taking the product. - * * \sa lazy(), operator*=(const MatrixBase&) */ template<typename Derived> @@ -322,168 +363,107 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other) return *this = *this * other; } +/** \internal */ +template<typename Derived> +template<typename Lhs,typename Rhs> +inline Derived& +MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) +{ + other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived()); + return derived(); +} + template<typename Derived> template<typename Lhs, typename Rhs> inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product) { - product.template _cacheOptimalEval<Derived, Aligned>(derived(), - #ifdef EIGEN_VECTORIZE - typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret() - #else - ei_meta_false() - #endif - ); + product._cacheFriendlyEval(derived()); return derived(); } -template<typename Lhs, typename Rhs, int EvalMode> -template<typename DestDerived, int AlignedMode> -void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const +template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess> +struct ei_cache_friendly_selector { - res.setZero(); - const int cols4 = m_lhs.cols() & 0xfffffffC; - if (Lhs::Flags&RowMajorBit) + typedef Product<Lhs,Rhs,EvalMode> Prod; + typedef typename Prod::_LhsNested _LhsNested; + typedef typename Prod::_RhsNested _RhsNested; + typedef typename Prod::Scalar Scalar; + static inline void eval(const Prod& product, DestDerived& res) { -// std::cout << "opt rhs\n"; - int j=0; - for(; j<cols4; j+=4) + if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + ) { - for(int k=0; k<this->rows(); ++k) - { - const Scalar tmp0 = m_lhs.coeff(k,j ); - const Scalar tmp1 = m_lhs.coeff(k,j+1); - const Scalar tmp2 = m_lhs.coeff(k,j+2); - const Scalar tmp3 = m_lhs.coeff(k,j+3); - for (int i=0; i<this->cols(); ++i) - res.coeffRef(k,i) += tmp0 * m_rhs.coeff(j+0,i) + tmp1 * m_rhs.coeff(j+1,i) - + tmp2 * m_rhs.coeff(j+2,i) + tmp3 * m_rhs.coeff(j+3,i); - } + res.setZero(); + ei_cache_friendly_product<Scalar>( + product._rows(), product._cols(), product.m_lhs.cols(), + _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(), + _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(), + Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() + ); } - for(; j<m_lhs.cols(); ++j) + else { - for(int k=0; k<this->rows(); ++k) - { - const Scalar tmp = m_rhs.coeff(k,j); - for (int i=0; i<this->cols(); ++i) - res.coeffRef(k,i) += tmp * m_lhs.coeff(j,i); - } + res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); } } - else + + static inline void eval_and_add(const Prod& product, DestDerived& res) { -// std::cout << "opt lhs\n"; - int j = 0; - for(; j<cols4; j+=4) + if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + ) { - for(int k=0; k<this->cols(); ++k) - { - const Scalar tmp0 = m_rhs.coeff(j ,k); - const Scalar tmp1 = m_rhs.coeff(j+1,k); - const Scalar tmp2 = m_rhs.coeff(j+2,k); - const Scalar tmp3 = m_rhs.coeff(j+3,k); - for (int i=0; i<this->rows(); ++i) - res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j+0) + tmp1 * m_lhs.coeff(i,j+1) - + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); - } + ei_cache_friendly_product<Scalar>( + product._rows(), product._cols(), product.m_lhs.cols(), + _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(), + _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(), + Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() + ); } - for(; j<m_lhs.cols(); ++j) + else { - for(int k=0; k<this->cols(); ++k) - { - const Scalar tmp = m_rhs.coeff(j,k); - for (int i=0; i<this->rows(); ++i) - res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); - } + res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); } } -} +}; -#ifdef EIGEN_VECTORIZE -template<typename Lhs, typename Rhs, int EvalMode> -template<typename DestDerived, int AlignedMode> -void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const +template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived> +struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false> { - - if (((Lhs::Flags&RowMajorBit) && (_cols() % ei_packet_traits<Scalar>::size != 0)) - || (_rows() % ei_packet_traits<Scalar>::size != 0)) + typedef Product<Lhs,Rhs,EvalMode> Prod; + typedef typename Prod::_LhsNested _LhsNested; + typedef typename Prod::_RhsNested _RhsNested; + typedef typename Prod::Scalar Scalar; + static inline void eval(const Prod& product, DestDerived& res) { - return _cacheOptimalEval<DestDerived, AlignedMode>(res, ei_meta_false()); + res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); } - res.setZero(); - const int cols4 = m_lhs.cols() & 0xfffffffC; - if (Lhs::Flags&RowMajorBit) - { -// std::cout << "packet rhs\n"; - int j=0; - for(; j<cols4; j+=4) - { - for(int k=0; k<this->rows(); k++) - { - const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_lhs.coeff(k,j+0)); - const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_lhs.coeff(k,j+1)); - const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_lhs.coeff(k,j+2)); - const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3)); - for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size) - { - res.template writePacketCoeff<AlignedMode>(k,i, - ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i), - ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i), - ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i), - ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i), - res.template packetCoeff<AlignedMode>(k,i))))) - ); - } - } - } - for(; j<m_lhs.cols(); ++j) - { - for(int k=0; k<this->rows(); k++) - { - const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j)); - for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size) - res.template writePacketCoeff<AlignedMode>(k,i, - ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i))); - } - } - } - else + static inline void eval_and_add(const Prod& product, DestDerived& res) { -// std::cout << "packet lhs\n"; - int k=0; - for(; k<cols4; k+=4) - { - for(int j=0; j<this->cols(); j+=1) - { - const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(k+0,j)); - const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(k+1,j)); - const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(k+2,j)); - const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(k+3,j)); - - for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) - { - res.template writePacketCoeff<AlignedMode>(i,j, - ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k), - ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1), - ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2), - ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3), - res.template packetCoeff<AlignedMode>(i,j))))) - ); - } - } - } - for(; k<m_lhs.cols(); ++k) - { - for(int j=0; j<this->cols(); j++) - { - const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j)); - for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) - res.template writePacketCoeff<AlignedMode>(k,j, - ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j))); - } - } + res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); } +}; + +template<typename Lhs, typename Rhs, int EvalMode> +template<typename DestDerived> +inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const +{ + ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived, + _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit> + ::eval(*this, res); +} + +template<typename Lhs, typename Rhs, int EvalMode> +template<typename DestDerived> +inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const +{ + ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived, + _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit> + ::eval_and_add(*this, res); } -#endif // EIGEN_VECTORIZE #endif // EIGEN_PRODUCT_H diff --git a/Eigen/src/Core/ProductWIP.h b/Eigen/src/Core/ProductWIP.h deleted file mode 100644 index d1bc86a13..000000000 --- a/Eigen/src/Core/ProductWIP.h +++ /dev/null @@ -1,471 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr> -// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> -// -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. - -#ifndef EIGEN_PRODUCT_H -#define EIGEN_PRODUCT_H - -#include "CacheFriendlyProduct.h" - -template<int Index, int Size, typename Lhs, typename Rhs> -struct ei_product_unroller -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, - typename Lhs::Scalar &res) - { - ei_product_unroller<Index-1, Size, Lhs, Rhs>::run(row, col, lhs, rhs, res); - res += lhs.coeff(row, Index) * rhs.coeff(Index, col); - } -}; - -template<int Size, typename Lhs, typename Rhs> -struct ei_product_unroller<0, Size, Lhs, Rhs> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, - typename Lhs::Scalar &res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - } -}; - -template<int Index, typename Lhs, typename Rhs> -struct ei_product_unroller<Index, Dynamic, Lhs, Rhs> -{ - inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} -}; - -// prevent buggy user code from causing an infinite recursion -template<int Index, typename Lhs, typename Rhs> -struct ei_product_unroller<Index, 0, Lhs, Rhs> -{ - inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} -}; - -template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_unroller; - -template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_unroller<true, Index, Size, Lhs, Rhs, PacketScalar> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - ei_packet_product_unroller<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res); - res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packetCoeff<Aligned>(Index, col), res); - } -}; - -template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_unroller<false, Index, Size, Lhs, Rhs, PacketScalar> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - ei_packet_product_unroller<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res); - res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res); - } -}; - -template<int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_unroller<true, 0, Size, Lhs, Rhs, PacketScalar> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff<Aligned>(0, col)); - } -}; - -template<int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_unroller<false, 0, Size, Lhs, Rhs, PacketScalar> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - res = ei_pmul(lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col))); - } -}; - -template<bool RowMajor, int Index, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_unroller<RowMajor, Index, Dynamic, Lhs, Rhs, PacketScalar> -{ - inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} -}; - -template<int Index, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar> -{ - inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {} -}; - -template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl { - inline static typename Product::PacketScalar execute(const Product& product, int row, int col) - { return product._packetCoeffRowMajor(row,col); } -}; - -template<typename Product> struct ProductPacketCoeffImpl<Product, false> { - inline static typename Product::PacketScalar execute(const Product& product, int row, int col) - { return product._packetCoeffColumnMajor(row,col); } -}; - -/** \class Product - * - * \brief Expression of the product of two matrices - * - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * \param EvalMode internal use only - * - * This class represents an expression of the product of two matrices. - * It is the return type of the operator* between matrices, and most of the time - * this is the only way it is used. - * - * \sa class Sum, class Difference - */ -template<typename Lhs, typename Rhs> struct ei_product_eval_mode -{ - enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - ? CacheFriendlyProduct : NormalProduct }; -}; - -template<typename T> class ei_product_eval_to_column_major -{ - typedef typename ei_traits<T>::Scalar _Scalar; - enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime, - _MaxCols = ei_traits<T>::MaxColsAtCompileTime, - _Flags = ei_traits<T>::Flags - }; - - public: - typedef Matrix<_Scalar, - ei_traits<T>::RowsAtCompileTime, - ei_traits<T>::ColsAtCompileTime, - ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit, - ei_traits<T>::MaxRowsAtCompileTime, - ei_traits<T>::MaxColsAtCompileTime> type; -}; - -template<typename T, int n=1> struct ei_product_nested_rhs -{ - typedef typename ei_meta_if< - (ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit), - T, - typename ei_meta_if< - ((ei_traits<T>::Flags & EvalBeforeNestingBit) - || (ei_traits<T>::Flags & RowMajorBit) - || (!(ei_traits<T>::Flags & DirectAccessBit)) - || (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost), - typename ei_product_eval_to_column_major<T>::type, - const T& - >::ret - >::ret type; -}; - -template<typename T, int n=1> struct ei_product_nested_lhs -{ - typedef typename ei_meta_if< - ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit), - T, - typename ei_meta_if< - int(ei_traits<T>::Flags) & EvalBeforeNestingBit - || (!(int(ei_traits<T>::Flags) & DirectAccessBit)) - || (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost), - typename ei_eval<T>::type, - const T& - >::ret - >::ret type; -}; - -template<typename Lhs, typename Rhs, int EvalMode> -struct ei_traits<Product<Lhs, Rhs, EvalMode> > -{ - typedef typename Lhs::Scalar Scalar; - // the cache friendly product evals lhs once only - // FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ? - typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct, - typename ei_product_nested_lhs<Lhs,1>::type, - typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested; - - // NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation - typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct, - typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type, - typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested; - typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested; - typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested; - enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - RowsAtCompileTime = Lhs::RowsAtCompileTime, - ColsAtCompileTime = Rhs::ColsAtCompileTime, - MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, - MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime, - // the vectorization flags are only used by the normal product, - // the other one is always vectorized ! - _RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0), - _LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0), - _Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0, - _RowMajor = (RhsFlags & RowMajorBit) - && (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)), - _LostBits = HereditaryBits & ~( - (_RowMajor ? 0 : RowMajorBit) - | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)), - Flags = ((unsigned int)(LhsFlags | RhsFlags) & _LostBits) - | EvalBeforeAssigningBit - | EvalBeforeNestingBit - | (_Vectorizable ? VectorizableBit : 0), - CoeffReadCost - = Lhs::ColsAtCompileTime == Dynamic - ? Dynamic - : Lhs::ColsAtCompileTime - * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) - + (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost - }; -}; - -template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignment_operator, - public MatrixBase<Product<Lhs, Rhs, EvalMode> > -{ - public: - - EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - friend class ProductPacketCoeffImpl<Product,Flags&RowMajorBit>; - typedef typename ei_traits<Product>::LhsNested LhsNested; - typedef typename ei_traits<Product>::RhsNested RhsNested; - typedef typename ei_traits<Product>::_LhsNested _LhsNested; - typedef typename ei_traits<Product>::_RhsNested _RhsNested; - - enum { - PacketSize = ei_packet_traits<Scalar>::size - }; - - inline Product(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) - { - ei_assert(lhs.cols() == rhs.rows()); - } - - /** \internal */ - template<typename DestDerived> - void _cacheFriendlyEval(DestDerived& res) const; - - /** \internal */ - template<typename DestDerived> - void _cacheFriendlyEvalAndAdd(DestDerived& res) const; - - private: - - inline int _rows() const { return m_lhs.rows(); } - inline int _cols() const { return m_rhs.cols(); } - - const Scalar _coeff(int row, int col) const - { - Scalar res; - const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - if(unroll) - { - ei_product_unroller<Lhs::ColsAtCompileTime-1, - unroll ? Lhs::ColsAtCompileTime : Dynamic, - _LhsNested, _RhsNested> - ::run(row, col, m_lhs, m_rhs, res); - } - else - { - res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col); - for(int i = 1; i < m_lhs.cols(); i++) - res += m_lhs.coeff(row, i) * m_rhs.coeff(i, col); - } - return res; - } - - template<int LoadMode> - const PacketScalar _packetCoeff(int row, int col) const - { - if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT) - { - PacketScalar res; - ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1, - Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT - ? Lhs::ColsAtCompileTime : Dynamic, - _LhsNested, _RhsNested, PacketScalar> - ::run(row, col, m_lhs, m_rhs, res); - return res; - } - else - return ProductPacketCoeffImpl<Product,Flags&RowMajorBit>::execute(*this, row, col); - } - - const PacketScalar _packetCoeffRowMajor(int row, int col) const - { - PacketScalar res; - res = ei_pmul(ei_pset1(m_lhs.coeff(row, 0)),m_rhs.template packetCoeff<Aligned>(0, col)); - for(int i = 1; i < m_lhs.cols(); i++) - res = ei_pmadd(ei_pset1(m_lhs.coeff(row, i)), m_rhs.template packetCoeff<Aligned>(i, col), res); - return res; - } - - const PacketScalar _packetCoeffColumnMajor(int row, int col) const - { - PacketScalar res; - res = ei_pmul(m_lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(m_rhs.coeff(0, col))); - for(int i = 1; i < m_lhs.cols(); i++) - res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res); - return res; - } - - template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_> - friend struct ei_cache_friendly_selector; - - protected: - const LhsNested m_lhs; - const RhsNested m_rhs; -}; - -/** \returns the matrix product of \c *this and \a other. - * - * \sa lazy(), operator*=(const MatrixBase&) - */ -template<typename Derived> -template<typename OtherDerived> -inline const Product<Derived,OtherDerived> -MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const -{ - return Product<Derived,OtherDerived>(derived(), other.derived()); -} - -/** replaces \c *this by \c *this * \a other. - * - * \returns a reference to \c *this - */ -template<typename Derived> -template<typename OtherDerived> -inline Derived & -MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other) -{ - return *this = *this * other; -} - -/** \internal */ -template<typename Derived> -template<typename Lhs,typename Rhs> -inline Derived& -MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) -{ - other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived()); - return derived(); -} - -template<typename Derived> -template<typename Lhs, typename Rhs> -inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product) -{ - product._cacheFriendlyEval(derived()); - return derived(); -} - -template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess> -struct ei_cache_friendly_selector -{ - typedef Product<Lhs,Rhs,EvalMode> Prod; - typedef typename Prod::_LhsNested _LhsNested; - typedef typename Prod::_RhsNested _RhsNested; - typedef typename Prod::Scalar Scalar; - static inline void eval(const Prod& product, DestDerived& res) - { - if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - ) - { - res.setZero(); - ei_cache_friendly_product<Scalar>( - product._rows(), product._cols(), product.m_lhs.cols(), - _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(), - _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(), - Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() - ); - } - else - { - res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); - } - } - - static inline void eval_and_add(const Prod& product, DestDerived& res) - { - if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - ) - { - ei_cache_friendly_product<Scalar>( - product._rows(), product._cols(), product.m_lhs.cols(), - _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(), - _RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(), - Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() - ); - } - else - { - res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); - } - } -}; - -template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived> -struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false> -{ - typedef Product<Lhs,Rhs,EvalMode> Prod; - typedef typename Prod::_LhsNested _LhsNested; - typedef typename Prod::_RhsNested _RhsNested; - typedef typename Prod::Scalar Scalar; - static inline void eval(const Prod& product, DestDerived& res) - { - res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); - } - - static inline void eval_and_add(const Prod& product, DestDerived& res) - { - res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy(); - } -}; - -template<typename Lhs, typename Rhs, int EvalMode> -template<typename DestDerived> -inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const -{ - ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived, - _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit> - ::eval(*this, res); -} - -template<typename Lhs, typename Rhs, int EvalMode> -template<typename DestDerived> -inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const -{ - ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived, - _LhsNested::Flags&_RhsNested::Flags&DirectAccessBit> - ::eval_and_add(*this, res); -} - -#endif // EIGEN_PRODUCT_H diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index fd28f4bab..c536a4608 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -107,7 +107,7 @@ template<typename Derived> inline Transpose<Derived> MatrixBase<Derived>::transpose() { - return Transpose<Derived>(derived()); + return derived(); } /** This is the const version of transpose(). \sa adjoint() */ @@ -115,7 +115,7 @@ template<typename Derived> inline const Transpose<Derived> MatrixBase<Derived>::transpose() const { - return Transpose<Derived>(derived()); + return derived(); } /** \returns an expression of the adjoint (i.e. conjugate transpose) of *this. @@ -130,7 +130,7 @@ inline const Transpose< , NestByValueBit, 0> > MatrixBase<Derived>::adjoint() const { - return conjugate().temporary().transpose(); + return conjugate().nestByValue(); } #endif // EIGEN_TRANSPOSE_H diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index e599d8a3d..0e6ed4b21 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -53,16 +53,16 @@ const unsigned int HereditaryBits = RowMajorBit | EvalBeforeAssigningBit | LargeBit; -// Possible values for the PartType parameter of part() and the ExtractType parameter of extract() +// Possible values for the Mode parameter of part() and of extract() const unsigned int Upper = UpperTriangularBit; const unsigned int StrictlyUpper = UpperTriangularBit | ZeroDiagBit; const unsigned int Lower = LowerTriangularBit; const unsigned int StrictlyLower = LowerTriangularBit | ZeroDiagBit; -// additional possible values for the PartType parameter of part() +// additional possible values for the Mode parameter of part() const unsigned int SelfAdjoint = SelfAdjointBit; -// additional possible values for the ExtractType parameter of extract() +// additional possible values for the Mode parameter of extract() const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit; const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit; |