diff options
Diffstat (limited to 'Eigen/src/Core/Product.h')
-rw-r--r-- | Eigen/src/Core/Product.h | 478 |
1 files changed, 296 insertions, 182 deletions
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index def183b38..f03ea4e8e 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -26,226 +26,175 @@ #ifndef EIGEN_PRODUCT_H #define EIGEN_PRODUCT_H -template<int Index, int Size, typename Lhs, typename Rhs> -struct ei_product_impl -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, - typename Lhs::Scalar &res) - { - ei_product_impl<Index-1, Size, Lhs, Rhs>::run(row, col, lhs, rhs, res); - res += lhs.coeff(row, Index) * rhs.coeff(Index, col); - } -}; +/*************************** +*** Forward declarations *** +***************************/ -template<int Size, typename Lhs, typename Rhs> -struct ei_product_impl<0, Size, Lhs, Rhs> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, - typename Lhs::Scalar &res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - } +enum { + ColMajorProduct, + RowMajorProduct }; -template<typename Lhs, typename Rhs> -struct ei_product_impl<Dynamic, Dynamic, Lhs, Rhs> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - for(int i = 1; i < lhs.cols(); i++) - res += lhs.coeff(row, i) * rhs.coeff(i, col); - } -}; - -// prevent buggy user code from causing an infinite recursion -template<int Index, typename Lhs, typename Rhs> -struct ei_product_impl<Index, 0, Lhs, Rhs> -{ - inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} -}; +template<int VectorizationMode, int Index, typename Lhs, typename Rhs> +struct ei_product_coeff_impl; -//---------- +template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_packet_impl; -template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_impl; +template<typename T> class ei_product_eval_to_column_major; -template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_impl<true, Index, Size, Lhs, Rhs, PacketScalar> +/** \class ProductReturnType + * + * \brief Helper class to get the correct and optimized returned type of operator* + * + * \param Lhs the type of the left-hand side + * \param Rhs the type of the right-hand side + * \param ProductMode the type of the product (determined automatically by ei_product_mode) + * + * This class defines the typename Type representing the optimized product expression + * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type + * is the recommended way to define the result type of a function returning an expression + * which involve a matrix product. The class Product or DiagonalProduct should never be + * used directly. + * + * \sa class Product, class DiagonalProduct, MatrixBase::operator*(const MatrixBase<OtherDerived>&) + */ +template<typename Lhs, typename Rhs, int ProductMode> +struct ProductReturnType { - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - ei_packet_product_impl<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res); - res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<Aligned>(Index, col), res); - } -}; + typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; + typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; -template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_impl<false, Index, Size, Lhs, Rhs, PacketScalar> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - ei_packet_product_impl<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res); - res = ei_pmadd(lhs.template packet<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res); - } + typedef Product<typename ei_unconst<LhsNested>::type, + typename ei_unconst<RhsNested>::type, ProductMode> Type; }; -template<int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_impl<true, 0, Size, Lhs, Rhs, PacketScalar> +// cache friendly specialization +template<typename Lhs, typename Rhs> +struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct> { - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col)); - } -}; + typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; -template<int Size, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_impl<false, 0, Size, Lhs, Rhs, PacketScalar> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col))); - } -}; + typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime, + typename ei_product_eval_to_column_major<Rhs>::type + >::type RhsNested; -template<bool RowMajor, int Index, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_impl<RowMajor, Index, Dynamic, Lhs, Rhs, PacketScalar> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) - { - res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col)); - for(int i = 1; i < lhs.cols(); i++) - res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<Aligned>(i, col), res); - } + typedef Product<typename ei_unconst<LhsNested>::type, + typename ei_unconst<RhsNested>::type, CacheFriendlyProduct> Type; }; -template<int Index, typename Lhs, typename Rhs, typename PacketScalar> -struct ei_packet_product_impl<false, Index, Dynamic, Lhs, Rhs, PacketScalar> +/* Helper class to determine the type of the product, can be either: + * - NormalProduct + * - CacheFriendlyProduct + * - NormalProduct + */ +template<typename Lhs, typename Rhs> struct ei_product_mode { - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) - { - res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col))); - for(int i = 1; i < lhs.cols(); i++) - res = ei_pmadd(lhs.template packet<Aligned>(row, i), ei_pset1(rhs.coeff(i, col)), res); - } + enum{ value = ((Rhs::Flags&Diagonal)==Diagonal) || ((Lhs::Flags&Diagonal)==Diagonal) + ? DiagonalProduct + : Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + ? CacheFriendlyProduct : NormalProduct }; }; /** \class Product * * \brief Expression of the product of two matrices * - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * \param EvalMode internal use only + * \param LhsNested the type used to store the left-hand side + * \param RhsNested the type used to store the right-hand side + * \param ProductMode the type of the product * * This class represents an expression of the product of two matrices. - * It is the return type of the operator* between matrices, and most of the time - * this is the only way it is used. + * It is the return type of the operator* between matrices. Its template + * arguments are determined automatically by ProductReturnType. Therefore, + * Product should be used direclty. To determine the result type of a function + * which involve a matrix product, use ProductReturnType::Type. + * + * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&) */ -template<typename Lhs, typename Rhs> struct ei_product_eval_mode -{ - enum{ value = ((Rhs::Flags&Diagonal)==Diagonal) || ((Lhs::Flags&Diagonal)==Diagonal) - ? DiagonalProduct - : Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - ? CacheFriendlyProduct : NormalProduct }; -}; - -template<typename T> class ei_product_eval_to_column_major -{ - typedef typename ei_traits<T>::Scalar _Scalar; - enum { - _Rows = ei_traits<T>::RowsAtCompileTime, - _Cols = ei_traits<T>::ColsAtCompileTime, - _MaxRows = ei_traits<T>::MaxRowsAtCompileTime, - _MaxCols = ei_traits<T>::MaxColsAtCompileTime, - _Flags = ei_traits<T>::Flags - }; - - public: - typedef Matrix<_Scalar, - _Rows, _Cols, _MaxRows, _MaxCols, - ei_corrected_matrix_flags< - _Scalar, - _Rows, _Cols, _MaxRows, _MaxCols, - _Flags - >::ret & ~RowMajorBit - > type; -}; - -// as ei_nested, but evaluate to a column-major matrix if an evaluation is required -template<typename T, int n=1> struct ei_product_nested_rhs -{ - typedef typename ei_meta_if< - ei_must_nest_by_value<T>::ret, - T, - typename ei_meta_if< - ((ei_traits<T>::Flags & EvalBeforeNestingBit) - || (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost), - typename ei_product_eval_to_column_major<T>::type, - const T& - >::ret - >::ret type; -}; - -template<typename Lhs, typename Rhs, int EvalMode> -struct ei_traits<Product<Lhs, Rhs, EvalMode> > +template<typename LhsNested, typename RhsNested, int ProductMode> +struct ei_traits<Product<LhsNested, RhsNested, ProductMode> > { - typedef typename Lhs::Scalar Scalar; - typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; - typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct, - typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type, - typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested; + // clean the nested types: typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested; typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested; + typedef typename _LhsNested::Scalar Scalar; + enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, LhsFlags = _LhsNested::Flags, RhsFlags = _RhsNested::Flags, - RowsAtCompileTime = Lhs::RowsAtCompileTime, - ColsAtCompileTime = Rhs::ColsAtCompileTime, - MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, - MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime, - // the vectorization flags are only used by the normal product, - // the other one is always vectorized ! - _RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0), - _LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0), - _PacketAccess = (_LhsPacketAccess || _RhsPacketAccess) ? 1 : 0, - _RowMajor = (RhsFlags & RowMajorBit) - && (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsPacketAccess)), - _LostBits = ~((_RowMajor ? 0 : RowMajorBit) + + RowsAtCompileTime = _LhsNested::RowsAtCompileTime, + ColsAtCompileTime = _RhsNested::ColsAtCompileTime, + InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), + + MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, + MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0), + + CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & PacketAccessBit) && (RhsFlags & PacketAccessBit) + && (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits<Scalar>::size == 0), + + EvalToRowMajor = (RhsFlags & RowMajorBit) + && (ProductMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!CanVectorizeLhs)), + + RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit) | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit) | LinearAccessBit), - Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits) + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) | EvalBeforeAssigningBit | EvalBeforeNestingBit - | (_PacketAccess ? PacketAccessBit : 0), - CoeffReadCost - = Lhs::ColsAtCompileTime == Dynamic - ? Dynamic - : Lhs::ColsAtCompileTime - * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) - + (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost + | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0), + + CoeffReadCost = InnerSize == Dynamic ? Dynamic + : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits<Scalar>::AddCost }; }; -template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignment_operator, - public MatrixBase<Product<Lhs, Rhs, EvalMode> > +template<typename LhsNested, typename RhsNested, int ProductMode> class Product : ei_no_assignment_operator, + public MatrixBase<Product<LhsNested, RhsNested, ProductMode> > { public: EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - typedef typename ei_traits<Product>::LhsNested LhsNested; - typedef typename ei_traits<Product>::RhsNested RhsNested; + + private: + typedef typename ei_traits<Product>::_LhsNested _LhsNested; typedef typename ei_traits<Product>::_RhsNested _RhsNested; enum { - PacketSize = ei_packet_traits<Scalar>::size + PacketSize = ei_packet_traits<Scalar>::size, + InnerSize = ei_traits<Product>::InnerSize, + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + CanVectorizeInner = ei_traits<Product>::CanVectorizeInner && Unroll }; + typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorization : NoVectorization, + Unroll ? InnerSize-1 : Dynamic, + _LhsNested, _RhsNested> ScalarCoeffImpl; + + typedef ei_product_packet_impl<Flags&RowMajorBit ? RowMajorProduct : ColMajorProduct, + Unroll ? InnerSize-1 : Dynamic, + _LhsNested, _RhsNested, PacketScalar> PacketCoeffImpl; + + public: + + template<typename Lhs, typename Rhs> inline Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { @@ -268,23 +217,15 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm const Scalar _coeff(int row, int col) const { Scalar res; - const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - ei_product_impl<unroll ? Lhs::ColsAtCompileTime-1 : Dynamic, - unroll ? Lhs::ColsAtCompileTime : Dynamic, - _LhsNested, _RhsNested> - ::run(row, col, m_lhs, m_rhs, res); + ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); return res; } template<int LoadMode> const PacketScalar _packet(int row, int col) const { - const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; PacketScalar res; - ei_packet_product_impl<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1, - unroll ? Lhs::ColsAtCompileTime : Dynamic, - _LhsNested, _RhsNested, PacketScalar> - ::run(row, col, m_lhs, m_rhs, res); + PacketCoeffImpl::run(row, col, m_lhs, m_rhs, res); return res; } @@ -302,11 +243,11 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm */ template<typename Derived> template<typename OtherDerived> -inline const typename MatrixBase<Derived>::template ProductReturnType<OtherDerived>::Type +inline const typename ProductReturnType<Derived,OtherDerived>::Type MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const { assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags&ArrayBit) ); - return typename ProductReturnType<OtherDerived>::Type(derived(), other.derived()); + return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); } /** replaces \c *this by \c *this * \a other. @@ -321,6 +262,157 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other) return *this = *this * other; } +/*************************************************************************** +* Normal product .coeff() implementation (with meta-unrolling) +***************************************************************************/ + +/************************************** +*** Scalar path - no vectorization *** +**************************************/ + +template<int Index, typename Lhs, typename Rhs> +struct ei_product_coeff_impl<NoVectorization, Index, Lhs, Rhs> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + ei_product_coeff_impl<NoVectorization, Index-1, Lhs, Rhs>::run(row, col, lhs, rhs, res); + res += lhs.coeff(row, Index) * rhs.coeff(Index, col); + } +}; + +template<typename Lhs, typename Rhs> +struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + } +}; + +template<typename Lhs, typename Rhs> +struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + for(int i = 1; i < lhs.cols(); i++) + res += lhs.coeff(row, i) * rhs.coeff(i, col); + } +}; + +// prevent buggy user code from causing an infinite recursion +template<typename Lhs, typename Rhs> +struct ei_product_coeff_impl<NoVectorization, -1, Lhs, Rhs> +{ + inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} +}; + +/******************************************* +*** Scalar path with inner vectorization *** +*******************************************/ + +template<int Index, typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_coeff_vectorized_impl +{ + enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size }; + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + ei_product_coeff_vectorized_impl<Index-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres); + pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, Index) , rhs.template packet<Aligned>(Index, col) )); + } +}; + +template<typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_coeff_vectorized_impl<0, Lhs, Rhs, PacketScalar> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + pres = ei_pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); + } +}; + +template<int Index, typename Lhs, typename Rhs> +struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs> +{ + typedef typename Lhs::PacketScalar PacketScalar; + enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size }; + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + PacketScalar pres; + ei_product_coeff_vectorized_impl<Index+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres); + ei_product_coeff_impl<NoVectorization,Index,Lhs,Rhs>::run(row, col, lhs, rhs, res); + res = ei_predux(pres); + } +}; + +/******************* +*** Packet path *** +*******************/ + +template<int Index, typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_packet_impl<RowMajorProduct, Index, Lhs, Rhs, PacketScalar> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + ei_product_packet_impl<RowMajorProduct, Index-1, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res); + res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<Aligned>(Index, col), res); + } +}; + +template<int Index, typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_packet_impl<ColMajorProduct, Index, Lhs, Rhs, PacketScalar> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + ei_product_packet_impl<ColMajorProduct, Index-1, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res); + res = ei_pmadd(lhs.template packet<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res); + } +}; + +template<typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_packet_impl<RowMajorProduct, 0, Lhs, Rhs, PacketScalar> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col)); + } +}; + +template<typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_packet_impl<ColMajorProduct, 0, Lhs, Rhs, PacketScalar> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col))); + } +}; + +template<int StorageOrder, typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_packet_impl<StorageOrder, Dynamic, Lhs, Rhs, PacketScalar> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col)); + for(int i = 1; i < lhs.cols(); i++) + res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<Aligned>(i, col), res); + } +}; + +template<typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_packet_impl<ColMajorProduct, Dynamic, Lhs, Rhs, PacketScalar> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col))); + for(int i = 1; i < lhs.cols(); i++) + res = ei_pmadd(lhs.template packet<Aligned>(row, i), ei_pset1(rhs.coeff(i, col)), res); + } +}; + +/*************************************************************************** +* Cache friendly product callers and specific nested evaluation strategies +***************************************************************************/ + /** \internal */ template<typename Derived> template<typename Lhs,typename Rhs> @@ -339,6 +431,28 @@ inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFrien return derived(); } +template<typename T> class ei_product_eval_to_column_major +{ + typedef typename ei_traits<T>::Scalar _Scalar; + enum { + _Rows = ei_traits<T>::RowsAtCompileTime, + _Cols = ei_traits<T>::ColsAtCompileTime, + _MaxRows = ei_traits<T>::MaxRowsAtCompileTime, + _MaxCols = ei_traits<T>::MaxColsAtCompileTime, + _Flags = ei_traits<T>::Flags + }; + + public: + typedef Matrix<_Scalar, + _Rows, _Cols, _MaxRows, _MaxCols, + ei_corrected_matrix_flags< + _Scalar, + _Rows, _Cols, _MaxRows, _MaxCols, + _Flags + >::ret & ~RowMajorBit + > type; +}; + template<typename T> struct ei_product_copy_rhs { typedef typename ei_meta_if< |