// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2006-2008 Benoit Jacob // Copyright (C) 2008 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifndef EIGEN_PRODUCT_H #define EIGEN_PRODUCT_H /*************************** *** Forward declarations *** ***************************/ template struct ei_product_coeff_impl; template struct ei_product_packet_impl; /** \class ProductReturnType * * \brief Helper class to get the correct and optimized returned type of operator* * * \param Lhs the type of the left-hand side * \param Rhs the type of the right-hand side * \param ProductMode the type of the product (determined automatically by ei_product_mode) * * This class defines the typename Type representing the optimized product expression * between two matrix expressions. In practice, using ProductReturnType::Type * is the recommended way to define the result type of a function returning an expression * which involve a matrix product. The class Product should never be * used directly. * * \sa class Product, MatrixBase::operator*(const MatrixBase&) */ template struct ProductReturnType { typedef typename ei_nested::type LhsNested; typedef typename ei_nested::type RhsNested; typedef Product Type; }; // cache friendly specialization template struct ProductReturnType { typedef typename ei_nested::type LhsNested; typedef typename ei_nested::type >::type RhsNested; typedef Product Type; }; /* Helper class to analyze the factors of a Product expression. * In particular it allows to pop out operator-, scalar multiples, * and conjugate */ template struct ei_blas_traits { typedef typename ei_traits::Scalar Scalar; typedef XprType ActualXprType; enum { IsComplex = NumTraits::IsComplex, NeedToConjugate = false, ActualAccess = int(ei_traits::Flags)&DirectAccessBit ? HasDirectAccess : NoDirectAccess }; typedef typename ei_meta_if::ret DirectLinearAccessType; static inline const ActualXprType& extract(const XprType& x) { return x; } static inline Scalar extractScalarFactor(const XprType&) { return Scalar(1); } }; // pop conjugate template struct ei_blas_traits, NestedXpr> > : ei_blas_traits { typedef ei_blas_traits Base; typedef CwiseUnaryOp, NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; enum { IsComplex = NumTraits::IsComplex, NeedToConjugate = IsComplex }; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); } static inline Scalar extractScalarFactor(const XprType& x) { return ei_conj(Base::extractScalarFactor(x._expression())); } }; // pop scalar multiple template struct ei_blas_traits, NestedXpr> > : ei_blas_traits { typedef ei_blas_traits Base; typedef CwiseUnaryOp, NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); } static inline Scalar extractScalarFactor(const XprType& x) { return x._functor().m_other * Base::extractScalarFactor(x._expression()); } }; // pop opposite template struct ei_blas_traits, NestedXpr> > : ei_blas_traits { typedef ei_blas_traits Base; typedef CwiseUnaryOp, NestedXpr> XprType; typedef typename Base::ActualXprType ActualXprType; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); } static inline Scalar extractScalarFactor(const XprType& x) { return - Base::extractScalarFactor(x._expression()); } }; // pop opposite template struct ei_blas_traits > : ei_blas_traits { typedef typename NestedXpr::Scalar Scalar; typedef ei_blas_traits Base; typedef NestByValue XprType; typedef typename Base::ActualXprType ActualXprType; static inline const ActualXprType& extract(const XprType& x) { return Base::extract(static_cast(x)); } static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(static_cast(x)); } }; /* Helper class to determine the type of the product, can be either: * - NormalProduct * - CacheFriendlyProduct */ template struct ei_product_mode { typedef typename ei_blas_traits::ActualXprType ActualLhs; typedef typename ei_blas_traits::ActualXprType ActualRhs; enum{ // workaround sun studio: LhsIsVectorAtCompileTime = ei_traits::ColsAtCompileTime==1 || ei_traits::ColsAtCompileTime==1, value = ei_traits::MaxColsAtCompileTime == Dynamic && ( ei_traits::MaxRowsAtCompileTime == Dynamic || ei_traits::MaxColsAtCompileTime == Dynamic ) && (!(Rhs::IsVectorAtCompileTime && (ei_traits::Flags&RowMajorBit) && (!(ei_traits::Flags&DirectAccessBit)))) && (!(LhsIsVectorAtCompileTime && (!(ei_traits::Flags&RowMajorBit)) && (!(ei_traits::Flags&DirectAccessBit)))) && (ei_is_same_type::Scalar, typename ei_traits::Scalar>::ret) ? CacheFriendlyProduct : NormalProduct }; }; /** \class Product * * \brief Expression of the product of two matrices * * \param LhsNested the type used to store the left-hand side * \param RhsNested the type used to store the right-hand side * \param ProductMode the type of the product * * This class represents an expression of the product of two matrices. * It is the return type of the operator* between matrices. Its template * arguments are determined automatically by ProductReturnType. Therefore, * Product should never be used direclty. To determine the result type of a * function which involves a matrix product, use ProductReturnType::Type. * * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase&) */ template struct ei_traits > { // clean the nested types: typedef typename ei_cleantype::type _LhsNested; typedef typename ei_cleantype::type _RhsNested; typedef typename ei_scalar_product_traits::ReturnType Scalar; enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, LhsFlags = _LhsNested::Flags, RhsFlags = _RhsNested::Flags, RowsAtCompileTime = _LhsNested::RowsAtCompileTime, ColsAtCompileTime = _RhsNested::ColsAtCompileTime, InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime == Dynamic || (ColsAtCompileTime % ei_packet_traits::size) == 0), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime == Dynamic || (RowsAtCompileTime % ei_packet_traits::size) == 0), EvalToRowMajor = RhsRowMajor && (ProductMode==(int)CacheFriendlyProduct ? LhsRowMajor : (!CanVectorizeLhs)), RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit), Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) | EvalBeforeAssigningBit | EvalBeforeNestingBit | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0) | (LhsFlags & RhsFlags & AlignedBit), CoeffReadCost = InnerSize == Dynamic ? Dynamic : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits::AddCost, /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. */ CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) && (InnerSize % ei_packet_traits::size == 0) }; }; template class Product : ei_no_assignment_operator, public MatrixBase > { public: EIGEN_GENERIC_PUBLIC_INTERFACE(Product) private: typedef typename ei_traits::_LhsNested _LhsNested; typedef typename ei_traits::_RhsNested _RhsNested; enum { PacketSize = ei_packet_traits::size, InnerSize = ei_traits::InnerSize, Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, CanVectorizeInner = ei_traits::CanVectorizeInner }; typedef ei_product_coeff_impl ScalarCoeffImpl; public: template inline Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. // We still allow to mix T and complex. EIGEN_STATIC_ASSERT((ei_is_same_type::ret), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) ei_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } /** \internal * compute \a res += \c *this using the cache friendly product. */ template void _cacheFriendlyEvalAndAdd(DestDerived& res, Scalar alpha) const; /** \internal * \returns whether it is worth it to use the cache friendly product. */ EIGEN_STRONG_INLINE bool _useCacheFriendlyProduct() const { // TODO do something more accurate here (especially for mat-vec products) return m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD && ( rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD); } EIGEN_STRONG_INLINE int rows() const { return m_lhs.rows(); } EIGEN_STRONG_INLINE int cols() const { return m_rhs.cols(); } EIGEN_STRONG_INLINE const Scalar coeff(int row, int col) const { Scalar res; ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); return res; } /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, * which is why we don't set the LinearAccessBit. */ EIGEN_STRONG_INLINE const Scalar coeff(int index) const { Scalar res; const int row = RowsAtCompileTime == 1 ? 0 : index; const int col = RowsAtCompileTime == 1 ? index : 0; ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); return res; } template EIGEN_STRONG_INLINE const PacketScalar packet(int row, int col) const { PacketScalar res; ei_product_packet_impl ::run(row, col, m_lhs, m_rhs, res); return res; } EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; } EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; } protected: const LhsNested m_lhs; const RhsNested m_rhs; }; /** \returns the matrix product of \c *this and \a other. * * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*(). * * \sa lazy(), operator*=(const MatrixBase&), Cwise::operator*() */ template template inline const typename ProductReturnType::Type MatrixBase::operator*(const MatrixBase &other) const { enum { ProductIsValid = Derived::ColsAtCompileTime==Dynamic || OtherDerived::RowsAtCompileTime==Dynamic || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) }; // note to the lost user: // * for a dot product use: v1.dot(v2) // * for a coeff-wise product use: v1.cwise()*v2 EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) return typename ProductReturnType::Type(derived(), other.derived()); } /*************************************************************************** * Normal product .coeff() implementation (with meta-unrolling) ***************************************************************************/ /************************************** *** Scalar path - no vectorization *** **************************************/ template struct ei_product_coeff_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { ei_product_coeff_impl::run(row, col, lhs, rhs, res); res += lhs.coeff(row, Index) * rhs.coeff(Index, col); } }; template struct ei_product_coeff_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { res = lhs.coeff(row, 0) * rhs.coeff(0, col); } }; template struct ei_product_coeff_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) { ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = lhs.coeff(row, 0) * rhs.coeff(0, col); for(int i = 1; i < lhs.cols(); ++i) res += lhs.coeff(row, i) * rhs.coeff(i, col); } }; // prevent buggy user code from causing an infinite recursion template struct ei_product_coeff_impl { EIGEN_STRONG_INLINE static void run(int, int, const Lhs&, const Rhs&, RetScalar&) {} }; /******************************************* *** Scalar path with inner vectorization *** *******************************************/ template struct ei_product_coeff_vectorized_unroller { enum { PacketSize = ei_packet_traits::size }; EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) { ei_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); pres = ei_padd(pres, ei_pmul( lhs.template packet(row, Index) , rhs.template packet(Index, col) )); } }; template struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar> { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) { pres = ei_pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); } }; template struct ei_product_coeff_impl { typedef typename Lhs::PacketScalar PacketScalar; enum { PacketSize = ei_packet_traits::size }; EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { PacketScalar pres; ei_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); ei_product_coeff_impl::run(row, col, lhs, rhs, res); res = ei_predux(pres); } }; template struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = ei_dot_impl< Block::ColsAtCompileTime>, Block::RowsAtCompileTime, 1>, LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs.col(col)); } }; // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower // NOTE maybe they are now useless since we have a specialization for Block template struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = ei_dot_impl< Lhs, Block::RowsAtCompileTime, 1>, LinearVectorization, NoUnrolling>::run(lhs, rhs.col(col)); } }; template struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = ei_dot_impl< Block::ColsAtCompileTime>, Rhs, LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs); } }; template struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = ei_dot_impl< Lhs, Rhs, LinearVectorization, NoUnrolling>::run(lhs, rhs); } }; template struct ei_product_coeff_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { ei_product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, res); } }; /******************* *** Packet path *** *******************/ template struct ei_product_packet_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { ei_product_packet_impl::run(row, col, lhs, rhs, res); res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet(Index, col), res); } }; template struct ei_product_packet_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { ei_product_packet_impl::run(row, col, lhs, rhs, res); res = ei_pmadd(lhs.template packet(row, Index), ei_pset1(rhs.coeff(Index, col)), res); } }; template struct ei_product_packet_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); } }; template struct ei_product_packet_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) { res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); } }; template struct ei_product_packet_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) { ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); for(int i = 1; i < lhs.cols(); ++i) res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); } }; template struct ei_product_packet_impl { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) { ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); for(int i = 1; i < lhs.cols(); ++i) res = ei_pmadd(lhs.template packet(row, i), ei_pset1(rhs.coeff(i, col)), res); } }; /*************************************************************************** * Cache friendly product callers and specific nested evaluation strategies ***************************************************************************/ // Forward declarations template static void ei_cache_friendly_product( int _rows, int _cols, int depth, bool _lhsRowMajor, const Scalar* _lhs, int _lhsStride, bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride, bool resRowMajor, Scalar* res, int resStride, Scalar alpha); template static void ei_cache_friendly_product_colmajor_times_vector( int size, const Scalar* lhs, int lhsStride, const RhsType& rhs, Scalar* res, Scalar alpha); template static void ei_cache_friendly_product_rowmajor_times_vector( const Scalar* lhs, int lhsStride, const Scalar* rhs, int rhsSize, ResType& res, Scalar alpha); // This helper class aims to determine which optimized product to call, // and how to call it. We have to distinghish three major cases: // 1 - matrix-matrix // 2 - matrix-vector // 3 - vector-matrix // The storage order, and direct-access criteria are also important for in last 2 cases. // For instance, with a mat-vec product, the matrix coeff are evaluated only once, and // therefore it is useless to first evaluated it to next being able to directly access // its coefficient. template::RowsAtCompileTime, int LhsOrder = int(ei_traits::LhsFlags)&RowMajorBit ? RowMajor : ColMajor, int LhsHasDirectAccess = ei_blas_traits::_LhsNested>::ActualAccess, int RhsCols = ei_traits::ColsAtCompileTime, int RhsOrder = int(ei_traits::RhsFlags)&RowMajorBit ? RowMajor : ColMajor, int RhsHasDirectAccess = ei_blas_traits::_RhsNested>::ActualAccess> struct ei_cache_friendly_product_selector { template inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha) { product._cacheFriendlyEvalAndAdd(res, alpha); } }; // optimized colmajor * vector path template struct ei_cache_friendly_product_selector { template inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha) { ei_assert(alpha==typename ProductType::Scalar(1)); const int size = product.rhs().rows(); for (int k=0; k struct ei_cache_friendly_product_selector { typedef typename ProductType::Scalar Scalar; typedef ei_blas_traits::_LhsNested> LhsProductTraits; typedef ei_blas_traits::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; template inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha) { const ActualLhsType& actualLhs = LhsProductTraits::extract(product.lhs()); const ActualRhsType& actualRhs = RhsProductTraits::extract(product.rhs()); Scalar actualAlpha = alpha * LhsProductTraits::extractScalarFactor(product.lhs()) * RhsProductTraits::extractScalarFactor(product.rhs()); enum { EvalToRes = (ei_packet_traits::size==1) ||((DestDerived::Flags&ActualPacketAccessBit) && (!(DestDerived::Flags & RowMajorBit))) }; Scalar* EIGEN_RESTRICT _res; if (EvalToRes) _res = &res.coeffRef(0); else { _res = ei_aligned_stack_new(Scalar,res.size()); Map >(_res, res.size()) = res; } // std::cerr << "colmajor * vector " << EvalToRes << "\n"; ei_cache_friendly_product_colmajor_times_vector ( res.size(), &actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.stride(), actualRhs, _res, actualAlpha); if (!EvalToRes) { res = Map >(_res, res.size()); ei_aligned_stack_delete(Scalar, _res, res.size()); } } }; // optimized vector * rowmajor path template struct ei_cache_friendly_product_selector { template inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha) { ei_assert(alpha==typename ProductType::Scalar(1)); const int cols = product.lhs().cols(); for (int j=0; j struct ei_cache_friendly_product_selector { typedef typename ProductType::Scalar Scalar; typedef ei_blas_traits::_LhsNested> LhsProductTraits; typedef ei_blas_traits::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; template inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha) { const ActualLhsType& actualLhs = LhsProductTraits::extract(product.lhs()); const ActualRhsType& actualRhs = RhsProductTraits::extract(product.rhs()); Scalar actualAlpha = alpha * LhsProductTraits::extractScalarFactor(product.lhs()) * RhsProductTraits::extractScalarFactor(product.rhs()); enum { EvalToRes = (ei_packet_traits::size==1) ||((DestDerived::Flags & ActualPacketAccessBit) && (DestDerived::Flags & RowMajorBit)) }; Scalar* EIGEN_RESTRICT _res; if (EvalToRes) _res = &res.coeffRef(0); else { _res = ei_aligned_stack_new(Scalar, res.size()); Map >(_res, res.size()) = res; } ei_cache_friendly_product_colmajor_times_vector (res.size(), &actualRhs.const_cast_derived().coeffRef(0,0), actualRhs.stride(), actualLhs.transpose(), _res, actualAlpha); if (!EvalToRes) { res = Map >(_res, res.size()); ei_aligned_stack_delete(Scalar, _res, res.size()); } } }; // optimized rowmajor - vector product template struct ei_cache_friendly_product_selector { typedef typename ProductType::Scalar Scalar; typedef ei_blas_traits::_LhsNested> LhsProductTraits; typedef ei_blas_traits::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; enum { UseRhsDirectly = ((ei_packet_traits::size==1) || (ActualRhsType::Flags&ActualPacketAccessBit)) && (!(ActualRhsType::Flags & RowMajorBit)) }; template inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha) { const ActualLhsType& actualLhs = LhsProductTraits::extract(product.lhs()); const ActualRhsType& actualRhs = RhsProductTraits::extract(product.rhs()); Scalar actualAlpha = alpha * LhsProductTraits::extractScalarFactor(product.lhs()) * RhsProductTraits::extractScalarFactor(product.rhs()); Scalar* EIGEN_RESTRICT _rhs; if (UseRhsDirectly) _rhs = &actualRhs.const_cast_derived().coeffRef(0); else { _rhs = ei_aligned_stack_new(Scalar, actualRhs.size()); Map >(_rhs, actualRhs.size()) = actualRhs; } ei_cache_friendly_product_rowmajor_times_vector ( &actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.stride(), _rhs, product.rhs().size(), res, actualAlpha); if (!UseRhsDirectly) ei_aligned_stack_delete(Scalar, _rhs, product.rhs().size()); } }; // optimized vector - colmajor product template struct ei_cache_friendly_product_selector { typedef typename ProductType::Scalar Scalar; typedef ei_blas_traits::_LhsNested> LhsProductTraits; typedef ei_blas_traits::_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; enum { UseLhsDirectly = ((ei_packet_traits::size==1) || (ActualLhsType::Flags&ActualPacketAccessBit)) && (ActualLhsType::Flags & RowMajorBit) }; template inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha) { const ActualLhsType& actualLhs = LhsProductTraits::extract(product.lhs()); const ActualRhsType& actualRhs = RhsProductTraits::extract(product.rhs()); Scalar actualAlpha = alpha * LhsProductTraits::extractScalarFactor(product.lhs()) * RhsProductTraits::extractScalarFactor(product.rhs()); Scalar* EIGEN_RESTRICT _lhs; if (UseLhsDirectly) _lhs = &actualLhs.const_cast_derived().coeffRef(0); else { _lhs = ei_aligned_stack_new(Scalar, actualLhs.size()); Map >(_lhs, actualLhs.size()) = actualLhs; } ei_cache_friendly_product_rowmajor_times_vector ( &actualRhs.const_cast_derived().coeffRef(0,0), actualRhs.stride(), _lhs, product.lhs().size(), res, actualAlpha); if(!UseLhsDirectly) ei_aligned_stack_delete(Scalar, _lhs, product.lhs().size()); } }; // discard this case which has to be handled by the default path // (we keep it to be sure to hit a compilation error if this is not the case) template struct ei_cache_friendly_product_selector {}; // discard this case which has to be handled by the default path // (we keep it to be sure to hit a compilation error if this is not the case) template struct ei_cache_friendly_product_selector {}; /** \internal * Overloaded to perform an efficient C += A*B */ template template inline Derived& MatrixBase::operator+=(const Flagged, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) {//std::cerr << "operator+=\n"; if (other._expression()._useCacheFriendlyProduct()) ei_cache_friendly_product_selector >::run(const_cast_derived(), other._expression(), Scalar(1)); else { //std::cerr << "no cf\n"; lazyAssign(derived() + other._expression()); } return derived(); } /** \internal * Overloaded to perform an efficient C -= A*B */ template template inline Derived& MatrixBase::operator-=(const Flagged, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) { if (other._expression()._useCacheFriendlyProduct()) ei_cache_friendly_product_selector >::run(const_cast_derived(), other._expression(), Scalar(-1)); else lazyAssign(derived() - other._expression()); return derived(); } /** \internal * Overloaded to perform an efficient C = A*B */ template template inline Derived& MatrixBase::lazyAssign(const Product& product) { if (product._useCacheFriendlyProduct()) { setZero(); ei_cache_friendly_product_selector >::run(const_cast_derived(), product, Scalar(1)); } else { lazyAssign(static_cast > &>(product)); } return derived(); } template struct ei_product_copy_rhs { typedef typename ei_meta_if< (ei_traits::Flags & RowMajorBit) || (!(ei_traits::Flags & DirectAccessBit)), typename ei_plain_matrix_type_column_major::type, const T& >::ret type; }; template struct ei_product_copy_lhs { typedef typename ei_meta_if< (!(int(ei_traits::Flags) & DirectAccessBit)), typename ei_plain_matrix_type::type, const T& >::ret type; }; template template inline void Product::_cacheFriendlyEvalAndAdd(DestDerived& res, Scalar alpha) const { typedef ei_blas_traits<_LhsNested> LhsProductTraits; typedef ei_blas_traits<_RhsNested> RhsProductTraits; typedef typename LhsProductTraits::ActualXprType ActualLhsType; typedef typename RhsProductTraits::ActualXprType ActualRhsType; const ActualLhsType& actualLhs = LhsProductTraits::extract(m_lhs); const ActualRhsType& actualRhs = RhsProductTraits::extract(m_rhs); Scalar actualAlpha = alpha * LhsProductTraits::extractScalarFactor(m_lhs) * RhsProductTraits::extractScalarFactor(m_rhs); typedef typename ei_product_copy_lhs::type LhsCopy; typedef typename ei_unref::type _LhsCopy; typedef typename ei_product_copy_rhs::type RhsCopy; typedef typename ei_unref::type _RhsCopy; LhsCopy lhs(actualLhs); RhsCopy rhs(actualRhs); ei_cache_friendly_product ( rows(), cols(), lhs.cols(), _LhsCopy::Flags&RowMajorBit, (const Scalar*)&(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(), _RhsCopy::Flags&RowMajorBit, (const Scalar*)&(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(), Flags&RowMajorBit, (Scalar*)&(res.coeffRef(0,0)), res.stride(), actualAlpha ); } #endif // EIGEN_PRODUCT_H