// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2006-2008 Benoit Jacob // Copyright (C) 2008-2010 Gael Guennebaud // Copyright (C) 2011 Jitse Niesen // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_PRODUCTEVALUATORS_H #define EIGEN_PRODUCTEVALUATORS_H namespace Eigen { namespace internal { /** \internal * Evaluator of a product expression. * Since products require special treatments to handle all possible cases, * we simply deffer the evaluation logic to a product_evaluator class * which offers more partial specialization possibilities. * * \sa class product_evaluator */ template struct evaluator > : public product_evaluator > { typedef Product XprType; typedef product_evaluator Base; typedef evaluator type; typedef evaluator nestedType; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} }; // Catch scalar * ( A * B ) and transform it to (A*scalar) * B // TODO we should apply that rule only if that's really helpful template struct evaluator, const Product > > : public evaluator,const Lhs>, Rhs, DefaultProduct> > { typedef CwiseUnaryOp, const Product > XprType; typedef evaluator,const Lhs>, Rhs, DefaultProduct> > Base; typedef evaluator type; typedef evaluator nestedType; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) {} }; template struct evaluator, DiagIndex> > : public evaluator, DiagIndex> > { typedef Diagonal, DiagIndex> XprType; typedef evaluator, DiagIndex> > Base; typedef evaluator type; typedef evaluator nestedType; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(Diagonal, DiagIndex>( Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index() )) {} }; // Helper class to perform a matrix product with the destination at hand. // Depending on the sizes of the factors, there are different evaluation strategies // as controlled by internal::product_type. template< typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits::Shape, typename RhsShape = typename evaluator_traits::Shape, int ProductType = internal::product_type::value> struct generic_product_impl; template struct evaluator_traits > : evaluator_traits_base > { enum { AssumeAliasing = 1 }; }; // This is the default evaluator implementation for products: // It creates a temporary and call generic_product_impl template struct product_evaluator, ProductTag, LhsShape, RhsShape, typename traits::Scalar, typename traits::Scalar> : public evaluator::PlainObject>::type { typedef Product XprType; typedef typename XprType::PlainObject PlainObject; typedef typename evaluator::type Base; enum { Flags = Base::Flags | EvalBeforeNestingBit // CoeffReadCost = 0 // FIXME why is it needed? (this was already the case before the evaluators, see traits) }; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast(this)) Base(m_result); // FIXME shall we handle nested_eval here? // typedef typename internal::nested_eval::type LhsNested; // typedef typename internal::nested_eval::type RhsNested; // typedef typename internal::remove_all::type LhsNestedCleaned; // typedef typename internal::remove_all::type RhsNestedCleaned; // // const LhsNested lhs(xpr.lhs()); // const RhsNested rhs(xpr.rhs()); // // generic_product_impl::evalTo(m_result, lhs, rhs); generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } protected: PlainObject m_result; }; // Dense = Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); } }; // Dense += Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> struct Assignment, internal::add_assign_op, Dense2Dense, Scalar> { typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::addTo(dst, src.lhs(), src.rhs()); } }; // Dense -= Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> struct Assignment, internal::sub_assign_op, Dense2Dense, Scalar> { typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::subTo(dst, src.lhs(), src.rhs()); } }; // Dense ?= scalar * Product // TODO we should apply that rule if that's really helpful // for instance, this is not good for inner products template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> struct Assignment, const Product >, AssignFunc, Dense2Dense, Scalar> { typedef CwiseUnaryOp, const Product > SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { // TODO use operator* instead of prod() once we have made enough progress call_assignment(dst.noalias(), prod(src.functor().m_other * src.nestedExpression().lhs(), src.nestedExpression().rhs()), func); } }; template struct generic_product_impl { template static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); } template static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); } template static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } }; /*********************************************************************** * Implementation of outer dense * dense vector product ***********************************************************************/ // Column major result template EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) { typename evaluator::type rhsEval(rhs); // FIXME make sure lhs is sequentially stored // FIXME not very good if rhs is real and lhs complex while alpha is real too // FIXME we should probably build an evaluator for dst const Index cols = dst.cols(); for (Index j=0; j EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) { typename evaluator::type lhsEval(lhs); // FIXME make sure rhs is sequentially stored // FIXME not very good if lhs is real and rhs complex while alpha is real too // FIXME we should probably build an evaluator for dst const Index rows = dst.rows(); for (Index i=0; i struct generic_product_impl { template struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; typedef typename Product::Scalar Scalar; // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; struct adds { Scalar m_scale; explicit adds(const Scalar& s) : m_scale(s) {} template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += m_scale * src; } }; template static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, set(), IsRowMajor()); } template static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, add(), IsRowMajor()); } template static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, sub(), IsRowMajor()); } template static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), IsRowMajor()); } }; // This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo template struct generic_product_impl_base { typedef typename Product::Scalar Scalar; template static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } template static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } template static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } template static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } }; template struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; typedef typename internal::conditional::type MatrixType; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::gemv_dense_sense_selector::HasUsableDirectAccess) >::run(lhs, rhs, dst, alpha); } }; template struct generic_product_impl { typedef typename Product::Scalar Scalar; template static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // TODO: use the following instead of calling call_assignment, same for the other methods // dst = lazyprod(lhs,rhs); call_assignment(dst, lazyprod(lhs,rhs), internal::assign_op()); } template static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst += lazyprod(lhs,rhs); call_assignment(dst, lazyprod(lhs,rhs), internal::add_assign_op()); } template static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst -= lazyprod(lhs,rhs); call_assignment(dst, lazyprod(lhs,rhs), internal::sub_assign_op()); } // template // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) // { dst += alpha * lazyprod(lhs,rhs); } }; // This specialization enforces the use of a coefficient-based evaluation strategy template struct generic_product_impl : generic_product_impl {}; // Case 2: Evaluate coeff by coeff // // This is mostly taken from CoeffBasedProduct.h // The main difference is that we add an extra argument to the etor_product_*_impl::run() function // for the inner dimension of the product, because evaluator object do not know their size. template struct etor_product_coeff_impl; template struct etor_product_packet_impl; template struct product_evaluator, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > : evaluator_base > { typedef Product XprType; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : m_lhs(xpr.lhs()), m_rhs(xpr.rhs()), m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. m_innerDim(xpr.lhs().cols()) { } // Everything below here is taken from CoeffBasedProduct.h typedef typename internal::nested_eval::type LhsNested; typedef typename internal::nested_eval::type RhsNested; typedef typename internal::remove_all::type LhsNestedCleaned; typedef typename internal::remove_all::type RhsNestedCleaned; typedef typename evaluator::type LhsEtorType; typedef typename evaluator::type RhsEtorType; enum { RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, PacketSize = packet_traits::size, LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost : (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits::AddCost==Dynamic || NumTraits::MulCost==Dynamic) ? Dynamic : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits::AddCost, Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, SameType = is_same::value, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % packet_traits::size) == 0 && (RhsFlags&AlignedBit) ) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % packet_traits::size) == 0 && (LhsFlags&AlignedBit) ) ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 : (RhsRowMajor && !CanVectorizeLhs), Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0) | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0) // TODO enable vectorization for mixed types | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. */ CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) && (LhsFlags & RhsFlags & AlignedBit) && (InnerSize % packet_traits::size == 0) }; EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index row, Index col) const { // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, * which is why we don't set the LinearAccessBit. * TODO: this seems possible when the result is a vector */ EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const { const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? index : 0; // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } template const PacketReturnType packet(Index row, Index col) const { PacketScalar res; typedef etor_product_packet_impl PacketImpl; PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } protected: const LhsNested m_lhs; const RhsNested m_rhs; LhsEtorType m_lhsImpl; RhsEtorType m_rhsImpl; // TODO: Get rid of m_innerDim if known at compile time Index m_innerDim; }; template struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > { typedef Product XprType; typedef Product BaseProduct; typedef product_evaluator Base; enum { Flags = Base::Flags | EvalBeforeNestingBit }; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(BaseProduct(xpr.lhs(),xpr.rhs())) {} }; /**************************************** *** Coeff based product, Packet path *** ****************************************/ template struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); res = pmadd(pset1(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet(UnrollingIndex-1, col), res); } }; template struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); res = pmadd(lhs.template packet(row, UnrollingIndex-1), pset1(rhs.coeff(UnrollingIndex-1, col)), res); } }; template struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); } }; template struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); } }; template struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) { res = pset1(0); } }; template struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) { res = pset1(0); } }; template struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { res = pset1(0); for(Index i = 0; i < innerDim; ++i) res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); } }; template struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { res = pset1(0); for(Index i = 0; i < innerDim; ++i) res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); } }; /*************************************************************************** * Triangular products ***************************************************************************/ template struct triangular_product_impl; template struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { triangular_product_impl ::run(dst, lhs.nestedExpression(), rhs, alpha); } }; template struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { triangular_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); } }; /*************************************************************************** * SelfAdjoint products ***************************************************************************/ template struct selfadjoint_product_impl; template struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { selfadjoint_product_impl::run(dst, lhs.nestedExpression(), rhs, alpha); } }; template struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { selfadjoint_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); } }; /*************************************************************************** * Diagonal products ***************************************************************************/ template struct diagonal_product_evaluator_base : evaluator_base { typedef typename scalar_product_traits::ReturnType Scalar; typedef typename internal::packet_traits::type PacketScalar; public: enum { CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, MatrixFlags = evaluator::Flags, DiagFlags = evaluator::Flags, _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), _SameTypes = is_same::value, // FIXME currently we need same types, but in the future the next rule should be the one //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) : m_diagImpl(diag), m_matImpl(mat) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); } protected: template EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const { return internal::pmul(m_matImpl.template packet(row, col), internal::pset1(m_diagImpl.coeff(id))); } template EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const { enum { InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) }; return internal::pmul(m_matImpl.template packet(row, col), m_diagImpl.template packet(id)); } typename evaluator::nestedType m_diagImpl; typename evaluator::nestedType m_matImpl; }; // diagonal * dense template struct product_evaluator, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> : diagonal_product_evaluator_base, OnTheLeft> { typedef diagonal_product_evaluator_base, OnTheLeft> Base; using Base::m_diagImpl; using Base::m_matImpl; using Base::coeff; using Base::packet_impl; typedef typename Base::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; typedef Product XprType; typedef typename XprType::PlainObject PlainObject; enum { StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); } #ifndef __CUDACC__ template EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const { // NVCC complains about template keyword, so we disable this function in CUDA mode return this->template packet_impl(row,col, row, typename internal::conditional::type()); } template EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const { return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); } #endif }; // dense * diagonal template struct product_evaluator, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> : diagonal_product_evaluator_base, OnTheRight> { typedef diagonal_product_evaluator_base, OnTheRight> Base; using Base::m_diagImpl; using Base::m_matImpl; using Base::coeff; using Base::packet_impl; typedef typename Base::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; typedef Product XprType; typedef typename XprType::PlainObject PlainObject; enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); } #ifndef __CUDACC__ template EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const { return this->template packet_impl(row,col, col, typename internal::conditional::type()); } template EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const { return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); } #endif }; /*************************************************************************** * Products with permutation matrices ***************************************************************************/ template struct generic_product_impl { template static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { permut_matrix_product_retval pmpr(lhs, rhs); pmpr.evalTo(dst); } }; template struct generic_product_impl { template static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { permut_matrix_product_retval pmpr(rhs, lhs); pmpr.evalTo(dst); } }; template struct generic_product_impl, Rhs, PermutationShape, DenseShape, ProductTag> { template static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) { permut_matrix_product_retval pmpr(lhs.nestedPermutation(), rhs); pmpr.evalTo(dst); } }; template struct generic_product_impl, DenseShape, PermutationShape, ProductTag> { template static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) { permut_matrix_product_retval pmpr(rhs.nestedPermutation(), lhs); pmpr.evalTo(dst); } }; } // end namespace internal } // end namespace Eigen #endif // EIGEN_PRODUCT_EVALUATORS_H