// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2006-2008 Benoit Jacob // Copyright (C) 2008-2010 Gael Guennebaud // Copyright (C) 2011 Jitse Niesen // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_PRODUCTEVALUATORS_H #define EIGEN_PRODUCTEVALUATORS_H namespace Eigen { namespace internal { // Like more general binary expressions, products need they own evaluator: template< typename T, int ProductTag = internal::product_tag::ret, typename LhsShape = typename evaluator_traits::Shape, typename RhsShape = typename evaluator_traits::Shape, typename LhsScalar = typename T::Lhs::Scalar, typename RhsScalar = typename T::Rhs::Scalar > struct product_evaluator; template struct evaluator > : public product_evaluator > { typedef Product XprType; typedef product_evaluator Base; typedef evaluator type; typedef evaluator nestedType; evaluator(const XprType& xpr) : Base(xpr) {} }; // Helper class to perform a dense product with the destination at hand. // Depending on the sizes of the factors, there are different evaluation strategies // as controlled by internal::product_type. template::value> struct dense_product_impl; // The evaluator for default dense products creates a temporary and call dense_product_impl template struct product_evaluator, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> : public evaluator::PlainObject>::type { typedef Product XprType; typedef typename XprType::PlainObject PlainObject; typedef typename evaluator::type Base; product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast(this)) Base(m_result); dense_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } protected: PlainObject m_result; }; template struct dense_product_impl { template static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); } template static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); } template static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } }; template struct dense_product_impl { typedef typename Product::Scalar Scalar; template static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // TODO bypass GeneralProduct class GeneralProduct(lhs,rhs).evalTo(dst); } template static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // TODO bypass GeneralProduct class GeneralProduct(lhs,rhs).addTo(dst); } template static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // TODO bypass GeneralProduct class GeneralProduct(lhs,rhs).subTo(dst); } template static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { // TODO bypass GeneralProduct class GeneralProduct(lhs,rhs).scaleAndAddTo(dst, alpha); } }; // This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo template struct dense_product_impl_base { typedef typename Product::Scalar Scalar; template static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } template static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } template static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } template static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } }; template struct dense_product_impl : dense_product_impl_base > { typedef typename Product::Scalar Scalar; enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; typedef typename internal::conditional::type MatrixType; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::gemv_selector::HasUsableDirectAccess) >::run(GeneralProduct(lhs,rhs), dst, alpha); } }; template struct dense_product_impl : dense_product_impl_base > { typedef typename Product::Scalar Scalar; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { // TODO bypass GeneralProduct class GeneralProduct(lhs,rhs).scaleAndAddTo(dst, alpha); } }; template struct dense_product_impl { typedef typename Product::Scalar Scalar; template static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst = lazyprod(lhs,rhs); } template static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst += lazyprod(lhs,rhs); } template static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst -= lazyprod(lhs,rhs); } template static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { dst += alpha * lazyprod(lhs,rhs); } }; template struct dense_product_impl : dense_product_impl {}; // Case 2: Evaluate coeff by coeff // // This is mostly taken from CoeffBasedProduct.h // The main difference is that we add an extra argument to the etor_product_*_impl::run() function // for the inner dimension of the product, because evaluator object do not know their size. template struct etor_product_coeff_impl; template struct etor_product_packet_impl; template struct product_evaluator, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > : evaluator_base > { typedef Product XprType; typedef CoeffBasedProduct CoeffBasedProductType; product_evaluator(const XprType& xpr) : m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()), m_innerDim(xpr.lhs().cols()) { } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; // Everything below here is taken from CoeffBasedProduct.h enum { RowsAtCompileTime = traits::RowsAtCompileTime, PacketSize = packet_traits::size, InnerSize = traits::InnerSize, CoeffReadCost = traits::CoeffReadCost, Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, CanVectorizeInner = traits::CanVectorizeInner, Flags = CoeffBasedProductType::Flags }; typedef typename evaluator::type LhsEtorType; typedef typename evaluator::type RhsEtorType; typedef etor_product_coeff_impl CoeffImpl; const CoeffReturnType coeff(Index row, Index col) const { Scalar res; CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, * which is why we don't set the LinearAccessBit. */ const CoeffReturnType coeff(Index index) const { Scalar res; const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? index : 0; CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } template const PacketReturnType packet(Index row, Index col) const { PacketScalar res; typedef etor_product_packet_impl PacketImpl; PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } protected: typename evaluator::type m_lhsImpl; typename evaluator::type m_rhsImpl; // TODO: Get rid of m_innerDim if known at compile time Index m_innerDim; }; /*************************************************************************** * Normal product .coeff() implementation (with meta-unrolling) ***************************************************************************/ /************************************** *** Scalar path - no vectorization *** **************************************/ template struct etor_product_coeff_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) { etor_product_coeff_impl::run(row, col, lhs, rhs, innerDim, res); res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); } }; template struct etor_product_coeff_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res) { res = lhs.coeff(row, 0) * rhs.coeff(0, col); } }; template struct etor_product_coeff_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res) { eigen_assert(innerDim>0 && "you are using a non initialized matrix"); res = lhs.coeff(row, 0) * rhs.coeff(0, col); for(Index i = 1; i < innerDim; ++i) res += lhs.coeff(row, i) * rhs.coeff(i, col); } }; /******************************************* *** Scalar path with inner vectorization *** *******************************************/ template struct etor_product_coeff_vectorized_unroller { typedef typename Lhs::Index Index; enum { PacketSize = packet_traits::size }; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres) { etor_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, innerDim, pres); pres = padd(pres, pmul( lhs.template packet(row, UnrollingIndex) , rhs.template packet(UnrollingIndex, col) )); } }; template struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres) { pres = pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); } }; template struct etor_product_coeff_impl { typedef typename Lhs::PacketScalar Packet; typedef typename Lhs::Index Index; enum { PacketSize = packet_traits::size }; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) { Packet pres; etor_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, innerDim, pres); res = predux(pres); } }; template struct etor_product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) { res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); } }; // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower // NOTE maybe they are now useless since we have a specialization for Block template struct etor_product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) { res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); } }; template struct etor_product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) { res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); } }; template struct etor_product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) { res = lhs.transpose().cwiseProduct(rhs).sum(); } }; template struct etor_product_coeff_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res) { etor_product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, innerDim, res); } }; /******************* *** Packet path *** *******************/ template struct etor_product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); res = pmadd(pset1(lhs.coeff(row, UnrollingIndex)), rhs.template packet(UnrollingIndex, col), res); } }; template struct etor_product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); res = pmadd(lhs.template packet(row, UnrollingIndex), pset1(rhs.coeff(UnrollingIndex, col)), res); } }; template struct etor_product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); } }; template struct etor_product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); } }; template struct etor_product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { eigen_assert(innerDim>0 && "you are using a non initialized matrix"); res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); for(Index i = 1; i < innerDim; ++i) res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); } }; template struct etor_product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { eigen_assert(innerDim>0 && "you are using a non initialized matrix"); res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); for(Index i = 1; i < innerDim; ++i) res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); } }; } // end namespace internal } // end namespace Eigen #endif // EIGEN_PRODUCT_EVALUATORS_H