diff options
Diffstat (limited to 'third_party/eigen3/Eigen/src/Core/ProductEvaluators.h')
-rw-r--r-- | third_party/eigen3/Eigen/src/Core/ProductEvaluators.h | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/third_party/eigen3/Eigen/src/Core/ProductEvaluators.h b/third_party/eigen3/Eigen/src/Core/ProductEvaluators.h new file mode 100644 index 0000000000..855914f2eb --- /dev/null +++ b/third_party/eigen3/Eigen/src/Core/ProductEvaluators.h @@ -0,0 +1,411 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_PRODUCTEVALUATORS_H +#define EIGEN_PRODUCTEVALUATORS_H + +namespace Eigen { + +namespace internal { + +// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or +// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic +// in ProductReturnType to decide. + +template<typename XprType, typename ProductType> +struct product_evaluator_dispatcher; + +template<typename Lhs, typename Rhs> +struct evaluator_impl<Product<Lhs, Rhs> > + : product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type> +{ + typedef Product<Lhs, Rhs> XprType; + typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base; + + evaluator_impl(const XprType& xpr) : Base(xpr) + { } +}; + +template<typename XprType, typename ProductType> +struct product_evaluator_traits_dispatcher; + +template<typename Lhs, typename Rhs> +struct evaluator_traits<Product<Lhs, Rhs> > + : product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type> +{ + static const int AssumeAliasing = 1; +}; + +// Case 1: Evaluate all at once +// +// We can view the GeneralProduct class as a part of the product evaluator. +// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct. +// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case. + +template<typename Lhs, typename Rhs> +struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> > +{ + static const int HasEvalTo = 0; +}; + +template<typename Lhs, typename Rhs> +struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> > + : public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type +{ + typedef Product<Lhs, Rhs> XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type evaluator_base; + + // TODO: Computation is too early (?) + product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result) + { + m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum(); + } + +protected: + PlainObject m_result; +}; + +// For the other three subcases, simply call the evalTo() method of GeneralProduct +// TODO: GeneralProduct should take evaluators, not expression objects. + +template<typename Lhs, typename Rhs, int ProductType> +struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> > +{ + static const int HasEvalTo = 1; +}; + +template<typename Lhs, typename Rhs, int ProductType> +struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> > +{ + typedef Product<Lhs, Rhs> XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type evaluator_base; + + product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr) + { } + + template<typename DstEvaluatorType, typename DstXprType> + void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const + { + dst.resize(m_xpr.rows(), m_xpr.cols()); + GeneralProduct<Lhs, Rhs, ProductType>(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst); + } + +protected: + const XprType& m_xpr; +}; + +// Case 2: Evaluate coeff by coeff +// +// This is mostly taken from CoeffBasedProduct.h +// The main difference is that we add an extra argument to the etor_product_*_impl::run() function +// for the inner dimension of the product, because evaluator object do not know their size. + +template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl; + +template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl; + +template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags> +struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> > +{ + static const int HasEvalTo = 0; +}; + +template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags> +struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> > + : evaluator_impl_base<Product<Lhs, Rhs> > +{ + typedef Product<Lhs, Rhs> XprType; + typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType; + + product_evaluator_dispatcher(const XprType& xpr) + : m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()), + m_innerDim(xpr.lhs().cols()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + // Everything below here is taken from CoeffBasedProduct.h + + enum { + RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime, + PacketSize = packet_traits<Scalar>::size, + InnerSize = traits<CoeffBasedProductType>::InnerSize, + CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost, + Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner + }; + + typedef typename evaluator<Lhs>::type LhsEtorType; + typedef typename evaluator<Rhs>::type RhsEtorType; + typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, + Unroll ? InnerSize-1 : Dynamic, + LhsEtorType, RhsEtorType, Scalar> CoeffImpl; + + const CoeffReturnType coeff(Index row, Index col) const + { + Scalar res; + CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + */ + const CoeffReturnType coeff(Index index) const + { + Scalar res; + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? index : 0; + CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + template<int LoadMode> + const PacketReturnType packet(Index row, Index col) const + { + PacketScalar res; + typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, + Unroll ? InnerSize-1 : Dynamic, + LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + +protected: + typename evaluator<Lhs>::type m_lhsImpl; + typename evaluator<Rhs>::type m_rhsImpl; + + // TODO: Get rid of m_innerDim if known at compile time + Index m_innerDim; +}; + +/*************************************************************************** +* Normal product .coeff() implementation (with meta-unrolling) +***************************************************************************/ + +/************************************** +*** Scalar path - no vectorization *** +**************************************/ + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) + { + etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res); + res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res) + { + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + for(Index i = 1; i < innerDim; ++i) + res += lhs.coeff(row, i) * rhs.coeff(i, col); + } +}; + +/******************************************* +*** Scalar path with inner vectorization *** +*******************************************/ + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet> +struct etor_product_coeff_vectorized_unroller +{ + typedef typename Lhs::Index Index; + enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres) + { + etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres); + pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); + } +}; + +template<typename Lhs, typename Rhs, typename Packet> +struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres) + { + pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); + } +}; + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::PacketScalar Packet; + typedef typename Lhs::Index Index; + enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) + { + Packet pres; + etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres); + etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res); + res = predux(pres); + } +}; + +template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> +struct etor_product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); + } +}; + +// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower +// NOTE maybe they are now useless since we have a specialization for Block<Matrix> +template<typename Lhs, typename Rhs, int RhsCols> +struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); + } +}; + +template<typename Lhs, typename Rhs, int LhsRows> +struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); + } +}; + +template<typename Lhs, typename Rhs> +struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.transpose().cwiseProduct(rhs).sum(); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res) + { + etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res); + } +}; + +/******************* +*** Packet path *** +*******************/ + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); + } +}; + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + for(Index i = 1; i < innerDim; ++i) + res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); + for(Index i = 1; i < innerDim; ++i) + res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_EVALUATORS_H |