diff options
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 30 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 21 | ||||
-rw-r--r-- | Eigen/src/Core/ProductEvaluators.h | 394 |
3 files changed, 400 insertions, 45 deletions
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index cc8477598..015e7d698 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -446,36 +446,6 @@ protected: typename evaluator<ArgType>::type m_argImpl; }; -// -------------------- Product -------------------- - -template<typename Lhs, typename Rhs> -struct evaluator_impl<Product<Lhs,Rhs> > : public evaluator<typename Product<Lhs,Rhs>::PlainObject>::type -{ - typedef Product<Lhs,Rhs> XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator<PlainObject>::type evaluator_base; - -// enum { -// EvaluateLhs = ; -// EvaluateRhs = ; -// }; - - evaluator_impl(const XprType& product) : evaluator_base(m_result) - { - // here we process the left and right hand sides with a specialized evaluator - // perhaps this step should be done by the TreeOptimizer to get a canonical tree and reduce evaluator instanciations - // typename product_operand_evaluator<Lhs>::type m_lhsImpl(product.lhs()); - // typename product_operand_evaluator<Rhs>::type m_rhsImpl(product.rhs()); - - // TODO do not rely on previous product mechanism !! - m_result.resize(product.rows(), product.cols()); - m_result.noalias() = product.lhs() * product.rhs(); - } - -protected: - PlainObject m_result; -}; - // -------------------- Map -------------------- template<typename Derived, int AccessorsType> diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index f7824aa80..6e66888c7 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -42,25 +42,16 @@ template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl; * */ +// Use ProductReturnType to get correct traits, in particular vectorization flags namespace internal { template<typename Lhs, typename Rhs> struct traits<Product<Lhs, Rhs> > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<Lhs>::type LhsCleaned; - typedef typename remove_all<Rhs>::type RhsCleaned; - typedef typename scalar_product_traits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar; - typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind, - typename traits<RhsCleaned>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<LhsCleaned>::Index, - typename traits<RhsCleaned>::Index>::type Index; + : traits<typename ProductReturnType<Lhs, Rhs>::Type> +{ + // We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix> + // TODO: This flag should eventually go in a separate evaluator traits class enum { - RowsAtCompileTime = LhsCleaned::RowsAtCompileTime, - ColsAtCompileTime = RhsCleaned::ColsAtCompileTime, - MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime, - MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime, - Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order - CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits + Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~EvalBeforeNestingBit }; }; } // end namespace internal diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h new file mode 100644 index 000000000..aadaa9303 --- /dev/null +++ b/Eigen/src/Core/ProductEvaluators.h @@ -0,0 +1,394 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + + +#ifndef EIGEN_PRODUCTEVALUATORS_H +#define EIGEN_PRODUCTEVALUATORS_H + +namespace Eigen { + +namespace internal { + +// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or +// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic +// in ProductReturnType to decide. + +template<typename XprType, typename ProductType> +struct product_evaluator_dispatcher; + +template<typename Lhs, typename Rhs> +struct evaluator_impl<Product<Lhs, Rhs> > + : product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type> +{ + typedef Product<Lhs, Rhs> XprType; + typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base; + + evaluator_impl(const XprType& xpr) : Base(xpr) + { } +}; + +// Case 1: Evaluate all at once +// +// We can view the GeneralProduct class as a part of the product evaluator. +// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct. +// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case. + +template<typename Lhs, typename Rhs> +struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> > + : public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type +{ + typedef Product<Lhs, Rhs> XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type evaluator_base; + + product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result) + { + m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum(); + } + +protected: + PlainObject m_result; +}; + +// For the other three subcases, simply call the evalTo() method of GeneralProduct +// TODO: GeneralProduct should take evaluators, not expression objects. + +template<typename Lhs, typename Rhs, int ProductType> +struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> > + : public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type +{ + typedef Product<Lhs, Rhs> XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type evaluator_base; + + product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result) + { + m_result.resize(xpr.rows(), xpr.cols()); + GeneralProduct<Lhs, Rhs, ProductType>(xpr.lhs(), xpr.rhs()).evalTo(m_result); + } + +protected: + PlainObject m_result; +}; + +// Case 2: Evaluate coeff by coeff +// +// This is mostly taken from CoeffBasedProduct.h +// The main difference is that we add an extra argument to the etor_product_*_impl::run() function +// for the inner dimension of the product, because evaluator object do not know their size. + +template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl; + +template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl; + +template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags> +struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> > + : evaluator_impl_base<Product<Lhs, Rhs> > +{ + typedef Product<Lhs, Rhs> XprType; + typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType; + + product_evaluator_dispatcher(const XprType& xpr) + : m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()), + m_innerDim(xpr.lhs().cols()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + // Everything below here is taken from CoeffBasedProduct.h + + enum { + RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime, + PacketSize = packet_traits<Scalar>::size, + InnerSize = traits<CoeffBasedProductType>::InnerSize, + CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost, + Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner + }; + + typedef typename evaluator<Lhs>::type LhsEtorType; + typedef typename evaluator<Rhs>::type RhsEtorType; + typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, + Unroll ? InnerSize-1 : Dynamic, + LhsEtorType, RhsEtorType, Scalar> CoeffImpl; + + const CoeffReturnType coeff(Index row, Index col) const + { + Scalar res; + CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + */ + const CoeffReturnType coeff(Index index) const + { + Scalar res; + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? index : 0; + CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + template<int LoadMode> + const PacketReturnType packet(Index row, Index col) const + { + PacketScalar res; + typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, + Unroll ? InnerSize-1 : Dynamic, + LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + +protected: + typename evaluator<Lhs>::type m_lhsImpl; + typename evaluator<Rhs>::type m_rhsImpl; + + // TODO: Get rid of m_innerDim if known at compile time + Index m_innerDim; +}; + +/*************************************************************************** +* Normal product .coeff() implementation (with meta-unrolling) +***************************************************************************/ + +/************************************** +*** Scalar path - no vectorization *** +**************************************/ + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) + { + etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res); + res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res) + { + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + for(Index i = 1; i < innerDim; ++i) + res += lhs.coeff(row, i) * rhs.coeff(i, col); + } +}; + +/******************************************* +*** Scalar path with inner vectorization *** +*******************************************/ + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet> +struct etor_product_coeff_vectorized_unroller +{ + typedef typename Lhs::Index Index; + enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres) + { + etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres); + pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); + } +}; + +template<typename Lhs, typename Rhs, typename Packet> +struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres) + { + pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); + } +}; + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::PacketScalar Packet; + typedef typename Lhs::Index Index; + enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) + { + Packet pres; + etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres); + etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res); + res = predux(pres); + } +}; + +template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> +struct etor_product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); + } +}; + +// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower +// NOTE maybe they are now useless since we have a specialization for Block<Matrix> +template<typename Lhs, typename Rhs, int RhsCols> +struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); + } +}; + +template<typename Lhs, typename Rhs, int LhsRows> +struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); + } +}; + +template<typename Lhs, typename Rhs> +struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.transpose().cwiseProduct(rhs).sum(); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> +struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res) + { + etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res); + } +}; + +/******************* +*** Packet path *** +*******************/ + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); + } +}; + +template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + for(Index i = 1; i < innerDim; ++i) + res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); + for(Index i = 1; i < innerDim; ++i) + res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_EVALUATORS_H |