diff options
author | Jitse Niesen <jitse@maths.leeds.ac.uk> | 2011-04-13 11:49:48 +0100 |
---|---|---|
committer | Jitse Niesen <jitse@maths.leeds.ac.uk> | 2011-04-13 11:49:48 +0100 |
commit | e654405900d2fa6d958bb0eefbeb8523f6a38099 (patch) | |
tree | 2b65c5853cd731f04e3d405bdf7cf4e081b19f52 | |
parent | 7e863248987b06440742b1a02feaeb35cb1d75b6 (diff) |
Implement unrolling in copy_using_evaluator().
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 300 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 6 | ||||
-rw-r--r-- | test/evaluators.cpp | 1 |
3 files changed, 300 insertions, 7 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 886b0aeba..c49c2a50f 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -29,10 +29,15 @@ // This implementation is based on Assign.h -// copy_using_evaluator_traits is based on assign_traits - namespace internal { +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits +// (actually, it's identical) + template <typename Derived, typename OtherDerived> struct copy_using_evaluator_traits { @@ -101,15 +106,15 @@ public: enum { Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) ? ( - int(MayUnrollCompletely) ? int(NoUnrolling) // int(CompleteUnrolling) - : int(MayUnrollInner) ? int(NoUnrolling) // int(InnerUnrolling) + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) : int(NoUnrolling) ) : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(NoUnrolling) // int(CompleteUnrolling) + ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) : int(Traversal) == int(LinearTraversal) - ? ( bool(MayUnrollCompletely) ? int(NoUnrolling) // int(CompleteUnrolling) + ? ( bool(MayUnrollCompletely) ? 
int(CompleteUnrolling) : int(NoUnrolling) ) : int(NoUnrolling) }; @@ -138,6 +143,175 @@ public: #endif }; +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +// TODO: Ideally, we want to use only the evaluator objects here, not the expression objects +// However, we need to access .rowIndexByOuterInner() which is in the expression object + +/************************ +*** Default traversal *** +************************/ + +template<typename DstXprType, typename SrcXprType, int Index, int Stop> +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime + }; + + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst) + { + // TODO: Use copyCoeffByOuterInner ? 
+ typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, inner); + typename DstXprType::Index col = dst.colIndexByOuterInner(outer, inner); + dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstXprType, SrcXprType, Index+1, Stop> + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template<typename DstXprType, typename SrcXprType, int Stop> +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstXprType, SrcXprType, Stop, Stop> +{ + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&) { } +}; + +template<typename DstXprType, typename SrcXprType, int Index, int Stop> +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst, + int outer) + { + // TODO: Use copyCoeffByOuterInner ? 
+ typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, Index); + typename DstXprType::Index col = dst.colIndexByOuterInner(outer, Index); + dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col); + copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstXprType, SrcXprType, Index+1, Stop> + ::run(dstEvaluator, srcEvaluator, dst, outer); + } +}; + +template<typename DstXprType, typename SrcXprType, int Stop> +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstXprType, SrcXprType, Stop, Stop> +{ + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&, int) { } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template<typename DstXprType, typename SrcXprType, int Index, int Stop> +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst) + { + // use copyCoeff ? 
+ dstEvaluator.coeffRef(Index) = srcEvaluator.coeff(Index); + copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstXprType, SrcXprType, Index+1, Stop> + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template<typename DstXprType, typename SrcXprType, int Stop> +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstXprType, SrcXprType, Stop, Stop> +{ + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&) { } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template<typename DstXprType, typename SrcXprType, int Index, int Stop> +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime, + JointAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment + }; + + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst) + { + // TODO: Use copyPacketByOuterInner ? 
+ typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, inner); + typename DstXprType::Index col = dst.colIndexByOuterInner(outer, inner); + dstEvaluator.template writePacket<Aligned>(row, col, srcEvaluator.template packet<JointAlignment>(row, col)); + copy_using_evaluator_innervec_CompleteUnrolling<DstXprType, SrcXprType, + Index+packet_traits<typename DstXprType::Scalar>::size, Stop>::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template<typename DstXprType, typename SrcXprType, int Stop> +struct copy_using_evaluator_innervec_CompleteUnrolling<DstXprType, SrcXprType, Stop, Stop> +{ + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&) { } +}; + +template<typename DstXprType, typename SrcXprType, int Index, int Stop> +struct copy_using_evaluator_innervec_InnerUnrolling +{ + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst, + int outer) + { + // TODO: Use copyPacketByOuterInner ? 
+ typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, Index); + typename DstXprType::Index col = dst.colIndexByOuterInner(outer, Index); + dstEvaluator.template writePacket<Aligned>(row, col, srcEvaluator.template packet<Aligned>(row, col)); + copy_using_evaluator_innervec_InnerUnrolling<DstXprType, SrcXprType, + Index+packet_traits<typename DstXprType::Scalar>::size, Stop>::run(dstEvaluator, srcEvaluator, dst, outer); + } +}; + +template<typename DstXprType, typename SrcXprType, int Stop> +struct copy_using_evaluator_innervec_InnerUnrolling<DstXprType, SrcXprType, Stop, Stop> +{ + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&, int) { } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + // copy_using_evaluator_impl is based on assign_impl template<typename DstXprType, typename SrcXprType, @@ -171,6 +345,41 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr } }; +template<typename DstXprType, typename SrcXprType> +struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling> +{ + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstXprType, SrcXprType, 0, DstXprType::SizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template<typename DstXprType, typename SrcXprType> +struct copy_using_evaluator_impl<DstXprType, SrcXprType, 
DefaultTraversal, InnerUnrolling> +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstXprType, SrcXprType, 0, DstXprType::InnerSizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, dst, outer); + } +}; + /*************************** *** Linear vectorization *** ***************************/ @@ -239,6 +448,29 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers } }; +template<typename DstXprType, typename SrcXprType> +struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling> +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + enum { size = DstXprType::SizeAtCompileTime, + packetSize = packet_traits<typename DstXprType::Scalar>::size, + alignedSize = (size/packetSize)*packetSize }; + + copy_using_evaluator_innervec_CompleteUnrolling<DstXprType, SrcXprType, 0, alignedSize> + ::run(dstEvaluator, srcEvaluator, dst); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstXprType, SrcXprType, alignedSize, size> + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + /************************** *** Inner vectorization *** **************************/ @@ -260,6 +492,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, 
InnerVectorizedTraversa const Index packetSize = packet_traits<typename DstXprType::Scalar>::size; for(Index outer = 0; outer < outerSize; ++outer) for(Index inner = 0; inner < innerSize; inner+=packetSize) { + // TODO: Use copyPacketByOuterInner ? Index row = dst.rowIndexByOuterInner(outer, inner); Index col = dst.colIndexByOuterInner(outer, inner); dstEvaluator.template writePacket<Aligned>(row, col, srcEvaluator.template packet<Aligned>(row, col)); @@ -267,6 +500,41 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa } }; +template<typename DstXprType, typename SrcXprType> +struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling> +{ + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_innervec_CompleteUnrolling<DstXprType, SrcXprType, 0, DstXprType::SizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template<typename DstXprType, typename SrcXprType> +struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling> +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_innervec_InnerUnrolling<DstXprType, SrcXprType, 0, DstXprType::InnerSizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, dst, outer); + } +}; + 
/*********************** *** Linear traversal *** ***********************/ @@ -289,6 +557,22 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro } }; +template<typename DstXprType, typename SrcXprType> +struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling> +{ + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstXprType, SrcXprType, 0, DstXprType::SizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + /************************** *** Slice vectorization *** ***************************/ @@ -348,6 +632,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa } }; +/*************************************************************************** +* Part 4 : Entry points +***************************************************************************/ + // Based on DenseBase::LazyAssign() template<typename DstXprType, typename SrcXprType> diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index db6faca10..6b08c78a0 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -220,6 +220,12 @@ struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> > } template<int LoadMode> + PacketScalar packet(Index row, Index col) const + { + return m_functor.packetOp(row, col); + } + + template<int LoadMode> PacketScalar packet(Index index) const { return m_functor.packetOp(index); diff --git a/test/evaluators.cpp b/test/evaluators.cpp index aa57e4ad5..4c55736eb 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -1,4 +1,3 @@ - #define EIGEN_ENABLE_EVALUATORS #include "main.h" |