diff options
-rw-r--r-- | Eigen/Core | 1 | ||||
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 463 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/functors/AssignmentFunctors.h | 167 | ||||
-rw-r--r-- | test/evaluator_common.h | 0 |
5 files changed, 478 insertions, 165 deletions
diff --git a/Eigen/Core b/Eigen/Core index bf2d3a908..722a49030 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -383,6 +383,7 @@ using std::ptrdiff_t; #include "src/Core/ArrayWrapper.h" #ifdef EIGEN_ENABLE_EVALUATORS +#include "src/Core/functors/AssignmentFunctors.h" #include "src/Core/Product.h" #include "src/Core/CoreEvaluators.h" #include "src/Core/AssignEvaluator.h" diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 5f2fb9d38..adea7ef13 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com> -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011-2013 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk> // // This Source Code Form is subject to the terms of the Mozilla @@ -94,7 +94,7 @@ public: enum { Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) ? ( - int(MayUnrollCompletely) ? int(CompleteUnrolling) + int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(MayUnrollInner) ? 
int(InnerUnrolling) : int(NoUnrolling) ) @@ -139,7 +139,7 @@ public: *** Default traversal *** ************************/ -template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop> struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { typedef typename DstEvaluatorType::XprType DstXprType; @@ -150,69 +150,74 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling }; static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator) + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel + ) { - dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); + kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator); copy_using_evaluator_DefaultTraversal_CompleteUnrolling - <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop> - ::run(dstEvaluator, srcEvaluator); + <DstEvaluatorType, SrcEvaluatorType, Kernel, Index+1, Stop> + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop> -struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop> +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } }; -template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop> struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType 
&srcEvaluator, + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, int outer) { - dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator); + kernel.assignCoeffByOuterInner(outer, Index, dstEvaluator, srcEvaluator); copy_using_evaluator_DefaultTraversal_InnerUnrolling - <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop> - ::run(dstEvaluator, srcEvaluator, outer); + <DstEvaluatorType, SrcEvaluatorType, Kernel, Index+1, Stop> + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; -template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop> -struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop> +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&, int) { } }; /*********************** *** Linear traversal *** ***********************/ -template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop> struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator) + SrcEvaluatorType &srcEvaluator, + const Kernel& kernel + ) { - dstEvaluator.copyCoeff(Index, srcEvaluator); + kernel.assignCoeff(Index, dstEvaluator, srcEvaluator); copy_using_evaluator_LinearTraversal_CompleteUnrolling - <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop> - ::run(dstEvaluator, srcEvaluator); + <DstEvaluatorType, SrcEvaluatorType, Kernel, Index+1, Stop> + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template<typename DstEvaluatorType, typename SrcEvaluatorType, int 
Stop> -struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop> +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } }; /************************** *** Inner vectorization *** **************************/ -template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop> struct copy_using_evaluator_innervec_CompleteUnrolling { typedef typename DstEvaluatorType::XprType DstXprType; @@ -225,63 +230,66 @@ struct copy_using_evaluator_innervec_CompleteUnrolling }; static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator) + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel + ) { - dstEvaluator.template copyPacketByOuterInner<Aligned, JointAlignment>(outer, inner, srcEvaluator); + kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner, dstEvaluator, srcEvaluator); enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size }; copy_using_evaluator_innervec_CompleteUnrolling - <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop> - ::run(dstEvaluator, srcEvaluator); + <DstEvaluatorType, SrcEvaluatorType, Kernel, NextIndex, Stop> + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop> -struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop> +struct 
copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } }; -template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Index, int Stop> struct copy_using_evaluator_innervec_InnerUnrolling { static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, int outer) { - dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator); + kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index, dstEvaluator, srcEvaluator); typedef typename DstEvaluatorType::XprType DstXprType; enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size }; copy_using_evaluator_innervec_InnerUnrolling - <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop> - ::run(dstEvaluator, srcEvaluator, outer); + <DstEvaluatorType, SrcEvaluatorType, Kernel, NextIndex, Stop> + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; -template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop> -struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop> +template<typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel, int Stop> +struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel &, int) { } }; /*************************************************************************** * Part 3 : implementation of all cases 
***************************************************************************/ -// copy_using_evaluator_impl is based on assign_impl +// dense_assignment_loop is based on assign_impl -template<typename DstXprType, typename SrcXprType, +template<typename DstXprType, typename SrcXprType, typename Kernel, int Traversal = copy_using_evaluator_traits<DstXprType, SrcXprType>::Traversal, int Unrolling = copy_using_evaluator_traits<DstXprType, SrcXprType>::Unrolling> -struct copy_using_evaluator_impl; +struct dense_assignment_loop; /************************ *** Default traversal *** ************************/ -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, DefaultTraversal, NoUnrolling> { - static void run(DstXprType& dst, const SrcXprType& src) + static void run(DstXprType& dst, const SrcXprType& src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -292,16 +300,16 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr for(Index outer = 0; outer < dst.outerSize(); ++outer) { for(Index inner = 0; inner < dst.innerSize(); ++inner) { - dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); + kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator); } } } }; -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, DefaultTraversal, CompleteUnrolling> { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, 
const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -310,16 +318,16 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Compl SrcEvaluatorType srcEvaluator(src); copy_using_evaluator_DefaultTraversal_CompleteUnrolling - <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime> - ::run(dstEvaluator, srcEvaluator); + <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::SizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, DefaultTraversal, InnerUnrolling> { typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -330,8 +338,8 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Inner const Index outerSize = dst.outerSize(); for(Index outer = 0; outer < outerSize; ++outer) copy_using_evaluator_DefaultTraversal_InnerUnrolling - <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime> - ::run(dstEvaluator, srcEvaluator, outer); + <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::InnerSizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; @@ -339,43 +347,50 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Inner *** Linear vectorization *** ***************************/ + +// The goal of unaligned_dense_assignment_loop is simply 
to factorize the handling +// of the non vectorizable beginning and ending parts + template <bool IsAligned = false> -struct unaligned_copy_using_evaluator_impl +struct unaligned_dense_assignment_loop { // if IsAligned = true, then do nothing - template <typename SrcEvaluatorType, typename DstEvaluatorType> - static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, + template <typename SrcEvaluatorType, typename DstEvaluatorType, typename Kernel> + static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, const Kernel&, typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {} }; template <> -struct unaligned_copy_using_evaluator_impl<false> +struct unaligned_dense_assignment_loop<false> { // MSVC must not inline this functions. If it does, it fails to optimize the // packet access path. + // FIXME check which version exhibits this issue #ifdef _MSC_VER - template <typename DstEvaluatorType, typename SrcEvaluatorType> + template <typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel> static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator, const SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, typename DstEvaluatorType::Index start, typename DstEvaluatorType::Index end) #else - template <typename DstEvaluatorType, typename SrcEvaluatorType> + template <typename DstEvaluatorType, typename SrcEvaluatorType, typename Kernel> static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, const SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, typename DstEvaluatorType::Index start, typename DstEvaluatorType::Index end) #endif { for (typename DstEvaluatorType::Index index = start; index < end; ++index) - dstEvaluator.copyCoeff(index, srcEvaluator); + kernel.assignCoeff(index, dstEvaluator, srcEvaluator); } }; -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, NoUnrolling> 
+template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, LinearVectorizedTraversal, NoUnrolling> { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -395,22 +410,20 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&dstEvaluator.coeffRef(0), size); const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; - unaligned_copy_using_evaluator_impl<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, 0, alignedStart); + unaligned_dense_assignment_loop<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, kernel, 0, alignedStart); for(Index index = alignedStart; index < alignedEnd; index += packetSize) - { - dstEvaluator.template copyPacket<dstAlignment, srcAlignment>(index, srcEvaluator); - } + kernel.template assignPacket<dstAlignment, srcAlignment>(index, dstEvaluator, srcEvaluator); - unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size); + unaligned_dense_assignment_loop<>::run(dstEvaluator, srcEvaluator, kernel, alignedEnd, size); } }; -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, LinearVectorizedTraversal, CompleteUnrolling> { typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { 
typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -423,11 +436,11 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers alignedSize = (size/packetSize)*packetSize }; copy_using_evaluator_innervec_CompleteUnrolling - <DstEvaluatorType, SrcEvaluatorType, 0, alignedSize> - ::run(dstEvaluator, srcEvaluator); + <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, alignedSize> + ::run(dstEvaluator, srcEvaluator, kernel); copy_using_evaluator_DefaultTraversal_CompleteUnrolling - <DstEvaluatorType, SrcEvaluatorType, alignedSize, size> - ::run(dstEvaluator, srcEvaluator); + <DstEvaluatorType, SrcEvaluatorType, Kernel, alignedSize, size> + ::run(dstEvaluator, srcEvaluator, kernel); } }; @@ -435,10 +448,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers *** Inner vectorization *** **************************/ -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, NoUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, InnerVectorizedTraversal, NoUnrolling> { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -451,16 +464,15 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa const Index outerSize = dst.outerSize(); const Index packetSize = packet_traits<typename DstXprType::Scalar>::size; for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; inner+=packetSize) { - dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, inner, srcEvaluator); - } + for(Index inner = 
0; inner < innerSize; inner+=packetSize) + kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, inner, dstEvaluator, srcEvaluator); } }; -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, InnerVectorizedTraversal, CompleteUnrolling> { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -469,16 +481,16 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa SrcEvaluatorType srcEvaluator(src); copy_using_evaluator_innervec_CompleteUnrolling - <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime> - ::run(dstEvaluator, srcEvaluator); + <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::SizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, InnerVectorizedTraversal, InnerUnrolling> { typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -489,8 +501,8 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa const 
Index outerSize = dst.outerSize(); for(Index outer = 0; outer < outerSize; ++outer) copy_using_evaluator_innervec_InnerUnrolling - <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime> - ::run(dstEvaluator, srcEvaluator, outer); + <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::InnerSizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; @@ -498,10 +510,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa *** Linear traversal *** ***********************/ -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, LinearTraversal, NoUnrolling> { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -512,14 +524,14 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro const Index size = dst.size(); for(Index i = 0; i < size; ++i) - dstEvaluator.copyCoeff(i, srcEvaluator); + kernel.assignCoeff(i, dstEvaluator, srcEvaluator); } }; -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, LinearTraversal, CompleteUnrolling> { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type 
SrcEvaluatorType; @@ -528,8 +540,8 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, Comple SrcEvaluatorType srcEvaluator(src); copy_using_evaluator_LinearTraversal_CompleteUnrolling - <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime> - ::run(dstEvaluator, srcEvaluator); + <DstEvaluatorType, SrcEvaluatorType, Kernel, 0, DstXprType::SizeAtCompileTime> + ::run(dstEvaluator, srcEvaluator, kernel); } }; @@ -537,10 +549,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, Comple *** Slice vectorization *** ***************************/ -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversal, NoUnrolling> +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, SliceVectorizedTraversal, NoUnrolling> { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -566,19 +578,16 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa { const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); // do the non-vectorizable part of the assignment - for(Index inner = 0; inner<alignedStart ; ++inner) { - dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); - } + for(Index inner = 0; inner<alignedStart ; ++inner) + kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator); // do the vectorizable part of the assignment - for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) { - dstEvaluator.template copyPacketByOuterInner<dstAlignment, Unaligned>(outer, inner, srcEvaluator); - } + for(Index inner = alignedStart; inner<alignedEnd; 
inner+=packetSize) + kernel.template assignPacketByOuterInner<dstAlignment, Unaligned>(outer, inner, dstEvaluator, srcEvaluator); // do the non-vectorizable part of the assignment - for(Index inner = alignedEnd; inner<innerSize ; ++inner) { - dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); - } + for(Index inner = alignedEnd; inner<innerSize ; ++inner) + kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator); alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize); } @@ -589,10 +598,12 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa *** All-at-once traversal *** ****************************/ -template<typename DstXprType, typename SrcXprType> -struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoUnrolling> +// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael) +// Indeed, what to do with the kernel?? +template<typename DstXprType, typename SrcXprType, typename Kernel> +struct dense_assignment_loop<DstXprType, SrcXprType, Kernel, AllAtOnceTraversal, NoUnrolling> { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &/*kernel*/) { typedef typename evaluator<DstXprType>::type DstEvaluatorType; typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; @@ -601,23 +612,126 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU SrcEvaluatorType srcEvaluator(src); // Evaluate rhs in temporary to prevent aliasing problems in a = a * a; - // TODO: Do not pass the xpr object to evalTo() + // TODO: Do not pass the xpr object to evalTo() (Jitse) srcEvaluator.evalTo(dstEvaluator, dst); } }; /*************************************************************************** -* Part 4 : Entry points +* Part 4 : Generic Assignment routine +***************************************************************************/ + +// 
This class generalize the assignment of a coefficient (or packet) from one dense evaluator +// to another dense writable evaluator. +// It is parametrized by the actual assignment functor. This abstraction level permits +// to keep the evaluation loops as simple and as generic as possible. +// One can customize the assignment using this generic dense_assignment_kernel with different +// functors, or by completely overloading it, by-passing a functor. +// FIXME: This kernel could also holds the destination and source evaluator +// thus simplifying the dense_assignment_loop prototypes. (Gael) +template<typename Functor> +struct generic_dense_assignment_kernel +{ + const Functor &m_functor; + generic_dense_assignment_kernel(const Functor &func) : m_functor(func) {} + + template<typename DstEvaluatorType, typename SrcEvaluatorType> + void assignCoeff(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignCoeff(dst.coeffRef(row,col), src.coeff(row,col)); + } + + template<typename DstEvaluatorType, typename SrcEvaluatorType> + void assignCoeff(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignCoeff(dst.coeffRef(index), src.coeff(index)); + } + + template<typename DstEvaluatorType, typename SrcEvaluatorType> + void assignCoeffByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + typedef typename DstEvaluatorType::Index Index; + Index row = rowIndexByOuterInner<DstEvaluatorType>(outer, inner); + Index col = colIndexByOuterInner<DstEvaluatorType>(outer, inner); + assignCoeff(row, col, dst, src); + } + + + template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType> + void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, 
DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignPacket<StoreMode>(&dst.coeffRef(row,col), src.template packet<LoadMode>(row,col)); + } + + template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType> + void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignPacket<StoreMode>(&dst.coeffRef(index), src.template packet<LoadMode>(index)); + } + + template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType> + void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + typedef typename DstEvaluatorType::Index Index; + Index row = rowIndexByOuterInner<DstEvaluatorType>(outer, inner); + Index col = colIndexByOuterInner<DstEvaluatorType>(outer, inner); + assignPacket<StoreMode,LoadMode>(row, col, dst, src); + } + + template<typename EvaluatorType, typename Index> + static Index rowIndexByOuterInner(Index outer, Index inner) + { + typedef typename EvaluatorType::ExpressionTraits Traits; + return int(Traits::RowsAtCompileTime) == 1 ? 0 + : int(Traits::ColsAtCompileTime) == 1 ? inner + : int(Traits::Flags)&RowMajorBit ? outer + : inner; + } + + template<typename EvaluatorType, typename Index> + static Index colIndexByOuterInner(Index outer, Index inner) + { + typedef typename EvaluatorType::ExpressionTraits Traits; + return int(Traits::ColsAtCompileTime) == 1 ? 0 + : int(Traits::RowsAtCompileTime) == 1 ? inner + : int(Traits::Flags)&RowMajorBit ? 
inner + : outer; + } +}; + +template<typename DstXprType, typename SrcXprType, typename Functor> +void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +{ +#ifdef EIGEN_DEBUG_ASSIGN + internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug(); +#endif + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + typedef generic_dense_assignment_kernel<Functor> Kernel; + Kernel kernel(func); + + dense_assignment_loop<DstXprType, SrcXprType, Kernel>::run(const_cast<DstXprType&>(dst), src, kernel); +} + +template<typename DstXprType, typename SrcXprType> +void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) +{ + call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>()); +} + +/*************************************************************************** +* Part 5 : Entry points ***************************************************************************/ // Based on DenseBase::LazyAssign() +// The following functions are just for testing and they are meant to be moved to operator= and the likes. 
template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType> EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst, const EigenBase<SrcXprType>& src) { - return noalias_copy_using_evaluator(dst.expression(), src.derived()); + return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op<typename DstXprType::Scalar>()); } template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing> @@ -641,57 +755,90 @@ struct AddEvalIfAssumingAliasing<XprType, 1> } }; -template<typename DstXprType, typename SrcXprType> +template<typename DstXprType, typename SrcXprType, typename Functor> EIGEN_STRONG_INLINE -const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src) +const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func) { return noalias_copy_using_evaluator(dst.const_cast_derived(), - AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived())); + AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()), + func + ); } +// this mimics operator= template<typename DstXprType, typename SrcXprType> EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src) +const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src) +{ + return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>()); +} + +template<typename DstXprType, typename SrcXprType, typename Functor> +EIGEN_STRONG_INLINE +const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func) { #ifdef EIGEN_DEBUG_ASSIGN internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug(); #endif #ifdef 
EIGEN_NO_AUTOMATIC_RESIZING eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size()) - : (dst.rows() == src.rows() && dst.cols() == src.cols()))) - && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + : (dst.rows() == src.rows() && dst.cols() == src.cols()))) + && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); #else dst.const_cast_derived().resizeLike(src.derived()); #endif - return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived()); + call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); + return dst.derived(); } -template<typename DstXprType, typename SrcXprType> +template<typename DstXprType, typename SrcXprType, typename Functor> EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src) +const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func) { - return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived()); -} - -template<typename DstXprType, typename SrcXprType> -const DstXprType& copy_using_evaluator_without_resizing(const DstXprType& dst, const SrcXprType& src) -{ -#ifdef EIGEN_DEBUG_ASSIGN - internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug(); -#endif - eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - copy_using_evaluator_impl<DstXprType, SrcXprType>::run(const_cast<DstXprType&>(dst), src); - return dst; + call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); + return dst.derived(); } // Based on DenseBase::swap() -// TODO: Chech whether we need to do something special for swapping two -// Arrays or Matrices. +// TODO: Check whether we need to do something special for swapping two +// Arrays or Matrices. 
(Jitse) +// Overload default assignPacket behavior for swapping them +template<typename Scalar> +struct swap_kernel : generic_dense_assignment_kernel<swap_assign_op<Scalar> > +{ + typedef generic_dense_assignment_kernel<swap_assign_op<Scalar> > Base; + using Base::m_functor; + swap_kernel() : Base(swap_assign_op<Scalar>()) {} + + template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType> + void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.template swapPacket<StoreMode,LoadMode,typename DstEvaluatorType::PacketScalar>(&dst.coeffRef(row,col), &const_cast<SrcEvaluatorType&>(src).coeffRef(row,col)); + } + + template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType> + void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.template swapPacket<StoreMode,LoadMode,typename DstEvaluatorType::PacketScalar>(&dst.coeffRef(index), &const_cast<SrcEvaluatorType&>(src).coeffRef(index)); + } + + // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) + template<int StoreMode, int LoadMode, typename DstEvaluatorType, typename SrcEvaluatorType> + void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + typedef typename DstEvaluatorType::Index Index; + Index row = Base::template rowIndexByOuterInner<DstEvaluatorType>(outer, inner); + Index col = Base::template colIndexByOuterInner<DstEvaluatorType>(outer, inner); + assignPacket<StoreMode,LoadMode>(row, col, dst, src); + } +}; + template<typename DstXprType, typename SrcXprType> void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) { - 
copy_using_evaluator(SwapWrapper<DstXprType>(const_cast<DstXprType&>(dst)), src); + typedef swap_kernel<typename DstXprType::Scalar> kernel; + dense_assignment_loop<DstXprType, SrcXprType, kernel>::run(const_cast<DstXprType&>(dst), src, kernel()); } // Based on MatrixBase::operator+= (in CwiseBinaryOp.h) @@ -699,8 +846,7 @@ template<typename DstXprType, typename SrcXprType> void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>()); } // Based on ArrayBase::operator+= @@ -708,42 +854,37 @@ template<typename DstXprType, typename SrcXprType> void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>()); } -// TODO: Add add_assign_using_evaluator for EigenBase ? +// TODO: Add add_assign_using_evaluator for EigenBase ? 
(Jitse) template<typename DstXprType, typename SrcXprType> void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>()); } template<typename DstXprType, typename SrcXprType> void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>()); } template<typename DstXprType, typename SrcXprType> void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op<Scalar>()); } template<typename DstXprType, typename SrcXprType> void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), div_assign_op<Scalar>()); } diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 3240ec6ed..082c00df4 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -78,6 +78,8 @@ template<typename 
ExpressionType> struct evaluator_impl_base { typedef typename ExpressionType::Index Index; + // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. + typedef traits<ExpressionType> ExpressionTraits; template<typename OtherEvaluatorType> void copyCoeff(Index row, Index col, const OtherEvaluatorType& other) @@ -307,15 +309,17 @@ struct evaluator_impl<EvalToTemp<ArgType> > evaluator_impl(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result) - { - copy_using_evaluator_without_resizing(m_result, xpr.arg()); + { + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, xpr.arg()); } // This constructor is used when nesting an EvalTo evaluator in another evaluator evaluator_impl(const ArgType& arg) : m_result(arg.rows(), arg.cols()), m_resultImpl(m_result) - { - copy_using_evaluator_without_resizing(m_result, arg); + { + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, arg); } typedef typename PlainObject::Index Index; diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h new file mode 100644 index 000000000..ae264aa64 --- /dev/null +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -0,0 +1,167 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H +#define EIGEN_ASSIGNMENT_FUNCTORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * \brief Template functor for scalar/packet assignment + * + */ +template<typename Scalar> struct assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; } + + template<int Alignment, typename Packet> + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret<Scalar,Packet,Alignment>(a,b); } +}; +template<typename Scalar> +struct functor_traits<assign_op<Scalar> > { + enum { + Cost = NumTraits<Scalar>::ReadCost, + PacketAccess = packet_traits<Scalar>::IsVectorized + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with addition + * + */ +template<typename Scalar> struct add_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(add_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; } + + template<int Alignment, typename Packet> + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret<Scalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); } +}; +template<typename Scalar> +struct functor_traits<add_assign_op<Scalar> > { + enum { + Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasAdd + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with subtraction + * + */ +template<typename Scalar> struct sub_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; } + + template<int Alignment, typename Packet> + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { 
internal::pstoret<Scalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); } +}; +template<typename Scalar> +struct functor_traits<sub_assign_op<Scalar> > { + enum { + Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasAdd + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with multiplication + * + */ +template<typename Scalar> struct mul_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; } + + template<int Alignment, typename Packet> + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret<Scalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); } +}; +template<typename Scalar> +struct functor_traits<mul_assign_op<Scalar> > { + enum { + Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost, + PacketAccess = packet_traits<Scalar>::HasMul + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with dividing + * + */ +template<typename Scalar> struct div_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(div_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; } + + template<int Alignment, typename Packet> + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret<Scalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); } +}; +template<typename Scalar> +struct functor_traits<div_assign_op<Scalar> > { + enum { + Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost, + PacketAccess = packet_traits<Scalar>::HasMul + }; +}; + + +/** \internal + * \brief Template functor for scalar/packet assignment with swapping + * + * It works as follows. 
For a non-vectorized evaluation loop, we have: + * for(i) func(A.coeffRef(i), B.coeff(i)); + * where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behave like a non-const coeffRef. + * Actually, SwapWrapper might not even be needed: even if B is a plain expression, since it has to be writable + * B.coeff already returns a const reference to the underlying scalar value. + * + * The case of a vectorized loop is more tricky: + * for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j)); + * Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*, + * the actual alignment and Packet type. + * + */ +template<typename Scalar> struct swap_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const + { + using std::swap; + swap(a,const_cast<Scalar&>(b)); + } + + template<int LhsAlignment, int RhsAlignment, typename Packet> + EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const + { + Packet tmp = internal::ploadt<Packet,RhsAlignment>(b); + internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a)); + internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp); + } +}; +template<typename Scalar> +struct functor_traits<swap_assign_op<Scalar> > { + enum { + Cost = 3 * NumTraits<Scalar>::ReadCost, + PacketAccess = packet_traits<Scalar>::IsVectorized + }; +}; + +} // namespace internal + +} // namespace Eigen + +#endif // EIGEN_ASSIGNMENT_FUNCTORS_H diff --git a/test/evaluator_common.h b/test/evaluator_common.h new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/test/evaluator_common.h |