diff options
-rw-r--r-- | Eigen/src/Core/Assign.h | 143 | ||||
-rw-r--r-- | Eigen/src/Core/Dot.h | 26 | ||||
-rw-r--r-- | Eigen/src/Core/Redux.h | 28 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralUnrolled.h | 28 | ||||
-rw-r--r-- | Eigen/src/Core/util/Constants.h | 11 | ||||
-rw-r--r-- | test/vectorization_logic.cpp | 55 |
6 files changed, 176 insertions, 115 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 8dc015715..86ba45481 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -57,40 +57,46 @@ private: && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)), MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 && int(DstIsAligned) && int(SrcIsAligned), - MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit) - && (DstIsAligned || InnerMaxSize == Dynamic),/* If the destination isn't aligned, - we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. See remark below - about InnerMaxSize. */ - MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize /* slice vectorization can be slow, so we only - want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case - of a dynamic block in a fixed-size matrix */ + MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), + MayLinearVectorize = MightVectorize && MayLinearize + && (DstIsAligned || InnerMaxSize == Dynamic), + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. See remark below about InnerMaxSize. */ + MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix */ }; public: enum { - Vectorization = int(MayInnerVectorize) ? int(InnerVectorization) - : int(MayLinearVectorize) ? int(LinearVectorization) - : int(MaySliceVectorize) ? int(SliceVectorization) - : int(NoVectorization) + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + : int(MayLinearVectorize) ? 
int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal), + Vectorized = int(Traversal) != LinearTraversal && int(Traversal) != DefaultTraversal /* true only for the packet-based (Inner/Linear/Slice vectorized) traversals */ }; private: enum { - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)), + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), /* scalar traversals (Default and Linear) get the plain limit; vectorized ones get it scaled by PacketSize */ MayUnrollCompletely = int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize * OtherDerived::CoeffReadCost) <= int(UnrollingLimit) }; public: enum { - Unrolling = (int(Vectorization) == int(InnerVectorization) || int(Vectorization) == int(NoVectorization)) - ? ( - int(MayUnrollCompletely) ? int(CompleteUnrolling) - : int(MayUnrollInner) ? int(InnerUnrolling) - : int(NoUnrolling) - ) - : int(Vectorization) == int(LinearVectorization) - ? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) + Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) + ? ( + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) + ) + : int(Traversal) == int(LinearVectorizedTraversal) + ? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) + : int(Traversal) == int(LinearTraversal) + ? ( int(MayUnrollCompletely) ? 
int(CompleteUnrolling) : int(NoUnrolling) ) : int(NoUnrolling) }; @@ -106,7 +112,7 @@ public: EIGEN_DEBUG_VAR(MayInnerVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Vectorization) + EIGEN_DEBUG_VAR(Traversal) EIGEN_DEBUG_VAR(UnrollingLimit) EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollInner) @@ -118,12 +124,12 @@ public: * Part 2 : meta-unrollers ***************************************************************************/ -/*********************** -*** No vectorization *** -***********************/ +/************************ +*** Default traversal *** +************************/ template<typename Derived1, typename Derived2, int Index, int Stop> -struct ei_assign_novec_CompleteUnrolling +struct ei_assign_DefaultTraversal_CompleteUnrolling { enum { row = int(Derived1::Flags)&RowMajorBit @@ -137,18 +143,18 @@ struct ei_assign_novec_CompleteUnrolling EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { dst.copyCoeff(row, col, src); - ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src); + ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src); } }; template<typename Derived1, typename Derived2, int Stop> -struct ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Stop, Stop> +struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop> { EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {} }; template<typename Derived1, typename Derived2, int Index, int Stop> -struct ei_assign_novec_InnerUnrolling +struct ei_assign_DefaultTraversal_InnerUnrolling { EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col) { @@ -156,16 +162,36 @@ struct ei_assign_novec_InnerUnrolling const int row = rowMajor ? row_or_col : Index; const int col = rowMajor ? 
Index : row_or_col; dst.copyCoeff(row, col, src); - ei_assign_novec_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col); + ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col); } }; template<typename Derived1, typename Derived2, int Stop> -struct ei_assign_novec_InnerUnrolling<Derived1, Derived2, Stop, Stop> +struct ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop> { EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {} }; +/*********************** +*** Linear traversal *** +***********************/ + +template<typename Derived1, typename Derived2, int Index, int Stop> +struct ei_assign_LinearTraversal_CompleteUnrolling +{ + EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) + { + dst.copyCoeff(Index, src); + ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src); + } +}; + +template<typename Derived1, typename Derived2, int Stop> +struct ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop> +{ + EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {} +}; + /************************** *** Inner vectorization *** **************************/ @@ -221,16 +247,16 @@ struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop> ***************************************************************************/ template<typename Derived1, typename Derived2, - int Vectorization = ei_assign_traits<Derived1, Derived2>::Vectorization, + int Traversal = ei_assign_traits<Derived1, Derived2>::Traversal, int Unrolling = ei_assign_traits<Derived1, Derived2>::Unrolling> struct ei_assign_impl; -/*********************** -*** No vectorization *** -***********************/ +/************************ +*** Default traversal *** +************************/ template<typename Derived1, typename Derived2> -struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling> 
+struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling> { inline static void run(Derived1 &dst, const Derived2 &src) { @@ -248,17 +274,17 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling> }; template<typename Derived1, typename Derived2> -struct ei_assign_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling> +struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling> { EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { - ei_assign_novec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> + ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> ::run(dst, src); } }; template<typename Derived1, typename Derived2> -struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling> +struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling> { EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { @@ -266,17 +292,42 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling> const int innerSize = rowMajor ? 
Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime; const int outerSize = dst.outerSize(); for(int j = 0; j < outerSize; ++j) - ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize> + ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, innerSize> ::run(dst, src, j); } }; +/*********************** +*** Linear traversal *** +***********************/ + +template<typename Derived1, typename Derived2> +struct ei_assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling> +{ + inline static void run(Derived1 &dst, const Derived2 &src) + { + const int size = dst.size(); + for(int i = 0; i < size; ++i) + dst.copyCoeff(i, src); + } +}; + +template<typename Derived1, typename Derived2> +struct ei_assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling> +{ + EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) + { + ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> + ::run(dst, src); + } +}; + /************************** *** Inner vectorization *** **************************/ template<typename Derived1, typename Derived2> -struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling> +struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling> { inline static void run(Derived1 &dst, const Derived2 &src) { @@ -295,7 +346,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling> }; template<typename Derived1, typename Derived2> -struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling> +struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling> { EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { @@ -305,7 +356,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling> }; template<typename Derived1, typename Derived2> -struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling> +struct 
ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling> { EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { @@ -323,7 +374,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling> ***************************/ template<typename Derived1, typename Derived2> -struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> +struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling> { inline static void run(Derived1 &dst, const Derived2 &src) { @@ -347,7 +398,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> }; template<typename Derived1, typename Derived2> -struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling> +struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling> { EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { @@ -356,7 +407,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling const int alignedSize = (size/packetSize)*packetSize; ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src); - ei_assign_novec_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src); + ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src); } }; @@ -365,7 +416,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling ***************************/ template<typename Derived1, typename Derived2> -struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling> +struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling> { inline static void run(Derived1 &dst, const Derived2 &src) { diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 631124f2b..a8983d4ce 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -34,10 +34,10 @@ struct ei_dot_traits { public: enum { - 
Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit) + Traversal = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit) && (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit) - ? LinearVectorization - : NoVectorization + ? LinearVectorizedTraversal + : DefaultTraversal }; private: @@ -46,7 +46,7 @@ private: PacketSize = ei_packet_traits<Scalar>::size, Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits<Scalar>::MulCost) + (Derived1::SizeAtCompileTime-1) * NumTraits<Scalar>::AddCost, - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)) + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) }; public: @@ -142,13 +142,13 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true> ***************************************************************************/ template<typename Derived1, typename Derived2, - int Vectorization = ei_dot_traits<Derived1, Derived2>::Vectorization, + int Traversal = ei_dot_traits<Derived1, Derived2>::Traversal, int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling > struct ei_dot_impl; template<typename Derived1, typename Derived2> -struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling> +struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling> { typedef typename Derived1::Scalar Scalar; static Scalar run(const Derived1& v1, const Derived2& v2) @@ -163,12 +163,12 @@ struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling> }; template<typename Derived1, typename Derived2> -struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling> +struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling> : public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> {}; template<typename Derived1, typename Derived2> -struct ei_dot_impl<Derived1, 
Derived2, LinearVectorization, NoUnrolling> +struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling> { typedef typename Derived1::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; @@ -221,20 +221,20 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> }; template<typename Derived1, typename Derived2> -struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling> +struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling> { typedef typename Derived1::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; enum { PacketSize = ei_packet_traits<Scalar>::size, Size = Derived1::SizeAtCompileTime, - VectorizationSize = (Size / PacketSize) * PacketSize + VectorizedSize = (Size / PacketSize) * PacketSize }; static Scalar run(const Derived1& v1, const Derived2& v2) { - Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizationSize>::run(v1, v2)); - if (VectorizationSize != Size) - res += ei_dot_novec_unroller<Derived1, Derived2, VectorizationSize, Size-VectorizationSize>::run(v1, v2); + Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizedSize>::run(v1, v2)); + if (VectorizedSize != Size) + res += ei_dot_novec_unroller<Derived1, Derived2, VectorizedSize, Size-VectorizedSize>::run(v1, v2); return res; } }; diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 9f796157a..171f6dcf5 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -54,16 +54,16 @@ private: public: enum { - Vectorization = int(MayLinearVectorize) ? int(LinearVectorization) - : int(MaySliceVectorize) ? int(SliceVectorization) - : int(NoVectorization) + Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? 
int(SliceVectorizedTraversal) + : int(DefaultTraversal) }; private: enum { Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost, - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)) + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) }; public: @@ -171,13 +171,13 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1> ***************************************************************************/ template<typename Func, typename Derived, - int Vectorization = ei_redux_traits<Func, Derived>::Vectorization, + int Traversal = ei_redux_traits<Func, Derived>::Traversal, int Unrolling = ei_redux_traits<Func, Derived>::Unrolling > struct ei_redux_impl; template<typename Func, typename Derived> -struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling> +struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling> { typedef typename Derived::Scalar Scalar; static Scalar run(const Derived& mat, const Func& func) @@ -195,12 +195,12 @@ struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling> }; template<typename Func, typename Derived> -struct ei_redux_impl<Func,Derived, NoVectorization, CompleteUnrolling> +struct ei_redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling> : public ei_redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime> {}; template<typename Func, typename Derived> -struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling> +struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling> { typedef typename Derived::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; @@ -246,7 +246,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling> }; template<typename Func, typename Derived> -struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling> +struct 
ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling> { typedef typename Derived::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; @@ -277,7 +277,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling> else // too small to vectorize anything. // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. { - res = ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>::run(mat, func); + res = ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func); } return res; @@ -285,20 +285,20 @@ struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling> }; template<typename Func, typename Derived> -struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling> +struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> { typedef typename Derived::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; enum { PacketSize = ei_packet_traits<Scalar>::size, Size = Derived::SizeAtCompileTime, - VectorizationSize = (Size / PacketSize) * PacketSize + VectorizedSize = (Size / PacketSize) * PacketSize }; EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func) { Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func)); - if (VectorizationSize != Size) - res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizationSize, Size-VectorizationSize>::run(mat,func)); + if (VectorizedSize != Size) + res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func)); return res; } }; diff --git a/Eigen/src/Core/products/GeneralUnrolled.h b/Eigen/src/Core/products/GeneralUnrolled.h index 7241976a8..e8fb760ae 100644 --- a/Eigen/src/Core/products/GeneralUnrolled.h +++ b/Eigen/src/Core/products/GeneralUnrolled.h @@ -36,7 +36,7 @@ * Note that here the inner-loops should always be unrolled. 
*/ -template<int VectorizationMode, int Index, typename Lhs, typename Rhs, typename RetScalar> +template<int Traversal, int Index, typename Lhs, typename Rhs, typename RetScalar> struct ei_product_coeff_impl; template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> @@ -115,7 +115,7 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested, CanVectorizeInner = ei_traits<GeneralProduct>::CanVectorizeInner }; - typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorization : NoVectorization, + typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, Unroll ? InnerSize-1 : Dynamic, _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; @@ -182,17 +182,17 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested, **************************************/ template<int Index, typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<NoVectorization, Index, Lhs, Rhs, RetScalar> +struct ei_product_coeff_impl<DefaultTraversal, Index, Lhs, Rhs, RetScalar> { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { - ei_product_coeff_impl<NoVectorization, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); + ei_product_coeff_impl<DefaultTraversal, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); res += lhs.coeff(row, Index) * rhs.coeff(Index, col); } }; template<typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar> +struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { @@ -201,7 +201,7 @@ struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar> }; template<typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, 
Rhs, RetScalar> +struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) { @@ -214,7 +214,7 @@ struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs, RetScalar> // prevent buggy user code from causing an infinite recursion template<typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<NoVectorization, -1, Lhs, Rhs, RetScalar> +struct ei_product_coeff_impl<DefaultTraversal, -1, Lhs, Rhs, RetScalar> { EIGEN_STRONG_INLINE static void run(int, int, const Lhs&, const Rhs&, RetScalar&) {} }; @@ -244,7 +244,7 @@ struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar> }; template<int Index, typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar> +struct ei_product_coeff_impl<InnerVectorizedTraversal, Index, Lhs, Rhs, RetScalar> { typedef typename Lhs::PacketScalar PacketScalar; enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size }; @@ -252,7 +252,7 @@ struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar> { PacketScalar pres; ei_product_coeff_vectorized_unroller<Index+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres); - ei_product_coeff_impl<NoVectorization,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); + ei_product_coeff_impl<DefaultTraversal,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); res = ei_predux(pres); } }; @@ -265,7 +265,7 @@ struct ei_product_coeff_vectorized_dyn_selector res = ei_dot_impl< Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>, Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>, - LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs.col(col)); + LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col)); } }; @@ -279,7 +279,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> res = ei_dot_impl< Lhs, 
Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>, - LinearVectorization, NoUnrolling>::run(lhs, rhs.col(col)); + LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col)); } }; @@ -291,7 +291,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> res = ei_dot_impl< Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>, Rhs, - LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs); + LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs); } }; @@ -303,12 +303,12 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> res = ei_dot_impl< Lhs, Rhs, - LinearVectorization, NoUnrolling>::run(lhs, rhs); + LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs); } }; template<typename Lhs, typename Rhs, typename RetScalar> -struct ei_product_coeff_impl<InnerVectorization, Dynamic, Lhs, Rhs, RetScalar> +struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 487425f88..5489d063a 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -202,16 +202,19 @@ enum DirectionType { Vertical, Horizontal, BothDirections }; enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, SparseTimeSparseProduct, SparseTimeDenseProduct, DenseTimeSparseProduct }; enum { + /** \internal Default traversal, no vectorization, no index-based access */ + DefaultTraversal, + /** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */ + LinearTraversal, /** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment * and good size */ - InnerVectorization, + InnerVectorizedTraversal, /** \internal Vectorization path using a single loop plus scalar loops for the * unaligned boundaries */ - LinearVectorization, + 
LinearVectorizedTraversal, /** \internal Generic vectorization path using one vectorized loop per row/column with some * scalar loops to handle the unaligned boundaries */ - SliceVectorization, - NoVectorization + SliceVectorizedTraversal }; enum { diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 680adeb45..3772bf13d 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -26,17 +26,18 @@ #include <typeinfo> template<typename Dst, typename Src> -bool test_assign(const Dst&, const Src&, int vectorization, int unrolling) +bool test_assign(const Dst&, const Src&, int traversal, int unrolling) { - return ei_assign_traits<Dst,Src>::Vectorization==vectorization + ei_assign_traits<Dst,Src>::debug(); + return ei_assign_traits<Dst,Src>::Traversal==traversal && ei_assign_traits<Dst,Src>::Unrolling==unrolling; } template<typename Xpr> -bool test_redux(const Xpr&, int vectorization, int unrolling) +bool test_redux(const Xpr&, int traversal, int unrolling) { typedef ei_redux_traits<ei_scalar_sum_op<typename Xpr::Scalar>,Xpr> traits; - return traits::Vectorization==vectorization && traits::Unrolling==unrolling; + return traits::Traversal==traversal && traits::Unrolling==unrolling; } void test_vectorization_logic() @@ -45,61 +46,67 @@ void test_vectorization_logic() #ifdef EIGEN_VECTORIZE VERIFY(test_assign(Vector4f(),Vector4f(), - InnerVectorization,CompleteUnrolling)); + InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Vector4f(),Vector4f()+Vector4f(), - InnerVectorization,CompleteUnrolling)); + InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Vector4f(),Vector4f().cwise() * Vector4f(), - InnerVectorization,CompleteUnrolling)); + InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Vector4f(),Vector4f().cast<float>(), - InnerVectorization,CompleteUnrolling)); + InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix4f(),Matrix4f(), - 
InnerVectorization,CompleteUnrolling)); + InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix4f(),Matrix4f()+Matrix4f(), - InnerVectorization,CompleteUnrolling)); + InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix4f(),Matrix4f().cwise() * Matrix4f(), - InnerVectorization,CompleteUnrolling)); + InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix<float,16,16>(),Matrix<float,16,16>()+Matrix<float,16,16>(), - InnerVectorization,InnerUnrolling)); + InnerVectorizedTraversal,InnerUnrolling)); VERIFY(test_assign(Matrix<float,16,16,DontAlign>(),Matrix<float,16,16>()+Matrix<float,16,16>(), - NoVectorization,InnerUnrolling)); + LinearTraversal,NoUnrolling)); + + VERIFY(test_assign(Matrix<float,2,2,DontAlign>(),Matrix<float,2,2>()+Matrix<float,2,2>(), + LinearTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix<float,6,2>(),Matrix<float,6,2>().cwise() / Matrix<float,6,2>(), - LinearVectorization,CompleteUnrolling)); + LinearVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix<float,17,17>(),Matrix<float,17,17>()+Matrix<float,17,17>(), - NoVectorization,InnerUnrolling)); + LinearTraversal,NoUnrolling)); + + VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(), + LinearTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix<float,4,4>(),Matrix<float,17,17>().block<4,4>(2,3)+Matrix<float,17,17>().block<4,4>(10,4), - NoVectorization,CompleteUnrolling)); + DefaultTraversal,CompleteUnrolling)); VERIFY(test_assign(MatrixXf(10,10),MatrixXf(20,20).block(10,10,2,3), - SliceVectorization,NoUnrolling)); + SliceVectorizedTraversal,NoUnrolling)); VERIFY(test_redux(VectorXf(10), - LinearVectorization,NoUnrolling)); + LinearVectorizedTraversal,NoUnrolling)); VERIFY(test_redux(Matrix<float,5,2>(), - NoVectorization,CompleteUnrolling)); + DefaultTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix<float,6,2>(), - LinearVectorization,CompleteUnrolling)); + 
LinearVectorizedTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix<float,16,16>(), - LinearVectorization,NoUnrolling)); + LinearVectorizedTraversal,NoUnrolling)); VERIFY(test_redux(Matrix<float,16,16>().block<4,4>(1,2), - NoVectorization,CompleteUnrolling)); + DefaultTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix<float,16,16>().block<8,1>(1,2), - LinearVectorization,CompleteUnrolling)); + LinearVectorizedTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix<double,7,3>(), - NoVectorization,CompleteUnrolling)); + DefaultTraversal,CompleteUnrolling)); #endif // EIGEN_VECTORIZE |