aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- Eigen/src/Core/Assign.h 143
-rw-r--r-- Eigen/src/Core/Dot.h 26
-rw-r--r-- Eigen/src/Core/Redux.h 28
-rw-r--r-- Eigen/src/Core/products/GeneralUnrolled.h 28
-rw-r--r-- Eigen/src/Core/util/Constants.h 11
-rw-r--r-- test/vectorization_logic.cpp 55
6 files changed, 176 insertions, 115 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 8dc015715..86ba45481 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -57,40 +57,46 @@ private:
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
- MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit)
- && (DstIsAligned || InnerMaxSize == Dynamic),/* If the destination isn't aligned,
- we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. See remark below
- about InnerMaxSize. */
- MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize /* slice vectorization can be slow, so we only
- want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case
- of a dynamic block in a fixed-size matrix */
+ MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+ MayLinearVectorize = MightVectorize && MayLinearize
+ && (DstIsAligned || InnerMaxSize == Dynamic),
+ /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
+ so it's only good for large enough sizes. See remark below about InnerMaxSize. */
+ MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
+ /* slice vectorization can be slow, so we only want it if the slices are big, which is
+ indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
+ in a fixed-size matrix */
};
public:
enum {
- Vectorization = int(MayInnerVectorize) ? int(InnerVectorization)
- : int(MayLinearVectorize) ? int(LinearVectorization)
- : int(MaySliceVectorize) ? int(SliceVectorization)
- : int(NoVectorization)
+ Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
+ : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+ : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
+ : int(MayLinearize) ? int(LinearTraversal)
+ : int(DefaultTraversal),
+ Vectorized = int(Traversal) != LinearTraversal && int(Traversal) != DefaultTraversal /* true only for the Inner/Linear/Slice vectorized traversals */
};
private:
enum {
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)),
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), /* same scaling as the removed line: scalar traversals keep the base limit, vectorized ones get PacketSize times it */
MayUnrollCompletely = int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize * OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
};
public:
enum {
- Unrolling = (int(Vectorization) == int(InnerVectorization) || int(Vectorization) == int(NoVectorization))
- ? (
- int(MayUnrollCompletely) ? int(CompleteUnrolling)
- : int(MayUnrollInner) ? int(InnerUnrolling)
- : int(NoUnrolling)
- )
- : int(Vectorization) == int(LinearVectorization)
- ? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
+ Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
+ ? (
+ int(MayUnrollCompletely) ? int(CompleteUnrolling)
+ : int(MayUnrollInner) ? int(InnerUnrolling)
+ : int(NoUnrolling)
+ )
+ : int(Traversal) == int(LinearVectorizedTraversal)
+ ? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
+ : int(Traversal) == int(LinearTraversal)
+ ? ( int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(NoUnrolling)
};
@@ -106,7 +112,7 @@ public:
EIGEN_DEBUG_VAR(MayInnerVectorize)
EIGEN_DEBUG_VAR(MayLinearVectorize)
EIGEN_DEBUG_VAR(MaySliceVectorize)
- EIGEN_DEBUG_VAR(Vectorization)
+ EIGEN_DEBUG_VAR(Traversal)
EIGEN_DEBUG_VAR(UnrollingLimit)
EIGEN_DEBUG_VAR(MayUnrollCompletely)
EIGEN_DEBUG_VAR(MayUnrollInner)
@@ -118,12 +124,12 @@ public:
* Part 2 : meta-unrollers
***************************************************************************/
-/***********************
-*** No vectorization ***
-***********************/
+/************************
+*** Default traversal ***
+************************/
template<typename Derived1, typename Derived2, int Index, int Stop>
-struct ei_assign_novec_CompleteUnrolling
+struct ei_assign_DefaultTraversal_CompleteUnrolling
{
enum {
row = int(Derived1::Flags)&RowMajorBit
@@ -137,18 +143,18 @@ struct ei_assign_novec_CompleteUnrolling
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeff(row, col, src);
- ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
+ ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Stop>
-struct ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
-struct ei_assign_novec_InnerUnrolling
+struct ei_assign_DefaultTraversal_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
{
@@ -156,16 +162,36 @@ struct ei_assign_novec_InnerUnrolling
const int row = rowMajor ? row_or_col : Index;
const int col = rowMajor ? Index : row_or_col;
dst.copyCoeff(row, col, src);
- ei_assign_novec_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
+ ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
}
};
template<typename Derived1, typename Derived2, int Stop>
-struct ei_assign_novec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
+struct ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
};
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct ei_assign_LinearTraversal_CompleteUnrolling
+{
+ EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
+ {
+ dst.copyCoeff(Index, src);
+ ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
+ }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+ EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
+};
+
/**************************
*** Inner vectorization ***
**************************/
@@ -221,16 +247,16 @@ struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
***************************************************************************/
template<typename Derived1, typename Derived2,
- int Vectorization = ei_assign_traits<Derived1, Derived2>::Vectorization,
+ int Traversal = ei_assign_traits<Derived1, Derived2>::Traversal,
int Unrolling = ei_assign_traits<Derived1, Derived2>::Unrolling>
struct ei_assign_impl;
-/***********************
-*** No vectorization ***
-***********************/
+/************************
+*** Default traversal ***
+************************/
template<typename Derived1, typename Derived2>
-struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
+struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@@ -248,17 +274,17 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
-struct ei_assign_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
+struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
- ei_assign_novec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+ ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2>
-struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
+struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
@@ -266,17 +292,42 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
const int outerSize = dst.outerSize();
for(int j = 0; j < outerSize; ++j)
- ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
+ ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, innerSize>
::run(dst, src, j);
}
};
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Derived1, typename Derived2>
+struct ei_assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
+{
+ inline static void run(Derived1 &dst, const Derived2 &src)
+ {
+ const int size = dst.size();
+ for(int i = 0; i < size; ++i)
+ dst.copyCoeff(i, src);
+ }
+};
+
+template<typename Derived1, typename Derived2>
+struct ei_assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
+{
+ EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
+ {
+ ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+ ::run(dst, src);
+ }
+};
+
/**************************
*** Inner vectorization ***
**************************/
template<typename Derived1, typename Derived2>
-struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
+struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@@ -295,7 +346,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
-struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling>
+struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
@@ -305,7 +356,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling>
};
template<typename Derived1, typename Derived2>
-struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
+struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
@@ -323,7 +374,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
***************************/
template<typename Derived1, typename Derived2>
-struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
+struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@@ -347,7 +398,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
-struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
+struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
@@ -356,7 +407,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling
const int alignedSize = (size/packetSize)*packetSize;
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
- ei_assign_novec_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
+ ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
}
};
@@ -365,7 +416,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling
***************************/
template<typename Derived1, typename Derived2>
-struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
+struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index 631124f2b..a8983d4ce 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -34,10 +34,10 @@ struct ei_dot_traits
{
public:
enum {
- Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
+ Traversal = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
&& (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
- ? LinearVectorization
- : NoVectorization
+ ? LinearVectorizedTraversal
+ : DefaultTraversal
};
private:
@@ -46,7 +46,7 @@ private:
PacketSize = ei_packet_traits<Scalar>::size,
Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits<Scalar>::MulCost)
+ (Derived1::SizeAtCompileTime-1) * NumTraits<Scalar>::AddCost,
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
};
public:
@@ -142,13 +142,13 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
***************************************************************************/
template<typename Derived1, typename Derived2,
- int Vectorization = ei_dot_traits<Derived1, Derived2>::Vectorization,
+ int Traversal = ei_dot_traits<Derived1, Derived2>::Traversal,
int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling
>
struct ei_dot_impl;
template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
+struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
typedef typename Derived1::Scalar Scalar;
static Scalar run(const Derived1& v1, const Derived2& v2)
@@ -163,12 +163,12 @@ struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
+struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
: public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
{};
template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
+struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
typedef typename Derived1::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -221,20 +221,20 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
+struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Derived1::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived1::SizeAtCompileTime,
- VectorizationSize = (Size / PacketSize) * PacketSize
+ VectorizedSize = (Size / PacketSize) * PacketSize
};
static Scalar run(const Derived1& v1, const Derived2& v2)
{
- Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizationSize>::run(v1, v2));
- if (VectorizationSize != Size)
- res += ei_dot_novec_unroller<Derived1, Derived2, VectorizationSize, Size-VectorizationSize>::run(v1, v2);
+ Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizedSize>::run(v1, v2));
+ if (VectorizedSize != Size)
+ res += ei_dot_novec_unroller<Derived1, Derived2, VectorizedSize, Size-VectorizedSize>::run(v1, v2);
return res;
}
};
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 9f796157a..171f6dcf5 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -54,16 +54,16 @@ private:
public:
enum {
- Vectorization = int(MayLinearVectorize) ? int(LinearVectorization)
- : int(MaySliceVectorize) ? int(SliceVectorization)
- : int(NoVectorization)
+ Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+ : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
+ : int(DefaultTraversal)
};
private:
enum {
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
};
public:
@@ -171,13 +171,13 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1>
***************************************************************************/
template<typename Func, typename Derived,
- int Vectorization = ei_redux_traits<Func, Derived>::Vectorization,
+ int Traversal = ei_redux_traits<Func, Derived>::Traversal,
int Unrolling = ei_redux_traits<Func, Derived>::Unrolling
>
struct ei_redux_impl;
template<typename Func, typename Derived>
-struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>
+struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived& mat, const Func& func)
@@ -195,12 +195,12 @@ struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>
};
template<typename Func, typename Derived>
-struct ei_redux_impl<Func,Derived, NoVectorization, CompleteUnrolling>
+struct ei_redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
: public ei_redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Func, typename Derived>
-struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
+struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -246,7 +246,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
};
template<typename Func, typename Derived>
-struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
+struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -277,7 +277,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
- res = ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>::run(mat, func);
+ res = ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
}
return res;
@@ -285,20 +285,20 @@ struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
};
template<typename Func, typename Derived>
-struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling>
+struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
- VectorizationSize = (Size / PacketSize) * PacketSize
+ VectorizedSize = (Size / PacketSize) * PacketSize
};
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
{
Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
- if (VectorizationSize != Size)
- res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizationSize, Size-VectorizationSize>::run(mat,func));
+ if (VectorizedSize != Size)
+ res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
return res;
}
};
diff --git a/Eigen/src/Core/products/GeneralUnrolled.h b/Eigen/src/Core/products/GeneralUnrolled.h
index 7241976a8..e8fb760ae 100644
--- a/Eigen/src/Core/products/GeneralUnrolled.h
+++ b/Eigen/src/Core/products/GeneralUnrolled.h
@@ -36,7 +36,7 @@
* Note that here the inner-loops should always be unrolled.
*/
-template<int VectorizationMode, int Index, typename Lhs, typename Rhs, typename RetScalar>
+template<int Traversal, int Index, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl;
template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
@@ -115,7 +115,7 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested,
CanVectorizeInner = ei_traits<GeneralProduct>::CanVectorizeInner
};
- typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorization : NoVectorization,
+ typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
Unroll ? InnerSize-1 : Dynamic,
_LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
@@ -182,17 +182,17 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested,
**************************************/
template<int Index, typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<NoVectorization, Index, Lhs, Rhs, RetScalar>
+struct ei_product_coeff_impl<DefaultTraversal, Index, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
- ei_product_coeff_impl<NoVectorization, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
+ ei_product_coeff_impl<DefaultTraversal, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, Index) * rhs.coeff(Index, col);
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar>
+struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
@@ -201,7 +201,7 @@ struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar>
};
template<typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs, RetScalar>
+struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
{
@@ -214,7 +214,7 @@ struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs, RetScalar>
// prevent buggy user code from causing an infinite recursion
template<typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<NoVectorization, -1, Lhs, Rhs, RetScalar>
+struct ei_product_coeff_impl<DefaultTraversal, -1, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int, int, const Lhs&, const Rhs&, RetScalar&) {}
};
@@ -244,7 +244,7 @@ struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar>
};
template<int Index, typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar>
+struct ei_product_coeff_impl<InnerVectorizedTraversal, Index, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::PacketScalar PacketScalar;
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
@@ -252,7 +252,7 @@ struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar>
{
PacketScalar pres;
ei_product_coeff_vectorized_unroller<Index+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
- ei_product_coeff_impl<NoVectorization,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
+ ei_product_coeff_impl<DefaultTraversal,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
res = ei_predux(pres);
}
};
@@ -265,7 +265,7 @@ struct ei_product_coeff_vectorized_dyn_selector
res = ei_dot_impl<
Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
- LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs.col(col));
+ LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col));
}
};
@@ -279,7 +279,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
res = ei_dot_impl<
Lhs,
Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
- LinearVectorization, NoUnrolling>::run(lhs, rhs.col(col));
+ LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col));
}
};
@@ -291,7 +291,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
res = ei_dot_impl<
Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
Rhs,
- LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs);
+ LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs);
}
};
@@ -303,12 +303,12 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
res = ei_dot_impl<
Lhs,
Rhs,
- LinearVectorization, NoUnrolling>::run(lhs, rhs);
+ LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs);
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
-struct ei_product_coeff_impl<InnerVectorization, Dynamic, Lhs, Rhs, RetScalar>
+struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 487425f88..5489d063a 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -202,16 +202,19 @@ enum DirectionType { Vertical, Horizontal, BothDirections };
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, SparseTimeSparseProduct, SparseTimeDenseProduct, DenseTimeSparseProduct };
enum {
+ /** \internal Default traversal, no vectorization, no index-based access */
+ DefaultTraversal,
+ /** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */
+ LinearTraversal,
/** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment
* and good size */
- InnerVectorization,
+ InnerVectorizedTraversal,
/** \internal Vectorization path using a single loop plus scalar loops for the
* unaligned boundaries */
- LinearVectorization,
+ LinearVectorizedTraversal,
/** \internal Generic vectorization path using one vectorized loop per row/column with some
* scalar loops to handle the unaligned boundaries */
- SliceVectorization,
- NoVectorization
+ SliceVectorizedTraversal
};
enum {
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp
index 680adeb45..3772bf13d 100644
--- a/test/vectorization_logic.cpp
+++ b/test/vectorization_logic.cpp
@@ -26,17 +26,18 @@
#include <typeinfo>
template<typename Dst, typename Src>
-bool test_assign(const Dst&, const Src&, int vectorization, int unrolling)
+bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
{
- return ei_assign_traits<Dst,Src>::Vectorization==vectorization
+ ei_assign_traits<Dst,Src>::debug();
+ return ei_assign_traits<Dst,Src>::Traversal==traversal
&& ei_assign_traits<Dst,Src>::Unrolling==unrolling;
}
template<typename Xpr>
-bool test_redux(const Xpr&, int vectorization, int unrolling)
+bool test_redux(const Xpr&, int traversal, int unrolling)
{
typedef ei_redux_traits<ei_scalar_sum_op<typename Xpr::Scalar>,Xpr> traits;
- return traits::Vectorization==vectorization && traits::Unrolling==unrolling;
+ return traits::Traversal==traversal && traits::Unrolling==unrolling;
}
void test_vectorization_logic()
@@ -45,61 +46,67 @@ void test_vectorization_logic()
#ifdef EIGEN_VECTORIZE
VERIFY(test_assign(Vector4f(),Vector4f(),
- InnerVectorization,CompleteUnrolling));
+ InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Vector4f(),Vector4f()+Vector4f(),
- InnerVectorization,CompleteUnrolling));
+ InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Vector4f(),Vector4f().cwise() * Vector4f(),
- InnerVectorization,CompleteUnrolling));
+ InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Vector4f(),Vector4f().cast<float>(),
- InnerVectorization,CompleteUnrolling));
+ InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix4f(),Matrix4f(),
- InnerVectorization,CompleteUnrolling));
+ InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix4f(),Matrix4f()+Matrix4f(),
- InnerVectorization,CompleteUnrolling));
+ InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix4f(),Matrix4f().cwise() * Matrix4f(),
- InnerVectorization,CompleteUnrolling));
+ InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix<float,16,16>(),Matrix<float,16,16>()+Matrix<float,16,16>(),
- InnerVectorization,InnerUnrolling));
+ InnerVectorizedTraversal,InnerUnrolling));
VERIFY(test_assign(Matrix<float,16,16,DontAlign>(),Matrix<float,16,16>()+Matrix<float,16,16>(),
- NoVectorization,InnerUnrolling));
+ LinearTraversal,NoUnrolling));
+
+ VERIFY(test_assign(Matrix<float,2,2,DontAlign>(),Matrix<float,2,2>()+Matrix<float,2,2>(),
+ LinearTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix<float,6,2>(),Matrix<float,6,2>().cwise() / Matrix<float,6,2>(),
- LinearVectorization,CompleteUnrolling));
+ LinearVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix<float,17,17>(),Matrix<float,17,17>()+Matrix<float,17,17>(),
- NoVectorization,InnerUnrolling));
+ LinearTraversal,NoUnrolling));
+
+ VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
+ LinearTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix<float,4,4>(),Matrix<float,17,17>().block<4,4>(2,3)+Matrix<float,17,17>().block<4,4>(10,4),
- NoVectorization,CompleteUnrolling));
+ DefaultTraversal,CompleteUnrolling));
VERIFY(test_assign(MatrixXf(10,10),MatrixXf(20,20).block(10,10,2,3),
- SliceVectorization,NoUnrolling));
+ SliceVectorizedTraversal,NoUnrolling));
VERIFY(test_redux(VectorXf(10),
- LinearVectorization,NoUnrolling));
+ LinearVectorizedTraversal,NoUnrolling));
VERIFY(test_redux(Matrix<float,5,2>(),
- NoVectorization,CompleteUnrolling));
+ DefaultTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<float,6,2>(),
- LinearVectorization,CompleteUnrolling));
+ LinearVectorizedTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<float,16,16>(),
- LinearVectorization,NoUnrolling));
+ LinearVectorizedTraversal,NoUnrolling));
VERIFY(test_redux(Matrix<float,16,16>().block<4,4>(1,2),
- NoVectorization,CompleteUnrolling));
+ DefaultTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<float,16,16>().block<8,1>(1,2),
- LinearVectorization,CompleteUnrolling));
+ LinearVectorizedTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<double,7,3>(),
- NoVectorization,CompleteUnrolling));
+ DefaultTraversal,CompleteUnrolling));
#endif // EIGEN_VECTORIZE