aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/AssignEvaluator.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2016-07-28 13:47:33 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2016-07-28 13:47:33 +0200
commit4057f9b1fce05fef4730fa6b4bfe1b84a1f44973 (patch)
tree7e7d554fd1ebb12b7c7fad1c8fefc4ea18065784 /Eigen/src/Core/AssignEvaluator.h
parent5fbe7aa60453f31e24f9c3fce9e3f788f385d38c (diff)
Enable slice-vectorization+inner-unrolling when unaligned vectorization is allowed. For instance, this permits to vectorize 5x5 matrices (including product)
Diffstat (limited to 'Eigen/src/Core/AssignEvaluator.h')
-rw-r--r--Eigen/src/Core/AssignEvaluator.h54
1 files changed, 41 insertions, 13 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 5b15b4ee9..b7cc7c0e9 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -88,10 +88,11 @@ private:
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
- && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*InnerPacketSize)
+ && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
/* slice vectorization can be slow, so we only want it if the slices are big, which is
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
- in a fixed-size matrix */
+ in a fixed-size matrix
+ However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
};
public:
@@ -136,6 +137,11 @@ public:
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
+#if EIGEN_UNALIGNED_VECTORIZE
+ : int(Traversal) == int(SliceVectorizedTraversal)
+ ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
+ : int(NoUnrolling) )
+#endif
: int(NoUnrolling)
};
@@ -277,24 +283,20 @@ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
-template<typename Kernel, int Index_, int Stop>
+template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling
{
typedef typename Kernel::PacketType PacketType;
- enum {
- SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
- DstAlignment = Kernel::AssignmentTraits::DstAlignment
- };
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
}
};
-template<typename Kernel, int Stop>
-struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
+template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
+struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};
@@ -423,9 +425,10 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
enum { size = DstXprType::SizeAtCompileTime,
- packetSize = packet_traits<typename Kernel::Scalar>::size,
+ packetSize =unpacket_traits<PacketType>::size,
alignedSize = (size/packetSize)*packetSize };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
@@ -472,9 +475,11 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::AssignmentTraits Traits;
const Index outerSize = kernel.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
+ Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
}
};
@@ -554,6 +559,29 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
}
};
+#if EIGEN_UNALIGNED_VECTORIZE
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
+{
+ EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
+
+ enum { size = DstXprType::InnerSizeAtCompileTime,
+ packetSize =unpacket_traits<PacketType>::size,
+ vectorizableSize = (size/packetSize)*packetSize };
+
+ for(Index outer = 0; outer < kernel.outerSize(); ++outer)
+ {
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
+ }
+ }
+};
+#endif
+
+
/***************************************************************************
* Part 4 : Generic dense assignment kernel
***************************************************************************/
@@ -681,7 +709,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
-
+
dense_assignment_loop<Kernel>::run(kernel);
}