diff options
author | Gael Guennebaud <g.gael@free.fr> | 2016-05-01 23:25:06 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2016-05-01 23:25:06 +0200 |
commit | b1bd53aa6bec39c53de475c90987eece86c206d2 (patch) | |
tree | 7942709c9d16453f5310883eca8558cc9d9d15ea /Eigen/src/Core/AssignEvaluator.h | |
parent | d6c9596fd8693b29b94fed984824480f285336ea (diff) |
Fix performance regression: with AVX, unaligned stores were emitted instead of aligned ones for fixed-size assignment.
Diffstat (limited to 'Eigen/src/Core/AssignEvaluator.h')
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 9d4b315a0..b1193e421 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -256,12 +256,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
   enum {
     outer = Index / DstXprType::InnerSizeAtCompileTime,
     inner = Index % DstXprType::InnerSizeAtCompileTime,
-    JointAlignment = Kernel::AssignmentTraits::JointAlignment
+    JointAlignment = Kernel::AssignmentTraits::JointAlignment,
+    DefaultAlignment = unpacket_traits<PacketType>::alignment
   };
 
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
   {
-    kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
+    kernel.template assignPacketByOuterInner<DefaultAlignment, JointAlignment, PacketType>(outer, inner);
     enum { NextIndex = Index + unpacket_traits<PacketType>::size };
     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
   }
@@ -277,9 +278,12 @@ template<typename Kernel, int Index_, int Stop>
 struct copy_using_evaluator_innervec_InnerUnrolling
 {
   typedef typename Kernel::PacketType PacketType;
+  enum {
+    DefaultAlignment = unpacket_traits<PacketType>::alignment
+  };
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
   {
-    kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
+    kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, Index_);
     enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
     copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
   }
@@ -433,6 +437,9 @@ template<typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
 {
   typedef typename Kernel::PacketType PacketType;
+  enum {
+    DefaultAlignment = unpacket_traits<PacketType>::alignment
+  };
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
   {
     const Index innerSize = kernel.innerSize();
@@ -440,7 +447,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
     const Index packetSize = unpacket_traits<PacketType>::size;
     for(Index outer = 0; outer < outerSize; ++outer)
       for(Index inner = 0; inner < innerSize; inner+=packetSize)
-        kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
+        kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, inner);
   }
 };
 