aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/AssignEvaluator.h
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2016-05-01 23:25:06 +0200
committer: Gael Guennebaud <g.gael@free.fr> 2016-05-01 23:25:06 +0200
commit: b1bd53aa6bec39c53de475c90987eece86c206d2 (patch)
tree: 7942709c9d16453f5310883eca8558cc9d9d15ea /Eigen/src/Core/AssignEvaluator.h
parent: d6c9596fd8693b29b94fed984824480f285336ea (diff)
Fix performance regression: with AVX, unaligned stores were emitted instead of aligned ones for fixed-size assignments.
Diffstat (limited to 'Eigen/src/Core/AssignEvaluator.h')
-rw-r--r-- Eigen/src/Core/AssignEvaluator.h 15
1 files changed, 11 insertions, 4 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 9d4b315a0..b1193e421 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -256,12 +256,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
- JointAlignment = Kernel::AssignmentTraits::JointAlignment
+ JointAlignment = Kernel::AssignmentTraits::JointAlignment,
+ DefaultAlignment = unpacket_traits<PacketType>::alignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
- kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
+ kernel.template assignPacketByOuterInner<DefaultAlignment, JointAlignment, PacketType>(outer, inner);
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
}
@@ -277,9 +278,12 @@ template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
typedef typename Kernel::PacketType PacketType;
+ enum {
+ DefaultAlignment = unpacket_traits<PacketType>::alignment
+ };
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{
- kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
+ kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, Index_);
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
}
@@ -433,6 +437,9 @@ template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
typedef typename Kernel::PacketType PacketType;
+ enum {
+ DefaultAlignment = unpacket_traits<PacketType>::alignment
+ };
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
const Index innerSize = kernel.innerSize();
@@ -440,7 +447,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
const Index packetSize = unpacket_traits<PacketType>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize)
- kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
+ kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, inner);
}
};