aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2017-12-14 14:24:33 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2017-12-14 14:24:33 +0100
commit9c3aed9d48d7dbc0f88d2fb92ca232dcbf0d402e (patch)
treef82ec866759e39ea90ec36bc4834f757fdab6a60
parent76c7dae600efc26a6fda212be518b804f101244f (diff)
Fix packet and alignment propagation logic of Block<Xpr> expressions. In particular, (A+B).col(j) lost vectorisation.
-rw-r--r--Eigen/src/Core/CoreEvaluators.h6
-rw-r--r--test/vectorization_logic.cpp6
2 files changed, 10 insertions, 2 deletions
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 15b361b38..dfdb63e98 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -1034,7 +1034,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
? int(outer_stride_at_compile_time<ArgType>::ret)
: int(inner_stride_at_compile_time<ArgType>::ret),
- MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
+ MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
@@ -1044,7 +1044,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
PacketAlignment = unpacket_traits<PacketScalar>::alignment,
- Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
+ Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
+ && (OuterStrideAtCompileTime!=0)
+ && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp
index 83c1439ad..37e7495f5 100644
--- a/test/vectorization_logic.cpp
+++ b/test/vectorization_logic.cpp
@@ -207,6 +207,12 @@ struct vectorization_logic
VERIFY(test_redux(Vector1(),
LinearVectorizedTraversal,CompleteUnrolling));
+ VERIFY(test_redux(Vector1().array()*Vector1().array(),
+ LinearVectorizedTraversal,CompleteUnrolling));
+
+ VERIFY(test_redux((Vector1().array()*Vector1().array()).col(0),
+ LinearVectorizedTraversal,CompleteUnrolling));
+
VERIFY(test_redux(Matrix<Scalar,PacketSize,3>(),
LinearVectorizedTraversal,CompleteUnrolling));