diff options
author | Gael Guennebaud <g.gael@free.fr> | 2017-12-14 14:24:33 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2017-12-14 14:24:33 +0100 |
commit | 9c3aed9d48d7dbc0f88d2fb92ca232dcbf0d402e (patch) | |
tree | f82ec866759e39ea90ec36bc4834f757fdab6a60 | |
parent | 76c7dae600efc26a6fda212be518b804f101244f (diff) |
Fix packet and alignment propagation logic of Block<Xpr> expressions. In particular, (A+B).col(j) lost vectorisation.
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 6 | ||||
-rw-r--r-- | test/vectorization_logic.cpp | 6 |
2 files changed, 10 insertions, 2 deletions
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 15b361b38..dfdb63e98 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -1034,7 +1034,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > OuterStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(outer_stride_at_compile_time<ArgType>::ret) : int(inner_stride_at_compile_time<ArgType>::ret), - MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, @@ -1044,7 +1044,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, PacketAlignment = unpacket_traits<PacketScalar>::alignment, - Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) + && (OuterStrideAtCompileTime!=0) + && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0) }; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 83c1439ad..37e7495f5 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -207,6 +207,12 @@ struct vectorization_logic VERIFY(test_redux(Vector1(), LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_redux(Vector1().array()*Vector1().array(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux((Vector1().array()*Vector1().array()).col(0), + LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_redux(Matrix<Scalar,PacketSize,3>(), LinearVectorizedTraversal,CompleteUnrolling)); |