about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2015-10-21 20:58:33 +0200
committer: Gael Guennebaud <g.gael@free.fr> 2015-10-21 20:58:33 +0200
commit: e78bc111f1dc0a7af2360b836c94c33d67e55fc5 (patch)
tree: 0c17ed598ca7faf949227f2dd92f189f6b0f0dcb
parent: b178cc347968675bdae942dbdcb7de9ed9daa564 (diff)
Bug #1090: fix a shortcoming in the redux logic that allowed slice-vectorization and unrolling to be selected together.
-rw-r--r-- Eigen/src/Core/Redux.h 5
-rw-r--r-- test/redux.cpp 8
2 files changed, 11 insertions, 2 deletions
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 87b4a9c46..309898b36 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -269,8 +269,9 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
}
};
-template<typename Func, typename Derived>
-struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
+// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
+template<typename Func, typename Derived, int Unrolling>
+struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketType;
diff --git a/test/redux.cpp b/test/redux.cpp
index 9b0767c73..849faf55e 100644
--- a/test/redux.cpp
+++ b/test/redux.cpp
@@ -56,6 +56,14 @@ template<typename MatrixType> void matrixRedux(const MatrixType& m)
VERIFY_IS_APPROX(m1_for_prod.block(r0,c0,r1,c1).prod(), m1_for_prod.block(r0,c0,r1,c1).eval().prod());
VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().minCoeff(), m1.block(r0,c0,r1,c1).real().eval().minCoeff());
VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().maxCoeff(), m1.block(r0,c0,r1,c1).real().eval().maxCoeff());
+
+ // regression for bug 1090
+ const int R1 = MatrixType::RowsAtCompileTime>=2 ? MatrixType::RowsAtCompileTime/2 : 6;
+ const int C1 = MatrixType::ColsAtCompileTime>=2 ? MatrixType::ColsAtCompileTime/2 : 6;
+ if(R1<=rows-r0 && C1<=cols-c0)
+ {
+ VERIFY_IS_APPROX( (m1.template block<R1,C1>(r0,c0).sum()), m1.block(r0,c0,R1,C1).sum() );
+ }
// test empty objects
VERIFY_IS_APPROX(m1.block(r0,c0,0,0).sum(), Scalar(0));