aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2013-03-20 18:28:40 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2013-03-20 18:28:40 +0100
commitd63712163c00abb01ae8a9361968eb9a6581e50d (patch)
treee2509f3d701a5efdfcd241450f98c0f2811f919a /Eigen/src/Core/arch/SSE
parentda6219b19dd92231cd0afe380ae4880b62bfe88d (diff)
Add SSE4 min/max for integers
Diffstat (limited to 'Eigen/src/Core/arch/SSE')
-rw-r--r--Eigen/src/Core/arch/SSE/PacketMath.h8
1 files changed, 8 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f84e5b3ec..addb2fc0e 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -173,18 +173,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_min_epi32(a,b);
+#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_max_epi32(a,b);
+#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }