aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE/PacketMath.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2015-10-07 15:48:35 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2015-10-07 15:48:35 +0200
commitf047ecc36a4e940417c1980d16e1b029539f8f10 (patch)
tree7e889d387c4eefa1985a49893a52fcb25d938f92 /Eigen/src/Core/arch/SSE/PacketMath.h
parentaba1eda71e8743454175fc315f3c0c2454e54291 (diff)
_mm_hadd_epi32 is for SSSE3 only (and not SSE3)
Diffstat (limited to 'Eigen/src/Core/arch/SSE/PacketMath.h')
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h26
1 files changed, 14 insertions, 12 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index e7b676f4c..2e0a807bf 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -532,10 +532,6 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
{
return _mm_hadd_pd(vecs[0], vecs[1]);
}
-template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
-{
- return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
-}
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{
@@ -544,12 +540,6 @@ template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
}
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); }
-
-template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
-{
- Packet4i tmp0 = _mm_hadd_epi32(a,a);
- return pfirst(_mm_hadd_epi32(tmp0,tmp0));
-}
#else
// SSE2 versions
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
@@ -580,7 +570,20 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
{
return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
}
+#endif // SSE3
+
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp0 = _mm_hadd_epi32(a,a);
+ return pfirst(_mm_hadd_epi32(tmp0,tmp0));
+}
+#else
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
@@ -600,8 +603,7 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
return _mm_add_epi32(tmp0, tmp2);
}
-#endif // SSE3
-
+#endif
// Other reduction functions:
// mul