about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen
diff options
context:
space:
mode:
author: Benoit Jacob <jacob.benoit.1@gmail.com> 2009-11-24 15:12:43 -0500
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2009-11-24 15:12:43 -0500
commit: 684d76eba3244d5ae75602b8ec06e81ef74da79b (patch)
tree: 946a45047f4eb52aaccb5bc07b2beccaf299d077 /Eigen
parent: abdb2a2bd55661d09db7a6b5ae7f603fc61db815 (diff)
add SSE4 support, start with integer multiplication
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/Core 6
-rw-r--r-- Eigen/src/Core/arch/SSE/PacketMath.h 4
2 files changed, 10 insertions, 0 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 2968e36c6..e095aee11 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -67,6 +67,12 @@
#ifdef __SSSE3__
#include <tmmintrin.h>
#endif
+ #ifdef __SSE4_1__
+ #include <smmintrin.h>
+ #endif
+ #ifdef __SSE4_2__
+ #include <nmmintrin.h>
+ #endif
#elif defined __ALTIVEC__
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ALTIVEC
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 60ccadc21..69f6979bd 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -118,6 +118,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef __SSE4_1__
+ return _mm_mullo_epi32(a,b);
+#else
// this version is slightly faster than 4 scalar products
return ei_vec4i_swizzle1(
ei_vec4i_swizzle2(
@@ -126,6 +129,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
ei_vec4i_swizzle1(b,1,0,3,2)),
0,2,0,2),
0,2,1,3);
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }