diff options
author | Benoit Jacob <jacob.benoit.1@gmail.com> | 2009-11-24 15:12:43 -0500 |
---|---|---|
committer | Benoit Jacob <jacob.benoit.1@gmail.com> | 2009-11-24 15:12:43 -0500 |
commit | 684d76eba3244d5ae75602b8ec06e81ef74da79b (patch) | |
tree | 946a45047f4eb52aaccb5bc07b2beccaf299d077 /Eigen | |
parent | abdb2a2bd55661d09db7a6b5ae7f603fc61db815 (diff) |
add SSE4 support, start with integer multiplication
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/Core | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 4 |
2 files changed, 10 insertions, 0 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 2968e36c6..e095aee11 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -67,6 +67,12 @@
 #ifdef __SSSE3__
 #include <tmmintrin.h>
 #endif
+ #ifdef __SSE4_1__
+ #include <smmintrin.h>
+ #endif
+ #ifdef __SSE4_2__
+ #include <nmmintrin.h>
+ #endif
 #elif defined __ALTIVEC__
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_ALTIVEC
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 60ccadc21..69f6979bd 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -118,6 +118,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
 template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
 {
+#ifdef __SSE4_1__
+ return _mm_mullo_epi32(a,b);
+#else
 // this version is slightly faster than 4 scalar products
 return ei_vec4i_swizzle1(
 ei_vec4i_swizzle2(
@@ -126,6 +129,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
 ei_vec4i_swizzle1(b,1,0,3,2)),
 0,2,0,2),
 0,2,1,3);
+#endif
 }
 
 template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }