From 684d76eba3244d5ae75602b8ec06e81ef74da79b Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 24 Nov 2009 15:12:43 -0500 Subject: add SSE4 support, start with integer multiplication --- Eigen/Core | 6 ++++++ Eigen/src/Core/arch/SSE/PacketMath.h | 4 ++++ 2 files changed, 10 insertions(+) (limited to 'Eigen') diff --git a/Eigen/Core b/Eigen/Core index 2968e36c6..e095aee11 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -67,6 +67,12 @@ #ifdef __SSSE3__ #include <tmmintrin.h> #endif + #ifdef __SSE4_1__ + #include <smmintrin.h> + #endif + #ifdef __SSE4_2__ + #include <nmmintrin.h> + #endif #elif defined __ALTIVEC__ #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_ALTIVEC diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 60ccadc21..69f6979bd 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -118,6 +118,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { +#ifdef __SSE4_1__ + return _mm_mullo_epi32(a,b); +#else // this version is slightly faster than 4 scalar products return ei_vec4i_swizzle1( ei_vec4i_swizzle2( @@ -126,6 +129,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con ei_vec4i_swizzle1(b,1,0,3,2)), 0,2,0,2), 0,2,1,3); +#endif } template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); } -- cgit v1.2.3