diff options
Diffstat (limited to 'Eigen/src/Core/arch/SSE/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 8 |
1 file changed, 6 insertions, 2 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 29c89c310..a633c7b7c 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -58,8 +58,8 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
 {
   typedef Packet4f type; enum {size=4};
   enum {
-    HasSin = 1,
-    HasCos = 1,
+    HasSin = EIGEN_FAST_MATH,
+    HasCos = EIGEN_FAST_MATH,
     HasLog = 1,
     HasExp = 1,
     HasSqrt = 1
@@ -118,6 +118,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
 template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
 {
+#ifdef __SSE4_1__
+  return _mm_mullo_epi32(a,b);
+#else
   // this version is slightly faster than 4 scalar products
   return ei_vec4i_swizzle1(
             ei_vec4i_swizzle2(
@@ -126,6 +129,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
                          ei_vec4i_swizzle1(b,1,0,3,2)),
               0,2,0,2),
             0,2,1,3);
+#endif
 }
 
 template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }