aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE/PacketMath.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/arch/SSE/PacketMath.h')
-rw-r--r--Eigen/src/Core/arch/SSE/PacketMath.h8
1 files changed, 6 insertions, 2 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 29c89c310..a633c7b7c 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -58,8 +58,8 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
{
typedef Packet4f type; enum {size=4};
enum {
- HasSin = 1,
- HasCos = 1,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = EIGEN_FAST_MATH,
HasLog = 1,
HasExp = 1,
HasSqrt = 1
@@ -118,6 +118,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef __SSE4_1__
+ return _mm_mullo_epi32(a,b);
+#else
// this version is slightly faster than 4 scalar products
return ei_vec4i_swizzle1(
ei_vec4i_swizzle2(
@@ -126,6 +129,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
ei_vec4i_swizzle1(b,1,0,3,2)),
0,2,0,2),
0,2,1,3);
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }