about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/SSE/PacketMath.h
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2010-07-19 15:43:27 +0200
committer: Gael Guennebaud <g.gael@free.fr> 2010-07-19 15:43:27 +0200
commit: f8aae7a908c816810bf2005e59df016733884e81 (patch)
tree: ccb6617f995183614ac083cb9d5f4de77a9cee57 /Eigen/src/Core/arch/SSE/PacketMath.h
parent: cd0e5dca9bba943869ab7c98d370fcfc8456997a (diff)
* _mm_loaddup_pd is slow
* optimize SSE ei_ploaddup<Packet4f>
Diffstat (limited to 'Eigen/src/Core/arch/SSE/PacketMath.h')
-rw-r--r-- Eigen/src/Core/arch/SSE/PacketMath.h 9
1 files changed, 2 insertions, 7 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 87ebb783a..c2459e99f 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -114,12 +114,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) {
return ei_vec4f_swizzle1(res,0,0,0,0);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<Packet2d>(const double& from) {
-#ifdef EIGEN_VECTORIZE_SSE3
- return _mm_loaddup_pd(&from);
-#else
+ // NOTE the SSE3 intrinsic _mm_loaddup_pd is never faster but sometimes much slower
Packet2d res = _mm_set_sd(from);
return ei_vec2d_swizzle1(res, 0, 0);
-#endif
}
#else
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
@@ -259,9 +256,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from)
template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup<Packet4f>(const float* from)
{
- Packet4f tmp;
- tmp = _mm_loadl_pi(tmp,(__m64*)from);
- return ei_vec4f_swizzle1(tmp, 0, 0, 1, 1);
+ return ei_vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_ploaddup<Packet2d>(const double* from)
{ return ei_pset1<Packet2d>(from[0]); }