From f8aae7a908c816810bf2005e59df016733884e81 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Mon, 19 Jul 2010 15:43:27 +0200
Subject: * _mm_loaddup_pd is slow * optimize SSE ei_ploaddup

---
 Eigen/src/Core/arch/SSE/PacketMath.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'Eigen/src/Core/arch/SSE/PacketMath.h')

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 87ebb783a..c2459e99f 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -114,12 +114,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1(const float& from) {
   return ei_vec4f_swizzle1(res,0,0,0,0);
 }
 template<> EIGEN_STRONG_INLINE Packet2d ei_pset1(const double& from) {
-#ifdef EIGEN_VECTORIZE_SSE3
-  return _mm_loaddup_pd(&from);
-#else
+  // NOTE the SSE3 intrinsic _mm_loaddup_pd is never faster but sometimes much slower
   Packet2d res = _mm_set_sd(from);
   return ei_vec2d_swizzle1(res, 0, 0);
-#endif
 }
 #else
 template<> EIGEN_STRONG_INLINE Packet4f ei_pset1(const float& from) { return _mm_set1_ps(from); }
@@ -259,9 +256,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
 
 template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup(const float* from)
 {
-  Packet4f tmp;
-  tmp = _mm_loadl_pi(tmp,(__m64*)from);
-  return ei_vec4f_swizzle1(tmp, 0, 0, 1, 1);
+  return ei_vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
 }
 template<> EIGEN_STRONG_INLINE Packet2d ei_ploaddup(const double* from)
 { return ei_pset1(from[0]); }
--
cgit v1.2.3