From 9395326e4491b52d8fe0e6431e8843c9629acc79 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Thu, 17 Sep 2009 23:18:21 +0200
Subject: fix #53: performance regression, hopefully I did not resurrected another perf. issue...

---
 Eigen/src/Core/arch/SSE/PacketMath.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'Eigen/src/Core/arch')

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 52e666d2f..5d9af130d 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -77,15 +77,16 @@ template<> struct ei_unpacket_traits { typedef int type; enum {size
 #ifdef __GNUC__
 // Sometimes GCC implements _mm_set1_p* using multiple moves,
 // that is inefficient :(
+// TODO make sure the new solution using the shuffle/unpacklo is ok
 template<> EIGEN_STRONG_INLINE Packet4f ei_pset1(const float& from) {
   Packet4f res = _mm_set_ss(from);
-  asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : );
-  return res;
+  return _mm_shuffle_ps(res,res,0);
+  //asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : );
 }
 template<> EIGEN_STRONG_INLINE Packet2d ei_pset1(const double& from) {
   Packet2d res = _mm_set_sd(from);
-  asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : );
-  return res;
+  return _mm_unpacklo_pd(res,res);
+// asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : );
 }
 #else
 template<> EIGEN_STRONG_INLINE Packet4f ei_pset1(const float& from) { return _mm_set1_ps(from); }
-- 
cgit v1.2.3