aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2009-09-17 23:18:21 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2009-09-17 23:18:21 +0200
commit9395326e4491b52d8fe0e6431e8843c9629acc79 (patch)
treec30f43971f31ee6449a6d9131b3c3d8303a6a84e /Eigen
parente4f94b8c58bcfe63c444463b69ac272122175d55 (diff)
fix #53: performance regression; hopefully I did not resurrect another
perf. issue...
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/arch/SSE/PacketMath.h9
1 files changed, 5 insertions, 4 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 52e666d2f..5d9af130d 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -77,15 +77,16 @@ template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size
#ifdef __GNUC__
// Sometimes GCC implements _mm_set1_p* using multiple moves,
// that is inefficient :(
+// TODO make sure the new solution using the shuffle/unpacklo is ok
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
Packet4f res = _mm_set_ss(from);
- asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : );
- return res;
+ return _mm_shuffle_ps(res,res,0);
+ //asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : );
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) {
Packet2d res = _mm_set_sd(from);
- asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : );
- return res;
+ return _mm_unpacklo_pd(res,res);
+// asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : );
}
#else
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); }