about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Benoit Jacob <jacob.benoit.1@gmail.com> 2010-07-09 18:51:17 -0400
committer Benoit Jacob <jacob.benoit.1@gmail.com> 2010-07-09 18:51:17 -0400
commit 6dcd373b9d518c688b16800d9d0d7a88cb5f3dc2 (patch)
tree 71c43173668381c889315a0e4a5c169ae8ce74fa
parent 6ad3f1ab1f950b417788ca0b7bb15bde948c158c (diff)
Let ei_pset1<double> use _mm_loaddup_pd when SSE3 is enabled. This is not a significant speed improvement, but it is also not a speed regression, and it replaces 3 instructions with a single instruction.
-rw-r--r-- Eigen/src/Core/arch/SSE/PacketMath.h 4
1 files changed, 4 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index a5d527271..dc28bdb56 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -109,8 +109,12 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
return ei_vec4f_swizzle1(res,0,0,0,0);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) {
+#ifdef EIGEN_VECTORIZE_SSE3
+ return _mm_loaddup_pd(&from);
+#else
Packet2d res = _mm_set_sd(from);
return ei_vec2d_swizzle1(res, 0, 0);
+#endif
}
#else
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); }