aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr>, 2014-03-20 10:14:26 +0100
committer: Gael Guennebaud <g.gael@free.fr>, 2014-03-20 10:14:26 +0100
commit: c39a3fa7a1808233ad6556e169e0c08d3bc979e1 (patch)
tree: df64685fcfb8eb318a7fc12097217a72dd88fe82 /Eigen
parent: 2a564695f0e9391eb3a0125bd5731c17aabdb680 (diff)
Make GCC generate a pshufd instruction for pset1
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/src/Core/arch/SSE/PacketMath.h | 15
1 file changed, 14 insertions, 1 deletion
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f5a3dab52..ea14111e3 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -110,7 +110,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { re
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
#else
-template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
+
+// For set1_ps, GCC generates a shufps instruction instead of the more efficient pshufd instruction.
+// With AVX enabled this workaround is skipped, because there we want GCC to generate a vbroadcastss.
+// The pshufd is emitted through inline asm: with intrinsics, GCC generates poor code in some cases (see bug 203).
+#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__) // genuine GCC only, AVX off
+ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { // returns a packet built from the single scalar `from`
+ Packet4f res; // written only by the asm statement below
+ asm("pshufd $0, %[a], %[b]" : [b] "=x" (res) : [a] "x" (from)); // "x" = SSE register operand; shuffle immediate 0 selects source element 0 for every destination element
+ return res;
+ }
+#else // ICC, clang, non-GNU compilers, or AVX enabled: rely on the compiler intrinsic
+ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
+#endif // GCC-without-AVX pshufd workaround
+
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
#endif