aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2018-12-07 10:01:09 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2018-12-07 10:01:09 +0100
commitf233c6194db032f31c14d06b5e962197b66ea296 (patch)
tree0a0b40824ef1d88384ae030058e0e4b6e5aae4e9
parentae59a7652bae0727a983d586395bcaf2c48385cc (diff)
bug #1637: workaround register spilling in gebp with clang>=6.0+AVX+FMA
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h13
1 files changed, 7 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 0a1b63155..35c821472 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -183,11 +183,12 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
-#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
- // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
- // and gcc stupidly generates a vfmadd132ps instruction,
- // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
- // the result of the product.
+#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
+ // Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
+ // and even register spilling with clang>=6.0 (bug 1637).
+ // Gcc stupidly generates a vfmadd132ps instruction.
+ // So let's enforce it to generate a vfmadd231ps instruction since the most common use
+ // case is to accumulate the result of the product.
Packet8f res = c;
__asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
return res;
@@ -196,7 +197,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
#endif
}
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
-#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
+#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
// see above
Packet4d res = c;
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));