aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AVX
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-03-27 18:32:15 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-03-27 18:32:15 -0700
commit51e85c936d5b2266f2a8f0953bf9c10853620c49 (patch)
tree53a7f500e19896ff976c9c11e149f5ec550ed28d /Eigen/src/Core/arch/AVX
parent8a94cb3edde854b89031a0e985c524f2f6bf799d (diff)
parent58fe2fc2b21401365ace575738d878dad21eb184 (diff)
Merged latest changes from parent.
Diffstat (limited to 'Eigen/src/Core/arch/AVX')
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h14
1 files changed, 13 insertions, 1 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index aa2ac3b0b..6d5a5f53f 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -124,7 +124,19 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
}
#ifdef EIGEN_VECTORIZE_FMA
-template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { return _mm256_fmadd_ps(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
+#if defined(__clang__) || defined(__GNUC__)
+ // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
+ // and gcc stupidly generates a vfmadd132ps instruction,
+ // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
+ // the result of the product.
+ Packet8f res = c;
+ asm("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+ return res;
+#else
+ return _mm256_fmadd_ps(a,b,c);
+#endif
+}
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { return _mm256_fmadd_pd(a,b,c); }
#endif