diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-03-27 18:32:15 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-03-27 18:32:15 -0700 |
commit | 51e85c936d5b2266f2a8f0953bf9c10853620c49 (patch) | |
tree | 53a7f500e19896ff976c9c11e149f5ec550ed28d /Eigen/src/Core/arch/AVX | |
parent | 8a94cb3edde854b89031a0e985c524f2f6bf799d (diff) | |
parent | 58fe2fc2b21401365ace575738d878dad21eb184 (diff) |
Merged latest changes from parent.
Diffstat (limited to 'Eigen/src/Core/arch/AVX')
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index aa2ac3b0b..6d5a5f53f 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -124,7 +124,19 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co } #ifdef EIGEN_VECTORIZE_FMA -template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { return _mm256_fmadd_ps(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { +#if defined(__clang__) || defined(__GNUC__) + // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, + // and gcc stupidly generates a vfmadd132ps instruction, + // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate + // the result of the product. + Packet8f res = c; + asm("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); + return res; +#else + return _mm256_fmadd_ps(a,b,c); +#endif +} template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { return _mm256_fmadd_pd(a,b,c); } #endif |