diff options
author | Benoit Jacob <benoitjacob@google.com> | 2016-04-04 16:41:47 -0400 |
---|---|---|
committer | Benoit Jacob <benoitjacob@google.com> | 2016-04-04 16:41:47 -0400 |
commit | 03f2997a119578b894ee717aff23e2641ab78f37 (patch) | |
tree | 4c2b35a36623495e458efd9f8ac96d8177cc22b8 /Eigen/src | |
parent | c4179dd470f72520b9ffba5b78d4dd1261ccc609 (diff) |
bug #1191 - Prevent Clang/ARM from rewriting VMLA into VMUL+VADD
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 20 |
1 files changed, 19 insertions, 1 deletions
```diff
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index fead02916..10ef1d2b3 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -186,7 +186,25 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
 // MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
 #else
-template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
+#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
+  // Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu,
+  // at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on
+  // -march=armv7-a, that is a very common case.
+  // See e.g. this thread:
+  // http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
+  Packet4f r = c;
+  asm volatile(
+    "vmla.f32 %q[r], %q[a], %q[b]"
+    : [r] "+w" (r)
+    : [a] "w" (a),
+      [b] "w" (b)
+    : );
+  return r;
+#else
+  return vmlaq_f32(c,a,b);
+#endif
+}
 #endif
 
 // No FMA instruction for int, so use MLA unconditionally.
```