bug #1191 - Prevent Clang/ARM from rewriting VMLA into VMUL+VADD

author: Benoit Jacob <benoitjacob@google.com> 2016-04-04 16:41:47 -0400
committer: Benoit Jacob <benoitjacob@google.com> 2016-04-04 16:41:47 -0400
commit: 03f2997a119578b894ee717aff23e2641ab78f37 (patch)
tree: 4c2b35a36623495e458efd9f8ac96d8177cc22b8 /Eigen/src/Core/arch/NEON/PacketMath.h
parent: c4179dd470f72520b9ffba5b78d4dd1261ccc609 (diff)
1 files changed, 19 insertions, 1 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index fead02916..10ef1d2b3 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -186,7 +186,25 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
 // MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
 #else
-template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
+#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
+  // Multiply-accumulate (a*b added onto c). Clang/ARM will replace VMLA by VMUL+VADD at
+  // least for some values of -mcpu, at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the
+  // former is the default on -march=armv7-a, that is a very common case; hence the inline asm.
+  // See e.g. this thread:
+  //     http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
+  Packet4f r = c;  // accumulator: starts as c and is updated in place by the asm below
+  asm volatile(
+    "vmla.f32 %q[r], %q[a], %q[b]"
+    : [r] "+w" (r)  // "+" marks r as a read-write operand
+    : [a] "w" (a),
+      [b] "w" (b)
+    : );  // empty clobber list
+  return r;
+#else
+  return vmlaq_f32(c,a,b);
+#endif  // EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
+}
 #endif
 
 // No FMA instruction for int, so use MLA unconditionally.
author	Benoit Jacob <benoitjacob@google.com>	2016-04-04 16:41:47 -0400
committer	Benoit Jacob <benoitjacob@google.com>	2016-04-04 16:41:47 -0400
commit	03f2997a119578b894ee717aff23e2641ab78f37 (patch)
tree	4c2b35a36623495e458efd9f8ac96d8177cc22b8 /Eigen/src/Core/arch/NEON/PacketMath.h
parent	c4179dd470f72520b9ffba5b78d4dd1261ccc609 (diff)