Remove old Clang compiler bug workarounds. The two LLVM bugs referenced in the comments here have long been fixed. The workarounds had become detrimental because (1) they prevented the use of fused multiply-add on Clang/ARM32 and (2) the unnecessary 'volatile' in 'asm volatile' prevented legitimate reordering by the compiler.

author: Benoit Jacob <benoitjacob@google.com> 2020-09-15 11:07:57 -0400
committer: Benoit Jacob <benoitjacob@google.com> 2020-09-15 20:54:14 -0400
commit: cc0c38ace87f9b77a21b2ad1b20b0c4f97b24719 (patch)
tree: 2c918434c012e4e43d7a4ac68bf60eecdbb08eea /Eigen/src/Core/arch/NEON/PacketMath.h
parent: bb56a62582929d4b3b0a73e49d19909b6b319f79 (diff)
1 files changed, 2 insertions, 29 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 530adfeec..463ae58ad 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -1010,17 +1010,8 @@ template<> EIGEN_STRONG_INLINE Packet2ul pdiv<Packet2ul>(const Packet2ul& /*a*/,
   return pset1<Packet2ul>(0ULL);
 }
 
-// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available,
-// then implements a slow software scalar fallback calling fmaf()!
-// Filed LLVM bug:
-//     https://llvm.org/bugs/show_bug.cgi?id=27216
-#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
-// See bug 936.
-// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
-// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
-// MLA is not fused i.e. does 2 roundings.
-// In addition to giving better accuracy, FMA also gives better performance here on a Krait (Nexus 4):
-// MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
+
+#ifdef __ARM_FEATURE_FMA
 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
 { return vfmaq_f32(c,a,b); }
 template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)
@@ -1028,25 +1019,7 @@ template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f&
 #else
 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
 {
-#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
-  // Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu,
-  // at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on
-  // -march=armv7-a, that is a very common case.
-  // See e.g. this thread:
-  //     http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
-  // Filed LLVM bug:
-  //     https://llvm.org/bugs/show_bug.cgi?id=27219
-  Packet4f r = c;
-  asm volatile(
-    "vmla.f32 %q[r], %q[a], %q[b]"
-    : [r] "+w" (r)
-    : [a] "w" (a),
-      [b] "w" (b)
-    : );
-  return r;
-#else
   return vmlaq_f32(c,a,b);
-#endif
 }
 template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)
 {
author	Benoit Jacob <benoitjacob@google.com>	2020-09-15 11:07:57 -0400
committer	Benoit Jacob <benoitjacob@google.com>	2020-09-15 20:54:14 -0400
commit	cc0c38ace87f9b77a21b2ad1b20b0c4f97b24719 (patch)
tree	2c918434c012e4e43d7a4ac68bf60eecdbb08eea /Eigen/src/Core/arch/NEON/PacketMath.h
parent	bb56a62582929d4b3b0a73e49d19909b6b319f79 (diff)