about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/NEON/PacketMath.h
diff options
context:
space:
mode:
author: Benoit Jacob <benoitjacob@google.com> 2020-09-15 11:07:57 -0400
committer: Benoit Jacob <benoitjacob@google.com> 2020-09-15 20:54:14 -0400
commit: cc0c38ace87f9b77a21b2ad1b20b0c4f97b24719 (patch)
tree: 2c918434c012e4e43d7a4ac68bf60eecdbb08eea /Eigen/src/Core/arch/NEON/PacketMath.h
parent: bb56a62582929d4b3b0a73e49d19909b6b319f79 (diff)
Remove old Clang compiler bug work-arounds.

The two LLVM bugs referenced in the comments here have long been fixed. The work-arounds had become detrimental because (1) they prevented using fused mul-add on Clang/ARM32 and (2) the unnecessary 'volatile' in 'asm volatile' prevented legitimate reordering by the compiler.
Diffstat (limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')
-rw-r--r-- Eigen/src/Core/arch/NEON/PacketMath.h | 31
1 file changed, 2 insertions, 29 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 530adfeec..463ae58ad 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -1010,17 +1010,8 @@ template<> EIGEN_STRONG_INLINE Packet2ul pdiv<Packet2ul>(const Packet2ul& /*a*/,
return pset1<Packet2ul>(0ULL);
}
-// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available,
-// then implements a slow software scalar fallback calling fmaf()!
-// Filed LLVM bug:
-// https://llvm.org/bugs/show_bug.cgi?id=27216
-#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
-// See bug 936.
-// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
-// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
-// MLA is not fused i.e. does 2 roundings.
-// In addition to giving better accuracy, FMA also gives better performance here on a Krait (Nexus 4):
-// MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
+
+#ifdef __ARM_FEATURE_FMA
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
{ return vfmaq_f32(c,a,b); }
template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)
@@ -1028,25 +1019,7 @@ template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f&
#else
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
{
-#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
- // Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu,
- // at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on
- // -march=armv7-a, that is a very common case.
- // See e.g. this thread:
- // http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
- // Filed LLVM bug:
- // https://llvm.org/bugs/show_bug.cgi?id=27219
- Packet4f r = c;
- asm volatile(
- "vmla.f32 %q[r], %q[a], %q[b]"
- : [r] "+w" (r)
- : [a] "w" (a),
- [b] "w" (b)
- : );
- return r;
-#else
return vmlaq_f32(c,a,b);
-#endif
}
template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)
{