From 7222f0b6b58759b2207e6ec3224adb246fd23349 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Tue, 23 Jun 2020 08:58:34 -0700 Subject: Fix packetmath_1 float tests for arm/aarch64. Added missing `pmadd` for NEON. This leads to a significant improvement in precision over the previous `pmul+padd`, which was causing the `pcos` tests to fail. Also added an approx test with `std::sin`/`std::cos` since otherwise returning any `a^2+b^2=1` would pass. Modified `log(denorm)` tests. Denorms are not always supported by all systems (returns `::min`), are always flushed to zero on 32-bit arm, and configurably flush to zero on sse/avx/aarch64. This leads to inconsistent results across different systems (e.g. `-inf` vs `nan`). Added a check for existence and excluded ARM. Removed logistic exactness test, since scalar and vectorized versions follow different code-paths due to differences in `pexp` and `pmadd`, which result in slightly different values. For example, exactness always fails on arm, aarch64, and altivec. --- Eigen/src/Core/arch/NEON/PacketMath.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Eigen') diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 80ccd261b..2c4b5bfff 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -1023,6 +1023,8 @@ template<> EIGEN_STRONG_INLINE Packet2ul pdiv(const Packet2ul& /*a*/, // MLA: 10 GFlop/s ; FMA: 12 GFlops/s. 
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); } +template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c) +{ return vfma_f32(c,a,b); } #else template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { @@ -1046,6 +1048,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& return vmlaq_f32(c,a,b); #endif } +template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c) +{ + return vmla_f32(c,a,b); +} #endif // No FMA instruction for int, so use MLA unconditionally. -- cgit v1.2.3