aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/NEON/PacketMath.h
diff options
context:
space:
mode:
authorGravatar Antonio Sanchez <cantonios@google.com>2020-06-23 08:58:34 -0700
committerGravatar Antonio Sanchez <cantonios@google.com>2020-06-24 14:03:35 -0700
commit7222f0b6b58759b2207e6ec3224adb246fd23349 (patch)
tree28ac31a00f7b1ed717f915ee10c9ab07f0512441 /Eigen/src/Core/arch/NEON/PacketMath.h
parent14f84978e8a96a8aea412fb418f86da01e52eb9b (diff)
Fix packetmath_1 float tests for arm/aarch64.
Added missing `pmadd<Packet2f>` for NEON. This leads to significant improvement in precision than previous `pmul+padd`, which was causing the `pcos` tests to fail. Also added an approx test with `std::sin`/`std::cos` since otherwise returning any `a^2+b^2=1` would pass. Modified `log(denorm)` tests. Denorms are not always supported by all systems (returns `::min`), are always flushed to zero on 32-bit arm, and configurably flush to zero on sse/avx/aarch64. This leads to inconsistent results across different systems (i.e. `-inf` vs `nan`). Added a check for existence and exclude ARM. Removed logistic exactness test, since scalar and vectorized versions follow different code-paths due to differences in `pexp` and `pmadd`, which result in slightly different values. For example, exactness always fails on arm, aarch64, and altivec.
Diffstat (limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h6
1 files changed, 6 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 80ccd261b..2c4b5bfff 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -1023,6 +1023,8 @@ template<> EIGEN_STRONG_INLINE Packet2ul pdiv<Packet2ul>(const Packet2ul& /*a*/,
// MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
{ return vfmaq_f32(c,a,b); }
+template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)
+{ return vfma_f32(c,a,b); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
{
@@ -1046,6 +1048,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f&
return vmlaq_f32(c,a,b);
#endif
}
+template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)
+{
+ return vmla_f32(c,a,b);
+}
#endif
// No FMA instruction for int, so use MLA unconditionally.