diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2020-12-15 04:06:41 +0000 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2020-12-15 04:06:41 +0000 |
commit | 6cee8d347e8a7e8e1a689a3b7de5fe413f3e1103 (patch) | |
tree | 751096df7a820ba4c42e8d65cfc7e005dffbdde4 /Eigen/src/Core/arch/NEON/PacketMath.h | |
parent | cf0b5b0344a3bfcf410e95bf22289015a2daf34b (diff) |
Add an additional step of Newton-Raphson for `psqrt<double>` on Arm, which otherwise has an error of ~1000 ulps.
Diffstat (limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 90ffee767..5883eca38 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -3896,7 +3896,8 @@ template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){
   // Do a single step of Newton's iteration.
   //the number 1.5f was set reference to Quake3's fast inverse square root
   x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
-  // Do one more Newton's iteration to get more accurate result.
+  // Do two more Newton's iteration to get a result accurate to 1 ulp.
+  x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
   x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
   // Flush results for denormals to zero.
   return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(pmul(_x, x)), denormal_mask));
```