aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/NEON/PacketMath.h
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-12-15 04:06:41 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-12-15 04:06:41 +0000
commit6cee8d347e8a7e8e1a689a3b7de5fe413f3e1103 (patch)
tree751096df7a820ba4c42e8d65cfc7e005dffbdde4 /Eigen/src/Core/arch/NEON/PacketMath.h
parentcf0b5b0344a3bfcf410e95bf22289015a2daf34b (diff)
Add an additional step of Newton-Raphson for `psqrt<double>` on Arm, which otherwise has an error of ~1000 ulps.
Diffstat (limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h3
1 files changed, 2 insertions, 1 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 90ffee767..5883eca38 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -3896,7 +3896,8 @@ template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){
// Do a single step of Newton's iteration.
//the number 1.5f was set reference to Quake3's fast inverse square root
x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
- // Do one more Newton's iteration to get more accurate result.
+ // Do two more Newton's iteration to get a result accurate to 1 ulp.
+ x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
// Flush results for denormals to zero.
return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(pmul(_x, x)), denormal_mask));