aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE/MathFunctions.h
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2016-10-04 15:08:41 -0700
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2016-10-04 15:08:41 -0700
commit765615609d40f485c718d60b76b35a277c36d5ce (patch)
tree1657e741a5f02111eb1d2a48f4d07d8052470fea /Eigen/src/Core/arch/SSE/MathFunctions.h
parent3ed67cb0bb4af65fbf243df598604a8c7630bf7d (diff)
Update comment for fast sqrt.
Diffstat (limited to 'Eigen/src/Core/arch/SSE/MathFunctions.h')
-rw-r--r--Eigen/src/Core/arch/SSE/MathFunctions.h9
1 files changed, 7 insertions, 2 deletions
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 2c34a869d..7b5f948e1 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -444,9 +444,14 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
#if EIGEN_FAST_MATH
-// This is based on Quake3's fast inverse square root.
+// Functions for sqrt.
+// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
+// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
+// exact solution. It does not handle +inf, or denormalized numbers correctly.
+// The main advantage of this approach is not just speed, but also the fact that
+// it can be inlined and pipelined with other computations, further reducing its
+// effective latency. This is similar to Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/
-// It lacks 1 (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f psqrt<Packet4f>(const Packet4f& _x)
{