aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE/MathFunctions.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2013-08-19 16:02:27 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2013-08-19 16:02:27 +0200
commitd4dd6aaed2c70b5e32541e96b4864b90dc07c614 (patch)
tree1e422e8a2ec61c29beda15eecec10414f45ea618 /Eigen/src/Core/arch/SSE/MathFunctions.h
parentd3635b08da3b41c7e5201e45edd1882ddb158b35 (diff)
Fix bug #642: add vectorization of sqrt for doubles, and make sqrt really safe if EIGEN_FAST_MATH is disabled
Diffstat (limited to 'Eigen/src/Core/arch/SSE/MathFunctions.h')
-rw-r--r--Eigen/src/Core/arch/SSE/MathFunctions.h11
1 files changed, 11 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 3376a984e..7a0aee658 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -442,8 +442,11 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
return _mm_xor_ps(y, sign_bit);
}
+#if EIGEN_FAST_MATH
+
// This is based on Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/
+// It lacks 1 (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f psqrt<Packet4f>(const Packet4f& _x)
{
@@ -457,6 +460,14 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x)
return pmul(_x,x);
}
+#else
+
+template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
+
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
+
} // end namespace internal
} // end namespace Eigen