diff options
author | Gael Guennebaud <g.gael@free.fr> | 2013-08-19 16:02:27 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2013-08-19 16:02:27 +0200 |
commit | d4dd6aaed2c70b5e32541e96b4864b90dc07c614 (patch) | |
tree | 1e422e8a2ec61c29beda15eecec10414f45ea618 /Eigen/src/Core/arch/SSE/MathFunctions.h | |
parent | d3635b08da3b41c7e5201e45edd1882ddb158b35 (diff) |
Fix bug #642: add vectorization of sqrt for doubles, and make sqrt really safe if EIGEN_FAST_MATH is disabled
Diffstat (limited to 'Eigen/src/Core/arch/SSE/MathFunctions.h')
-rw-r--r-- | Eigen/src/Core/arch/SSE/MathFunctions.h | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 3376a984e..7a0aee658 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -442,8 +442,11 @@ Packet4f pcos<Packet4f>(const Packet4f& _x) return _mm_xor_ps(y, sign_bit); } +#if EIGEN_FAST_MATH + // This is based on Quake3's fast inverse square root. // For detail see here: http://www.beyond3d.com/content/articles/8/ +// It lacks 1 (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly. template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f psqrt<Packet4f>(const Packet4f& _x) { @@ -457,6 +460,14 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x) return pmul(_x,x); } +#else + +template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); } + +#endif + +template<> EIGEN_STRONG_INLINE Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); } + } // end namespace internal } // end namespace Eigen |