From 9d6afdeb22d1ccc17a2d97966163c6d8f7651047 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Thu, 1 Apr 2010 15:10:52 +0200 Subject: ei_psqrt fix for zero input --- Eigen/src/Core/arch/SSE/MathFunctions.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'Eigen') diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 3c0020248..99662eb6d 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -369,10 +369,14 @@ static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_pcos(Packet4f x) // For detail see here: http://www.beyond3d.com/content/articles/8/ static EIGEN_UNUSED Packet4f ei_psqrt(Packet4f _x) { - Packet4f half = ei_pmul(_x, ei_pset1(.5f)); - Packet4f x = _mm_rsqrt_ps(_x); - x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x)))); - return ei_pmul(_x,x); + Packet4f half = ei_pmul(_x, ei_pset1(.5f)); + + /* select only the inverse sqrt of non-zero inputs */ + Packet4f non_zero_mask = _mm_cmpgt_ps(_x, ei_pset1(std::numeric_limits<float>::epsilon())); + Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x)); + + x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x)))); + return ei_pmul(_x,x); } #endif // EIGEN_MATH_FUNCTIONS_SSE_H -- cgit v1.2.3