From 73a8d572f5d2e7020b71026d48bfdf99decf8d5b Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 12 Dec 2019 19:34:25 +0000 Subject: Clamp tanh approximation outside [-c, c] where c is the smallest value where the approximation is exactly +/-1. Without FMA, c = 7.90531110763549805, with FMA c = 7.99881172180175781. --- Eigen/src/Core/MathFunctionsImpl.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'Eigen/src/Core/MathFunctionsImpl.h') diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index aff3967ca..9ace5f32d 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -17,19 +17,25 @@ namespace internal { /** \internal \returns the hyperbolic tan of \a a (coeff-wise) Doesn't do anything fancy, just a 13/6-degree rational interpolant which - is accurate up to a couple of ulp in the range [-9, 9], outside of which - the tanh(x) = +/-1. + is accurate up to a couple of ulps in the (approximate) range [-8, 8], + outside of which tanh(x) = +/-1 in single precision. This is done by + clamping the inputs to the range [-c, c]. The value c is chosen as the smallest + value where the approximation evaluates to exactly 1. This implementation works on both scalars and packets. */ template T generic_fast_tanh_float(const T& a_x) { - // Clamp the inputs to the range [-9, 9] since anything outside - // this range is +/-1.0f in single-precision. - const T plus_9 = pset1(9.f); - const T minus_9 = pset1(-9.f); - const T x = pmax(pmin(a_x, plus_9), minus_9); + // Clamp the inputs to the range [-c, c] +#ifdef EIGEN_VECTORIZE_FMA + const T plus_clamp = pset1(7.99881172180175781); + const T minus_clamp = pset1(-7.99881172180175781); +#else + const T plus_clamp = pset1(7.90531110763549805); + const T minus_clamp = pset1(-7.90531110763549805); +#endif + const T x = pmax(pmin(a_x, plus_clamp), minus_clamp); // The monomial coefficients of the numerator polynomial (odd). 
const T alpha_1 = pset1(4.89352455891786e-03f); const T alpha_3 = pset1(6.37261928875436e-04f); -- cgit v1.2.3