aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/MathFunctionsImpl.h
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-12-12 19:34:25 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-12-12 19:34:25 +0000
commit73a8d572f5d2e7020b71026d48bfdf99decf8d5b (patch)
treebd3033a54199807841448ab3a1633ee899720f17 /Eigen/src/Core/MathFunctionsImpl.h
parent88062b7feddcf3e8354c6857753523c2a80d864f (diff)
Clamp tanh approximation outside [-c, c] where c is the smallest value where the approximation is exactly +/-1. Without FMA, c = 7.90531110763549805, with FMA c = 7.99881172180175781.
Diffstat (limited to 'Eigen/src/Core/MathFunctionsImpl.h')
-rw-r--r--Eigen/src/Core/MathFunctionsImpl.h20
1 files changed, 13 insertions, 7 deletions
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index aff3967ca..9ace5f32d 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -17,19 +17,25 @@ namespace internal {
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
- is accurate up to a couple of ulp in the range [-9, 9], outside of which
- the tanh(x) = +/-1.
+ is accurate up to a couple of ulps in the (approximate) range [-8, 8],
+ outside of which tanh(x) = +/-1 in single precision. This is achieved by
+ clamping the inputs to the range [-c, c], where c is chosen as the smallest
+ value at which the approximation evaluates to exactly 1.
This implementation works on both scalars and packets.
*/
template<typename T>
T generic_fast_tanh_float(const T& a_x)
{
- // Clamp the inputs to the range [-9, 9] since anything outside
- // this range is +/-1.0f in single-precision.
- const T plus_9 = pset1<T>(9.f);
- const T minus_9 = pset1<T>(-9.f);
- const T x = pmax(pmin(a_x, plus_9), minus_9);
+ // Clamp the inputs to the range [-c, c]
+#ifdef EIGEN_VECTORIZE_FMA
+ const T plus_clamp = pset1<T>(7.99881172180175781);
+ const T minus_clamp = pset1<T>(-7.99881172180175781);
+#else
+ const T plus_clamp = pset1<T>(7.90531110763549805);
+ const T minus_clamp = pset1<T>(-7.90531110763549805);
+#endif
+ const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
// The monomial coefficients of the numerator polynomial (odd).
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
const T alpha_3 = pset1<T>(6.37261928875436e-04f);