From a4c266f8271aa5c53ae0eaac1399a8ccd85259b5 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Tue, 23 Aug 2016 14:23:08 +0200
Subject: Factorize the 4 copies of tanh implementations, make numext::tanh
 consistent with array::tanh, enable fast tanh in fast-math mode only.

---
 Eigen/src/Core/MathFunctionsImpl.h | 74 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 Eigen/src/Core/MathFunctionsImpl.h

(limited to 'Eigen/src/Core/MathFunctionsImpl.h')
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
new file mode 100644
index 000000000..a9009a3ef
--- /dev/null
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -0,0 +1,74 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATHFUNCTIONSIMPL_H
+#define EIGEN_MATHFUNCTIONSIMPL_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
+    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
+    is accurate up to a couple of ulp in the range [-9, 9], outside of which
+    tanh(x) is +/-1 in single precision.
+
+    This implementation works on both scalars and packets.
+*/
+template<typename T>
+EIGEN_DONT_INLINE T generic_fast_tanh_float(const T& a_x)
+{
+  // Clamp the inputs to the range [-9, 9] since anything outside
+  // this range is +/-1.0f in single-precision.
+  const T plus_9 = pset1<T>(9.f);
+  const T minus_9 = pset1<T>(-9.f);
+  const T x = pmax(minus_9, pmin(plus_9, a_x));
+
+  // The monomial coefficients of the numerator polynomial (odd).
+  const T alpha_1 = pset1<T>(4.89352455891786e-03);
+  const T alpha_3 = pset1<T>(6.37261928875436e-04);
+  const T alpha_5 = pset1<T>(1.48572235717979e-05);
+  const T alpha_7 = pset1<T>(5.12229709037114e-08);
+  const T alpha_9 = pset1<T>(-8.60467152213735e-11);
+  const T alpha_11 = pset1<T>(2.00018790482477e-13);
+  const T alpha_13 = pset1<T>(-2.76076847742355e-16);
+
+  // The monomial coefficients of the denominator polynomial (even).
+  const T beta_0 = pset1<T>(4.89352518554385e-03);
+  const T beta_2 = pset1<T>(2.26843463243900e-03);
+  const T beta_4 = pset1<T>(1.18534705686654e-04);
+  const T beta_6 = pset1<T>(1.19825839466702e-06);
+
+  // Since the polynomials are odd/even, we need x^2.
+  const T x2 = pmul(x, x);
+
+  // Evaluate the numerator polynomial p.
+  T p = pmadd(x2, alpha_13, alpha_11);
+  p = pmadd(x2, p, alpha_9);
+  p = pmadd(x2, p, alpha_7);
+  p = pmadd(x2, p, alpha_5);
+  p = pmadd(x2, p, alpha_3);
+  p = pmadd(x2, p, alpha_1);
+  p = pmul(x, p);
+
+  // Evaluate the denominator polynomial q.
+  T q = pmadd(x2, beta_6, beta_4);
+  q = pmadd(x2, q, beta_2);
+  q = pmadd(x2, q, beta_0);
+
+  // Divide the numerator by the denominator.
+  return pdiv(p, q);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATHFUNCTIONSIMPL_H
-- 
cgit v1.2.3


From fd9caa1bc2821af0c41a2658294827656d234284 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sun, 28 Aug 2016 22:45:56 +0200
Subject: bug #1282: fix implicit double to float conversion warning

---
 Eigen/src/Core/MathFunctionsImpl.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'Eigen/src/Core/MathFunctionsImpl.h')

diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index a9009a3ef..9aaefe22e 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -32,19 +32,19 @@ EIGEN_DONT_INLINE T generic_fast_tanh_float(const T& a_x)
   const T x = pmax(minus_9, pmin(plus_9, a_x));
 
   // The monomial coefficients of the numerator polynomial (odd).
-  const T alpha_1 = pset1<T>(4.89352455891786e-03);
-  const T alpha_3 = pset1<T>(6.37261928875436e-04);
-  const T alpha_5 = pset1<T>(1.48572235717979e-05);
-  const T alpha_7 = pset1<T>(5.12229709037114e-08);
-  const T alpha_9 = pset1<T>(-8.60467152213735e-11);
-  const T alpha_11 = pset1<T>(2.00018790482477e-13);
-  const T alpha_13 = pset1<T>(-2.76076847742355e-16);
+  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
+  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
+  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
+  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
+  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
+  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
+  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
 
   // The monomial coefficients of the denominator polynomial (even).
-  const T beta_0 = pset1<T>(4.89352518554385e-03);
-  const T beta_2 = pset1<T>(2.26843463243900e-03);
-  const T beta_4 = pset1<T>(1.18534705686654e-04);
-  const T beta_6 = pset1<T>(1.19825839466702e-06);
+  const T beta_0 = pset1<T>(4.89352518554385e-03f);
+  const T beta_2 = pset1<T>(2.26843463243900e-03f);
+  const T beta_4 = pset1<T>(1.18534705686654e-04f);
+  const T beta_6 = pset1<T>(1.19825839466702e-06f);
 
   // Since the polynomials are odd/even, we need x^2.
   const T x2 = pmul(x, x);
-- 
cgit v1.2.3


From 68e803a26ea37b8895f0bd45f7bfaa47c375d890 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Tue, 30 Aug 2016 09:21:57 +0200
Subject: Fix warning

---
 Eigen/src/Core/MathFunctionsImpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Eigen/src/Core/MathFunctionsImpl.h')

diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 9aaefe22e..0c77ee003 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -23,7 +23,7 @@ namespace internal {
     This implementation works on both scalars and packets.
 */
 template<typename T>
-EIGEN_DONT_INLINE T generic_fast_tanh_float(const T& a_x)
+T generic_fast_tanh_float(const T& a_x)
 {
   // Clamp the inputs to the range [-9, 9] since anything outside
   // this range is +/-1.0f in single-precision.
-- 
cgit v1.2.3


From 66cbabafed7957a7f6c03b34df854149233de596 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 22 Sep 2016 11:18:52 +0200
Subject: Add a note regarding gcc bug #72867

---
 Eigen/src/Core/MathFunctionsImpl.h | 8 ++++++--
 test/packetmath.cpp                | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'Eigen/src/Core/MathFunctionsImpl.h')

diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 0c77ee003..3c9ef22fa 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -29,8 +29,12 @@ T generic_fast_tanh_float(const T& a_x)
   // this range is +/-1.0f in single-precision.
   const T plus_9 = pset1<T>(9.f);
   const T minus_9 = pset1<T>(-9.f);
-  const T x = pmax(minus_9, pmin(plus_9, a_x));
-
+  // NOTE GCC prior to 6.3 might improperly optimize this max/min
+  //      step such that if a_x is nan, x will be either 9 or -9,
+  //      and tanh will return 1 or -1 instead of nan.
+  //      This is supposed to be fixed in gcc6.3,
+  //      see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
+  const T x = pmax(minus_9,pmin(plus_9,a_x));
   // The monomial coefficients of the numerator polynomial (odd).
   const T alpha_1 = pset1<T>(4.89352455891786e-03f);
   const T alpha_3 = pset1<T>(6.37261928875436e-04f);
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 77514d8a0..1394d9f2b 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -365,6 +365,7 @@ template<typename Scalar> void packetmath_real()
   }
 
   if (PacketTraits::HasTanh) {
+    // NOTE this test might fail with GCC prior to 6.3, see MathFunctionsImpl.h for details.
     data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
     packet_helper<internal::packet_traits<Scalar>::HasTanh,Packet> h;
     h.store(data2, internal::ptanh(h.load(data1)));
-- 
cgit v1.2.3