aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2016-08-23 14:23:08 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2016-08-23 14:23:08 +0200
commita4c266f8271aa5c53ae0eaac1399a8ccd85259b5 (patch)
tree2fa3b037d5727cf5fdbf833a030fffa49d7912e2 /Eigen/src
parent82147cefffdf5ed445f79a8cdb57dd71012948bd (diff)
Factorize the 4 copies of tanh implementations, make numext::tanh consistent with array::tanh, enable fast tanh in fast-math mode only.
Diffstat (limited to 'Eigen/src')
-rw-r--r--Eigen/src/Core/MathFunctions.h7
-rw-r--r--Eigen/src/Core/MathFunctionsImpl.h74
-rw-r--r--Eigen/src/Core/arch/AVX/MathFunctions.h46
-rw-r--r--Eigen/src/Core/arch/SSE/MathFunctions.h46
-rw-r--r--Eigen/src/Core/functors/UnaryFunctors.h108
5 files changed, 88 insertions, 193 deletions
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index ac7ef6d23..79ae797b5 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -787,6 +787,8 @@ template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>&
template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
+template<typename T> T generic_fast_tanh_float(const T& a_x);
+
} // end namespace internal
/****************************************************************************
@@ -1178,6 +1180,11 @@ T tanh(const T &x) {
return tanh(x);
}
+#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tanh(float x) { return internal::generic_fast_tanh_float(x); }
+#endif
+
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tanh(const float &x) { return ::tanhf(x); }
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
new file mode 100644
index 000000000..a9009a3ef
--- /dev/null
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -0,0 +1,74 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATHFUNCTIONSIMPL_H
+#define EIGEN_MATHFUNCTIONSIMPL_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
+ Doesn't do anything fancy, just a 13/6-degree rational interpolant which
+ is accurate up to a couple of ulp in the range [-9, 9], outside of which
+ the tanh(x) = +/-1.
+
+ This implementation works on both scalars and packets.
+*/
+template<typename T>
+EIGEN_DONT_INLINE T generic_fast_tanh_float(const T& a_x)
+{
+ // Clamp the inputs to the range [-9, 9] since anything outside
+ // this range is +/-1.0f in single-precision.
+ const T plus_9 = pset1<T>(9.f);
+ const T minus_9 = pset1<T>(-9.f);
+ const T x = pmax(minus_9, pmin(plus_9, a_x));
+
+ // The monomial coefficients of the numerator polynomial (odd).
+ const T alpha_1 = pset1<T>(4.89352455891786e-03);
+ const T alpha_3 = pset1<T>(6.37261928875436e-04);
+ const T alpha_5 = pset1<T>(1.48572235717979e-05);
+ const T alpha_7 = pset1<T>(5.12229709037114e-08);
+ const T alpha_9 = pset1<T>(-8.60467152213735e-11);
+ const T alpha_11 = pset1<T>(2.00018790482477e-13);
+ const T alpha_13 = pset1<T>(-2.76076847742355e-16);
+
+ // The monomial coefficients of the denominator polynomial (even).
+ const T beta_0 = pset1<T>(4.89352518554385e-03);
+ const T beta_2 = pset1<T>(2.26843463243900e-03);
+ const T beta_4 = pset1<T>(1.18534705686654e-04);
+ const T beta_6 = pset1<T>(1.19825839466702e-06);
+
+ // Since the polynomials are odd/even, we need x^2.
+ const T x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial p.
+ T p = pmadd(x2, alpha_13, alpha_11);
+ p = pmadd(x2, p, alpha_9);
+ p = pmadd(x2, p, alpha_7);
+ p = pmadd(x2, p, alpha_5);
+ p = pmadd(x2, p, alpha_3);
+ p = pmadd(x2, p, alpha_1);
+ p = pmul(x, p);
+
+ // Evaluate the denominator polynomial p.
+ T q = pmadd(x2, beta_6, beta_4);
+ q = pmadd(x2, q, beta_2);
+ q = pmadd(x2, q, beta_0);
+
+ // Divide the numerator by the denominator.
+ return pdiv(p, q);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATHFUNCTIONSIMPL_H
diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index 98d8e029f..d21ec39dd 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -266,52 +266,10 @@ pexp<Packet8f>(const Packet8f& _x) {
}
// Hyperbolic Tangent function.
-// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
-// is accurate up to a couple of ulp in the range [-9, 9], outside of which the
-// fl(tanh(x)) = +/-1.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
-ptanh<Packet8f>(const Packet8f& _x) {
- // Clamp the inputs to the range [-9, 9] since anything outside
- // this range is +/-1.0f in single-precision.
- _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f);
- _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f);
- const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x));
-
- // The monomial coefficients of the numerator polynomial (odd).
- _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f);
-
- // The monomial coefficients of the denominator polynomial (even).
- _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f);
- _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f);
- _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f);
- _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f);
-
- // Since the polynomials are odd/even, we need x^2.
- const Packet8f x2 = pmul(x, x);
-
- // Evaluate the numerator polynomial p.
- Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11);
- p = pmadd(x2, p, p8f_alpha_9);
- p = pmadd(x2, p, p8f_alpha_7);
- p = pmadd(x2, p, p8f_alpha_5);
- p = pmadd(x2, p, p8f_alpha_3);
- p = pmadd(x2, p, p8f_alpha_1);
- p = pmul(x, p);
-
- // Evaluate the denominator polynomial p.
- Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4);
- q = pmadd(x2, q, p8f_beta_2);
- q = pmadd(x2, q, p8f_beta_0);
-
- // Divide the numerator by the denominator.
- return pdiv(p, q);
+ptanh<Packet8f>(const Packet8f& x) {
+ return internal::generic_fast_tanh_float(x);
}
template <>
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 28f103eeb..ac2fd8103 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -517,52 +517,10 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
}
// Hyperbolic Tangent function.
-// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
-// is accurate up to a couple of ulp in the range [-9, 9], outside of which the
-// fl(tanh(x)) = +/-1.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
-ptanh<Packet4f>(const Packet4f& _x) {
- // Clamp the inputs to the range [-9, 9] since anything outside
- // this range is +/-1.0f in single-precision.
- _EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f);
- _EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f);
- const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x));
-
- // The monomial coefficients of the numerator polynomial (odd).
- _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f);
- _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f);
- _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f);
- _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f);
- _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);
- _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);
- _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);
-
- // The monomial coefficients of the denominator polynomial (even).
- _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f);
- _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f);
- _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f);
- _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f);
-
- // Since the polynomials are odd/even, we need x^2.
- const Packet4f x2 = pmul(x, x);
-
- // Evaluate the numerator polynomial p.
- Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11);
- p = pmadd(x2, p, p4f_alpha_9);
- p = pmadd(x2, p, p4f_alpha_7);
- p = pmadd(x2, p, p4f_alpha_5);
- p = pmadd(x2, p, p4f_alpha_3);
- p = pmadd(x2, p, p4f_alpha_1);
- p = pmul(x, p);
-
- // Evaluate the denominator polynomial p.
- Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4);
- q = pmadd(x2, q, p4f_beta_2);
- q = pmadd(x2, q, p4f_beta_0);
-
- // Divide the numerator by the denominator.
- return pdiv(p, q);
+ptanh<Packet4f>(const Packet4f& x) {
+ return internal::generic_fast_tanh_float(x);
}
} // end namespace internal
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index e2f3d869f..2a57a5fd9 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -498,113 +498,11 @@ struct functor_traits<scalar_atan_op<Scalar> >
template <typename Scalar>
struct scalar_tanh_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const {
- /** \internal \returns the hyperbolic tan of \a a (coeff-wise)
- Doesn't do anything fancy, just a 13/6-degree rational interpolant
- which
- is accurate up to a couple of ulp in the range [-9, 9], outside of
- which
- the fl(tanh(x)) = +/-1. */
-
- // Clamp the inputs to the range [-9, 9] since anything outside
- // this range is +/-1.0f in single-precision.
- const Scalar plus_9 = static_cast<Scalar>(9.0);
- const Scalar minus_9 = static_cast<Scalar>(-9.0);
- const Scalar x = numext::maxi(minus_9, numext::mini(plus_9, a));
- // Scalarhe monomial coefficients of the numerator polynomial (odd).
- const Scalar alpha_1 = static_cast<Scalar>(4.89352455891786e-03);
- const Scalar alpha_3 = static_cast<Scalar>(6.37261928875436e-04);
- const Scalar alpha_5 = static_cast<Scalar>(1.48572235717979e-05);
- const Scalar alpha_7 = static_cast<Scalar>(5.12229709037114e-08);
- const Scalar alpha_9 = static_cast<Scalar>(-8.60467152213735e-11);
- const Scalar alpha_11 = static_cast<Scalar>(2.00018790482477e-13);
- const Scalar alpha_13 = static_cast<Scalar>(-2.76076847742355e-16);
- // Scalarhe monomial coefficients of the denominator polynomial (even).
- const Scalar beta_0 = static_cast<Scalar>(4.89352518554385e-03);
- const Scalar beta_2 = static_cast<Scalar>(2.26843463243900e-03);
- const Scalar beta_4 = static_cast<Scalar>(1.18534705686654e-04);
- const Scalar beta_6 = static_cast<Scalar>(1.19825839466702e-06);
- // Since the polynomials are odd/even, we need x^2.
- const Scalar x2 = x * x;
- // Evaluate the numerator polynomial p.
- Scalar p = x2 * alpha_13 + alpha_11;
- p = x2 * p + alpha_9;
- p = x2 * p + alpha_7;
- p = x2 * p + alpha_5;
- p = x2 * p + alpha_3;
- p = x2 * p + alpha_1;
- p = x * p;
- // Evaluate the denominator polynomial p.
- Scalar q = x2 * beta_6 + beta_4;
- q = x2 * q + beta_2;
- q = x2 * q + beta_0;
- // Divide the numerator by the denominator.
- return p / q;
- }
+ EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); }
template <typename Packet>
- EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& _x) const {
- /** \internal \returns the hyperbolic tan of \a a (coeff-wise)
- Doesn't do anything fancy, just a 13/6-degree rational interpolant which
- is accurate up to a couple of ulp in the range [-9, 9], outside of which
- the
- fl(tanh(x)) = +/-1. */
-
- // Clamp the inputs to the range [-9, 9] since anything outside
- // this range is +/-1.0f in single-precision.
- const Packet plus_9 = pset1<Packet>(9.0);
- const Packet minus_9 = pset1<Packet>(-9.0);
- const Packet x = pmax(minus_9, pmin(plus_9, _x));
-
- // The monomial coefficients of the numerator polynomial (odd).
- const Packet alpha_1 = pset1<Packet>(4.89352455891786e-03);
- const Packet alpha_3 = pset1<Packet>(6.37261928875436e-04);
- const Packet alpha_5 = pset1<Packet>(1.48572235717979e-05);
- const Packet alpha_7 = pset1<Packet>(5.12229709037114e-08);
- const Packet alpha_9 = pset1<Packet>(-8.60467152213735e-11);
- const Packet alpha_11 = pset1<Packet>(2.00018790482477e-13);
- const Packet alpha_13 = pset1<Packet>(-2.76076847742355e-16);
-
- // The monomial coefficients of the denominator polynomial (even).
- const Packet beta_0 = pset1<Packet>(4.89352518554385e-03);
- const Packet beta_2 = pset1<Packet>(2.26843463243900e-03);
- const Packet beta_4 = pset1<Packet>(1.18534705686654e-04);
- const Packet beta_6 = pset1<Packet>(1.19825839466702e-06);
-
- // Since the polynomials are odd/even, we need x^2.
- const Packet x2 = pmul(x, x);
-
- // Evaluate the numerator polynomial p.
- Packet p = pmadd(x2, alpha_13, alpha_11);
- p = pmadd(x2, p, alpha_9);
- p = pmadd(x2, p, alpha_7);
- p = pmadd(x2, p, alpha_5);
- p = pmadd(x2, p, alpha_3);
- p = pmadd(x2, p, alpha_1);
- p = pmul(x, p);
-
- // Evaluate the denominator polynomial p.
- Packet q = pmadd(x2, beta_6, beta_4);
- q = pmadd(x2, q, beta_2);
- q = pmadd(x2, q, beta_0);
-
- // Divide the numerator by the denominator.
- return pdiv(p, q);
- }
-};
-template <>
-struct scalar_tanh_op<std::complex<double> > {
- EIGEN_DEVICE_FUNC inline const std::complex<double> operator()(
- const std::complex<double>& a) const {
- return numext::tanh(a);
- }
-};
-template <>
-struct scalar_tanh_op<std::complex<float> > {
- EIGEN_DEVICE_FUNC inline const std::complex<float> operator()(
- const std::complex<float>& a) const {
- return numext::tanh(a);
- }
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); }
};
+
template <typename Scalar>
struct functor_traits<scalar_tanh_op<Scalar> > {
enum {