From a4c266f8271aa5c53ae0eaac1399a8ccd85259b5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 23 Aug 2016 14:23:08 +0200 Subject: Factorize the 4 copies of tanh implementations, make numext::tanh consistent with array::tanh, enable fast tanh in fast-math mode only. --- Eigen/src/Core/MathFunctions.h | 7 +++ Eigen/src/Core/MathFunctionsImpl.h | 74 ++++++++++++++++++++++ Eigen/src/Core/arch/AVX/MathFunctions.h | 46 +------------- Eigen/src/Core/arch/SSE/MathFunctions.h | 46 +------------- Eigen/src/Core/functors/UnaryFunctors.h | 108 +------------------------------- 5 files changed, 88 insertions(+), 193 deletions(-) create mode 100644 Eigen/src/Core/MathFunctionsImpl.h (limited to 'Eigen/src') diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index ac7ef6d23..79ae797b5 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -787,6 +787,8 @@ template EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& template EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x); template EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x); +template T generic_fast_tanh_float(const T& a_x); + } // end namespace internal /**************************************************************************** @@ -1178,6 +1180,11 @@ T tanh(const T &x) { return tanh(x); } +#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tanh(float x) { return internal::generic_fast_tanh_float(x); } +#endif + #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(const float &x) { return ::tanhf(x); } diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h new file mode 100644 index 000000000..a9009a3ef --- /dev/null +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -0,0 +1,74 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATHFUNCTIONSIMPL_H +#define EIGEN_MATHFUNCTIONSIMPL_H + +namespace Eigen { + +namespace internal { + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) + Doesn't do anything fancy, just a 13/6-degree rational interpolant which + is accurate up to a couple of ulp in the range [-9, 9], outside of which + the tanh(x) = +/-1. + + This implementation works on both scalars and packets. +*/ +template +EIGEN_DONT_INLINE T generic_fast_tanh_float(const T& a_x) +{ + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + const T plus_9 = pset1(9.f); + const T minus_9 = pset1(-9.f); + const T x = pmax(minus_9, pmin(plus_9, a_x)); + + // The monomial coefficients of the numerator polynomial (odd). + const T alpha_1 = pset1(4.89352455891786e-03); + const T alpha_3 = pset1(6.37261928875436e-04); + const T alpha_5 = pset1(1.48572235717979e-05); + const T alpha_7 = pset1(5.12229709037114e-08); + const T alpha_9 = pset1(-8.60467152213735e-11); + const T alpha_11 = pset1(2.00018790482477e-13); + const T alpha_13 = pset1(-2.76076847742355e-16); + + // The monomial coefficients of the denominator polynomial (even). + const T beta_0 = pset1(4.89352518554385e-03); + const T beta_2 = pset1(2.26843463243900e-03); + const T beta_4 = pset1(1.18534705686654e-04); + const T beta_6 = pset1(1.19825839466702e-06); + + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + T p = pmadd(x2, alpha_13, alpha_11); + p = pmadd(x2, p, alpha_9); + p = pmadd(x2, p, alpha_7); + p = pmadd(x2, p, alpha_5); + p = pmadd(x2, p, alpha_3); + p = pmadd(x2, p, alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial p. + T q = pmadd(x2, beta_6, beta_4); + q = pmadd(x2, q, beta_2); + q = pmadd(x2, q, beta_0); + + // Divide the numerator by the denominator. + return pdiv(p, q); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONSIMPL_H diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 98d8e029f..d21ec39dd 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -266,52 +266,10 @@ pexp(const Packet8f& _x) { } // Hyperbolic Tangent function. -// Doesn't do anything fancy, just a 13/6-degree rational interpolant which -// is accurate up to a couple of ulp in the range [-9, 9], outside of which the -// fl(tanh(x)) = +/-1. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f -ptanh(const Packet8f& _x) { - // Clamp the inputs to the range [-9, 9] since anything outside - // this range is +/-1.0f in single-precision. - _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f); - _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f); - const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x)); - - // The monomial coefficients of the numerator polynomial (odd). - _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f); - - // The monomial coefficients of the denominator polynomial (even). - _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f); - _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f); - _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f); - _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f); - - // Since the polynomials are odd/even, we need x^2. - const Packet8f x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11); - p = pmadd(x2, p, p8f_alpha_9); - p = pmadd(x2, p, p8f_alpha_7); - p = pmadd(x2, p, p8f_alpha_5); - p = pmadd(x2, p, p8f_alpha_3); - p = pmadd(x2, p, p8f_alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial p. - Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4); - q = pmadd(x2, q, p8f_beta_2); - q = pmadd(x2, q, p8f_beta_0); - - // Divide the numerator by the denominator. - return pdiv(p, q); +ptanh(const Packet8f& x) { + return internal::generic_fast_tanh_float(x); } template <> diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 28f103eeb..ac2fd8103 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -517,52 +517,10 @@ Packet2d prsqrt(const Packet2d& x) { } // Hyperbolic Tangent function. -// Doesn't do anything fancy, just a 13/6-degree rational interpolant which -// is accurate up to a couple of ulp in the range [-9, 9], outside of which the -// fl(tanh(x)) = +/-1. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f -ptanh(const Packet4f& _x) { - // Clamp the inputs to the range [-9, 9] since anything outside - // this range is +/-1.0f in single-precision. - _EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f); - _EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f); - const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x)); - - // The monomial coefficients of the numerator polynomial (odd). - _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f); - - // The monomial coefficients of the denominator polynomial (even). - _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f); - _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f); - _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f); - _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f); - - // Since the polynomials are odd/even, we need x^2. - const Packet4f x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11); - p = pmadd(x2, p, p4f_alpha_9); - p = pmadd(x2, p, p4f_alpha_7); - p = pmadd(x2, p, p4f_alpha_5); - p = pmadd(x2, p, p4f_alpha_3); - p = pmadd(x2, p, p4f_alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial p. - Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4); - q = pmadd(x2, q, p4f_beta_2); - q = pmadd(x2, q, p4f_beta_0); - - // Divide the numerator by the denominator. - return pdiv(p, q); +ptanh(const Packet4f& x) { + return internal::generic_fast_tanh_float(x); } } // end namespace internal diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index e2f3d869f..2a57a5fd9 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -498,113 +498,11 @@ struct functor_traits > template struct scalar_tanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) - EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { - /** \internal \returns the hyperbolic tan of \a a (coeff-wise) - Doesn't do anything fancy, just a 13/6-degree rational interpolant - which - is accurate up to a couple of ulp in the range [-9, 9], outside of - which - the fl(tanh(x)) = +/-1. */ - - // Clamp the inputs to the range [-9, 9] since anything outside - // this range is +/-1.0f in single-precision. - const Scalar plus_9 = static_cast(9.0); - const Scalar minus_9 = static_cast(-9.0); - const Scalar x = numext::maxi(minus_9, numext::mini(plus_9, a)); - // Scalarhe monomial coefficients of the numerator polynomial (odd). - const Scalar alpha_1 = static_cast(4.89352455891786e-03); - const Scalar alpha_3 = static_cast(6.37261928875436e-04); - const Scalar alpha_5 = static_cast(1.48572235717979e-05); - const Scalar alpha_7 = static_cast(5.12229709037114e-08); - const Scalar alpha_9 = static_cast(-8.60467152213735e-11); - const Scalar alpha_11 = static_cast(2.00018790482477e-13); - const Scalar alpha_13 = static_cast(-2.76076847742355e-16); - // Scalarhe monomial coefficients of the denominator polynomial (even). - const Scalar beta_0 = static_cast(4.89352518554385e-03); - const Scalar beta_2 = static_cast(2.26843463243900e-03); - const Scalar beta_4 = static_cast(1.18534705686654e-04); - const Scalar beta_6 = static_cast(1.19825839466702e-06); - // Since the polynomials are odd/even, we need x^2. - const Scalar x2 = x * x; - // Evaluate the numerator polynomial p. - Scalar p = x2 * alpha_13 + alpha_11; - p = x2 * p + alpha_9; - p = x2 * p + alpha_7; - p = x2 * p + alpha_5; - p = x2 * p + alpha_3; - p = x2 * p + alpha_1; - p = x * p; - // Evaluate the denominator polynomial p. - Scalar q = x2 * beta_6 + beta_4; - q = x2 * q + beta_2; - q = x2 * q + beta_0; - // Divide the numerator by the denominator. - return p / q; - } + EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); } template - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& _x) const { - /** \internal \returns the hyperbolic tan of \a a (coeff-wise) - Doesn't do anything fancy, just a 13/6-degree rational interpolant which - is accurate up to a couple of ulp in the range [-9, 9], outside of which - the - fl(tanh(x)) = +/-1. */ - - // Clamp the inputs to the range [-9, 9] since anything outside - // this range is +/-1.0f in single-precision. - const Packet plus_9 = pset1(9.0); - const Packet minus_9 = pset1(-9.0); - const Packet x = pmax(minus_9, pmin(plus_9, _x)); - - // The monomial coefficients of the numerator polynomial (odd). - const Packet alpha_1 = pset1(4.89352455891786e-03); - const Packet alpha_3 = pset1(6.37261928875436e-04); - const Packet alpha_5 = pset1(1.48572235717979e-05); - const Packet alpha_7 = pset1(5.12229709037114e-08); - const Packet alpha_9 = pset1(-8.60467152213735e-11); - const Packet alpha_11 = pset1(2.00018790482477e-13); - const Packet alpha_13 = pset1(-2.76076847742355e-16); - - // The monomial coefficients of the denominator polynomial (even). - const Packet beta_0 = pset1(4.89352518554385e-03); - const Packet beta_2 = pset1(2.26843463243900e-03); - const Packet beta_4 = pset1(1.18534705686654e-04); - const Packet beta_6 = pset1(1.19825839466702e-06); - - // Since the polynomials are odd/even, we need x^2. - const Packet x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - Packet p = pmadd(x2, alpha_13, alpha_11); - p = pmadd(x2, p, alpha_9); - p = pmadd(x2, p, alpha_7); - p = pmadd(x2, p, alpha_5); - p = pmadd(x2, p, alpha_3); - p = pmadd(x2, p, alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial p. - Packet q = pmadd(x2, beta_6, beta_4); - q = pmadd(x2, q, beta_2); - q = pmadd(x2, q, beta_0); - - // Divide the numerator by the denominator. - return pdiv(p, q); - } -}; -template <> -struct scalar_tanh_op > { - EIGEN_DEVICE_FUNC inline const std::complex operator()( - const std::complex& a) const { - return numext::tanh(a); - } -}; -template <> -struct scalar_tanh_op > { - EIGEN_DEVICE_FUNC inline const std::complex operator()( - const std::complex& a) const { - return numext::tanh(a); - } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); } }; + template struct functor_traits > { enum { -- cgit v1.2.3