Eigen/src/Core/MathFunctionsImpl.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_MATHFUNCTIONSIMPL_H
#define EIGEN_MATHFUNCTIONSIMPL_H

namespace Eigen {

namespace internal {

/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
    is accurate up to a couple of ulp in the range [-9, 9], outside of which
    the tanh(x) = +/-1.

    This implementation works on both scalars and packets.
*/
template<typename T>
T generic_fast_tanh_float(const T& a_x)
{
  // Clamp the inputs to the range [-9, 9] since anything outside
  // this range is +/-1.0f in single-precision.
  const T plus_9 = pset1<T>(9.f);
  const T minus_9 = pset1<T>(-9.f);
  const T x = pmax(pmin(a_x, plus_9), minus_9);
  // The monomial coefficients of the numerator polynomial (odd).
  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);

  // The monomial coefficients of the denominator polynomial (even).
  const T beta_0 = pset1<T>(4.89352518554385e-03f);
  const T beta_2 = pset1<T>(2.26843463243900e-03f);
  const T beta_4 = pset1<T>(1.18534705686654e-04f);
  const T beta_6 = pset1<T>(1.19825839466702e-06f);

  // Since the polynomials are odd/even, we need x^2.
  const T x2 = pmul(x, x);

  // Evaluate the numerator polynomial p.
  T p = pmadd(x2, alpha_13, alpha_11);
  p = pmadd(x2, p, alpha_9);
  p = pmadd(x2, p, alpha_7);
  p = pmadd(x2, p, alpha_5);
  p = pmadd(x2, p, alpha_3);
  p = pmadd(x2, p, alpha_1);
  p = pmul(x, p);

  // Evaluate the denominator polynomial p.
  T q = pmadd(x2, beta_6, beta_4);
  q = pmadd(x2, q, beta_2);
  q = pmadd(x2, q, beta_0);

  // Divide the numerator by the denominator.
  return pdiv(p, q);
}

/** \internal \returns the error function of \a a (coeff-wise)
    Doesn't do anything fancy, just a 13/8-degree rational interpolant which
    is accurate up to a couple of ulp in the range [-4, 4], outside of which
    fl(erf(x)) = +/-1.

    This implementation works on both scalars and Ts.
*/
template <typename T>
T generic_fast_erf_float(const T& a_x) {
  // Clamp the inputs to the range [-4, 4] since anything outside
  // this range is +/-1.0f in single-precision.
  const T plus_4 = pset1<T>(4.f);
  const T minus_4 = pset1<T>(-4.f);
  const T x = pmax(pmin(a_x, plus_4), minus_4);
  // The monomial coefficients of the numerator polynomial (odd).
  const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
  const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
  const T alpha_5 = pset1<T>(-7.34990630326855e-04f);
  const T alpha_7 = pset1<T>(-5.69250639462346e-05f);
  const T alpha_9 = pset1<T>(-2.10102402082508e-06f);
  const T alpha_11 = pset1<T>(2.77068142495902e-08f);
  const T alpha_13 = pset1<T>(-2.72614225801306e-10f);

  // The monomial coefficients of the denominator polynomial (even).
  const T beta_0 = pset1<T>(-1.42647390514189e-02f);
  const T beta_2 = pset1<T>(-7.37332916720468e-03f);
  const T beta_4 = pset1<T>(-1.68282697438203e-03f);
  const T beta_6 = pset1<T>(-2.13374055278905e-04f);
  const T beta_8 = pset1<T>(-1.45660718464996e-05f);

  // Since the polynomials are odd/even, we need x^2.
  const T x2 = pmul(x, x);

  // Evaluate the numerator polynomial p.
  T p = pmadd(x2, alpha_13, alpha_11);
  p = pmadd(x2, p, alpha_9);
  p = pmadd(x2, p, alpha_7);
  p = pmadd(x2, p, alpha_5);
  p = pmadd(x2, p, alpha_3);
  p = pmadd(x2, p, alpha_1);
  p = pmul(x, p);

  // Evaluate the denominator polynomial p.
  T q = pmadd(x2, beta_8, beta_6);
  q = pmadd(x2, q, beta_4);
  q = pmadd(x2, q, beta_2);
  q = pmadd(x2, q, beta_0);

  // Divide the numerator by the denominator.
  return pdiv(p, q);
}

template<typename RealScalar>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
{
  EIGEN_USING_STD_MATH(sqrt);
  RealScalar p, qp;
  p = numext::maxi(x,y);
  if(p==RealScalar(0)) return RealScalar(0);
  qp = numext::mini(y,x) / p;    
  return p * sqrt(RealScalar(1) + qp*qp);
}

template<typename Scalar>
struct hypot_impl
{
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static EIGEN_DEVICE_FUNC
  inline RealScalar run(const Scalar& x, const Scalar& y)
  {
    EIGEN_USING_STD_MATH(abs);
    return positive_real_hypot<RealScalar>(abs(x), abs(y));
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_MATHFUNCTIONSIMPL_H