aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/src
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-09-27 13:56:04 -0700
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-09-27 13:56:04 -0700
commit13ef08e5ac96e474ce40031b868b2f7625014573 (patch)
tree470748b41e3bb18c47e55377403c4276241eb354 /unsupported/Eigen/src
parent7c8bc0d9288f5152cf871dd2824a772a6003801b (diff)
Move implementation of vectorized error function erf() to SpecialFunctionsImpl.h.
Diffstat (limited to 'unsupported/Eigen/src')
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h68
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h4
2 files changed, 66 insertions, 6 deletions
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index d6840a47c..56023508c 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -279,13 +279,63 @@ struct digamma_impl {
* Implementation of erf, requires C++11/C99 *
****************************************************************************/
-template <typename Scalar>
+/** \internal \returns the error function of \a a (coeff-wise)
+ Doesn't do anything fancy, just a 13/8-degree rational interpolant which
+ is accurate up to a couple of ulp in the range [-4, 4], outside of which
+ fl(erf(x)) = +/-1.
+
+ This implementation works on both scalars and Ts.
+*/
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& a_x) {
+ // Clamp the inputs to the range [-4, 4] since anything outside
+ // this range is +/-1.0f in single-precision.
+ const T plus_4 = pset1<T>(4.f);
+ const T minus_4 = pset1<T>(-4.f);
+ const T x = pmax(pmin(a_x, plus_4), minus_4);
+ // The monomial coefficients of the numerator polynomial (odd).
+ const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
+ const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
+ const T alpha_5 = pset1<T>(-7.34990630326855e-04f);
+ const T alpha_7 = pset1<T>(-5.69250639462346e-05f);
+ const T alpha_9 = pset1<T>(-2.10102402082508e-06f);
+ const T alpha_11 = pset1<T>(2.77068142495902e-08f);
+ const T alpha_13 = pset1<T>(-2.72614225801306e-10f);
+
+ // The monomial coefficients of the denominator polynomial (even).
+ const T beta_0 = pset1<T>(-1.42647390514189e-02f);
+ const T beta_2 = pset1<T>(-7.37332916720468e-03f);
+ const T beta_4 = pset1<T>(-1.68282697438203e-03f);
+ const T beta_6 = pset1<T>(-2.13374055278905e-04f);
+ const T beta_8 = pset1<T>(-1.45660718464996e-05f);
+
+ // Since the polynomials are odd/even, we need x^2.
+ const T x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial p.
+ T p = pmadd(x2, alpha_13, alpha_11);
+ p = pmadd(x2, p, alpha_9);
+ p = pmadd(x2, p, alpha_7);
+ p = pmadd(x2, p, alpha_5);
+ p = pmadd(x2, p, alpha_3);
+ p = pmadd(x2, p, alpha_1);
+ p = pmul(x, p);
+
+ // Evaluate the denominator polynomial p.
+ T q = pmadd(x2, beta_8, beta_6);
+ q = pmadd(x2, q, beta_4);
+ q = pmadd(x2, q, beta_2);
+ q = pmadd(x2, q, beta_0);
+
+ // Divide the numerator by the denominator.
+ return pdiv(p, q);
+}
+
+template <typename T>
struct erf_impl {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
- EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
- THIS_TYPE_IS_NOT_SUPPORTED);
- return Scalar(0);
+ static EIGEN_STRONG_INLINE T run(const T x) {
+ return generic_fast_erf_float(x);
}
};
@@ -302,7 +352,7 @@ struct erf_impl<float> {
#if defined(SYCL_DEVICE_ONLY)
return cl::sycl::erf(x);
#else
- return ::erff(x);
+ return generic_fast_erf_float(x);
#endif
}
};
@@ -1893,6 +1943,12 @@ polygamma(const Scalar& n, const Scalar& x) {
}
template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar)
+ erf(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
+}
+
+template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
erfc(const Scalar& x) {
return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
index 77fdb031a..2bb017921 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -30,6 +30,10 @@ Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
+/** \internal \returns the erf(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perf(const Packet& a) { using numext::erf; return erf(a); }
+
/** \internal \returns the erfc(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }