Move implementation of vectorized error function erf() to SpecialFunctionsImpl.h.

author: Rasmus Munk Larsen <rmlarsen@google.com> 2019-09-27 13:56:04 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2019-09-27 13:56:04 -0700
commit: 13ef08e5ac96e474ce40031b868b2f7625014573 (patch)
tree: 470748b41e3bb18c47e55377403c4276241eb354
parent: 7c8bc0d9288f5152cf871dd2824a772a6003801b (diff)
12 files changed, 67 insertions, 142 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index c102914ce..2d7b9f9e0 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -531,10 +531,6 @@ Packet pcosh(const Packet& a) { EIGEN_USING_STD_MATH(cosh); return cosh(a); }
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet ptanh(const Packet& a) { EIGEN_USING_STD_MATH(tanh); return tanh(a); }
 
-/** \internal \returns the error function of \a a (coeff-wise). */
-template <typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet perf(const Packet& a) { return numext::erf(a); }
-
 /** \internal \returns the exp of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pexp(const Packet& a) { EIGEN_USING_STD_MATH(exp); return exp(a); }
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 950ebec43..f1feab508 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -891,7 +891,6 @@ template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
 template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
 
 template<typename T> T generic_fast_tanh_float(const T& a_x);
-template<typename T> T generic_fast_erf_float(const T& a_x);
 } // end namespace internal
 
 /****************************************************************************
@@ -1579,36 +1578,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 double tanh(const double &x) { return ::tanh(x); }
 #endif
 
-#if EIGEN_HAS_CXX11
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-T erf(const T &x) {
-  EIGEN_USING_STD_MATH(erf);
-  return erf(x);
-}
-#else
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-T erf(const T& x);
-#endif
-
-#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY)
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-float erf(float x) { return internal::generic_fast_erf_float(x); }
-#endif
-
-#if defined(SYCL_DEVICE_ONLY)
-SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(erf, erf)
-#endif
-
-#if !EIGEN_HAS_CXX11 || defined(EIGEN_GPUCC) 
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-float erf(const float &x) { return ::erff(x); }
-
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-double erf(const double &x) { return ::erf(x); }
-#endif
-
 template <typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T fmod(const T& a, const T& b) {
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index c4957fbc2..aff3967ca 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -66,58 +66,6 @@ T generic_fast_tanh_float(const T& a_x)
   return pdiv(p, q);
 }
 
-/** \internal \returns the error function of \a a (coeff-wise)
-    Doesn't do anything fancy, just a 13/8-degree rational interpolant which
-    is accurate up to a couple of ulp in the range [-4, 4], outside of which
-    fl(erf(x)) = +/-1.
-
-    This implementation works on both scalars and Ts.
-*/
-template <typename T>
-T generic_fast_erf_float(const T& a_x) {
-  // Clamp the inputs to the range [-4, 4] since anything outside
-  // this range is +/-1.0f in single-precision.
-  const T plus_4 = pset1<T>(4.f);
-  const T minus_4 = pset1<T>(-4.f);
-  const T x = pmax(pmin(a_x, plus_4), minus_4);
-  // The monomial coefficients of the numerator polynomial (odd).
-  const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
-  const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
-  const T alpha_5 = pset1<T>(-7.34990630326855e-04f);
-  const T alpha_7 = pset1<T>(-5.69250639462346e-05f);
-  const T alpha_9 = pset1<T>(-2.10102402082508e-06f);
-  const T alpha_11 = pset1<T>(2.77068142495902e-08f);
-  const T alpha_13 = pset1<T>(-2.72614225801306e-10f);
-
-  // The monomial coefficients of the denominator polynomial (even).
-  const T beta_0 = pset1<T>(-1.42647390514189e-02f);
-  const T beta_2 = pset1<T>(-7.37332916720468e-03f);
-  const T beta_4 = pset1<T>(-1.68282697438203e-03f);
-  const T beta_6 = pset1<T>(-2.13374055278905e-04f);
-  const T beta_8 = pset1<T>(-1.45660718464996e-05f);
-
-  // Since the polynomials are odd/even, we need x^2.
-  const T x2 = pmul(x, x);
-
-  // Evaluate the numerator polynomial p.
-  T p = pmadd(x2, alpha_13, alpha_11);
-  p = pmadd(x2, p, alpha_9);
-  p = pmadd(x2, p, alpha_7);
-  p = pmadd(x2, p, alpha_5);
-  p = pmadd(x2, p, alpha_3);
-  p = pmadd(x2, p, alpha_1);
-  p = pmul(x, p);
-
-  // Evaluate the denominator polynomial p.
-  T q = pmadd(x2, beta_8, beta_6);
-  q = pmadd(x2, q, beta_4);
-  q = pmadd(x2, q, beta_2);
-  q = pmadd(x2, q, beta_0);
-
-  // Divide the numerator by the denominator.
-  return pdiv(p, q);
-}
-
 template<typename RealScalar>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
@@ -126,7 +74,7 @@ RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
   RealScalar p, qp;
   p = numext::maxi(x,y);
   if(p==RealScalar(0)) return RealScalar(0);
-  qp = numext::mini(y,x) / p;    
+  qp = numext::mini(y,x) / p;
   return p * sqrt(RealScalar(1) + qp*qp);
 }
 
diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index 089a682ba..da1b1e3f8 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -62,13 +62,6 @@ ptanh<Packet8f>(const Packet8f& x) {
   return internal::generic_fast_tanh_float(x);
 }
 
-// Error function.
-template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
-perf<Packet8f>(const Packet8f& x) {
-  return internal::generic_fast_erf_float(x);
-}
-
 // Exponential function for doubles.
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h
index 66581964c..1cf7338c2 100644
--- a/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h
@@ -411,12 +411,6 @@ ptanh<Packet16f>(const Packet16f& _x) {
   return internal::generic_fast_tanh_float(_x);
 }
 
-template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
-perf<Packet16f>(const Packet16f& _x) {
-  return internal::generic_fast_erf_float(_x);
-}
-
 }  // end namespace internal
 
 }  // end namespace Eigen
diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
index 00a0248e2..3a7a32936 100644
--- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h
+++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
@@ -83,13 +83,6 @@ ptanh<Packet4f>(const Packet4f& x) {
   return internal::generic_fast_tanh_float(x);
 }
 
-// Error function.
-template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
-perf<Packet4f>(const Packet4f& x) {
-  return internal::generic_fast_erf_float(x);
-}
-
 }  // end namespace internal
 
 }  // end namespace Eigen
diff --git a/Eigen/src/Core/arch/MSA/MathFunctions.h b/Eigen/src/Core/arch/MSA/MathFunctions.h
index 23e4b8664..f5181b90e 100644
--- a/Eigen/src/Core/arch/MSA/MathFunctions.h
+++ b/Eigen/src/Core/arch/MSA/MathFunctions.h
@@ -321,13 +321,6 @@ pcos<Packet4f>(const Packet4f& x) {
   return psincos_inner_msa_float</* sine */ false>(x);
 }
 
-// Error function.
-template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
-perf<Packet4f>(const Packet4f& x) {
-  return internal::generic_fast_erf_float(x);
-}
-
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d
 pexp<Packet2d>(const Packet2d& _x) {
diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h
index 13e3f4989..8bf572788 100644
--- a/Eigen/src/Core/arch/NEON/MathFunctions.h
+++ b/Eigen/src/Core/arch/NEON/MathFunctions.h
@@ -43,13 +43,6 @@ ptanh<Packet4f>(const Packet4f& x) {
   return internal::generic_fast_tanh_float(x);
 }
 
-// Error function.
-template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
-perf<Packet4f>(const Packet4f& x) {
-  return internal::generic_fast_erf_float(x);
-}
-
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index a991b1a39..b21bb93bf 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -147,13 +147,6 @@ ptanh<Packet4f>(const Packet4f& x) {
   return internal::generic_fast_tanh_float(x);
 }
 
-// Error function.
-template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
-perf<Packet4f>(const Packet4f& a) {
-  return internal::generic_fast_erf_float(a);
-}
-
 } // end namespace internal
 
 namespace numext {
diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h
index d4b33b56f..689ecc702 100644
--- a/Eigen/src/Core/arch/ZVector/MathFunctions.h
+++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h
@@ -232,13 +232,6 @@ ptanh<Packet4f>(const Packet4f& x) {
   return internal::generic_fast_tanh_float(x);
 }
 
-// Error function.
-template <>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
-perf<Packet4f>(const Packet4f& x) {
-  return internal::generic_fast_erf_float(x);
-}
-
 }  // end namespace internal
 
 }  // end namespace Eigen
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index d6840a47c..56023508c 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -279,13 +279,63 @@ struct digamma_impl {
  * Implementation of erf, requires C++11/C99                                *
  ****************************************************************************/
 
-template <typename Scalar>
+/** \internal \returns the error function of \a a (coeff-wise)
+    Doesn't do anything fancy, just a 13/8-degree rational interpolant which
+    is accurate up to a couple of ulp in the range [-4, 4], outside of which
+    fl(erf(x)) = +/-1.
+
+    This implementation works on both scalars and Ts.
+*/
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& a_x) {
+  // Clamp the inputs to the range [-4, 4] since anything outside
+  // this range is +/-1.0f in single-precision.
+  const T plus_4 = pset1<T>(4.f);
+  const T minus_4 = pset1<T>(-4.f);
+  const T x = pmax(pmin(a_x, plus_4), minus_4);
+  // The monomial coefficients of the numerator polynomial (odd).
+  const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
+  const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
+  const T alpha_5 = pset1<T>(-7.34990630326855e-04f);
+  const T alpha_7 = pset1<T>(-5.69250639462346e-05f);
+  const T alpha_9 = pset1<T>(-2.10102402082508e-06f);
+  const T alpha_11 = pset1<T>(2.77068142495902e-08f);
+  const T alpha_13 = pset1<T>(-2.72614225801306e-10f);
+
+  // The monomial coefficients of the denominator polynomial (even).
+  const T beta_0 = pset1<T>(-1.42647390514189e-02f);
+  const T beta_2 = pset1<T>(-7.37332916720468e-03f);
+  const T beta_4 = pset1<T>(-1.68282697438203e-03f);
+  const T beta_6 = pset1<T>(-2.13374055278905e-04f);
+  const T beta_8 = pset1<T>(-1.45660718464996e-05f);
+
+  // Since the polynomials are odd/even, we need x^2.
+  const T x2 = pmul(x, x);
+
+  // Evaluate the numerator polynomial p.
+  T p = pmadd(x2, alpha_13, alpha_11);
+  p = pmadd(x2, p, alpha_9);
+  p = pmadd(x2, p, alpha_7);
+  p = pmadd(x2, p, alpha_5);
+  p = pmadd(x2, p, alpha_3);
+  p = pmadd(x2, p, alpha_1);
+  p = pmul(x, p);
+
+  // Evaluate the denominator polynomial p.
+  T q = pmadd(x2, beta_8, beta_6);
+  q = pmadd(x2, q, beta_4);
+  q = pmadd(x2, q, beta_2);
+  q = pmadd(x2, q, beta_0);
+
+  // Divide the numerator by the denominator.
+  return pdiv(p, q);
+}
+
+template <typename T>
 struct erf_impl {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
-    EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
-                        THIS_TYPE_IS_NOT_SUPPORTED);
-    return Scalar(0);
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    return generic_fast_erf_float(x);
   }
 };
 
@@ -302,7 +352,7 @@ struct erf_impl<float> {
 #if defined(SYCL_DEVICE_ONLY)
     return cl::sycl::erf(x);
 #else
-    return ::erff(x);
+    return generic_fast_erf_float(x);
 #endif
   }
 };
@@ -1893,6 +1943,12 @@ polygamma(const Scalar& n, const Scalar& x) {
 }
 
 template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar)
+    erf(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
+}
+
+template <typename Scalar>
 EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
     erfc(const Scalar& x) {
   return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
index 77fdb031a..2bb017921 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -30,6 +30,10 @@ Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
 
+/** \internal \returns the erf(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perf(const Packet& a) { using numext::erf; return erf(a); }
+
 /** \internal \returns the erfc(\a a) (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
author	Rasmus Munk Larsen <rmlarsen@google.com>	2019-09-27 13:56:04 -0700
committer	Rasmus Munk Larsen <rmlarsen@google.com>	2019-09-27 13:56:04 -0700
commit	13ef08e5ac96e474ce40031b868b2f7625014573 (patch)
tree	470748b41e3bb18c47e55377403c4276241eb354
parent	7c8bc0d9288f5152cf871dd2824a772a6003801b (diff)