diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-09-27 13:56:04 -0700 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-09-27 13:56:04 -0700 |
commit | 13ef08e5ac96e474ce40031b868b2f7625014573 (patch) | |
tree | 470748b41e3bb18c47e55377403c4276241eb354 /Eigen/src | |
parent | 7c8bc0d9288f5152cf871dd2824a772a6003801b (diff) |
Move implementation of vectorized error function erf() to SpecialFunctionsImpl.h.
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/MathFunctions.h | 31 | ||||
-rw-r--r-- | Eigen/src/Core/MathFunctionsImpl.h | 54 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/MathFunctions.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/MathFunctions.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/MathFunctions.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/arch/MSA/MathFunctions.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/MathFunctions.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/MathFunctions.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/arch/ZVector/MathFunctions.h | 7 |
10 files changed, 1 insertions, 136 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index c102914ce..2d7b9f9e0 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -531,10 +531,6 @@ Packet pcosh(const Packet& a) { EIGEN_USING_STD_MATH(cosh); return cosh(a); } template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) { EIGEN_USING_STD_MATH(tanh); return tanh(a); } -/** \internal \returns the error function of \a a (coeff-wise). */ -template <typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet perf(const Packet& a) { return numext::erf(a); } - /** \internal \returns the exp of \a a (coeff-wise) */ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { EIGEN_USING_STD_MATH(exp); return exp(a); } diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 950ebec43..f1feab508 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -891,7 +891,6 @@ template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x) template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x); template<typename T> T generic_fast_tanh_float(const T& a_x); -template<typename T> T generic_fast_erf_float(const T& a_x); } // end namespace internal /**************************************************************************** @@ -1579,36 +1578,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double tanh(const double &x) { return ::tanh(x); } #endif -#if EIGEN_HAS_CXX11 -template<typename T> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T erf(const T &x) { - EIGEN_USING_STD_MATH(erf); - return erf(x); -} -#else -template<typename T> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T erf(const T& x); -#endif - -#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY) -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -float erf(float x) { return internal::generic_fast_erf_float(x); } -#endif - -#if defined(SYCL_DEVICE_ONLY) -SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(erf, erf) -#endif - -#if !EIGEN_HAS_CXX11 || defined(EIGEN_GPUCC) -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -float erf(const float &x) { return ::erff(x); } - -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -double erf(const double &x) { return ::erf(x); } -#endif - template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T fmod(const T& a, const T& b) { diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index c4957fbc2..aff3967ca 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -66,58 +66,6 @@ T generic_fast_tanh_float(const T& a_x) return pdiv(p, q); } -/** \internal \returns the error function of \a a (coeff-wise) - Doesn't do anything fancy, just a 13/8-degree rational interpolant which - is accurate up to a couple of ulp in the range [-4, 4], outside of which - fl(erf(x)) = +/-1. - - This implementation works on both scalars and Ts. -*/ -template <typename T> -T generic_fast_erf_float(const T& a_x) { - // Clamp the inputs to the range [-4, 4] since anything outside - // this range is +/-1.0f in single-precision. - const T plus_4 = pset1<T>(4.f); - const T minus_4 = pset1<T>(-4.f); - const T x = pmax(pmin(a_x, plus_4), minus_4); - // The monomial coefficients of the numerator polynomial (odd). - const T alpha_1 = pset1<T>(-1.60960333262415e-02f); - const T alpha_3 = pset1<T>(-2.95459980854025e-03f); - const T alpha_5 = pset1<T>(-7.34990630326855e-04f); - const T alpha_7 = pset1<T>(-5.69250639462346e-05f); - const T alpha_9 = pset1<T>(-2.10102402082508e-06f); - const T alpha_11 = pset1<T>(2.77068142495902e-08f); - const T alpha_13 = pset1<T>(-2.72614225801306e-10f); - - // The monomial coefficients of the denominator polynomial (even). - const T beta_0 = pset1<T>(-1.42647390514189e-02f); - const T beta_2 = pset1<T>(-7.37332916720468e-03f); - const T beta_4 = pset1<T>(-1.68282697438203e-03f); - const T beta_6 = pset1<T>(-2.13374055278905e-04f); - const T beta_8 = pset1<T>(-1.45660718464996e-05f); - - // Since the polynomials are odd/even, we need x^2. - const T x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - T p = pmadd(x2, alpha_13, alpha_11); - p = pmadd(x2, p, alpha_9); - p = pmadd(x2, p, alpha_7); - p = pmadd(x2, p, alpha_5); - p = pmadd(x2, p, alpha_3); - p = pmadd(x2, p, alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial p. - T q = pmadd(x2, beta_8, beta_6); - q = pmadd(x2, q, beta_4); - q = pmadd(x2, q, beta_2); - q = pmadd(x2, q, beta_0); - - // Divide the numerator by the denominator. - return pdiv(p, q); -} - template<typename RealScalar> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) @@ -126,7 +74,7 @@ RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) RealScalar p, qp; p = numext::maxi(x,y); if(p==RealScalar(0)) return RealScalar(0); - qp = numext::mini(y,x) / p; + qp = numext::mini(y,x) / p; return p * sqrt(RealScalar(1) + qp*qp); } diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 089a682ba..da1b1e3f8 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -62,13 +62,6 @@ ptanh<Packet8f>(const Packet8f& x) { return internal::generic_fast_tanh_float(x); } -// Error function. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f -perf<Packet8f>(const Packet8f& x) { - return internal::generic_fast_erf_float(x); -} - // Exponential function for doubles. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h index 66581964c..1cf7338c2 100644 --- a/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -411,12 +411,6 @@ ptanh<Packet16f>(const Packet16f& _x) { return internal::generic_fast_tanh_float(_x); } -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f -perf<Packet16f>(const Packet16f& _x) { - return internal::generic_fast_erf_float(_x); -} - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index 00a0248e2..3a7a32936 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -83,13 +83,6 @@ ptanh<Packet4f>(const Packet4f& x) { return internal::generic_fast_tanh_float(x); } -// Error function. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f -perf<Packet4f>(const Packet4f& x) { - return internal::generic_fast_erf_float(x); -} - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/MSA/MathFunctions.h b/Eigen/src/Core/arch/MSA/MathFunctions.h index 23e4b8664..f5181b90e 100644 --- a/Eigen/src/Core/arch/MSA/MathFunctions.h +++ b/Eigen/src/Core/arch/MSA/MathFunctions.h @@ -321,13 +321,6 @@ pcos<Packet4f>(const Packet4f& x) { return psincos_inner_msa_float</* sine */ false>(x); } -// Error function. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f -perf<Packet4f>(const Packet4f& x) { - return internal::generic_fast_erf_float(x); -} - template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp<Packet2d>(const Packet2d& _x) { diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h index 13e3f4989..8bf572788 100644 --- a/Eigen/src/Core/arch/NEON/MathFunctions.h +++ b/Eigen/src/Core/arch/NEON/MathFunctions.h @@ -43,13 +43,6 @@ ptanh<Packet4f>(const Packet4f& x) { return internal::generic_fast_tanh_float(x); } -// Error function. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f -perf<Packet4f>(const Packet4f& x) { - return internal::generic_fast_erf_float(x); -} - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index a991b1a39..b21bb93bf 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -147,13 +147,6 @@ ptanh<Packet4f>(const Packet4f& x) { return internal::generic_fast_tanh_float(x); } -// Error function. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f -perf<Packet4f>(const Packet4f& a) { - return internal::generic_fast_erf_float(a); -} - } // end namespace internal namespace numext { diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h index d4b33b56f..689ecc702 100644 --- a/Eigen/src/Core/arch/ZVector/MathFunctions.h +++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h @@ -232,13 +232,6 @@ ptanh<Packet4f>(const Packet4f& x) { return internal::generic_fast_tanh_float(x); } -// Error function. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f -perf<Packet4f>(const Packet4f& x) { - return internal::generic_fast_erf_float(x); -} - } // end namespace internal } // end namespace Eigen |