From f7362772e3236cdb8ae4d9be175f83a0b19902a0 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Thu, 24 Dec 2015 21:15:38 -0800 Subject: Add digamma for CPU + CUDA. Includes tests. --- Eigen/src/Core/GenericPacketMath.h | 5 + Eigen/src/Core/GlobalFunctions.h | 1 + Eigen/src/Core/SpecialFunctions.h | 426 ++++++++++++++++++++++++++----- Eigen/src/Core/arch/CUDA/MathFunctions.h | 14 + Eigen/src/Core/arch/CUDA/PacketMath.h | 2 + Eigen/src/Core/functors/UnaryFunctors.h | 22 ++ 6 files changed, 411 insertions(+), 59 deletions(-) (limited to 'Eigen/src/Core') diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 8ad51bad5..4c7d1d848 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -75,6 +75,7 @@ struct default_packet_traits HasCosh = 0, HasTanh = 0, HasLGamma = 0, + HasDiGamma = 0, HasErf = 0, HasErfc = 0, @@ -439,6 +440,10 @@ Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } +/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } + /** \internal \returns the erf(\a a) (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perf(const Packet& a) { using numext::erf; return erf(a); } diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 62fec7008..396da8e71 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -50,6 +50,7 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index d43cf23a1..9fff3d74b 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -13,79 +13,390 @@ namespace Eigen { namespace internal { +namespace cephes { + +/* polevl (modified for Eigen) + * + * Evaluate polynomial + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N+1]; + * + * y = polevl( x, coef); + * + * + * + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evl() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevl(). + * + * + * The Eigen implementation is templatized. For best speed, store + * coef as a const array (constexpr), e.g. + * + * const double coef[] = {1.0, 2.0, 3.0, ...}; + * + */ +template +struct polevl { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + static Scalar run(const Scalar x, const Scalar coef[]) { + EIGEN_STATIC_ASSERT(N > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + + return polevl::run(x, coef) * x + coef[N]; + } +}; + +template +struct polevl { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + static Scalar run(const Scalar, const Scalar coef[]) { + return coef[0]; + } +}; + +} // end namespace cephes + /**************************************************************************** * Implementation of lgamma * ****************************************************************************/ -template -struct lgamma_impl -{ +template +struct lgamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template +struct lgamma_retval { + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template <> +struct lgamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); } +}; + +template <> +struct lgamma_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); } +}; +#endif + +/**************************************************************************** + * Implementation of digamma (psi) * + ****************************************************************************/ + +template +struct digamma_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar&) - { + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); return Scalar(0); } }; -template -struct lgamma_retval -{ +template +struct digamma_retval { typedef Scalar type; }; #ifdef EIGEN_HAS_C99_MATH -template<> -struct lgamma_impl -{ +template <> +struct digamma_impl { + /* + * Psi (digamma) function (modified for Eigen) + * + * + * SYNOPSIS: + * + * float x, y, psif(); + * + * y = psif( x ); + * + * + * DESCRIPTION: + * + * d - + * psi(x) = -- ln | (x) + * dx + * + * is the logarithmic derivative of the gamma function. + * For integer x, + * n-1 + * - + * psi(n) = -EUL + > 1/k. + * - + * k=1 + * + * If x is negative, it is transformed to a positive argument by the + * reflection formula psi(1-x) = psi(x) + pi cot(pi x). + * For general positive x, the argument is made greater than 10 + * using the recurrence psi(x+1) = psi(x) + 1/x. + * Then the following asymptotic expansion is applied: + * + * inf. B + * - 2k + * psi(x) = log(x) - 1/2x - > ------- + * - 2k + * k=1 2k x + * + * where the B2k are Bernoulli numbers. + * + * ACCURACY: + * Absolute error, relative when |psi| > 1 : + * arithmetic domain # trials peak rms + * IEEE -33,0 30000 8.2e-7 1.2e-7 + * IEEE 0,33 100000 7.3e-7 7.7e-8 + * + * ERROR MESSAGES: + * message condition value returned + * psi singularity x integer <=0 INFINITY + */ + /* + Cephes Math Library Release 2.2: June, 1992 + Copyright 1984, 1987, 1992 by Stephen L. Moshier + Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); } + static float run(float xx) { + float p, q, nz, x, s, w, y, z; + bool negative; + + // Some necessary constants + const float PIF = 3.141592653589793238; + const float MAXNUMF = std::numeric_limits::infinity(); + + const float A[] = { + -4.16666666666666666667E-3, + 3.96825396825396825397E-3, + -8.33333333333333333333E-3, + 8.33333333333333333333E-2 + }; + + x = xx; + nz = 0.0f; + negative = 0; + if (x <= 0.0f) { + negative = 1; + q = x; + p = ::floor(q); + if (p == q) { + return (MAXNUMF); + } + nz = q - p; + if (nz != 0.5f) { + if (nz > 0.5f) { + p += 1.0f; + nz = q - p; + } + nz = PIF / ::tan(PIF * nz); + } else { + nz = 0.0f; + } + x = 1.0f - x; + } + + /* use the recurrence psi(x+1) = psi(x) + 1/x. */ + s = x; + w = 0.0f; + while (s < 10.0f) { + w += 1.0f / s; + s += 1.0f; + } + + if (s < 1.0e8f) { + z = 1.0f / (s * s); + y = z * cephes::polevl::run(z, A); + } else + y = 0.0f; + + y = ::log(s) - (0.5f / s) - y - w; + + return (negative) ? y - nz : y; + } }; -template<> -struct lgamma_impl -{ +template <> +struct digamma_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); } + static double run(double x) { + /* + * + * Psi (digamma) function (modified for Eigen) + * + * + * SYNOPSIS: + * + * double x, y, psi(); + * + * y = psi( x ); + * + * + * DESCRIPTION: + * + * d - + * psi(x) = -- ln | (x) + * dx + * + * is the logarithmic derivative of the gamma function. + * For integer x, + * n-1 + * - + * psi(n) = -EUL + > 1/k. + * - + * k=1 + * + * If x is negative, it is transformed to a positive argument by the + * reflection formula psi(1-x) = psi(x) + pi cot(pi x). + * For general positive x, the argument is made greater than 10 + * using the recurrence psi(x+1) = psi(x) + 1/x. + * Then the following asymptotic expansion is applied: + * + * inf. B + * - 2k + * psi(x) = log(x) - 1/2x - > ------- + * - 2k + * k=1 2k x + * + * where the B2k are Bernoulli numbers. + * + * ACCURACY: + * Relative error (except absolute when |psi| < 1): + * arithmetic domain # trials peak rms + * IEEE 0,30 30000 1.3e-15 1.4e-16 + * IEEE -30,0 40000 1.5e-15 2.2e-16 + * + * ERROR MESSAGES: + * message condition value returned + * psi singularity x integer <=0 INFINITY + */ + + /* + * Cephes Math Library Release 2.8: June, 2000 + * Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier + */ + double p, q, nz, s, w, y, z; + bool negative; + + const double A[] = { + 8.33333333333333333333E-2, + -2.10927960927960927961E-2, + 7.57575757575757575758E-3, + -4.16666666666666666667E-3, + 3.96825396825396825397E-3, + -8.33333333333333333333E-3, + 8.33333333333333333333E-2 + }; + + const double MAXNUM = std::numeric_limits::infinity(); + const double PI = 3.14159265358979323846; + + negative = 0; + nz = 0.0; + + if (x <= 0.0) { + negative = 1; + q = x; + p = ::floor(q); + if (p == q) { + return MAXNUM; + } + /* Remove the zeros of tan(PI x) + * by subtracting the nearest integer from x + */ + nz = q - p; + if (nz != 0.5) { + if (nz > 0.5) { + p += 1.0; + nz = q - p; + } + nz = PI / ::tan(PI * nz); + } + else { + nz = 0.0; + } + x = 1.0 - x; + } + + /* use the recurrence psi(x+1) = psi(x) + 1/x. */ + s = x; + w = 0.0; + while (s < 10.0) { + w += 1.0 / s; + s += 1.0; + } + + if (s < 1.0e17) { + z = 1.0 / (s * s); + y = z * cephes::polevl::run(z, A); + } + else + y = 0.0; + + y = ::log(s) - (0.5 / s) - y - w; + + return (negative) ? y - nz : y; + } }; + #endif /**************************************************************************** * Implementation of erf * ****************************************************************************/ -template -struct erf_impl -{ +template +struct erf_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar&) - { + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); return Scalar(0); } }; -template -struct erf_retval -{ +template +struct erf_retval { typedef Scalar type; }; #ifdef EIGEN_HAS_C99_MATH -template<> -struct erf_impl -{ +template <> +struct erf_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(const float& x) { return ::erff(x); } + static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); } }; -template<> -struct erf_impl -{ +template <> +struct erf_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); } + static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); } }; #endif // EIGEN_HAS_C99_MATH @@ -93,35 +404,30 @@ struct erf_impl * Implementation of erfc * ****************************************************************************/ -template -struct erfc_impl -{ +template +struct erfc_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar&) - { + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); return Scalar(0); } }; -template -struct erfc_retval -{ +template +struct erfc_retval { typedef Scalar type; }; #ifdef EIGEN_HAS_C99_MATH -template<> -struct erfc_impl -{ +template <> +struct erfc_impl { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } }; -template<> -struct erfc_impl -{ +template <> +struct erfc_impl { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } }; @@ -129,27 +435,29 @@ struct erfc_impl } // end namespace internal - namespace numext { -template -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x) -{ +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) + lgamma(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x); } -template -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x) -{ +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar) + digamma(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) + erf(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x); } -template -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x) -{ +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) + erfc(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); } diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index ecd5c444e..a2c06a817 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -78,6 +78,20 @@ double2 plgamma(const double2& a) return make_double2(lgamma(a.x), lgamma(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pdigamma(const float4& a) +{ + using numext::digamma; + return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pdigamma(const double2& a) +{ + using numext::digamma; + return make_double2(digamma(a.x), digamma(a.y)); +} + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 perf(const float4& a) { diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 9d5773106..d3d9f910e 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -40,6 +40,7 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasLGamma = 1, + HasDiGamma = 1, HasErf = 1, HasErfc = 1, @@ -63,6 +64,7 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasLGamma = 1, + HasDiGamma = 1, HasErf = 1, HasErfc = 1, diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 01727f250..897ab04ba 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -427,6 +427,28 @@ struct functor_traits > }; }; +/** \internal + * \brief Template functor to compute psi, the derivative of lgamma of a scalar. + * \sa class CwiseUnaryOp, Cwise::digamma() + */ +template struct scalar_digamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::digamma; return digamma(a); + } + typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasDiGamma + }; +}; + /** \internal * \brief Template functor to compute the Gauss error function of a * scalar -- cgit v1.2.3