From 18ceb3413d09afc4f143014f89552f941321209b Mon Sep 17 00:00:00 2001 From: Srinivas Vasudevan Date: Mon, 12 Aug 2019 19:26:29 -0400 Subject: Add ndtri function, the inverse of the normal distribution function. --- Eigen/src/Core/GenericPacketMath.h | 21 ++ Eigen/src/Core/GlobalFunctions.h | 1 + Eigen/src/Core/arch/AVX/PacketMath.h | 1 + Eigen/src/Core/arch/AVX512/PacketMath.h | 3 + .../Core/arch/Default/GenericPacketMathFunctions.h | 55 ++++ Eigen/src/Core/arch/GPU/PacketMath.h | 2 + Eigen/src/Core/arch/SSE/PacketMath.h | 3 + Eigen/src/Core/arch/SYCL/InteropHeaders.h | 1 + Eigen/src/Core/util/ForwardDeclarations.h | 1 + Eigen/src/plugins/ArrayCwiseUnaryOps.h | 28 +- test/packetmath.cpp | 6 + unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 6 + unsupported/Eigen/SpecialFunctions | 1 + .../SpecialFunctions/SpecialFunctionsFunctors.h | 25 ++ .../src/SpecialFunctions/SpecialFunctionsHalf.h | 3 + .../src/SpecialFunctions/SpecialFunctionsImpl.h | 311 +++++++++++++++++---- .../SpecialFunctions/SpecialFunctionsPacketMath.h | 7 + .../arch/GPU/GpuSpecialFunctions.h | 13 + unsupported/test/cxx11_tensor_gpu.cu | 64 +++++ unsupported/test/special_functions.cpp | 20 ++ 20 files changed, 502 insertions(+), 70 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 6ab38994f..651e3f7b3 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -83,6 +83,7 @@ struct default_packet_traits HasPolygamma = 0, HasErf = 0, HasErfc = 0, + HasNdtri = 0, HasI0e = 0, HasI1e = 0, HasIGamma = 0, @@ -257,12 +258,32 @@ pldexp(const Packet &a, const Packet &exponent) { return std::ldexp(a,exponent); template EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) { return pxor(a,a); } +template<> EIGEN_DEVICE_FUNC inline float pzero(const float& a) { + EIGEN_UNUSED_VARIABLE(a); + return 0.; +} + +template<> EIGEN_DEVICE_FUNC inline double pzero(const double& a) { + EIGEN_UNUSED_VARIABLE(a); + return 0.; +} + /** \internal \returns bits of \a or \b according to the input bit mask \a mask */ template EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) { return por(pand(a,mask),pandnot(b,mask)); } +template<> EIGEN_DEVICE_FUNC inline float pselect( + const float& mask, const float& a, const float&b) { + return mask == 0 ? b : a; +} + +template<> EIGEN_DEVICE_FUNC inline double pselect( + const double& mask, const double& a, const double& b) { + return mask == 0 ? b : a; +} + /** \internal \returns a <= b as a bit mask */ template EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) { return a<=b ? 
ptrue(a) : pzero(a); } diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 71377cee5..7f132bdd0 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -76,6 +76,7 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri,scalar_ndtri_op,inverse normal distribution function,\sa ArrayBase::ndtri) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 7ee9dee10..9feb96f8b 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -66,6 +66,7 @@ template<> struct packet_traits : default_packet_traits HasCos = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, + HasNdtri = 1, HasSqrt = 1, HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 383c49636..1312829d8 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -60,6 +60,9 @@ template<> struct packet_traits : default_packet_traits #if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) #ifdef EIGEN_VECTORIZE_AVX512DQ HasLog = 1, + HasLog1p = 1, + HasExpm1 = 1, + HasNdtri = 1, #endif HasExp = 1, HasSqrt = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 43e827638..b021bd0b7 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -467,5 +467,60 @@ Packet pcos_float(const Packet& x) return psincos_float(x); } +/* polevl (modified for Eigen) + * + * Evaluate polynomial + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N+1]; + * + * y = polevl( x, coef); + * + * + * + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evl() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevl(). + * + * + * The Eigen implementation is templatized. For best speed, store + * coef as a const array (constexpr), e.g. 
+ * + * const double coef[] = {1.0, 2.0, 3.0, ...}; + * + */ +template +struct ppolevl { + static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits::type coeff[]) { + EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + return pmadd(ppolevl::run(x, coeff), x, pset1(coeff[N])); + } +}; + +template +struct ppolevl { + static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits::type coeff[]) { + EIGEN_UNUSED_VARIABLE(x); + return pset1(coeff[0]); + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h index 5084fc786..6ba2990d1 100644 --- a/Eigen/src/Core/arch/GPU/PacketMath.h +++ b/Eigen/src/Core/arch/GPU/PacketMath.h @@ -44,6 +44,7 @@ template<> struct packet_traits : default_packet_traits HasPolygamma = 1, HasErf = 1, HasErfc = 1, + HasNdtri = 1, HasI0e = 1, HasI1e = 1, HasIGamma = 1, @@ -78,6 +79,7 @@ template<> struct packet_traits : default_packet_traits HasPolygamma = 1, HasErf = 1, HasErfc = 1, + HasNdtri = 1, HasI0e = 1, HasI1e = 1, HasIGamma = 1, diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 0d571ce61..69daea8f7 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -111,6 +111,9 @@ template<> struct packet_traits : default_packet_traits HasCos = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, + HasLog1p = 1, + HasExpm1 = 1, + HasNdtri = 1, HasSqrt = 1, HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h index ef66fc7de..d76030419 100644 --- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h +++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h @@ -54,6 +54,7 @@ struct sycl_packet_traits : default_packet_traits { HasPolygamma = 0, HasErf = 0, HasErfc = 0, + HasNdtri = 0, HasIGamma = 0, HasIGammac = 0, HasBetaInc = 0, diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 050d15e96..18889fcf4 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -214,6 +214,7 @@ template struct scalar_lgamma_op; template struct scalar_digamma_op; template struct scalar_erf_op; template struct scalar_erfc_op; +template struct scalar_ndtri_op; template struct scalar_igamma_op; template struct scalar_igammac_op; template struct scalar_zeta_op; diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 2f99ee0b2..4aef72d92 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -536,14 +536,12 @@ typedef CwiseUnaryOp, const Derived> LgammaRe typedef CwiseUnaryOp, const Derived> DigammaReturnType; typedef CwiseUnaryOp, const Derived> ErfReturnType; typedef CwiseUnaryOp, const Derived> ErfcReturnType; +typedef CwiseUnaryOp, const Derived> NdtriReturnType; /** \cpp11 \returns an expression of the coefficient-wise ln(|gamma(*this)|). * * \specialfunctions_module * - * Example: \include Cwise_lgamma.cpp - * Output: \verbinclude Cwise_lgamma.out - * * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, * or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar * type T to be supported. 
@@ -579,9 +577,6 @@ digamma() const * * \specialfunctions_module * - * Example: \include Cwise_erf.cpp - * Output: \verbinclude Cwise_erf.out - * * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, * or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar * type T to be supported. @@ -600,9 +595,6 @@ erf() const * * \specialfunctions_module * - * Example: \include Cwise_erfc.cpp - * Output: \verbinclude Cwise_erfc.out - * * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, * or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar * type T to be supported. @@ -615,3 +607,21 @@ erfc() const { return ErfcReturnType(derived()); } + +/** \cpp11 \returns an expression of the coefficient-wise inverse normal distribution + * function of *this. + * + * \specialfunctions_module + * + * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of ndtri(T) for any scalar + * type T to be supported. + * + * \sa Math functions, erf() + */ +EIGEN_DEVICE_FUNC +inline const NdtriReturnType +ndtri() const +{ + return NdtriReturnType(derived()); +} diff --git a/test/packetmath.cpp b/test/packetmath.cpp index f1448f335..d65869de7 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -590,6 +590,12 @@ template<typename Scalar> void packetmath_real() h.store(data2, internal::perfc(h.load(data1))); VERIFY((numext::isnan)(data2[0])); } + { + for (int i=0; i<size; ++i) { + data1[i] = internal::random<Scalar>(0,1); + } + CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasNdtri, numext::ndtri, internal::pndtri); + } #endif // EIGEN_HAS_C99_MATH for (int i=0; i<size; ++i) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h erfc() const { return unaryExpr(internal::scalar_erfc_op<Scalar>()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived> + ndtri() const { + return unaryExpr(internal::scalar_ndtri_op<Scalar>()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived> sigmoid() const { diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions index 44fd99b43..a5abd407d 100644 --- a/unsupported/Eigen/SpecialFunctions +++ b/unsupported/Eigen/SpecialFunctions @@ -33,6 +33,7 @@ namespace Eigen { * - gamma_sample_der_alpha * - igammac * - digamma + * - ndtri * - polygamma * - zeta * - betainc diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h index c6fac91bb..13a72a3ee 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h @@ -283,6 +283,31 @@ struct functor_traits<scalar_erfc_op<Scalar> > }; }; +/** \internal + * \brief Template functor to compute the inverse of the normal distribution + * function of a scalar + * \sa class CwiseUnaryOp, Cwise::ndtri() + */ +template<typename Scalar> struct scalar_ndtri_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_ndtri_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { + using numext::ndtri; return ndtri(a); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pndtri(a); } +}; +template<typename Scalar> +struct functor_traits<scalar_ndtri_op<Scalar> > +{ + enum { + // On average, we are evaluating rational functions with degree N=9 in the + // numerator and denominator.
This results in 2*N additions and 2*N + // multiplications. + Cost = 18 * NumTraits::MulCost + 18 * NumTraits::AddCost, + PacketAccess = packet_traits::HasNdtri + }; +}; + /** \internal * \brief Template functor to compute the exponentially scaled modified Bessel * function of order zero diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h index fbdfd299e..538db2afa 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h @@ -30,6 +30,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::ha template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) { return Eigen::half(Eigen::numext::erfc(static_cast(a))); } +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ndtri(const Eigen::half& a) { + return Eigen::half(Eigen::numext::ndtri(static_cast(a))); +} template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) { return Eigen::half(Eigen::numext::igamma(static_cast(a), static_cast(x))); } diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 1464a9751..78050d0a1 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -38,63 +38,6 @@ namespace internal { namespace cephes { -/* polevl (modified for Eigen) - * - * Evaluate polynomial - * - * - * - * SYNOPSIS: - * - * int N; - * Scalar x, y, coef[N+1]; - * - * y = polevl( x, coef); - * - * - * - * DESCRIPTION: - * - * Evaluates polynomial of degree N: - * - * 2 N - * y = C + C x + C x +...+ C x - * 0 1 2 N - * - * Coefficients are stored in reverse order: - * - * coef[0] = C , ..., coef[N] = C . - * N 0 - * - * The function p1evl() assumes that coef[N] = 1.0 and is - * omitted from the array. Its calling arguments are - * otherwise the same as polevl(). - * - * - * The Eigen implementation is templatized. For best speed, store - * coef as a const array (constexpr), e.g. 
- * - * const double coef[] = {1.0, 2.0, 3.0, ...}; - * - */ -template -struct polevl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) { - EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - - return polevl::run(x, coef) * x + coef[N]; - } -}; - -template -struct polevl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) { - return coef[0]; - } -}; - /* chbevl (modified for Eigen) * * Evaluate Chebyshev series @@ -190,7 +133,7 @@ template <> struct lgamma_impl { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float run(float x) { -#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) +#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) int dummy; return ::lgammaf_r(x, &dummy); #elif defined(SYCL_DEVICE_ONLY) @@ -205,7 +148,7 @@ template <> struct lgamma_impl { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE double run(double x) { -#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) +#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) int dummy; return ::lgamma_r(x, &dummy); #elif defined(SYCL_DEVICE_ONLY) @@ -264,7 +207,7 @@ struct digamma_impl_maybe_poly { float z; if (s < 1.0e8f) { z = 1.0f / (s * s); - return z * cephes::polevl::run(z, A); + return z * internal::ppolevl::run(z, A); } else return 0.0f; } }; @@ -286,7 +229,7 @@ struct digamma_impl_maybe_poly { double z; if (s < 1.0e17) { z = 1.0 / (s * s); - return z * cephes::polevl::run(z, A); + return z * internal::ppolevl::run(z, A); } else return 0.0; } @@ -494,6 +437,246 @@ struct erfc_impl { }; #endif // EIGEN_HAS_C99_MATH + +/*************************************************************************** +* Implementation of ndtri. * +****************************************************************************/ + +/* Inverse of Normal distribution function (modified for Eigen). + * + * + * SYNOPSIS: + * + * double x, y, ndtri(); + * + * x = ndtri( y ); + * + * + * + * DESCRIPTION: + * + * Returns the argument, x, for which the area under the + * Gaussian probability density function (integrated from + * minus infinity to x) is equal to y. + * + * + * For small arguments 0 < y < exp(-2), the program computes + * z = sqrt( -2.0 * log(y) ); then the approximation is + * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z). + * There are two rational functions P/Q, one for 0 < y < exp(-32) + * and the other for y up to exp(-2). For larger arguments, + * w = y - 0.5, and x/sqrt(2pi) = w + w**3 R(w**2)/S(w**2)). + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * DEC 0.125, 1 5500 9.5e-17 2.1e-17 + * DEC 6e-39, 0.135 3500 5.7e-17 1.3e-17 + * IEEE 0.125, 1 20000 7.2e-16 1.3e-16 + * IEEE 3e-308, 0.135 50000 4.6e-16 9.8e-17 + * + * + * ERROR MESSAGES: + * + * message condition value returned + * ndtri domain x <= 0 -MAXNUM + * ndtri domain x >= 1 MAXNUM + * + */ + /* + Cephes Math Library Release 2.2: June, 1992 + Copyright 1985, 1987, 1992 by Stephen L. Moshier + Direct inquiries to 30 Frost Street, Cambridge, MA 02140 + */ + + +// TODO: Add a cheaper approximation for float. 
+ + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T flipsign( + const T& should_flipsign, const T& x) { + const T sign_mask = pset1(-0.0); + T sign_bit = pand(should_flipsign, sign_mask); + return pxor(sign_bit, x); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double flipsign( + const double& should_flipsign, const double& x) { + return should_flipsign == 0 ? x : -x; +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float flipsign( + const float& should_flipsign, const float& x) { + return should_flipsign == 0 ? x : -x; +} + +// We split this computation in to two so that in the scalar path +// only one branch is evaluated (due to our template specialization of pselect +// being an if statement.) + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_gt_exp_neg_two(const T& b) { + const ScalarType p0[] = { + ScalarType(-5.99633501014107895267e1), + ScalarType(9.80010754185999661536e1), + ScalarType(-5.66762857469070293439e1), + ScalarType(1.39312609387279679503e1), + ScalarType(-1.23916583867381258016e0) + }; + const ScalarType q0[] = { + ScalarType(1.0), + ScalarType(1.95448858338141759834e0), + ScalarType(4.67627912898881538453e0), + ScalarType(8.63602421390890590575e1), + ScalarType(-2.25462687854119370527e2), + ScalarType(2.00260212380060660359e2), + ScalarType(-8.20372256168333339912e1), + ScalarType(1.59056225126211695515e1), + ScalarType(-1.18331621121330003142e0) + }; + const T sqrt2pi = pset1(ScalarType(2.50662827463100050242e0)); + const T half = pset1(ScalarType(0.5)); + T c, c2, ndtri_gt_exp_neg_two; + + c = psub(b, half); + c2 = pmul(c, c); + ndtri_gt_exp_neg_two = pmadd(c, pmul( + c2, pdiv( + internal::ppolevl::run(c2, p0), + internal::ppolevl::run(c2, q0))), c); + return pmul(ndtri_gt_exp_neg_two, sqrt2pi); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two( + const T& b, const T& should_flipsign) { + /* Approximation for interval z = sqrt(-2 log a ) between 2 and 8 + * i.e., a between exp(-2) = .135 and exp(-32) = 1.27e-14. + */ + const ScalarType p1[] = { + ScalarType(4.05544892305962419923e0), + ScalarType(3.15251094599893866154e1), + ScalarType(5.71628192246421288162e1), + ScalarType(4.40805073893200834700e1), + ScalarType(1.46849561928858024014e1), + ScalarType(2.18663306850790267539e0), + ScalarType(-1.40256079171354495875e-1), + ScalarType(-3.50424626827848203418e-2), + ScalarType(-8.57456785154685413611e-4) + }; + const ScalarType q1[] = { + ScalarType(1.0), + ScalarType(1.57799883256466749731e1), + ScalarType(4.53907635128879210584e1), + ScalarType(4.13172038254672030440e1), + ScalarType(1.50425385692907503408e1), + ScalarType(2.50464946208309415979e0), + ScalarType(-1.42182922854787788574e-1), + ScalarType(-3.80806407691578277194e-2), + ScalarType(-9.33259480895457427372e-4) + }; + /* Approximation for interval z = sqrt(-2 log a ) between 8 and 64 + * i.e., a between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890. 
+ */ + const ScalarType p2[] = { + ScalarType(3.23774891776946035970e0), + ScalarType(6.91522889068984211695e0), + ScalarType(3.93881025292474443415e0), + ScalarType(1.33303460815807542389e0), + ScalarType(2.01485389549179081538e-1), + ScalarType(1.23716634817820021358e-2), + ScalarType(3.01581553508235416007e-4), + ScalarType(2.65806974686737550832e-6), + ScalarType(6.23974539184983293730e-9) + }; + const ScalarType q2[] = { + ScalarType(1.0), + ScalarType(6.02427039364742014255e0), + ScalarType(3.67983563856160859403e0), + ScalarType(1.37702099489081330271e0), + ScalarType(2.16236993594496635890e-1), + ScalarType(1.34204006088543189037e-2), + ScalarType(3.28014464682127739104e-4), + ScalarType(2.89247864745380683936e-6), + ScalarType(6.79019408009981274425e-9) + }; + const T eight = pset1(ScalarType(8.0)); + const T one = pset1(ScalarType(1)); + const T neg_two = pset1(ScalarType(-2)); + T x, x0, x1, z; + + x = psqrt(pmul(neg_two, plog(b))); + x0 = psub(x, pdiv(plog(x), x)); + z = one / x; + x1 = pmul( + z, pselect( + pcmp_lt(x, eight), + pdiv(internal::ppolevl::run(z, p1), + internal::ppolevl::run(z, q1)), + pdiv(internal::ppolevl::run(z, p2), + internal::ppolevl::run(z, q2)))); + return flipsign(should_flipsign, psub(x0, x1)); +} + +template +T generic_ndtri(const T& a) { + const T maxnum = pset1(NumTraits::infinity()); + const T neg_maxnum = pset1(-NumTraits::infinity()); + + const T zero = pset1(ScalarType(0)); + const T one = pset1(ScalarType(1)); + // exp(-2) + const T exp_neg_two = pset1(ScalarType(0.13533528323661269189)); + T b, ndtri, should_flipsign; + + should_flipsign = pcmp_le(a, psub(one, exp_neg_two)); + b = pselect(should_flipsign, a, psub(one, a)); + + ndtri = pselect( + pcmp_lt(exp_neg_two, b), + generic_ndtri_gt_exp_neg_two(b), + generic_ndtri_lt_exp_neg_two(b, should_flipsign)); + + return pselect( + pcmp_le(a, zero), neg_maxnum, + pselect(pcmp_le(one, a), maxnum, ndtri)); +} + +template +struct ndtri_retval { + typedef Scalar type; +}; + +#if !EIGEN_HAS_C99_MATH + +template +struct ndtri_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +# else + +template +struct ndtri_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar x) { + return generic_ndtri(x); + } +}; + +#endif // EIGEN_HAS_C99_MATH + + /************************************************************************************************************** * Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 * **************************************************************************************************************/ @@ -2120,6 +2303,12 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); } +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(ndtri, Scalar) + ndtri(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(ndtri, Scalar)::run(x); +} + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar) igamma(const Scalar& a, const Scalar& x) { diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h index 465f41d54..b6323c4db 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h @@ -38,6 +38,13 @@ Packet perf(const 
Packet& a) { using numext::erf; return erf(a); } template <typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } +/** \internal \returns the ndtri(\a a) (coeff-wise) */ +template <typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pndtri(const Packet& a) { + typedef typename unpacket_traits<Packet>::type ScalarType; + using internal::generic_ndtri; return generic_ndtri<Packet, ScalarType>(a); +} + /** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h index 40abcee3a..c831edc17 100644 --- a/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h +++ b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h @@ -101,6 +101,19 @@ double2 perfc(const double2& a) { return make_double2(erfc(a.x), erfc(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pndtri<float4>(const float4& a) { + using numext::ndtri; + return make_float4(ndtri(a.x), ndtri(a.y), ndtri(a.z), ndtri(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pndtri<double2>(const double2& a) { + using numext::ndtri; + return make_double2(ndtri(a.x), ndtri(a.y)); +} template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigamma<float4>(const float4& a, const float4& x) diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu index 94625e6a3..0a2fa8e61 100644 --- a/unsupported/test/cxx11_tensor_gpu.cu +++ b/unsupported/test/cxx11_tensor_gpu.cu @@ -1069,6 +1069,66 @@ void test_gpu_erfc(const Scalar stddev) gpuFree(d_out); } #endif +template <typename Scalar> +void test_gpu_ndtri() +{ + Tensor<Scalar, 1> in_x(9); + Tensor<Scalar, 1> out(9); + Tensor<Scalar, 1> expected_out(9); + out.setZero(); + + in_x(0) = Scalar(1); + in_x(1) = Scalar(0.); + in_x(2) = Scalar(0.5); + in_x(3) = Scalar(0.2); + in_x(4) = Scalar(0.8); + in_x(5) = Scalar(0.9); + in_x(6) = Scalar(0.1); + in_x(7) = Scalar(0.99); + in_x(8) = Scalar(0.01); + + expected_out(0) = std::numeric_limits<Scalar>::infinity(); + expected_out(1) = -std::numeric_limits<Scalar>::infinity(); + expected_out(2) = Scalar(0.0); + expected_out(3) = Scalar(-0.8416212335729142); + expected_out(4) = Scalar(0.8416212335729142); + expected_out(5) = Scalar(1.2815515655446004); + expected_out(6) = Scalar(-1.2815515655446004); + expected_out(7) = Scalar(2.3263478740408408); + expected_out(8) = Scalar(-2.3263478740408408); + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_out; + gpuMalloc((void**)(&d_in_x), bytes); + gpuMalloc((void**)(&d_out), bytes); + + gpuMemcpy(d_in_x, in_x.data(), bytes, gpuMemcpyHostToDevice); + + Eigen::GpuStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 9); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 9); + + gpu_out.device(gpu_device) = gpu_in_x.ndtri(); + + assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess); + assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess); + + VERIFY_IS_EQUAL(out(0), expected_out(0)); + VERIFY_IS_EQUAL(out(1), expected_out(1)); + + for (int i = 2; i < 9; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + + gpuFree(d_in_x); + gpuFree(d_out); +} template <typename Scalar> void test_gpu_betainc() @@ -1538,6 +1598,10 @@
EIGEN_DECLARE_TEST(cxx11_tensor_gpu) #if !defined(EIGEN_USE_HIP) // disable these tests on HIP for now. + + CALL_SUBTEST_5(test_gpu_ndtri<float>()); + CALL_SUBTEST_5(test_gpu_ndtri<double>()); + CALL_SUBTEST_5(test_gpu_digamma<float>()); CALL_SUBTEST_5(test_gpu_digamma<double>()); diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp index 50dae6c93..140a5e4c1 100644 --- a/unsupported/test/special_functions.cpp +++ b/unsupported/test/special_functions.cpp @@ -133,6 +133,26 @@ template<typename ArrayType> void array_special_functions() } #endif // EIGEN_HAS_C99_MATH + // Check the ndtri function against scipy.special.ndtri + { + ArrayType x(7), res(7), ref(7); + x << 0.5, 0.2, 0.8, 0.9, 0.1, 0.99, 0.01; + ref << 0., -0.8416212335729142, 0.8416212335729142, 1.2815515655446004, -1.2815515655446004, 2.3263478740408408, -2.3263478740408408; + CALL_SUBTEST( verify_component_wise(ref, ref); ); + CALL_SUBTEST( res = x.ndtri(); verify_component_wise(res, ref); ); + CALL_SUBTEST( res = ndtri(x); verify_component_wise(res, ref); ); + + // ndtri(normal_cdf(x)) ~= x + CALL_SUBTEST( + ArrayType m1 = ArrayType::Random(32); + using std::sqrt; + + ArrayType cdf_val = (m1 / sqrt(2.)).erf(); + cdf_val = (cdf_val + 1.) / 2.; + verify_component_wise(cdf_val.ndtri(), m1);); + + } + // Check the zeta function against scipy.special.zeta { ArrayType x(7), q(7), res(7), ref(7); -- cgit v1.2.3
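
The ppolevl helper moved into GenericPacketMathFunctions.h is the packet version of Cephes' polevl: a compile-time Horner recursion over coefficients stored highest-degree first. A minimal scalar sketch of the same recursion, using plain doubles rather than Eigen packets; the name polevl_scalar is illustrative, not part of the patch:

    #include <cstdio>

    // Horner evaluation of coef[0]*x^N + ... + coef[N], mirroring ppolevl's
    // recursion; coefficients are stored highest degree first.
    template <typename Scalar, int N>
    struct polevl_scalar {
      static Scalar run(Scalar x, const Scalar coef[]) {
        return polevl_scalar<Scalar, N - 1>::run(x, coef) * x + coef[N];
      }
    };

    template <typename Scalar>
    struct polevl_scalar<Scalar, 0> {
      static Scalar run(Scalar, const Scalar coef[]) { return coef[0]; }
    };

    int main() {
      const double coef[] = {3.0, 2.0, 1.0};  // 3x^2 + 2x + 1
      std::printf("%g\n", polevl_scalar<double, 2>::run(2.0, coef));  // prints 17
    }

In the packet version each step is a pmadd, so each rational approximation used by ndtri costs roughly one fused multiply-add per coefficient.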
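The scalar overloads of pzero and pselect added to GenericPacketMath.h are what let generic_ndtri run on a plain float or double: for packets, pselect evaluates both arguments and blends them through a bit mask, while the scalar overload reduces to an ordinary branch, which is why the implementation comments that only one of the two rational approximations is evaluated in the scalar path. A sketch of that behaviour with an illustrative free function (not the actual Eigen overload):

    #include <cstdio>

    // Scalar analogue of the pselect specialization: a non-zero mask picks
    // the first argument, a zero mask picks the second.
    static double pselect_scalar(double mask, double a, double b) {
      return mask == 0.0 ? b : a;
    }

    int main() {
      std::printf("%g %g\n", pselect_scalar(1.0, 10.0, 20.0),   // 10
                             pselect_scalar(0.0, 10.0, 20.0));  // 20
    }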
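As the Cephes header comment explains, ndtri(y) returns the x at which the standard normal CDF equals y; equivalently it inverts Phi(x) = 0.5 * erfc(-x / sqrt(2)). The reference values hard-coded in the new tests can be sanity-checked with the C standard library alone; a small check that does not use Eigen at all:

    #include <cmath>
    #include <cstdio>

    // Standard normal CDF expressed through erfc.
    static double normal_cdf(double x) {
      return 0.5 * std::erfc(-x / std::sqrt(2.0));
    }

    int main() {
      // The tests expect ndtri(0.8)  == 0.8416212335729142 and
      // ndtri(0.99) == 2.3263478740408408; applying the CDF to those
      // values should recover 0.8 and 0.99.
      std::printf("%.12f\n", normal_cdf(0.8416212335729142));
      std::printf("%.12f\n", normal_cdf(2.3263478740408408));
    }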
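With this patch applied, ndtri() becomes available on Array expressions for float and double in C++11 mode once unsupported/Eigen/SpecialFunctions is included, alongside the existing erf() and erfc(). A usage sketch mirroring the round-trip check ndtri(normal_cdf(x)) ~= x from unsupported/test/special_functions.cpp; the printed residual is only illustrative:

    #include <Eigen/Core>
    #include <unsupported/Eigen/SpecialFunctions>
    #include <cmath>
    #include <iostream>

    int main() {
      // Random values in [-1, 1], pushed through the standard normal CDF
      // built from erf(), then inverted with the new ndtri().
      Eigen::ArrayXd x = Eigen::ArrayXd::Random(8);
      Eigen::ArrayXd cdf = ((x / std::sqrt(2.0)).erf() + 1.0) / 2.0;
      std::cout << (cdf.ndtri() - x).abs().maxCoeff() << "\n";  // close to machine epsilon
    }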