aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Srinivas Vasudevan <srvasude@google.com>2019-08-12 19:26:29 -0400
committerGravatar Srinivas Vasudevan <srvasude@google.com>2019-08-12 19:26:29 -0400
commit18ceb3413d09afc4f143014f89552f941321209b (patch)
tree9396cacc0bd0e528219ea16a4c2e17e1aaa627f9
parentd55d392e7b1136655b4223bea8e99cb2fe0a8afd (diff)
Add ndtri function, the inverse of the normal distribution function.
-rw-r--r--Eigen/src/Core/GenericPacketMath.h21
-rw-r--r--Eigen/src/Core/GlobalFunctions.h1
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h1
-rw-r--r--Eigen/src/Core/arch/AVX512/PacketMath.h3
-rw-r--r--Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h55
-rw-r--r--Eigen/src/Core/arch/GPU/PacketMath.h2
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h3
-rw-r--r--Eigen/src/Core/arch/SYCL/InteropHeaders.h1
-rw-r--r--Eigen/src/Core/util/ForwardDeclarations.h1
-rw-r--r--Eigen/src/plugins/ArrayCwiseUnaryOps.h28
-rw-r--r--test/packetmath.cpp6
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBase.h6
-rw-r--r--unsupported/Eigen/SpecialFunctions1
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h25
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h3
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h311
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h7
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h13
-rw-r--r--unsupported/test/cxx11_tensor_gpu.cu64
-rw-r--r--unsupported/test/special_functions.cpp20
20 files changed, 502 insertions, 70 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 6ab38994f..651e3f7b3 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -83,6 +83,7 @@ struct default_packet_traits
HasPolygamma = 0,
HasErf = 0,
HasErfc = 0,
+ HasNdtri = 0,
HasI0e = 0,
HasI1e = 0,
HasIGamma = 0,
@@ -257,12 +258,32 @@ pldexp(const Packet &a, const Packet &exponent) { return std::ldexp(a,exponent);
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pzero(const Packet& a) { return pxor(a,a); }
+template<> EIGEN_DEVICE_FUNC inline float pzero<float>(const float& a) {
+ EIGEN_UNUSED_VARIABLE(a);
+ return 0.;
+}
+
+template<> EIGEN_DEVICE_FUNC inline double pzero<double>(const double& a) {
+ EIGEN_UNUSED_VARIABLE(a);
+ return 0.;
+}
+
/** \internal \returns bits of \a or \b according to the input bit mask \a mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pselect(const Packet& mask, const Packet& a, const Packet& b) {
return por(pand(a,mask),pandnot(b,mask));
}
+template<> EIGEN_DEVICE_FUNC inline float pselect<float>(
+ const float& mask, const float& a, const float&b) {
+ return mask == 0 ? b : a;
+}
+
+template<> EIGEN_DEVICE_FUNC inline double pselect<double>(
+ const double& mask, const double& a, const double& b) {
+ return mask == 0 ? b : a;
+}
+
/** \internal \returns a <= b as a bit mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index 71377cee5..7f132bdd0 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -76,6 +76,7 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri,scalar_ndtri_op,inverse normal distribution function,\sa ArrayBase::ndtri)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 7ee9dee10..9feb96f8b 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -66,6 +66,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasCos = EIGEN_FAST_MATH,
HasLog = 1,
HasExp = 1,
+ HasNdtri = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasTanh = EIGEN_FAST_MATH,
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 383c49636..1312829d8 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -60,6 +60,9 @@ template<> struct packet_traits<float> : default_packet_traits
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
#ifdef EIGEN_VECTORIZE_AVX512DQ
HasLog = 1,
+ HasLog1p = 1,
+ HasExpm1 = 1,
+ HasNdtri = 1,
#endif
HasExp = 1,
HasSqrt = EIGEN_FAST_MATH,
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 43e827638..b021bd0b7 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -467,5 +467,60 @@ Packet pcos_float(const Packet& x)
return psincos_float<false>(x);
}
+/* polevl (modified for Eigen)
+ *
+ * Evaluate polynomial
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * int N;
+ * Scalar x, y, coef[N+1];
+ *
+ * y = polevl<decltype(x), N>( x, coef);
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Evaluates polynomial of degree N:
+ *
+ * 2 N
+ * y = C + C x + C x +...+ C x
+ * 0 1 2 N
+ *
+ * Coefficients are stored in reverse order:
+ *
+ * coef[0] = C , ..., coef[N] = C .
+ * N 0
+ *
+ * The function p1evl() assumes that coef[N] = 1.0 and is
+ * omitted from the array. Its calling arguments are
+ * otherwise the same as polevl().
+ *
+ *
+ * The Eigen implementation is templatized. For best speed, store
+ * coef as a const array (constexpr), e.g.
+ *
+ * const double coef[] = {1.0, 2.0, 3.0, ...};
+ *
+ */
+template <typename Packet, int N>
+struct ppolevl {
+ static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
+ EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
+ }
+};
+
+template <typename Packet>
+struct ppolevl<Packet, 0> {
+ static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
+ EIGEN_UNUSED_VARIABLE(x);
+ return pset1<Packet>(coeff[0]);
+ }
+};
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index 5084fc786..6ba2990d1 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -44,6 +44,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasPolygamma = 1,
HasErf = 1,
HasErfc = 1,
+ HasNdtri = 1,
HasI0e = 1,
HasI1e = 1,
HasIGamma = 1,
@@ -78,6 +79,7 @@ template<> struct packet_traits<double> : default_packet_traits
HasPolygamma = 1,
HasErf = 1,
HasErfc = 1,
+ HasNdtri = 1,
HasI0e = 1,
HasI1e = 1,
HasIGamma = 1,
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 0d571ce61..69daea8f7 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -111,6 +111,9 @@ template<> struct packet_traits<float> : default_packet_traits
HasCos = EIGEN_FAST_MATH,
HasLog = 1,
HasExp = 1,
+ HasLog1p = 1,
+ HasExpm1 = 1,
+ HasNdtri = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasTanh = EIGEN_FAST_MATH,
diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
index ef66fc7de..d76030419 100644
--- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h
+++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
@@ -54,6 +54,7 @@ struct sycl_packet_traits : default_packet_traits {
HasPolygamma = 0,
HasErf = 0,
HasErfc = 0,
+ HasNdtri = 0,
HasIGamma = 0,
HasIGammac = 0,
HasBetaInc = 0,
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 050d15e96..18889fcf4 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -214,6 +214,7 @@ template<typename Scalar> struct scalar_lgamma_op;
template<typename Scalar> struct scalar_digamma_op;
template<typename Scalar> struct scalar_erf_op;
template<typename Scalar> struct scalar_erfc_op;
+template<typename Scalar> struct scalar_ndtri_op;
template<typename Scalar> struct scalar_igamma_op;
template<typename Scalar> struct scalar_igammac_op;
template<typename Scalar> struct scalar_zeta_op;
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index 2f99ee0b2..4aef72d92 100644
--- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -536,14 +536,12 @@ typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaRe
typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
+typedef CwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived> NdtriReturnType;
/** \cpp11 \returns an expression of the coefficient-wise ln(|gamma(*this)|).
*
* \specialfunctions_module
*
- * Example: \include Cwise_lgamma.cpp
- * Output: \verbinclude Cwise_lgamma.out
- *
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar
* type T to be supported.
@@ -579,9 +577,6 @@ digamma() const
*
* \specialfunctions_module
*
- * Example: \include Cwise_erf.cpp
- * Output: \verbinclude Cwise_erf.out
- *
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar
* type T to be supported.
@@ -600,9 +595,6 @@ erf() const
*
* \specialfunctions_module
*
- * Example: \include Cwise_erfc.cpp
- * Output: \verbinclude Cwise_erfc.out
- *
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar
* type T to be supported.
@@ -615,3 +607,21 @@ erfc() const
{
return ErfcReturnType(derived());
}
+
+/** \cpp11 \returns an expression of the coefficient-wise Complementary error
+ * function of *this.
+ *
+ * \specialfunctions_module
+ *
+ * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
+ * or float/double in non c++11 mode, the user has to provide implementations of ndtri(T) for any scalar
+ * type T to be supported.
+ *
+ * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_ndtri">Math functions</a>, erf()
+ */
+EIGEN_DEVICE_FUNC
+inline const NdtriReturnType
+ndtri() const
+{
+ return NdtriReturnType(derived());
+}
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index f1448f335..d65869de7 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -590,6 +590,12 @@ template<typename Scalar,typename Packet> void packetmath_real()
h.store(data2, internal::perfc(h.load(data1)));
VERIFY((numext::isnan)(data2[0]));
}
+ {
+ for (int i=0; i<size; ++i) {
+ data1[i] = internal::random<Scalar>(0,1);
+ }
+ CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasNdtri, numext::ndtri, internal::pndtri);
+ }
#endif // EIGEN_HAS_C99_MATH
for (int i=0; i<size; ++i)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index dbacf494e..dbcfa4760 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -202,6 +202,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived>
+ ndtri() const {
+ return unaryExpr(internal::scalar_ndtri_op<Scalar>());
+ }
+
+ EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived>
sigmoid() const {
return unaryExpr(internal::scalar_logistic_op<Scalar>());
diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions
index 44fd99b43..a5abd407d 100644
--- a/unsupported/Eigen/SpecialFunctions
+++ b/unsupported/Eigen/SpecialFunctions
@@ -33,6 +33,7 @@ namespace Eigen {
* - gamma_sample_der_alpha
* - igammac
* - digamma
+ * - ndtri
* - polygamma
* - zeta
* - betainc
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
index c6fac91bb..13a72a3ee 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
@@ -284,6 +284,31 @@ struct functor_traits<scalar_erfc_op<Scalar> >
};
/** \internal
+ * \brief Template functor to compute the Inverse of the normal distribution
+ * function of a scalar
+ * \sa class CwiseUnaryOp, Cwise::ndtri()
+ */
+template<typename Scalar> struct scalar_ndtri_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_ndtri_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
+ using numext::ndtri; return ndtri(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pndtri(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_ndtri_op<Scalar> >
+{
+ enum {
+ // On average, We are evaluating rational functions with degree N=9 in the
+ // numerator and denominator. This results in 2*N additions and 2*N
+ // multiplications.
+ Cost = 18 * NumTraits<Scalar>::MulCost + 18 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasNdtri
+ };
+};
+
+/** \internal
* \brief Template functor to compute the exponentially scaled modified Bessel
* function of order zero
* \sa class CwiseUnaryOp, Cwise::i0e()
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
index fbdfd299e..538db2afa 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
@@ -30,6 +30,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::ha
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) {
return Eigen::half(Eigen::numext::erfc(static_cast<float>(a)));
}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ndtri(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::ndtri(static_cast<float>(a)));
+}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
}
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index 1464a9751..78050d0a1 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -38,63 +38,6 @@ namespace internal {
namespace cephes {
-/* polevl (modified for Eigen)
- *
- * Evaluate polynomial
- *
- *
- *
- * SYNOPSIS:
- *
- * int N;
- * Scalar x, y, coef[N+1];
- *
- * y = polevl<decltype(x), N>( x, coef);
- *
- *
- *
- * DESCRIPTION:
- *
- * Evaluates polynomial of degree N:
- *
- * 2 N
- * y = C + C x + C x +...+ C x
- * 0 1 2 N
- *
- * Coefficients are stored in reverse order:
- *
- * coef[0] = C , ..., coef[N] = C .
- * N 0
- *
- * The function p1evl() assumes that coef[N] = 1.0 and is
- * omitted from the array. Its calling arguments are
- * otherwise the same as polevl().
- *
- *
- * The Eigen implementation is templatized. For best speed, store
- * coef as a const array (constexpr), e.g.
- *
- * const double coef[] = {1.0, 2.0, 3.0, ...};
- *
- */
-template <typename Scalar, int N>
-struct polevl {
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) {
- EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
- }
-};
-
-template <typename Scalar>
-struct polevl<Scalar, 0> {
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) {
- return coef[0];
- }
-};
-
/* chbevl (modified for Eigen)
*
* Evaluate Chebyshev series
@@ -190,7 +133,7 @@ template <>
struct lgamma_impl<float> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE float run(float x) {
-#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
+#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
int dummy;
return ::lgammaf_r(x, &dummy);
#elif defined(SYCL_DEVICE_ONLY)
@@ -205,7 +148,7 @@ template <>
struct lgamma_impl<double> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE double run(double x) {
-#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
+#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
int dummy;
return ::lgamma_r(x, &dummy);
#elif defined(SYCL_DEVICE_ONLY)
@@ -264,7 +207,7 @@ struct digamma_impl_maybe_poly<float> {
float z;
if (s < 1.0e8f) {
z = 1.0f / (s * s);
- return z * cephes::polevl<float, 3>::run(z, A);
+ return z * internal::ppolevl<float, 3>::run(z, A);
} else return 0.0f;
}
};
@@ -286,7 +229,7 @@ struct digamma_impl_maybe_poly<double> {
double z;
if (s < 1.0e17) {
z = 1.0 / (s * s);
- return z * cephes::polevl<double, 6>::run(z, A);
+ return z * internal::ppolevl<double, 6>::run(z, A);
}
else return 0.0;
}
@@ -494,6 +437,246 @@ struct erfc_impl<double> {
};
#endif // EIGEN_HAS_C99_MATH
+
+/***************************************************************************
+* Implementation of ndtri. *
+****************************************************************************/
+
+/* Inverse of Normal distribution function (modified for Eigen).
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, ndtri();
+ *
+ * x = ndtri( y );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns the argument, x, for which the area under the
+ * Gaussian probability density function (integrated from
+ * minus infinity to x) is equal to y.
+ *
+ *
+ * For small arguments 0 < y < exp(-2), the program computes
+ * z = sqrt( -2.0 * log(y) ); then the approximation is
+ * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z).
+ * There are two rational functions P/Q, one for 0 < y < exp(-32)
+ * and the other for y up to exp(-2). For larger arguments,
+ * w = y - 0.5, and x/sqrt(2pi) = w + w**3 R(w**2)/S(w**2)).
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * DEC 0.125, 1 5500 9.5e-17 2.1e-17
+ * DEC 6e-39, 0.135 3500 5.7e-17 1.3e-17
+ * IEEE 0.125, 1 20000 7.2e-16 1.3e-16
+ * IEEE 3e-308, 0.135 50000 4.6e-16 9.8e-17
+ *
+ *
+ * ERROR MESSAGES:
+ *
+ * message condition value returned
+ * ndtri domain x <= 0 -MAXNUM
+ * ndtri domain x >= 1 MAXNUM
+ *
+ */
+ /*
+ Cephes Math Library Release 2.2: June, 1992
+ Copyright 1985, 1987, 1992 by Stephen L. Moshier
+ Direct inquiries to 30 Frost Street, Cambridge, MA 02140
+ */
+
+
+// TODO: Add a cheaper approximation for float.
+
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T flipsign(
+ const T& should_flipsign, const T& x) {
+ const T sign_mask = pset1<T>(-0.0);
+ T sign_bit = pand<T>(should_flipsign, sign_mask);
+ return pxor<T>(sign_bit, x);
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double flipsign<double>(
+ const double& should_flipsign, const double& x) {
+ return should_flipsign == 0 ? x : -x;
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float flipsign<float>(
+ const float& should_flipsign, const float& x) {
+ return should_flipsign == 0 ? x : -x;
+}
+
+// We split this computation in to two so that in the scalar path
+// only one branch is evaluated (due to our template specialization of pselect
+// being an if statement.)
+
+template <typename T, typename ScalarType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_gt_exp_neg_two(const T& b) {
+ const ScalarType p0[] = {
+ ScalarType(-5.99633501014107895267e1),
+ ScalarType(9.80010754185999661536e1),
+ ScalarType(-5.66762857469070293439e1),
+ ScalarType(1.39312609387279679503e1),
+ ScalarType(-1.23916583867381258016e0)
+ };
+ const ScalarType q0[] = {
+ ScalarType(1.0),
+ ScalarType(1.95448858338141759834e0),
+ ScalarType(4.67627912898881538453e0),
+ ScalarType(8.63602421390890590575e1),
+ ScalarType(-2.25462687854119370527e2),
+ ScalarType(2.00260212380060660359e2),
+ ScalarType(-8.20372256168333339912e1),
+ ScalarType(1.59056225126211695515e1),
+ ScalarType(-1.18331621121330003142e0)
+ };
+ const T sqrt2pi = pset1<T>(ScalarType(2.50662827463100050242e0));
+ const T half = pset1<T>(ScalarType(0.5));
+ T c, c2, ndtri_gt_exp_neg_two;
+
+ c = psub(b, half);
+ c2 = pmul(c, c);
+ ndtri_gt_exp_neg_two = pmadd(c, pmul(
+ c2, pdiv(
+ internal::ppolevl<T, 4>::run(c2, p0),
+ internal::ppolevl<T, 8>::run(c2, q0))), c);
+ return pmul(ndtri_gt_exp_neg_two, sqrt2pi);
+}
+
+template <typename T, typename ScalarType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two(
+ const T& b, const T& should_flipsign) {
+ /* Approximation for interval z = sqrt(-2 log a ) between 2 and 8
+ * i.e., a between exp(-2) = .135 and exp(-32) = 1.27e-14.
+ */
+ const ScalarType p1[] = {
+ ScalarType(4.05544892305962419923e0),
+ ScalarType(3.15251094599893866154e1),
+ ScalarType(5.71628192246421288162e1),
+ ScalarType(4.40805073893200834700e1),
+ ScalarType(1.46849561928858024014e1),
+ ScalarType(2.18663306850790267539e0),
+ ScalarType(-1.40256079171354495875e-1),
+ ScalarType(-3.50424626827848203418e-2),
+ ScalarType(-8.57456785154685413611e-4)
+ };
+ const ScalarType q1[] = {
+ ScalarType(1.0),
+ ScalarType(1.57799883256466749731e1),
+ ScalarType(4.53907635128879210584e1),
+ ScalarType(4.13172038254672030440e1),
+ ScalarType(1.50425385692907503408e1),
+ ScalarType(2.50464946208309415979e0),
+ ScalarType(-1.42182922854787788574e-1),
+ ScalarType(-3.80806407691578277194e-2),
+ ScalarType(-9.33259480895457427372e-4)
+ };
+ /* Approximation for interval z = sqrt(-2 log a ) between 8 and 64
+ * i.e., a between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
+ */
+ const ScalarType p2[] = {
+ ScalarType(3.23774891776946035970e0),
+ ScalarType(6.91522889068984211695e0),
+ ScalarType(3.93881025292474443415e0),
+ ScalarType(1.33303460815807542389e0),
+ ScalarType(2.01485389549179081538e-1),
+ ScalarType(1.23716634817820021358e-2),
+ ScalarType(3.01581553508235416007e-4),
+ ScalarType(2.65806974686737550832e-6),
+ ScalarType(6.23974539184983293730e-9)
+ };
+ const ScalarType q2[] = {
+ ScalarType(1.0),
+ ScalarType(6.02427039364742014255e0),
+ ScalarType(3.67983563856160859403e0),
+ ScalarType(1.37702099489081330271e0),
+ ScalarType(2.16236993594496635890e-1),
+ ScalarType(1.34204006088543189037e-2),
+ ScalarType(3.28014464682127739104e-4),
+ ScalarType(2.89247864745380683936e-6),
+ ScalarType(6.79019408009981274425e-9)
+ };
+ const T eight = pset1<T>(ScalarType(8.0));
+ const T one = pset1<T>(ScalarType(1));
+ const T neg_two = pset1<T>(ScalarType(-2));
+ T x, x0, x1, z;
+
+ x = psqrt(pmul(neg_two, plog(b)));
+ x0 = psub(x, pdiv(plog(x), x));
+ z = one / x;
+ x1 = pmul(
+ z, pselect(
+ pcmp_lt(x, eight),
+ pdiv(internal::ppolevl<T, 8>::run(z, p1),
+ internal::ppolevl<T, 8>::run(z, q1)),
+ pdiv(internal::ppolevl<T, 8>::run(z, p2),
+ internal::ppolevl<T, 8>::run(z, q2))));
+ return flipsign(should_flipsign, psub(x0, x1));
+}
+
+template <typename T, typename ScalarType>
+T generic_ndtri(const T& a) {
+ const T maxnum = pset1<T>(NumTraits<ScalarType>::infinity());
+ const T neg_maxnum = pset1<T>(-NumTraits<ScalarType>::infinity());
+
+ const T zero = pset1<T>(ScalarType(0));
+ const T one = pset1<T>(ScalarType(1));
+ // exp(-2)
+ const T exp_neg_two = pset1<T>(ScalarType(0.13533528323661269189));
+ T b, ndtri, should_flipsign;
+
+ should_flipsign = pcmp_le(a, psub(one, exp_neg_two));
+ b = pselect(should_flipsign, a, psub(one, a));
+
+ ndtri = pselect(
+ pcmp_lt(exp_neg_two, b),
+ generic_ndtri_gt_exp_neg_two<T, ScalarType>(b),
+ generic_ndtri_lt_exp_neg_two<T, ScalarType>(b, should_flipsign));
+
+ return pselect(
+ pcmp_le(a, zero), neg_maxnum,
+ pselect(pcmp_le(one, a), maxnum, ndtri));
+}
+
+template <typename Scalar>
+struct ndtri_retval {
+ typedef Scalar type;
+};
+
+#if !EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct ndtri_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+# else
+
+template <typename Scalar>
+struct ndtri_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar x) {
+ return generic_ndtri<Scalar, Scalar>(x);
+ }
+};
+
+#endif // EIGEN_HAS_C99_MATH
+
+
/**************************************************************************************************************
* Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 *
**************************************************************************************************************/
@@ -2121,6 +2304,12 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
}
template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(ndtri, Scalar)
+ ndtri(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(ndtri, Scalar)::run(x);
+}
+
+template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
igamma(const Scalar& a, const Scalar& x) {
return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x);
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
index 465f41d54..b6323c4db 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -38,6 +38,13 @@ Packet perf(const Packet& a) { using numext::erf; return erf(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
+/** \internal \returns the ndtri(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pndtri(const Packet& a) {
+ typedef typename unpacket_traits<Packet>::type ScalarType;
+ using internal::generic_ndtri; return generic_ndtri<Packet, ScalarType>(a);
+}
+
/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h
index 40abcee3a..c831edc17 100644
--- a/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h
@@ -101,6 +101,19 @@ double2 perfc<double2>(const double2& a)
return make_double2(erfc(a.x), erfc(a.y));
}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pndtri<float4>(const float4& a)
+{
+ using numext::ndtri;
+ return make_float4(ndtri(a.x), ndtri(a.y), ndtri(a.z), ndtri(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pndtri<double2>(const double2& a)
+{
+ using numext::ndtri;
+ return make_double2(ndtri(a.x), ndtri(a.y));
+}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pigamma<float4>(const float4& a, const float4& x)
diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu
index 94625e6a3..0a2fa8e61 100644
--- a/unsupported/test/cxx11_tensor_gpu.cu
+++ b/unsupported/test/cxx11_tensor_gpu.cu
@@ -1069,6 +1069,66 @@ void test_gpu_erfc(const Scalar stddev)
gpuFree(d_out);
}
#endif
+template <typename Scalar>
+void test_gpu_ndtri()
+{
+ Tensor<Scalar, 1> in_x(8);
+ Tensor<Scalar, 1> out(8);
+ Tensor<Scalar, 1> expected_out(8);
+ out.setZero();
+
+ in_x(0) = Scalar(1);
+ in_x(1) = Scalar(0.);
+ in_x(2) = Scalar(0.5);
+ in_x(3) = Scalar(0.2);
+ in_x(4) = Scalar(0.8);
+ in_x(5) = Scalar(0.9);
+ in_x(6) = Scalar(0.1);
+ in_x(7) = Scalar(0.99);
+ in_x(8) = Scalar(0.01);
+
+ expected_out(0) = std::numeric_limits<Scalar>::infinity();
+ expected_out(1) = -std::numeric_limits<Scalar>::infinity();
+ expected_out(2) = Scalar(0.0);
+ expected_out(3) = Scalar(-0.8416212335729142);
+ expected_out(4) = Scalar(0.8416212335729142);j
+ expected_out(5) = Scalar(1.2815515655446004);
+ expected_out(6) = Scalar(-1.2815515655446004);
+ expected_out(7) = Scalar(2.3263478740408408);
+ expected_out(8) = Scalar(-2.3263478740408408);
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in_x;
+ Scalar* d_out;
+ gpuMalloc((void**)(&d_in_x), bytes);
+ gpuMalloc((void**)(&d_out), bytes);
+
+ gpuMemcpy(d_in_x, in_x.data(), bytes, gpuMemcpyHostToDevice);
+
+ Eigen::GpuStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6);
+
+ gpu_out.device(gpu_device) = gpu_in_x.ndtri();
+
+ assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
+ assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
+
+ VERIFY_IS_EQUAL(out(0), expected_out(0));
+ VERIFY((std::isnan)(out(3)));
+
+ for (int i = 1; i < 6; ++i) {
+ if (i != 3) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+ }
+
+ gpuFree(d_in_x);
+ gpuFree(d_out);
+}
template <typename Scalar>
void test_gpu_betainc()
@@ -1538,6 +1598,10 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
#if !defined(EIGEN_USE_HIP)
// disable these tests on HIP for now.
+
+ CALL_SUBTEST_5(test_gpu_ndtri<float>());
+ CALL_SUBTEST_5(test_gpu_ndtri<double>());
+
CALL_SUBTEST_5(test_gpu_digamma<float>());
CALL_SUBTEST_5(test_gpu_digamma<double>());
diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp
index 50dae6c93..140a5e4c1 100644
--- a/unsupported/test/special_functions.cpp
+++ b/unsupported/test/special_functions.cpp
@@ -133,6 +133,26 @@ template<typename ArrayType> void array_special_functions()
}
#endif // EIGEN_HAS_C99_MATH
+ // Check the ndtri function against scipy.special.ndtri
+ {
+ ArrayType x(7), res(7), ref(7);
+ x << 0.5, 0.2, 0.8, 0.9, 0.1, 0.99, 0.01;
+ ref << 0., -0.8416212335729142, 0.8416212335729142, 1.2815515655446004, -1.2815515655446004, 2.3263478740408408, -2.3263478740408408;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+ CALL_SUBTEST( res = x.ndtri(); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = ndtri(x); verify_component_wise(res, ref); );
+
+ // ndtri(normal_cdf(x)) ~= x
+ CALL_SUBTEST(
+ ArrayType m1 = ArrayType::Random(32);
+ using std::sqrt;
+
+ ArrayType cdf_val = (m1 / sqrt(2.)).erf();
+ cdf_val = (cdf_val + 1.) / 2.;
+ verify_component_wise(cdf_val.ndtri(), m1););
+
+ }
+
// Check the zeta function against scipy.special.zeta
{
ArrayType x(7), q(7), res(7), ref(7);