diff options
author | 2018-06-05 17:05:21 +0000 | |
---|---|---|
committer | 2018-06-05 17:05:21 +0000 | |
commit | e206f8d4a401fe2060bada4d4b5d92e3bf3b561c (patch) | |
tree | c5613a06e00edbb6a5c804ee39858911b87ef1e6 | |
parent | e2ed0cf8abfe275bd147f3228e4c59f5242993f6 (diff) | |
parent | f216854453887f31ac02ffefb7a7a569dc3fa54d (diff) |
Merged in mfigurnov/eigen (pull request PR-400)
Exponentially scaled modified Bessel functions of order zero and one.
Approved-by: Benoit Steiner <benoit.steiner.goog@gmail.com>
12 files changed, 731 insertions, 0 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 7979c3aff..888a3f7ea 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -82,6 +82,8 @@ struct default_packet_traits HasPolygamma = 0, HasErf = 0, HasErfc = 0, + HasI0e = 0, + HasI1e = 0, HasIGamma = 0, HasIGammac = 0, HasBetaInc = 0, diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 97a8abe59..704a4e0d9 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -44,6 +44,8 @@ template<> struct packet_traits<float> : default_packet_traits HasPolygamma = 1, HasErf = 1, HasErfc = 1, + HasI0e = 1, + HasI1e = 1, HasIGamma = 1, HasIGammac = 1, HasBetaInc = 1, @@ -73,6 +75,8 @@ template<> struct packet_traits<double> : default_packet_traits HasPolygamma = 1, HasErf = 1, HasErfc = 1, + HasI0e = 1, + HasI1e = 1, HasIGamma = 1, HasIGammac = 1, HasBetaInc = 1, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 745b16d2c..a942c98dd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -133,6 +133,18 @@ class TensorBase<Derived, ReadOnlyAccessors> return unaryExpr(internal::scalar_digamma_op<Scalar>()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i0e_op<Scalar>, const Derived> + i0e() const { + return unaryExpr(internal::scalar_i0e_op<Scalar>()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i1e_op<Scalar>, const Derived> + i1e() const { + return unaryExpr(internal::scalar_i1e_op<Scalar>()); + } + // igamma(a = this, x = other) template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived> diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions index a2ad4925e..482ec6e6f 100644 --- a/unsupported/Eigen/SpecialFunctions +++ b/unsupported/Eigen/SpecialFunctions @@ -34,6 +34,8 @@ namespace Eigen { * - polygamma * - zeta * - betainc + * - i0e + * - i1e * * \code * #include <unsupported/Eigen/SpecialFunctions> diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h index ed415db99..b7a9d035b 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h @@ -119,6 +119,52 @@ zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q) ); } +/** \returns an expression of the coefficient-wise i0e(\a x) to the given + * arrays. + * + * It returns the exponentially scaled modified Bessel + * function of order zero. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i0e(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::i0e() + */ +template <typename Derived> +inline const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_i0e_op<typename Derived::Scalar>, const Derived> +i0e(const Eigen::ArrayBase<Derived>& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_i0e_op<typename Derived::Scalar>, + const Derived>(x.derived()); +} + +/** \returns an expression of the coefficient-wise i1e(\a x) to the given + * arrays. + * + * It returns the exponentially scaled modified Bessel + * function of order one. + * + * \param x is the argument + * + * \note This function supports only float and double scalar types. To support + * other scalar types, the user has to provide implementations of i1e(T) for + * any scalar type T to be supported. + * + * \sa ArrayBase::i1e() + */ +template <typename Derived> +inline const Eigen::CwiseUnaryOp< + Eigen::internal::scalar_i1e_op<typename Derived::Scalar>, const Derived> +i1e(const Eigen::ArrayBase<Derived>& x) { + return Eigen::CwiseUnaryOp< + Eigen::internal::scalar_i1e_op<typename Derived::Scalar>, + const Derived>(x.derived()); +} + } // end namespace Eigen #endif // EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h index d8f2363be..8420f0174 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h @@ -229,6 +229,60 @@ struct functor_traits<scalar_erfc_op<Scalar> > }; }; +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of order zero + * \sa class CwiseUnaryOp, Cwise::i0e() + */ +template <typename Scalar> +struct scalar_i0e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_i0e_op) + EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const { + using numext::i0e; + return i0e(x); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { + return internal::pi0e(x); + } +}; +template <typename Scalar> +struct functor_traits<scalar_i0e_op<Scalar> > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. + Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasI0e + }; +}; + +/** \internal + * \brief Template functor to compute the exponentially scaled modified Bessel + * function of order zero + * \sa class CwiseUnaryOp, Cwise::i1e() + */ +template <typename Scalar> +struct scalar_i1e_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_i1e_op) + EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const { + using numext::i1e; + return i1e(x); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { + return internal::pi1e(x); + } +}; +template <typename Scalar> +struct functor_traits<scalar_i1e_op<Scalar> > { + enum { + // On average, a Chebyshev polynomial of order N=20 is computed. + // The cost is N multiplications and 2N additions. + Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasI1e + }; +}; + } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h index 553bcda6a..c5867002e 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h @@ -39,6 +39,14 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) { return Eigen::half(Eigen::numext::betainc(static_cast<float>(a), static_cast<float>(b), static_cast<float>(x))); } +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i0e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::i0e(static_cast<float>(x))); +} +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i1e(const Eigen::half& x) { + return Eigen::half(Eigen::numext::i1e(static_cast<float>(x))); +} #endif } // end namespace numext diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 97b2e5a91..6c7ac3f3b 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -95,6 +95,75 @@ struct polevl<Scalar, 0> { } }; +/* chbevl (modified for Eigen) + * + * Evaluate Chebyshev series + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N], chebevl(); + * + * y = chbevl( x, coef, N ); + * + * + * + * DESCRIPTION: + * + * Evaluates the series + * + * N-1 + * - ' + * y = > coef[i] T (x/2) + * - i + * i=0 + * + * of Chebyshev polynomials Ti at argument x/2. + * + * Coefficients are stored in reverse order, i.e. the zero + * order term is last in the array. Note N is the number of + * coefficients, not the order. + * + * If coefficients are for the interval a to b, x must + * have been transformed to x -> 2(2x - b - a)/(b-a) before + * entering the routine. This maps x from (a, b) to (-1, 1), + * over which the Chebyshev polynomials are defined. + * + * If the coefficients are for the inverted interval, in + * which (a, b) is mapped to (1/b, 1/a), the transformation + * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity, + * this becomes x -> 4a/x - 1. + * + * + * + * SPEED: + * + * Taking advantage of the recurrence properties of the + * Chebyshev polynomials, the routine requires one more + * addition per loop than evaluating a nested polynomial of + * the same degree. + * + */ +template <typename Scalar, int N> +struct chebevl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar x, const Scalar coef[]) { + Scalar b0 = coef[0]; + Scalar b1 = 0; + Scalar b2; + + for (int i = 1; i < N; i++) { + b2 = b1; + b1 = b0; + b0 = x * b1 - b2 + coef[i]; + } + + return Scalar(0.5) * (b0 - b2); + } +}; + } // end namespace cephes /**************************************************************************** @@ -1505,6 +1574,334 @@ struct betainc_impl<double> { } }; +/**************************************************************************** + * Implementation of Bessel function, based on Cephes * + ****************************************************************************/ + +template <typename Scalar> +struct i0e_retval { + typedef Scalar type; +}; + +template <typename Scalar> +struct i0e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template <> +struct i0e_impl<float> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(float x) { + /* i0ef.c + * + * Modified Bessel function of order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, i0ef(); + * + * y = i0ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order zero of the argument. + * + * The function is defined as i0e(x) = exp(-|x|) j0( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 100000 3.7e-7 7.0e-8 + * See i0f(). + * + */ + const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f, + -2.67079385394061173391E-7f, 1.11738753912010371815E-6f, + -4.41673835845875056359E-6f, 1.64484480707288970893E-5f, + -5.75419501008210370398E-5f, 1.88502885095841655729E-4f, + -5.76375574538582365885E-4f, 1.63947561694133579842E-3f, + -4.32430999505057594430E-3f, 1.05464603945949983183E-2f, + -2.37374148058994688156E-2f, 4.93052842396707084878E-2f, + -9.49010970480476444210E-2f, 1.71620901522208775349E-1f, + -3.04682672343198398683E-1f, 6.76795274409476084995E-1f}; + + const float B[] = {3.39623202570838634515E-9f, 2.26666899049817806459E-8f, + 2.04891858946906374183E-7f, 2.89137052083475648297E-6f, + 6.88975834691682398426E-5f, 3.36911647825569408990E-3f, + 8.04490411014108831608E-1f}; + if (x < 0.0f) { + x = -x; + } + + if (x <= 8.0f) { + float y = 0.5f * x - 2.0f; + return cephes::chebevl<float, 18>::run(y, A); + } + + return cephes::chebevl<float, 7>::run(32.0f / x - 2.0f, B) / numext::sqrt(x); + } +}; + +template <> +struct i0e_impl<double> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(double x) { + /* i0e.c + * + * Modified Bessel function of order zero, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, i0e(); + * + * y = i0e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order zero of the argument. + * + * The function is defined as i0e(x) = exp(-|x|) j0( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0,30 30000 5.4e-16 1.2e-16 + * See i0(). + * + */ + const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17, + -2.43127984654795469359E-16, 1.71539128555513303061E-15, + -1.16853328779934516808E-14, 7.67618549860493561688E-14, + -4.85644678311192946090E-13, 2.95505266312963983461E-12, + -1.72682629144155570723E-11, 9.67580903537323691224E-11, + -5.18979560163526290666E-10, 2.65982372468238665035E-9, + -1.30002500998624804212E-8, 6.04699502254191894932E-8, + -2.67079385394061173391E-7, 1.11738753912010371815E-6, + -4.41673835845875056359E-6, 1.64484480707288970893E-5, + -5.75419501008210370398E-5, 1.88502885095841655729E-4, + -5.76375574538582365885E-4, 1.63947561694133579842E-3, + -4.32430999505057594430E-3, 1.05464603945949983183E-2, + -2.37374148058994688156E-2, 4.93052842396707084878E-2, + -9.49010970480476444210E-2, 1.71620901522208775349E-1, + -3.04682672343198398683E-1, 6.76795274409476084995E-1}; + const double B[] = { + -7.23318048787475395456E-18, -4.83050448594418207126E-18, + 4.46562142029675999901E-17, 3.46122286769746109310E-17, + -2.82762398051658348494E-16, -3.42548561967721913462E-16, + 1.77256013305652638360E-15, 3.81168066935262242075E-15, + -9.55484669882830764870E-15, -4.15056934728722208663E-14, + 1.54008621752140982691E-14, 3.85277838274214270114E-13, + 7.18012445138366623367E-13, -1.79417853150680611778E-12, + -1.32158118404477131188E-11, -3.14991652796324136454E-11, + 1.18891471078464383424E-11, 4.94060238822496958910E-10, + 3.39623202570838634515E-9, 2.26666899049817806459E-8, + 2.04891858946906374183E-7, 2.89137052083475648297E-6, + 6.88975834691682398426E-5, 3.36911647825569408990E-3, + 8.04490411014108831608E-1}; + + if (x < 0.0) { + x = -x; + } + + if (x <= 8.0) { + double y = (x / 2.0) - 2.0; + return cephes::chebevl<double, 30>::run(y, A); + } + + return cephes::chebevl<double, 25>::run(32.0 / x - 2.0, B) / + numext::sqrt(x); + } +}; + +template <typename Scalar> +struct i1e_retval { + typedef Scalar type; +}; + +template <typename Scalar> +struct i1e_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template <> +struct i1e_impl<float> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(float x) { + /* i1ef.c + * + * Modified Bessel function of order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * float x, y, i1ef(); + * + * y = i1ef( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order one of the argument. + * + * The function is defined as i1(x) = -i exp(-|x|) j1( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 1.5e-6 1.5e-7 + * See i1(). + * + */ + const float A[] = {9.38153738649577178388E-9f, -4.44505912879632808065E-8f, + 2.00329475355213526229E-7f, -8.56872026469545474066E-7f, + 3.47025130813767847674E-6f, -1.32731636560394358279E-5f, + 4.78156510755005422638E-5f, -1.61760815825896745588E-4f, + 5.12285956168575772895E-4f, -1.51357245063125314899E-3f, + 4.15642294431288815669E-3f, -1.05640848946261981558E-2f, + 2.47264490306265168283E-2f, -5.29459812080949914269E-2f, + 1.02643658689847095384E-1f, -1.76416518357834055153E-1f, + 2.52587186443633654823E-1f}; + + const float B[] = {-3.83538038596423702205E-9f, -2.63146884688951950684E-8f, + -2.51223623787020892529E-7f, -3.88256480887769039346E-6f, + -1.10588938762623716291E-4f, -9.76109749136146840777E-3f, + 7.78576235018280120474E-1f}; + + float z = numext::abs(x); + + if (z <= 8.0f) { + float y = 0.5f * z - 2.0f; + z = cephes::chebevl<float, 17>::run(y, A) * z; + } else { + z = cephes::chebevl<float, 7>::run(32.0f / z - 2.0f, B) / numext::sqrt(z); + } + + if (x < 0.0f) { + z = -z; + } + + return z; + } +}; + +template <> +struct i1e_impl<double> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(double x) { + /* i1e.c + * + * Modified Bessel function of order one, + * exponentially scaled + * + * + * + * SYNOPSIS: + * + * double x, y, i1e(); + * + * y = i1e( x ); + * + * + * + * DESCRIPTION: + * + * Returns exponentially scaled modified Bessel function + * of order one of the argument. + * + * The function is defined as i1(x) = -i exp(-|x|) j1( ix ). + * + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0, 30 30000 2.0e-15 2.0e-16 + * See i1(). + * + */ + const double A[] = {2.77791411276104639959E-18, -2.11142121435816608115E-17, + 1.55363195773620046921E-16, -1.10559694773538630805E-15, + 7.60068429473540693410E-15, -5.04218550472791168711E-14, + 3.22379336594557470981E-13, -1.98397439776494371520E-12, + 1.17361862988909016308E-11, -6.66348972350202774223E-11, + 3.62559028155211703701E-10, -1.88724975172282928790E-9, + 9.38153738649577178388E-9, -4.44505912879632808065E-8, + 2.00329475355213526229E-7, -8.56872026469545474066E-7, + 3.47025130813767847674E-6, -1.32731636560394358279E-5, + 4.78156510755005422638E-5, -1.61760815825896745588E-4, + 5.12285956168575772895E-4, -1.51357245063125314899E-3, + 4.15642294431288815669E-3, -1.05640848946261981558E-2, + 2.47264490306265168283E-2, -5.29459812080949914269E-2, + 1.02643658689847095384E-1, -1.76416518357834055153E-1, + 2.52587186443633654823E-1}; + const double B[] = { + 7.51729631084210481353E-18, 4.41434832307170791151E-18, + -4.65030536848935832153E-17, -3.20952592199342395980E-17, + 2.96262899764595013876E-16, 3.30820231092092828324E-16, + -1.88035477551078244854E-15, -3.81440307243700780478E-15, + 1.04202769841288027642E-14, 4.27244001671195135429E-14, + -2.10154184277266431302E-14, -4.08355111109219731823E-13, + -7.19855177624590851209E-13, 2.03562854414708950722E-12, + 1.41258074366137813316E-11, 3.25260358301548823856E-11, + -1.89749581235054123450E-11, -5.58974346219658380687E-10, + -3.83538038596423702205E-9, -2.63146884688951950684E-8, + -2.51223623787020892529E-7, -3.88256480887769039346E-6, + -1.10588938762623716291E-4, -9.76109749136146840777E-3, + 7.78576235018280120474E-1}; + + double z = numext::abs(x); + + if (z <= 8.0) { + double y = (z / 2.0) - 2.0; + z = cephes::chebevl<double, 29>::run(y, A) * z; + } else { + z = cephes::chebevl<double, 25>::run(32.0 / z - 2.0, B) / numext::sqrt(z); + } + + if (x < 0.0) { + z = -z; + } + + return z; + } +}; + #endif // EIGEN_HAS_C99_MATH } // end namespace internal @@ -1565,6 +1962,18 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar) return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x); } +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i0e, Scalar) + i0e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(i0e, Scalar)::run(x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i1e, Scalar) + i1e(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(i1e, Scalar)::run(x); +} + } // end namespace numext diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h index 46d60d323..4c176716b 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h @@ -50,6 +50,18 @@ Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; retur template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); } +/** \internal \returns the exponentially scaled modified Bessel function of + * order zero i0e(\a a) (coeff-wise) */ +template <typename Packet> +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pi0e(const Packet& x) { using numext::i0e; return i0e(x); } + +/** \internal \returns the exponentially scaled modified Bessel function of + * order one i1e(\a a) (coeff-wise) */ +template <typename Packet> +EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pi1e(const Packet& x) { using numext::i1e; return i1e(x); } + } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h index e0e3a8be6..c25fea0b3 100644 --- a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h +++ b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h @@ -156,6 +156,32 @@ double2 pbetainc<double2>(const double2& a, const double2& b, const double2& x) return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y)); } +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi0e<float4>(const float4& x) { + using numext::i0e; + return make_float4(i0e(x.x), i0e(x.y), i0e(x.z), i0e(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pi0e<double2>(const double2& x) { + using numext::i0e; + return make_double2(i0e(x.x), i0e(x.y)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi1e<float4>(const float4& x) { + using numext::i1e; + return make_float4(i1e(x.x), i1e(x.y), i1e(x.z), i1e(x.w)); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 +pi1e<double2>(const double2& x) { + using numext::i1e; + return make_double2(i1e(x.x), i1e(x.y)); +} + #endif } // end namespace internal diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 9584a539f..63d0a345a 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -1208,6 +1208,116 @@ void test_cuda_betainc() cudaFree(d_out); } +template <typename Scalar> +void test_cuda_i0e() +{ + Tensor<Scalar, 1> in_x(21); + Tensor<Scalar, 1> out(21); + Tensor<Scalar, 1> expected_out(21); + out.setZero(); + + Array<Scalar, 1, Dynamic> in_x_array(21); + Array<Scalar, 1, Dynamic> expected_out_array(21); + + in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, + -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected_out_array << 0.0897803118848, 0.0947062952128, 0.100544127361, + 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857, + 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554, + 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163, + 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128, + 0.0897803118848; + + for (int i = 0; i < 21; ++i) { + in_x(i) = in_x_array(i); + expected_out(i) = expected_out_array(i); + } + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21); + + gpu_out.device(gpu_device) = gpu_in.i0e(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 21; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + + cudaFree(d_in); + cudaFree(d_out); +} + +template <typename Scalar> +void test_cuda_i1e() +{ + Tensor<Scalar, 1> in_x(21); + Tensor<Scalar, 1> out(21); + Tensor<Scalar, 1> expected_out(21); + out.setZero(); + + Array<Scalar, 1, Dynamic> in_x_array(21); + Array<Scalar, 1, Dynamic> expected_out_array(21); + + in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, + -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected_out_array << -0.0875062221833, -0.092036796872, -0.0973496147565, + -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293, + -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249, + 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384, + 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872, + 0.0875062221833; + + for (int i = 0; i < 21; ++i) { + in_x(i) = in_x_array(i); + expected_out(i) = expected_out_array(i); + } + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21); + + gpu_out.device(gpu_device) = gpu_in.i1e(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 21; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + + cudaFree(d_in); + cudaFree(d_out); +} + void test_cxx11_tensor_cuda() { @@ -1280,5 +1390,11 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_6(test_cuda_betainc<float>()); CALL_SUBTEST_6(test_cuda_betainc<double>()); + + CALL_SUBTEST_6(test_cuda_i0e<float>()); + CALL_SUBTEST_6(test_cuda_i0e<double>()); + + CALL_SUBTEST_6(test_cuda_i1e<float>()); + CALL_SUBTEST_6(test_cuda_i1e<double>()); #endif } diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp index 057fb3e92..48d0db95e 100644 --- a/unsupported/test/special_functions.cpp +++ b/unsupported/test/special_functions.cpp @@ -335,6 +335,46 @@ template<typename ArrayType> void array_special_functions() ArrayType test = betainc(a, b + one, x) + eps; verify_component_wise(test, expected);); } + + // Test Bessel function i0e. Reference results obtained with SciPy. + { + ArrayType x(21); + ArrayType expected(21); + ArrayType res(21); + + x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, + 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected << 0.0897803118848, 0.0947062952128, 0.100544127361, + 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857, + 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554, + 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163, + 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128, + 0.0897803118848; + + CALL_SUBTEST(res = i0e(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function i1e. Reference results obtained with SciPy. + { + ArrayType x(21); + ArrayType expected(21); + ArrayType res(21); + + x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, + 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected << -0.0875062221833, -0.092036796872, -0.0973496147565, + -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293, + -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249, + 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384, + 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872, + 0.0875062221833; + + CALL_SUBTEST(res = i1e(x); + verify_component_wise(res, expected);); + } #endif } |