aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported
diff options
context:
space:
mode:
authorGravatar Michael Figurnov <mfigurnov@google.com>2018-05-31 15:34:53 +0100
committerGravatar Michael Figurnov <mfigurnov@google.com>2018-05-31 15:34:53 +0100
commitf216854453887f31ac02ffefb7a7a569dc3fa54d (patch)
treebf748705db0da48a0dc8c08989184bcb888861fd /unsupported
parent6af1433cb50af7423a1a69afc24c098af9c76bb1 (diff)
Exponentially scaled modified Bessel functions of order zero and one.
The functions are conventionally called i0e and i1e. The exponentially scaled version is more numerically stable. The standard Bessel functions can be obtained as i0(x) = exp(|x|) i0e(x) The code is ported from Cephes and tested against SciPy.
Diffstat (limited to 'unsupported')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBase.h12
-rw-r--r--unsupported/Eigen/SpecialFunctions2
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h46
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h54
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h8
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h409
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h12
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h26
-rw-r--r--unsupported/test/cxx11_tensor_cuda.cu116
-rw-r--r--unsupported/test/special_functions.cpp40
10 files changed, 725 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 745b16d2c..a942c98dd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -133,6 +133,18 @@ class TensorBase<Derived, ReadOnlyAccessors>
return unaryExpr(internal::scalar_digamma_op<Scalar>());
}
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i0e_op<Scalar>, const Derived>
+ i0e() const {
+ return unaryExpr(internal::scalar_i0e_op<Scalar>());
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i1e_op<Scalar>, const Derived>
+ i1e() const {
+ return unaryExpr(internal::scalar_i1e_op<Scalar>());
+ }
+
// igamma(a = this, x = other)
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived>
diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions
index a2ad4925e..482ec6e6f 100644
--- a/unsupported/Eigen/SpecialFunctions
+++ b/unsupported/Eigen/SpecialFunctions
@@ -34,6 +34,8 @@ namespace Eigen {
* - polygamma
* - zeta
* - betainc
+ * - i0e
+ * - i1e
*
* \code
* #include <unsupported/Eigen/SpecialFunctions>
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
index ed415db99..b7a9d035b 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
@@ -119,6 +119,52 @@ zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q)
);
}
+/** \returns an expression of the coefficient-wise i0e(\a x) to the given
+ * arrays.
+ *
+ * It returns the exponentially scaled modified Bessel
+ * function of order zero.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of i0e(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::i0e()
+ */
+template <typename Derived>
+inline const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_i0e_op<typename Derived::Scalar>, const Derived>
+i0e(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_i0e_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise i1e(\a x) to the given
+ * arrays.
+ *
+ * It returns the exponentially scaled modified Bessel
+ * function of order one.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of i1e(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::i1e()
+ */
+template <typename Derived>
+inline const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_i1e_op<typename Derived::Scalar>, const Derived>
+i1e(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_i1e_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
} // end namespace Eigen
#endif // EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
index d8f2363be..8420f0174 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
@@ -229,6 +229,60 @@ struct functor_traits<scalar_erfc_op<Scalar> >
};
};
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of order zero
+ * \sa class CwiseUnaryOp, Cwise::i0e()
+ */
+template <typename Scalar>
+struct scalar_i0e_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_i0e_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
+ using numext::i0e;
+ return i0e(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
+ return internal::pi0e(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_i0e_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=20 is computed.
+ // The cost is N multiplications and 2N additions.
+ Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasI0e
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of order zero
+ * \sa class CwiseUnaryOp, Cwise::i1e()
+ */
+template <typename Scalar>
+struct scalar_i1e_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_i1e_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
+ using numext::i1e;
+ return i1e(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
+ return internal::pi1e(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_i1e_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=20 is computed.
+ // The cost is N multiplications and 2N additions.
+ Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasI1e
+ };
+};
+
} // end namespace internal
} // end namespace Eigen
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
index 553bcda6a..c5867002e 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
@@ -39,6 +39,14 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) {
return Eigen::half(Eigen::numext::betainc(static_cast<float>(a), static_cast<float>(b), static_cast<float>(x)));
}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i0e(const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::i0e(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i1e(const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::i1e(static_cast<float>(x)));
+}
#endif
} // end namespace numext
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index 97b2e5a91..6c7ac3f3b 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -95,6 +95,75 @@ struct polevl<Scalar, 0> {
}
};
+/* chbevl (modified for Eigen)
+ *
+ * Evaluate Chebyshev series
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * int N;
+ * Scalar x, y, coef[N], chebevl();
+ *
+ * y = chbevl( x, coef, N );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Evaluates the series
+ *
+ * N-1
+ * - '
+ * y = > coef[i] T (x/2)
+ * - i
+ * i=0
+ *
+ * of Chebyshev polynomials Ti at argument x/2.
+ *
+ * Coefficients are stored in reverse order, i.e. the zero
+ * order term is last in the array. Note N is the number of
+ * coefficients, not the order.
+ *
+ * If coefficients are for the interval a to b, x must
+ * have been transformed to x -> 2(2x - b - a)/(b-a) before
+ * entering the routine. This maps x from (a, b) to (-1, 1),
+ * over which the Chebyshev polynomials are defined.
+ *
+ * If the coefficients are for the inverted interval, in
+ * which (a, b) is mapped to (1/b, 1/a), the transformation
+ * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity,
+ * this becomes x -> 4a/x - 1.
+ *
+ *
+ *
+ * SPEED:
+ *
+ * Taking advantage of the recurrence properties of the
+ * Chebyshev polynomials, the routine requires one more
+ * addition per loop than evaluating a nested polynomial of
+ * the same degree.
+ *
+ */
+template <typename Scalar, int N>
+struct chebevl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar x, const Scalar coef[]) {
+ Scalar b0 = coef[0];
+ Scalar b1 = 0;
+ Scalar b2;
+
+ for (int i = 1; i < N; i++) {
+ b2 = b1;
+ b1 = b0;
+ b0 = x * b1 - b2 + coef[i];
+ }
+
+ return Scalar(0.5) * (b0 - b2);
+ }
+};
+
} // end namespace cephes
/****************************************************************************
@@ -1505,6 +1574,334 @@ struct betainc_impl<double> {
}
};
+/****************************************************************************
+ * Implementation of Bessel function, based on Cephes *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct i0e_retval {
+ typedef Scalar type;
+};
+
+template <typename Scalar>
+struct i0e_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <>
+struct i0e_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(float x) {
+ /* i0ef.c
+ *
+ * Modified Bessel function of order zero,
+ * exponentially scaled
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * float x, y, i0ef();
+ *
+ * y = i0ef( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns exponentially scaled modified Bessel function
+ * of order zero of the argument.
+ *
+ * The function is defined as i0e(x) = exp(-|x|) j0( ix ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 100000 3.7e-7 7.0e-8
+ * See i0f().
+ *
+ */
+ const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f,
+ -2.67079385394061173391E-7f, 1.11738753912010371815E-6f,
+ -4.41673835845875056359E-6f, 1.64484480707288970893E-5f,
+ -5.75419501008210370398E-5f, 1.88502885095841655729E-4f,
+ -5.76375574538582365885E-4f, 1.63947561694133579842E-3f,
+ -4.32430999505057594430E-3f, 1.05464603945949983183E-2f,
+ -2.37374148058994688156E-2f, 4.93052842396707084878E-2f,
+ -9.49010970480476444210E-2f, 1.71620901522208775349E-1f,
+ -3.04682672343198398683E-1f, 6.76795274409476084995E-1f};
+
+ const float B[] = {3.39623202570838634515E-9f, 2.26666899049817806459E-8f,
+ 2.04891858946906374183E-7f, 2.89137052083475648297E-6f,
+ 6.88975834691682398426E-5f, 3.36911647825569408990E-3f,
+ 8.04490411014108831608E-1f};
+ if (x < 0.0f) {
+ x = -x;
+ }
+
+ if (x <= 8.0f) {
+ float y = 0.5f * x - 2.0f;
+ return cephes::chebevl<float, 18>::run(y, A);
+ }
+
+ return cephes::chebevl<float, 7>::run(32.0f / x - 2.0f, B) / numext::sqrt(x);
+ }
+};
+
+template <>
+struct i0e_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(double x) {
+ /* i0e.c
+ *
+ * Modified Bessel function of order zero,
+ * exponentially scaled
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, i0e();
+ *
+ * y = i0e( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns exponentially scaled modified Bessel function
+ * of order zero of the argument.
+ *
+ * The function is defined as i0e(x) = exp(-|x|) j0( ix ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 30000 5.4e-16 1.2e-16
+ * See i0().
+ *
+ */
+ const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17,
+ -2.43127984654795469359E-16, 1.71539128555513303061E-15,
+ -1.16853328779934516808E-14, 7.67618549860493561688E-14,
+ -4.85644678311192946090E-13, 2.95505266312963983461E-12,
+ -1.72682629144155570723E-11, 9.67580903537323691224E-11,
+ -5.18979560163526290666E-10, 2.65982372468238665035E-9,
+ -1.30002500998624804212E-8, 6.04699502254191894932E-8,
+ -2.67079385394061173391E-7, 1.11738753912010371815E-6,
+ -4.41673835845875056359E-6, 1.64484480707288970893E-5,
+ -5.75419501008210370398E-5, 1.88502885095841655729E-4,
+ -5.76375574538582365885E-4, 1.63947561694133579842E-3,
+ -4.32430999505057594430E-3, 1.05464603945949983183E-2,
+ -2.37374148058994688156E-2, 4.93052842396707084878E-2,
+ -9.49010970480476444210E-2, 1.71620901522208775349E-1,
+ -3.04682672343198398683E-1, 6.76795274409476084995E-1};
+ const double B[] = {
+ -7.23318048787475395456E-18, -4.83050448594418207126E-18,
+ 4.46562142029675999901E-17, 3.46122286769746109310E-17,
+ -2.82762398051658348494E-16, -3.42548561967721913462E-16,
+ 1.77256013305652638360E-15, 3.81168066935262242075E-15,
+ -9.55484669882830764870E-15, -4.15056934728722208663E-14,
+ 1.54008621752140982691E-14, 3.85277838274214270114E-13,
+ 7.18012445138366623367E-13, -1.79417853150680611778E-12,
+ -1.32158118404477131188E-11, -3.14991652796324136454E-11,
+ 1.18891471078464383424E-11, 4.94060238822496958910E-10,
+ 3.39623202570838634515E-9, 2.26666899049817806459E-8,
+ 2.04891858946906374183E-7, 2.89137052083475648297E-6,
+ 6.88975834691682398426E-5, 3.36911647825569408990E-3,
+ 8.04490411014108831608E-1};
+
+ if (x < 0.0) {
+ x = -x;
+ }
+
+ if (x <= 8.0) {
+ double y = (x / 2.0) - 2.0;
+ return cephes::chebevl<double, 30>::run(y, A);
+ }
+
+ return cephes::chebevl<double, 25>::run(32.0 / x - 2.0, B) /
+ numext::sqrt(x);
+ }
+};
+
+template <typename Scalar>
+struct i1e_retval {
+ typedef Scalar type;
+};
+
+template <typename Scalar>
+struct i1e_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <>
+struct i1e_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(float x) {
+ /* i1ef.c
+ *
+ * Modified Bessel function of order one,
+ * exponentially scaled
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * float x, y, i1ef();
+ *
+ * y = i1ef( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns exponentially scaled modified Bessel function
+ * of order one of the argument.
+ *
+ * The function is defined as i1(x) = -i exp(-|x|) j1( ix ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0, 30 30000 1.5e-6 1.5e-7
+ * See i1().
+ *
+ */
+ const float A[] = {9.38153738649577178388E-9f, -4.44505912879632808065E-8f,
+ 2.00329475355213526229E-7f, -8.56872026469545474066E-7f,
+ 3.47025130813767847674E-6f, -1.32731636560394358279E-5f,
+ 4.78156510755005422638E-5f, -1.61760815825896745588E-4f,
+ 5.12285956168575772895E-4f, -1.51357245063125314899E-3f,
+ 4.15642294431288815669E-3f, -1.05640848946261981558E-2f,
+ 2.47264490306265168283E-2f, -5.29459812080949914269E-2f,
+ 1.02643658689847095384E-1f, -1.76416518357834055153E-1f,
+ 2.52587186443633654823E-1f};
+
+ const float B[] = {-3.83538038596423702205E-9f, -2.63146884688951950684E-8f,
+ -2.51223623787020892529E-7f, -3.88256480887769039346E-6f,
+ -1.10588938762623716291E-4f, -9.76109749136146840777E-3f,
+ 7.78576235018280120474E-1f};
+
+ float z = numext::abs(x);
+
+ if (z <= 8.0f) {
+ float y = 0.5f * z - 2.0f;
+ z = cephes::chebevl<float, 17>::run(y, A) * z;
+ } else {
+ z = cephes::chebevl<float, 7>::run(32.0f / z - 2.0f, B) / numext::sqrt(z);
+ }
+
+ if (x < 0.0f) {
+ z = -z;
+ }
+
+ return z;
+ }
+};
+
+template <>
+struct i1e_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(double x) {
+ /* i1e.c
+ *
+ * Modified Bessel function of order one,
+ * exponentially scaled
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, i1e();
+ *
+ * y = i1e( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns exponentially scaled modified Bessel function
+ * of order one of the argument.
+ *
+ * The function is defined as i1(x) = -i exp(-|x|) j1( ix ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0, 30 30000 2.0e-15 2.0e-16
+ * See i1().
+ *
+ */
+ const double A[] = {2.77791411276104639959E-18, -2.11142121435816608115E-17,
+ 1.55363195773620046921E-16, -1.10559694773538630805E-15,
+ 7.60068429473540693410E-15, -5.04218550472791168711E-14,
+ 3.22379336594557470981E-13, -1.98397439776494371520E-12,
+ 1.17361862988909016308E-11, -6.66348972350202774223E-11,
+ 3.62559028155211703701E-10, -1.88724975172282928790E-9,
+ 9.38153738649577178388E-9, -4.44505912879632808065E-8,
+ 2.00329475355213526229E-7, -8.56872026469545474066E-7,
+ 3.47025130813767847674E-6, -1.32731636560394358279E-5,
+ 4.78156510755005422638E-5, -1.61760815825896745588E-4,
+ 5.12285956168575772895E-4, -1.51357245063125314899E-3,
+ 4.15642294431288815669E-3, -1.05640848946261981558E-2,
+ 2.47264490306265168283E-2, -5.29459812080949914269E-2,
+ 1.02643658689847095384E-1, -1.76416518357834055153E-1,
+ 2.52587186443633654823E-1};
+ const double B[] = {
+ 7.51729631084210481353E-18, 4.41434832307170791151E-18,
+ -4.65030536848935832153E-17, -3.20952592199342395980E-17,
+ 2.96262899764595013876E-16, 3.30820231092092828324E-16,
+ -1.88035477551078244854E-15, -3.81440307243700780478E-15,
+ 1.04202769841288027642E-14, 4.27244001671195135429E-14,
+ -2.10154184277266431302E-14, -4.08355111109219731823E-13,
+ -7.19855177624590851209E-13, 2.03562854414708950722E-12,
+ 1.41258074366137813316E-11, 3.25260358301548823856E-11,
+ -1.89749581235054123450E-11, -5.58974346219658380687E-10,
+ -3.83538038596423702205E-9, -2.63146884688951950684E-8,
+ -2.51223623787020892529E-7, -3.88256480887769039346E-6,
+ -1.10588938762623716291E-4, -9.76109749136146840777E-3,
+ 7.78576235018280120474E-1};
+
+ double z = numext::abs(x);
+
+ if (z <= 8.0) {
+ double y = (z / 2.0) - 2.0;
+ z = cephes::chebevl<double, 29>::run(y, A) * z;
+ } else {
+ z = cephes::chebevl<double, 25>::run(32.0 / z - 2.0, B) / numext::sqrt(z);
+ }
+
+ if (x < 0.0) {
+ z = -z;
+ }
+
+ return z;
+ }
+};
+
#endif // EIGEN_HAS_C99_MATH
} // end namespace internal
@@ -1565,6 +1962,18 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar)
return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x);
}
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i0e, Scalar)
+ i0e(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(i0e, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i1e, Scalar)
+ i1e(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(i1e, Scalar)::run(x);
+}
+
} // end namespace numext
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
index 46d60d323..4c176716b 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -50,6 +50,18 @@ Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; retur
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); }
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * order zero i0e(\a a) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pi0e(const Packet& x) { using numext::i0e; return i0e(x); }
+
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * order one i1e(\a a) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pi1e(const Packet& x) { using numext::i1e; return i1e(x); }
+
} // end namespace internal
} // end namespace Eigen
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
index e0e3a8be6..c25fea0b3 100644
--- a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
@@ -156,6 +156,32 @@ double2 pbetainc<double2>(const double2& a, const double2& b, const double2& x)
return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y));
}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi0e<float4>(const float4& x) {
+ using numext::i0e;
+ return make_float4(i0e(x.x), i0e(x.y), i0e(x.z), i0e(x.w));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pi0e<double2>(const double2& x) {
+ using numext::i0e;
+ return make_double2(i0e(x.x), i0e(x.y));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi1e<float4>(const float4& x) {
+ using numext::i1e;
+ return make_float4(i1e(x.x), i1e(x.y), i1e(x.z), i1e(x.w));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pi1e<double2>(const double2& x) {
+ using numext::i1e;
+ return make_double2(i1e(x.x), i1e(x.y));
+}
+
#endif
} // end namespace internal
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 9584a539f..63d0a345a 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -1208,6 +1208,116 @@ void test_cuda_betainc()
cudaFree(d_out);
}
+template <typename Scalar>
+void test_cuda_i0e()
+{
+ Tensor<Scalar, 1> in_x(21);
+ Tensor<Scalar, 1> out(21);
+ Tensor<Scalar, 1> expected_out(21);
+ out.setZero();
+
+ Array<Scalar, 1, Dynamic> in_x_array(21);
+ Array<Scalar, 1, Dynamic> expected_out_array(21);
+
+ in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0,
+ -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+ expected_out_array << 0.0897803118848, 0.0947062952128, 0.100544127361,
+ 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857,
+ 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554,
+ 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163,
+ 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128,
+ 0.0897803118848;
+
+ for (int i = 0; i < 21; ++i) {
+ in_x(i) = in_x_array(i);
+ expected_out(i) = expected_out_array(i);
+ }
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21);
+
+ gpu_out.device(gpu_device) = gpu_in.i0e();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 21; ++i) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_i1e()
+{
+ Tensor<Scalar, 1> in_x(21);
+ Tensor<Scalar, 1> out(21);
+ Tensor<Scalar, 1> expected_out(21);
+ out.setZero();
+
+ Array<Scalar, 1, Dynamic> in_x_array(21);
+ Array<Scalar, 1, Dynamic> expected_out_array(21);
+
+ in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0,
+ -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+ expected_out_array << -0.0875062221833, -0.092036796872, -0.0973496147565,
+ -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293,
+ -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249,
+ 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384,
+ 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872,
+ 0.0875062221833;
+
+ for (int i = 0; i < 21; ++i) {
+ in_x(i) = in_x_array(i);
+ expected_out(i) = expected_out_array(i);
+ }
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21);
+
+ gpu_out.device(gpu_device) = gpu_in.i1e();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 21; ++i) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
void test_cxx11_tensor_cuda()
{
@@ -1280,5 +1390,11 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_6(test_cuda_betainc<float>());
CALL_SUBTEST_6(test_cuda_betainc<double>());
+
+ CALL_SUBTEST_6(test_cuda_i0e<float>());
+ CALL_SUBTEST_6(test_cuda_i0e<double>());
+
+ CALL_SUBTEST_6(test_cuda_i1e<float>());
+ CALL_SUBTEST_6(test_cuda_i1e<double>());
#endif
}
diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp
index 057fb3e92..48d0db95e 100644
--- a/unsupported/test/special_functions.cpp
+++ b/unsupported/test/special_functions.cpp
@@ -335,6 +335,46 @@ template<typename ArrayType> void array_special_functions()
ArrayType test = betainc(a, b + one, x) + eps;
verify_component_wise(test, expected););
}
+
+ // Test Bessel function i0e. Reference results obtained with SciPy.
+ {
+ ArrayType x(21);
+ ArrayType expected(21);
+ ArrayType res(21);
+
+ x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
+ 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+ expected << 0.0897803118848, 0.0947062952128, 0.100544127361,
+ 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857,
+ 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554,
+ 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163,
+ 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128,
+ 0.0897803118848;
+
+ CALL_SUBTEST(res = i0e(x);
+ verify_component_wise(res, expected););
+ }
+
+ // Test Bessel function i1e. Reference results obtained with SciPy.
+ {
+ ArrayType x(21);
+ ArrayType expected(21);
+ ArrayType res(21);
+
+ x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
+ 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+ expected << -0.0875062221833, -0.092036796872, -0.0973496147565,
+ -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293,
+ -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249,
+ 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384,
+ 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872,
+ 0.0875062221833;
+
+ CALL_SUBTEST(res = i1e(x);
+ verify_component_wise(res, expected););
+ }
#endif
}