aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/GenericPacketMath.h2
-rw-r--r--Eigen/src/Core/arch/CUDA/PacketMath.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBase.h12
-rw-r--r--unsupported/Eigen/SpecialFunctions2
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h46
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h54
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h8
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h409
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h12
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h26
-rw-r--r--unsupported/test/cxx11_tensor_cuda.cu116
-rw-r--r--unsupported/test/special_functions.cpp40
12 files changed, 731 insertions, 0 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 7979c3aff..888a3f7ea 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -82,6 +82,8 @@ struct default_packet_traits
HasPolygamma = 0,
HasErf = 0,
HasErfc = 0,
+ HasI0e = 0,
+ HasI1e = 0,
HasIGamma = 0,
HasIGammac = 0,
HasBetaInc = 0,
diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h
index 97a8abe59..704a4e0d9 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -44,6 +44,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasPolygamma = 1,
HasErf = 1,
HasErfc = 1,
+ HasI0e = 1,
+ HasI1e = 1,
HasIGamma = 1,
HasIGammac = 1,
HasBetaInc = 1,
@@ -73,6 +75,8 @@ template<> struct packet_traits<double> : default_packet_traits
HasPolygamma = 1,
HasErf = 1,
HasErfc = 1,
+ HasI0e = 1,
+ HasI1e = 1,
HasIGamma = 1,
HasIGammac = 1,
HasBetaInc = 1,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 745b16d2c..a942c98dd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -133,6 +133,18 @@ class TensorBase<Derived, ReadOnlyAccessors>
return unaryExpr(internal::scalar_digamma_op<Scalar>());
}
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i0e_op<Scalar>, const Derived>
+ i0e() const {
+ return unaryExpr(internal::scalar_i0e_op<Scalar>());
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i1e_op<Scalar>, const Derived>
+ i1e() const {
+ return unaryExpr(internal::scalar_i1e_op<Scalar>());
+ }
+
// igamma(a = this, x = other)
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived>
diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions
index a2ad4925e..482ec6e6f 100644
--- a/unsupported/Eigen/SpecialFunctions
+++ b/unsupported/Eigen/SpecialFunctions
@@ -34,6 +34,8 @@ namespace Eigen {
* - polygamma
* - zeta
* - betainc
+ * - i0e
+ * - i1e
*
* \code
* #include <unsupported/Eigen/SpecialFunctions>
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
index ed415db99..b7a9d035b 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
@@ -119,6 +119,52 @@ zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q)
);
}
+/** \returns an expression of the coefficient-wise i0e(\a x) to the given
+ * arrays.
+ *
+ * It returns the exponentially scaled modified Bessel
+ * function of order zero.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of i0e(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::i0e()
+ */
+template <typename Derived>
+inline const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_i0e_op<typename Derived::Scalar>, const Derived>
+i0e(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_i0e_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise i1e(\a x) to the given
+ * arrays.
+ *
+ * It returns the exponentially scaled modified Bessel
+ * function of order one.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of i1e(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::i1e()
+ */
+template <typename Derived>
+inline const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_i1e_op<typename Derived::Scalar>, const Derived>
+i1e(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_i1e_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
} // end namespace Eigen
#endif // EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
index d8f2363be..8420f0174 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
@@ -229,6 +229,60 @@ struct functor_traits<scalar_erfc_op<Scalar> >
};
};
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of order zero
+ * \sa class CwiseUnaryOp, Cwise::i0e()
+ */
+template <typename Scalar>
+struct scalar_i0e_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_i0e_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
+ using numext::i0e;
+ return i0e(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
+ return internal::pi0e(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_i0e_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=20 is computed.
+ // The cost is N multiplications and 2N additions.
+ Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasI0e
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of order zero
+ * \sa class CwiseUnaryOp, Cwise::i1e()
+ */
+template <typename Scalar>
+struct scalar_i1e_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_i1e_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
+ using numext::i1e;
+ return i1e(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
+ return internal::pi1e(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_i1e_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=20 is computed.
+ // The cost is N multiplications and 2N additions.
+ Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasI1e
+ };
+};
+
} // end namespace internal
} // end namespace Eigen
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
index 553bcda6a..c5867002e 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
@@ -39,6 +39,14 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) {
return Eigen::half(Eigen::numext::betainc(static_cast<float>(a), static_cast<float>(b), static_cast<float>(x)));
}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i0e(const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::i0e(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i1e(const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::i1e(static_cast<float>(x)));
+}
#endif
} // end namespace numext
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index 97b2e5a91..6c7ac3f3b 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -95,6 +95,75 @@ struct polevl<Scalar, 0> {
}
};
+/* chbevl (modified for Eigen)
+ *
+ * Evaluate Chebyshev series
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * int N;
+ * Scalar x, y, coef[N], chebevl();
+ *
+ * y = chbevl( x, coef, N );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Evaluates the series
+ *
+ * N-1
+ * - '
+ * y = > coef[i] T (x/2)
+ * - i
+ * i=0
+ *
+ * of Chebyshev polynomials Ti at argument x/2.
+ *
+ * Coefficients are stored in reverse order, i.e. the zero
+ * order term is last in the array. Note N is the number of
+ * coefficients, not the order.
+ *
+ * If coefficients are for the interval a to b, x must
+ * have been transformed to x -> 2(2x - b - a)/(b-a) before
+ * entering the routine. This maps x from (a, b) to (-1, 1),
+ * over which the Chebyshev polynomials are defined.
+ *
+ * If the coefficients are for the inverted interval, in
+ * which (a, b) is mapped to (1/b, 1/a), the transformation
+ * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity,
+ * this becomes x -> 4a/x - 1.
+ *
+ *
+ *
+ * SPEED:
+ *
+ * Taking advantage of the recurrence properties of the
+ * Chebyshev polynomials, the routine requires one more
+ * addition per loop than evaluating a nested polynomial of
+ * the same degree.
+ *
+ */
+template <typename Scalar, int N>
+struct chebevl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar x, const Scalar coef[]) {
+ Scalar b0 = coef[0];
+ Scalar b1 = 0;
+ Scalar b2;
+
+ for (int i = 1; i < N; i++) {
+ b2 = b1;
+ b1 = b0;
+ b0 = x * b1 - b2 + coef[i];
+ }
+
+ return Scalar(0.5) * (b0 - b2);
+ }
+};
+
} // end namespace cephes
/****************************************************************************
@@ -1505,6 +1574,334 @@ struct betainc_impl<double> {
}
};
+/****************************************************************************
+ * Implementation of Bessel function, based on Cephes *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct i0e_retval {
+ typedef Scalar type;
+};
+
+template <typename Scalar>
+struct i0e_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <>
+struct i0e_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(float x) {
+ /* i0ef.c
+ *
+ * Modified Bessel function of order zero,
+ * exponentially scaled
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * float x, y, i0ef();
+ *
+ * y = i0ef( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns exponentially scaled modified Bessel function
+ * of order zero of the argument.
+ *
+ * The function is defined as i0e(x) = exp(-|x|) j0( ix ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 100000 3.7e-7 7.0e-8
+ * See i0f().
+ *
+ */
+ const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f,
+ -2.67079385394061173391E-7f, 1.11738753912010371815E-6f,
+ -4.41673835845875056359E-6f, 1.64484480707288970893E-5f,
+ -5.75419501008210370398E-5f, 1.88502885095841655729E-4f,
+ -5.76375574538582365885E-4f, 1.63947561694133579842E-3f,
+ -4.32430999505057594430E-3f, 1.05464603945949983183E-2f,
+ -2.37374148058994688156E-2f, 4.93052842396707084878E-2f,
+ -9.49010970480476444210E-2f, 1.71620901522208775349E-1f,
+ -3.04682672343198398683E-1f, 6.76795274409476084995E-1f};
+
+ const float B[] = {3.39623202570838634515E-9f, 2.26666899049817806459E-8f,
+ 2.04891858946906374183E-7f, 2.89137052083475648297E-6f,
+ 6.88975834691682398426E-5f, 3.36911647825569408990E-3f,
+ 8.04490411014108831608E-1f};
+ if (x < 0.0f) {
+ x = -x;
+ }
+
+ if (x <= 8.0f) {
+ float y = 0.5f * x - 2.0f;
+ return cephes::chebevl<float, 18>::run(y, A);
+ }
+
+ return cephes::chebevl<float, 7>::run(32.0f / x - 2.0f, B) / numext::sqrt(x);
+ }
+};
+
+template <>
+struct i0e_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(double x) {
+ /* i0e.c
+ *
+ * Modified Bessel function of order zero,
+ * exponentially scaled
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, i0e();
+ *
+ * y = i0e( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns exponentially scaled modified Bessel function
+ * of order zero of the argument.
+ *
+ * The function is defined as i0e(x) = exp(-|x|) j0( ix ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 30000 5.4e-16 1.2e-16
+ * See i0().
+ *
+ */
+ const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17,
+ -2.43127984654795469359E-16, 1.71539128555513303061E-15,
+ -1.16853328779934516808E-14, 7.67618549860493561688E-14,
+ -4.85644678311192946090E-13, 2.95505266312963983461E-12,
+ -1.72682629144155570723E-11, 9.67580903537323691224E-11,
+ -5.18979560163526290666E-10, 2.65982372468238665035E-9,
+ -1.30002500998624804212E-8, 6.04699502254191894932E-8,
+ -2.67079385394061173391E-7, 1.11738753912010371815E-6,
+ -4.41673835845875056359E-6, 1.64484480707288970893E-5,
+ -5.75419501008210370398E-5, 1.88502885095841655729E-4,
+ -5.76375574538582365885E-4, 1.63947561694133579842E-3,
+ -4.32430999505057594430E-3, 1.05464603945949983183E-2,
+ -2.37374148058994688156E-2, 4.93052842396707084878E-2,
+ -9.49010970480476444210E-2, 1.71620901522208775349E-1,
+ -3.04682672343198398683E-1, 6.76795274409476084995E-1};
+ const double B[] = {
+ -7.23318048787475395456E-18, -4.83050448594418207126E-18,
+ 4.46562142029675999901E-17, 3.46122286769746109310E-17,
+ -2.82762398051658348494E-16, -3.42548561967721913462E-16,
+ 1.77256013305652638360E-15, 3.81168066935262242075E-15,
+ -9.55484669882830764870E-15, -4.15056934728722208663E-14,
+ 1.54008621752140982691E-14, 3.85277838274214270114E-13,
+ 7.18012445138366623367E-13, -1.79417853150680611778E-12,
+ -1.32158118404477131188E-11, -3.14991652796324136454E-11,
+ 1.18891471078464383424E-11, 4.94060238822496958910E-10,
+ 3.39623202570838634515E-9, 2.26666899049817806459E-8,
+ 2.04891858946906374183E-7, 2.89137052083475648297E-6,
+ 6.88975834691682398426E-5, 3.36911647825569408990E-3,
+ 8.04490411014108831608E-1};
+
+ if (x < 0.0) {
+ x = -x;
+ }
+
+ if (x <= 8.0) {
+ double y = (x / 2.0) - 2.0;
+ return cephes::chebevl<double, 30>::run(y, A);
+ }
+
+ return cephes::chebevl<double, 25>::run(32.0 / x - 2.0, B) /
+ numext::sqrt(x);
+ }
+};
+
+template <typename Scalar>
+struct i1e_retval {
+ typedef Scalar type;
+};
+
+template <typename Scalar>
+struct i1e_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <>
+struct i1e_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(float x) {
+ /* i1ef.c
+ *
+ * Modified Bessel function of order one,
+ * exponentially scaled
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * float x, y, i1ef();
+ *
+ * y = i1ef( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns exponentially scaled modified Bessel function
+ * of order one of the argument.
+ *
+ * The function is defined as i1(x) = -i exp(-|x|) j1( ix ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0, 30 30000 1.5e-6 1.5e-7
+ * See i1().
+ *
+ */
+ const float A[] = {9.38153738649577178388E-9f, -4.44505912879632808065E-8f,
+ 2.00329475355213526229E-7f, -8.56872026469545474066E-7f,
+ 3.47025130813767847674E-6f, -1.32731636560394358279E-5f,
+ 4.78156510755005422638E-5f, -1.61760815825896745588E-4f,
+ 5.12285956168575772895E-4f, -1.51357245063125314899E-3f,
+ 4.15642294431288815669E-3f, -1.05640848946261981558E-2f,
+ 2.47264490306265168283E-2f, -5.29459812080949914269E-2f,
+ 1.02643658689847095384E-1f, -1.76416518357834055153E-1f,
+ 2.52587186443633654823E-1f};
+
+ const float B[] = {-3.83538038596423702205E-9f, -2.63146884688951950684E-8f,
+ -2.51223623787020892529E-7f, -3.88256480887769039346E-6f,
+ -1.10588938762623716291E-4f, -9.76109749136146840777E-3f,
+ 7.78576235018280120474E-1f};
+
+ float z = numext::abs(x);
+
+ if (z <= 8.0f) {
+ float y = 0.5f * z - 2.0f;
+ z = cephes::chebevl<float, 17>::run(y, A) * z;
+ } else {
+ z = cephes::chebevl<float, 7>::run(32.0f / z - 2.0f, B) / numext::sqrt(z);
+ }
+
+ if (x < 0.0f) {
+ z = -z;
+ }
+
+ return z;
+ }
+};
+
+template <>
+struct i1e_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(double x) {
+ /* i1e.c
+ *
+ * Modified Bessel function of order one,
+ * exponentially scaled
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, i1e();
+ *
+ * y = i1e( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns exponentially scaled modified Bessel function
+ * of order one of the argument.
+ *
+ * The function is defined as i1(x) = -i exp(-|x|) j1( ix ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0, 30 30000 2.0e-15 2.0e-16
+ * See i1().
+ *
+ */
+ const double A[] = {2.77791411276104639959E-18, -2.11142121435816608115E-17,
+ 1.55363195773620046921E-16, -1.10559694773538630805E-15,
+ 7.60068429473540693410E-15, -5.04218550472791168711E-14,
+ 3.22379336594557470981E-13, -1.98397439776494371520E-12,
+ 1.17361862988909016308E-11, -6.66348972350202774223E-11,
+ 3.62559028155211703701E-10, -1.88724975172282928790E-9,
+ 9.38153738649577178388E-9, -4.44505912879632808065E-8,
+ 2.00329475355213526229E-7, -8.56872026469545474066E-7,
+ 3.47025130813767847674E-6, -1.32731636560394358279E-5,
+ 4.78156510755005422638E-5, -1.61760815825896745588E-4,
+ 5.12285956168575772895E-4, -1.51357245063125314899E-3,
+ 4.15642294431288815669E-3, -1.05640848946261981558E-2,
+ 2.47264490306265168283E-2, -5.29459812080949914269E-2,
+ 1.02643658689847095384E-1, -1.76416518357834055153E-1,
+ 2.52587186443633654823E-1};
+ const double B[] = {
+ 7.51729631084210481353E-18, 4.41434832307170791151E-18,
+ -4.65030536848935832153E-17, -3.20952592199342395980E-17,
+ 2.96262899764595013876E-16, 3.30820231092092828324E-16,
+ -1.88035477551078244854E-15, -3.81440307243700780478E-15,
+ 1.04202769841288027642E-14, 4.27244001671195135429E-14,
+ -2.10154184277266431302E-14, -4.08355111109219731823E-13,
+ -7.19855177624590851209E-13, 2.03562854414708950722E-12,
+ 1.41258074366137813316E-11, 3.25260358301548823856E-11,
+ -1.89749581235054123450E-11, -5.58974346219658380687E-10,
+ -3.83538038596423702205E-9, -2.63146884688951950684E-8,
+ -2.51223623787020892529E-7, -3.88256480887769039346E-6,
+ -1.10588938762623716291E-4, -9.76109749136146840777E-3,
+ 7.78576235018280120474E-1};
+
+ double z = numext::abs(x);
+
+ if (z <= 8.0) {
+ double y = (z / 2.0) - 2.0;
+ z = cephes::chebevl<double, 29>::run(y, A) * z;
+ } else {
+ z = cephes::chebevl<double, 25>::run(32.0 / z - 2.0, B) / numext::sqrt(z);
+ }
+
+ if (x < 0.0) {
+ z = -z;
+ }
+
+ return z;
+ }
+};
+
#endif // EIGEN_HAS_C99_MATH
} // end namespace internal
@@ -1565,6 +1962,18 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar)
return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x);
}
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i0e, Scalar)
+ i0e(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(i0e, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i1e, Scalar)
+ i1e(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(i1e, Scalar)::run(x);
+}
+
} // end namespace numext
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
index 46d60d323..4c176716b 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -50,6 +50,18 @@ Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; retur
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); }
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * order zero i0e(\a a) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pi0e(const Packet& x) { using numext::i0e; return i0e(x); }
+
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * order one i1e(\a a) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pi1e(const Packet& x) { using numext::i1e; return i1e(x); }
+
} // end namespace internal
} // end namespace Eigen
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
index e0e3a8be6..c25fea0b3 100644
--- a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
@@ -156,6 +156,32 @@ double2 pbetainc<double2>(const double2& a, const double2& b, const double2& x)
return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y));
}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi0e<float4>(const float4& x) {
+ using numext::i0e;
+ return make_float4(i0e(x.x), i0e(x.y), i0e(x.z), i0e(x.w));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pi0e<double2>(const double2& x) {
+ using numext::i0e;
+ return make_double2(i0e(x.x), i0e(x.y));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi1e<float4>(const float4& x) {
+ using numext::i1e;
+ return make_float4(i1e(x.x), i1e(x.y), i1e(x.z), i1e(x.w));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pi1e<double2>(const double2& x) {
+ using numext::i1e;
+ return make_double2(i1e(x.x), i1e(x.y));
+}
+
#endif
} // end namespace internal
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 9584a539f..63d0a345a 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -1208,6 +1208,116 @@ void test_cuda_betainc()
cudaFree(d_out);
}
+template <typename Scalar>
+void test_cuda_i0e()
+{
+ Tensor<Scalar, 1> in_x(21);
+ Tensor<Scalar, 1> out(21);
+ Tensor<Scalar, 1> expected_out(21);
+ out.setZero();
+
+ Array<Scalar, 1, Dynamic> in_x_array(21);
+ Array<Scalar, 1, Dynamic> expected_out_array(21);
+
+ in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0,
+ -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+ expected_out_array << 0.0897803118848, 0.0947062952128, 0.100544127361,
+ 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857,
+ 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554,
+ 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163,
+ 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128,
+ 0.0897803118848;
+
+ for (int i = 0; i < 21; ++i) {
+ in_x(i) = in_x_array(i);
+ expected_out(i) = expected_out_array(i);
+ }
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21);
+
+ gpu_out.device(gpu_device) = gpu_in.i0e();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 21; ++i) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_i1e()
+{
+ Tensor<Scalar, 1> in_x(21);
+ Tensor<Scalar, 1> out(21);
+ Tensor<Scalar, 1> expected_out(21);
+ out.setZero();
+
+ Array<Scalar, 1, Dynamic> in_x_array(21);
+ Array<Scalar, 1, Dynamic> expected_out_array(21);
+
+ in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0,
+ -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+ expected_out_array << -0.0875062221833, -0.092036796872, -0.0973496147565,
+ -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293,
+ -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249,
+ 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384,
+ 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872,
+ 0.0875062221833;
+
+ for (int i = 0; i < 21; ++i) {
+ in_x(i) = in_x_array(i);
+ expected_out(i) = expected_out_array(i);
+ }
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21);
+
+ gpu_out.device(gpu_device) = gpu_in.i1e();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 21; ++i) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
void test_cxx11_tensor_cuda()
{
@@ -1280,5 +1390,11 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_6(test_cuda_betainc<float>());
CALL_SUBTEST_6(test_cuda_betainc<double>());
+
+ CALL_SUBTEST_6(test_cuda_i0e<float>());
+ CALL_SUBTEST_6(test_cuda_i0e<double>());
+
+ CALL_SUBTEST_6(test_cuda_i1e<float>());
+ CALL_SUBTEST_6(test_cuda_i1e<double>());
#endif
}
diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp
index 057fb3e92..48d0db95e 100644
--- a/unsupported/test/special_functions.cpp
+++ b/unsupported/test/special_functions.cpp
@@ -335,6 +335,46 @@ template<typename ArrayType> void array_special_functions()
ArrayType test = betainc(a, b + one, x) + eps;
verify_component_wise(test, expected););
}
+
+ // Test Bessel function i0e. Reference results obtained with SciPy.
+ {
+ ArrayType x(21);
+ ArrayType expected(21);
+ ArrayType res(21);
+
+ x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
+ 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+ expected << 0.0897803118848, 0.0947062952128, 0.100544127361,
+ 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857,
+ 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554,
+ 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163,
+ 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128,
+ 0.0897803118848;
+
+ CALL_SUBTEST(res = i0e(x);
+ verify_component_wise(res, expected););
+ }
+
+ // Test Bessel function i1e. Reference results obtained with SciPy.
+ {
+ ArrayType x(21);
+ ArrayType expected(21);
+ ArrayType res(21);
+
+ x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
+ 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+ expected << -0.0875062221833, -0.092036796872, -0.0973496147565,
+ -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293,
+ -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249,
+ 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384,
+ 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872,
+ 0.0875062221833;
+
+ CALL_SUBTEST(res = i1e(x);
+ verify_component_wise(res, expected););
+ }
#endif
}