Merged in mfigurnov/eigen (pull request PR-400)

Exponentially scaled modified Bessel functions of order zero and one. Approved-by: Benoit Steiner <benoit.steiner.goog@gmail.com>
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2018-06-05 17:05:21 +0000
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2018-06-05 17:05:21 +0000
commit: e206f8d4a401fe2060bada4d4b5d92e3bf3b561c (patch)
tree: c5613a06e00edbb6a5c804ee39858911b87ef1e6
parent: e2ed0cf8abfe275bd147f3228e4c59f5242993f6 (diff)
parent: f216854453887f31ac02ffefb7a7a569dc3fa54d (diff)
12 files changed, 731 insertions, 0 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 7979c3aff..888a3f7ea 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -82,6 +82,8 @@ struct default_packet_traits
     HasPolygamma = 0,
     HasErf = 0,
     HasErfc = 0,
+    HasI0e = 0,
+    HasI1e = 0,
     HasIGamma = 0,
     HasIGammac = 0,
     HasBetaInc = 0,
diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h
index 97a8abe59..704a4e0d9 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -44,6 +44,8 @@ template<> struct packet_traits<float> : default_packet_traits
     HasPolygamma = 1,
     HasErf = 1,
     HasErfc = 1,
+    HasI0e = 1,
+    HasI1e = 1,
     HasIGamma = 1,
     HasIGammac = 1,
     HasBetaInc = 1,
@@ -73,6 +75,8 @@ template<> struct packet_traits<double> : default_packet_traits
     HasPolygamma = 1,
     HasErf = 1,
     HasErfc = 1,
+    HasI0e = 1,
+    HasI1e = 1,
     HasIGamma = 1,
     HasIGammac = 1,
     HasBetaInc = 1,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 745b16d2c..a942c98dd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -133,6 +133,18 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return unaryExpr(internal::scalar_digamma_op<Scalar>());
     }
 
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i0e_op<Scalar>, const Derived>
+    i0e() const {
+      return unaryExpr(internal::scalar_i0e_op<Scalar>());
+    }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_i1e_op<Scalar>, const Derived>
+    i1e() const {
+      return unaryExpr(internal::scalar_i1e_op<Scalar>());
+    }
+
     // igamma(a = this, x = other)
     template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived>
diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions
index a2ad4925e..482ec6e6f 100644
--- a/unsupported/Eigen/SpecialFunctions
+++ b/unsupported/Eigen/SpecialFunctions
@@ -34,6 +34,8 @@ namespace Eigen {
   * - polygamma
   * - zeta
   * - betainc
+  * - i0e
+  * - i1e
   *
   * \code
   * #include <unsupported/Eigen/SpecialFunctions>
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
index ed415db99..b7a9d035b 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
@@ -119,6 +119,52 @@ zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q)
   );
 }
 
+/** \returns an expression of the coefficient-wise i0e(\a x) to the given
+ * arrays.
+  *
+  * It returns the exponentially scaled modified Bessel
+  * function of order zero.
+  *
+  * \param x is the argument
+  *
+  * \note This function supports only float and double scalar types. To support
+  * other scalar types, the user has to provide implementations of i0e(T) for
+  * any scalar type T to be supported.
+  *
+  * \sa ArrayBase::i0e()
+  */
+template <typename Derived>
+inline const Eigen::CwiseUnaryOp<
+    Eigen::internal::scalar_i0e_op<typename Derived::Scalar>, const Derived>
+i0e(const Eigen::ArrayBase<Derived>& x) {
+  return Eigen::CwiseUnaryOp<
+      Eigen::internal::scalar_i0e_op<typename Derived::Scalar>,
+      const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise i1e(\a x) to the given
+ * arrays.
+  *
+  * It returns the exponentially scaled modified Bessel
+  * function of order one.
+  *
+  * \param x is the argument
+  *
+  * \note This function supports only float and double scalar types. To support
+  * other scalar types, the user has to provide implementations of i1e(T) for
+  * any scalar type T to be supported.
+  *
+  * \sa ArrayBase::i1e()
+  */
+template <typename Derived>
+inline const Eigen::CwiseUnaryOp<
+    Eigen::internal::scalar_i1e_op<typename Derived::Scalar>, const Derived>
+i1e(const Eigen::ArrayBase<Derived>& x) {
+  return Eigen::CwiseUnaryOp<
+      Eigen::internal::scalar_i1e_op<typename Derived::Scalar>,
+      const Derived>(x.derived());
+}
+
 } // end namespace Eigen
 
 #endif // EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
index d8f2363be..8420f0174 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
@@ -229,6 +229,60 @@ struct functor_traits<scalar_erfc_op<Scalar> >
   };
 };
 
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of order zero
+ * \sa class CwiseUnaryOp, Cwise::i0e()
+ */
+template <typename Scalar>
+struct scalar_i0e_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_i0e_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
+    using numext::i0e;
+    return i0e(x);
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
+    return internal::pi0e(x);
+  }
+};
+template <typename Scalar>
+struct functor_traits<scalar_i0e_op<Scalar> > {
+  enum {
+    // On average, a Chebyshev polynomial of order N=20 is computed.
+    // The cost is N multiplications and 2N additions.
+    Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasI0e
+  };
+};
+
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of order zero
+ * \sa class CwiseUnaryOp, Cwise::i1e()
+ */
+template <typename Scalar>
+struct scalar_i1e_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_i1e_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
+    using numext::i1e;
+    return i1e(x);
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
+    return internal::pi1e(x);
+  }
+};
+template <typename Scalar>
+struct functor_traits<scalar_i1e_op<Scalar> > {
+  enum {
+    // On average, a Chebyshev polynomial of order N=20 is computed.
+    // The cost is N multiplications and 2N additions.
+    Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasI1e
+  };
+};
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
index 553bcda6a..c5867002e 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
@@ -39,6 +39,14 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen
 template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) {
   return Eigen::half(Eigen::numext::betainc(static_cast<float>(a), static_cast<float>(b), static_cast<float>(x)));
 }
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i0e(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::i0e(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half i1e(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::i1e(static_cast<float>(x)));
+}
 #endif
 
 }  // end namespace numext
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index 97b2e5a91..6c7ac3f3b 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -95,6 +95,75 @@ struct polevl<Scalar, 0> {
   }
 };
 
+/* chbevl (modified for Eigen)
+ *
+ *     Evaluate Chebyshev series
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * int N;
+ * Scalar x, y, coef[N], chebevl();
+ *
+ * y = chbevl( x, coef, N );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Evaluates the series
+ *
+ *        N-1
+ *         - '
+ *  y  =   >   coef[i] T (x/2)
+ *         -            i
+ *        i=0
+ *
+ * of Chebyshev polynomials Ti at argument x/2.
+ *
+ * Coefficients are stored in reverse order, i.e. the zero
+ * order term is last in the array.  Note N is the number of
+ * coefficients, not the order.
+ *
+ * If coefficients are for the interval a to b, x must
+ * have been transformed to x -> 2(2x - b - a)/(b-a) before
+ * entering the routine.  This maps x from (a, b) to (-1, 1),
+ * over which the Chebyshev polynomials are defined.
+ *
+ * If the coefficients are for the inverted interval, in
+ * which (a, b) is mapped to (1/b, 1/a), the transformation
+ * required is x -> 2(2ab/x - b - a)/(b-a).  If b is infinity,
+ * this becomes x -> 4a/x - 1.
+ *
+ *
+ *
+ * SPEED:
+ *
+ * Taking advantage of the recurrence properties of the
+ * Chebyshev polynomials, the routine requires one more
+ * addition per loop than evaluating a nested polynomial of
+ * the same degree.
+ *
+ */
+template <typename Scalar, int N>
+struct chebevl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(Scalar x, const Scalar coef[]) {
+    Scalar b0 = coef[0];
+    Scalar b1 = 0;
+    Scalar b2;
+
+    for (int i = 1; i < N; i++) {
+      b2 = b1;
+      b1 = b0;
+      b0 = x * b1 - b2 + coef[i];
+    }
+
+    return Scalar(0.5) * (b0 - b2);
+  }
+};
+
 }  // end namespace cephes
 
 /****************************************************************************
@@ -1505,6 +1574,334 @@ struct betainc_impl<double> {
   }
 };
 
+/****************************************************************************
+ * Implementation of Bessel function, based on Cephes                       *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct i0e_retval {
+  typedef Scalar type;
+};
+
+template <typename Scalar>
+struct i0e_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return Scalar(0);
+  }
+};
+
+template <>
+struct i0e_impl<float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE float run(float x) {
+    /*  i0ef.c
+     *
+     *  Modified Bessel function of order zero,
+     *  exponentially scaled
+     *
+     *
+     *
+     * SYNOPSIS:
+     *
+     * float x, y, i0ef();
+     *
+     * y = i0ef( x );
+     *
+     *
+     *
+     * DESCRIPTION:
+     *
+     * Returns exponentially scaled modified Bessel function
+     * of order zero of the argument.
+     *
+     * The function is defined as i0e(x) = exp(-|x|) j0( ix ).
+     *
+     *
+     *
+     * ACCURACY:
+     *
+     *                      Relative error:
+     * arithmetic   domain     # trials      peak         rms
+     *    IEEE      0,30        100000      3.7e-7      7.0e-8
+     * See i0f().
+     *
+     */
+    const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f,
+                       -2.67079385394061173391E-7f, 1.11738753912010371815E-6f,
+                       -4.41673835845875056359E-6f, 1.64484480707288970893E-5f,
+                       -5.75419501008210370398E-5f, 1.88502885095841655729E-4f,
+                       -5.76375574538582365885E-4f, 1.63947561694133579842E-3f,
+                       -4.32430999505057594430E-3f, 1.05464603945949983183E-2f,
+                       -2.37374148058994688156E-2f, 4.93052842396707084878E-2f,
+                       -9.49010970480476444210E-2f, 1.71620901522208775349E-1f,
+                       -3.04682672343198398683E-1f, 6.76795274409476084995E-1f};
+
+    const float B[] = {3.39623202570838634515E-9f, 2.26666899049817806459E-8f,
+                       2.04891858946906374183E-7f, 2.89137052083475648297E-6f,
+                       6.88975834691682398426E-5f, 3.36911647825569408990E-3f,
+                       8.04490411014108831608E-1f};
+    if (x < 0.0f) {
+      x = -x;
+    }
+
+    if (x <= 8.0f) {
+      float y = 0.5f * x - 2.0f;
+      return cephes::chebevl<float, 18>::run(y, A);
+    }
+
+    return cephes::chebevl<float, 7>::run(32.0f / x - 2.0f, B) / numext::sqrt(x);
+  }
+};
+
+template <>
+struct i0e_impl<double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE double run(double x) {
+    /*  i0e.c
+     *
+     *  Modified Bessel function of order zero,
+     *  exponentially scaled
+     *
+     *
+     *
+     * SYNOPSIS:
+     *
+     * double x, y, i0e();
+     *
+     * y = i0e( x );
+     *
+     *
+     *
+     * DESCRIPTION:
+     *
+     * Returns exponentially scaled modified Bessel function
+     * of order zero of the argument.
+     *
+     * The function is defined as i0e(x) = exp(-|x|) j0( ix ).
+     *
+     *
+     *
+     * ACCURACY:
+     *
+     *                      Relative error:
+     * arithmetic   domain     # trials      peak         rms
+     *    IEEE      0,30        30000       5.4e-16     1.2e-16
+     * See i0().
+     *
+     */
+    const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17,
+                        -2.43127984654795469359E-16, 1.71539128555513303061E-15,
+                        -1.16853328779934516808E-14, 7.67618549860493561688E-14,
+                        -4.85644678311192946090E-13, 2.95505266312963983461E-12,
+                        -1.72682629144155570723E-11, 9.67580903537323691224E-11,
+                        -5.18979560163526290666E-10, 2.65982372468238665035E-9,
+                        -1.30002500998624804212E-8,  6.04699502254191894932E-8,
+                        -2.67079385394061173391E-7,  1.11738753912010371815E-6,
+                        -4.41673835845875056359E-6,  1.64484480707288970893E-5,
+                        -5.75419501008210370398E-5,  1.88502885095841655729E-4,
+                        -5.76375574538582365885E-4,  1.63947561694133579842E-3,
+                        -4.32430999505057594430E-3,  1.05464603945949983183E-2,
+                        -2.37374148058994688156E-2,  4.93052842396707084878E-2,
+                        -9.49010970480476444210E-2,  1.71620901522208775349E-1,
+                        -3.04682672343198398683E-1,  6.76795274409476084995E-1};
+    const double B[] = {
+        -7.23318048787475395456E-18, -4.83050448594418207126E-18,
+        4.46562142029675999901E-17,  3.46122286769746109310E-17,
+        -2.82762398051658348494E-16, -3.42548561967721913462E-16,
+        1.77256013305652638360E-15,  3.81168066935262242075E-15,
+        -9.55484669882830764870E-15, -4.15056934728722208663E-14,
+        1.54008621752140982691E-14,  3.85277838274214270114E-13,
+        7.18012445138366623367E-13,  -1.79417853150680611778E-12,
+        -1.32158118404477131188E-11, -3.14991652796324136454E-11,
+        1.18891471078464383424E-11,  4.94060238822496958910E-10,
+        3.39623202570838634515E-9,   2.26666899049817806459E-8,
+        2.04891858946906374183E-7,   2.89137052083475648297E-6,
+        6.88975834691682398426E-5,   3.36911647825569408990E-3,
+        8.04490411014108831608E-1};
+
+    if (x < 0.0) {
+      x = -x;
+    }
+
+    if (x <= 8.0) {
+      double y = (x / 2.0) - 2.0;
+      return cephes::chebevl<double, 30>::run(y, A);
+    }
+
+    return cephes::chebevl<double, 25>::run(32.0 / x - 2.0, B) /
+           numext::sqrt(x);
+  }
+};
+
+template <typename Scalar>
+struct i1e_retval {
+  typedef Scalar type;
+};
+
+template <typename Scalar>
+struct i1e_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return Scalar(0);
+  }
+};
+
+template <>
+struct i1e_impl<float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE float run(float x) {
+    /* i1ef.c
+     *
+     *  Modified Bessel function of order one,
+     *  exponentially scaled
+     *
+     *
+     *
+     * SYNOPSIS:
+     *
+     * float x, y, i1ef();
+     *
+     * y = i1ef( x );
+     *
+     *
+     *
+     * DESCRIPTION:
+     *
+     * Returns exponentially scaled modified Bessel function
+     * of order one of the argument.
+     *
+     * The function is defined as i1(x) = -i exp(-|x|) j1( ix ).
+     *
+     *
+     *
+     * ACCURACY:
+     *
+     *                      Relative error:
+     * arithmetic   domain     # trials      peak         rms
+     *    IEEE      0, 30       30000       1.5e-6      1.5e-7
+     * See i1().
+     *
+     */
+    const float A[] = {9.38153738649577178388E-9f, -4.44505912879632808065E-8f,
+                       2.00329475355213526229E-7f, -8.56872026469545474066E-7f,
+                       3.47025130813767847674E-6f, -1.32731636560394358279E-5f,
+                       4.78156510755005422638E-5f, -1.61760815825896745588E-4f,
+                       5.12285956168575772895E-4f, -1.51357245063125314899E-3f,
+                       4.15642294431288815669E-3f, -1.05640848946261981558E-2f,
+                       2.47264490306265168283E-2f, -5.29459812080949914269E-2f,
+                       1.02643658689847095384E-1f, -1.76416518357834055153E-1f,
+                       2.52587186443633654823E-1f};
+
+    const float B[] = {-3.83538038596423702205E-9f, -2.63146884688951950684E-8f,
+                       -2.51223623787020892529E-7f, -3.88256480887769039346E-6f,
+                       -1.10588938762623716291E-4f, -9.76109749136146840777E-3f,
+                       7.78576235018280120474E-1f};
+
+    float z = numext::abs(x);
+
+    if (z <= 8.0f) {
+      float y = 0.5f * z - 2.0f;
+      z = cephes::chebevl<float, 17>::run(y, A) * z;
+    } else {
+      z = cephes::chebevl<float, 7>::run(32.0f / z - 2.0f, B) / numext::sqrt(z);
+    }
+
+    if (x < 0.0f) {
+      z = -z;
+    }
+
+    return z;
+  }
+};
+
+template <>
+struct i1e_impl<double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE double run(double x) {
+    /*  i1e.c
+     *
+     *  Modified Bessel function of order one,
+     *  exponentially scaled
+     *
+     *
+     *
+     * SYNOPSIS:
+     *
+     * double x, y, i1e();
+     *
+     * y = i1e( x );
+     *
+     *
+     *
+     * DESCRIPTION:
+     *
+     * Returns exponentially scaled modified Bessel function
+     * of order one of the argument.
+     *
+     * The function is defined as i1(x) = -i exp(-|x|) j1( ix ).
+     *
+     *
+     *
+     * ACCURACY:
+     *
+     *                      Relative error:
+     * arithmetic   domain     # trials      peak         rms
+     *    IEEE      0, 30       30000       2.0e-15     2.0e-16
+     * See i1().
+     *
+     */
+    const double A[] = {2.77791411276104639959E-18, -2.11142121435816608115E-17,
+                        1.55363195773620046921E-16, -1.10559694773538630805E-15,
+                        7.60068429473540693410E-15, -5.04218550472791168711E-14,
+                        3.22379336594557470981E-13, -1.98397439776494371520E-12,
+                        1.17361862988909016308E-11, -6.66348972350202774223E-11,
+                        3.62559028155211703701E-10, -1.88724975172282928790E-9,
+                        9.38153738649577178388E-9,  -4.44505912879632808065E-8,
+                        2.00329475355213526229E-7,  -8.56872026469545474066E-7,
+                        3.47025130813767847674E-6,  -1.32731636560394358279E-5,
+                        4.78156510755005422638E-5,  -1.61760815825896745588E-4,
+                        5.12285956168575772895E-4,  -1.51357245063125314899E-3,
+                        4.15642294431288815669E-3,  -1.05640848946261981558E-2,
+                        2.47264490306265168283E-2,  -5.29459812080949914269E-2,
+                        1.02643658689847095384E-1,  -1.76416518357834055153E-1,
+                        2.52587186443633654823E-1};
+    const double B[] = {
+        7.51729631084210481353E-18,  4.41434832307170791151E-18,
+        -4.65030536848935832153E-17, -3.20952592199342395980E-17,
+        2.96262899764595013876E-16,  3.30820231092092828324E-16,
+        -1.88035477551078244854E-15, -3.81440307243700780478E-15,
+        1.04202769841288027642E-14,  4.27244001671195135429E-14,
+        -2.10154184277266431302E-14, -4.08355111109219731823E-13,
+        -7.19855177624590851209E-13, 2.03562854414708950722E-12,
+        1.41258074366137813316E-11,  3.25260358301548823856E-11,
+        -1.89749581235054123450E-11, -5.58974346219658380687E-10,
+        -3.83538038596423702205E-9,  -2.63146884688951950684E-8,
+        -2.51223623787020892529E-7,  -3.88256480887769039346E-6,
+        -1.10588938762623716291E-4,  -9.76109749136146840777E-3,
+        7.78576235018280120474E-1};
+
+    double z = numext::abs(x);
+
+    if (z <= 8.0) {
+      double y = (z / 2.0) - 2.0;
+      z = cephes::chebevl<double, 29>::run(y, A) * z;
+    } else {
+      z = cephes::chebevl<double, 25>::run(32.0 / z - 2.0, B) / numext::sqrt(z);
+    }
+
+    if (x < 0.0) {
+      z = -z;
+    }
+
+    return z;
+  }
+};
+
 #endif  // EIGEN_HAS_C99_MATH
 
 }  // end namespace internal
@@ -1565,6 +1962,18 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar)
   return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x);
 }
 
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i0e, Scalar)
+    i0e(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(i0e, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(i1e, Scalar)
+    i1e(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(i1e, Scalar)::run(x);
+}
+
 }  // end namespace numext
 
 
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
index 46d60d323..4c176716b 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -50,6 +50,18 @@ Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; retur
 template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); }
 
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * order zero i0e(\a a) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pi0e(const Packet& x) { using numext::i0e; return i0e(x); }
+
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * order one i1e(\a a) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pi1e(const Packet& x) { using numext::i1e; return i1e(x); }
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
index e0e3a8be6..c25fea0b3 100644
--- a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
@@ -156,6 +156,32 @@ double2 pbetainc<double2>(const double2& a, const double2& b, const double2& x)
   return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y));
 }
 
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi0e<float4>(const float4& x) {
+  using numext::i0e;
+  return make_float4(i0e(x.x), i0e(x.y), i0e(x.z), i0e(x.w));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pi0e<double2>(const double2& x) {
+  using numext::i0e;
+  return make_double2(i0e(x.x), i0e(x.y));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pi1e<float4>(const float4& x) {
+  using numext::i1e;
+  return make_float4(i1e(x.x), i1e(x.y), i1e(x.z), i1e(x.w));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pi1e<double2>(const double2& x) {
+  using numext::i1e;
+  return make_double2(i1e(x.x), i1e(x.y));
+}
+
 #endif
 
 } // end namespace internal
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 9584a539f..63d0a345a 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -1208,6 +1208,116 @@ void test_cuda_betainc()
   cudaFree(d_out);
 }
 
+template <typename Scalar>
+void test_cuda_i0e()
+{
+  Tensor<Scalar, 1> in_x(21);
+  Tensor<Scalar, 1> out(21);
+  Tensor<Scalar, 1> expected_out(21);
+  out.setZero();
+
+  Array<Scalar, 1, Dynamic> in_x_array(21);
+  Array<Scalar, 1, Dynamic> expected_out_array(21);
+
+  in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0,
+      -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+  expected_out_array << 0.0897803118848, 0.0947062952128, 0.100544127361,
+      0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857,
+      0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554,
+      0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163,
+      0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128,
+      0.0897803118848;
+
+  for (int i = 0; i < 21; ++i) {
+    in_x(i) = in_x_array(i);
+    expected_out(i) = expected_out_array(i);
+  }
+
+  std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+  Scalar* d_in;
+  Scalar* d_out;
+  cudaMalloc((void**)(&d_in), bytes);
+  cudaMalloc((void**)(&d_out), bytes);
+
+  cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21);
+
+  gpu_out.device(gpu_device) = gpu_in.i0e();
+
+  assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
+                         gpu_device.stream()) == cudaSuccess);
+  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+  for (int i = 0; i < 21; ++i) {
+    VERIFY_IS_APPROX(out(i), expected_out(i));
+  }
+
+  cudaFree(d_in);
+  cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_i1e()
+{
+  Tensor<Scalar, 1> in_x(21);
+  Tensor<Scalar, 1> out(21);
+  Tensor<Scalar, 1> expected_out(21);
+  out.setZero();
+
+  Array<Scalar, 1, Dynamic> in_x_array(21);
+  Array<Scalar, 1, Dynamic> expected_out_array(21);
+
+  in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0,
+      -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+  expected_out_array << -0.0875062221833, -0.092036796872, -0.0973496147565,
+      -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293,
+      -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249,
+      0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384,
+      0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872,
+      0.0875062221833;
+
+  for (int i = 0; i < 21; ++i) {
+    in_x(i) = in_x_array(i);
+    expected_out(i) = expected_out_array(i);
+  }
+
+  std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+  Scalar* d_in;
+  Scalar* d_out;
+  cudaMalloc((void**)(&d_in), bytes);
+  cudaMalloc((void**)(&d_out), bytes);
+
+  cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21);
+
+  gpu_out.device(gpu_device) = gpu_in.i1e();
+
+  assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
+                         gpu_device.stream()) == cudaSuccess);
+  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+  for (int i = 0; i < 21; ++i) {
+    VERIFY_IS_APPROX(out(i), expected_out(i));
+  }
+
+  cudaFree(d_in);
+  cudaFree(d_out);
+}
+
 
 void test_cxx11_tensor_cuda()
 {
@@ -1280,5 +1390,11 @@ void test_cxx11_tensor_cuda()
 
   CALL_SUBTEST_6(test_cuda_betainc<float>());
   CALL_SUBTEST_6(test_cuda_betainc<double>());
+
+  CALL_SUBTEST_6(test_cuda_i0e<float>());
+  CALL_SUBTEST_6(test_cuda_i0e<double>());
+
+  CALL_SUBTEST_6(test_cuda_i1e<float>());
+  CALL_SUBTEST_6(test_cuda_i1e<double>());
 #endif
 }
diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp
index 057fb3e92..48d0db95e 100644
--- a/unsupported/test/special_functions.cpp
+++ b/unsupported/test/special_functions.cpp
@@ -335,6 +335,46 @@ template<typename ArrayType> void array_special_functions()
         ArrayType test = betainc(a, b + one, x) + eps;
         verify_component_wise(test, expected););
   }
+
+  // Test Bessel function i0e. Reference results obtained with SciPy.
+  {
+    ArrayType x(21);
+    ArrayType expected(21);
+    ArrayType res(21);
+
+    x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
+        2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+    expected << 0.0897803118848, 0.0947062952128, 0.100544127361,
+        0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857,
+        0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554,
+        0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163,
+        0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128,
+        0.0897803118848;
+
+    CALL_SUBTEST(res = i0e(x);
+                 verify_component_wise(res, expected););
+  }
+
+  // Test Bessel function i1e. Reference results obtained with SciPy.
+  {
+    ArrayType x(21);
+    ArrayType expected(21);
+    ArrayType res(21);
+
+    x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0,
+        2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0;
+
+    expected << -0.0875062221833, -0.092036796872, -0.0973496147565,
+        -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293,
+        -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249,
+        0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384,
+        0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872,
+        0.0875062221833;
+
+    CALL_SUBTEST(res = i1e(x);
+                 verify_component_wise(res, expected););
+  }
 #endif
 }
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2018-06-05 17:05:21 +0000
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2018-06-05 17:05:21 +0000
commit	e206f8d4a401fe2060bada4d4b5d92e3bf3b561c (patch)
tree	c5613a06e00edbb6a5c804ee39858911b87ef1e6
parent	e2ed0cf8abfe275bd147f3228e4c59f5242993f6 (diff)
parent	f216854453887f31ac02ffefb7a7a569dc3fa54d (diff)