aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h2
-rw-r--r--Eigen/src/Core/arch/AVX512/PacketMath.h2
-rw-r--r--Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h91
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h3
-rw-r--r--Eigen/src/Core/util/ForwardDeclarations.h2
-rw-r--r--test/packetmath.cpp2
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h229
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h10
8 files changed, 196 insertions, 145 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index e3363d006..2e5f5e5bc 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -73,6 +73,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasExpm1 = 1,
HasExp = 1,
HasNdtri = 1,
+ HasI0e = 1,
+ HasI1e = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasTanh = EIGEN_FAST_MATH,
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 11c8dae02..67e667640 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -99,6 +99,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasExpm1 = 1,
HasNdtri = 1,
#endif
+ HasI0e = 1,
+ HasI1e = 1,
HasExp = 1,
HasSqrt = EIGEN_FAST_MATH,
HasRsqrt = EIGEN_FAST_MATH,
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 518db2207..0a4b66089 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -570,6 +570,97 @@ struct ppolevl<Packet, 0> {
}
};
+/* chbevl (modified for Eigen)
+ *
+ * Evaluate Chebyshev series
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * int N;
+ * Scalar x, y, coef[N], chebevl();
+ *
+ * y = chbevl( x, coef, N );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Evaluates the series
+ *
+ * N-1
+ * - '
+ * y = > coef[i] T (x/2)
+ * - i
+ * i=0
+ *
+ * of Chebyshev polynomials Ti at argument x/2.
+ *
+ * Coefficients are stored in reverse order, i.e. the zero
+ * order term is last in the array. Note N is the number of
+ * coefficients, not the order.
+ *
+ * If coefficients are for the interval a to b, x must
+ * have been transformed to x -> 2(2x - b - a)/(b-a) before
+ * entering the routine. This maps x from (a, b) to (-1, 1),
+ * over which the Chebyshev polynomials are defined.
+ *
+ * If the coefficients are for the inverted interval, in
+ * which (a, b) is mapped to (1/b, 1/a), the transformation
+ * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity,
+ * this becomes x -> 4a/x - 1.
+ *
+ *
+ *
+ * SPEED:
+ *
+ * Taking advantage of the recurrence properties of the
+ * Chebyshev polynomials, the routine requires one more
+ * addition per loop than evaluating a nested polynomial of
+ * the same degree.
+ *
+ */
+template <typename Packet, int N>
+struct generic_cheb_recurrence {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {
+ EIGEN_STATIC_ASSERT((N > 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ return pmadd(
+ generic_cheb_recurrence<Packet, N - 1>::run(x, coef), x,
+ psub(pset1<Packet>(coef[N - 1]), generic_cheb_recurrence<Packet, N -
+ 2>::run(x, coef)));
+ }
+};
+
+template <typename Packet>
+struct generic_cheb_recurrence<Packet, 2> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {
+ return pmadd(pset1<Packet>(coef[0]), x, pset1<Packet>(coef[1]));
+ }
+};
+
+template <typename Packet>
+struct generic_cheb_recurrence<Packet, 1> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {
+ EIGEN_UNUSED_VARIABLE(x);
+ return pset1<Packet>(coef[0]);
+ }
+};
+
+template <typename Packet, int N>
+struct pchebevl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {
+ const Packet half = pset1<Packet>(0.5);
+ return pmul(half, psub(
+ generic_cheb_recurrence<Packet, N>::run(x, coef),
+ generic_cheb_recurrence<Packet, N - 2>::run(x, coef)));
+ }
+};
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index ddd2979af..0aadefab7 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -114,7 +114,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasExpm1 = 1,
HasNdtri = 1,
HasExp = 1,
- HasNdtri = 1,
+ HasI0e = 1,
+ HasI1e = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasTanh = EIGEN_FAST_MATH,
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 18889fcf4..749945031 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -215,6 +215,8 @@ template<typename Scalar> struct scalar_digamma_op;
template<typename Scalar> struct scalar_erf_op;
template<typename Scalar> struct scalar_erfc_op;
template<typename Scalar> struct scalar_ndtri_op;
+template<typename Scalar> struct scalar_i0e_op;
+template<typename Scalar> struct scalar_i1e_op;
template<typename Scalar> struct scalar_igamma_op;
template<typename Scalar> struct scalar_igammac_op;
template<typename Scalar> struct scalar_zeta_op;
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 0e1e50e21..05739a31b 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -609,6 +609,8 @@ template<typename Scalar,typename Packet> void packetmath_real()
CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt);
CHECK_CWISE1_IF(PacketTraits::HasSqrt, Scalar(1)/std::sqrt, internal::prsqrt);
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
+ CHECK_CWISE1_IF(PacketTraits::HasI0e, numext::i0e, internal::pi0e);
+ CHECK_CWISE1_IF(PacketTraits::HasI1e, numext::i1e, internal::pi1e);
#if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L)
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLGamma, std::lgamma, internal::plgamma);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErf, std::erf, internal::perf);
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index a9bc205ab..7c6d32049 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -36,78 +36,6 @@ namespace internal {
// Good luck with your project,
// Steve
-namespace cephes {
-
-/* chbevl (modified for Eigen)
- *
- * Evaluate Chebyshev series
- *
- *
- *
- * SYNOPSIS:
- *
- * int N;
- * Scalar x, y, coef[N], chebevl();
- *
- * y = chbevl( x, coef, N );
- *
- *
- *
- * DESCRIPTION:
- *
- * Evaluates the series
- *
- * N-1
- * - '
- * y = > coef[i] T (x/2)
- * - i
- * i=0
- *
- * of Chebyshev polynomials Ti at argument x/2.
- *
- * Coefficients are stored in reverse order, i.e. the zero
- * order term is last in the array. Note N is the number of
- * coefficients, not the order.
- *
- * If coefficients are for the interval a to b, x must
- * have been transformed to x -> 2(2x - b - a)/(b-a) before
- * entering the routine. This maps x from (a, b) to (-1, 1),
- * over which the Chebyshev polynomials are defined.
- *
- * If the coefficients are for the inverted interval, in
- * which (a, b) is mapped to (1/b, 1/a), the transformation
- * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity,
- * this becomes x -> 4a/x - 1.
- *
- *
- *
- * SPEED:
- *
- * Taking advantage of the recurrence properties of the
- * Chebyshev polynomials, the routine requires one more
- * addition per loop than evaluating a nested polynomial of
- * the same degree.
- *
- */
-template <typename Scalar, int N>
-struct chebevl {
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(Scalar x, const Scalar coef[]) {
- Scalar b0 = coef[0];
- Scalar b1 = 0;
- Scalar b2;
-
- for (int i = 1; i < N; i++) {
- b2 = b1;
- b1 = b0;
- b0 = x * b1 - b2 + coef[i];
- }
-
- return Scalar(0.5) * (b0 - b2);
- }
-};
-
-} // end namespace cephes
/****************************************************************************
* Implementation of lgamma, requires C++11/C99 *
@@ -1945,20 +1873,20 @@ struct i0e_retval {
typedef Scalar type;
};
-template <typename Scalar>
-struct i0e_impl {
+template <typename T, typename ScalarType>
+struct generic_i0e {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
- EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ static EIGEN_STRONG_INLINE T run(const T&) {
+ EIGEN_STATIC_ASSERT((internal::is_same<T, T>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
- return Scalar(0);
+ return ScalarType(0);
}
};
-template <>
-struct i0e_impl<float> {
+template <typename T>
+struct generic_i0e<T, float> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE float run(float x) {
+ static EIGEN_STRONG_INLINE T run(const T& x) {
/* i0ef.c
*
* Modified Bessel function of order zero,
@@ -1991,6 +1919,7 @@ struct i0e_impl<float> {
* See i0f().
*
*/
+
const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f,
-2.67079385394061173391E-7f, 1.11738753912010371815E-6f,
-4.41673835845875056359E-6f, 1.64484480707288970893E-5f,
@@ -2005,23 +1934,24 @@ struct i0e_impl<float> {
2.04891858946906374183E-7f, 2.89137052083475648297E-6f,
6.88975834691682398426E-5f, 3.36911647825569408990E-3f,
8.04490411014108831608E-1f};
- if (x < 0.0f) {
- x = -x;
- }
-
- if (x <= 8.0f) {
- float y = 0.5f * x - 2.0f;
- return cephes::chebevl<float, 18>::run(y, A);
- }
-
- return cephes::chebevl<float, 7>::run(32.0f / x - 2.0f, B) / numext::sqrt(x);
+ T y = pabs(x);
+ T y_le_eight = internal::pchebevl<T, 18>::run(
+ pmadd(pset1<T>(0.5f), y, pset1<T>(-2.0f)), A);
+ T y_gt_eight = pdiv(
+ internal::pchebevl<T, 7>::run(
+ psub(pdiv(pset1<T>(32.0f), y), pset1<T>(2.0f)), B),
+ psqrt(y));
+ // TODO: Perhaps instead check whether all packet elements are in
+ // [-8, 8] and evaluate a branch based off of that. It's possible
+ // in practice most elements are in this region.
+ return pselect(pcmp_le(y, pset1<T>(8.0f)), y_le_eight, y_gt_eight);
}
};
-template <>
-struct i0e_impl<double> {
+template <typename T>
+struct generic_i0e<T, double> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE double run(double x) {
+ static EIGEN_STRONG_INLINE T run(const T& x) {
/* i0e.c
*
* Modified Bessel function of order zero,
@@ -2054,6 +1984,7 @@ struct i0e_impl<double> {
* See i0().
*
*/
+
const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17,
-2.43127984654795469359E-16, 1.71539128555513303061E-15,
-1.16853328779934516808E-14, 7.67618549860493561688E-14,
@@ -2083,40 +2014,48 @@ struct i0e_impl<double> {
2.04891858946906374183E-7, 2.89137052083475648297E-6,
6.88975834691682398426E-5, 3.36911647825569408990E-3,
8.04490411014108831608E-1};
+ T y = pabs(x);
+ T y_le_eight = internal::pchebevl<T, 30>::run(
+ pmadd(pset1<T>(0.5), y, pset1<T>(-2.0)), A);
+ T y_gt_eight = pdiv(
+ internal::pchebevl<T, 25>::run(
+ psub(pdiv(pset1<T>(32.0), y), pset1<T>(2.0)), B),
+ psqrt(y));
+ // TODO: Perhaps instead check whether all packet elements are in
+ // [-8, 8] and evaluate a branch based off of that. It's possible
+ // in practice most elements are in this region.
+ return pselect(pcmp_le(y, pset1<T>(8.0)), y_le_eight, y_gt_eight);
+ }
+};
- if (x < 0.0) {
- x = -x;
- }
-
- if (x <= 8.0) {
- double y = (x / 2.0) - 2.0;
- return cephes::chebevl<double, 30>::run(y, A);
- }
-
- return cephes::chebevl<double, 25>::run(32.0 / x - 2.0, B) /
- numext::sqrt(x);
+template <typename Scalar>
+struct i0e_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar x) {
+ return generic_i0e<Scalar, Scalar>::run(x);
}
};
+
template <typename Scalar>
struct i1e_retval {
typedef Scalar type;
};
-template <typename Scalar>
-struct i1e_impl {
+template <typename T, typename ScalarType>
+struct generic_i1e {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
- EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ static EIGEN_STRONG_INLINE T run(const T&) {
+ EIGEN_STATIC_ASSERT((internal::is_same<T, T>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
- return Scalar(0);
+ return ScalarType(0);
}
};
-template <>
-struct i1e_impl<float> {
+template <typename T>
+struct generic_i1e<T, float> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE float run(float x) {
+ static EIGEN_STRONG_INLINE T run(const T& x) {
/* i1ef.c
*
* Modified Bessel function of order one,
@@ -2164,27 +2103,27 @@ struct i1e_impl<float> {
-1.10588938762623716291E-4f, -9.76109749136146840777E-3f,
7.78576235018280120474E-1f};
- float z = numext::abs(x);
-
- if (z <= 8.0f) {
- float y = 0.5f * z - 2.0f;
- z = cephes::chebevl<float, 17>::run(y, A) * z;
- } else {
- z = cephes::chebevl<float, 7>::run(32.0f / z - 2.0f, B) / numext::sqrt(z);
- }
- if (x < 0.0f) {
- z = -z;
- }
-
- return z;
+ T y = pabs(x);
+ T y_le_eight = pmul(y, internal::pchebevl<T, 17>::run(
+ pmadd(pset1<T>(0.5f), y, pset1<T>(-2.0f)), A));
+ T y_gt_eight = pdiv(
+ internal::pchebevl<T, 7>::run(
+ psub(pdiv(pset1<T>(32.0f), y),
+ pset1<T>(2.0f)), B),
+ psqrt(y));
+ // TODO: Perhaps instead check whether all packet elements are in
+ // [-8, 8] and evaluate a branch based off of that. It's possible
+ // in practice most elements are in this region.
+ y = pselect(pcmp_le(y, pset1<T>(8.0f)), y_le_eight, y_gt_eight);
+ return pselect(pcmp_lt(x, pset1<T>(0.0f)), -y, y);
}
};
-template <>
-struct i1e_impl<double> {
+template <typename T>
+struct generic_i1e<T, double> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE double run(double x) {
+ static EIGEN_STRONG_INLINE T run(const T& x) {
/* i1e.c
*
* Modified Bessel function of order one,
@@ -2246,21 +2185,27 @@ struct i1e_impl<double> {
-2.51223623787020892529E-7, -3.88256480887769039346E-6,
-1.10588938762623716291E-4, -9.76109749136146840777E-3,
7.78576235018280120474E-1};
+ T y = pabs(x);
+ T y_le_eight = pmul(y, internal::pchebevl<T, 29>::run(
+ pmadd(pset1<T>(0.5), y, pset1<T>(-2.0)), A));
+ T y_gt_eight = pdiv(
+ internal::pchebevl<T, 25>::run(
+ psub(pdiv(pset1<T>(32.0), y),
+ pset1<T>(2.0)), B),
+ psqrt(y));
+ // TODO: Perhaps instead check whether all packet elements are in
+ // [-8, 8] and evaluate a branch based off of that. It's possible
+ // in practice most elements are in this region.
+ y = pselect(pcmp_le(y, pset1<T>(8.0)), y_le_eight, y_gt_eight);
+ return pselect(pcmp_lt(x, pset1<T>(0.0f)), -y, y);
+ }
+};
- double z = numext::abs(x);
-
- if (z <= 8.0) {
- double y = (z / 2.0) - 2.0;
- z = cephes::chebevl<double, 29>::run(y, A) * z;
- } else {
- z = cephes::chebevl<double, 25>::run(32.0 / z - 2.0, B) / numext::sqrt(z);
- }
-
- if (x < 0.0) {
- z = -z;
- }
-
- return z;
+template <typename Scalar>
+struct i1e_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar x) {
+ return generic_i1e<Scalar, Scalar>::run(x);
}
};
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
index b6323c4db..21908e512 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -76,13 +76,19 @@ Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext
* order zero i0e(\a a) (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pi0e(const Packet& x) { using numext::i0e; return i0e(x); }
+Packet pi0e(const Packet& x) {
+ typedef typename unpacket_traits<Packet>::type ScalarType;
+ using internal::generic_i0e; return generic_i0e<Packet, ScalarType>::run(x);
+}
/** \internal \returns the exponentially scaled modified Bessel function of
* order one i1e(\a a) (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pi1e(const Packet& x) { using numext::i1e; return i1e(x); }
+Packet pi1e(const Packet& x) {
+ typedef typename unpacket_traits<Packet>::type ScalarType;
+ using internal::generic_i1e; return generic_i1e<Packet, ScalarType>::run(x);
+}
} // end namespace internal