From bd0fac456f8ba4fa980a1cbca4b86ac207b82751 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen
Date: Tue, 1 Oct 2019 13:15:30 -0700
Subject: Prevent infinite loop in the nvcc compiler while unrolling the recurrent templates for Chebyshev polynomial evaluation.

---
 .../Core/arch/Default/GenericPacketMathFunctions.h | 43 ++++++----------------
 1 file changed, 11 insertions(+), 32 deletions(-)

(limited to 'Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h')

diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 0a4b66089..a354fb5fe 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -621,43 +621,22 @@ struct ppolevl {
  * the same degree.
  *
  */
-template <typename Packet, int N>
-struct generic_cheb_recurrence {
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {
-    EIGEN_STATIC_ASSERT((N > 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
-    return pmadd(
-        generic_cheb_recurrence<Packet, N - 1>::run(x, coef), x,
-        psub(pset1<Packet>(coef[N - 1]), generic_cheb_recurrence<Packet, N - 2>::run(x, coef)));
-  }
-};
-
-template <typename Packet>
-struct generic_cheb_recurrence<Packet, 2> {
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {
-    return pmadd(pset1<Packet>(coef[0]), x, pset1<Packet>(coef[1]));
-  }
-};
-
-template <typename Packet>
-struct generic_cheb_recurrence<Packet, 1> {
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {
-    EIGEN_UNUSED_VARIABLE(x);
-    return pset1<Packet>(coef[0]);
-  }
-};
 template <typename Packet, int N>
 struct pchebevl {
   EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {
-    const Packet half = pset1<Packet>(0.5);
-    return pmul(half, psub(
-        generic_cheb_recurrence<Packet, N>::run(x, coef),
-        generic_cheb_recurrence<Packet, N - 2>::run(x, coef)));
+    Packet b0 = pset1<Packet>(coef[0]);
+    Packet b1 = pset1<Packet>(0.f);
+    Packet b2;
+
+    for (int i = 1; i < N; i++) {
+      b2 = b1;
+      b1 = b0;
+      b0 = padd(psub(pmul(x, b1), b2), pset1<Packet>(coef[i]));
+    }
+
+    return pmul(pset1<Packet>(0.5f), psub(b0, b2));
   }
 };
-- 
cgit v1.2.3