diff options
author | Justin Lebar <justin.lebar@gmail.com> | 2016-04-28 13:57:08 -0700 |
---|---|---|
committer | Justin Lebar <justin.lebar@gmail.com> | 2016-04-28 13:57:08 -0700 |
commit | 40d1e2f8c7a85b7c0522d11d6e3d0c6a18bc9721 (patch) | |
tree | e19334d23d57e3a72d8e3431c409eaa48187ed50 /Eigen/src | |
parent | 3ec81fc00f447ce47df079c66450c680155ceeb4 (diff) |
Eliminate mutual recursion in igamma{,c}_impl::Run.
Presently, igammac_impl::Run calls igamma_impl::Run, which in turn calls
igammac_impl::Run.
This isn't actually mutual recursion; the calls are guarded such that we never
get into a loop. Nonetheless, it's a stretch for clang to prove this. As a
result, clang emits a recursive call in both igammac_impl::Run and
igamma_impl::Run.
That this is suboptimal code is bad enough, but it's particularly bad when
compiling for CUDA/nvptx. nvptx allows recursion, but only begrudgingly: If
you have recursive calls in a kernel, it's on you to manually specify the
kernel's stack size. Otherwise, ptxas will dump a warning, make a guess, and
who knows if it's right.
This change explicitly eliminates the mutual recursion in igammac_impl::Run and
igamma_impl::Run.
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/SpecialFunctions.h | 83 |
1 files changed, 67 insertions, 16 deletions
diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index a3857ae1f..10ff4371e 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -517,26 +517,51 @@ struct igammac_impl { */ const Scalar zero = 0; const Scalar one = 1; + const Scalar nan = NumTraits<Scalar>::quiet_NaN(); + + if ((x < zero) || (a <= zero)) { + // domain error + return nan; + } + + if ((x < one) || (x < a)) { + /* The checks above ensure that we meet the preconditions for + * igamma_impl::Impl(), so call it, rather than igamma_impl::Run(). + * Calling Run() would also work, but in that case the compiler may not be + * able to prove that igammac_impl::Run and igamma_impl::Run are not + * mutually recursive. This leads to worse code, particularly on + * platforms like nvptx, where recursion is allowed only begrudgingly. + */ + return (one - igamma_impl<Scalar>::Impl(a, x)); + } + + return Impl(a, x); + } + + private: + /* igamma_impl calls igammac_impl::Impl. */ + friend struct igamma_impl<Scalar>; + + /* Actually computes igamc(a, x). + * + * Preconditions: + * a > 0 + * x >= 1 + * x >= a + */ + static Scalar Impl(Scalar a, Scalar x) { + const Scalar zero = 0; + const Scalar one = 1; const Scalar two = 2; const Scalar machep = igamma_helper<Scalar>::machep(); const Scalar maxlog = numext::log(NumTraits<Scalar>::highest()); const Scalar big = igamma_helper<Scalar>::big(); const Scalar biginv = 1 / big; - const Scalar nan = NumTraits<Scalar>::quiet_NaN(); const Scalar inf = NumTraits<Scalar>::infinity(); Scalar ans, ax, c, yc, r, t, y, z; Scalar pk, pkm1, pkm2, qk, qkm1, qkm2; - if ((x < zero) || ( a <= zero)) { - // domain error - return nan; - } - - if ((x < one) || (x < a)) { - return (one - igamma_impl<Scalar>::run(a, x)); - } - if (x == inf) return zero; // std::isinf crashes on CUDA /* Compute x**a * exp(-x) / gamma(a) */ @@ -678,22 +703,48 @@ struct igamma_impl { */ const Scalar zero = 0; const Scalar one = 1; - const Scalar machep = igamma_helper<Scalar>::machep(); - const Scalar maxlog = numext::log(NumTraits<Scalar>::highest()); const Scalar nan = NumTraits<Scalar>::quiet_NaN(); - double ans, ax, c, r; - if (x == zero) return zero; - if ((x < zero) || ( a <= zero)) { // domain error + if ((x < zero) || (a <= zero)) { // domain error return nan; } if ((x > one) && (x > a)) { - return (one - igammac_impl<Scalar>::run(a, x)); + /* The checks above ensure that we meet the preconditions for + * igammac_impl::Impl(), so call it, rather than igammac_impl::Run(). + * Calling Run() would also work, but in that case the compiler may not be + * able to prove that igammac_impl::Run and igamma_impl::Run are not + * mutually recursive. This leads to worse code, particularly on + * platforms like nvptx, where recursion is allowed only begrudgingly. + */ + return (one - igammac_impl<Scalar>::Impl(a, x)); } + return Impl(a, x); + } + + private: + /* igammac_impl calls igamma_impl::Impl. */ + friend struct igammac_impl<Scalar>; + + /* Actually computes igam(a, x). + * + * Preconditions: + * x > 0 + * a > 0 + * !(x > 1 && x > a) + */ + static Scalar Impl(Scalar a, Scalar x) { + const Scalar zero = 0; + const Scalar one = 1; + const Scalar machep = igamma_helper<Scalar>::machep(); + const Scalar maxlog = numext::log(NumTraits<Scalar>::highest()); + const Scalar nan = NumTraits<Scalar>::quiet_NaN(); + + double ans, ax, c, r; + /* Compute x**a * exp(-x) / gamma(a) */ ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a); if (ax < -maxlog) { |