Eliminate mutual recursion in igamma{,c}_impl::Run.

Presently, igammac_impl::Run calls igamma_impl::Run, which in turn calls igammac_impl::Run. This isn't actually mutual recursion; the calls are guarded such that we never get into a loop. Nonetheless, it's a stretch for clang to prove this. As a result, clang emits a recursive call in both igammac_impl::Run and igamma_impl::Run. That this is suboptimal code is bad enough, but it's particularly bad when compiling for CUDA/nvptx. nvptx allows recursion, but only begrudgingly: If you have recursive calls in a kernel, it's on you to manually specify the kernel's stack size. Otherwise, ptxas will dump a warning, make a guess, and who knows if it's right. This change explicitly eliminates the mutual recursion in igammac_impl::Run and igamma_impl::Run.
author: Justin Lebar <justin.lebar@gmail.com> 2016-04-28 13:57:08 -0700
committer: Justin Lebar <justin.lebar@gmail.com> 2016-04-28 13:57:08 -0700
commit: 40d1e2f8c7a85b7c0522d11d6e3d0c6a18bc9721 (patch)
tree: e19334d23d57e3a72d8e3431c409eaa48187ed50 /Eigen/src
parent: 3ec81fc00f447ce47df079c66450c680155ceeb4 (diff)
1 files changed, 67 insertions, 16 deletions
diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h
index a3857ae1f..10ff4371e 100644
--- a/Eigen/src/Core/SpecialFunctions.h
+++ b/Eigen/src/Core/SpecialFunctions.h
@@ -517,26 +517,51 @@ struct igammac_impl {
     */
     const Scalar zero = 0;
     const Scalar one = 1;
+    const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+    if ((x < zero) || (a <= zero)) {
+      // domain error
+      return nan;
+    }
+
+    if ((x < one) || (x < a)) {
+      /* The checks above ensure that we meet the preconditions for
+       * igamma_impl::Impl(), so call it, rather than igamma_impl::Run().
+       * Calling Run() would also work, but in that case the compiler may not be
+       * able to prove that igammac_impl::Run and igamma_impl::Run are not
+       * mutually recursive.  This leads to worse code, particularly on
+       * platforms like nvptx, where recursion is allowed only begrudgingly.
+       */
+      return (one - igamma_impl<Scalar>::Impl(a, x));
+    }
+
+    return Impl(a, x);
+  }
+
+ private:
+  /* igamma_impl calls igammac_impl::Impl. */
+  friend struct igamma_impl<Scalar>;
+
+  /* Actually computes igamc(a, x).
+   *
+   * Preconditions:
+   *   a > 0
+   *   x >= 1
+   *   x >= a
+   */
+  static Scalar Impl(Scalar a, Scalar x) {
+    const Scalar zero = 0;
+    const Scalar one = 1;
     const Scalar two = 2;
     const Scalar machep = igamma_helper<Scalar>::machep();
     const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
     const Scalar big = igamma_helper<Scalar>::big();
     const Scalar biginv = 1 / big;
-    const Scalar nan = NumTraits<Scalar>::quiet_NaN();
     const Scalar inf = NumTraits<Scalar>::infinity();
 
     Scalar ans, ax, c, yc, r, t, y, z;
     Scalar pk, pkm1, pkm2, qk, qkm1, qkm2;
 
-    if ((x < zero) || ( a <= zero)) {
-      // domain error
-      return nan;
-    }
-
-    if ((x < one) || (x < a)) {
-      return (one - igamma_impl<Scalar>::run(a, x));
-    }
-
     if (x == inf) return zero;  // std::isinf crashes on CUDA
 
     /* Compute  x**a * exp(-x) / gamma(a)  */
@@ -678,22 +703,48 @@ struct igamma_impl {
      */
     const Scalar zero = 0;
     const Scalar one = 1;
-    const Scalar machep = igamma_helper<Scalar>::machep();
-    const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
     const Scalar nan = NumTraits<Scalar>::quiet_NaN();
 
-    double ans, ax, c, r;
-
     if (x == zero) return zero;
 
-    if ((x < zero) || ( a <= zero)) {  // domain error
+    if ((x < zero) || (a <= zero)) {  // domain error
       return nan;
     }
 
     if ((x > one) && (x > a)) {
-      return (one - igammac_impl<Scalar>::run(a, x));
+      /* The checks above ensure that we meet the preconditions for
+       * igammac_impl::Impl(), so call it, rather than igammac_impl::Run().
+       * Calling Run() would also work, but in that case the compiler may not be
+       * able to prove that igammac_impl::Run and igamma_impl::Run are not
+       * mutually recursive.  This leads to worse code, particularly on
+       * platforms like nvptx, where recursion is allowed only begrudgingly.
+       */
+      return (one - igammac_impl<Scalar>::Impl(a, x));
     }
 
+    return Impl(a, x);
+  }
+
+ private:
+  /* igammac_impl calls igamma_impl::Impl. */
+  friend struct igammac_impl<Scalar>;
+
+  /* Actually computes igam(a, x).
+   *
+   * Preconditions:
+   *   x > 0
+   *   a > 0
+   *   !(x > 1 && x > a)
+   */
+  static Scalar Impl(Scalar a, Scalar x) {
+    const Scalar zero = 0;
+    const Scalar one = 1;
+    const Scalar machep = igamma_helper<Scalar>::machep();
+    const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
+    const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+    double ans, ax, c, r;
+
     /* Compute  x**a * exp(-x) / gamma(a)  */
     ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
     if (ax < -maxlog) {
author	Justin Lebar <justin.lebar@gmail.com>	2016-04-28 13:57:08 -0700
committer	Justin Lebar <justin.lebar@gmail.com>	2016-04-28 13:57:08 -0700
commit	40d1e2f8c7a85b7c0522d11d6e3d0c6a18bc9721 (patch)
tree	e19334d23d57e3a72d8e3431c409eaa48187ed50 /Eigen/src
parent	3ec81fc00f447ce47df079c66450c680155ceeb4 (diff)