From d1862967a89501f0382834e0d128a53ad5764377 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 10 Dec 2015 22:23:21 +0100
Subject: Make sure ADOLC is recent enough by searching for adtl.h

---
 cmake/FindAdolc.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'cmake')
diff --git a/cmake/FindAdolc.cmake b/cmake/FindAdolc.cmake
index 1a7ff3628..937e54990 100644
--- a/cmake/FindAdolc.cmake
+++ b/cmake/FindAdolc.cmake
@@ -5,7 +5,7 @@ endif (ADOLC_INCLUDES AND ADOLC_LIBRARIES)
 
 find_path(ADOLC_INCLUDES
   NAMES
-  adolc/adouble.h
+  adolc/adtl.h
   PATHS
   $ENV{ADOLCDIR}
   ${INCLUDE_INSTALL_DIR}
-- 
cgit v1.2.3


From 49d96aee6448d67edbb0382fefca746304c5baaa Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Wed, 16 Dec 2015 11:37:16 +0100
Subject: bug #1120: Make sure that SuperLU version is checked

---
 cmake/FindSuperLU.cmake | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'cmake')

diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake
index 259ed7320..e4142fe4d 100644
--- a/cmake/FindSuperLU.cmake
+++ b/cmake/FindSuperLU.cmake
@@ -60,11 +60,21 @@ endif()
 
 cmake_pop_check_state()
 
+if(SuperLU_FIND_VERSION)
+  if(${SUPERLU_VERSION_VAR} VERSION_LESS ${SuperLU_FIND_VERSION})
+    set(SUPERLU_VERSION_OK FALSE)
+  else()
+    set(SUPERLU_VERSION_OK TRUE)
+  endif()
+else()
+  set(SUPERLU_VERSION_OK TRUE)
+endif()
+
 endif()
 
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(SUPERLU
-                                  REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES
+                                  REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES SUPERLU_VERSION_OK
                                   VERSION_VAR SUPERLU_VERSION_VAR)
 
 mark_as_advanced(SUPERLU_INCLUDES SUPERLU_LIBRARIES)
-- 
cgit v1.2.3


From 73220d2bb044e5f01b3eee4838c5b32ca679f027 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@gmail.com>
Date: Tue, 8 Mar 2016 17:28:21 -0800
Subject: Resolve bad merge.

---
 Eigen/src/Core/GenericPacketMath.h        |  10 ++
 Eigen/src/Core/GlobalFunctions.h          |  30 +++++
 Eigen/src/Core/NumTraits.h                |   5 +
 Eigen/src/Core/arch/CUDA/MathFunctions.h  |  36 ++++++
 Eigen/src/Core/functors/BinaryFunctors.h  |  49 ++++++++
 Eigen/src/Core/util/ForwardDeclarations.h |   2 +
 Eigen/src/Core/util/Meta.h                |   5 +
 cmake/EigenTesting.cmake                  |   7 +-
 test/array.cpp                            |  51 +++++++-
 unsupported/test/cxx11_tensor_cuda.cu     | 201 ++++++++++++++++++++++++++++++
 10 files changed, 393 insertions(+), 3 deletions(-)

(limited to 'cmake')

diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 02882bdea..802def51d 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -78,6 +78,8 @@ struct default_packet_traits
     HasDiGamma = 0,
     HasErf = 0,
     HasErfc = 0,
+    HasIGamma = 0,
+    HasIGammac = 0,
 
     HasRound  = 0,
     HasFloor  = 0,
@@ -457,6 +459,14 @@ Packet perf(const Packet& a) { using numext::erf; return erf(a); }
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
 
+/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
+
+/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); }
+
 /***************************************************************************
 * The following functions might not have to be overwritten for vectorized types
 ***************************************************************************/
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index 396da8e71..7df0fdda9 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -129,6 +129,36 @@ namespace Eigen
     );
   }
 
+  /** \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays.
+    *
+    * This function computes the coefficient-wise incomplete gamma function.
+    *
+    */
+  template<typename Derived,typename ExponentDerived>
+  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+  igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) 
+  {
+    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
+      a.derived(),
+      x.derived()
+    );
+  }
+
+  /** \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays.
+    *
+    * This function computes the coefficient-wise complementary incomplete gamma function.
+    *
+    */
+  template<typename Derived,typename ExponentDerived>
+  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+  igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) 
+  {
+    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
+      a.derived(),
+      x.derived()
+    );
+  }
+
   namespace internal
   {
     EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 6a596bb7d..7ddb4a867 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -95,6 +95,11 @@ template<typename T> struct GenericNumTraits
   static inline T infinity() {
     return numext::numeric_limits<T>::infinity();
   }
+
+  EIGEN_DEVICE_FUNC
+  static inline T quiet_NaN() {
+    return numext::numeric_limits<T>::quiet_NaN();
+  }
 };
 
 template<typename T> struct NumTraits : GenericNumTraits<T>
diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h
index a2c06a817..6822700f8 100644
--- a/Eigen/src/Core/arch/CUDA/MathFunctions.h
+++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -117,6 +117,42 @@ double2 perfc<double2>(const double2& a)
 }
 
 
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pigamma<float4>(const float4& a, const float4& x)
+{
+  using numext::igamma;
+  return make_float4(
+      igamma(a.x, x.x),
+      igamma(a.y, x.y),
+      igamma(a.z, x.z),
+      igamma(a.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pigamma<double2>(const double2& a, const double2& x)
+{
+  using numext::igamma;
+  return make_double2(igamma(a.x, x.x), igamma(a.y, x.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pigammac<float4>(const float4& a, const float4& x)
+{
+  using numext::igammac;
+  return make_float4(
+      igammac(a.x, x.x),
+      igammac(a.y, x.y),
+      igammac(a.z, x.z),
+      igammac(a.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pigammac<double2>(const double2& a, const double2& x)
+{
+  using numext::igammac;
+  return make_double2(igammac(a.x, x.x), igammac(a.y, x.y));
+}
+
 #endif
 
 } // end namespace internal
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index 4962d625c..5cdfff845 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -337,6 +337,55 @@ template<> struct functor_traits<scalar_boolean_or_op> {
   };
 };
 
+/** \internal
+  * \brief Template functor to compute the incomplete gamma function igamma(a, x)
+  *
+  * \sa class CwiseBinaryOp, Cwise::igamma
+  */
+template<typename Scalar> struct scalar_igamma_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
+    using numext::igamma; return igamma(a, x);
+  }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const {
+    return internal::pigammac(a, x);
+  }
+};
+template<typename Scalar>
+struct functor_traits<scalar_igamma_op<Scalar> > {
+  enum {
+    // Guesstimate
+    Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasIGamma
+  };
+};
+
+
+/** \internal
+  * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x)
+  *
+  * \sa class CwiseBinaryOp, Cwise::igammac
+  */
+template<typename Scalar> struct scalar_igammac_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
+    using numext::igammac; return igammac(a, x);
+  }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const
+  {
+    return internal::pigammac(a, x);
+  }
+};
+template<typename Scalar>
+struct functor_traits<scalar_igammac_op<Scalar> > {
+  enum {
+    // Guesstimate
+    Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasIGammac
+  };
+};
 
 
 //---------- binary functors bound to a constant, thus appearing as a unary functor ----------
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index f09632375..a102e5457 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -206,6 +206,8 @@ template<typename Scalar> struct scalar_add_op;
 template<typename Scalar> struct scalar_constant_op;
 template<typename Scalar> struct scalar_identity_op;
 template<typename Scalar,bool iscpx> struct scalar_sign_op;
+template<typename Scalar> struct scalar_igamma_op;
+template<typename Scalar> struct scalar_igammac_op;
 
 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
 template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 6b35179f2..24e8a6d8a 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -148,6 +148,7 @@ template<typename T> struct numeric_limits
   static T (max)() { assert(false && "Highest not supported for this type"); }
   static T (min)() { assert(false && "Lowest not supported for this type"); }
   static T infinity() { assert(false && "Infinity not supported for this type"); }
+  static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); }
 };
 template<> struct numeric_limits<float>
 {
@@ -159,6 +160,8 @@ template<> struct numeric_limits<float>
   static float (min)() { return FLT_MIN; }
   EIGEN_DEVICE_FUNC
   static float infinity() { return CUDART_INF_F; }
+  EIGEN_DEVICE_FUNC
+  static float quiet_NaN() { return CUDART_NAN_F; }
 };
 template<> struct numeric_limits<double>
 {
@@ -170,6 +173,8 @@ template<> struct numeric_limits<double>
   static double (min)() { return DBL_MIN; }
   EIGEN_DEVICE_FUNC
   static double infinity() { return CUDART_INF; }
+  EIGEN_DEVICE_FUNC
+  static double quiet_NaN() { return CUDART_NAN; }
 };
 template<> struct numeric_limits<int>
 {
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index 5022397a7..5ca800cfe 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -19,12 +19,15 @@ macro(ei_add_test_internal testname testname_with_suffix)
   endif()
   
   if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu)
-    cuda_add_executable(${targetname} ${filename})
+    if (${ARGC} GREATER 2)
+      cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2})
+    else()
+      cuda_add_executable(${targetname} ${filename})
+    endif()
   else()
     add_executable(${targetname} ${filename})
   endif()
   
-  
   if (targetname MATCHES "^eigen2_")
     add_dependencies(eigen2_buildtests ${targetname})
   else()
diff --git a/test/array.cpp b/test/array.cpp
index 96aef31c7..c61bfc8ed 100644
--- a/test/array.cpp
+++ b/test/array.cpp
@@ -295,7 +295,6 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   VERIFY_IS_APPROX(Eigen::pow(m1,2*exponents), m1.square().square());
   VERIFY_IS_APPROX(m1.pow(2*exponents), m1.square().square());
   VERIFY_IS_APPROX(pow(m1(0,0), exponents), ArrayType::Constant(rows,cols,m1(0,0)*m1(0,0)));
-  
 
   VERIFY_IS_APPROX(m3.pow(RealScalar(0.5)), m3.sqrt());
   VERIFY_IS_APPROX(pow(m3,RealScalar(0.5)), m3.sqrt());
@@ -305,6 +304,14 @@ template<typename ArrayType> void array_real(const ArrayType& m)
 
   VERIFY_IS_APPROX(log10(m3), log(m3)/log(10));
 
+  // Smoke test to check any compilation issues
+  ArrayType m1_abs_p1 = m1.abs() + 1;
+  ArrayType m2_abs_p1 = m2.abs() + 1;
+  VERIFY_IS_APPROX(Eigen::igamma(m1_abs_p1, m2_abs_p1), Eigen::igamma(m1_abs_p1, m2_abs_p1));
+  VERIFY_IS_APPROX(Eigen::igammac(m1_abs_p1, m2_abs_p1), Eigen::igammac(m1_abs_p1, m2_abs_p1));
+  VERIFY_IS_APPROX(Eigen::igamma(m2_abs_p1, m1_abs_p1), Eigen::igamma(m2_abs_p1, m1_abs_p1));
+  VERIFY_IS_APPROX(Eigen::igammac(m2_abs_p1, m1_abs_p1), Eigen::igammac(m2_abs_p1, m1_abs_p1));
+
   // scalar by array division
   const RealScalar tiny = sqrt(std::numeric_limits<RealScalar>::epsilon());
   s1 += Scalar(tiny);
@@ -323,6 +330,48 @@ template<typename ArrayType> void array_real(const ArrayType& m)
                     std::numeric_limits<RealScalar>::infinity());
     VERIFY_IS_EQUAL(numext::digamma(Scalar(-1)),
                     std::numeric_limits<RealScalar>::infinity());
+
+    Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+    Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
+    // location i*6+j corresponds to a_s[i], x_s[j].
+    Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+    Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan},
+                            {0.0, 0.6321205588285578, 0.7768698398515702,
+                             0.9816843611112658, 9.999500016666262e-05, 1.0},
+                            {0.0, 0.4275932955291202, 0.608374823728911,
+                             0.9539882943107686, 7.522076445089201e-07, 1.0},
+                            {0.0, 0.01898815687615381, 0.06564245437845008,
+                             0.5665298796332909, 4.166333347221828e-18, 1.0},
+                            {0.0, 0.9999780593618628, 0.9999899967080838,
+                             0.9999996219837988, 0.9991370418689945, 1.0},
+                            {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}};
+    Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan},
+                             {1.0, 0.36787944117144233, 0.22313016014842982,
+                              0.018315638888734182, 0.9999000049998333, 0.0},
+                             {1.0, 0.5724067044708798, 0.3916251762710878,
+                              0.04601170568923136, 0.9999992477923555, 0.0},
+                             {1.0, 0.9810118431238462, 0.9343575456215499,
+                              0.4334701203667089, 1.0, 0.0},
+                             {1.0, 2.1940638138146658e-05, 1.0003291916285e-05,
+                              3.7801620118431334e-07, 0.0008629581310054535,
+                              0.0},
+                             {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}};
+    for (int i = 0; i < 6; ++i) {
+      for (int j = 0; j < 6; ++j) {
+        if ((std::isnan)(igamma_s[i][j])) {
+          VERIFY((std::isnan)(numext::igamma(a_s[i], x_s[j])));
+        } else {
+          VERIFY_IS_APPROX(numext::igamma(a_s[i], x_s[j]), igamma_s[i][j]);
+        }
+
+        if ((std::isnan)(igammac_s[i][j])) {
+          VERIFY((std::isnan)(numext::igammac(a_s[i], x_s[j])));
+        } else {
+          VERIFY_IS_APPROX(numext::igammac(a_s[i], x_s[j]), igammac_s[i][j]);
+        }
+      }
+    }
   }
 #endif  // EIGEN_HAS_C99_MATH
 
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 58da21d3b..1964d9e07 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -574,6 +574,191 @@ void test_cuda_lgamma(const Scalar stddev)
   cudaFree(d_out);
 }
 
+template <typename Scalar>
+void test_cuda_digamma()
+{
+  Tensor<Scalar, 1> in(7);
+  Tensor<Scalar, 1> out(7);
+  Tensor<Scalar, 1> expected_out(7);
+  out.setZero();
+
+  in(0) = Scalar(1);
+  in(1) = Scalar(1.5);
+  in(2) = Scalar(4);
+  in(3) = Scalar(-10.5);
+  in(4) = Scalar(10000.5);
+  in(5) = Scalar(0);
+  in(6) = Scalar(-1);
+
+  expected_out(0) = Scalar(-0.5772156649015329);
+  expected_out(1) = Scalar(0.03648997397857645);
+  expected_out(2) = Scalar(1.2561176684318);
+  expected_out(3) = Scalar(2.398239129535781);
+  expected_out(4) = Scalar(9.210340372392849);
+  expected_out(5) = std::numeric_limits<Scalar>::infinity();
+  expected_out(6) = std::numeric_limits<Scalar>::infinity();
+
+  std::size_t bytes = in.size() * sizeof(Scalar);
+
+  Scalar* d_in;
+  Scalar* d_out;
+  cudaMalloc((void**)(&d_in), bytes);
+  cudaMalloc((void**)(&d_out), bytes);
+
+  cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 7);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7);
+
+  gpu_out.device(gpu_device) = gpu_in.digamma();
+
+  assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+  for (int i = 0; i < 5; ++i) {
+    VERIFY_IS_APPROX(out(i), expected_out(i));
+  }
+  for (int i = 5; i < 7; ++i) {
+    VERIFY_IS_EQUAL(out(i), expected_out(i));
+  }
+}
+
+template <typename Scalar>
+void test_cuda_igamma()
+{
+  Tensor<Scalar, 2> a(6, 6);
+  Tensor<Scalar, 2> x(6, 6);
+  Tensor<Scalar, 2> out(6, 6);
+  out.setZero();
+
+  Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+  Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
+  for (int i = 0; i < 6; ++i) {
+    for (int j = 0; j < 6; ++j) {
+      a(i, j) = a_s[i];
+      x(i, j) = x_s[j];
+    }
+  }
+
+  Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+  Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan},
+                          {0.0, 0.6321205588285578, 0.7768698398515702,
+                           0.9816843611112658, 9.999500016666262e-05, 1.0},
+                          {0.0, 0.4275932955291202, 0.608374823728911,
+                           0.9539882943107686, 7.522076445089201e-07, 1.0},
+                          {0.0, 0.01898815687615381, 0.06564245437845008,
+                           0.5665298796332909, 4.166333347221828e-18, 1.0},
+                          {0.0, 0.9999780593618628, 0.9999899967080838,
+                           0.9999996219837988, 0.9991370418689945, 1.0},
+                          {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}};
+
+
+
+  std::size_t bytes = a.size() * sizeof(Scalar);
+
+  Scalar* d_a;
+  Scalar* d_x;
+  Scalar* d_out;
+  cudaMalloc((void**)(&d_a), bytes);
+  cudaMalloc((void**)(&d_x), bytes);
+  cudaMalloc((void**)(&d_out), bytes);
+
+  cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
+  cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6);
+
+  gpu_out.device(gpu_device) = gpu_a.igamma(gpu_x);
+
+  assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+  for (int i = 0; i < 6; ++i) {
+    for (int j = 0; j < 6; ++j) {
+      if ((std::isnan)(igamma_s[i][j])) {
+        VERIFY((std::isnan)(out(i, j)));
+      } else {
+        VERIFY_IS_APPROX(out(i, j), igamma_s[i][j]);
+      }
+    }
+  }
+}
+
+template <typename Scalar>
+void test_cuda_igammac()
+{
+  Tensor<Scalar, 2> a(6, 6);
+  Tensor<Scalar, 2> x(6, 6);
+  Tensor<Scalar, 2> out(6, 6);
+  out.setZero();
+
+  Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+  Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
+  for (int i = 0; i < 6; ++i) {
+    for (int j = 0; j < 6; ++j) {
+      a(i, j) = a_s[i];
+      x(i, j) = x_s[j];
+    }
+  }
+
+  Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+  Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan},
+                           {1.0, 0.36787944117144233, 0.22313016014842982,
+                            0.018315638888734182, 0.9999000049998333, 0.0},
+                           {1.0, 0.5724067044708798, 0.3916251762710878,
+                            0.04601170568923136, 0.9999992477923555, 0.0},
+                           {1.0, 0.9810118431238462, 0.9343575456215499,
+                            0.4334701203667089, 1.0, 0.0},
+                           {1.0, 2.1940638138146658e-05, 1.0003291916285e-05,
+                            3.7801620118431334e-07, 0.0008629581310054535,
+                            0.0},
+                           {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}};
+
+  std::size_t bytes = a.size() * sizeof(Scalar);
+
+  Scalar* d_a;
+  Scalar* d_x;
+  Scalar* d_out;
+  cudaMalloc((void**)(&d_a), bytes);
+  cudaMalloc((void**)(&d_x), bytes);
+  cudaMalloc((void**)(&d_out), bytes);
+
+  cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
+  cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6);
+
+  gpu_out.device(gpu_device) = gpu_a.igammac(gpu_x);
+
+  assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+  for (int i = 0; i < 6; ++i) {
+    for (int j = 0; j < 6; ++j) {
+      if ((std::isnan)(igammac_s[i][j])) {
+        VERIFY((std::isnan)(out(i, j)));
+      } else {
+        VERIFY_IS_APPROX(out(i, j), igammac_s[i][j]);
+      }
+    }
+  }
+}
+
 template <typename Scalar>
 void test_cuda_erf(const Scalar stddev)
 {
@@ -667,30 +852,46 @@ void test_cxx11_tensor_cuda()
   CALL_SUBTEST_3(test_cuda_convolution_2d<RowMajor>());
   CALL_SUBTEST_3(test_cuda_convolution_3d<ColMajor>());
   CALL_SUBTEST_3(test_cuda_convolution_3d<RowMajor>());
+
   CALL_SUBTEST_4(test_cuda_lgamma<float>(1.0f));
   CALL_SUBTEST_4(test_cuda_lgamma<float>(100.0f));
   CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f));
   CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f));
+
+  CALL_SUBTEST_4(test_cuda_digamma<float>());
+
   CALL_SUBTEST_4(test_cuda_erf<float>(1.0f));
   CALL_SUBTEST_4(test_cuda_erf<float>(100.0f));
   CALL_SUBTEST_4(test_cuda_erf<float>(0.01f));
   CALL_SUBTEST_4(test_cuda_erf<float>(0.001f));
+
   CALL_SUBTEST_4(test_cuda_erfc<float>(1.0f));
   // CALL_SUBTEST(test_cuda_erfc<float>(100.0f));
   CALL_SUBTEST_4(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
   CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f));
   CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f));
+
   CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0));
   CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0));
   CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01));
   CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001));
+
+  CALL_SUBTEST_4(test_cuda_digamma<double>());
+
   CALL_SUBTEST_4(test_cuda_erf<double>(1.0));
   CALL_SUBTEST_4(test_cuda_erf<double>(100.0));
   CALL_SUBTEST_4(test_cuda_erf<double>(0.01));
   CALL_SUBTEST_4(test_cuda_erf<double>(0.001));
+
   CALL_SUBTEST_4(test_cuda_erfc<double>(1.0));
   // CALL_SUBTEST(test_cuda_erfc<double>(100.0));
   CALL_SUBTEST_4(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
   CALL_SUBTEST_4(test_cuda_erfc<double>(0.01));
   CALL_SUBTEST_4(test_cuda_erfc<double>(0.001));
+
+  CALL_SUBTEST_5(test_cuda_igamma<float>());
+  CALL_SUBTEST_5(test_cuda_igammac<float>());
+
+  CALL_SUBTEST_5(test_cuda_igamma<double>());
+  CALL_SUBTEST_5(test_cuda_igammac<double>());
 }
-- 
cgit v1.2.3


From bb0e73c1914c675755e8fab2a6db168b65a4de51 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Fri, 18 Mar 2016 12:17:37 -0700
Subject: Gate all the CUDA tests under the EIGEN_TEST_NVCC option

---
 cmake/EigenTesting.cmake        | 6 ++++++
 unsupported/test/CMakeLists.txt | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'cmake')

diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index 5ca800cfe..c70ec2c24 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -309,6 +309,12 @@ macro(ei_testing_print_summary)
       message(STATUS "C++11:             OFF")
     endif()
 
+    if(EIGEN_TEST_NVCC)
+      message(STATUS "CUDA:              ON")
+    else()
+      message(STATUS "CUDA:              OFF")
+    endif()
+
   endif() # vectorization / alignment options
 
   message(STATUS "\n${EIGEN_TESTING_SUMMARY}")
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 79c26fb72..19893cc25 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -174,7 +174,7 @@ endif()
 
 # These tests needs nvcc
 find_package(CUDA 7.0)
-if(CUDA_FOUND)
+if(CUDA_FOUND AND EIGEN_TEST_NVCC)
   # Mke sure to compile without the -pedantic and -Wundef flags since they trigger thousands of compilation warnings in the CUDA runtime
   string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
   string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-- 
cgit v1.2.3


From a9a6710e151ccd5d4fa9a6178db4413ed0c74911 Mon Sep 17 00:00:00 2001
From: Konstantinos Margaritis <markos@freevec.org>
Date: Mon, 21 Mar 2016 13:46:47 -0400
Subject: add initial s390x(zEC13) ZVECTOR support

---
 CMakeLists.txt           |  6 +++++-
 cmake/EigenTesting.cmake | 10 +++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'cmake')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 95f4c8d7c..51beba118 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -250,7 +250,11 @@ if(NOT MSVC)
     message(STATUS "Enabling NEON in tests/examples")
   endif()
 
-
+  option(EIGEN_TEST_ZVECTOR "Enable/Disable S390X(zEC13) ZVECTOR in tests/examples" OFF)
+  if(EIGEN_TEST_ZVECTOR)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z13 -mzvector")
+    message(STATUS "Enabling S390X(zEC13) ZVECTOR in tests/examples")
+  endif()
 
   check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP)
   if(COMPILER_SUPPORT_OPENMP)
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index c70ec2c24..1709e0334 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -302,7 +302,13 @@ macro(ei_testing_print_summary)
     else()
       message(STATUS "ARMv8 NEON:        Using architecture defaults")
     endif()
-    
+ 
+    if(EIGEN_TEST_ZVECTOR)
+      message(STATUS "S390X ZVECTOR:     ON")
+    else()
+      message(STATUS "S390X ZVECTOR:     Using architecture defaults")
+    endif()
+   
     if(EIGEN_TEST_CXX11)
       message(STATUS "C++11:             ON")
     else()
@@ -446,6 +452,8 @@ macro(ei_get_cxxflags VAR)
     set(${VAR} NEON)
   elseif(EIGEN_TEST_NEON64)
     set(${VAR} NEON)
+  elseif(EIGEN_TEST_ZVECTOR)
+    set(${VAR} ZVECTOR)
   elseif(EIGEN_TEST_VSX)
     set(${VAR} VSX)
   elseif(EIGEN_TEST_ALTIVEC)
-- 
cgit v1.2.3


From 2b457f8e5ec2bd38ce5049bd9220d3f37c7fa1ff Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 4 Apr 2016 11:47:46 +0200
Subject: Fix cross-compiling windows version detection

---
 cmake/EigenDetermineOSVersion.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'cmake')

diff --git a/cmake/EigenDetermineOSVersion.cmake b/cmake/EigenDetermineOSVersion.cmake
index 3c48d4c37..9246fa67c 100644
--- a/cmake/EigenDetermineOSVersion.cmake
+++ b/cmake/EigenDetermineOSVersion.cmake
@@ -26,7 +26,7 @@ function(DetermineShortWindowsName WIN_VERSION win_num_version)
 endfunction()
 
 function(DetermineOSVersion OS_VERSION)
-  if (WIN32)
+  if (WIN32 AND CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
     file (TO_NATIVE_PATH "$ENV{COMSPEC}" SHELL)
     exec_program( ${SHELL} ARGS "/c" "ver" OUTPUT_VARIABLE ver_output)
 				
-- 
cgit v1.2.3


From 7781f865cb6cc3faff3b1dfce557439abe3b56b9 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Wed, 6 Apr 2016 09:35:23 -0700
Subject: Renamed the EIGEN_TEST_NVCC cmake option into EIGEN_TEST_CUDA per the
 discussion in bug #1173.

---
 cmake/EigenTesting.cmake        | 2 +-
 test/CMakeLists.txt             | 8 ++++----
 unsupported/test/CMakeLists.txt | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'cmake')

diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index 1709e0334..d5e3972b5 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -315,7 +315,7 @@ macro(ei_testing_print_summary)
       message(STATUS "C++11:             OFF")
     endif()
 
-    if(EIGEN_TEST_NVCC)
+    if(EIGEN_TEST_CUDA)
       message(STATUS "CUDA:              ON")
     else()
       message(STATUS "CUDA:              OFF")
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4420e0c51..841c4572b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -325,9 +325,9 @@ if(EIGEN_TEST_EIGEN2)
 endif()
 
 
-# NVCC unit tests
-option(EIGEN_TEST_NVCC "Enable NVCC support in unit tests" OFF)
-if(EIGEN_TEST_NVCC)
+# CUDA unit tests
+option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF)
+if(EIGEN_TEST_CUDA)
   
 find_package(CUDA 5.0)
 if(CUDA_FOUND)
@@ -345,7 +345,7 @@ if(CUDA_FOUND)
 
 endif(CUDA_FOUND)
 
-endif(EIGEN_TEST_NVCC)
+endif(EIGEN_TEST_CUDA)
 
 
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests)    
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 6bd8cfb92..7972e6776 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -175,7 +175,7 @@ endif()
 
 # These tests needs nvcc
 find_package(CUDA 7.0)
-if(CUDA_FOUND AND EIGEN_TEST_NVCC)
+if(CUDA_FOUND AND EIGEN_TEST_CUDA)
   # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
   # and -fno-check-new flags since they trigger thousands of compilation warnings
   # in the CUDA runtime
-- 
cgit v1.2.3


From 0d2a532fc3b25199af03106b6d4ade0f92a30dfc Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Fri, 8 Apr 2016 13:16:08 -0700
Subject: Created the new EIGEN_TEST_CUDA_CLANG option to compile the CUDA
 tests using clang instead of nvcc

---
 cmake/EigenTesting.cmake        | 27 +++++++++++++++++++++++----
 test/CMakeLists.txt             | 11 ++++++++++-
 unsupported/test/CMakeLists.txt |  4 ++++
 3 files changed, 37 insertions(+), 5 deletions(-)

(limited to 'cmake')

diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index d5e3972b5..6f3661921 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -19,10 +19,25 @@ macro(ei_add_test_internal testname testname_with_suffix)
   endif()
   
   if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu)
-    if (${ARGC} GREATER 2)
-      cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2})
+    if(EIGEN_TEST_CUDA_CLANG)
+      set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX)
+      if(CUDA_64_BIT_DEVICE_CODE)
+        link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+      else()
+        link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib")
+      endif()
+      if (${ARGC} GREATER 2)
+        add_executable(${targetname} ${filename})
+      else()
+        add_executable(${targetname} ${filename} OPTIONS ${ARGV2})
+      endif()
+      target_link_libraries(${targetname} "cudart_static" "cuda" "dl" "rt" "pthread")
     else()
-      cuda_add_executable(${targetname} ${filename})
+      if (${ARGC} GREATER 2)
+        cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2})
+      else()
+        cuda_add_executable(${targetname} ${filename})
+      endif()
     endif()
   else()
     add_executable(${targetname} ${filename})
@@ -316,7 +331,11 @@ macro(ei_testing_print_summary)
     endif()
 
     if(EIGEN_TEST_CUDA)
-      message(STATUS "CUDA:              ON")
+      if(EIGEN_TEST_CUDA_CLANG)
+        message(STATUS "CUDA:              ON (using clang)")
+      else()
+        message(STATUS "CUDA:              ON (using nvcc)")
+      endif()
     else()
       message(STATUS "CUDA:              OFF")
     endif()
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 841c4572b..7bed6a45c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -327,8 +327,14 @@ endif()
 
 # CUDA unit tests
 option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF)
+option(EIGEN_TEST_CUDA_CLANG "Use clang instead of nvcc to compile the CUDA tests" OFF)
+
+if(EIGEN_TEST_CUDA_CLANG AND NOT CMAKE_CXX_COMPILER MATCHES "clang")
+  message(WARNING "EIGEN_TEST_CUDA_CLANG is set, but CMAKE_CXX_COMPILER does not appear to be clang.")
+endif()
+
 if(EIGEN_TEST_CUDA)
-  
+
 find_package(CUDA 5.0)
 if(CUDA_FOUND)
   
@@ -336,6 +342,9 @@ if(CUDA_FOUND)
   if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 
     set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
   endif()
+  if(EIGEN_TEST_CUDA_CLANG)
+   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30")
+  endif()
   cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR})
   set(EIGEN_ADD_TEST_FILENAME_EXTENSION  "cu")
   
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index c6a92fe73..b1931d80a 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -190,6 +190,10 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
   if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
     set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
   endif()
+  if(EIGEN_TEST_CUDA_CLANG)
+   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}")
+  endif()
+
   set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"")
   cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
   set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
-- 
cgit v1.2.3