From f7124b3e467363e45c3d906b7003f1520a5f804a Mon Sep 17 00:00:00 2001
From: Andrea Bocci <andrea.bocci@cern.ch>
Date: Mon, 11 Jun 2018 18:33:24 +0200
Subject: Extend CUDA support to matrix inversion and selfadjointeigensolver

---
 Eigen/Core | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

(limited to 'Eigen/Core')

diff --git a/Eigen/Core b/Eigen/Core
index f6bc18a08..5117461c7 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -45,27 +45,40 @@
   #ifdef EIGEN_EXCEPTIONS
   #undef EIGEN_EXCEPTIONS
   #endif
+#endif
 
-  // All functions callable from CUDA code must be qualified with __device__
-  #ifdef EIGEN_CUDACC
-    // Do not try to vectorize on CUDA and SYCL!
-    #ifndef EIGEN_DONT_VECTORIZE
-    #define EIGEN_DONT_VECTORIZE
-    #endif
+// All functions callable from CUDA code must be qualified with __device__
+#ifdef EIGEN_CUDACC
+  // Do not try to vectorize on CUDA and SYCL!
+  #ifndef EIGEN_DONT_VECTORIZE
+  #define EIGEN_DONT_VECTORIZE
+  #endif
 
-    #define EIGEN_DEVICE_FUNC __host__ __device__
-    // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
-    // works properly on the device side
-    #include <cuda_runtime.h>
-  #else
-    #define EIGEN_DEVICE_FUNC
+  #define EIGEN_DEVICE_FUNC __host__ __device__
+  // We need cuda_runtime.h to ensure that the EIGEN_USING_STD_MATH macro
+  // works properly on the device side
+  #include <cuda_runtime.h>
+
+  #if EIGEN_HAS_CONSTEXPR
+    // Available since C++11, but mostly useful from C++14 onwards thanks to its relaxed constexpr rules
+    #if defined(__NVCC__)
+      // nvcc treats constexpr functions as __host__ __device__ when given the option --expt-relaxed-constexpr
+      #ifdef __CUDACC_RELAXED_CONSTEXPR__
+        #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
+      #endif
+    #elif defined(__clang__) && defined(__CUDA__)
+      // clang++ always considers constexpr functions as implicitly __host__ __device__
+      #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
+    #endif
   #endif
 #else
   #define EIGEN_DEVICE_FUNC
 #endif
 
 #ifdef __NVCC__
-#define EIGEN_DONT_VECTORIZE
+  #ifndef EIGEN_DONT_VECTORIZE
+  #define EIGEN_DONT_VECTORIZE
+  #endif
 #endif
 
 // When compiling CUDA device code with NVCC, pull in math functions from the
-- 
cgit v1.2.3