Extend CUDA support to matrix inversion and selfadjointeigensolver

author: Andrea Bocci <andrea.bocci@cern.ch> 2018-06-11 18:33:24 +0200
committer: Andrea Bocci <andrea.bocci@cern.ch> 2018-06-11 18:33:24 +0200
commit: f7124b3e467363e45c3d906b7003f1520a5f804a (patch)
tree: f5ba6d719fc4d8f1b5cd56f0043b784fb6b9e268 /Eigen/Core
parent: 05371239533012e652de0b88a3e0aa992a48a80f (diff)
1 files changed, 26 insertions, 13 deletions
diff --git a/Eigen/Core b/Eigen/Core
index f6bc18a08..5117461c7 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -45,27 +45,40 @@
   #ifdef EIGEN_EXCEPTIONS
   #undef EIGEN_EXCEPTIONS
   #endif
+#endif
 
-  // All functions callable from CUDA code must be qualified with __device__
-  #ifdef EIGEN_CUDACC
-    // Do not try to vectorize on CUDA and SYCL!
-    #ifndef EIGEN_DONT_VECTORIZE
-    #define EIGEN_DONT_VECTORIZE
-    #endif
+// All functions callable from CUDA code must be qualified with __device__
+#ifdef EIGEN_CUDACC
+  // Do not try to vectorize on CUDA and SYCL!
+  #ifndef EIGEN_DONT_VECTORIZE
+  #define EIGEN_DONT_VECTORIZE
+  #endif
 
-    #define EIGEN_DEVICE_FUNC __host__ __device__
-    // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
-    // works properly on the device side
-    #include <cuda_runtime.h>
-  #else
-    #define EIGEN_DEVICE_FUNC
+  #define EIGEN_DEVICE_FUNC __host__ __device__
+  // We need cuda_runtime.h to ensure that the EIGEN_USING_STD_MATH macro
+  // works properly on the device side
+  #include <cuda_runtime.h>
+
+  #if EIGEN_HAS_CONSTEXPR
+    // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules
+    #if defined(__NVCC__)
+      // nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr
+      #ifdef __CUDACC_RELAXED_CONSTEXPR__
+        #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
+      #endif
+    #elif defined(__clang__) && defined(__CUDA__)
+      // clang++ always considers constexpr functions as implicitly __host__ __device__
+      #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
+    #endif
   #endif
 #else
   #define EIGEN_DEVICE_FUNC
 #endif
 
 #ifdef __NVCC__
-#define EIGEN_DONT_VECTORIZE
+  #ifndef EIGEN_DONT_VECTORIZE
+  #define EIGEN_DONT_VECTORIZE
+  #endif
 #endif
 
 // When compiling CUDA device code with NVCC, pull in math functions from the
author	Andrea Bocci <andrea.bocci@cern.ch>	2018-06-11 18:33:24 +0200
committer	Andrea Bocci <andrea.bocci@cern.ch>	2018-06-11 18:33:24 +0200
commit	f7124b3e467363e45c3d906b7003f1520a5f804a (patch)
tree	f5ba6d719fc4d8f1b5cd56f0043b784fb6b9e268 /Eigen/Core
parent	05371239533012e652de0b88a3e0aa992a48a80f (diff)