diff options
author | Andrea Bocci <andrea.bocci@cern.ch> | 2018-06-11 18:33:24 +0200 |
---|---|---|
committer | Andrea Bocci <andrea.bocci@cern.ch> | 2018-06-11 18:33:24 +0200 |
commit | f7124b3e467363e45c3d906b7003f1520a5f804a (patch) | |
tree | f5ba6d719fc4d8f1b5cd56f0043b784fb6b9e268 /Eigen/Core | |
parent | 05371239533012e652de0b88a3e0aa992a48a80f (diff) |
Extend CUDA support to matrix inversion and selfadjointeigensolver
Diffstat (limited to 'Eigen/Core')
-rw-r--r-- | Eigen/Core | 39 |
1 files changed, 26 insertions, 13 deletions
diff --git a/Eigen/Core b/Eigen/Core index f6bc18a08..5117461c7 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -45,27 +45,40 @@ #ifdef EIGEN_EXCEPTIONS #undef EIGEN_EXCEPTIONS #endif +#endif - // All functions callable from CUDA code must be qualified with __device__ - #ifdef EIGEN_CUDACC - // Do not try to vectorize on CUDA and SYCL! - #ifndef EIGEN_DONT_VECTORIZE - #define EIGEN_DONT_VECTORIZE - #endif +// All functions callable from CUDA code must be qualified with __device__ +#ifdef EIGEN_CUDACC + // Do not try to vectorize on CUDA and SYCL! + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif - #define EIGEN_DEVICE_FUNC __host__ __device__ - // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro - // works properly on the device side - #include <cuda_runtime.h> - #else - #define EIGEN_DEVICE_FUNC + #define EIGEN_DEVICE_FUNC __host__ __device__ + // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro + // works properly on the device side + #include <cuda_runtime.h> + + #if EIGEN_HAS_CONSTEXPR + // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules + #if defined(__NVCC__) + // nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr + #ifdef __CUDACC_RELAXED_CONSTEXPR__ + #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC + #endif + #elif defined(__clang__) && defined(__CUDA__) + // clang++ always considers constexpr functions as implicitly __host__ __device__ + #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC + #endif #endif #else #define EIGEN_DEVICE_FUNC #endif #ifdef __NVCC__ -#define EIGEN_DONT_VECTORIZE + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif #endif // When compiling CUDA device code with NVCC, pull in math functions from the |