author Gael Guennebaud <g.gael@free.fr> 2017-07-17 01:02:51 +0200
committer Gael Guennebaud <g.gael@free.fr> 2017-07-17 01:02:51 +0200
commit bbd97b4095ff9cbe9898d68b3ab7bdff8125f3fb (patch)
tree 2e51268d6fbffc5e1d95b937358a7b7af1232f0f /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent f0b154a4b09914a9f11f5801220785f525217b9e (diff)
Add an EIGEN_NO_CUDA option, and introduce EIGEN_CUDACC and EIGEN_CUDA_ARCH aliases
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 10
1 file changed, 5 insertions(+), 5 deletions(-)
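For context, the aliases referenced in the commit message are plausibly defined along these lines in a central macros header: the vendor macros __CUDACC__ and __CUDA_ARCH__ are forwarded only when CUDA support has not been explicitly disabled. This is a minimal sketch of that intent, not the verbatim contents of Eigen's Macros.h:

// Sketch (assumption): how EIGEN_CUDACC / EIGEN_CUDA_ARCH could be wired up.
#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
  // nvcc is compiling this translation unit and CUDA has not been disabled
  #define EIGEN_CUDACC __CUDACC__
#endif

#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
  // device-side compilation pass; the value encodes the target architecture
  #define EIGEN_CUDA_ARCH __CUDA_ARCH__
#endif

Under such a scheme, defining EIGEN_NO_CUDA makes every guard of the form #if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC) in this file evaluate to false, which is the point of the new option.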
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 24a55a3d5..974eb7deb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -14,7 +14,7 @@ namespace Eigen {
namespace internal {
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(EIGEN_CUDACC)
// Full reducers for GPU, don't vectorize for now
// Reducer function that enables multiple cuda thread to safely accumulate at the same
@@ -23,7 +23,7 @@ namespace internal {
// updated the content of the output address it will try again.
template <typename T, typename R>
__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
-#if __CUDA_ARCH__ >= 300
+#if EIGEN_CUDA_ARCH >= 300
if (sizeof(T) == 4)
{
unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
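The function this hunk touches implements the retry loop described in the comment above: read the current output bits, apply the reduction to a local copy, and publish with atomicCAS, retrying if another thread updated the output in the meantime. A minimal sketch of that pattern for a plain float sum (illustrative only; atomicSumF is a hypothetical name, and Eigen's version is generic over the type T and the reducer):

__device__ void atomicSumF(float* output, float accum) {
  unsigned int* addr = reinterpret_cast<unsigned int*>(output);
  unsigned int old_bits = *addr;
  unsigned int assumed;
  do {
    assumed = old_bits;
    // Apply the reduction to a local copy of the current value.
    float updated = __uint_as_float(assumed) + accum;
    // Publish only if the output still holds the bits we read; if not,
    // atomicCAS returns the newer bits and the loop retries with them.
    old_bits = atomicCAS(addr, assumed, __float_as_uint(updated));
  } while (assumed != old_bits);
}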
@@ -102,7 +102,7 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer
template <>
__device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
-#if __CUDA_ARCH__ >= 300
+#if EIGEN_CUDA_ARCH >= 300
atomicAdd(output, accum);
#else // __CUDA_ARCH__ >= 300
assert(0 && "Shouldn't be called on unsupported device");
@@ -124,7 +124,7 @@ template <int BlockSize, int NumPerThread, typename Self,
typename Reducer, typename Index>
__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
typename Self::CoeffReturnType* output, unsigned int* semaphore) {
-#if __CUDA_ARCH__ >= 300
+#if EIGEN_CUDA_ARCH >= 300
// Initialize the output value
const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
if (gridDim.x == 1) {
@@ -372,7 +372,7 @@ template <int NumPerThread, typename Self,
typename Reducer, typename Index>
__global__ void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
typename Self::CoeffReturnType* output) {
-#if __CUDA_ARCH__ >= 300
+#if EIGEN_CUDA_ARCH >= 300
typedef typename Self::CoeffReturnType Type;
eigen_assert(blockDim.y == 1);
eigen_assert(blockDim.z == 1);
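Both kernels guarded above follow the same overall shape: each thread privately accumulates NumPerThread strided coefficients, partial results are combined within a warp via shuffle instructions (which require sm_30+, hence the EIGEN_CUDA_ARCH >= 300 guards), and one atomic per warp folds the result into the output. A minimal sketch under those assumptions (fullSumKernel is a hypothetical float-sum kernel; Eigen's kernels are generic over the reducer, and FullReductionKernel additionally coordinates a final pass through its semaphore argument):

template <int BlockSize, int NumPerThread>
__global__ void fullSumKernel(const float* input, int n, float* output) {
  // Each thread accumulates NumPerThread coefficients, strided by BlockSize.
  const int first = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
  float accum = 0.0f;
  for (int i = 0; i < NumPerThread; ++i) {
    const int idx = first + i * BlockSize;
    if (idx < n) accum += input[idx];
  }
  // Warp-level tree reduction via shuffles (CUDA 9+ spelling; older
  // toolchains, like the one this file targets, used __shfl_down).
  for (int offset = warpSize / 2; offset > 0; offset /= 2)
    accum += __shfl_down_sync(0xffffffffu, accum, offset);
  // Lane 0 of each warp folds its partial sum into the global result;
  // *output must be zero-initialized before launch.
  if ((threadIdx.x & (warpSize - 1)) == 0)
    atomicAdd(output, accum);
}

A launch might then look like fullSumKernel<256, 16><<<num_blocks, 256>>>(d_in, n, d_out) after a cudaMemset(d_out, 0, sizeof(float)).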