From 1569a7d7ab35164b5de9c639cdf01c74a3a72050 Mon Sep 17 00:00:00 2001
From: Igor Babuschkin
Date: Thu, 18 Aug 2016 17:15:12 +0100
Subject: Add the necessary CUDA >= 300 checks back

---
 unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index af5c71247..fa7364fd6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -23,6 +23,7 @@ namespace internal {
 // updated the content of the output address it will try again.
 template <typename T, typename R>
 __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
+#if __CUDA_ARCH__ >= 300
   if (sizeof(T) == 4)
   {
     unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
@@ -40,6 +41,9 @@ __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer)
         return;
       }
     }
+#else
+  assert(0 && "Shouldn't be called on unsupported device");
+#endif
   }
   else if (sizeof(T) == 8) {
     unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
@@ -98,7 +102,11 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R& reducer
 
 template <>
 __device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
+#if __CUDA_ARCH__ >= 300
   atomicAdd(output, accum);
+#else
+  assert(0 && "Shouldn't be called on unsupported device");
+#endif
 }
 
 
@@ -116,6 +124,7 @@ template <int BlockSize, int NumPerThread, typename Self,
           typename Reducer, typename Index>
 __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
                                     typename Self::CoeffReturnType* output, unsigned int* semaphore) {
+#if __CUDA_ARCH__ >= 300
   // Initialize the output value
   const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
   if (gridDim.x == 1) {
@@ -170,6 +179,9 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
     // Let the last block reset the semaphore
     atomicInc(semaphore, gridDim.x + 1);
   }
+#else
+  assert(0 && "Shouldn't be called on unsupported device");
+#endif
 }
 
 
@@ -684,7 +696,6 @@ struct OuterReducer<Self, Op, GpuDevice> {
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                  (internal::is_same<typename Self::CoeffReturnType, float>::value ||
                                                   internal::is_same<typename Self::CoeffReturnType, double>::value);
-
   template <typename Device, typename OutputType>
   static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
     assert(false && "Should only be called to reduce doubles or floats on a gpu device");
-- 
cgit v1.2.3
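
Note on the pattern restored above (not part of the patch): every hunk wraps device-only reduction code in an `#if __CUDA_ARCH__ >= 300` guard and falls back to an assert on any other compilation pass. The standalone sketch below illustrates that guard style together with a compare-and-swap retry loop like the one in atomicReduce; the kernel name, the max-reduction operation, and the launch parameters are invented for the example and are not taken from Eigen.

// guarded_atomic_reduce.cu -- minimal sketch, assuming a CC >= 3.0 device.
#include <cassert>
#include <cstdio>
#include <cuda_runtime.h>

__device__ void atomicMaxReduce(float* output, float value) {
#if __CUDA_ARCH__ >= 300
  // CAS retry loop: reread the current value and retry until our update lands
  // without being interleaved with another thread's update. The arch guard
  // mirrors the patch's convention; atomicCAS itself exists on older devices.
  unsigned int* addr = reinterpret_cast<unsigned int*>(output);
  unsigned int old = *addr;
  unsigned int assumed;
  do {
    assumed = old;
    float merged = fmaxf(__uint_as_float(assumed), value);
    old = atomicCAS(addr, assumed, __float_as_uint(merged));
  } while (old != assumed);
#else
  assert(0 && "Shouldn't be called on unsupported device");
#endif
}

__global__ void maxReduceKernel(const float* data, int n, float* result) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    atomicMaxReduce(result, data[i]);
  }
}

int main() {
  const int n = 1024;
  float host[n];
  for (int i = 0; i < n; ++i) host[i] = static_cast<float>(i % 97);

  float *d_data, *d_result;
  float init = -1e30f;  // identity for max over this data
  cudaMalloc(&d_data, n * sizeof(float));
  cudaMalloc(&d_result, sizeof(float));
  cudaMemcpy(d_data, host, n * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(d_result, &init, sizeof(float), cudaMemcpyHostToDevice);

  maxReduceKernel<<<(n + 255) / 256, 256>>>(d_data, n, d_result);

  float result = 0.f;
  cudaMemcpy(&result, d_result, sizeof(float), cudaMemcpyDeviceToHost);
  printf("max = %f\n", result);  // expected 96
  cudaFree(d_data);
  cudaFree(d_result);
  return 0;
}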