From eeb0d880ee6dac1b28ac820566017f641f45fcf9 Mon Sep 17 00:00:00 2001
From: Igor Babuschkin
Date: Fri, 1 Jul 2016 19:08:26 +0100
Subject: Enable efficient Tensor reduction for doubles

---
 .../Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 67 ++++++++++++++++------
 1 file changed, 50 insertions(+), 17 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index d3894e625..4e2e416e6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -67,6 +67,23 @@ __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer)
 #endif
 }
 
+// We extend atomicExch to support extra data types: the generic version forwards to atomicExch, the double specialization emulates the exchange with a 64-bit atomicCAS loop. Both store val at address and return the value that was there before.
+template <typename Type>
+__device__ inline Type atomicExchCustom(Type* address, Type val) {
+  return atomicExch(address, val);
+}
+
+template <>
+__device__ inline double atomicExchCustom(double* address, double val) {
+  unsigned long long int* address_as_ull = (unsigned long long int*)address;  // view the double's storage as a 64-bit integer
+  unsigned long long int old = *address_as_ull;  // initial guess of the current contents
+  unsigned long long int assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val));  // stores val's bit pattern only if the word still equals assumed
+  } while (assumed != old);  // the word changed since it was read: retry with the value atomicCAS returned
+  return __longlong_as_double(old);  // previous contents, converted back to double
+}
 #ifdef EIGEN_HAS_CUDA_FP16
 template