diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-06-06 14:09:46 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-06-06 14:09:46 -0700 |
commit | 7ef9f47b5874c33d15649a3312d463ecbd290365 (patch) | |
tree | 240c7a3b9d73ab6f52dc7ccfe940c926bd227fab /unsupported/Eigen/CXX11 | |
parent | ea75dba2014ffa58acfcd160b5e59975c453f8da (diff) |
Misc small improvements to the reduction code.
Diffstat (limited to 'unsupported/Eigen/CXX11')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 18 |
1 files changed, 8 insertions, 10 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 0d1a098b7..e82530955 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -130,15 +130,17 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num if (block == 0) { // We're the first block to run, initialize the output value atomicExch(output, reducer.initialize()); - unsigned int old = atomicExch(semaphore, 2u); - assert(old == 1u); + __threadfence(); + atomicExch(semaphore, 2u); } else { + // Wait for the first block to initialize the output value. // Use atomicCAS here to ensure that the reads aren't cached - unsigned int val = atomicCAS(semaphore, 2u, 2u); - while (val < 2u) { + unsigned int val; + do { val = atomicCAS(semaphore, 2u, 2u); } + while (val < 2u); } } } @@ -166,12 +168,8 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num } if (gridDim.x > 1 && threadIdx.x == 0) { - unsigned int ticket = atomicInc(semaphore, UINT_MAX); - assert(ticket >= 2u); - if (ticket == gridDim.x + 1) { - // We're the last block, reset the semaphore - *semaphore = 0; - } + // Let the last block reset the semaphore + atomicInc(semaphore, gridDim.x + 1); } } |