aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-06-06 14:09:46 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-06-06 14:09:46 -0700
commit7ef9f47b5874c33d15649a3312d463ecbd290365 (patch)
tree240c7a3b9d73ab6f52dc7ccfe940c926bd227fab /unsupported/Eigen/CXX11
parentea75dba2014ffa58acfcd160b5e59975c453f8da (diff)
Misc small improvements to the reduction code.
Diffstat (limited to 'unsupported/Eigen/CXX11')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h18
1 files changed, 8 insertions, 10 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 0d1a098b7..e82530955 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -130,15 +130,17 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
if (block == 0) {
// We're the first block to run, initialize the output value
atomicExch(output, reducer.initialize());
- unsigned int old = atomicExch(semaphore, 2u);
- assert(old == 1u);
+ __threadfence();
+ atomicExch(semaphore, 2u);
}
else {
+ // Wait for the first block to initialize the output value.
// Use atomicCAS here to ensure that the reads aren't cached
- unsigned int val = atomicCAS(semaphore, 2u, 2u);
- while (val < 2u) {
+ unsigned int val;
+ do {
val = atomicCAS(semaphore, 2u, 2u);
}
+ while (val < 2u);
}
}
}
@@ -166,12 +168,8 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
}
if (gridDim.x > 1 && threadIdx.x == 0) {
- unsigned int ticket = atomicInc(semaphore, UINT_MAX);
- assert(ticket >= 2u);
- if (ticket == gridDim.x + 1) {
- // We're the last block, reset the semaphore
- *semaphore = 0;
- }
+ // Let the last block reset the semaphore
+ atomicInc(semaphore, gridDim.x + 1);
}
}