about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-03-22 15:24:23 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-03-22 15:24:23 -0700
commit: 002cf0d1c979857e057879d8c84b92439dbcc90d (patch)
tree: 0a317243eb0b125176826f704ddf7c68168ecb43 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
parent: 65a7113a36f70aeca34eac29f32b24ef865cb6e4 (diff)
Use a single Barrier instead of a collection of Notifications to reduce the thread synchronization overhead
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 10
1 files changed, 3 insertions, 7 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 54da77bcf..6bbf235cc 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -127,20 +127,16 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
const Index blocksize = numext::maxi<Index>(PacketSize, (blocksz - (blocksz % PacketSize)));
const Index numblocks = size / blocksize;
- MaxSizeVector<Notification*> results(numblocks);
+ Barrier barrier(numblocks);
for (int i = 0; i < numblocks; ++i) {
- results.push_back(device.enqueue(&EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize));
+ device.enqueue_with_barrier(&barrier, &EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize);
}
if (numblocks * blocksize < size) {
EvalRange<Evaluator, Index, Vectorizable>::run(evaluator, numblocks * blocksize, size);
}
- for (int i = 0; i < numblocks; ++i) {
- wait_until_ready(results[i]);
- delete results[i];
- }
-
+ barrier.Wait();
}
evaluator.cleanup();
}