diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-03-22 15:24:23 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-03-22 15:24:23 -0700 |
commit | 002cf0d1c979857e057879d8c84b92439dbcc90d (patch) | |
tree | 0a317243eb0b125176826f704ddf7c68168ecb43 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | |
parent | 65a7113a36f70aeca34eac29f32b24ef865cb6e4 (diff) |
Use a single Barrier instead of a collection of Notifications to reduce the thread synchronization overhead
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 10 |
1 file changed, 3 insertions, 7 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 54da77bcf..6bbf235cc 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -127,20 +127,16 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
       const Index blocksize = numext::maxi<Index>(PacketSize, (blocksz - (blocksz % PacketSize)));
       const Index numblocks = size / blocksize;
 
-      MaxSizeVector<Notification*> results(numblocks);
+      Barrier barrier(numblocks);
       for (int i = 0; i < numblocks; ++i) {
-        results.push_back(device.enqueue(&EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize));
+        device.enqueue_with_barrier(&barrier, &EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize);
       }
 
       if (numblocks * blocksize < size) {
         EvalRange<Evaluator, Index, Vectorizable>::run(evaluator, numblocks * blocksize, size);
       }
 
-      for (int i = 0; i < numblocks; ++i) {
-        wait_until_ready(results[i]);
-        delete results[i];
-      }
-
+      barrier.Wait();
     }
     evaluator.cleanup();
   }
```