From 002cf0d1c979857e057879d8c84b92439dbcc90d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 22 Mar 2016 15:24:23 -0700 Subject: Use a single Barrier instead of a collection of Notifications to reduce the thread synchronization overhead --- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 54da77bcf..6bbf235cc 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -127,20 +127,16 @@ class TensorExecutor const Index blocksize = numext::maxi(PacketSize, (blocksz - (blocksz % PacketSize))); const Index numblocks = size / blocksize; - MaxSizeVector results(numblocks); + Barrier barrier(numblocks); for (int i = 0; i < numblocks; ++i) { - results.push_back(device.enqueue(&EvalRange::run, evaluator, i*blocksize, (i+1)*blocksize)); + device.enqueue_with_barrier(&barrier, &EvalRange::run, evaluator, i*blocksize, (i+1)*blocksize); } if (numblocks * blocksize < size) { EvalRange::run(evaluator, numblocks * blocksize, size); } - for (int i = 0; i < numblocks; ++i) { - wait_until_ready(results[i]); - delete results[i]; - } - + barrier.Wait(); } evaluator.cleanup(); } -- cgit v1.2.3