diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-10-16 10:10:04 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-10-16 10:10:04 -0700 |
commit | d853adffdba52da6e1dd6b137724e4f4e783dcca (patch) | |
tree | f6ee833e04b90dfd64d048ba7355496258896208 /unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h | |
parent | bfdd9f3ac95d9a2b41e6f2ec1f7434331125b9e1 (diff) |
Avoid calling get_future() more than once on a given promise.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h | 13 |
1 file changed, 8 insertions, 5 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index 8e4c7c11d..cf1352a31 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -198,8 +198,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
     // this should really be numBlockAs * n_blocks;
     const Index num_kernel_promises = num_threads * n_blocks;
     std::vector<Promise> kernel_promises(num_kernel_promises);
+    std::vector<Future> kernel_futures(num_kernel_promises);
     for (int i = 0; i < kernel_promises.size(); ++i) {
       kernel_promises[i].set_value();
+      kernel_futures[i] = kernel_promises[i].get_future();
     }
 
     for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) {
@@ -218,8 +220,9 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
           int blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads;
           for (int i = 0; i < n_blocks; ++i) {
             int future_id = (blockAId * n_blocks + i);
-            wait_until_ready(&kernel_promises[future_id]);
+            wait_until_ready(&kernel_futures[future_id]);
             kernel_promises[future_id] = Promise();
+            kernel_futures[future_id] = kernel_promises[future_id].get_future();
           }
           const packLArg arg = {
             blockAs[blockAId], // blockA
@@ -248,7 +251,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
             for (int i = num_blocks; i < num_threads; ++i) {
               int blockAId = (k_block_idx * m_blocks + i + m_block_idx) % num_threads;
               int future_id = (blockAId * n_blocks + n_block_idx);
-              wait_until_ready(&kernel_promises[future_id]);
+              wait_until_ready(&kernel_futures[future_id]);
             }
           }
 
@@ -281,9 +284,9 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
       }
     }
 
-    // collect the last frame of kernel futures
-    for (int i = 0; i < kernel_promises.size(); ++i) {
-      wait_until_ready(&kernel_promises[i]);
+    // Make sure all the kernels are done.
+    for (int i = 0; i < kernel_futures.size(); ++i) {
+      wait_until_ready(&kernel_futures[i]);
     }
 
     // deallocate all of the memory for both A and B's
```