diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 10 |
1 files changed, 9 insertions, 1 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 1155354cd..e0df13e78 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -137,6 +137,13 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> { { const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1; const Index size = array_prod(evaluator.dimensions()); +#if defined(EIGEN_USE_NONBLOCKING_THREAD_POOL) && defined(EIGEN_USE_COST_MODEL) + device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), + EvalRange::alignBlockSize, + [&evaluator](Index first, Index last) { + EvalRange::run(&evaluator, first, last); + }); +#else size_t num_threads = device.numThreads(); #ifdef EIGEN_USE_COST_MODEL if (num_threads > 1) { @@ -163,11 +170,12 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> { } barrier.Wait(); } +#endif // EIGEN_USE_NONBLOCKING_THREAD_POOL } evaluator.cleanup(); } }; -#endif +#endif // EIGEN_USE_THREADS // GPU: the evaluation of the expression is offloaded to a GPU. |