about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com> 2016-05-12 14:07:22 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2016-05-12 14:07:22 -0700
commit: e55deb21c59109f3ed2ade858031116503b2c313 (patch)
tree: b369d3ded141916c24ad6464001b9c951bdb8d1d /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
parent: ae9688f3139579b9e87560ad48e62d1205fb3eb3 (diff)
Improvements to parallelFor.
Move some scalar functors from TensorFunctors.h to Eigen core.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 10
1 file changed, 9 insertions, 1 deletion
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 1155354cd..e0df13e78 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -137,6 +137,13 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
{
const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1;
const Index size = array_prod(evaluator.dimensions());
+#if defined(EIGEN_USE_NONBLOCKING_THREAD_POOL) && defined(EIGEN_USE_COST_MODEL)
+ device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
+ EvalRange::alignBlockSize,
+ [&evaluator](Index first, Index last) {
+ EvalRange::run(&evaluator, first, last);
+ });
+#else
size_t num_threads = device.numThreads();
#ifdef EIGEN_USE_COST_MODEL
if (num_threads > 1) {
@@ -163,11 +170,12 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
}
barrier.Wait();
}
+#endif // EIGEN_USE_NONBLOCKING_THREAD_POOL
}
evaluator.cleanup();
}
};
-#endif
+#endif // EIGEN_USE_THREADS
// GPU: the evaluation of the expression is offloaded to a GPU.