Improvements to parallelFor.

Move some scalar functors from TensorFunctors.h to Eigen core.
author: Rasmus Munk Larsen <rmlarsen@google.com> 2016-05-12 14:07:22 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2016-05-12 14:07:22 -0700
commit: e55deb21c59109f3ed2ade858031116503b2c313 (patch)
tree: b369d3ded141916c24ad6464001b9c951bdb8d1d /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
parent: ae9688f3139579b9e87560ad48e62d1205fb3eb3 (diff)
1 file changed, 9 insertions, 1 deletion
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 1155354cd..e0df13e78 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -137,6 +137,13 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
     {
       const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1;
       const Index size = array_prod(evaluator.dimensions());
+#if defined(EIGEN_USE_NONBLOCKING_THREAD_POOL) && defined(EIGEN_USE_COST_MODEL)
+      device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
+                         EvalRange::alignBlockSize,
+                         [&evaluator](Index first, Index last) {
+                           EvalRange::run(&evaluator, first, last);
+                         });
+#else
       size_t num_threads = device.numThreads();
 #ifdef EIGEN_USE_COST_MODEL
       if (num_threads > 1) {
@@ -163,11 +170,12 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
         }
         barrier.Wait();
       }
+#endif  // EIGEN_USE_NONBLOCKING_THREAD_POOL
     }
     evaluator.cleanup();
   }
 };
-#endif
+#endif  // EIGEN_USE_THREADS
 
 
 // GPU: the evaluation of the expression is offloaded to a GPU.
author	Rasmus Munk Larsen <rmlarsen@google.com>	2016-05-12 14:07:22 -0700
committer	Rasmus Munk Larsen <rmlarsen@google.com>	2016-05-12 14:07:22 -0700
commit	e55deb21c59109f3ed2ade858031116503b2c313 (patch)
tree	b369d3ded141916c24ad6464001b9c951bdb8d1d /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
parent	ae9688f3139579b9e87560ad48e62d1205fb3eb3 (diff)