diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-03-06 11:54:30 -0800 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-03-06 11:54:30 -0800 |
commit | 3c3f639fe25918806f6f126482bd8886ee824e47 (patch) | |
tree | 0ef235dec1b05d2c7c7f6e03966a3939ce71e2b0 | |
parent | f4ec8edea8a8396e1b744db9ea61de2c451bd15d (diff) | |
parent | 41cdc370d02cadc662cb29d20d99d282707d500c (diff) |
Merge.
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 2 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 5 |
2 files changed, 6 insertions, 1 deletion
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 1c44541bd..057e90e50 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -317,6 +317,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
 // GPU: the evaluation of the expression is offloaded to a GPU.
 #if defined(EIGEN_USE_GPU)
+#if defined(EIGEN_GPUCC)
 template <typename Expression, bool Vectorizable, bool Tileable>
 class TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable> {
@@ -326,7 +327,6 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable> {
 };
-#if defined(EIGEN_GPUCC)
 template <typename Evaluator, typename StorageIndex, bool Vectorizable>
 struct EigenMetaKernelEval {
   static __device__ EIGEN_ALWAYS_INLINE
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index 49603d6c1..bd1910dcc 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -56,6 +56,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       thread_data_[i].thread.reset(
           env_.CreateThread([this, i]() { WorkerLoop(i); }));
     }
+    global_steal_partition_ = EncodePartition(0, num_threads_);
 #ifndef EIGEN_THREAD_LOCAL
     // Wait for workers to initialize per_thread_map_. Otherwise we might race
     // with them in Schedule or CurrentThreadId.
@@ -237,6 +238,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   MaxSizeVector<ThreadData> thread_data_;
   MaxSizeVector<MaxSizeVector<unsigned>> all_coprimes_;
   MaxSizeVector<EventCount::Waiter> waiters_;
+  unsigned global_steal_partition_;
   std::atomic<unsigned> blocked_;
   std::atomic<bool> spinning_;
   std::atomic<bool> done_;
@@ -354,6 +356,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   Task LocalSteal() {
     PerThread* pt = GetPerThread();
     unsigned partition = GetStealPartition(pt->thread_id);
+    // If thread steal partition is the same as global partition, there is no
+    // need to go through the steal loop twice.
+    if (global_steal_partition_ == partition) return Task();
     unsigned start, limit;
     DecodePartition(partition, &start, &limit);
     AssertBounds(start, limit);
```