| author    | Eugene Zhulenev <ezhulenev@google.com>   | 2019-02-13 10:20:53 -0800 |
|-----------|------------------------------------------|---------------------------|
| committer | Eugene Zhulenev <ezhulenev@google.com>   | 2019-02-13 10:20:53 -0800 |
| commit    | 8c2f30c7900b8e91df6d044431f4b5cc667993c3 |                           |
| tree      | 97d9103f494ea931d665040f5791567ac4cba2fd |                           |
| parent    | bdcb5f33043c559c7100f8fd5eb55fbbd0cdfc69 |                           |
Speed up Tensor ThreadPool RunQueue::Empty()
| -rw-r--r-- | unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h | 42 |

1 file changed, 28 insertions(+), 14 deletions(-)
```diff
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
index 73928c1d4..993d9e066 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
@@ -148,32 +148,46 @@ class RunQueue {
     return n;
   }
 
-  // Size returns current queue size.
+  // Size returns current queue size; if NeedSizeEstimate is false, only whether
+  // the size is 0 is guaranteed to be correct.
   // Can be called by any thread at any time.
-  unsigned Size() const {
+  template <bool NeedSizeEstimate>
+  unsigned SizeOrNotEmpty() const {
     // Emptiness plays critical role in thread pool blocking. So we go to great
     // effort to not produce false positives (claim non-empty queue as empty).
+    unsigned front = front_.load(std::memory_order_acquire);
     for (;;) {
       // Capture a consistent snapshot of front/tail.
-      unsigned front = front_.load(std::memory_order_acquire);
       unsigned back = back_.load(std::memory_order_acquire);
       unsigned front1 = front_.load(std::memory_order_relaxed);
-      if (front != front1) continue;
-      int size = (front & kMask2) - (back & kMask2);
-      // Fix overflow.
-      if (size < 0) size += 2 * kSize;
-      // Order of modification in push/pop is crafted to make the queue look
-      // larger than it is during concurrent modifications. E.g. pop can
-      // decrement size before the corresponding push has incremented it.
-      // So the computed size can be up to kSize + 1, fix it.
-      if (size > static_cast<int>(kSize)) size = kSize;
-      return size;
+      if (front != front1) {
+        front = front1;
+        std::atomic_thread_fence(std::memory_order_acquire);
+        continue;
+      }
+      if (NeedSizeEstimate) {
+        int size = (front & kMask2) - (back & kMask2);
+        // Fix overflow.
+        if (size < 0) size += 2 * kSize;
+        // Order of modification in push/pop is crafted to make the queue look
+        // larger than it is during concurrent modifications. E.g. pop can
+        // decrement size before the corresponding push has incremented it.
+        // So the computed size can be up to kSize + 1, fix it.
+        if (size > kSize) size = kSize;
+        return size;
+      } else {
+        return ((front ^ back) & kMask2);
+      }
     }
   }
 
+  // Size returns current queue size.
+  // Can be called by any thread at any time.
+  unsigned Size() const { return SizeOrNotEmpty<true>(); }
+
   // Empty tests whether container is empty.
   // Can be called by any thread at any time.
-  bool Empty() const { return Size() == 0; }
+  bool Empty() const { return SizeOrNotEmpty<false>() == 0; }
 
   // Delete all the elements from the queue.
   void Flush() {
```
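The fast `Empty()` path works because two index counters are equal modulo `2 * kSize` exactly when their low bits agree, so `((front ^ back) & kMask2) == 0` detects an empty queue without the sign fix-up and clamp that the full size estimate needs. The sketch below is a minimal, single-threaded model of that index arithmetic only; `kSize = 4`, the plain `unsigned` counters, and the function names are illustrative stand-ins, not Eigen's actual `RunQueue` (which stores per-element state, uses atomics, and encodes its counters differently). In this model each push simply advances `front` by one.

```cpp
#include <cassert>
#include <iostream>

constexpr unsigned kSize = 4;                  // capacity; a power of two
constexpr unsigned kMask2 = (kSize << 1) - 1;  // counters live modulo 2*kSize

// Full size computation, mirroring the NeedSizeEstimate == true branch.
int SizeEstimate(unsigned front, unsigned back) {
  int size = static_cast<int>(front & kMask2) - static_cast<int>(back & kMask2);
  if (size < 0) size += 2 * kSize;             // fix modular wrap-around
  if (size > static_cast<int>(kSize)) size = kSize;
  return size;
}

// Cheap emptiness check, mirroring the NeedSizeEstimate == false branch:
// (front ^ back) & kMask2 is zero iff front == back modulo 2*kSize.
bool IsEmpty(unsigned front, unsigned back) {
  return ((front ^ back) & kMask2) == 0;
}

int main() {
  // Because 2*kSize divides 2^32, the modular arithmetic survives unsigned
  // overflow; start near UINT_MAX to exercise the wrap.
  const unsigned back = 0xFFFFFFFEu;
  for (unsigned pushes = 0; pushes <= kSize; ++pushes) {
    unsigned front = back + pushes;  // wraps past UINT_MAX at pushes == 2
    assert(IsEmpty(front, back) == (SizeEstimate(front, back) == 0));
    std::cout << "pushes=" << pushes << " size=" << SizeEstimate(front, back)
              << " empty=" << IsEmpty(front, back) << "\n";
  }
  return 0;
}
```

A second, smaller win visible in the diff: when a torn read is detected (`front != front1`), the retry now reuses the already-loaded `front1` and upgrades its ordering with an explicit `std::atomic_thread_fence(std::memory_order_acquire)`, rather than reloading `front_` with acquire ordering at the top of every loop iteration.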