author Eugene Zhulenev <ezhulenev@google.com> 2019-02-13 10:20:53 -0800
committer Eugene Zhulenev <ezhulenev@google.com> 2019-02-13 10:20:53 -0800
commit 8c2f30c7900b8e91df6d044431f4b5cc667993c3 (patch)
tree 97d9103f494ea931d665040f5791567ac4cba2fd
parent bdcb5f33043c559c7100f8fd5eb55fbbd0cdfc69 (diff)
Speedup Tensor ThreadPool RunQueue::Empty()
-rw-r--r-- unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h | 42
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
index 73928c1d4..993d9e066 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
@@ -148,32 +148,46 @@ class RunQueue {
return n;
}
- // Size returns current queue size.
+ // Size returns current queue size; if NeedSizeEstimate is false, only whether
+ // the size is 0 is guaranteed to be correct.
// Can be called by any thread at any time.
- unsigned Size() const {
+ template <bool NeedSizeEstimate>
+ unsigned SizeOrNotEmpty() const {
// Emptiness plays critical role in thread pool blocking. So we go to great
// effort to not produce false positives (claim non-empty queue as empty).
+ unsigned front = front_.load(std::memory_order_acquire);
for (;;) {
// Capture a consistent snapshot of front/tail.
- unsigned front = front_.load(std::memory_order_acquire);
unsigned back = back_.load(std::memory_order_acquire);
unsigned front1 = front_.load(std::memory_order_relaxed);
- if (front != front1) continue;
- int size = (front & kMask2) - (back & kMask2);
- // Fix overflow.
- if (size < 0) size += 2 * kSize;
- // Order of modification in push/pop is crafted to make the queue look
- // larger than it is during concurrent modifications. E.g. pop can
- // decrement size before the corresponding push has incremented it.
- // So the computed size can be up to kSize + 1, fix it.
- if (size > static_cast<int>(kSize)) size = kSize;
- return size;
+ if (front != front1) {
+ front = front1;
+ std::atomic_thread_fence(std::memory_order_acquire);
+ continue;
+ }
+ if (NeedSizeEstimate) {
+ int size = (front & kMask2) - (back & kMask2);
+ // Fix overflow.
+ if (size < 0) size += 2 * kSize;
+ // Order of modification in push/pop is crafted to make the queue look
+ // larger than it is during concurrent modifications. E.g. pop can
+ // decrement size before the corresponding push has incremented it.
+ // So the computed size can be up to kSize + 1, fix it.
+ if (size > kSize) size = kSize;
+ return size;
+ } else {
+ return ((front ^ back) & kMask2);
+ }
}
}
+ // Size returns current queue size.
+ // Can be called by any thread at any time.
+ unsigned Size() const { return SizeOrNotEmpty<true>(); }
+
// Empty tests whether container is empty.
// Can be called by any thread at any time.
- bool Empty() const { return Size() == 0; }
+ bool Empty() const { return SizeOrNotEmpty<false>() == 0; }
// Delete all the elements from the queue.
void Flush() {
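
Why the Empty() change helps, in a hedged sketch: the expensive part of the old Size() path is the subtraction plus the wrap-around fix-up and the clamp. To decide emptiness alone it is enough to know whether the masked front and back counters differ, which the new SizeOrNotEmpty<false>() path computes as ((front ^ back) & kMask2). The standalone snippet below illustrates that reasoning only; it is not the Eigen implementation, and the Indices struct, the constant values, and the main() driver are assumptions added for the example.

// Minimal sketch (not the Eigen source) of the two paths in SizeOrNotEmpty().
// It assumes, as in RunQueue, that front_/back_ are monotonically increasing
// counters, that kSize is a power of two, and that kMask2 = (kSize << 1) - 1
// keeps the slot index plus one extra wrap bit.
#include <atomic>
#include <cassert>

constexpr unsigned kSize = 1024;               // capacity, power of two
constexpr unsigned kMask2 = (kSize << 1) - 1;  // index bits + one wrap bit

struct Indices {
  std::atomic<unsigned> front{0};
  std::atomic<unsigned> back{0};

  // Slow path, analogous to SizeOrNotEmpty<true>(): compute a real size
  // estimate, fixing wrap-around and clamping the transient over-count.
  unsigned Size() const {
    unsigned f = front.load(std::memory_order_acquire);
    unsigned b = back.load(std::memory_order_acquire);
    int size = (f & kMask2) - (b & kMask2);
    if (size < 0) size += 2 * kSize;
    if (size > static_cast<int>(kSize)) size = kSize;
    return static_cast<unsigned>(size);
  }

  // Fast path, analogous to SizeOrNotEmpty<false>(): the queue is empty
  // exactly when the masked counters coincide, so a single XOR-and-mask
  // suffices and the subtraction, wrap fix-up, and clamp are all skipped.
  bool Empty() const {
    unsigned f = front.load(std::memory_order_acquire);
    unsigned b = back.load(std::memory_order_acquire);
    return ((f ^ b) & kMask2) == 0;
  }
};

int main() {
  Indices q;
  assert(q.Empty() && q.Size() == 0);
  q.front.store(3, std::memory_order_release);  // pretend three pushes happened
  assert(!q.Empty() && q.Size() == 3);
  return 0;
}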