author      Abseil Team <absl-team@google.com>  2022-05-17 01:42:50 -0700
committer   Copybara-Service <copybara-worker@google.com>  2022-05-17 01:43:44 -0700
commit      aac2279f22eef04d01fc42e66fc183a32f08a9b4 (patch)
tree        76ccdb36fa16672b0e230531d784167c0645afc0 /absl/synchronization/mutex.cc
parent      63607288c14f87999e2b3914dd64e690d75f38f9 (diff)
absl: fix live-lock in CondVar

CondVar::WaitWithTimeout can live-lock when the timeout races with
Signal/SignalAll and the signalling thread is not scheduled due to
priorities, affinity, or other scheduler artifacts. This could lead to
stalls of up to tens of seconds in some cases.

PiperOrigin-RevId: 449159670
Change-Id: I64bbd277c1f91964cfba3306ba8a80eeadf85f64
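For context, a minimal sketch of the usage pattern the message describes: one thread waits with CondVar::WaitWithTimeout while another thread calls Signal() at roughly the same moment the timeout expires. The names (work_ready, WaitForWork, PublishWork) are illustrative and not part of this commit.

#include "absl/synchronization/mutex.h"
#include "absl/time/time.h"

absl::Mutex mu;
absl::CondVar cv;
bool work_ready = false;  // illustrative predicate guarded by mu

// Waiter thread. The timeout here can race with Signal() below: before this
// fix, if the signalling thread had already dequeued the waiter but was then
// descheduled before completing the wakeup, the waiter's internal wait loop
// could keep timing out immediately and spin for a long time.
bool WaitForWork() {
  absl::MutexLock lock(&mu);
  while (!work_ready) {
    // WaitWithTimeout returns true if the timeout expired.
    if (cv.WaitWithTimeout(&mu, absl::Milliseconds(100)) && !work_ready) {
      return false;  // timed out without the predicate becoming true
    }
  }
  return true;
}

// Signaller thread, running concurrently.
void PublishWork() {
  absl::MutexLock lock(&mu);
  work_ready = true;
  cv.Signal();
}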
Diffstat (limited to 'absl/synchronization/mutex.cc')
-rw-r--r--   absl/synchronization/mutex.cc   17
1 file changed, 17 insertions, 0 deletions
diff --git a/absl/synchronization/mutex.cc b/absl/synchronization/mutex.cc
index 821047fd..52e2455d 100644
--- a/absl/synchronization/mutex.cc
+++ b/absl/synchronization/mutex.cc
@@ -2575,6 +2575,23 @@ bool CondVar::WaitCommon(Mutex *mutex, KernelTimeout t) {
   while (waitp.thread->state.load(std::memory_order_acquire) ==
          PerThreadSynch::kQueued) {
     if (!Mutex::DecrementSynchSem(mutex, waitp.thread, t)) {
+      // DecrementSynchSem returned due to timeout.
+      // Now we will either (1) remove ourselves from the wait list in Remove
+      // below, in which case Remove will set thread.state = kAvailable and
+      // we will not call DecrementSynchSem again; or (2) Signal/SignalAll
+      // has removed us concurrently and is calling Wakeup, which will set
+      // thread.state = kAvailable and post to the semaphore.
+      // It's important to reset the timeout for case (2) because otherwise
+      // we can live-lock in this loop since DecrementSynchSem will always
+      // return immediately due to timeout, but Signal/SignalAll has not
+      // necessarily set thread.state = kAvailable yet (and is not scheduled
+      // due to thread priorities or other scheduler artifacts).
+      // Note this could also be resolved if Signal/SignalAll set
+      // thread.state = kAvailable while holding the wait list spin lock.
+      // But this can't be easily done for SignalAll since it grabs the whole
+      // wait list with a single compare-exchange and does not really grab
+      // the spin lock.
+      t = KernelTimeout::Never();
       this->Remove(waitp.thread);
       rc = true;
     }
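To make the comment's reasoning concrete, here is a self-contained sketch (standard C++20, not Abseil internals) of the loop shape after the patch. The binary_semaphore, RemoveSelf and SignalSelf below are hypothetical stand-ins for DecrementSynchSem, Mutex::Remove and Signal/Wakeup; the key point is switching to an untimed wait after the first timeout, mirroring t = KernelTimeout::Never() in the diff.

#include <atomic>
#include <chrono>
#include <semaphore>

// Hypothetical stand-ins for the per-thread state in the real code.
std::atomic<bool> queued{true};        // ~ thread.state == kQueued
std::atomic<bool> on_wait_list{true};  // whether we are still on the CondVar's list
std::binary_semaphore wakeup{0};       // ~ the per-thread synch semaphore

// ~ this->Remove(waitp.thread): if we are still on the wait list, dequeue
// ourselves and mark the thread available (case 1). If a signaller already
// took us off the list (case 2), this is a no-op and the signaller will
// finish the wakeup.
void RemoveSelf() {
  if (on_wait_list.exchange(false, std::memory_order_acq_rel)) {
    queued.store(false, std::memory_order_release);
  }
}

// ~ Signal()/Wakeup(), run by another thread: take the waiter off the list,
// then (possibly much later, if descheduled) mark it available and post the
// semaphore.
void SignalSelf() {
  if (on_wait_list.exchange(false, std::memory_order_acq_rel)) {
    queued.store(false, std::memory_order_release);
    wakeup.release();
  }
}

// Returns true if the wait timed out.
bool WaitLoop(std::chrono::milliseconds timeout) {
  bool timed_out = false;
  bool wait_forever = false;  // becomes true after the first timeout
  while (queued.load(std::memory_order_acquire)) {
    bool woken = wait_forever ? (wakeup.acquire(), true)
                              : wakeup.try_acquire_for(timeout);
    if (!woken) {
      // Timed out. Either we dequeue ourselves now (case 1), or a signaller
      // already dequeued us and will post the semaphore (case 2). In case 2
      // we must not keep timing out, or every iteration returns immediately
      // and the loop spins (the live-lock the commit fixes), so all further
      // waits are untimed.
      wait_forever = true;  // mirrors t = KernelTimeout::Never()
      RemoveSelf();
      timed_out = true;
    }
  }
  return timed_out;
}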