author     Dmitry Vyukov <dvyukov@google.com>             2023-09-20 10:26:25 -0700
committer  Copybara-Service <copybara-worker@google.com>  2023-09-20 10:27:09 -0700
commit     c45a4393c075fb0e50e408d9b69035a42ed2617c
tree       2b0e228ce7a1a564b5d5d9724257706307c606e5 /absl/synchronization
parent     adcaae433fe10da72bc4f8b61eaf559604b81d03
absl: speed up Mutex::[Reader]TryLock
Tidy up Mutex::[Reader]TryLock codegen by outlining the slow path
and the non-tail function call, and by un-unrolling the loop
(the pattern is sketched below).
Current codegen:
https://gist.githubusercontent.com/dvyukov/a4d353fd71ac873af9332c1340675b60/raw/226537ffa305b25a79ef3a85277fa870fee5191d/gistfile1.txt
New codegen:
https://gist.githubusercontent.com/dvyukov/686a094c5aa357025689764f155e5a29/raw/e3125c1cdb5669fac60faf336e2f60395e29d888/gistfile1.txt
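The change applies the standard fast-path/slow-path split. A minimal sketch of the idea, using a hypothetical SpinLock rather than the actual Mutex code (Abseil spells the attribute ABSL_ATTRIBUTE_NOINLINE; plain GCC/Clang syntax is used here):

#include <atomic>

class SpinLock {
 public:
  bool TryLock() {
    int expected = 0;
    // Fast path: a single CAS and no out-of-line calls, so the function
    // compiles to a handful of instructions and inlines into callers.
    if (state_.compare_exchange_strong(expected, 1,
                                       std::memory_order_acquire,
                                       std::memory_order_relaxed)) {
      return true;
    }
    return TryLockSlow();  // Rare: contention or event recording.
  }

 private:
  // Outlined so its register spills and non-tail calls don't bloat the
  // caller's codegen.
  __attribute__((noinline)) bool TryLockSlow() { return false; }
  std::atomic<int> state_{0};
};

The same reasoning motivates the un-unrolling: a retried CAS loop is dominated by the CAS itself, so the cost of the backward jump is negligible and unrolling only adds code size.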
name                                    old cpu/op   new cpu/op   delta
BM_TryLock                             18.0ns ± 0%  17.7ns ± 0%   -1.64%  (p=0.016 n=4+5)
BM_ReaderTryLock/real_time/threads:1   17.9ns ± 0%  17.9ns ± 0%   -0.10%  (p=0.016 n=5+5)
BM_ReaderTryLock/real_time/threads:72  9.61µs ± 8%  8.42µs ± 7%  -12.37%  (p=0.008 n=5+5)
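A microbenchmark in the style of BM_TryLock / BM_ReaderTryLock can be written with Google Benchmark roughly as follows (a sketch only; Abseil's own benchmark source may differ):

#include "absl/synchronization/mutex.h"
#include "benchmark/benchmark.h"

void BM_TryLock(benchmark::State& state) {
  absl::Mutex mu;  // Uncontended: exercises the fast path.
  for (auto _ : state) {
    if (mu.TryLock()) mu.Unlock();
  }
}
BENCHMARK(BM_TryLock);

void BM_ReaderTryLock(benchmark::State& state) {
  static absl::Mutex* mu = new absl::Mutex;  // Shared by all threads.
  for (auto _ : state) {
    if (mu->ReaderTryLock()) mu->ReaderUnlock();
  }
}
BENCHMARK(BM_ReaderTryLock)->UseRealTime()->Threads(1)->Threads(72);

BENCHMARK_MAIN();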
PiperOrigin-RevId: 567006472
Change-Id: Iea0747e71bbf2dc1f00c70a4235203071d795b99
Diffstat (limited to 'absl/synchronization')
-rw-r--r--  absl/synchronization/mutex.cc  108
-rw-r--r--  absl/synchronization/mutex.h     4
2 files changed, 71 insertions, 41 deletions
diff --git a/absl/synchronization/mutex.cc b/absl/synchronization/mutex.cc
index eb4b6e54..ac5f1696 100644
--- a/absl/synchronization/mutex.cc
+++ b/absl/synchronization/mutex.cc
@@ -1582,26 +1582,36 @@ bool Mutex::AwaitCommon(const Condition& cond, KernelTimeout t) {
 bool Mutex::TryLock() {
   ABSL_TSAN_MUTEX_PRE_LOCK(this, __tsan_mutex_try_lock);
   intptr_t v = mu_.load(std::memory_order_relaxed);
-  if ((v & (kMuWriter | kMuReader | kMuEvent)) == 0 &&  // try fast acquire
-      mu_.compare_exchange_strong(v, kMuWriter | v, std::memory_order_acquire,
-                                  std::memory_order_relaxed)) {
-    DebugOnlyLockEnter(this);
-    ABSL_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
-    return true;
-  }
-  if ((v & kMuEvent) != 0) {  // we're recording events
-    if ((v & kExclusive->slow_need_zero) == 0 &&  // try fast acquire
-        mu_.compare_exchange_strong(
-            v, (kExclusive->fast_or | v) + kExclusive->fast_add,
-            std::memory_order_acquire, std::memory_order_relaxed)) {
+  // Try fast acquire.
+  if (ABSL_PREDICT_TRUE((v & (kMuWriter | kMuReader | kMuEvent)) == 0)) {
+    if (ABSL_PREDICT_TRUE(mu_.compare_exchange_strong(
+            v, kMuWriter | v, std::memory_order_acquire,
+            std::memory_order_relaxed))) {
       DebugOnlyLockEnter(this);
-      PostSynchEvent(this, SYNCH_EV_TRYLOCK_SUCCESS);
       ABSL_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
       return true;
-    } else {
-      PostSynchEvent(this, SYNCH_EV_TRYLOCK_FAILED);
     }
+  } else if (ABSL_PREDICT_FALSE((v & kMuEvent) != 0)) {
+    // We're recording events.
+    return TryLockSlow();
+  }
+  ABSL_TSAN_MUTEX_POST_LOCK(
+      this, __tsan_mutex_try_lock | __tsan_mutex_try_lock_failed, 0);
+  return false;
+}
+
+ABSL_ATTRIBUTE_NOINLINE bool Mutex::TryLockSlow() {
+  intptr_t v = mu_.load(std::memory_order_relaxed);
+  if ((v & kExclusive->slow_need_zero) == 0 &&  // try fast acquire
+      mu_.compare_exchange_strong(
+          v, (kExclusive->fast_or | v) + kExclusive->fast_add,
+          std::memory_order_acquire, std::memory_order_relaxed)) {
+    DebugOnlyLockEnter(this);
+    PostSynchEvent(this, SYNCH_EV_TRYLOCK_SUCCESS);
+    ABSL_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
+    return true;
   }
+  PostSynchEvent(this, SYNCH_EV_TRYLOCK_FAILED);
   ABSL_TSAN_MUTEX_POST_LOCK(
       this, __tsan_mutex_try_lock | __tsan_mutex_try_lock_failed, 0);
   return false;
@@ -1611,41 +1621,57 @@ bool Mutex::ReaderTryLock() {
   ABSL_TSAN_MUTEX_PRE_LOCK(this,
                            __tsan_mutex_read_lock | __tsan_mutex_try_lock);
   intptr_t v = mu_.load(std::memory_order_relaxed);
+  // Clang tends to unroll the loop when compiling with optimization.
+  // But in this case it just unnecessary increases code size.
+  // If CAS is failing due to contention, the jump cost is negligible.
+#if defined(__clang__)
+#pragma nounroll
+#endif
   // The while-loops (here and below) iterate only if the mutex word keeps
-  // changing (typically because the reader count changes) under the CAS. We
-  // limit the number of attempts to avoid having to think about livelock.
-  int loop_limit = 5;
-  while ((v & (kMuWriter | kMuWait | kMuEvent)) == 0 && loop_limit != 0) {
-    if (mu_.compare_exchange_strong(v, (kMuReader | v) + kMuOne,
-                                    std::memory_order_acquire,
-                                    std::memory_order_relaxed)) {
+  // changing (typically because the reader count changes) under the CAS.
+  // We limit the number of attempts to avoid having to think about livelock.
+  for (int loop_limit = 5; loop_limit != 0; loop_limit--) {
+    if (ABSL_PREDICT_FALSE((v & (kMuWriter | kMuWait | kMuEvent)) != 0)) {
+      break;
+    }
+    if (ABSL_PREDICT_TRUE(mu_.compare_exchange_strong(
+            v, (kMuReader | v) + kMuOne, std::memory_order_acquire,
+            std::memory_order_relaxed))) {
       DebugOnlyLockEnter(this);
       ABSL_TSAN_MUTEX_POST_LOCK(
           this, __tsan_mutex_read_lock | __tsan_mutex_try_lock, 0);
       return true;
     }
-    loop_limit--;
-    v = mu_.load(std::memory_order_relaxed);
   }
-  if ((v & kMuEvent) != 0) {  // we're recording events
-    loop_limit = 5;
-    while ((v & kShared->slow_need_zero) == 0 && loop_limit != 0) {
-      if (mu_.compare_exchange_strong(v, (kMuReader | v) + kMuOne,
-                                      std::memory_order_acquire,
-                                      std::memory_order_relaxed)) {
-        DebugOnlyLockEnter(this);
-        PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_SUCCESS);
-        ABSL_TSAN_MUTEX_POST_LOCK(
-            this, __tsan_mutex_read_lock | __tsan_mutex_try_lock, 0);
-        return true;
-      }
-      loop_limit--;
-      v = mu_.load(std::memory_order_relaxed);
-    }
-    if ((v & kMuEvent) != 0) {
-      PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_FAILED);
+  if (ABSL_PREDICT_TRUE((v & kMuEvent) == 0)) {
+    ABSL_TSAN_MUTEX_POST_LOCK(this,
+                              __tsan_mutex_read_lock | __tsan_mutex_try_lock |
+                                  __tsan_mutex_try_lock_failed,
+                              0);
+    return false;
+  }
+  // we're recording events
+  return ReaderTryLockSlow();
+}
+
+ABSL_ATTRIBUTE_NOINLINE bool Mutex::ReaderTryLockSlow() {
+  intptr_t v = mu_.load(std::memory_order_relaxed);
+#if defined(__clang__)
+#pragma nounroll
+#endif
+  for (int loop_limit = 5; loop_limit != 0; loop_limit--) {
+    if ((v & kShared->slow_need_zero) == 0 &&
+        mu_.compare_exchange_strong(v, (kMuReader | v) + kMuOne,
+                                    std::memory_order_acquire,
+                                    std::memory_order_relaxed)) {
+      DebugOnlyLockEnter(this);
+      PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_SUCCESS);
+      ABSL_TSAN_MUTEX_POST_LOCK(
+          this, __tsan_mutex_read_lock | __tsan_mutex_try_lock, 0);
+      return true;
    }
  }
+  PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_FAILED);
   ABSL_TSAN_MUTEX_POST_LOCK(this,
                             __tsan_mutex_read_lock | __tsan_mutex_try_lock |
                                 __tsan_mutex_try_lock_failed,
diff --git a/absl/synchronization/mutex.h b/absl/synchronization/mutex.h
index d146b066..95726f6b 100644
--- a/absl/synchronization/mutex.h
+++ b/absl/synchronization/mutex.h
@@ -521,6 +521,10 @@ class ABSL_LOCKABLE Mutex {
                 int flags) ABSL_ATTRIBUTE_COLD;
   // slow path release
   void UnlockSlow(SynchWaitParams* waitp) ABSL_ATTRIBUTE_COLD;
+  // TryLock slow path.
+  bool TryLockSlow();
+  // ReaderTryLock slow path.
+  bool ReaderTryLockSlow();
   // Common code between Await() and AwaitWithTimeout/Deadline()
   bool AwaitCommon(const Condition& cond,
                    synchronization_internal::KernelTimeout t);
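The public API is unchanged; callers keep the usual non-blocking pattern. An illustrative snippet (example code, not from the Abseil sources):

#include "absl/synchronization/mutex.h"

absl::Mutex mu;
int counter = 0;

void MaybeIncrement() {
  if (mu.TryLock()) {  // Non-blocking; this is the newly inlined fast path.
    counter++;
    mu.Unlock();
  }
}

bool MaybeRead(int* out) {
  if (mu.ReaderTryLock()) {  // Shared acquire; fails if a writer or waiter is present.
    *out = counter;
    mu.ReaderUnlock();
    return true;
  }
  return false;
}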