author    Dmitry Vyukov <dvyukov@google.com>             2023-09-20 10:26:25 -0700
committer Copybara-Service <copybara-worker@google.com>  2023-09-20 10:27:09 -0700
commit    c45a4393c075fb0e50e408d9b69035a42ed2617c (patch)
tree      2b0e228ce7a1a564b5d5d9724257706307c606e5 /absl
parent    adcaae433fe10da72bc4f8b61eaf559604b81d03 (diff)
absl: speed up Mutex::[Reader]TryLock
Tidy up Mutex::[Reader]TryLock codegen by outlining slow path and non-tail function call, and un-unrolling the loop.

Current codegen: https://gist.githubusercontent.com/dvyukov/a4d353fd71ac873af9332c1340675b60/raw/226537ffa305b25a79ef3a85277fa870fee5191d/gistfile1.txt
New codegen: https://gist.githubusercontent.com/dvyukov/686a094c5aa357025689764f155e5a29/raw/e3125c1cdb5669fac60faf336e2f60395e29d888/gistfile1.txt

name                                   old cpu/op   new cpu/op   delta
BM_TryLock                             18.0ns ± 0%  17.7ns ± 0%   -1.64%  (p=0.016 n=4+5)
BM_ReaderTryLock/real_time/threads:1   17.9ns ± 0%  17.9ns ± 0%   -0.10%  (p=0.016 n=5+5)
BM_ReaderTryLock/real_time/threads:72  9.61µs ± 8%  8.42µs ± 7%  -12.37%  (p=0.008 n=5+5)

PiperOrigin-RevId: 567006472
Change-Id: Iea0747e71bbf2dc1f00c70a4235203071d795b99
Diffstat (limited to 'absl')
-rw-r--r--  absl/synchronization/mutex.cc  108
-rw-r--r--  absl/synchronization/mutex.h     4
2 files changed, 71 insertions, 41 deletions
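
The core of the change is a classic fast-path/slow-path split: keep the common uncontended path to a single inlined CAS, and outline the rare event-recording path into a separate non-inlined function so it does not bloat the code at call sites. A minimal sketch of that idiom, using plain GCC/Clang builtins (__builtin_expect, __attribute__((noinline))) in place of Abseil's ABSL_PREDICT_TRUE and ABSL_ATTRIBUTE_NOINLINE macros; the SpinLock class here is hypothetical, not Abseil's Mutex:

#include <atomic>

class SpinLock {
 public:
  bool TryLock() {
    bool expected = false;
    // Fast path: one CAS on the uncontended word, hinted as the likely
    // outcome so it compiles to a short, branch-predictor-friendly sequence.
    if (__builtin_expect(locked_.compare_exchange_strong(
                             expected, true, std::memory_order_acquire,
                             std::memory_order_relaxed),
                         1)) {
      return true;
    }
    // Rare path: outlined so its code (and any non-tail calls it makes,
    // e.g. event recording) stays out of the inlined fast path.
    return TryLockSlow();
  }

  void Unlock() { locked_.store(false, std::memory_order_release); }

 private:
  __attribute__((noinline)) bool TryLockSlow() {
    // Placeholder for the bookkeeping a real slow path would do.
    return false;
  }

  std::atomic<bool> locked_{false};
};
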
diff --git a/absl/synchronization/mutex.cc b/absl/synchronization/mutex.cc
index eb4b6e54..ac5f1696 100644
--- a/absl/synchronization/mutex.cc
+++ b/absl/synchronization/mutex.cc
@@ -1582,26 +1582,36 @@ bool Mutex::AwaitCommon(const Condition& cond, KernelTimeout t) {
bool Mutex::TryLock() {
ABSL_TSAN_MUTEX_PRE_LOCK(this, __tsan_mutex_try_lock);
intptr_t v = mu_.load(std::memory_order_relaxed);
- if ((v & (kMuWriter | kMuReader | kMuEvent)) == 0 && // try fast acquire
- mu_.compare_exchange_strong(v, kMuWriter | v, std::memory_order_acquire,
- std::memory_order_relaxed)) {
- DebugOnlyLockEnter(this);
- ABSL_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
- return true;
- }
- if ((v & kMuEvent) != 0) { // we're recording events
- if ((v & kExclusive->slow_need_zero) == 0 && // try fast acquire
- mu_.compare_exchange_strong(
- v, (kExclusive->fast_or | v) + kExclusive->fast_add,
- std::memory_order_acquire, std::memory_order_relaxed)) {
+ // Try fast acquire.
+ if (ABSL_PREDICT_TRUE((v & (kMuWriter | kMuReader | kMuEvent)) == 0)) {
+ if (ABSL_PREDICT_TRUE(mu_.compare_exchange_strong(
+ v, kMuWriter | v, std::memory_order_acquire,
+ std::memory_order_relaxed))) {
DebugOnlyLockEnter(this);
- PostSynchEvent(this, SYNCH_EV_TRYLOCK_SUCCESS);
ABSL_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
return true;
- } else {
- PostSynchEvent(this, SYNCH_EV_TRYLOCK_FAILED);
}
+ } else if (ABSL_PREDICT_FALSE((v & kMuEvent) != 0)) {
+ // We're recording events.
+ return TryLockSlow();
+ }
+ ABSL_TSAN_MUTEX_POST_LOCK(
+ this, __tsan_mutex_try_lock | __tsan_mutex_try_lock_failed, 0);
+ return false;
+}
+
+ABSL_ATTRIBUTE_NOINLINE bool Mutex::TryLockSlow() {
+ intptr_t v = mu_.load(std::memory_order_relaxed);
+ if ((v & kExclusive->slow_need_zero) == 0 && // try fast acquire
+ mu_.compare_exchange_strong(
+ v, (kExclusive->fast_or | v) + kExclusive->fast_add,
+ std::memory_order_acquire, std::memory_order_relaxed)) {
+ DebugOnlyLockEnter(this);
+ PostSynchEvent(this, SYNCH_EV_TRYLOCK_SUCCESS);
+ ABSL_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
+ return true;
}
+ PostSynchEvent(this, SYNCH_EV_TRYLOCK_FAILED);
ABSL_TSAN_MUTEX_POST_LOCK(
this, __tsan_mutex_try_lock | __tsan_mutex_try_lock_failed, 0);
return false;
@@ -1611,41 +1621,57 @@ bool Mutex::ReaderTryLock() {
ABSL_TSAN_MUTEX_PRE_LOCK(this,
__tsan_mutex_read_lock | __tsan_mutex_try_lock);
intptr_t v = mu_.load(std::memory_order_relaxed);
+ // Clang tends to unroll the loop when compiling with optimization.
+ // But in this case it just unnecessarily increases code size.
+ // If CAS is failing due to contention, the jump cost is negligible.
+#if defined(__clang__)
+#pragma nounroll
+#endif
// The while-loops (here and below) iterate only if the mutex word keeps
- // changing (typically because the reader count changes) under the CAS. We
- // limit the number of attempts to avoid having to think about livelock.
- int loop_limit = 5;
- while ((v & (kMuWriter | kMuWait | kMuEvent)) == 0 && loop_limit != 0) {
- if (mu_.compare_exchange_strong(v, (kMuReader | v) + kMuOne,
- std::memory_order_acquire,
- std::memory_order_relaxed)) {
+ // changing (typically because the reader count changes) under the CAS.
+ // We limit the number of attempts to avoid having to think about livelock.
+ for (int loop_limit = 5; loop_limit != 0; loop_limit--) {
+ if (ABSL_PREDICT_FALSE((v & (kMuWriter | kMuWait | kMuEvent)) != 0)) {
+ break;
+ }
+ if (ABSL_PREDICT_TRUE(mu_.compare_exchange_strong(
+ v, (kMuReader | v) + kMuOne, std::memory_order_acquire,
+ std::memory_order_relaxed))) {
DebugOnlyLockEnter(this);
ABSL_TSAN_MUTEX_POST_LOCK(
this, __tsan_mutex_read_lock | __tsan_mutex_try_lock, 0);
return true;
}
- loop_limit--;
- v = mu_.load(std::memory_order_relaxed);
}
- if ((v & kMuEvent) != 0) { // we're recording events
- loop_limit = 5;
- while ((v & kShared->slow_need_zero) == 0 && loop_limit != 0) {
- if (mu_.compare_exchange_strong(v, (kMuReader | v) + kMuOne,
- std::memory_order_acquire,
- std::memory_order_relaxed)) {
- DebugOnlyLockEnter(this);
- PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_SUCCESS);
- ABSL_TSAN_MUTEX_POST_LOCK(
- this, __tsan_mutex_read_lock | __tsan_mutex_try_lock, 0);
- return true;
- }
- loop_limit--;
- v = mu_.load(std::memory_order_relaxed);
- }
- if ((v & kMuEvent) != 0) {
- PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_FAILED);
+ if (ABSL_PREDICT_TRUE((v & kMuEvent) == 0)) {
+ ABSL_TSAN_MUTEX_POST_LOCK(this,
+ __tsan_mutex_read_lock | __tsan_mutex_try_lock |
+ __tsan_mutex_try_lock_failed,
+ 0);
+ return false;
+ }
+ // We're recording events.
+ return ReaderTryLockSlow();
+}
+
+ABSL_ATTRIBUTE_NOINLINE bool Mutex::ReaderTryLockSlow() {
+ intptr_t v = mu_.load(std::memory_order_relaxed);
+#if defined(__clang__)
+#pragma nounroll
+#endif
+ for (int loop_limit = 5; loop_limit != 0; loop_limit--) {
+ if ((v & kShared->slow_need_zero) == 0 &&
+ mu_.compare_exchange_strong(v, (kMuReader | v) + kMuOne,
+ std::memory_order_acquire,
+ std::memory_order_relaxed)) {
+ DebugOnlyLockEnter(this);
+ PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_SUCCESS);
+ ABSL_TSAN_MUTEX_POST_LOCK(
+ this, __tsan_mutex_read_lock | __tsan_mutex_try_lock, 0);
+ return true;
}
}
+ PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_FAILED);
ABSL_TSAN_MUTEX_POST_LOCK(this,
__tsan_mutex_read_lock | __tsan_mutex_try_lock |
__tsan_mutex_try_lock_failed,
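
The ReaderTryLock change above pairs the outlining with a bounded CAS retry loop and a Clang #pragma nounroll hint. A standalone sketch of that loop idiom follows; the word encoding (kWriter, kOneReader) is illustrative, not Abseil's actual kMu* bit layout:

#include <atomic>
#include <cstdint>

// Hypothetical encoding: low bit = writer held, upper bits = reader count.
constexpr intptr_t kWriter = 1;
constexpr intptr_t kOneReader = 2;

bool ReaderTryAcquire(std::atomic<intptr_t>& word) {
  intptr_t v = word.load(std::memory_order_relaxed);
  // Ask Clang not to unroll: the CAS only fails under contention, where the
  // loop-back jump cost is negligible, so unrolling would just grow the code.
#if defined(__clang__)
#pragma nounroll
#endif
  // Bounded retries: the word can keep changing under the CAS (reader-count
  // churn), and the fixed limit sidesteps any livelock concern.
  for (int loop_limit = 5; loop_limit != 0; loop_limit--) {
    if ((v & kWriter) != 0) {
      return false;  // A writer holds the lock; fail immediately.
    }
    // On failure, compare_exchange_strong reloads the current word into v,
    // so the next iteration retries against fresh state.
    if (word.compare_exchange_strong(v, v + kOneReader,
                                     std::memory_order_acquire,
                                     std::memory_order_relaxed)) {
      return true;
    }
  }
  return false;
}
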
diff --git a/absl/synchronization/mutex.h b/absl/synchronization/mutex.h
index d146b066..95726f6b 100644
--- a/absl/synchronization/mutex.h
+++ b/absl/synchronization/mutex.h
@@ -521,6 +521,10 @@ class ABSL_LOCKABLE Mutex {
int flags) ABSL_ATTRIBUTE_COLD;
// slow path release
void UnlockSlow(SynchWaitParams* waitp) ABSL_ATTRIBUTE_COLD;
+ // TryLock slow path.
+ bool TryLockSlow();
+ // ReaderTryLock slow path.
+ bool ReaderTryLockSlow();
// Common code between Await() and AwaitWithTimeout/Deadline()
bool AwaitCommon(const Condition& cond,
synchronization_internal::KernelTimeout t);
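
For context, a typical call site for these entry points, sketched against the public absl::Mutex API (which this commit leaves unchanged); counter and the function names are illustrative:

#include "absl/synchronization/mutex.h"

absl::Mutex mu;
int counter = 0;

void MaybeIncrement() {
  // Usually stays on the short inlined fast path; the outlined slow path
  // only runs when the mutex is recording synchronization events.
  if (mu.TryLock()) {
    counter++;
    mu.Unlock();
  }
}

int ReadCounterOrDefault() {
  int result = -1;
  if (mu.ReaderTryLock()) {
    result = counter;
    mu.ReaderUnlock();
  }
  return result;
}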