From 070f6e47b33a2909d039e620c873204f78809492 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Wed, 7 Nov 2018 13:52:16 -0800 Subject: Export of internal Abseil changes. -- 178e7a9a76fc8fcd6df6335b59139cbe644a16b9 by Jon Cohen : Import of CCTZ from GitHub. PiperOrigin-RevId: 220523164 -- 59ef14fe7034a3148f1e9cef1f128b8ca264b444 by Jon Cohen : Don't assume how much std::vector's constructors allocate in InlinedVector's test for scoped_allocator_adaptor support. PiperOrigin-RevId: 220464683 -- 6f8351be43a44a8f10bf20612b2cc744a4a911c7 by Jon Cohen : Add VS Code and some Bazel output files to absl/.gitignore PiperOrigin-RevId: 220464362 -- 43fac22f8af6b6ed55309a784a9d25d837393d0e by Abseil Team : absl: fix SpinLock::EncodeWaitCycles If a thread has ever observed or set kSpinLockSleeper, it must never leave 0 in kWaitTimeMask because at this point it is expected to wake subsequent threads. Current calculations in EncodeWaitCycles can result in 0 in kWaitTimeMask and lead to missed wakeups. This is mostly theoretical today, because the futex call needs to finish within 128 cycles (futex can return immediately without waiting, but 128 cycles still look too low for this). But this can well fire in future if we bump granularity and/or threshold for recording contention. Use kSpinLockSleeper instead of 0. PiperOrigin-RevId: 220463123 -- def9b7e3d45c99d68cc52a4429256116d7f421f2 by Abseil Team : absl: optimize SpinLock::SlowLock Currently we record contention even after the first initial spin. This leads to several performance issues: 1. If we succeed in acquiring the lock after the initial spin, overall wait time can be within tens/hundreds of nanoseconds. Recording such low wait time looks completely unnecessary and excessive. From some point of view this is not even a wait, because we did not sleep. And, for example, Mutex does not record contention in this case. In majority of cases the lock should be acquired exactly during the initial spin, yet we still go through full overhead of submitting contention. 2. Whenever a thread submits contention it also calls FUTEX_WAKE (there is no way to understand if it's necessary or not when wait value is stored in the lock). So if there are just 2 threads and a brief contention, the second thread will still call FUTEX_WAKE which is completely unnecessary overhead. Don't record contention after the initial spin wait. FWIW this also removes 2 CycleClock::Now calls and EncodeWaitCycles from the common hot path. PiperOrigin-RevId: 220379972 -- 75b0c0cb214de904ea622f81ec3f4eabdc8695b0 by Derek Mauro : Supress MSVC warnings in raw_hash_set's use of TrailingZeros and LeadingZeros. https://github.com/abseil/abseil-cpp/issues/208 PiperOrigin-RevId: 220372204 GitOrigin-RevId: 178e7a9a76fc8fcd6df6335b59139cbe644a16b9 Change-Id: I3a66af4e050810a3274e45d4e055b2aa19ffba1b --- absl/base/internal/spinlock.cc | 47 +++++++++++++++++++++------------------ absl/base/internal/spinlock.h | 2 +- absl/base/spinlock_test_common.cc | 3 ++- 3 files changed, 28 insertions(+), 24 deletions(-) (limited to 'absl/base') diff --git a/absl/base/internal/spinlock.cc b/absl/base/internal/spinlock.cc index cef149e6..28ba1af7 100644 --- a/absl/base/internal/spinlock.cc +++ b/absl/base/internal/spinlock.cc @@ -95,13 +95,9 @@ void SpinLock::InitLinkerInitializedAndCooperative() { } // Monitor the lock to see if its value changes within some time period -// (adaptive_spin_count loop iterations). A timestamp indicating -// when the thread initially started waiting for the lock is passed in via -// the initial_wait_timestamp value. The total wait time in cycles for the -// lock is returned in the wait_cycles parameter. The last value read -// from the lock is returned from the method. -uint32_t SpinLock::SpinLoop(int64_t initial_wait_timestamp, - uint32_t *wait_cycles) { +// (adaptive_spin_count loop iterations). The last value read from the lock +// is returned from the method. +uint32_t SpinLock::SpinLoop() { // We are already in the slow path of SpinLock, initialize the // adaptive_spin_count here. ABSL_CONST_INIT static absl::once_flag init_adaptive_spin_count; @@ -115,22 +111,21 @@ uint32_t SpinLock::SpinLoop(int64_t initial_wait_timestamp, do { lock_value = lockword_.load(std::memory_order_relaxed); } while ((lock_value & kSpinLockHeld) != 0 && --c > 0); - uint32_t spin_loop_wait_cycles = - EncodeWaitCycles(initial_wait_timestamp, CycleClock::Now()); - *wait_cycles = spin_loop_wait_cycles; - - return TryLockInternal(lock_value, spin_loop_wait_cycles); + return lock_value; } void SpinLock::SlowLock() { + uint32_t lock_value = SpinLoop(); + lock_value = TryLockInternal(lock_value, 0); + if ((lock_value & kSpinLockHeld) == 0) { + return; + } // The lock was not obtained initially, so this thread needs to wait for // it. Record the current timestamp in the local variable wait_start_time // so the total wait time can be stored in the lockword once this thread // obtains the lock. int64_t wait_start_time = CycleClock::Now(); - uint32_t wait_cycles; - uint32_t lock_value = SpinLoop(wait_start_time, &wait_cycles); - + uint32_t wait_cycles = 0; int lock_wait_call_count = 0; while ((lock_value & kSpinLockHeld) != 0) { // If the lock is currently held, but not marked as having a sleeper, mark @@ -170,7 +165,9 @@ void SpinLock::SlowLock() { ABSL_TSAN_MUTEX_POST_DIVERT(this, 0); // Spin again after returning from the wait routine to give this thread // some chance of obtaining the lock. - lock_value = SpinLoop(wait_start_time, &wait_cycles); + lock_value = SpinLoop(); + wait_cycles = EncodeWaitCycles(wait_start_time, CycleClock::Now()); + lock_value = TryLockInternal(lock_value, wait_cycles); } } @@ -206,14 +203,20 @@ uint32_t SpinLock::EncodeWaitCycles(int64_t wait_start_time, (wait_end_time - wait_start_time) >> PROFILE_TIMESTAMP_SHIFT; // Return a representation of the time spent waiting that can be stored in - // the lock word's upper bits. bit_cast is required as Atomic32 is signed. - const uint32_t clamped = static_cast( + // the lock word's upper bits. + uint32_t clamped = static_cast( std::min(scaled_wait_time, kMaxWaitTime) << LOCKWORD_RESERVED_SHIFT); - // bump up value if necessary to avoid returning kSpinLockSleeper. - const uint32_t after_spinlock_sleeper = - kSpinLockSleeper + (1 << LOCKWORD_RESERVED_SHIFT); - return clamped == kSpinLockSleeper ? after_spinlock_sleeper : clamped; + if (clamped == 0) { + return kSpinLockSleeper; // Just wake waiters, but don't record contention. + } + // Bump up value if necessary to avoid returning kSpinLockSleeper. + const uint32_t kMinWaitTime = + kSpinLockSleeper + (1 << LOCKWORD_RESERVED_SHIFT); + if (clamped == kSpinLockSleeper) { + return kMinWaitTime; + } + return clamped; } uint64_t SpinLock::DecodeWaitCycles(uint32_t lock_value) { diff --git a/absl/base/internal/spinlock.h b/absl/base/internal/spinlock.h index 212abc66..dcce0109 100644 --- a/absl/base/internal/spinlock.h +++ b/absl/base/internal/spinlock.h @@ -161,7 +161,7 @@ class LOCKABLE SpinLock { void InitLinkerInitializedAndCooperative(); void SlowLock() ABSL_ATTRIBUTE_COLD; void SlowUnlock(uint32_t lock_value) ABSL_ATTRIBUTE_COLD; - uint32_t SpinLoop(int64_t initial_wait_timestamp, uint32_t* wait_cycles); + uint32_t SpinLoop(); inline bool TryLockImpl() { uint32_t lock_value = lockword_.load(std::memory_order_relaxed); diff --git a/absl/base/spinlock_test_common.cc b/absl/base/spinlock_test_common.cc index 1b508848..19170813 100644 --- a/absl/base/spinlock_test_common.cc +++ b/absl/base/spinlock_test_common.cc @@ -155,7 +155,8 @@ TEST(SpinLock, WaitCyclesEncoding) { // Test corner cases int64_t start_time = time_distribution(generator); - EXPECT_EQ(0, SpinLockTest::EncodeWaitCycles(start_time, start_time)); + EXPECT_EQ(kSpinLockSleeper, + SpinLockTest::EncodeWaitCycles(start_time, start_time)); EXPECT_EQ(0, SpinLockTest::DecodeWaitCycles(0)); EXPECT_EQ(0, SpinLockTest::DecodeWaitCycles(kLockwordReservedMask)); EXPECT_EQ(kMaxCycles & ~kProfileTimestampMask, -- cgit v1.2.3