diff options
author | Abseil Team <absl-team@google.com> | 2018-12-04 11:01:12 -0800 |
---|---|---|
committer | CJ Johnson <johnsoncj@google.com> | 2018-12-04 16:27:57 -0500 |
commit | 44b0fafc62d9b8f192e8180cbe9c4b806b339d57 (patch) | |
tree | 4ada26427deee54333ac271a26c5fe363ba33cc7 /absl/synchronization/mutex_benchmark.cc | |
parent | 926bfeb9fff223429c12224b7514243886323e8d (diff) |
Export of internal Abseil changes.
--
cd076f55c1fa600131f6dda392533dfe61679fc0 by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 224008762
--
e05f62b01286d51044ff86ec6ef565749b9faf82 by Abseil Team <absl-team@google.com>:
Create a pow10() test helper function to compute guaranteed-precise double values of 10^x. Not all standard libraries ship bit-accurate pow() functions, causing tests to fail that rely on expected values generated by it.
PiperOrigin-RevId: 223883762
--
fd88e5e3f7ab80f7f5df9fd1488cd58b4573be69 by Abseil Team <absl-team@google.com>:
Remove some absl:: qualifications to work around inline namespace bugs on MSVC 2015.
PiperOrigin-RevId: 223869642
--
6276cfff969d596edd36a2bbaba65ee045808903 by Abseil Team <absl-team@google.com>:
Update absl/memory/CMakeLists.txt to use new functions
i.e. absl_cc_(library|test)
PiperOrigin-RevId: 223854224
--
359de9afc7a34c975fd3e0cbc52afd96637d97bd by Chris Kennelly <ckennelly@google.com>:
Mark spinlock_benchmark_common as alwayslink = 1.
PiperOrigin-RevId: 223844536
--
450cd8cbe2789a6d54ed1eb87170259bb334f8b9 by Abseil Team <absl-team@google.com>:
Support .* (pointer-to-member dereference) expressions in demangle.cc.
PiperOrigin-RevId: 223826797
--
772ca92179c3634f3e31a80bbc272ed8022e3572 by Abseil Team <absl-team@google.com>:
Fix misspellings in absl::variant comments and replace a ' with a `.
PiperOrigin-RevId: 223807911
--
35dcdc2fbf299d195658aac101887f6dcad1de2f by Abseil Team <absl-team@google.com>:
Bug fix in CMakeLists.txt file (SRCS --> HDRS).
The compressed_tuple header-only library is being defined
with the SRCS parameter instead of the HDRS parameter and
this has been observed to cause some builds on some platforms
to attempt to create a static library from it which fails
since there are no .cc sources.
PiperOrigin-RevId: 223805367
--
4a57a3d2045bb137c0c97958e45ce425190b8d3e by Chris Kennelly <ckennelly@google.com>:
Add test that absl::make_unique value initializes memory.
PiperOrigin-RevId: 223801819
--
dfe8289d7f4dcc6bb568a26aaf192a89e896bdfd by Chris Kennelly <ckennelly@google.com>:
SpinLock: Use exchange to avoid missing wakeups.
The default fast path for SpinLock::Unlock does not use an atomic. If the
SpinLock becomes contended while we are unlocking between lockword_.load and
lockword_.store, we will fail to wake up the new waiter. This can cause
unexpected latency spikes.
PiperOrigin-RevId: 223800369
--
9b9d35df786482f0016f77dd31691eff81503d23 by Abseil Team <absl-team@google.com>:
Update absl/hash/CMakeLists.txt to use new functions
i.e. absl_cc_(library|test)
PiperOrigin-RevId: 223755819
--
c2014e2704b87e7cdce2d2a0287c7e2397752296 by Abseil Team <absl-team@google.com>:
Update absl/debugging/CMakeLists.txt to use new functions
i.e. absl_cc_(library|test)
PiperOrigin-RevId: 223751986
--
d83a4e09126400e3fd80645cb03ee558f532271e by Derek Mauro <dmauro@google.com>:
Cleanup synchronization benchmarks.
PiperOrigin-RevId: 223589416
--
fad140b473586531b5b12843f942ec27dfcf5e93 by CJ Johnson <johnsoncj@google.com>:
Makes unifies the order of forward_iterator and input_iterator overloads
PiperOrigin-RevId: 223580660
--
6cd7c96faa7cc5f79f574e35a1b13837ef187d05 by Abseil Team <absl-team@google.com>:
Internal Change.
PiperOrigin-RevId: 223561629
--
bd2e545356b0f548af0e3c14bb2f7f0e712e49d0 by Shaindel Schwartz <shaindel@google.com>:
Remove misleading comments. try_emplace() does not exist for the hash_set containers.
PiperOrigin-RevId: 223543089
--
0cd380a53b587eb7aacc4003a4a3bbb6c78d7c10 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 223512551
--
7156dfee599cb72e9adddfe0e6ae07a95ddf10bb by Greg Miller <jgm@google.com>:
Fixes UB that would result from constructing, multiplying, or dividing a
Duration with a double "NaN" value. This CL changes the absl::Duration
*implementation* to return an InfiniteDuration value that has the same sign as
the given NaN.
PiperOrigin-RevId: 223407499
--
196b7d18609958267951882baf7f9429e49bcafa by CJ Johnson <johnsoncj@google.com>:
Addresses NVCC+MSVC compilation bug where `inlined_capacity()` was not considered valid in constexpr
PiperOrigin-RevId: 223397718
GitOrigin-RevId: cd076f55c1fa600131f6dda392533dfe61679fc0
Change-Id: I5423ca6470f661a7c6f73aa8fee49990446f157f
Diffstat (limited to 'absl/synchronization/mutex_benchmark.cc')
-rw-r--r-- | absl/synchronization/mutex_benchmark.cc | 151 |
1 files changed, 140 insertions, 11 deletions
diff --git a/absl/synchronization/mutex_benchmark.cc b/absl/synchronization/mutex_benchmark.cc index 1e019e00..2652bb97 100644 --- a/absl/synchronization/mutex_benchmark.cc +++ b/absl/synchronization/mutex_benchmark.cc @@ -12,16 +12,154 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include <cstdint> +#include <mutex> // NOLINT(build/c++11) #include <vector> -#include "benchmark/benchmark.h" -#include "absl/base/internal/sysinfo.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/base/internal/spinlock.h" #include "absl/synchronization/blocking_counter.h" #include "absl/synchronization/internal/thread_pool.h" #include "absl/synchronization/mutex.h" +#include "benchmark/benchmark.h" namespace { +void BM_Mutex(benchmark::State& state) { + static absl::Mutex* mu = new absl::Mutex; + for (auto _ : state) { + absl::MutexLock lock(mu); + } +} +BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu(); + +static void DelayNs(int64_t ns, int* data) { + int64_t end = absl::base_internal::CycleClock::Now() + + ns * absl::base_internal::CycleClock::Frequency() / 1e9; + while (absl::base_internal::CycleClock::Now() < end) { + ++(*data); + benchmark::DoNotOptimize(*data); + } +} + +template <typename MutexType> +class RaiiLocker { + public: + explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); } + ~RaiiLocker() { mu_->Unlock(); } + private: + MutexType* mu_; +}; + +template <> +class RaiiLocker<std::mutex> { + public: + explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); } + ~RaiiLocker() { mu_->unlock(); } + private: + std::mutex* mu_; +}; + +template <typename MutexType> +void BM_Contended(benchmark::State& state) { + struct Shared { + MutexType mu; + int data = 0; + }; + static auto* shared = new Shared; + int local = 0; + for (auto _ : state) { + // Here we model both local work outside of the critical section as well as + // some work inside of the critical section. The idea is to capture some + // more or less realisitic contention levels. + // If contention is too low, the benchmark won't measure anything useful. + // If contention is unrealistically high, the benchmark will favor + // bad mutex implementations that block and otherwise distract threads + // from the mutex and shared state for as much as possible. + // To achieve this amount of local work is multiplied by number of threads + // to keep ratio between local work and critical section approximately + // equal regardless of number of threads. + DelayNs(100 * state.threads, &local); + RaiiLocker<MutexType> locker(&shared->mu); + DelayNs(state.range(0), &shared->data); + } +} + +BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex) + ->UseRealTime() + // ThreadPerCpu poorly handles non-power-of-two CPU counts. + ->Threads(1) + ->Threads(2) + ->Threads(4) + ->Threads(6) + ->Threads(8) + ->Threads(12) + ->Threads(16) + ->Threads(24) + ->Threads(32) + ->Threads(48) + ->Threads(64) + ->Threads(96) + ->Threads(128) + ->Threads(192) + ->Threads(256) + // Some empirically chosen amounts of work in critical section. + // 1 is low contention, 200 is high contention and few values in between. + ->Arg(1) + ->Arg(20) + ->Arg(50) + ->Arg(200); + +BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock) + ->UseRealTime() + // ThreadPerCpu poorly handles non-power-of-two CPU counts. + ->Threads(1) + ->Threads(2) + ->Threads(4) + ->Threads(6) + ->Threads(8) + ->Threads(12) + ->Threads(16) + ->Threads(24) + ->Threads(32) + ->Threads(48) + ->Threads(64) + ->Threads(96) + ->Threads(128) + ->Threads(192) + ->Threads(256) + // Some empirically chosen amounts of work in critical section. + // 1 is low contention, 200 is high contention and few values in between. + ->Arg(1) + ->Arg(20) + ->Arg(50) + ->Arg(200); + +BENCHMARK_TEMPLATE(BM_Contended, std::mutex) + ->UseRealTime() + // ThreadPerCpu poorly handles non-power-of-two CPU counts. + ->Threads(1) + ->Threads(2) + ->Threads(4) + ->Threads(6) + ->Threads(8) + ->Threads(12) + ->Threads(16) + ->Threads(24) + ->Threads(32) + ->Threads(48) + ->Threads(64) + ->Threads(96) + ->Threads(128) + ->Threads(192) + ->Threads(256) + // Some empirically chosen amounts of work in critical section. + // 1 is low contention, 200 is high contention and few values in between. + ->Arg(1) + ->Arg(20) + ->Arg(50) + ->Arg(200); + // Measure the overhead of conditions on mutex release (when they must be // evaluated). Mutex has (some) support for equivalence classes allowing // Conditions with the same function/argument to potentially not be multiply @@ -82,13 +220,4 @@ constexpr int kMaxConditionWaiters = 1024; #endif BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters); -void BM_ContendedMutex(benchmark::State& state) { - static absl::Mutex* mu = new absl::Mutex; - for (auto _ : state) { - absl::MutexLock lock(mu); - } -} -BENCHMARK(BM_ContendedMutex)->Threads(1); -BENCHMARK(BM_ContendedMutex)->ThreadPerCpu(); - } // namespace |