// Copyright 2018 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include #include #include #include #include #include #include #include #include #include "benchmark/benchmark.h" #include "absl/base/internal/raw_logging.h" #include "absl/container/btree_map.h" #include "absl/container/btree_set.h" #include "absl/container/btree_test.h" #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/container/internal/hashtable_debug.h" #include "absl/hash/hash.h" #include "absl/log/log.h" #include "absl/memory/memory.h" #include "absl/strings/cord.h" #include "absl/strings/str_format.h" #include "absl/time/time.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace container_internal { namespace { constexpr size_t kBenchmarkValues = 1 << 20; // How many times we add and remove sub-batches in one batch of *AddRem // benchmarks. constexpr size_t kAddRemBatchSize = 1 << 2; // Generates n values in the range [0, 4 * n]. template std::vector GenerateValues(int n) { constexpr int kSeed = 23; return GenerateValuesWithSeed(n, 4 * n, kSeed); } // Benchmark insertion of values into a container. template void BM_InsertImpl(benchmark::State& state, bool sorted) { using V = typename remove_pair_const::type; typename KeyOfValue::type key_of_value; std::vector values = GenerateValues(kBenchmarkValues); if (sorted) { std::sort(values.begin(), values.end()); } T container(values.begin(), values.end()); // Remove and re-insert 10% of the keys per batch. const int batch_size = (kBenchmarkValues + 9) / 10; while (state.KeepRunningBatch(batch_size)) { state.PauseTiming(); const auto i = static_cast(state.iterations()); for (int j = i; j < i + batch_size; j++) { int x = j % kBenchmarkValues; container.erase(key_of_value(values[x])); } state.ResumeTiming(); for (int j = i; j < i + batch_size; j++) { int x = j % kBenchmarkValues; container.insert(values[x]); } } } template void BM_Insert(benchmark::State& state) { BM_InsertImpl(state, false); } template void BM_InsertSorted(benchmark::State& state) { BM_InsertImpl(state, true); } // Benchmark inserting the first few elements in a container. In b-tree, this is // when the root node grows. template void BM_InsertSmall(benchmark::State& state) { using V = typename remove_pair_const::type; const int kSize = 8; std::vector values = GenerateValues(kSize); T container; while (state.KeepRunningBatch(kSize)) { for (int i = 0; i < kSize; ++i) { benchmark::DoNotOptimize(container.insert(values[i])); } state.PauseTiming(); // Do not measure the time it takes to clear the container. container.clear(); state.ResumeTiming(); } } template void BM_LookupImpl(benchmark::State& state, bool sorted) { using V = typename remove_pair_const::type; typename KeyOfValue::type key_of_value; std::vector values = GenerateValues(kBenchmarkValues); if (sorted) { std::sort(values.begin(), values.end()); } T container(values.begin(), values.end()); while (state.KeepRunning()) { int idx = state.iterations() % kBenchmarkValues; benchmark::DoNotOptimize(container.find(key_of_value(values[idx]))); } } // Benchmark lookup of values in a container. template void BM_Lookup(benchmark::State& state) { BM_LookupImpl(state, false); } // Benchmark lookup of values in a full container, meaning that values // are inserted in-order to take advantage of biased insertion, which // yields a full tree. template void BM_FullLookup(benchmark::State& state) { BM_LookupImpl(state, true); } // Benchmark erasing values from a container. template void BM_Erase(benchmark::State& state) { using V = typename remove_pair_const::type; typename KeyOfValue::type key_of_value; std::vector values = GenerateValues(kBenchmarkValues); T container(values.begin(), values.end()); // Remove and re-insert 10% of the keys per batch. const int batch_size = (kBenchmarkValues + 9) / 10; while (state.KeepRunningBatch(batch_size)) { const int i = state.iterations(); for (int j = i; j < i + batch_size; j++) { int x = j % kBenchmarkValues; container.erase(key_of_value(values[x])); } state.PauseTiming(); for (int j = i; j < i + batch_size; j++) { int x = j % kBenchmarkValues; container.insert(values[x]); } state.ResumeTiming(); } } // Benchmark erasing multiple values from a container. template void BM_EraseRange(benchmark::State& state) { using V = typename remove_pair_const::type; typename KeyOfValue::type key_of_value; std::vector values = GenerateValues(kBenchmarkValues); T container(values.begin(), values.end()); // Remove and re-insert 10% of the keys per batch. const int batch_size = (kBenchmarkValues + 9) / 10; while (state.KeepRunningBatch(batch_size)) { const int i = state.iterations(); const int start_index = i % kBenchmarkValues; state.PauseTiming(); { std::vector removed; removed.reserve(batch_size); auto itr = container.find(key_of_value(values[start_index])); auto start = itr; for (int j = 0; j < batch_size; j++) { if (itr == container.end()) { state.ResumeTiming(); container.erase(start, itr); state.PauseTiming(); itr = container.begin(); start = itr; } removed.push_back(*itr++); } state.ResumeTiming(); container.erase(start, itr); state.PauseTiming(); container.insert(removed.begin(), removed.end()); } state.ResumeTiming(); } } // Predicate that erases every other element. We can't use a lambda because // C++11 doesn't support generic lambdas. // TODO(b/207389011): consider adding benchmarks that remove different fractions // of keys (e.g. 10%, 90%). struct EraseIfPred { uint64_t i = 0; template bool operator()(const T&) { return ++i % 2; } }; // Benchmark erasing multiple values from a container with a predicate. template void BM_EraseIf(benchmark::State& state) { using V = typename remove_pair_const::type; std::vector values = GenerateValues(kBenchmarkValues); // Removes half of the keys per batch. const int batch_size = (kBenchmarkValues + 1) / 2; EraseIfPred pred; while (state.KeepRunningBatch(batch_size)) { state.PauseTiming(); { T container(values.begin(), values.end()); state.ResumeTiming(); erase_if(container, pred); benchmark::DoNotOptimize(container); state.PauseTiming(); } state.ResumeTiming(); } } // Benchmark steady-state insert (into first half of range) and remove (from // second half of range), treating the container approximately like a queue with // log-time access for all elements. This benchmark does not test the case where // insertion and removal happen in the same region of the tree. This benchmark // counts two value constructors. template void BM_QueueAddRem(benchmark::State& state) { using V = typename remove_pair_const::type; typename KeyOfValue::type key_of_value; ABSL_RAW_CHECK(kBenchmarkValues % 2 == 0, "for performance"); T container; const size_t half = kBenchmarkValues / 2; std::vector remove_keys(half); std::vector add_keys(half); // We want to do the exact same work repeatedly, and the benchmark can end // after a different number of iterations depending on the speed of the // individual run so we use a large batch size here and ensure that we do // deterministic work every batch. while (state.KeepRunningBatch(half * kAddRemBatchSize)) { state.PauseTiming(); container.clear(); for (size_t i = 0; i < half; ++i) { remove_keys[i] = i; add_keys[i] = i; } constexpr int kSeed = 5; std::mt19937_64 rand(kSeed); std::shuffle(remove_keys.begin(), remove_keys.end(), rand); std::shuffle(add_keys.begin(), add_keys.end(), rand); // Note needs lazy generation of values. Generator g(kBenchmarkValues * kAddRemBatchSize); for (size_t i = 0; i < half; ++i) { container.insert(g(add_keys[i])); container.insert(g(half + remove_keys[i])); } // There are three parts each of size "half": // 1 is being deleted from [offset - half, offset) // 2 is standing [offset, offset + half) // 3 is being inserted into [offset + half, offset + 2 * half) size_t offset = 0; for (size_t i = 0; i < kAddRemBatchSize; ++i) { std::shuffle(remove_keys.begin(), remove_keys.end(), rand); std::shuffle(add_keys.begin(), add_keys.end(), rand); offset += half; state.ResumeTiming(); for (size_t idx = 0; idx < half; ++idx) { container.erase(key_of_value(g(offset - half + remove_keys[idx]))); container.insert(g(offset + half + add_keys[idx])); } state.PauseTiming(); } state.ResumeTiming(); } } // Mixed insertion and deletion in the same range using pre-constructed values. template void BM_MixedAddRem(benchmark::State& state) { using V = typename remove_pair_const::type; typename KeyOfValue::type key_of_value; ABSL_RAW_CHECK(kBenchmarkValues % 2 == 0, "for performance"); T container; // Create two random shuffles std::vector remove_keys(kBenchmarkValues); std::vector add_keys(kBenchmarkValues); // We want to do the exact same work repeatedly, and the benchmark can end // after a different number of iterations depending on the speed of the // individual run so we use a large batch size here and ensure that we do // deterministic work every batch. while (state.KeepRunningBatch(kBenchmarkValues * kAddRemBatchSize)) { state.PauseTiming(); container.clear(); constexpr int kSeed = 7; std::mt19937_64 rand(kSeed); std::vector values = GenerateValues(kBenchmarkValues * 2); // Insert the first half of the values (already in random order) container.insert(values.begin(), values.begin() + kBenchmarkValues); // Insert the first half of the values (already in random order) for (size_t i = 0; i < kBenchmarkValues; ++i) { // remove_keys and add_keys will be swapped before each round, // therefore fill add_keys here w/ the keys being inserted, so // they'll be the first to be removed. remove_keys[i] = i + kBenchmarkValues; add_keys[i] = i; } for (size_t i = 0; i < kAddRemBatchSize; ++i) { remove_keys.swap(add_keys); std::shuffle(remove_keys.begin(), remove_keys.end(), rand); std::shuffle(add_keys.begin(), add_keys.end(), rand); state.ResumeTiming(); for (size_t idx = 0; idx < kBenchmarkValues; ++idx) { container.erase(key_of_value(values[remove_keys[idx]])); container.insert(values[add_keys[idx]]); } state.PauseTiming(); } state.ResumeTiming(); } } // Insertion at end, removal from the beginning. This benchmark // counts two value constructors. // TODO(ezb): we could add a GenerateNext version of generator that could reduce // noise for string-like types. template void BM_Fifo(benchmark::State& state) { using V = typename remove_pair_const::type; T container; // Need lazy generation of values as state.max_iterations is large. Generator g(kBenchmarkValues + state.max_iterations); for (int i = 0; i < kBenchmarkValues; i++) { container.insert(g(i)); } while (state.KeepRunning()) { container.erase(container.begin()); container.insert(container.end(), g(state.iterations() + kBenchmarkValues)); } } // Iteration (forward) through the tree template void BM_FwdIter(benchmark::State& state) { using V = typename remove_pair_const::type; using R = typename T::value_type const*; std::vector values = GenerateValues(kBenchmarkValues); T container(values.begin(), values.end()); auto iter = container.end(); R r = nullptr; while (state.KeepRunning()) { if (iter == container.end()) iter = container.begin(); r = &(*iter); ++iter; } benchmark::DoNotOptimize(r); } // Benchmark random range-construction of a container. template void BM_RangeConstructionImpl(benchmark::State& state, bool sorted) { using V = typename remove_pair_const::type; std::vector values = GenerateValues(kBenchmarkValues); if (sorted) { std::sort(values.begin(), values.end()); } { T container(values.begin(), values.end()); } while (state.KeepRunning()) { T container(values.begin(), values.end()); benchmark::DoNotOptimize(container); } } template void BM_InsertRangeRandom(benchmark::State& state) { BM_RangeConstructionImpl(state, false); } template void BM_InsertRangeSorted(benchmark::State& state) { BM_RangeConstructionImpl(state, true); } #define STL_ORDERED_TYPES(value) \ using stl_set_##value = std::set; \ using stl_map_##value = std::map; \ using stl_multiset_##value = std::multiset; \ using stl_multimap_##value = std::multimap using StdString = std::string; STL_ORDERED_TYPES(int32_t); STL_ORDERED_TYPES(int64_t); STL_ORDERED_TYPES(StdString); STL_ORDERED_TYPES(Cord); STL_ORDERED_TYPES(Time); #define STL_UNORDERED_TYPES(value) \ using stl_unordered_set_##value = std::unordered_set; \ using stl_unordered_map_##value = std::unordered_map; \ using flat_hash_set_##value = flat_hash_set; \ using flat_hash_map_##value = flat_hash_map; \ using stl_unordered_multiset_##value = std::unordered_multiset; \ using stl_unordered_multimap_##value = \ std::unordered_multimap #define STL_UNORDERED_TYPES_CUSTOM_HASH(value, hash) \ using stl_unordered_set_##value = std::unordered_set; \ using stl_unordered_map_##value = std::unordered_map; \ using flat_hash_set_##value = flat_hash_set; \ using flat_hash_map_##value = flat_hash_map; \ using stl_unordered_multiset_##value = std::unordered_multiset; \ using stl_unordered_multimap_##value = \ std::unordered_multimap STL_UNORDERED_TYPES_CUSTOM_HASH(Cord, absl::Hash); STL_UNORDERED_TYPES(int32_t); STL_UNORDERED_TYPES(int64_t); STL_UNORDERED_TYPES(StdString); STL_UNORDERED_TYPES_CUSTOM_HASH(Time, absl::Hash); #define BTREE_TYPES(value) \ using btree_256_set_##value = \ btree_set, std::allocator>; \ using btree_256_map_##value = \ btree_map, \ std::allocator>>; \ using btree_256_multiset_##value = \ btree_multiset, std::allocator>; \ using btree_256_multimap_##value = \ btree_multimap, \ std::allocator>> BTREE_TYPES(int32_t); BTREE_TYPES(int64_t); BTREE_TYPES(StdString); BTREE_TYPES(Cord); BTREE_TYPES(Time); #define MY_BENCHMARK4(type, func) \ void BM_##type##_##func(benchmark::State& state) { BM_##func(state); } \ BENCHMARK(BM_##type##_##func) #define MY_BENCHMARK3_STL(type) \ MY_BENCHMARK4(type, Insert); \ MY_BENCHMARK4(type, InsertSorted); \ MY_BENCHMARK4(type, InsertSmall); \ MY_BENCHMARK4(type, Lookup); \ MY_BENCHMARK4(type, FullLookup); \ MY_BENCHMARK4(type, Erase); \ MY_BENCHMARK4(type, EraseRange); \ MY_BENCHMARK4(type, QueueAddRem); \ MY_BENCHMARK4(type, MixedAddRem); \ MY_BENCHMARK4(type, Fifo); \ MY_BENCHMARK4(type, FwdIter); \ MY_BENCHMARK4(type, InsertRangeRandom); \ MY_BENCHMARK4(type, InsertRangeSorted) #define MY_BENCHMARK3(type) \ MY_BENCHMARK4(type, EraseIf); \ MY_BENCHMARK3_STL(type) #define MY_BENCHMARK2_SUPPORTS_MULTI_ONLY(type) \ MY_BENCHMARK3_STL(stl_##type); \ MY_BENCHMARK3_STL(stl_unordered_##type); \ MY_BENCHMARK3(btree_256_##type) #define MY_BENCHMARK2(type) \ MY_BENCHMARK2_SUPPORTS_MULTI_ONLY(type); \ MY_BENCHMARK3(flat_hash_##type) // Define MULTI_TESTING to see benchmarks for multi-containers also. // // You can use --copt=-DMULTI_TESTING. #ifdef MULTI_TESTING #define MY_BENCHMARK(type) \ MY_BENCHMARK2(set_##type); \ MY_BENCHMARK2(map_##type); \ MY_BENCHMARK2_SUPPORTS_MULTI_ONLY(multiset_##type); \ MY_BENCHMARK2_SUPPORTS_MULTI_ONLY(multimap_##type) #else #define MY_BENCHMARK(type) \ MY_BENCHMARK2(set_##type); \ MY_BENCHMARK2(map_##type) #endif MY_BENCHMARK(int32_t); MY_BENCHMARK(int64_t); MY_BENCHMARK(StdString); MY_BENCHMARK(Cord); MY_BENCHMARK(Time); // Define a type whose size and cost of moving are independently customizable. // When sizeof(value_type) increases, we expect btree to no longer have as much // cache-locality advantage over STL. When cost of moving increases, we expect // btree to actually do more work than STL because it has to move values around // and STL doesn't have to. template struct BigType { BigType() : BigType(0) {} explicit BigType(int x) { std::iota(values.begin(), values.end(), x); } void Copy(const BigType& other) { for (int i = 0; i < Size && i < Copies; ++i) values[i] = other.values[i]; // If Copies > Size, do extra copies. for (int i = Size, idx = 0; i < Copies; ++i) { int64_t tmp = other.values[idx]; benchmark::DoNotOptimize(tmp); idx = idx + 1 == Size ? 0 : idx + 1; } } BigType(const BigType& other) { Copy(other); } BigType& operator=(const BigType& other) { Copy(other); return *this; } // Compare only the first Copies elements if Copies is less than Size. bool operator<(const BigType& other) const { return std::lexicographical_compare( values.begin(), values.begin() + std::min(Size, Copies), other.values.begin(), other.values.begin() + std::min(Size, Copies)); } bool operator==(const BigType& other) const { return std::equal(values.begin(), values.begin() + std::min(Size, Copies), other.values.begin()); } // Support absl::Hash. template friend State AbslHashValue(State h, const BigType& b) { for (int i = 0; i < Size && i < Copies; ++i) h = State::combine(std::move(h), b.values[i]); return h; } std::array values; }; #define BIG_TYPE_BENCHMARKS(SIZE, COPIES) \ using stl_set_size##SIZE##copies##COPIES = std::set>; \ using stl_map_size##SIZE##copies##COPIES = \ std::map, intptr_t>; \ using stl_multiset_size##SIZE##copies##COPIES = \ std::multiset>; \ using stl_multimap_size##SIZE##copies##COPIES = \ std::multimap, intptr_t>; \ using stl_unordered_set_size##SIZE##copies##COPIES = \ std::unordered_set, \ absl::Hash>>; \ using stl_unordered_map_size##SIZE##copies##COPIES = \ std::unordered_map, intptr_t, \ absl::Hash>>; \ using flat_hash_set_size##SIZE##copies##COPIES = \ flat_hash_set>; \ using flat_hash_map_size##SIZE##copies##COPIES = \ flat_hash_map, intptr_t>; \ using stl_unordered_multiset_size##SIZE##copies##COPIES = \ std::unordered_multiset, \ absl::Hash>>; \ using stl_unordered_multimap_size##SIZE##copies##COPIES = \ std::unordered_multimap, intptr_t, \ absl::Hash>>; \ using btree_256_set_size##SIZE##copies##COPIES = \ btree_set>; \ using btree_256_map_size##SIZE##copies##COPIES = \ btree_map, intptr_t>; \ using btree_256_multiset_size##SIZE##copies##COPIES = \ btree_multiset>; \ using btree_256_multimap_size##SIZE##copies##COPIES = \ btree_multimap, intptr_t>; \ MY_BENCHMARK(size##SIZE##copies##COPIES) // Define BIG_TYPE_TESTING to see benchmarks for more big types. // // You can use --copt=-DBIG_TYPE_TESTING. #ifndef NODESIZE_TESTING #ifdef BIG_TYPE_TESTING BIG_TYPE_BENCHMARKS(1, 4); BIG_TYPE_BENCHMARKS(4, 1); BIG_TYPE_BENCHMARKS(4, 4); BIG_TYPE_BENCHMARKS(1, 8); BIG_TYPE_BENCHMARKS(8, 1); BIG_TYPE_BENCHMARKS(8, 8); BIG_TYPE_BENCHMARKS(1, 16); BIG_TYPE_BENCHMARKS(16, 1); BIG_TYPE_BENCHMARKS(16, 16); BIG_TYPE_BENCHMARKS(1, 32); BIG_TYPE_BENCHMARKS(32, 1); BIG_TYPE_BENCHMARKS(32, 32); #else BIG_TYPE_BENCHMARKS(32, 32); #endif #endif // Benchmark using unique_ptrs to large value types. In order to be able to use // the same benchmark code as the other types, use a type that holds a // unique_ptr and has a copy constructor. template struct BigTypePtr { BigTypePtr() : BigTypePtr(0) {} explicit BigTypePtr(int x) { ptr = absl::make_unique>(x); } BigTypePtr(const BigTypePtr& other) { ptr = absl::make_unique>(*other.ptr); } BigTypePtr(BigTypePtr&& other) noexcept = default; BigTypePtr& operator=(const BigTypePtr& other) { ptr = absl::make_unique>(*other.ptr); } BigTypePtr& operator=(BigTypePtr&& other) noexcept = default; bool operator<(const BigTypePtr& other) const { return *ptr < *other.ptr; } bool operator==(const BigTypePtr& other) const { return *ptr == *other.ptr; } std::unique_ptr> ptr; }; template double ContainerInfo(const btree_set>& b) { const double bytes_used = b.bytes_used() + b.size() * sizeof(BigType); const double bytes_per_value = bytes_used / b.size(); BtreeContainerInfoLog(b, bytes_used, bytes_per_value); return bytes_per_value; } template double ContainerInfo(const btree_map>& b) { const double bytes_used = b.bytes_used() + b.size() * sizeof(BigType); const double bytes_per_value = bytes_used / b.size(); BtreeContainerInfoLog(b, bytes_used, bytes_per_value); return bytes_per_value; } #define BIG_TYPE_PTR_BENCHMARKS(SIZE) \ using stl_set_size##SIZE##copies##SIZE##ptr = std::set>; \ using stl_map_size##SIZE##copies##SIZE##ptr = \ std::map>; \ using stl_unordered_set_size##SIZE##copies##SIZE##ptr = \ std::unordered_set, \ absl::Hash>>; \ using stl_unordered_map_size##SIZE##copies##SIZE##ptr = \ std::unordered_map>; \ using flat_hash_set_size##SIZE##copies##SIZE##ptr = \ flat_hash_set>; \ using flat_hash_map_size##SIZE##copies##SIZE##ptr = \ flat_hash_map>; \ using btree_256_set_size##SIZE##copies##SIZE##ptr = \ btree_set>; \ using btree_256_map_size##SIZE##copies##SIZE##ptr = \ btree_map>; \ MY_BENCHMARK3_STL(stl_set_size##SIZE##copies##SIZE##ptr); \ MY_BENCHMARK3_STL(stl_unordered_set_size##SIZE##copies##SIZE##ptr); \ MY_BENCHMARK3(flat_hash_set_size##SIZE##copies##SIZE##ptr); \ MY_BENCHMARK3(btree_256_set_size##SIZE##copies##SIZE##ptr); \ MY_BENCHMARK3_STL(stl_map_size##SIZE##copies##SIZE##ptr); \ MY_BENCHMARK3_STL(stl_unordered_map_size##SIZE##copies##SIZE##ptr); \ MY_BENCHMARK3(flat_hash_map_size##SIZE##copies##SIZE##ptr); \ MY_BENCHMARK3(btree_256_map_size##SIZE##copies##SIZE##ptr) BIG_TYPE_PTR_BENCHMARKS(32); } // namespace } // namespace container_internal ABSL_NAMESPACE_END } // namespace absl