| author | Vitaly Goldshteyn <goldvitaly@google.com> | 2024-05-20 11:57:11 -0700 |
| --- | --- | --- |
| committer | Copybara-Service <copybara-worker@google.com> | 2024-05-20 11:57:56 -0700 |
| commit | 6ab5b0aad86dc08d257f6b567611c231c6b8ac31 (patch) | |
| tree | 3b627c722f43e7bac4acbaf89832c665a22bb5b2 /absl/container/internal/raw_hash_set.h | |
| parent | 0128305738355d085e079bab281a7211a00a5b83 (diff) | |
Move `prepare_insert` out of line as the type-erased `PrepareInsertNonSoo`.

This significantly reduces the binary size of large binaries and creates a single hot function instead of many cold ones, which decreases cache misses during code execution.
We also avoid size-related computations for tables with no deleted slots when a resize is necessary.
PiperOrigin-RevId: 635527119
Change-Id: I763b135f1f6089051e62e348a07b33536af265ab
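To illustrate the technique behind this change (a simplified sketch, not the actual Abseil code): the hot insert slow path can be compiled once as a plain, non-templated function if every per-type operation it needs is passed in through a small table of function pointers. Each container instantiation then contributes only a tiny constant table instead of its own copy of the slow path. All names below (`TypeErasedOps`, `PrepareInsertShared`, `SetPolicy`) are hypothetical.

```cpp
#include <cstddef>
#include <cstring>

// Hypothetical stand-in for a PolicyFunctions-style table: the per-type
// operations the shared code needs, expressed as plain function pointers.
struct TypeErasedOps {
  size_t slot_size;
  size_t (*hash_slot)(const void* hash_fn, void* slot);
  void (*transfer)(void* dst, void* src, size_t size);
};

// Compiled exactly once for the whole program: no template parameters, so
// every instantiation of the container shares this single out-of-line body.
size_t PrepareInsertShared(const TypeErasedOps& ops, const void* hash_fn,
                           void* slot, size_t capacity) {
  const size_t hash = ops.hash_slot(hash_fn, slot);
  // ... probe for a free position, possibly resizing through another
  // function pointer; placeholder index computation below ...
  return hash & (capacity - 1);
}

// Each instantiation only supplies a constexpr table of pointers.
template <class Slot, class Hash>
struct SetPolicy {
  static size_t HashSlot(const void* hash_fn, void* slot) {
    return (*static_cast<const Hash*>(hash_fn))(*static_cast<Slot*>(slot));
  }
  static void Transfer(void* dst, void* src, size_t size) {
    std::memcpy(dst, src, size);  // fine for trivially relocatable slots
  }
  static constexpr TypeErasedOps kOps = {sizeof(Slot), &HashSlot, &Transfer};
};
```

The binary-size win comes from emitting one copy of the slow path rather than one per instantiation; the cost is an indirect call, which the commit message argues is repaid by having a single hot function with better cache behavior.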
Diffstat (limited to 'absl/container/internal/raw_hash_set.h')
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 244 |
1 file changed, 85 insertions(+), 159 deletions(-)
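For context on the first hunk below: `GrowthInfo` packs the remaining growth budget and a "has deleted slots" flag into one word, which is what makes the new `HasNoGrowthLeftAndNoDeleted()` a single comparison against zero. The sketch below mirrors the encoding implied by the predicates in the diff; the field and constant names, and the exact bit layout, are assumptions for illustration.

```cpp
#include <cstddef>
#include <type_traits>

// Illustrative re-creation of the encoding implied by the predicates in the
// diff; not the real GrowthInfo from raw_hash_set.h.
class GrowthInfoSketch {
 public:
  void InitGrowthLeft(size_t growth_left) { growth_left_info_ = growth_left; }
  void MarkHasDeleted() { growth_left_info_ |= kDeletedBit; }

  // Deleted bit clear and low bits non-zero => sign-interpreted value > 0.
  bool HasNoDeletedAndGrowthLeft() const {
    return static_cast<std::make_signed_t<size_t>>(growth_left_info_) > 0;
  }
  // Deleted bit clear => sign-interpreted value >= 0.
  bool HasNoDeleted() const {
    return static_cast<std::make_signed_t<size_t>>(growth_left_info_) >= 0;
  }
  // No deleted slots and no growth left => the whole word is zero.
  // This is the single-comparison check the commit adds.
  bool HasNoGrowthLeftAndNoDeleted() const { return growth_left_info_ == 0; }

 private:
  // Assumed layout: the top bit flags "table has kDeleted slots"; the rest is
  // the number of insertions allowed before the next resize/rehash.
  static constexpr size_t kDeletedBit = size_t{1} << (sizeof(size_t) * 8 - 1);
  size_t growth_left_info_ = 0;
};
```

With that encoding, the new predicate lets the insert path skip the size-related tombstone math when a table without deleted slots simply needs to grow, as the commit message describes.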
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 1d2e2d14..a64133a7 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -1126,6 +1126,13 @@ class GrowthInfo {
     return static_cast<std::make_signed_t<size_t>>(growth_left_info_) > 0;
   }
 
+  // Returns true if the table satisfies two properties:
+  // 1. Guaranteed to have no kDeleted slots.
+  // 2. There is no growth left.
+  bool HasNoGrowthLeftAndNoDeleted() const {
+    return growth_left_info_ == 0;
+  }
+
   // Returns true if table guaranteed to have no kDeleted slots.
   bool HasNoDeleted() const {
     return static_cast<std::make_signed_t<size_t>>(growth_left_info_) >= 0;
@@ -1374,6 +1381,8 @@ class CommonFields : public CommonFieldsGenerationInfo {
   // This is stored in the heap allocation before the control bytes.
   // TODO(b/289225379): experiment with moving growth_info back inline to
   // increase room for SOO.
+  size_t growth_left() const { return growth_info().GetGrowthLeft(); }
+
   GrowthInfo& growth_info() {
     auto* gl_ptr = reinterpret_cast<GrowthInfo*>(control()) - 1;
     assert(reinterpret_cast<uintptr_t>(gl_ptr) % alignof(GrowthInfo) == 0);
@@ -1933,8 +1942,7 @@ class HashSetResizeHelper {
   // will be no performance benefit.
   // It has implicit assumption that `resize` will call
   // `GrowSizeIntoSingleGroup*` in case `IsGrowingIntoSingleGroupApplicable`.
-  // Falls back to `find_first_non_full` in case of big groups, so it is
-  // safe to use after `rehash_and_grow_if_necessary`.
+  // Falls back to `find_first_non_full` in case of big groups.
   static FindInfo FindFirstNonFullAfterResize(const CommonFields& c,
                                               size_t old_capacity,
                                               size_t hash) {
@@ -2216,14 +2224,22 @@ size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size,
 struct PolicyFunctions {
   size_t slot_size;
 
+  // Returns the pointer to the hash function stored in the set.
+  const void* (*hash_fn)(const CommonFields& common);
+
   // Returns the hash of the pointed-to slot.
   size_t (*hash_slot)(const void* hash_fn, void* slot);
 
-  // Transfer the contents of src_slot to dst_slot.
+  // Transfers the contents of src_slot to dst_slot.
   void (*transfer)(void* set, void* dst_slot, void* src_slot);
 
-  // Deallocate the backing store from common.
+  // Deallocates the backing store from common.
   void (*dealloc)(CommonFields& common, const PolicyFunctions& policy);
+
+  // Resizes set to the new capacity.
+  // Arguments are used as in raw_hash_set::resize_impl.
+  void (*resize)(CommonFields& common, size_t new_capacity,
+                 HashtablezInfoHandle forced_infoz);
 };
 
 // ClearBackingArray clears the backing array, either modifying it in place,
@@ -2261,9 +2277,26 @@ ABSL_ATTRIBUTE_NOINLINE void TransferRelocatable(void*, void* dst, void* src) {
   memcpy(dst, src, SizeOfSlot);
 }
 
-// Type-erased version of raw_hash_set::drop_deletes_without_resize.
-void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn,
-                              const PolicyFunctions& policy, void* tmp_space);
+// Type erased raw_hash_set::get_hash_ref_fn for the empty hash function case.
+const void* GetHashRefForEmptyHasher(const CommonFields& common);
+
+// Given the hash of a value not currently in the table and the first empty
+// slot in the probe sequence, finds a viable slot index to insert it at.
+//
+// In case there's no space left, the table can be resized or rehashed
+// (for tables with deleted slots, see FindInsertPositionWithGrowthOrRehash).
+//
+// In the case of absence of deleted slots and positive growth_left, the element
+// can be inserted in the provided `target` position.
+//
+// When the table has deleted slots (according to GrowthInfo), the target
+// position will be searched one more time using `find_first_non_full`.
+//
+// REQUIRES: Table is not SOO.
+// REQUIRES: At least one non-full slot available.
+// REQUIRES: `target` is a valid empty position to insert.
+size_t PrepareInsertNonSoo(CommonFields& common, size_t hash, FindInfo target,
+                           const PolicyFunctions& policy);
 
 // A SwissTable.
 //
@@ -3533,7 +3566,7 @@ class raw_hash_set {
   //       common(), old_capacity, hash)
   // can be called right after `resize`.
   void resize(size_t new_capacity) {
-    resize_impl(new_capacity, HashtablezInfoHandle{});
+    raw_hash_set::resize_impl(common(), new_capacity, HashtablezInfoHandle{});
   }
 
   // As above, except that we also accept a pre-sampled, forced infoz for
@@ -3541,19 +3574,24 @@ class raw_hash_set {
   // store the infoz.
   void resize_with_soo_infoz(HashtablezInfoHandle forced_infoz) {
     assert(forced_infoz.IsSampled());
-    resize_impl(NextCapacity(SooCapacity()), forced_infoz);
+    raw_hash_set::resize_impl(common(), NextCapacity(SooCapacity()),
+                              forced_infoz);
   }
 
-  ABSL_ATTRIBUTE_NOINLINE void resize_impl(
-      size_t new_capacity, HashtablezInfoHandle forced_infoz) {
+  // Resizes set to the new capacity.
+  // It is a static function in order to use its pointer in GetPolicyFunctions.
+  ABSL_ATTRIBUTE_NOINLINE static void resize_impl(
+      CommonFields& common, size_t new_capacity,
+      HashtablezInfoHandle forced_infoz) {
+    raw_hash_set* set = reinterpret_cast<raw_hash_set*>(&common);
     assert(IsValidCapacity(new_capacity));
-    assert(!fits_in_soo(new_capacity));
-    const bool was_soo = is_soo();
-    const bool had_soo_slot = was_soo && !empty();
+    assert(!set->fits_in_soo(new_capacity));
+    const bool was_soo = set->is_soo();
+    const bool had_soo_slot = was_soo && !set->empty();
     const ctrl_t soo_slot_h2 =
-        had_soo_slot ? static_cast<ctrl_t>(H2(hash_of(soo_slot())))
+        had_soo_slot ? static_cast<ctrl_t>(H2(set->hash_of(set->soo_slot())))
                      : ctrl_t::kEmpty;
-    HashSetResizeHelper resize_helper(common(), was_soo, had_soo_slot,
+    HashSetResizeHelper resize_helper(common, was_soo, had_soo_slot,
                                       forced_infoz);
     // Initialize HashSetResizeHelper::old_heap_or_soo_. We can't do this in
     // HashSetResizeHelper constructor because it can't transfer slots when
     // TODO(b/289225379): try to handle more of the SOO cases inside
     // InitializeSlots. See comment on cl/555990034 snapshot #63.
     if (PolicyTraits::transfer_uses_memcpy() || !had_soo_slot) {
-      resize_helper.old_heap_or_soo() = common().heap_or_soo();
+      resize_helper.old_heap_or_soo() = common.heap_or_soo();
     } else {
-      transfer(to_slot(resize_helper.old_soo_data()), soo_slot());
+      set->transfer(set->to_slot(resize_helper.old_soo_data()),
+                    set->soo_slot());
     }
-    common().set_capacity(new_capacity);
+    common.set_capacity(new_capacity);
     // Note that `InitializeSlots` does different number initialization steps
     // depending on the values of `transfer_uses_memcpy` and capacities.
     // Refer to the comment in `InitializeSlots` for more details.
@@ -3573,7 +3612,7 @@ class raw_hash_set {
     resize_helper.InitializeSlots<CharAlloc, sizeof(slot_type),
                                   PolicyTraits::transfer_uses_memcpy(),
                                   SooEnabled(), alignof(slot_type)>(
-        common(), CharAlloc(alloc_ref()), soo_slot_h2, sizeof(key_type),
+        common, CharAlloc(set->alloc_ref()), soo_slot_h2, sizeof(key_type),
         sizeof(value_type));
 
     // In the SooEnabled() case, capacity is never 0 so we don't check.
@@ -3585,30 +3624,30 @@
     // Nothing more to do in this case.
     if (was_soo && !had_soo_slot) return;
 
-    slot_type* new_slots = slot_array();
+    slot_type* new_slots = set->slot_array();
     if (grow_single_group) {
       if (PolicyTraits::transfer_uses_memcpy()) {
         // InitializeSlots did all the work.
         return;
       }
       if (was_soo) {
-        transfer(new_slots + resize_helper.SooSlotIndex(),
-                 to_slot(resize_helper.old_soo_data()));
+        set->transfer(new_slots + resize_helper.SooSlotIndex(),
+                      to_slot(resize_helper.old_soo_data()));
         return;
       } else {
         // We want GrowSizeIntoSingleGroup to be called here in order to make
         // InitializeSlots not depend on PolicyTraits.
-        resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common(),
-                                                            alloc_ref());
+        resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common,
+                                                            set->alloc_ref());
       }
     } else {
       // InitializeSlots prepares control bytes to correspond to empty table.
       const auto insert_slot = [&](slot_type* slot) {
-        size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
+        size_t hash = PolicyTraits::apply(HashElement{set->hash_ref()},
                                           PolicyTraits::element(slot));
-        auto target = find_first_non_full(common(), hash);
-        SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type));
-        transfer(new_slots + target.offset, slot);
+        auto target = find_first_non_full(common, hash);
+        SetCtrl(common, target.offset, H2(hash), sizeof(slot_type));
+        set->transfer(new_slots + target.offset, slot);
         return target.probe_length;
       };
       if (was_soo) {
@@ -3623,80 +3662,13 @@
             total_probe_length += insert_slot(old_slots + i);
           }
         }
-        infoz().RecordRehash(total_probe_length);
+        common.infoz().RecordRehash(total_probe_length);
       }
     }
-    resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(alloc_ref()),
+    resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(set->alloc_ref()),
                                                     sizeof(slot_type));
   }
 
-  // Prunes control bytes to remove as many tombstones as possible.
-  //
-  // See the comment on `rehash_and_grow_if_necessary()`.
-  inline void drop_deletes_without_resize() {
-    // Stack-allocate space for swapping elements.
-    alignas(slot_type) unsigned char tmp[sizeof(slot_type)];
-    DropDeletesWithoutResize(common(), &hash_ref(), GetPolicyFunctions(), tmp);
-  }
-
-  // Called whenever the table *might* need to conditionally grow.
-  //
-  // This function is an optimization opportunity to perform a rehash even when
-  // growth is unnecessary, because vacating tombstones is beneficial for
-  // performance in the long-run.
-  void rehash_and_grow_if_necessary() {
-    const size_t cap = capacity();
-    if (cap > Group::kWidth &&
-        // Do these calculations in 64-bit to avoid overflow.
-        size() * uint64_t{32} <= cap * uint64_t{25}) {
-      // Squash DELETED without growing if there is enough capacity.
-      //
-      // Rehash in place if the current size is <= 25/32 of capacity.
-      // Rationale for such a high factor: 1) drop_deletes_without_resize() is
-      // faster than resize, and 2) it takes quite a bit of work to add
-      // tombstones. In the worst case, seems to take approximately 4
-      // insert/erase pairs to create a single tombstone and so if we are
-      // rehashing because of tombstones, we can afford to rehash-in-place as
-      // long as we are reclaiming at least 1/8 the capacity without doing more
-      // than 2X the work. (Where "work" is defined to be size() for rehashing
-      // or rehashing in place, and 1 for an insert or erase.) But rehashing in
-      // place is faster per operation than inserting or even doubling the size
-      // of the table, so we actually afford to reclaim even less space from a
-      // resize-in-place. The decision is to rehash in place if we can reclaim
-      // at about 1/8th of the usable capacity (specifically 3/28 of the
-      // capacity) which means that the total cost of rehashing will be a small
-      // fraction of the total work.
-      //
-      // Here is output of an experiment using the BM_CacheInSteadyState
-      // benchmark running the old case (where we rehash-in-place only if we can
-      // reclaim at least 7/16*capacity) vs. this code (which rehashes in place
-      // if we can recover 3/32*capacity).
-      //
-      // Note that although in the worst-case number of rehashes jumped up from
-      // 15 to 190, but the number of operations per second is almost the same.
-      //
-      // Abridged output of running BM_CacheInSteadyState benchmark from
-      // raw_hash_set_benchmark. N is the number of insert/erase operations.
-      //
-      //      | OLD (recover >= 7/16        | NEW (recover >= 3/32)
-      // size |    N/s LoadFactor NRehashes |    N/s LoadFactor NRehashes
-      //  448 | 145284       0.44        18 | 140118       0.44        19
-      //  493 | 152546       0.24        11 | 151417       0.48        28
-      //  538 | 151439       0.26        11 | 151152       0.53        38
-      //  583 | 151765       0.28        11 | 150572       0.57        50
-      //  628 | 150241       0.31        11 | 150853       0.61        66
-      //  672 | 149602       0.33        12 | 150110       0.66        90
-      //  717 | 149998       0.35        12 | 149531       0.70       129
-      //  762 | 149836       0.37        13 | 148559       0.74       190
-      //  807 | 149736       0.39        14 | 151107       0.39        14
-      //  852 | 150204       0.42        15 | 151019       0.42        15
-      drop_deletes_without_resize();
-    } else {
-      // Otherwise grow the container.
-      resize(NextCapacity(cap));
-    }
-  }
-
   // Casting directly from e.g. char* to slot_type* can cause compilation errors
   // on objective-C. This function converts to void* first, avoiding the issue.
   static slot_type* to_slot(void* buf) {
@@ -3817,6 +3789,7 @@ class raw_hash_set {
 
   template <class K>
   std::pair<iterator, bool> find_or_prepare_insert_non_soo(const K& key) {
+    assert(!is_soo());
     prefetch_heap_block();
     auto hash = hash_ref()(key);
     auto seq = probe(common(), hash);
@@ -3833,9 +3806,10 @@ class raw_hash_set {
       if (ABSL_PREDICT_TRUE(mask_empty)) {
         size_t target = seq.offset(
             GetInsertionOffset(mask_empty, capacity(), hash, control()));
-        return {
-            iterator_at(prepare_insert(hash, FindInfo{target, seq.index()})),
-            true};
+        return {iterator_at(PrepareInsertNonSoo(common(), hash,
+                                                FindInfo{target, seq.index()},
+                                                GetPolicyFunctions())),
+                true};
       }
       seq.next();
       assert(seq.index() <= capacity() && "full table!");
@@ -3852,63 +3826,6 @@ class raw_hash_set {
     return find_or_prepare_insert_non_soo(key);
   }
 
-  // Given the hash of a value not currently in the table and the first empty
-  // slot in the probe sequence, finds the viable slot index to insert it at.
-  //
-  // In case there's no space left, the table can be resized or rehashed
-  // (see rehash_and_grow_if_necessary).
-  //
-  // In case of absence of deleted slots and positive growth_left, element can
-  // be inserted in the provided `target` position.
-  //
-  // When table has deleted slots (according to GrowthInfo), target position
-  // will be searched one more time using `find_first_non_full`.
-  //
-  // REQUIRES: At least one non-full slot available.
-  // REQUIRES: `target` is a valid empty position to insert.
-  size_t prepare_insert(size_t hash, FindInfo target) ABSL_ATTRIBUTE_NOINLINE {
-    assert(!is_soo());
-    // When there are no deleted slots in the table
-    // and growth_left is positive, we can insert at the first
-    // empty slot in the probe sequence (target).
-    bool use_target_hint = false;
-    // Optimization is disabled on enabled generations.
-    // We have to rehash even sparse tables randomly in such mode.
-#ifndef ABSL_SWISSTABLE_ENABLE_GENERATIONS
-    use_target_hint = growth_info().HasNoDeletedAndGrowthLeft();
-#endif
-    if (ABSL_PREDICT_FALSE(!use_target_hint)) {
-      const bool rehash_for_bug_detection =
-          common().should_rehash_for_bug_detection_on_insert();
-      if (rehash_for_bug_detection) {
-        // Move to a different heap allocation in order to detect bugs.
-        const size_t cap = capacity();
-        resize(growth_left() > 0 ? cap : NextCapacity(cap));
-      }
-      if (!rehash_for_bug_detection &&
-          ABSL_PREDICT_FALSE(growth_left() == 0)) {
-        const size_t old_capacity = capacity();
-        rehash_and_grow_if_necessary();
-        // NOTE: It is safe to use `FindFirstNonFullAfterResize` after
-        // `rehash_and_grow_if_necessary`, whether capacity changes or not.
-        // `rehash_and_grow_if_necessary` may *not* call `resize`
-        // and perform `drop_deletes_without_resize` instead. But this
-        // could happen only on big tables and will not change capacity.
-        // For big tables `FindFirstNonFullAfterResize` will always
-        // fallback to normal `find_first_non_full`.
-        target = HashSetResizeHelper::FindFirstNonFullAfterResize(
-            common(), old_capacity, hash);
-      } else {
-        target = find_first_non_full(common(), hash);
-      }
-    }
-    PrepareInsertCommon(common());
-    growth_info().OverwriteControlAsFull(control()[target.offset]);
-    SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type));
-    infoz().RecordInsert(hash, target.probe_length);
-    return target.offset;
-  }
-
   // Constructs the value in the space pointed by the iterator. This only works
   // after an unsuccessful find_or_prepare_insert() and before any other
   // modifications happen in the raw_hash_set.
@@ -3949,7 +3866,7 @@ class raw_hash_set {
   // See `CapacityToGrowth()`.
   size_t growth_left() const {
     assert(!is_soo());
-    return growth_info().GetGrowthLeft();
+    return common().growth_left();
  }
 
   GrowthInfo& growth_info() {
@@ -4009,6 +3926,10 @@ class raw_hash_set {
     return settings_.template get<3>();
   }
 
+  static const void* get_hash_ref_fn(const CommonFields& common) {
+    auto* h = reinterpret_cast<const raw_hash_set*>(&common);
+    return &h->hash_ref();
+  }
   static void transfer_slot_fn(void* set, void* dst, void* src) {
     auto* h = static_cast<raw_hash_set*>(set);
     h->transfer(static_cast<slot_type*>(dst), static_cast<slot_type*>(src));
   }
@@ -4030,6 +3951,10 @@ class raw_hash_set {
   static const PolicyFunctions& GetPolicyFunctions() {
     static constexpr PolicyFunctions value = {
         sizeof(slot_type),
+        // TODO(b/328722020): try to type erase
+        // for standard layout and alignof(Hash) <= alignof(CommonFields).
+        std::is_empty<hasher>::value ? &GetHashRefForEmptyHasher
+                                     : &raw_hash_set::get_hash_ref_fn,
         PolicyTraits::template get_hash_slot_fn<hasher>(),
         PolicyTraits::transfer_uses_memcpy()
             ? TransferRelocatable<sizeof(slot_type)>
@@ -4037,6 +3962,7 @@
         (std::is_same<SlotAlloc, std::allocator<slot_type>>::value
              ? &DeallocateStandard<alignof(slot_type)>
             : &raw_hash_set::dealloc_fn),
+        &raw_hash_set::resize_impl,
     };
     return value;
   }
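Reading the removed `prepare_insert` together with the new `PolicyFunctions` members (`hash_fn`, `resize`) suggests roughly what the shared `PrepareInsertNonSoo` has to do. The following is a hedged, self-contained sketch of that control flow; all types, names, and the growth arithmetic are simplified stand-ins inferred from this diff, not the actual definition.

```cpp
#include <cstddef>

// Simplified stand-ins so the sketch compiles on its own; the real
// CommonFields, FindInfo, and PolicyFunctions carry much more state.
struct FindInfo { size_t offset; size_t probe_length; };
struct CommonFields {
  size_t capacity = 0;
  size_t growth_left = 0;
  bool has_deleted = false;
};
struct PolicyFunctions {
  void (*resize)(CommonFields& common, size_t new_capacity);
};

// Stand-in for find_first_non_full; the real one walks the probe sequence
// over the control bytes.
static FindInfo FindFirstNonFull(const CommonFields& common, size_t hash) {
  return {hash % (common.capacity + 1), 0};
}

// Plausible outline of the shared, type-erased slow path.
size_t PrepareInsertNonSooSketch(CommonFields& common, size_t hash,
                                 FindInfo target,
                                 const PolicyFunctions& policy) {
  const bool use_target_hint = !common.has_deleted && common.growth_left > 0;
  if (!use_target_hint) {
    if (common.growth_left == 0) {
      // Grow through the type-erased callback (the new PolicyFunctions::resize
      // pointer); the real code may instead rehash in place to reclaim
      // tombstones. "2 * cap + 1" assumes capacities of the form 2^k - 1.
      policy.resize(common, common.capacity * 2 + 1);
    }
    // With deleted slots present, or after growing, re-probe for a free slot.
    target = FindFirstNonFull(common, hash);
  }
  // Bookkeeping: mark the slot full and consume growth budget; the offset is
  // returned so the thin templated caller can construct the element in place.
  if (common.growth_left > 0) --common.growth_left;
  return target.offset;
}
```

Note how everything type-dependent (hashing, transfer, element construction) stays either behind a function pointer or in the small templated caller `find_or_prepare_insert_non_soo`, which is what allows this body to be compiled once and shared by every instantiation.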