Diffstat (limited to 'absl/container/internal/raw_hash_set.h')
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 244 |
1 files changed, 85 insertions, 159 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 1d2e2d14..a64133a7 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -1126,6 +1126,13 @@ class GrowthInfo {
     return static_cast<std::make_signed_t<size_t>>(growth_left_info_) > 0;
   }
 
+  // Returns true if the table satisfies two properties:
+  // 1. Guaranteed to have no kDeleted slots.
+  // 2. There is no growth left.
+  bool HasNoGrowthLeftAndNoDeleted() const {
+    return growth_left_info_ == 0;
+  }
+
   // Returns true if table guaranteed to have no kDeleted slots.
   bool HasNoDeleted() const {
     return static_cast<std::make_signed_t<size_t>>(growth_left_info_) >= 0;
@@ -1374,6 +1381,8 @@ class CommonFields : public CommonFieldsGenerationInfo {
   // This is stored in the heap allocation before the control bytes.
   // TODO(b/289225379): experiment with moving growth_info back inline to
   // increase room for SOO.
+  size_t growth_left() const { return growth_info().GetGrowthLeft(); }
+
   GrowthInfo& growth_info() {
     auto* gl_ptr = reinterpret_cast<GrowthInfo*>(control()) - 1;
     assert(reinterpret_cast<uintptr_t>(gl_ptr) % alignof(GrowthInfo) == 0);
@@ -1933,8 +1942,7 @@ class HashSetResizeHelper {
   // will be no performance benefit.
   // It has implicit assumption that `resize` will call
   // `GrowSizeIntoSingleGroup*` in case `IsGrowingIntoSingleGroupApplicable`.
-  // Falls back to `find_first_non_full` in case of big groups, so it is
-  // safe to use after `rehash_and_grow_if_necessary`.
+  // Falls back to `find_first_non_full` in case of big groups.
   static FindInfo FindFirstNonFullAfterResize(const CommonFields& c,
                                               size_t old_capacity,
                                               size_t hash) {
@@ -2216,14 +2224,22 @@ size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size,
 struct PolicyFunctions {
   size_t slot_size;
 
+  // Returns the pointer to the hash function stored in the set.
+  const void* (*hash_fn)(const CommonFields& common);
+
   // Returns the hash of the pointed-to slot.
   size_t (*hash_slot)(const void* hash_fn, void* slot);
 
-  // Transfer the contents of src_slot to dst_slot.
+  // Transfers the contents of src_slot to dst_slot.
   void (*transfer)(void* set, void* dst_slot, void* src_slot);
 
-  // Deallocate the backing store from common.
+  // Deallocates the backing store from common.
   void (*dealloc)(CommonFields& common, const PolicyFunctions& policy);
+
+  // Resizes set to the new capacity.
+  // Arguments are used as in raw_hash_set::resize_impl.
+  void (*resize)(CommonFields& common, size_t new_capacity,
+                 HashtablezInfoHandle forced_infoz);
 };
 
 // ClearBackingArray clears the backing array, either modifying it in place,
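The new `HasNoGrowthLeftAndNoDeleted()` above is a single comparison because `GrowthInfo` packs two facts into one word: the top bit records that the table may contain `kDeleted` (tombstone) slots, and the remaining bits count how many more elements can be inserted before a rehash is needed. A minimal standalone sketch of that encoding (the class and constant names here are illustrative, not the library's exact internals) shows why `== 0`, signed `> 0`, and signed `>= 0` answer the three predicates:

```cpp
#include <cassert>
#include <cstddef>
#include <iostream>
#include <type_traits>

// Sketch of a single-word "growth info" encoding: the top bit records
// "the table may contain deleted (tombstone) slots" and the remaining bits
// hold how many more elements may be inserted before a rehash is needed.
class GrowthInfoSketch {
 public:
  explicit GrowthInfoSketch(size_t growth_left) : rep_(growth_left) {}

  void MarkDeleted() { rep_ |= kDeletedBit; }  // a tombstone was created
  void ConsumeGrowth() { --rep_; }             // an empty slot became full

  size_t GetGrowthLeft() const { return rep_ & kGrowthLeftMask; }

  // Top bit clear and some growth remaining.
  bool HasNoDeletedAndGrowthLeft() const {
    return static_cast<std::make_signed_t<size_t>>(rep_) > 0;
  }
  // Both facts at once: no tombstones *and* no growth left <=> rep_ == 0.
  bool HasNoGrowthLeftAndNoDeleted() const { return rep_ == 0; }
  // Top bit clear: guaranteed tombstone-free.
  bool HasNoDeleted() const {
    return static_cast<std::make_signed_t<size_t>>(rep_) >= 0;
  }

 private:
  static constexpr size_t kGrowthLeftMask = ~size_t{} >> 1;
  static constexpr size_t kDeletedBit = ~kGrowthLeftMask;
  size_t rep_;
};

int main() {
  GrowthInfoSketch info(/*growth_left=*/2);
  assert(info.HasNoDeletedAndGrowthLeft());
  info.ConsumeGrowth();
  info.ConsumeGrowth();
  assert(info.HasNoGrowthLeftAndNoDeleted());  // one comparison against zero
  info.MarkDeleted();
  assert(!info.HasNoDeleted() && info.GetGrowthLeft() == 0);
  std::cout << "growth left: " << info.GetGrowthLeft() << "\n";
  return 0;
}
```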
@@ -2261,9 +2277,26 @@ ABSL_ATTRIBUTE_NOINLINE void TransferRelocatable(void*, void* dst, void* src) {
   memcpy(dst, src, SizeOfSlot);
 }
 
-// Type-erased version of raw_hash_set::drop_deletes_without_resize.
-void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn,
-                              const PolicyFunctions& policy, void* tmp_space);
+// Type erased raw_hash_set::get_hash_ref_fn for the empty hash function case.
+const void* GetHashRefForEmptyHasher(const CommonFields& common);
+
+// Given the hash of a value not currently in the table and the first empty
+// slot in the probe sequence, finds a viable slot index to insert it at.
+//
+// In case there's no space left, the table can be resized or rehashed
+// (for tables with deleted slots, see FindInsertPositionWithGrowthOrRehash).
+//
+// In the case of absence of deleted slots and positive growth_left, the element
+// can be inserted in the provided `target` position.
+//
+// When the table has deleted slots (according to GrowthInfo), the target
+// position will be searched one more time using `find_first_non_full`.
+//
+// REQUIRES: Table is not SOO.
+// REQUIRES: At least one non-full slot available.
+// REQUIRES: `target` is a valid empty position to insert.
+size_t PrepareInsertNonSoo(CommonFields& common, size_t hash, FindInfo target,
+                           const PolicyFunctions& policy);
 
 // A SwissTable.
 //
@@ -3533,7 +3566,7 @@ class raw_hash_set {
   //   common(), old_capacity, hash)
   // can be called right after `resize`.
   void resize(size_t new_capacity) {
-    resize_impl(new_capacity, HashtablezInfoHandle{});
+    raw_hash_set::resize_impl(common(), new_capacity, HashtablezInfoHandle{});
   }
 
   // As above, except that we also accept a pre-sampled, forced infoz for
@@ -3541,19 +3574,24 @@
   // store the infoz.
   void resize_with_soo_infoz(HashtablezInfoHandle forced_infoz) {
     assert(forced_infoz.IsSampled());
-    resize_impl(NextCapacity(SooCapacity()), forced_infoz);
+    raw_hash_set::resize_impl(common(), NextCapacity(SooCapacity()),
+                              forced_infoz);
   }
 
-  ABSL_ATTRIBUTE_NOINLINE void resize_impl(
-      size_t new_capacity, HashtablezInfoHandle forced_infoz) {
+  // Resizes set to the new capacity.
+  // It is a static function in order to use its pointer in GetPolicyFunctions.
+  ABSL_ATTRIBUTE_NOINLINE static void resize_impl(
+      CommonFields& common, size_t new_capacity,
+      HashtablezInfoHandle forced_infoz) {
+    raw_hash_set* set = reinterpret_cast<raw_hash_set*>(&common);
     assert(IsValidCapacity(new_capacity));
-    assert(!fits_in_soo(new_capacity));
-    const bool was_soo = is_soo();
-    const bool had_soo_slot = was_soo && !empty();
+    assert(!set->fits_in_soo(new_capacity));
+    const bool was_soo = set->is_soo();
+    const bool had_soo_slot = was_soo && !set->empty();
     const ctrl_t soo_slot_h2 =
-        had_soo_slot ? static_cast<ctrl_t>(H2(hash_of(soo_slot())))
+        had_soo_slot ? static_cast<ctrl_t>(H2(set->hash_of(set->soo_slot())))
                      : ctrl_t::kEmpty;
-    HashSetResizeHelper resize_helper(common(), was_soo, had_soo_slot,
+    HashSetResizeHelper resize_helper(common, was_soo, had_soo_slot,
                                       forced_infoz);
     // Initialize HashSetResizeHelper::old_heap_or_soo_. We can't do this in
     // HashSetResizeHelper constructor because it can't transfer slots when
     // TODO(b/289225379): try to handle more of the SOO cases inside
     // InitializeSlots. See comment on cl/555990034 snapshot #63.
     if (PolicyTraits::transfer_uses_memcpy() || !had_soo_slot) {
-      resize_helper.old_heap_or_soo() = common().heap_or_soo();
+      resize_helper.old_heap_or_soo() = common.heap_or_soo();
     } else {
-      transfer(to_slot(resize_helper.old_soo_data()), soo_slot());
+      set->transfer(set->to_slot(resize_helper.old_soo_data()),
+                    set->soo_slot());
     }
-    common().set_capacity(new_capacity);
+    common.set_capacity(new_capacity);
     // Note that `InitializeSlots` does different number initialization steps
     // depending on the values of `transfer_uses_memcpy` and capacities.
     // Refer to the comment in `InitializeSlots` for more details.
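`resize_impl` becomes static and takes `CommonFields&` so that its address fits the new `PolicyFunctions::resize` slot; the enclosing set is then recovered with a `reinterpret_cast`, which relies on the common fields living at the start of the container object. Below is a simplified, self-contained sketch of that pattern — `Common`, `PolicyFns`, and `FlatSet` are made-up stand-ins, and the real table reaches its common fields through its own internal layout rather than a plain first member:

```cpp
#include <cstddef>
#include <iostream>

// Type-erased state shared by all instantiations.
struct Common {
  size_t size = 0;
  size_t capacity = 0;
};

// Type-erased "vtable" of plain function pointers, in the spirit of
// PolicyFunctions::resize.
struct PolicyFns {
  void (*resize)(Common& common, size_t new_capacity);
};

// A hypothetical container; the only structural requirement for the cast
// in resize_impl is that `Common` is its first (standard-layout) member.
class FlatSet {
 public:
  const Common& common() const { return common_; }

  void reserve(size_t n) { fns().resize(common_, n); }

  static const PolicyFns& fns() {
    static constexpr PolicyFns value = {&FlatSet::resize_impl};
    return value;
  }

 private:
  // Static so its address fits a plain function pointer; the enclosing
  // object is recovered from the pointer to its first member.
  static void resize_impl(Common& common, size_t new_capacity) {
    FlatSet* set = reinterpret_cast<FlatSet*>(&common);
    // ... allocate a new backing array, move slots, etc. (elided) ...
    set->common_.capacity = new_capacity;
  }

  Common common_;  // must stay the first member for resize_impl's cast
};

int main() {
  FlatSet s;
  s.reserve(16);
  std::cout << "capacity: " << s.common().capacity << "\n";  // 16
}
```

Routing the call through such a function-pointer table is what lets non-templated, type-erased code trigger a resize without knowing the hasher, allocator, or slot policy.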
@@ -3573,7 +3612,7 @@
     resize_helper.InitializeSlots<CharAlloc, sizeof(slot_type),
                                   PolicyTraits::transfer_uses_memcpy(),
                                   SooEnabled(), alignof(slot_type)>(
-        common(), CharAlloc(alloc_ref()), soo_slot_h2, sizeof(key_type),
+        common, CharAlloc(set->alloc_ref()), soo_slot_h2, sizeof(key_type),
         sizeof(value_type));
 
     // In the SooEnabled() case, capacity is never 0 so we don't check.
@@ -3585,30 +3624,30 @@
     // Nothing more to do in this case.
     if (was_soo && !had_soo_slot) return;
 
-    slot_type* new_slots = slot_array();
+    slot_type* new_slots = set->slot_array();
     if (grow_single_group) {
       if (PolicyTraits::transfer_uses_memcpy()) {
         // InitializeSlots did all the work.
         return;
       }
       if (was_soo) {
-        transfer(new_slots + resize_helper.SooSlotIndex(),
-                 to_slot(resize_helper.old_soo_data()));
+        set->transfer(new_slots + resize_helper.SooSlotIndex(),
+                      to_slot(resize_helper.old_soo_data()));
         return;
       } else {
         // We want GrowSizeIntoSingleGroup to be called here in order to make
        // InitializeSlots not depend on PolicyTraits.
-        resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common(),
-                                                            alloc_ref());
+        resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common,
+                                                            set->alloc_ref());
       }
     } else {
       // InitializeSlots prepares control bytes to correspond to empty table.
       const auto insert_slot = [&](slot_type* slot) {
-        size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
+        size_t hash = PolicyTraits::apply(HashElement{set->hash_ref()},
                                           PolicyTraits::element(slot));
-        auto target = find_first_non_full(common(), hash);
-        SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type));
-        transfer(new_slots + target.offset, slot);
+        auto target = find_first_non_full(common, hash);
+        SetCtrl(common, target.offset, H2(hash), sizeof(slot_type));
+        set->transfer(new_slots + target.offset, slot);
         return target.probe_length;
       };
       if (was_soo) {
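When the resized table spans more than one group, the `else` branch above re-inserts every old slot: hash it, find the first non-full position in the new array, write the control byte, and transfer the slot, accumulating the probe length for the hashtablez stats. The following is a rough sketch of that loop under simplifying assumptions — plain `std::optional` slots, linear probing, and a power-of-two capacity — rather than the real group-wise probing:

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <optional>
#include <utility>
#include <vector>

struct FindInfo {
  size_t offset;
  size_t probe_length;
};

// Returns the first free position of the probe sequence for `hash`.
// Assumes the table has at least one empty slot and a power-of-two size.
template <typename T>
FindInfo FindFirstNonFull(const std::vector<std::optional<T>>& slots,
                          size_t hash) {
  const size_t mask = slots.size() - 1;
  size_t probe_length = 0;
  for (size_t i = hash & mask;; i = (i + 1) & mask, ++probe_length) {
    if (!slots[i].has_value()) return {i, probe_length};
  }
}

// Re-inserts every live element of `old_slots` into a fresh array of
// `new_capacity` slots, mirroring the "reinsert everything" resize path.
template <typename T>
std::vector<std::optional<T>> Rehash(std::vector<std::optional<T>>& old_slots,
                                     size_t new_capacity) {
  std::vector<std::optional<T>> new_slots(new_capacity);
  size_t total_probe_length = 0;  // what RecordRehash() would consume
  for (auto& slot : old_slots) {
    if (!slot.has_value()) continue;
    const size_t hash = std::hash<T>{}(*slot);
    FindInfo target = FindFirstNonFull(new_slots, hash);
    new_slots[target.offset] = std::move(*slot);  // "transfer" the slot
    total_probe_length += target.probe_length;
  }
  std::cout << "total probe length: " << total_probe_length << "\n";
  return new_slots;
}

int main() {
  std::vector<std::optional<int>> old_slots = {std::nullopt, 7, 42,
                                               std::nullopt};
  auto grown = Rehash(old_slots, /*new_capacity=*/8);
  std::cout << "new capacity: " << grown.size() << "\n";
}
```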
@@ -3623,80 +3662,13 @@
             total_probe_length += insert_slot(old_slots + i);
           }
         }
-        infoz().RecordRehash(total_probe_length);
+        common.infoz().RecordRehash(total_probe_length);
       }
     }
-    resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(alloc_ref()),
+    resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(set->alloc_ref()),
                                                     sizeof(slot_type));
   }
 
-  // Prunes control bytes to remove as many tombstones as possible.
-  //
-  // See the comment on `rehash_and_grow_if_necessary()`.
-  inline void drop_deletes_without_resize() {
-    // Stack-allocate space for swapping elements.
-    alignas(slot_type) unsigned char tmp[sizeof(slot_type)];
-    DropDeletesWithoutResize(common(), &hash_ref(), GetPolicyFunctions(), tmp);
-  }
-
-  // Called whenever the table *might* need to conditionally grow.
-  //
-  // This function is an optimization opportunity to perform a rehash even when
-  // growth is unnecessary, because vacating tombstones is beneficial for
-  // performance in the long-run.
-  void rehash_and_grow_if_necessary() {
-    const size_t cap = capacity();
-    if (cap > Group::kWidth &&
-        // Do these calculations in 64-bit to avoid overflow.
-        size() * uint64_t{32} <= cap * uint64_t{25}) {
-      // Squash DELETED without growing if there is enough capacity.
-      //
-      // Rehash in place if the current size is <= 25/32 of capacity.
-      // Rationale for such a high factor: 1) drop_deletes_without_resize() is
-      // faster than resize, and 2) it takes quite a bit of work to add
-      // tombstones. In the worst case, seems to take approximately 4
-      // insert/erase pairs to create a single tombstone and so if we are
-      // rehashing because of tombstones, we can afford to rehash-in-place as
-      // long as we are reclaiming at least 1/8 the capacity without doing more
-      // than 2X the work. (Where "work" is defined to be size() for rehashing
-      // or rehashing in place, and 1 for an insert or erase.) But rehashing in
-      // place is faster per operation than inserting or even doubling the size
-      // of the table, so we actually afford to reclaim even less space from a
-      // resize-in-place. The decision is to rehash in place if we can reclaim
-      // at about 1/8th of the usable capacity (specifically 3/28 of the
-      // capacity) which means that the total cost of rehashing will be a small
-      // fraction of the total work.
-      //
-      // Here is output of an experiment using the BM_CacheInSteadyState
-      // benchmark running the old case (where we rehash-in-place only if we can
-      // reclaim at least 7/16*capacity) vs. this code (which rehashes in place
-      // if we can recover 3/32*capacity).
-      //
-      // Note that although in the worst-case number of rehashes jumped up from
-      // 15 to 190, but the number of operations per second is almost the same.
-      //
-      // Abridged output of running BM_CacheInSteadyState benchmark from
-      // raw_hash_set_benchmark. N is the number of insert/erase operations.
-      //
-      //      | OLD (recover >= 7/16        | NEW (recover >= 3/32)
-      // size |    N/s LoadFactor NRehashes |    N/s LoadFactor NRehashes
-      //  448 | 145284       0.44        18 | 140118       0.44        19
-      //  493 | 152546       0.24        11 | 151417       0.48        28
-      //  538 | 151439       0.26        11 | 151152       0.53        38
-      //  583 | 151765       0.28        11 | 150572       0.57        50
-      //  628 | 150241       0.31        11 | 150853       0.61        66
-      //  672 | 149602       0.33        12 | 150110       0.66        90
-      //  717 | 149998       0.35        12 | 149531       0.70       129
-      //  762 | 149836       0.37        13 | 148559       0.74       190
-      //  807 | 149736       0.39        14 | 151107       0.39        14
-      //  852 | 150204       0.42        15 | 151019       0.42        15
-      drop_deletes_without_resize();
-    } else {
-      // Otherwise grow the container.
-      resize(NextCapacity(cap));
-    }
-  }
-
   // Casting directly from e.g. char* to slot_type* can cause compilation errors
   // on objective-C. This function converts to void* first, avoiding the issue.
   static slot_type* to_slot(void* buf) {
@@ -3817,6 +3789,7 @@ class raw_hash_set {
 
   template <class K>
   std::pair<iterator, bool> find_or_prepare_insert_non_soo(const K& key) {
+    assert(!is_soo());
     prefetch_heap_block();
     auto hash = hash_ref()(key);
     auto seq = probe(common(), hash);
@@ -3833,9 +3806,10 @@
       if (ABSL_PREDICT_TRUE(mask_empty)) {
         size_t target = seq.offset(
             GetInsertionOffset(mask_empty, capacity(), hash, control()));
-        return {
-            iterator_at(prepare_insert(hash, FindInfo{target, seq.index()})),
-            true};
+        return {iterator_at(PrepareInsertNonSoo(common(), hash,
+                                                FindInfo{target, seq.index()},
+                                                GetPolicyFunctions())),
+                true};
       }
       seq.next();
       assert(seq.index() <= capacity() && "full table!");
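The deleted `rehash_and_grow_if_necessary()` chose between squashing tombstones in place and growing the table based on whether `size()` is at most 25/32 of the capacity (and the table is larger than one group); with this change that policy moves behind the type-erased resize path. The decision itself is just two comparisons, sketched here with an assumed group width of 16:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

// Sketch of the removed heuristic's decision: with size <= 25/32 of capacity
// (and a table bigger than one group), it is cheaper to rehash in place and
// reclaim tombstones than to double the backing array.
constexpr size_t kGroupWidth = 16;  // assumed group width for the sketch

enum class GrowthDecision { kDropDeletesInPlace, kGrowToNextCapacity };

GrowthDecision DecideGrowth(size_t size, size_t capacity) {
  if (capacity > kGroupWidth &&
      // 64-bit math so size * 32 cannot overflow on 32-bit size_t.
      static_cast<uint64_t>(size) * 32 <=
          static_cast<uint64_t>(capacity) * 25) {
    return GrowthDecision::kDropDeletesInPlace;
  }
  return GrowthDecision::kGrowToNextCapacity;
}

int main() {
  // 100 live elements in a capacity-127 table that ran out of growth:
  // 100 * 32 = 3200 > 127 * 25 = 3175, so grow.
  std::cout << (DecideGrowth(100, 127) == GrowthDecision::kGrowToNextCapacity)
            << "\n";
  // 90 live elements: 2880 <= 3175, so squash tombstones in place.
  std::cout << (DecideGrowth(90, 127) == GrowthDecision::kDropDeletesInPlace)
            << "\n";
}
```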
@@ -3852,63 +3826,6 @@
     return find_or_prepare_insert_non_soo(key);
   }
 
-  // Given the hash of a value not currently in the table and the first empty
-  // slot in the probe sequence, finds the viable slot index to insert it at.
-  //
-  // In case there's no space left, the table can be resized or rehashed
-  // (see rehash_and_grow_if_necessary).
-  //
-  // In case of absence of deleted slots and positive growth_left, element can
-  // be inserted in the provided `target` position.
-  //
-  // When table has deleted slots (according to GrowthInfo), target position
-  // will be searched one more time using `find_first_non_full`.
-  //
-  // REQUIRES: At least one non-full slot available.
-  // REQUIRES: `target` is a valid empty position to insert.
-  size_t prepare_insert(size_t hash, FindInfo target) ABSL_ATTRIBUTE_NOINLINE {
-    assert(!is_soo());
-    // When there are no deleted slots in the table
-    // and growth_left is positive, we can insert at the first
-    // empty slot in the probe sequence (target).
-    bool use_target_hint = false;
-    // Optimization is disabled on enabled generations.
-    // We have to rehash even sparse tables randomly in such mode.
-#ifndef ABSL_SWISSTABLE_ENABLE_GENERATIONS
-    use_target_hint = growth_info().HasNoDeletedAndGrowthLeft();
-#endif
-    if (ABSL_PREDICT_FALSE(!use_target_hint)) {
-      const bool rehash_for_bug_detection =
-          common().should_rehash_for_bug_detection_on_insert();
-      if (rehash_for_bug_detection) {
-        // Move to a different heap allocation in order to detect bugs.
-        const size_t cap = capacity();
-        resize(growth_left() > 0 ? cap : NextCapacity(cap));
-      }
-      if (!rehash_for_bug_detection &&
-          ABSL_PREDICT_FALSE(growth_left() == 0)) {
-        const size_t old_capacity = capacity();
-        rehash_and_grow_if_necessary();
-        // NOTE: It is safe to use `FindFirstNonFullAfterResize` after
-        // `rehash_and_grow_if_necessary`, whether capacity changes or not.
-        // `rehash_and_grow_if_necessary` may *not* call `resize`
-        // and perform `drop_deletes_without_resize` instead. But this
-        // could happen only on big tables and will not change capacity.
-        // For big tables `FindFirstNonFullAfterResize` will always
-        // fallback to normal `find_first_non_full`.
-        target = HashSetResizeHelper::FindFirstNonFullAfterResize(
-            common(), old_capacity, hash);
-      } else {
-        target = find_first_non_full(common(), hash);
-      }
-    }
-    PrepareInsertCommon(common());
-    growth_info().OverwriteControlAsFull(control()[target.offset]);
-    SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type));
-    infoz().RecordInsert(hash, target.probe_length);
-    return target.offset;
-  }
-
   // Constructs the value in the space pointed by the iterator. This only works
   // after an unsuccessful find_or_prepare_insert() and before any other
   // modifications happen in the raw_hash_set.
@@ -3949,7 +3866,7 @@
   // See `CapacityToGrowth()`.
   size_t growth_left() const {
     assert(!is_soo());
-    return growth_info().GetGrowthLeft();
+    return common().growth_left();
   }
 
   GrowthInfo& growth_info() {
@@ -4009,6 +3926,10 @@
     return settings_.template get<3>();
   }
 
+  static const void* get_hash_ref_fn(const CommonFields& common) {
+    auto* h = reinterpret_cast<const raw_hash_set*>(&common);
+    return &h->hash_ref();
+  }
   static void transfer_slot_fn(void* set, void* dst, void* src) {
     auto* h = static_cast<raw_hash_set*>(set);
     h->transfer(static_cast<slot_type*>(dst), static_cast<slot_type*>(src));
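The removed member `prepare_insert` and its type-erased replacement `PrepareInsertNonSoo` make the same decision: trust the caller's probe result only when `GrowthInfo` proves there are no tombstones and there is growth left; otherwise grow or rehash first and search again with `find_first_non_full`. A control-flow-only sketch follows — every type and helper in it is a hypothetical stand-in, and the generations/bug-detection rehash is omitted:

```cpp
#include <cstddef>
#include <iostream>

struct FindInfo {
  size_t offset;
  size_t probe_length;
};

struct TableState {
  size_t size;
  size_t capacity;
  size_t growth_left;
  bool may_have_deleted;
};

// Stand-ins for the real operations; only the control flow is of interest.
FindInfo FindFirstNonFull(const TableState& t, size_t hash) {
  return {hash % t.capacity, 0};
}
void RehashOrGrow(TableState& t) {
  t.capacity = t.capacity * 2 + 1;
  t.growth_left = t.capacity - t.size;
  t.may_have_deleted = false;
}

size_t PrepareInsert(TableState& t, size_t hash, FindInfo target) {
  const bool use_target_hint = !t.may_have_deleted && t.growth_left > 0;
  if (!use_target_hint) {
    if (t.growth_left == 0) RehashOrGrow(t);  // no room: grow or drop deletes
    target = FindFirstNonFull(t, hash);       // the position may have moved
  }
  ++t.size;
  --t.growth_left;
  return target.offset;  // caller writes the control byte and the slot here
}

int main() {
  TableState t{/*size=*/3, /*capacity=*/7, /*growth_left=*/0,
               /*may_have_deleted=*/true};
  std::cout << "insert at offset " << PrepareInsert(t, /*hash=*/42, {5, 0})
            << "\n";
}
```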
@@ -4030,6 +3951,10 @@
   static const PolicyFunctions& GetPolicyFunctions() {
     static constexpr PolicyFunctions value = {
         sizeof(slot_type),
+        // TODO(b/328722020): try to type erase
+        // for standard layout and alignof(Hash) <= alignof(CommonFields).
+        std::is_empty<hasher>::value ? &GetHashRefForEmptyHasher
+                                     : &raw_hash_set::get_hash_ref_fn,
         PolicyTraits::template get_hash_slot_fn<hasher>(),
         PolicyTraits::transfer_uses_memcpy() ?
             TransferRelocatable<sizeof(slot_type)>
@@ -4037,6 +3962,7 @@
         (std::is_same<SlotAlloc, std::allocator<slot_type>>::value
              ? &DeallocateStandard<alignof(slot_type)>
              : &raw_hash_set::dealloc_fn),
+        &raw_hash_set::resize_impl,
     };
     return value;
   }
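`GetPolicyFunctions()` builds one `constexpr` table per instantiation, and the new `hash_fn` entry is chosen at compile time: stateless hashers share the non-template `GetHashRefForEmptyHasher`, while stateful ones get a per-instantiation getter that casts the common fields back to the set. Here is a compact sketch of that selection — `Common`, `PolicyFns`, and `SetSketch` are invented names, and the fallback simply returns a stable address on the assumption that an empty hasher has no state anyone will read:

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <type_traits>

struct Common {
  size_t size = 0;
};

struct PolicyFns {
  const void* (*hash_fn)(const Common& common);
};

// Shared, non-template fallback for stateless hashers: any stable address
// will do, because there is no hasher state to dereference.
inline const void* GetHashRefForEmptyHasher(const Common& common) {
  return &common;
}

template <class Hasher>
class SetSketch {
 public:
  static const PolicyFns& GetPolicyFns() {
    // The entry is selected at compile time, so empty hashers never cause a
    // per-instantiation getter to be emitted.
    static constexpr PolicyFns value = {
        std::is_empty<Hasher>::value ? &GetHashRefForEmptyHasher
                                     : &SetSketch::get_hash_ref_fn};
    return value;
  }

 private:
  static const void* get_hash_ref_fn(const Common& common) {
    auto* set = reinterpret_cast<const SetSketch*>(&common);
    return &set->hasher_;
  }

  Common common_;  // first member, so the cast above is valid
  Hasher hasher_;
};

int main() {
  // std::hash<int> is empty, so the shared fallback is selected.
  std::cout << std::boolalpha
            << (SetSketch<std::hash<int>>::GetPolicyFns().hash_fn ==
                &GetHashRefForEmptyHasher)
            << "\n";
}
```

Sharing one function for all empty hashers avoids emitting a separate getter for every instantiation.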