author    | Vitaly Goldshteyn <goldvitaly@google.com> | 2024-05-20 11:57:11 -0700
committer | Copybara-Service <copybara-worker@google.com> | 2024-05-20 11:57:56 -0700
commit    | 6ab5b0aad86dc08d257f6b567611c231c6b8ac31 (patch)
tree      | 3b627c722f43e7bac4acbaf89832c665a22bb5b2 /absl
parent    | 0128305738355d085e079bab281a7211a00a5b83 (diff)
Move `prepare_insert` out of line as the type-erased `PrepareInsertNonSoo`.
This significantly reduces the binary size of large binaries and creates a single hot function instead of many cold ones, which decreases cache misses during code execution.
We also avoid size-related computations for tables with no deleted slots when a resize is necessary.
PiperOrigin-RevId: 635527119
Change-Id: I763b135f1f6089051e62e348a07b33536af265ab
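The binary-size reduction comes from type erasure: instead of every `raw_hash_set` instantiation emitting its own copy of the slow insert path, each instantiation now only fills in a small table of function pointers (`PolicyFunctions`, extended in this change with `hash_fn` and `resize`), and the single out-of-line `PrepareInsertNonSoo` in raw_hash_set.cc does the shared work. The sketch below illustrates the general pattern only; `ToySet`, `PrepareInsertErased`, and the simplified `PolicyFunctions` fields are hypothetical stand-ins, not the actual Abseil implementation.

```cpp
// A minimal, hypothetical sketch of the type-erasure pattern: one shared
// out-of-line insert path driven by a per-type table of function pointers.
// Names here (ToySet, PrepareInsertErased) are illustrative, not Abseil's.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>

// Stand-in for a policy table: everything type-specific the shared code
// needs, expressed as plain function pointers.
struct PolicyFunctions {
  std::size_t slot_size;
  std::size_t (*hash_slot)(const void* hash_fn, void* slot);
  void (*transfer)(void* set, void* dst_slot, void* src_slot);
};

// The single "hot" function: emitted once in the binary no matter how many
// distinct table types exist, instead of once per template instantiation.
__attribute__((noinline)) std::size_t PrepareInsertErased(
    void* /*set*/, std::size_t hash, const PolicyFunctions& policy) {
  // Real code would probe control bytes, maybe resize, etc.; here we only
  // show that all type-specific work is reached through `policy`.
  std::printf("inserting hash %zu into slots of %zu bytes\n", hash,
              policy.slot_size);
  return hash % 16;  // pretend slot index
}

// A toy typed container; only this thin wrapper is stamped out per type.
template <class T>
class ToySet {
 public:
  std::size_t PrepareInsert(std::size_t hash) {
    return PrepareInsertErased(this, hash, GetPolicyFunctions());
  }

 private:
  static std::size_t HashSlotFn(const void*, void* slot) {
    // Placeholder hash: real code would hash the element stored in the slot.
    return static_cast<std::size_t>(reinterpret_cast<std::uintptr_t>(slot));
  }
  static void TransferFn(void*, void* dst, void* src) {
    *static_cast<T*>(dst) = static_cast<T&&>(*static_cast<T*>(src));
  }
  static const PolicyFunctions& GetPolicyFunctions() {
    static constexpr PolicyFunctions kPolicy = {sizeof(T), &HashSlotFn,
                                                &TransferFn};
    return kPolicy;
  }
};

int main() {
  ToySet<int> a;
  ToySet<std::string> b;
  a.PrepareInsert(42);    // both calls funnel into the same PrepareInsertErased
  b.PrepareInsert(1729);
}
```

Because `PrepareInsertErased` is not a template, the linker emits it exactly once, and each `ToySet<T>::PrepareInsert` call site shrinks to setting up the policy table and making a call.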
Diffstat (limited to 'absl')
-rw-r--r-- | absl/container/internal/compressed_tuple_test.cc | 28 |
-rw-r--r-- | absl/container/internal/raw_hash_set.cc | 168 |
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 244 |
-rw-r--r-- | absl/container/internal/raw_hash_set_test.cc | 16 |
4 files changed, 295 insertions, 161 deletions
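The claim about avoiding size-related computations shows up in the new `GrowthInfo::HasNoGrowthLeftAndNoDeleted()`, which is just `growth_left_info_ == 0`, so the resize-only path in `PrepareInsertNonSoo` is selected with a single comparison. The toy class below sketches one way a growth counter and a "has deleted slots" flag can be packed into one word; the bit layout and the `ToyGrowthInfo` name are illustrative assumptions, and only the three predicates mirror checks that appear in the diff.

```cpp
// Simplified sketch of packing "has deleted slots" and "growth left" into one
// word, consistent with the checks visible in the diff (HasNoGrowthLeftAndNoDeleted
// is `value == 0`, HasNoDeleted is `signed(value) >= 0`). The exact bit layout
// here is an assumption for illustration, not copied from Abseil.
#include <cassert>
#include <cstddef>
#include <type_traits>

class ToyGrowthInfo {
 public:
  void InitGrowthLeftNoDeleted(std::size_t growth_left) { info_ = growth_left; }

  // At least one slot became kDeleted: set the top (sign) bit.
  void OverwriteFullAsDeleted() { info_ |= kDeletedBit; }

  // An empty slot became full: consume one unit of growth.
  void OverwriteEmptyAsFull() { --info_; }

  bool HasNoDeleted() const {
    return static_cast<std::make_signed_t<std::size_t>>(info_) >= 0;
  }
  bool HasNoGrowthLeftAndNoDeleted() const { return info_ == 0; }
  bool HasNoDeletedAndGrowthLeft() const {
    return static_cast<std::make_signed_t<std::size_t>>(info_) > 0;
  }

 private:
  static constexpr std::size_t kDeletedBit = ~(~std::size_t{} >> 1);  // top bit
  std::size_t info_ = 0;
};

int main() {
  ToyGrowthInfo gi;
  gi.InitGrowthLeftNoDeleted(1);
  assert(gi.HasNoDeletedAndGrowthLeft());    // fast path: insert at `target`
  gi.OverwriteEmptyAsFull();
  assert(gi.HasNoGrowthLeftAndNoDeleted());  // cheap "just resize" branch
  gi.OverwriteFullAsDeleted();
  assert(!gi.HasNoDeleted());                // tombstones: slower rehash path
}
```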
diff --git a/absl/container/internal/compressed_tuple_test.cc b/absl/container/internal/compressed_tuple_test.cc index 49818fb8..3cd9e18b 100644 --- a/absl/container/internal/compressed_tuple_test.cc +++ b/absl/container/internal/compressed_tuple_test.cc @@ -15,8 +15,11 @@ #include "absl/container/internal/compressed_tuple.h" #include <memory> +#include <set> #include <string> +#include <type_traits> #include <utility> +#include <vector> #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -55,6 +58,7 @@ namespace { using absl::test_internal::CopyableMovableInstance; using absl::test_internal::InstanceTracker; +using ::testing::Each; TEST(CompressedTupleTest, Sizeof) { EXPECT_EQ(sizeof(int), sizeof(CompressedTuple<int>)); @@ -71,6 +75,30 @@ TEST(CompressedTupleTest, Sizeof) { sizeof(CompressedTuple<int, Empty<0>, NotEmpty<double>, Empty<1>>)); } +TEST(CompressedTupleTest, PointerToEmpty) { + auto to_void_ptrs = [](const auto&... objs) { + return std::vector<const void*>{static_cast<const void*>(&objs)...}; + }; + { + using Tuple = CompressedTuple<int, Empty<0>>; + EXPECT_EQ(sizeof(int), sizeof(Tuple)); + Tuple t; + EXPECT_THAT(to_void_ptrs(t.get<1>()), Each(&t)); + } + { + using Tuple = CompressedTuple<int, Empty<0>, Empty<1>>; + EXPECT_EQ(sizeof(int), sizeof(Tuple)); + Tuple t; + EXPECT_THAT(to_void_ptrs(t.get<1>(), t.get<2>()), Each(&t)); + } + { + using Tuple = CompressedTuple<int, Empty<0>, Empty<1>, Empty<2>>; + EXPECT_EQ(sizeof(int), sizeof(Tuple)); + Tuple t; + EXPECT_THAT(to_void_ptrs(t.get<1>(), t.get<2>(), t.get<3>()), Each(&t)); + } +} + TEST(CompressedTupleTest, OneMoveOnRValueConstructionTemp) { InstanceTracker tracker; CompressedTuple<CopyableMovableInstance> x1(CopyableMovableInstance(1)); diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index c23c1f3b..9d399a1b 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -23,7 +23,9 @@ #include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/dynamic_annotations.h" +#include "absl/base/optimization.h" #include "absl/container/internal/container_memory.h" +#include "absl/container/internal/hashtablez_sampler.h" #include "absl/hash/hash.h" namespace absl { @@ -157,6 +159,8 @@ FindInfo find_first_non_full_outofline(const CommonFields& common, return find_first_non_full(common, hash); } +namespace { + // Returns the address of the slot just after slot assuming each slot has the // specified size. static inline void* NextSlot(void* slot, size_t slot_size) { @@ -169,8 +173,22 @@ static inline void* PrevSlot(void* slot, size_t slot_size) { return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) - slot_size); } -void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn, - const PolicyFunctions& policy, void* tmp_space) { +// Finds guaranteed to exists empty slot from the given position. +// NOTE: this function is almost never triggered inside of the +// DropDeletesWithoutResize, so we keep it simple. +// The table is rather sparse, so empty slot will be found very quickly. 
+size_t FindEmptySlot(size_t start, size_t end, const ctrl_t* ctrl) { + for (size_t i = start; i < end; ++i) { + if (IsEmpty(ctrl[i])) { + return i; + } + } + assert(false && "no empty slot"); + return ~size_t{}; +} + +void DropDeletesWithoutResize(CommonFields& common, + const PolicyFunctions& policy) { void* set = &common; void* slot_array = common.slot_array(); const size_t capacity = common.capacity(); @@ -194,15 +212,26 @@ void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn, // repeat procedure for current slot with moved from element (target) ctrl_t* ctrl = common.control(); ConvertDeletedToEmptyAndFullToDeleted(ctrl, capacity); + const void* hash_fn = policy.hash_fn(common); auto hasher = policy.hash_slot; auto transfer = policy.transfer; const size_t slot_size = policy.slot_size; size_t total_probe_length = 0; void* slot_ptr = SlotAddress(slot_array, 0, slot_size); + + // The index of an empty slot that can be used as temporary memory for + // the swap operation. + constexpr size_t kUnknownId = ~size_t{}; + size_t tmp_space_id = kUnknownId; + for (size_t i = 0; i != capacity; ++i, slot_ptr = NextSlot(slot_ptr, slot_size)) { assert(slot_ptr == SlotAddress(slot_array, i, slot_size)); + if (IsEmpty(ctrl[i])) { + tmp_space_id = i; + continue; + } if (!IsDeleted(ctrl[i])) continue; const size_t hash = (*hasher)(hash_fn, slot_ptr); const FindInfo target = find_first_non_full(common, hash); @@ -231,16 +260,26 @@ void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn, SetCtrl(common, new_i, H2(hash), slot_size); (*transfer)(set, new_slot_ptr, slot_ptr); SetCtrl(common, i, ctrl_t::kEmpty, slot_size); + // Initialize or change empty space id. + tmp_space_id = i; } else { assert(IsDeleted(ctrl[new_i])); SetCtrl(common, new_i, H2(hash), slot_size); // Until we are done rehashing, DELETED marks previously FULL slots. + if (tmp_space_id == kUnknownId) { + tmp_space_id = FindEmptySlot(i + 1, capacity, ctrl); + } + void* tmp_space = SlotAddress(slot_array, tmp_space_id, slot_size); + SanitizerUnpoisonMemoryRegion(tmp_space, slot_size); + // Swap i and new_i elements. (*transfer)(set, tmp_space, new_slot_ptr); (*transfer)(set, new_slot_ptr, slot_ptr); (*transfer)(set, slot_ptr, tmp_space); + SanitizerPoisonMemoryRegion(tmp_space, slot_size); + // repeat the processing of the ith slot --i; slot_ptr = PrevSlot(slot_ptr, slot_size); @@ -267,6 +306,8 @@ static bool WasNeverFull(CommonFields& c, size_t index) { Group::kWidth; } +} // namespace + void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) { assert(IsFull(c.control()[index]) && "erasing a dangling iterator"); c.decrement_size(); @@ -424,6 +465,129 @@ void HashSetResizeHelper::TransferSlotAfterSoo(CommonFields& c, PoisonSingleGroupEmptySlots(c, slot_size); } +namespace { + +// Called whenever the table needs to vacate empty slots either by removing +// tombstones via rehash or growth. +ABSL_ATTRIBUTE_NOINLINE +FindInfo FindInsertPositionWithGrowthOrRehash(CommonFields& common, size_t hash, + const PolicyFunctions& policy) { + const size_t cap = common.capacity(); + if (cap > Group::kWidth && + // Do these calculations in 64-bit to avoid overflow. + common.size() * uint64_t{32} <= cap * uint64_t{25}) { + // Squash DELETED without growing if there is enough capacity. + // + // Rehash in place if the current size is <= 25/32 of capacity. + // Rationale for such a high factor: 1) DropDeletesWithoutResize() is + // faster than resize, and 2) it takes quite a bit of work to add + // tombstones. 
In the worst case, seems to take approximately 4 + // insert/erase pairs to create a single tombstone and so if we are + // rehashing because of tombstones, we can afford to rehash-in-place as + // long as we are reclaiming at least 1/8 the capacity without doing more + // than 2X the work. (Where "work" is defined to be size() for rehashing + // or rehashing in place, and 1 for an insert or erase.) But rehashing in + // place is faster per operation than inserting or even doubling the size + // of the table, so we actually afford to reclaim even less space from a + // resize-in-place. The decision is to rehash in place if we can reclaim + // at about 1/8th of the usable capacity (specifically 3/28 of the + // capacity) which means that the total cost of rehashing will be a small + // fraction of the total work. + // + // Here is output of an experiment using the BM_CacheInSteadyState + // benchmark running the old case (where we rehash-in-place only if we can + // reclaim at least 7/16*capacity) vs. this code (which rehashes in place + // if we can recover 3/32*capacity). + // + // Note that although in the worst-case number of rehashes jumped up from + // 15 to 190, but the number of operations per second is almost the same. + // + // Abridged output of running BM_CacheInSteadyState benchmark from + // raw_hash_set_benchmark. N is the number of insert/erase operations. + // + // | OLD (recover >= 7/16 | NEW (recover >= 3/32) + // size | N/s LoadFactor NRehashes | N/s LoadFactor NRehashes + // 448 | 145284 0.44 18 | 140118 0.44 19 + // 493 | 152546 0.24 11 | 151417 0.48 28 + // 538 | 151439 0.26 11 | 151152 0.53 38 + // 583 | 151765 0.28 11 | 150572 0.57 50 + // 628 | 150241 0.31 11 | 150853 0.61 66 + // 672 | 149602 0.33 12 | 150110 0.66 90 + // 717 | 149998 0.35 12 | 149531 0.70 129 + // 762 | 149836 0.37 13 | 148559 0.74 190 + // 807 | 149736 0.39 14 | 151107 0.39 14 + // 852 | 150204 0.42 15 | 151019 0.42 15 + DropDeletesWithoutResize(common, policy); + } else { + // Otherwise grow the container. + policy.resize(common, NextCapacity(cap), HashtablezInfoHandle{}); + } + // This function is typically called with tables containing deleted slots. + // The table will be big and `FindFirstNonFullAfterResize` will always + // fallback to `find_first_non_full`. So using `find_first_non_full` directly. + return find_first_non_full(common, hash); +} + +} // namespace + +const void* GetHashRefForEmptyHasher(const CommonFields& common) { + // Empty base optimization typically make the empty base class address to be + // the same as the first address of the derived class object. + // But we generally assume that for empty hasher we can return any valid + // pointer. + return &common; +} + +size_t PrepareInsertNonSoo(CommonFields& common, size_t hash, FindInfo target, + const PolicyFunctions& policy) { + // When there are no deleted slots in the table + // and growth_left is positive, we can insert at the first + // empty slot in the probe sequence (target). + const bool use_target_hint = + // Optimization is disabled when generations are enabled. + // We have to rehash even sparse tables randomly in such mode. + !SwisstableGenerationsEnabled() && + common.growth_info().HasNoDeletedAndGrowthLeft(); + if (ABSL_PREDICT_FALSE(!use_target_hint)) { + // Notes about optimized mode when generations are disabled: + // We do not enter this branch if table has no deleted slots + // and growth_left is positive. + // We enter this branch in the following cases listed in decreasing + // frequency: + // 1. 
Table without deleted slots (>95% cases) that needs to be resized. + // 2. Table with deleted slots that has space for the inserting element. + // 3. Table with deleted slots that needs to be rehashed or resized. + if (ABSL_PREDICT_TRUE(common.growth_info().HasNoGrowthLeftAndNoDeleted())) { + const size_t old_capacity = common.capacity(); + policy.resize(common, NextCapacity(old_capacity), HashtablezInfoHandle{}); + target = HashSetResizeHelper::FindFirstNonFullAfterResize( + common, old_capacity, hash); + } else { + // Note: the table may have no deleted slots here when generations + // are enabled. + const bool rehash_for_bug_detection = + common.should_rehash_for_bug_detection_on_insert(); + if (rehash_for_bug_detection) { + // Move to a different heap allocation in order to detect bugs. + const size_t cap = common.capacity(); + policy.resize(common, + common.growth_left() > 0 ? cap : NextCapacity(cap), + HashtablezInfoHandle{}); + } + if (ABSL_PREDICT_TRUE(common.growth_left() > 0)) { + target = find_first_non_full(common, hash); + } else { + target = FindInsertPositionWithGrowthOrRehash(common, hash, policy); + } + } + } + PrepareInsertCommon(common); + common.growth_info().OverwriteControlAsFull(common.control()[target.offset]); + SetCtrl(common, target.offset, H2(hash), policy.slot_size); + common.infoz().RecordInsert(hash, target.probe_length); + return target.offset; +} + } // namespace container_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 1d2e2d14..a64133a7 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1126,6 +1126,13 @@ class GrowthInfo { return static_cast<std::make_signed_t<size_t>>(growth_left_info_) > 0; } + // Returns true if the table satisfies two properties: + // 1. Guaranteed to have no kDeleted slots. + // 2. There is no growth left. + bool HasNoGrowthLeftAndNoDeleted() const { + return growth_left_info_ == 0; + } + // Returns true if table guaranteed to have no k bool HasNoDeleted() const { return static_cast<std::make_signed_t<size_t>>(growth_left_info_) >= 0; @@ -1374,6 +1381,8 @@ class CommonFields : public CommonFieldsGenerationInfo { // This is stored in the heap allocation before the control bytes. // TODO(b/289225379): experiment with moving growth_info back inline to // increase room for SOO. + size_t growth_left() const { return growth_info().GetGrowthLeft(); } + GrowthInfo& growth_info() { auto* gl_ptr = reinterpret_cast<GrowthInfo*>(control()) - 1; assert(reinterpret_cast<uintptr_t>(gl_ptr) % alignof(GrowthInfo) == 0); @@ -1933,8 +1942,7 @@ class HashSetResizeHelper { // will be no performance benefit. // It has implicit assumption that `resize` will call // `GrowSizeIntoSingleGroup*` in case `IsGrowingIntoSingleGroupApplicable`. - // Falls back to `find_first_non_full` in case of big groups, so it is - // safe to use after `rehash_and_grow_if_necessary`. + // Falls back to `find_first_non_full` in case of big groups. static FindInfo FindFirstNonFullAfterResize(const CommonFields& c, size_t old_capacity, size_t hash) { @@ -2216,14 +2224,22 @@ size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size, struct PolicyFunctions { size_t slot_size; + // Returns the pointer to the hash function stored in the set. + const void* (*hash_fn)(const CommonFields& common); + // Returns the hash of the pointed-to slot. 
size_t (*hash_slot)(const void* hash_fn, void* slot); - // Transfer the contents of src_slot to dst_slot. + // Transfers the contents of src_slot to dst_slot. void (*transfer)(void* set, void* dst_slot, void* src_slot); - // Deallocate the backing store from common. + // Deallocates the backing store from common. void (*dealloc)(CommonFields& common, const PolicyFunctions& policy); + + // Resizes set to the new capacity. + // Arguments are used as in raw_hash_set::resize_impl. + void (*resize)(CommonFields& common, size_t new_capacity, + HashtablezInfoHandle forced_infoz); }; // ClearBackingArray clears the backing array, either modifying it in place, @@ -2261,9 +2277,26 @@ ABSL_ATTRIBUTE_NOINLINE void TransferRelocatable(void*, void* dst, void* src) { memcpy(dst, src, SizeOfSlot); } -// Type-erased version of raw_hash_set::drop_deletes_without_resize. -void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn, - const PolicyFunctions& policy, void* tmp_space); +// Type erased raw_hash_set::get_hash_ref_fn for the empty hash function case. +const void* GetHashRefForEmptyHasher(const CommonFields& common); + +// Given the hash of a value not currently in the table and the first empty +// slot in the probe sequence, finds a viable slot index to insert it at. +// +// In case there's no space left, the table can be resized or rehashed +// (for tables with deleted slots, see FindInsertPositionWithGrowthOrRehash). +// +// In the case of absence of deleted slots and positive growth_left, the element +// can be inserted in the provided `target` position. +// +// When the table has deleted slots (according to GrowthInfo), the target +// position will be searched one more time using `find_first_non_full`. +// +// REQUIRES: Table is not SOO. +// REQUIRES: At least one non-full slot available. +// REQUIRES: `target` is a valid empty position to insert. +size_t PrepareInsertNonSoo(CommonFields& common, size_t hash, FindInfo target, + const PolicyFunctions& policy); // A SwissTable. // @@ -3533,7 +3566,7 @@ class raw_hash_set { // common(), old_capacity, hash) // can be called right after `resize`. void resize(size_t new_capacity) { - resize_impl(new_capacity, HashtablezInfoHandle{}); + raw_hash_set::resize_impl(common(), new_capacity, HashtablezInfoHandle{}); } // As above, except that we also accept a pre-sampled, forced infoz for @@ -3541,19 +3574,24 @@ class raw_hash_set { // store the infoz. void resize_with_soo_infoz(HashtablezInfoHandle forced_infoz) { assert(forced_infoz.IsSampled()); - resize_impl(NextCapacity(SooCapacity()), forced_infoz); + raw_hash_set::resize_impl(common(), NextCapacity(SooCapacity()), + forced_infoz); } - ABSL_ATTRIBUTE_NOINLINE void resize_impl( - size_t new_capacity, HashtablezInfoHandle forced_infoz) { + // Resizes set to the new capacity. + // It is a static function in order to use its pointer in GetPolicyFunctions. + ABSL_ATTRIBUTE_NOINLINE static void resize_impl( + CommonFields& common, size_t new_capacity, + HashtablezInfoHandle forced_infoz) { + raw_hash_set* set = reinterpret_cast<raw_hash_set*>(&common); assert(IsValidCapacity(new_capacity)); - assert(!fits_in_soo(new_capacity)); - const bool was_soo = is_soo(); - const bool had_soo_slot = was_soo && !empty(); + assert(!set->fits_in_soo(new_capacity)); + const bool was_soo = set->is_soo(); + const bool had_soo_slot = was_soo && !set->empty(); const ctrl_t soo_slot_h2 = - had_soo_slot ? static_cast<ctrl_t>(H2(hash_of(soo_slot()))) + had_soo_slot ? 
static_cast<ctrl_t>(H2(set->hash_of(set->soo_slot()))) : ctrl_t::kEmpty; - HashSetResizeHelper resize_helper(common(), was_soo, had_soo_slot, + HashSetResizeHelper resize_helper(common, was_soo, had_soo_slot, forced_infoz); // Initialize HashSetResizeHelper::old_heap_or_soo_. We can't do this in // HashSetResizeHelper constructor because it can't transfer slots when @@ -3561,11 +3599,12 @@ class raw_hash_set { // TODO(b/289225379): try to handle more of the SOO cases inside // InitializeSlots. See comment on cl/555990034 snapshot #63. if (PolicyTraits::transfer_uses_memcpy() || !had_soo_slot) { - resize_helper.old_heap_or_soo() = common().heap_or_soo(); + resize_helper.old_heap_or_soo() = common.heap_or_soo(); } else { - transfer(to_slot(resize_helper.old_soo_data()), soo_slot()); + set->transfer(set->to_slot(resize_helper.old_soo_data()), + set->soo_slot()); } - common().set_capacity(new_capacity); + common.set_capacity(new_capacity); // Note that `InitializeSlots` does different number initialization steps // depending on the values of `transfer_uses_memcpy` and capacities. // Refer to the comment in `InitializeSlots` for more details. @@ -3573,7 +3612,7 @@ class raw_hash_set { resize_helper.InitializeSlots<CharAlloc, sizeof(slot_type), PolicyTraits::transfer_uses_memcpy(), SooEnabled(), alignof(slot_type)>( - common(), CharAlloc(alloc_ref()), soo_slot_h2, sizeof(key_type), + common, CharAlloc(set->alloc_ref()), soo_slot_h2, sizeof(key_type), sizeof(value_type)); // In the SooEnabled() case, capacity is never 0 so we don't check. @@ -3585,30 +3624,30 @@ class raw_hash_set { // Nothing more to do in this case. if (was_soo && !had_soo_slot) return; - slot_type* new_slots = slot_array(); + slot_type* new_slots = set->slot_array(); if (grow_single_group) { if (PolicyTraits::transfer_uses_memcpy()) { // InitializeSlots did all the work. return; } if (was_soo) { - transfer(new_slots + resize_helper.SooSlotIndex(), - to_slot(resize_helper.old_soo_data())); + set->transfer(new_slots + resize_helper.SooSlotIndex(), + to_slot(resize_helper.old_soo_data())); return; } else { // We want GrowSizeIntoSingleGroup to be called here in order to make // InitializeSlots not depend on PolicyTraits. - resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common(), - alloc_ref()); + resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common, + set->alloc_ref()); } } else { // InitializeSlots prepares control bytes to correspond to empty table. const auto insert_slot = [&](slot_type* slot) { - size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, + size_t hash = PolicyTraits::apply(HashElement{set->hash_ref()}, PolicyTraits::element(slot)); - auto target = find_first_non_full(common(), hash); - SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type)); - transfer(new_slots + target.offset, slot); + auto target = find_first_non_full(common, hash); + SetCtrl(common, target.offset, H2(hash), sizeof(slot_type)); + set->transfer(new_slots + target.offset, slot); return target.probe_length; }; if (was_soo) { @@ -3623,80 +3662,13 @@ class raw_hash_set { total_probe_length += insert_slot(old_slots + i); } } - infoz().RecordRehash(total_probe_length); + common.infoz().RecordRehash(total_probe_length); } } - resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(alloc_ref()), + resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(set->alloc_ref()), sizeof(slot_type)); } - // Prunes control bytes to remove as many tombstones as possible. - // - // See the comment on `rehash_and_grow_if_necessary()`. 
- inline void drop_deletes_without_resize() { - // Stack-allocate space for swapping elements. - alignas(slot_type) unsigned char tmp[sizeof(slot_type)]; - DropDeletesWithoutResize(common(), &hash_ref(), GetPolicyFunctions(), tmp); - } - - // Called whenever the table *might* need to conditionally grow. - // - // This function is an optimization opportunity to perform a rehash even when - // growth is unnecessary, because vacating tombstones is beneficial for - // performance in the long-run. - void rehash_and_grow_if_necessary() { - const size_t cap = capacity(); - if (cap > Group::kWidth && - // Do these calculations in 64-bit to avoid overflow. - size() * uint64_t{32} <= cap * uint64_t{25}) { - // Squash DELETED without growing if there is enough capacity. - // - // Rehash in place if the current size is <= 25/32 of capacity. - // Rationale for such a high factor: 1) drop_deletes_without_resize() is - // faster than resize, and 2) it takes quite a bit of work to add - // tombstones. In the worst case, seems to take approximately 4 - // insert/erase pairs to create a single tombstone and so if we are - // rehashing because of tombstones, we can afford to rehash-in-place as - // long as we are reclaiming at least 1/8 the capacity without doing more - // than 2X the work. (Where "work" is defined to be size() for rehashing - // or rehashing in place, and 1 for an insert or erase.) But rehashing in - // place is faster per operation than inserting or even doubling the size - // of the table, so we actually afford to reclaim even less space from a - // resize-in-place. The decision is to rehash in place if we can reclaim - // at about 1/8th of the usable capacity (specifically 3/28 of the - // capacity) which means that the total cost of rehashing will be a small - // fraction of the total work. - // - // Here is output of an experiment using the BM_CacheInSteadyState - // benchmark running the old case (where we rehash-in-place only if we can - // reclaim at least 7/16*capacity) vs. this code (which rehashes in place - // if we can recover 3/32*capacity). - // - // Note that although in the worst-case number of rehashes jumped up from - // 15 to 190, but the number of operations per second is almost the same. - // - // Abridged output of running BM_CacheInSteadyState benchmark from - // raw_hash_set_benchmark. N is the number of insert/erase operations. - // - // | OLD (recover >= 7/16 | NEW (recover >= 3/32) - // size | N/s LoadFactor NRehashes | N/s LoadFactor NRehashes - // 448 | 145284 0.44 18 | 140118 0.44 19 - // 493 | 152546 0.24 11 | 151417 0.48 28 - // 538 | 151439 0.26 11 | 151152 0.53 38 - // 583 | 151765 0.28 11 | 150572 0.57 50 - // 628 | 150241 0.31 11 | 150853 0.61 66 - // 672 | 149602 0.33 12 | 150110 0.66 90 - // 717 | 149998 0.35 12 | 149531 0.70 129 - // 762 | 149836 0.37 13 | 148559 0.74 190 - // 807 | 149736 0.39 14 | 151107 0.39 14 - // 852 | 150204 0.42 15 | 151019 0.42 15 - drop_deletes_without_resize(); - } else { - // Otherwise grow the container. - resize(NextCapacity(cap)); - } - } - // Casting directly from e.g. char* to slot_type* can cause compilation errors // on objective-C. This function converts to void* first, avoiding the issue. 
static slot_type* to_slot(void* buf) { @@ -3817,6 +3789,7 @@ class raw_hash_set { template <class K> std::pair<iterator, bool> find_or_prepare_insert_non_soo(const K& key) { + assert(!is_soo()); prefetch_heap_block(); auto hash = hash_ref()(key); auto seq = probe(common(), hash); @@ -3833,9 +3806,10 @@ class raw_hash_set { if (ABSL_PREDICT_TRUE(mask_empty)) { size_t target = seq.offset( GetInsertionOffset(mask_empty, capacity(), hash, control())); - return { - iterator_at(prepare_insert(hash, FindInfo{target, seq.index()})), - true}; + return {iterator_at(PrepareInsertNonSoo(common(), hash, + FindInfo{target, seq.index()}, + GetPolicyFunctions())), + true}; } seq.next(); assert(seq.index() <= capacity() && "full table!"); @@ -3852,63 +3826,6 @@ class raw_hash_set { return find_or_prepare_insert_non_soo(key); } - // Given the hash of a value not currently in the table and the first empty - // slot in the probe sequence, finds the viable slot index to insert it at. - // - // In case there's no space left, the table can be resized or rehashed - // (see rehash_and_grow_if_necessary). - // - // In case of absence of deleted slots and positive growth_left, element can - // be inserted in the provided `target` position. - // - // When table has deleted slots (according to GrowthInfo), target position - // will be searched one more time using `find_first_non_full`. - // - // REQUIRES: At least one non-full slot available. - // REQUIRES: `target` is a valid empty position to insert. - size_t prepare_insert(size_t hash, FindInfo target) ABSL_ATTRIBUTE_NOINLINE { - assert(!is_soo()); - // When there are no deleted slots in the table - // and growth_left is positive, we can insert at the first - // empty slot in the probe sequence (target). - bool use_target_hint = false; - // Optimization is disabled on enabled generations. - // We have to rehash even sparse tables randomly in such mode. -#ifndef ABSL_SWISSTABLE_ENABLE_GENERATIONS - use_target_hint = growth_info().HasNoDeletedAndGrowthLeft(); -#endif - if (ABSL_PREDICT_FALSE(!use_target_hint)) { - const bool rehash_for_bug_detection = - common().should_rehash_for_bug_detection_on_insert(); - if (rehash_for_bug_detection) { - // Move to a different heap allocation in order to detect bugs. - const size_t cap = capacity(); - resize(growth_left() > 0 ? cap : NextCapacity(cap)); - } - if (!rehash_for_bug_detection && - ABSL_PREDICT_FALSE(growth_left() == 0)) { - const size_t old_capacity = capacity(); - rehash_and_grow_if_necessary(); - // NOTE: It is safe to use `FindFirstNonFullAfterResize` after - // `rehash_and_grow_if_necessary`, whether capacity changes or not. - // `rehash_and_grow_if_necessary` may *not* call `resize` - // and perform `drop_deletes_without_resize` instead. But this - // could happen only on big tables and will not change capacity. - // For big tables `FindFirstNonFullAfterResize` will always - // fallback to normal `find_first_non_full`. - target = HashSetResizeHelper::FindFirstNonFullAfterResize( - common(), old_capacity, hash); - } else { - target = find_first_non_full(common(), hash); - } - } - PrepareInsertCommon(common()); - growth_info().OverwriteControlAsFull(control()[target.offset]); - SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type)); - infoz().RecordInsert(hash, target.probe_length); - return target.offset; - } - // Constructs the value in the space pointed by the iterator. This only works // after an unsuccessful find_or_prepare_insert() and before any other // modifications happen in the raw_hash_set. 
@@ -3949,7 +3866,7 @@ class raw_hash_set { // See `CapacityToGrowth()`. size_t growth_left() const { assert(!is_soo()); - return growth_info().GetGrowthLeft(); + return common().growth_left(); } GrowthInfo& growth_info() { @@ -4009,6 +3926,10 @@ class raw_hash_set { return settings_.template get<3>(); } + static const void* get_hash_ref_fn(const CommonFields& common) { + auto* h = reinterpret_cast<const raw_hash_set*>(&common); + return &h->hash_ref(); + } static void transfer_slot_fn(void* set, void* dst, void* src) { auto* h = static_cast<raw_hash_set*>(set); h->transfer(static_cast<slot_type*>(dst), static_cast<slot_type*>(src)); @@ -4030,6 +3951,10 @@ class raw_hash_set { static const PolicyFunctions& GetPolicyFunctions() { static constexpr PolicyFunctions value = { sizeof(slot_type), + // TODO(b/328722020): try to type erase + // for standard layout and alignof(Hash) <= alignof(CommonFields). + std::is_empty<hasher>::value ? &GetHashRefForEmptyHasher + : &raw_hash_set::get_hash_ref_fn, PolicyTraits::template get_hash_slot_fn<hasher>(), PolicyTraits::transfer_uses_memcpy() ? TransferRelocatable<sizeof(slot_type)> @@ -4037,6 +3962,7 @@ class raw_hash_set { (std::is_same<SlotAlloc, std::allocator<slot_type>>::value ? &DeallocateStandard<alignof(slot_type)> : &raw_hash_set::dealloc_fn), + &raw_hash_set::resize_impl, }; return value; } diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index c4e05d60..10f793ef 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -126,6 +126,22 @@ TEST(GrowthInfoTest, HasNoDeletedAndGrowthLeft) { EXPECT_TRUE(gi.HasNoDeletedAndGrowthLeft()); } +TEST(GrowthInfoTest, HasNoGrowthLeftAndNoDeleted) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(1); + EXPECT_FALSE(gi.HasNoGrowthLeftAndNoDeleted()); + gi.OverwriteEmptyAsFull(); + EXPECT_TRUE(gi.HasNoGrowthLeftAndNoDeleted()); + gi.OverwriteFullAsDeleted(); + EXPECT_FALSE(gi.HasNoGrowthLeftAndNoDeleted()); + gi.OverwriteFullAsEmpty(); + EXPECT_FALSE(gi.HasNoGrowthLeftAndNoDeleted()); + gi.InitGrowthLeftNoDeleted(0); + EXPECT_TRUE(gi.HasNoGrowthLeftAndNoDeleted()); + gi.OverwriteFullAsEmpty(); + EXPECT_FALSE(gi.HasNoGrowthLeftAndNoDeleted()); +} + TEST(GrowthInfoTest, OverwriteFullAsEmpty) { GrowthInfo gi; gi.InitGrowthLeftNoDeleted(5); |