Diffstat (limited to 'absl/container/internal/raw_hash_set.h')
-rw-r--r--  absl/container/internal/raw_hash_set.h | 244
1 file changed, 85 insertions(+), 159 deletions(-)
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 1d2e2d14..a64133a7 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -1126,6 +1126,13 @@ class GrowthInfo {
return static_cast<std::make_signed_t<size_t>>(growth_left_info_) > 0;
}
+ // Returns true if the table satisfies two properties:
+ // 1. Guaranteed to have no kDeleted slots.
+ // 2. There is no growth left.
+ bool HasNoGrowthLeftAndNoDeleted() const {
+ return growth_left_info_ == 0;
+ }
+
// Returns true if the table is guaranteed to have no kDeleted slots.
bool HasNoDeleted() const {
return static_cast<std::make_signed_t<size_t>>(growth_left_info_) >= 0;
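
The predicates above all read the same machine word. A minimal sketch of the encoding they imply — hypothetical names, only the queries shown — where the most significant bit records "may contain kDeleted slots" and the remaining bits count the growth left, so each question is a single comparison:

#include <cstddef>
#include <type_traits>

class MiniGrowthInfo {  // hypothetical mirror of GrowthInfo's encoding
 public:
  explicit MiniGrowthInfo(size_t growth_left) : info_(growth_left) {}

  // Converting a full slot to kDeleted sets the sign bit.
  void OverwriteFullAsDeleted() { info_ |= kDeletedBit; }

  // Sign bit clear and value nonzero: growth left and no deleted slots.
  bool HasNoDeletedAndGrowthLeft() const {
    return static_cast<std::make_signed_t<size_t>>(info_) > 0;
  }
  // Whole word zero: no growth left and no deleted slots.
  bool HasNoGrowthLeftAndNoDeleted() const { return info_ == 0; }
  // Sign bit clear: no deleted slots, regardless of growth left.
  bool HasNoDeleted() const {
    return static_cast<std::make_signed_t<size_t>>(info_) >= 0;
  }

 private:
  static constexpr size_t kDeletedBit = ~(~size_t{0} >> 1);  // MSB only
  size_t info_;
};
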
@@ -1374,6 +1381,8 @@ class CommonFields : public CommonFieldsGenerationInfo {
// This is stored in the heap allocation before the control bytes.
// TODO(b/289225379): experiment with moving growth_info back inline to
// increase room for SOO.
+ size_t growth_left() const { return growth_info().GetGrowthLeft(); }
+
GrowthInfo& growth_info() {
auto* gl_ptr = reinterpret_cast<GrowthInfo*>(control()) - 1;
assert(reinterpret_cast<uintptr_t>(gl_ptr) % alignof(GrowthInfo) == 0);
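
Per the surrounding comments, GrowthInfo lives in the heap allocation immediately before the control bytes, which is all the accessor above relies on. A rough picture of the assumed layout (simplified; the real backing array carries further metadata as well):

// [ ... | GrowthInfo | control bytes ... | slots ... ]
//                    ^-- control() points here, so stepping one
//                        GrowthInfo back lands on the metadata word:
inline GrowthInfo& GrowthInfoBeforeControl(ctrl_t* control_bytes) {
  return *(reinterpret_cast<GrowthInfo*>(control_bytes) - 1);
}
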
@@ -1933,8 +1942,7 @@ class HashSetResizeHelper {
// will be no performance benefit.
// It has an implicit assumption that `resize` will call
// `GrowSizeIntoSingleGroup*` in case `IsGrowingIntoSingleGroupApplicable`.
- // Falls back to `find_first_non_full` in case of big groups, so it is
- // safe to use after `rehash_and_grow_if_necessary`.
+ // Falls back to `find_first_non_full` in case of big groups.
static FindInfo FindFirstNonFullAfterResize(const CommonFields& c,
size_t old_capacity,
size_t hash) {
@@ -2216,14 +2224,22 @@ size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size,
struct PolicyFunctions {
size_t slot_size;
+ // Returns the pointer to the hash function stored in the set.
+ const void* (*hash_fn)(const CommonFields& common);
+
// Returns the hash of the pointed-to slot.
size_t (*hash_slot)(const void* hash_fn, void* slot);
- // Transfer the contents of src_slot to dst_slot.
+ // Transfers the contents of src_slot to dst_slot.
void (*transfer)(void* set, void* dst_slot, void* src_slot);
- // Deallocate the backing store from common.
+ // Deallocates the backing store from common.
void (*dealloc)(CommonFields& common, const PolicyFunctions& policy);
+
+ // Resizes the set to the new capacity.
+ // Arguments are used as in raw_hash_set::resize_impl.
+ void (*resize)(CommonFields& common, size_t new_capacity,
+ HashtablezInfoHandle forced_infoz);
};
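
PolicyFunctions acts as a hand-rolled vtable: each raw_hash_set instantiation exposes one static constexpr table of function pointers (see GetPolicyFunctions further down) so that shared, non-templated routines can operate on any table through CommonFields alone. A self-contained sketch of the pattern, with hypothetical names standing in for the real types:

#include <cstddef>
#include <cstdio>

struct Common {};  // stand-in for CommonFields

struct Vtable {
  size_t slot_size;
  void (*print)(const Common& c);
};

// One shared, non-templated routine serves every instantiation.
void TypeErasedOp(const Common& c, const Vtable& v) { v.print(c); }

template <class T>
struct Table {
  Common common;
  static void PrintFn(const Common&) { std::printf("%zu\n", sizeof(T)); }
  static const Vtable& GetVtable() {
    static constexpr Vtable v = {sizeof(T), &Table::PrintFn};
    return v;
  }
  void Op() { TypeErasedOp(common, GetVtable()); }
};

int main() {
  Table<int> t;
  t.Op();  // prints 4 on typical platforms
}
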
// ClearBackingArray clears the backing array, either modifying it in place,
@@ -2261,9 +2277,26 @@ ABSL_ATTRIBUTE_NOINLINE void TransferRelocatable(void*, void* dst, void* src) {
memcpy(dst, src, SizeOfSlot);
}
-// Type-erased version of raw_hash_set::drop_deletes_without_resize.
-void DropDeletesWithoutResize(CommonFields& common, const void* hash_fn,
- const PolicyFunctions& policy, void* tmp_space);
+// Type-erased raw_hash_set::get_hash_ref_fn for the empty hash function case.
+const void* GetHashRefForEmptyHasher(const CommonFields& common);
+
+// Given the hash of a value not currently in the table and the first empty
+// slot in the probe sequence, finds a viable slot index to insert it at.
+//
+// In case there's no space left, the table can be resized or rehashed
+// (for tables with deleted slots, see FindInsertPositionWithGrowthOrRehash).
+//
+// If there are no deleted slots and growth_left is positive, the element can
+// be inserted at the provided `target` position.
+//
+// When the table has deleted slots (according to GrowthInfo), the target
+// position is recomputed using `find_first_non_full`.
+//
+// REQUIRES: Table is not SOO.
+// REQUIRES: At least one non-full slot available.
+// REQUIRES: `target` is a valid empty position to insert.
+size_t PrepareInsertNonSoo(CommonFields& common, size_t hash, FindInfo target,
+ const PolicyFunctions& policy);
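
Based on the contract spelled out above (and on the prepare_insert member it replaces, removed further down), the flow is roughly the following. This is an outline under those assumptions — omitting sampling, generations, and the rehash-for-bug-detection path — not the actual definition:

// Hypothetical outline of PrepareInsertNonSoo's documented contract.
size_t PrepareInsertNonSooSketch(CommonFields& common, size_t hash,
                                 FindInfo target,
                                 const PolicyFunctions& policy) {
  if (!common.growth_info().HasNoDeletedAndGrowthLeft()) {
    if (common.growth_left() == 0) {
      // No space left: grow through the type-erased resize pointer.
      policy.resize(common, NextCapacity(common.capacity()),
                    HashtablezInfoHandle{});
    }
    // Deleted slots or a resize invalidate the hint; search again.
    target = find_first_non_full(common, hash);
  }
  PrepareInsertCommon(common);  // size bookkeeping
  common.growth_info().OverwriteControlAsFull(common.control()[target.offset]);
  SetCtrl(common, target.offset, H2(hash), policy.slot_size);
  return target.offset;
}
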
// A SwissTable.
//
@@ -3533,7 +3566,7 @@ class raw_hash_set {
// common(), old_capacity, hash)
// can be called right after `resize`.
void resize(size_t new_capacity) {
- resize_impl(new_capacity, HashtablezInfoHandle{});
+ raw_hash_set::resize_impl(common(), new_capacity, HashtablezInfoHandle{});
}
// As above, except that we also accept a pre-sampled, forced infoz for
@@ -3541,19 +3574,24 @@ class raw_hash_set {
// store the infoz.
void resize_with_soo_infoz(HashtablezInfoHandle forced_infoz) {
assert(forced_infoz.IsSampled());
- resize_impl(NextCapacity(SooCapacity()), forced_infoz);
+ raw_hash_set::resize_impl(common(), NextCapacity(SooCapacity()),
+ forced_infoz);
}
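
The static resize_impl that follows re-derives the table from its CommonFields with a reinterpret_cast on the first line of the new body. That pun is only sound if the CommonFields subobject sits at offset zero of raw_hash_set; a tiny self-contained sketch of the idiom, with hypothetical types rather than the real classes:

#include <cassert>
#include <cstddef>

struct Fields { int size = 0; };  // stand-in for CommonFields

struct Set {
  Fields fields;  // must be the first (offset-zero) member
  int extra = 42;
};

// A free function receiving only Fields& can recover the whole Set.
int ReadExtra(Fields& f) {
  Set* s = reinterpret_cast<Set*>(&f);
  return s->extra;
}

int main() {
  Set s;
  assert(ReadExtra(s.fields) == 42);
  static_assert(offsetof(Set, fields) == 0, "layout pun requires offset 0");
}
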
- ABSL_ATTRIBUTE_NOINLINE void resize_impl(
- size_t new_capacity, HashtablezInfoHandle forced_infoz) {
+ // Resizes the set to the new capacity.
+ // It is static so that its pointer can be stored in GetPolicyFunctions.
+ ABSL_ATTRIBUTE_NOINLINE static void resize_impl(
+ CommonFields& common, size_t new_capacity,
+ HashtablezInfoHandle forced_infoz) {
+ raw_hash_set* set = reinterpret_cast<raw_hash_set*>(&common);
assert(IsValidCapacity(new_capacity));
- assert(!fits_in_soo(new_capacity));
- const bool was_soo = is_soo();
- const bool had_soo_slot = was_soo && !empty();
+ assert(!set->fits_in_soo(new_capacity));
+ const bool was_soo = set->is_soo();
+ const bool had_soo_slot = was_soo && !set->empty();
const ctrl_t soo_slot_h2 =
- had_soo_slot ? static_cast<ctrl_t>(H2(hash_of(soo_slot())))
+ had_soo_slot ? static_cast<ctrl_t>(H2(set->hash_of(set->soo_slot())))
: ctrl_t::kEmpty;
- HashSetResizeHelper resize_helper(common(), was_soo, had_soo_slot,
+ HashSetResizeHelper resize_helper(common, was_soo, had_soo_slot,
forced_infoz);
// Initialize HashSetResizeHelper::old_heap_or_soo_. We can't do this in
// HashSetResizeHelper constructor because it can't transfer slots when
@@ -3561,11 +3599,12 @@ class raw_hash_set {
// TODO(b/289225379): try to handle more of the SOO cases inside
// InitializeSlots. See comment on cl/555990034 snapshot #63.
if (PolicyTraits::transfer_uses_memcpy() || !had_soo_slot) {
- resize_helper.old_heap_or_soo() = common().heap_or_soo();
+ resize_helper.old_heap_or_soo() = common.heap_or_soo();
} else {
- transfer(to_slot(resize_helper.old_soo_data()), soo_slot());
+ set->transfer(set->to_slot(resize_helper.old_soo_data()),
+ set->soo_slot());
}
- common().set_capacity(new_capacity);
+ common.set_capacity(new_capacity);
// Note that `InitializeSlots` does different number initialization steps
// depending on the values of `transfer_uses_memcpy` and capacities.
// Refer to the comment in `InitializeSlots` for more details.
@@ -3573,7 +3612,7 @@ class raw_hash_set {
resize_helper.InitializeSlots<CharAlloc, sizeof(slot_type),
PolicyTraits::transfer_uses_memcpy(),
SooEnabled(), alignof(slot_type)>(
- common(), CharAlloc(alloc_ref()), soo_slot_h2, sizeof(key_type),
+ common, CharAlloc(set->alloc_ref()), soo_slot_h2, sizeof(key_type),
sizeof(value_type));
// In the SooEnabled() case, capacity is never 0 so we don't check.
@@ -3585,30 +3624,30 @@ class raw_hash_set {
// Nothing more to do in this case.
if (was_soo && !had_soo_slot) return;
- slot_type* new_slots = slot_array();
+ slot_type* new_slots = set->slot_array();
if (grow_single_group) {
if (PolicyTraits::transfer_uses_memcpy()) {
// InitializeSlots did all the work.
return;
}
if (was_soo) {
- transfer(new_slots + resize_helper.SooSlotIndex(),
- to_slot(resize_helper.old_soo_data()));
+ set->transfer(new_slots + resize_helper.SooSlotIndex(),
+ to_slot(resize_helper.old_soo_data()));
return;
} else {
// We want GrowSizeIntoSingleGroup to be called here in order to make
// InitializeSlots not depend on PolicyTraits.
- resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common(),
- alloc_ref());
+ resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common,
+ set->alloc_ref());
}
} else {
// InitializeSlots prepares control bytes to correspond to an empty table.
const auto insert_slot = [&](slot_type* slot) {
- size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
+ size_t hash = PolicyTraits::apply(HashElement{set->hash_ref()},
PolicyTraits::element(slot));
- auto target = find_first_non_full(common(), hash);
- SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type));
- transfer(new_slots + target.offset, slot);
+ auto target = find_first_non_full(common, hash);
+ SetCtrl(common, target.offset, H2(hash), sizeof(slot_type));
+ set->transfer(new_slots + target.offset, slot);
return target.probe_length;
};
if (was_soo) {
@@ -3623,80 +3662,13 @@ class raw_hash_set {
total_probe_length += insert_slot(old_slots + i);
}
}
- infoz().RecordRehash(total_probe_length);
+ common.infoz().RecordRehash(total_probe_length);
}
}
- resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(alloc_ref()),
+ resize_helper.DeallocateOld<alignof(slot_type)>(CharAlloc(set->alloc_ref()),
sizeof(slot_type));
}
- // Prunes control bytes to remove as many tombstones as possible.
- //
- // See the comment on `rehash_and_grow_if_necessary()`.
- inline void drop_deletes_without_resize() {
- // Stack-allocate space for swapping elements.
- alignas(slot_type) unsigned char tmp[sizeof(slot_type)];
- DropDeletesWithoutResize(common(), &hash_ref(), GetPolicyFunctions(), tmp);
- }
-
- // Called whenever the table *might* need to conditionally grow.
- //
- // This function is an optimization opportunity to perform a rehash even when
- // growth is unnecessary, because vacating tombstones is beneficial for
- // performance in the long-run.
- void rehash_and_grow_if_necessary() {
- const size_t cap = capacity();
- if (cap > Group::kWidth &&
- // Do these calculations in 64-bit to avoid overflow.
- size() * uint64_t{32} <= cap * uint64_t{25}) {
- // Squash DELETED without growing if there is enough capacity.
- //
- // Rehash in place if the current size is <= 25/32 of capacity.
- // Rationale for such a high factor: 1) drop_deletes_without_resize() is
- // faster than resize, and 2) it takes quite a bit of work to add
- // tombstones. In the worst case, seems to take approximately 4
- // insert/erase pairs to create a single tombstone and so if we are
- // rehashing because of tombstones, we can afford to rehash-in-place as
- // long as we are reclaiming at least 1/8 the capacity without doing more
- // than 2X the work. (Where "work" is defined to be size() for rehashing
- // or rehashing in place, and 1 for an insert or erase.) But rehashing in
- // place is faster per operation than inserting or even doubling the size
- // of the table, so we actually afford to reclaim even less space from a
- // resize-in-place. The decision is to rehash in place if we can reclaim
- // at about 1/8th of the usable capacity (specifically 3/28 of the
- // capacity) which means that the total cost of rehashing will be a small
- // fraction of the total work.
- //
- // Here is output of an experiment using the BM_CacheInSteadyState
- // benchmark running the old case (where we rehash-in-place only if we can
- // reclaim at least 7/16*capacity) vs. this code (which rehashes in place
- // if we can recover 3/32*capacity).
- //
- // Note that although in the worst case the number of rehashes jumped up
- // from 15 to 190, the number of operations per second is almost the same.
- //
- // Abridged output of running BM_CacheInSteadyState benchmark from
- // raw_hash_set_benchmark. N is the number of insert/erase operations.
- //
- // | OLD (recover >= 7/16) | NEW (recover >= 3/32)
- // size | N/s LoadFactor NRehashes | N/s LoadFactor NRehashes
- // 448 | 145284 0.44 18 | 140118 0.44 19
- // 493 | 152546 0.24 11 | 151417 0.48 28
- // 538 | 151439 0.26 11 | 151152 0.53 38
- // 583 | 151765 0.28 11 | 150572 0.57 50
- // 628 | 150241 0.31 11 | 150853 0.61 66
- // 672 | 149602 0.33 12 | 150110 0.66 90
- // 717 | 149998 0.35 12 | 149531 0.70 129
- // 762 | 149836 0.37 13 | 148559 0.74 190
- // 807 | 149736 0.39 14 | 151107 0.39 14
- // 852 | 150204 0.42 15 | 151019 0.42 15
- drop_deletes_without_resize();
- } else {
- // Otherwise grow the container.
- resize(NextCapacity(cap));
- }
- }
-
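
For reference, the removed heuristic in one worked check — a plain restatement of the comment above, not new behavior (recall that capacities have the form 2^k - 1):

#include <cstddef>
#include <cstdint>

// Restates the removed predicate; 64-bit math avoids overflow, as noted.
bool WouldRehashInPlace(size_t size, size_t cap, size_t group_width) {
  return cap > group_width && uint64_t{size} * 32 <= uint64_t{cap} * 25;
}
// Example with cap = 1023 and Group::kWidth = 16:
//   size = 799: 799 * 32 = 25568 <= 1023 * 25 = 25575  -> squash in place.
//   size = 800: 800 * 32 = 25600 >  25575               -> grow instead.
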
// Casting directly from e.g. char* to slot_type* can cause compilation errors
// on objective-C. This function converts to void* first, avoiding the issue.
static slot_type* to_slot(void* buf) {
@@ -3817,6 +3789,7 @@ class raw_hash_set {
template <class K>
std::pair<iterator, bool> find_or_prepare_insert_non_soo(const K& key) {
+ assert(!is_soo());
prefetch_heap_block();
auto hash = hash_ref()(key);
auto seq = probe(common(), hash);
@@ -3833,9 +3806,10 @@ class raw_hash_set {
if (ABSL_PREDICT_TRUE(mask_empty)) {
size_t target = seq.offset(
GetInsertionOffset(mask_empty, capacity(), hash, control()));
- return {
- iterator_at(prepare_insert(hash, FindInfo{target, seq.index()})),
- true};
+ return {iterator_at(PrepareInsertNonSoo(common(), hash,
+ FindInfo{target, seq.index()},
+ GetPolicyFunctions())),
+ true};
}
seq.next();
assert(seq.index() <= capacity() && "full table!");
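
For readers new to the probe loop above, here is a deliberately toy model of its shape — a byte-at-a-time scan over a power-of-two table instead of SIMD Group matches over real capacities, and no H2 filtering. It illustrates why finding an empty byte both proves the key is absent and yields the insertion target handed to PrepareInsertNonSoo:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr uint8_t kEmptyByte = 0x80;  // toy stand-in for ctrl_t::kEmpty
constexpr size_t kGroupWidth = 16;

// Toy version: returns the first empty index along the probe sequence.
size_t FindInsertTarget(const std::vector<uint8_t>& ctrl, size_t hash) {
  assert(!ctrl.empty() && (ctrl.size() & (ctrl.size() - 1)) == 0);
  const size_t mask = ctrl.size() - 1;
  size_t group = (hash >> 7) & mask;  // H1 picks the starting group
  size_t stride = 0;
  for (size_t probes = 0; probes <= mask; ++probes) {
    for (size_t i = 0; i < kGroupWidth; ++i) {
      const size_t idx = (group + i) & mask;
      if (ctrl[idx] == kEmptyByte) return idx;  // empty => key absent
    }
    stride += kGroupWidth;  // triangular-number probing
    group = (group + stride) & mask;
  }
  assert(false && "full table!");
  return 0;
}
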
@@ -3852,63 +3826,6 @@ class raw_hash_set {
return find_or_prepare_insert_non_soo(key);
}
- // Given the hash of a value not currently in the table and the first empty
- // slot in the probe sequence, finds the viable slot index to insert it at.
- //
- // In case there's no space left, the table can be resized or rehashed
- // (see rehash_and_grow_if_necessary).
- //
- // In case of absence of deleted slots and positive growth_left, element can
- // be inserted in the provided `target` position.
- //
- // When table has deleted slots (according to GrowthInfo), target position
- // will be searched one more time using `find_first_non_full`.
- //
- // REQUIRES: At least one non-full slot available.
- // REQUIRES: `target` is a valid empty position to insert.
- size_t prepare_insert(size_t hash, FindInfo target) ABSL_ATTRIBUTE_NOINLINE {
- assert(!is_soo());
- // When there are no deleted slots in the table
- // and growth_left is positive, we can insert at the first
- // empty slot in the probe sequence (target).
- bool use_target_hint = false;
- // The optimization is disabled when generations are enabled.
- // In that mode we have to rehash even sparse tables randomly.
-#ifndef ABSL_SWISSTABLE_ENABLE_GENERATIONS
- use_target_hint = growth_info().HasNoDeletedAndGrowthLeft();
-#endif
- if (ABSL_PREDICT_FALSE(!use_target_hint)) {
- const bool rehash_for_bug_detection =
- common().should_rehash_for_bug_detection_on_insert();
- if (rehash_for_bug_detection) {
- // Move to a different heap allocation in order to detect bugs.
- const size_t cap = capacity();
- resize(growth_left() > 0 ? cap : NextCapacity(cap));
- }
- if (!rehash_for_bug_detection &&
- ABSL_PREDICT_FALSE(growth_left() == 0)) {
- const size_t old_capacity = capacity();
- rehash_and_grow_if_necessary();
- // NOTE: It is safe to use `FindFirstNonFullAfterResize` after
- // `rehash_and_grow_if_necessary`, whether capacity changes or not.
- // `rehash_and_grow_if_necessary` may *not* call `resize`
- // and perform `drop_deletes_without_resize` instead. But this
- // could happen only on big tables and will not change capacity.
- // For big tables `FindFirstNonFullAfterResize` will always
- // fallback to normal `find_first_non_full`.
- target = HashSetResizeHelper::FindFirstNonFullAfterResize(
- common(), old_capacity, hash);
- } else {
- target = find_first_non_full(common(), hash);
- }
- }
- PrepareInsertCommon(common());
- growth_info().OverwriteControlAsFull(control()[target.offset]);
- SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type));
- infoz().RecordInsert(hash, target.probe_length);
- return target.offset;
- }
-
// Constructs the value in the space pointed by the iterator. This only works
// after an unsuccessful find_or_prepare_insert() and before any other
// modifications happen in the raw_hash_set.
@@ -3949,7 +3866,7 @@ class raw_hash_set {
// See `CapacityToGrowth()`.
size_t growth_left() const {
assert(!is_soo());
- return growth_info().GetGrowthLeft();
+ return common().growth_left();
}
GrowthInfo& growth_info() {
@@ -4009,6 +3926,10 @@ class raw_hash_set {
return settings_.template get<3>();
}
+ static const void* get_hash_ref_fn(const CommonFields& common) {
+ auto* h = reinterpret_cast<const raw_hash_set*>(&common);
+ return &h->hash_ref();
+ }
static void transfer_slot_fn(void* set, void* dst, void* src) {
auto* h = static_cast<raw_hash_set*>(set);
h->transfer(static_cast<slot_type*>(dst), static_cast<slot_type*>(src));
@@ -4030,6 +3951,10 @@ class raw_hash_set {
static const PolicyFunctions& GetPolicyFunctions() {
static constexpr PolicyFunctions value = {
sizeof(slot_type),
+ // TODO(b/328722020): try to type erase
+ // for standard layout and alignof(Hash) <= alignof(CommonFields).
+ std::is_empty<hasher>::value ? &GetHashRefForEmptyHasher
+ : &raw_hash_set::get_hash_ref_fn,
PolicyTraits::template get_hash_slot_fn<hasher>(),
PolicyTraits::transfer_uses_memcpy()
? TransferRelocatable<sizeof(slot_type)>
@@ -4037,6 +3962,7 @@ class raw_hash_set {
(std::is_same<SlotAlloc, std::allocator<slot_type>>::value
? &DeallocateStandard<alignof(slot_type)>
: &raw_hash_set::dealloc_fn),
+ &raw_hash_set::resize_impl,
};
return value;
}
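
Given the field comments in PolicyFunctions, a shared routine would presumably combine the two new entries like this — an assumed composition, not a quote from the library:

// Hypothetical helper: fetch the type-erased hasher, then hash a slot.
size_t HashOfSlot(const CommonFields& common, void* slot,
                  const PolicyFunctions& policy) {
  const void* hasher = policy.hash_fn(common);  // &hash_ref(), or a
                                                // stand-in for empty hashers
  return policy.hash_slot(hasher, slot);
}
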