Diffstat (limited to 'absl/container/internal/raw_hash_set.h')
-rw-r--r--  absl/container/internal/raw_hash_set.h  403
1 file changed, 327 insertions, 76 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 067ea0da..b6d2cf93 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -1378,6 +1378,12 @@ struct FindInfo {
// `ShouldInsertBackwards()` for small tables.
inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; }
+// Whether a table fits entirely into a probing group.
+// Arbitrary order of elements in such tables is correct.
+inline bool is_single_group(size_t capacity) {
+ return capacity <= Group::kWidth;
+}
+
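
For a concrete sense of the two thresholds, here is a minimal standalone sketch (not part of the patch); it assumes `Group::kWidth == 16`, the SSE2 group width, and other platforms may use a different value:

    // Standalone illustration of `is_small` vs. `is_single_group`, assuming a
    // group width of 16 (the SSE2 value; the portable fallback uses 8).
    #include <cstddef>

    constexpr std::size_t kWidth = 16;  // assumed Group::kWidth

    constexpr bool is_small(std::size_t capacity) { return capacity < kWidth - 1; }
    constexpr bool is_single_group(std::size_t capacity) { return capacity <= kWidth; }

    int main() {
      // Valid capacities are of the form 2^k - 1: 1, 3, 7, 15, 31, ...
      static_assert(is_small(7) && is_single_group(7), "");
      // Capacity 15 uses a full group: no longer "small", but still one group.
      static_assert(!is_small(15) && is_single_group(15), "");
      // Capacity 31 spans several probing groups.
      static_assert(!is_single_group(31), "");
      return 0;
    }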
// Begins a probing operation on `common.control`, using `hash`.
inline probe_seq<Group::kWidth> probe(const ctrl_t* ctrl, const size_t capacity,
size_t hash) {
@@ -1440,7 +1446,6 @@ inline void ResetCtrl(CommonFields& common, size_t slot_size) {
capacity + 1 + NumClonedBytes());
ctrl[capacity] = ctrl_t::kSentinel;
SanitizerPoisonMemoryRegion(common.slot_array(), slot_size * capacity);
- ResetGrowthLeft(common);
}
// Sets `ctrl[i]` to `h`.
@@ -1475,41 +1480,263 @@ constexpr size_t BackingArrayAlignment(size_t align_of_slot) {
return (std::max)(align_of_slot, alignof(size_t));
}
-template <typename Alloc, size_t SizeOfSlot, size_t AlignOfSlot>
-ABSL_ATTRIBUTE_NOINLINE void InitializeSlots(CommonFields& c, Alloc alloc) {
- assert(c.capacity());
- // Folks with custom allocators often make unwarranted assumptions about the
- // behavior of their classes vis-a-vis trivial destructability and what
- // calls they will or won't make. Avoid sampling for people with custom
- // allocators to get us out of this mess. This is not a hard guarantee but
- // a workaround while we plan the exact guarantee we want to provide.
- const size_t sample_size =
- (std::is_same<Alloc, std::allocator<char>>::value &&
- c.slot_array() == nullptr)
- ? SizeOfSlot
- : 0;
- HashtablezInfoHandle infoz =
- sample_size > 0 ? Sample(sample_size) : c.infoz();
-
- const bool has_infoz = infoz.IsSampled();
- const size_t cap = c.capacity();
- const size_t alloc_size = AllocSize(cap, SizeOfSlot, AlignOfSlot, has_infoz);
- char* mem = static_cast<char*>(
- Allocate<BackingArrayAlignment(AlignOfSlot)>(&alloc, alloc_size));
- const GenerationType old_generation = c.generation();
- c.set_generation_ptr(reinterpret_cast<GenerationType*>(
- mem + GenerationOffset(cap, has_infoz)));
- c.set_generation(NextGeneration(old_generation));
- c.set_control(reinterpret_cast<ctrl_t*>(mem + ControlOffset(has_infoz)));
- c.set_slots(mem + SlotOffset(cap, AlignOfSlot, has_infoz));
- ResetCtrl(c, SizeOfSlot);
- c.set_has_infoz(has_infoz);
- if (has_infoz) {
- infoz.RecordStorageChanged(c.size(), cap);
- c.set_infoz(infoz);
- }
+// Returns the address of the ith slot in `slot_array`, where each slot
+// occupies `slot_size` bytes.
+inline void* SlotAddress(void* slot_array, size_t slot, size_t slot_size) {
+ return reinterpret_cast<void*>(reinterpret_cast<char*>(slot_array) +
+ (slot * slot_size));
}
+// Helper class to perform resize of the hash set.
+//
+// It contains special optimizations for small group resizes.
+// See GrowIntoSingleGroupShuffleControlBytes for details.
+class HashSetResizeHelper {
+ public:
+ explicit HashSetResizeHelper(CommonFields& c)
+ : old_ctrl_(c.control()),
+ old_capacity_(c.capacity()),
+ had_infoz_(c.has_infoz()) {}
+
+  // Optimized-for-small-groups version of `find_first_non_full`, applicable
+  // only right after calling `raw_hash_set::resize`.
+  // It implicitly assumes that `resize` calls `GrowSizeIntoSingleGroup*`
+  // whenever `IsGrowingIntoSingleGroupApplicable` holds.
+  // Falls back to `find_first_non_full` for big groups, so it is also
+  // safe to use after `rehash_and_grow_if_necessary`.
+ static FindInfo FindFirstNonFullAfterResize(const CommonFields& c,
+ size_t old_capacity,
+ size_t hash) {
+ if (!IsGrowingIntoSingleGroupApplicable(old_capacity, c.capacity())) {
+ return find_first_non_full(c, hash);
+ }
+ // Find a location for the new element non-deterministically.
+ // Note that any position is correct.
+    // It will be located at `old_capacity / 2` or at one of the other
+    // empty slots, with approximately 50% probability for each case.
+ size_t offset = probe(c, hash).offset();
+
+    // Note that we intentionally rely on unsigned integer underflow here.
+ if (offset - (old_capacity + 1) >= old_capacity) {
+      // Offset falls on kSentinel or into the mostly occupied first half.
+ offset = old_capacity / 2;
+ }
+ assert(IsEmpty(c.control()[offset]));
+ return FindInfo{offset, 0};
+ }
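
To see why the wraparound check works, note that after a single-group grow the slots in the range [old_capacity + 1, new_capacity - 1] are guaranteed empty, and in the common growth step new_capacity == 2 * old_capacity + 1. A minimal standalone model (not part of the patch; the helper name `NeedsFallback` is hypothetical):

    #include <cassert>
    #include <cstddef>

    // Returns true if `offset` must be redirected to old_capacity / 2, i.e. it
    // is not one of the guaranteed-empty "second half" slots
    // [old_capacity + 1, new_capacity - 1].
    bool NeedsFallback(std::size_t offset, std::size_t old_capacity) {
      return offset - (old_capacity + 1) >= old_capacity;  // unsigned wraparound
    }

    int main() {
      const std::size_t old_capacity = 7;  // new_capacity == 2 * 7 + 1 == 15
      assert(NeedsFallback(0, old_capacity));    // first half: wraps to a huge value
      assert(NeedsFallback(7, old_capacity));    // last first-half index
      assert(!NeedsFallback(8, old_capacity));   // 8 - 8 == 0  < 7: keep it
      assert(!NeedsFallback(14, old_capacity));  // 14 - 8 == 6 < 7: keep it
      assert(NeedsFallback(15, old_capacity));   // sentinel index: 15 - 8 == 7
      // After the shuffle, the old sentinel (index 7) lands on index
      // (7 + 7 / 2 + 1) % 8 == 3 == old_capacity / 2, so the fallback slot
      // old_capacity / 2 is guaranteed to be empty.
      return 0;
    }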
+
+ ctrl_t* old_ctrl() const { return old_ctrl_; }
+ size_t old_capacity() const { return old_capacity_; }
+
+ // Allocates a backing array for the hashtable.
+ // Reads `capacity` and updates all other fields based on the result of
+ // the allocation.
+ //
+  // It may also perform the following actions:
+ // 1. initialize control bytes
+ // 2. initialize slots
+ // 3. deallocate old slots.
+ //
+ // We are bundling a lot of functionality
+ // in one ABSL_ATTRIBUTE_NOINLINE function in order to minimize binary code
+ // duplication in raw_hash_set<>::resize.
+ //
+ // `c.capacity()` must be nonzero.
+ // POSTCONDITIONS:
+ // 1. CommonFields is initialized.
+ //
+ // if IsGrowingIntoSingleGroupApplicable && TransferUsesMemcpy
+ // Both control bytes and slots are fully initialized.
+ // old_slots are deallocated.
+ // infoz.RecordRehash is called.
+ //
+ // if IsGrowingIntoSingleGroupApplicable && !TransferUsesMemcpy
+ // Control bytes are fully initialized.
+ // infoz.RecordRehash is called.
+  //    GrowSizeIntoSingleGroup must be called to finish slot initialization.
+ //
+ // if !IsGrowingIntoSingleGroupApplicable
+  //    Control bytes are initialized to an empty table via ResetCtrl.
+  //    raw_hash_set<>::resize must insert the elements via the regular
+  //    insertion path.
+ // infoz.RecordRehash is called if old_capacity == 0.
+ //
+ // Returns IsGrowingIntoSingleGroupApplicable result to avoid recomputation.
+ template <typename Alloc, size_t SizeOfSlot, bool TransferUsesMemcpy,
+ size_t AlignOfSlot>
+ ABSL_ATTRIBUTE_NOINLINE bool InitializeSlots(CommonFields& c, void* old_slots,
+ Alloc alloc) {
+ assert(c.capacity());
+ // Folks with custom allocators often make unwarranted assumptions about the
+ // behavior of their classes vis-a-vis trivial destructability and what
+ // calls they will or won't make. Avoid sampling for people with custom
+ // allocators to get us out of this mess. This is not a hard guarantee but
+ // a workaround while we plan the exact guarantee we want to provide.
+ const size_t sample_size =
+ (std::is_same<Alloc, std::allocator<char>>::value &&
+ c.slot_array() == nullptr)
+ ? SizeOfSlot
+ : 0;
+ HashtablezInfoHandle infoz =
+ sample_size > 0 ? Sample(sample_size) : c.infoz();
+
+ const bool has_infoz = infoz.IsSampled();
+ const size_t cap = c.capacity();
+ const size_t alloc_size =
+ AllocSize(cap, SizeOfSlot, AlignOfSlot, has_infoz);
+ char* mem = static_cast<char*>(
+ Allocate<BackingArrayAlignment(AlignOfSlot)>(&alloc, alloc_size));
+ const GenerationType old_generation = c.generation();
+ c.set_generation_ptr(reinterpret_cast<GenerationType*>(
+ mem + GenerationOffset(cap, has_infoz)));
+ c.set_generation(NextGeneration(old_generation));
+ c.set_control(reinterpret_cast<ctrl_t*>(mem + ControlOffset(has_infoz)));
+ c.set_slots(mem + SlotOffset(cap, AlignOfSlot, has_infoz));
+ ResetGrowthLeft(c);
+
+ const bool grow_single_group =
+ IsGrowingIntoSingleGroupApplicable(old_capacity_, c.capacity());
+ if (old_capacity_ != 0 && grow_single_group) {
+ if (TransferUsesMemcpy) {
+ GrowSizeIntoSingleGroupTransferable(c, old_slots, SizeOfSlot);
+ DeallocateOld<AlignOfSlot>(alloc, SizeOfSlot, old_slots);
+ } else {
+ GrowIntoSingleGroupShuffleControlBytes(c.control(), c.capacity());
+ }
+ } else {
+ ResetCtrl(c, SizeOfSlot);
+ }
+
+ c.set_has_infoz(has_infoz);
+ if (has_infoz) {
+ infoz.RecordStorageChanged(c.size(), cap);
+ if (grow_single_group || old_capacity_ == 0) {
+ infoz.RecordRehash(0);
+ }
+ c.set_infoz(infoz);
+ }
+ return grow_single_group;
+ }
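
As a rough summary of the three postcondition cases listed above, the following standalone sketch (not part of the patch; `kWidth`, `PostState`, and `ExpectedPostState` are made-up names) maps the same conditions `InitializeSlots` branches on to what the caller still has to do:

    #include <cassert>
    #include <cstddef>

    constexpr std::size_t kWidth = 16;  // assumed group width

    enum class PostState {
      kFullyInitialized,       // single-group growth + memcpy transfer
      kNeedsSlotTransfer,      // single-group growth; the caller must still
                               // call GrowSizeIntoSingleGroup
      kNeedsRegularInsertion,  // control bytes reset; caller reinserts elements
    };

    PostState ExpectedPostState(std::size_t old_capacity, std::size_t new_capacity,
                                bool transfer_uses_memcpy) {
      const bool grow_single_group =
          new_capacity <= kWidth && old_capacity < new_capacity;
      if (old_capacity != 0 && grow_single_group) {
        return transfer_uses_memcpy ? PostState::kFullyInitialized
                                    : PostState::kNeedsSlotTransfer;
      }
      return PostState::kNeedsRegularInsertion;
    }

    int main() {
      assert(ExpectedPostState(3, 7, /*memcpy=*/true) == PostState::kFullyInitialized);
      assert(ExpectedPostState(3, 7, /*memcpy=*/false) == PostState::kNeedsSlotTransfer);
      assert(ExpectedPostState(15, 31, /*memcpy=*/true) ==
             PostState::kNeedsRegularInsertion);  // no longer a single group
      return 0;
    }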
+
+ // Relocates slots into new single group consistent with
+ // GrowIntoSingleGroupShuffleControlBytes.
+ //
+ // PRECONDITIONS:
+ // 1. GrowIntoSingleGroupShuffleControlBytes was already called.
+ template <class PolicyTraits, class Alloc>
+ void GrowSizeIntoSingleGroup(CommonFields& c, Alloc& alloc_ref,
+ typename PolicyTraits::slot_type* old_slots) {
+ assert(old_capacity_ < Group::kWidth / 2);
+ assert(IsGrowingIntoSingleGroupApplicable(old_capacity_, c.capacity()));
+ using slot_type = typename PolicyTraits::slot_type;
+ assert(is_single_group(c.capacity()));
+
+ auto* new_slots = reinterpret_cast<slot_type*>(c.slot_array());
+
+ size_t shuffle_bit = old_capacity_ / 2 + 1;
+ for (size_t i = 0; i < old_capacity_; ++i) {
+ if (IsFull(old_ctrl_[i])) {
+ size_t new_i = i ^ shuffle_bit;
+ SanitizerUnpoisonMemoryRegion(new_slots + new_i, sizeof(slot_type));
+ PolicyTraits::transfer(&alloc_ref, new_slots + new_i, old_slots + i);
+ }
+ }
+ PoisonSingleGroupEmptySlots(c, sizeof(slot_type));
+ }
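
The XOR above is equivalent to the rotation described for GrowIntoSingleGroupShuffleControlBytes because `old_capacity + 1` is a power of two and `old_capacity / 2 + 1` is exactly half of it. A standalone check of the index mapping for the documented `old_capacity == 3` example (not part of the patch):

    #include <cassert>
    #include <cstddef>

    int main() {
      const std::size_t old_capacity = 3;
      const std::size_t shuffle_bit = old_capacity / 2 + 1;  // == 2
      std::size_t new_index[3];
      for (std::size_t i = 0; i < old_capacity; ++i) new_index[i] = i ^ shuffle_bit;
      assert(new_index[0] == 2);  // element 0 moves to slot 2
      assert(new_index[1] == 3);  // element 1 moves to slot 3
      assert(new_index[2] == 0);  // element 2 moves to slot 0
      // This is the same permutation as rotating the first old_capacity + 1
      // positions by old_capacity / 2 + 1: (i + 2) % 4 == i ^ 2 here, because
      // old_capacity + 1 is a power of two and the shift is exactly half of it.
      return 0;
    }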
+
+ // Deallocates old backing array.
+ template <size_t AlignOfSlot, class CharAlloc>
+ void DeallocateOld(CharAlloc alloc_ref, size_t slot_size, void* old_slots) {
+ SanitizerUnpoisonMemoryRegion(old_slots, slot_size * old_capacity_);
+ Deallocate<BackingArrayAlignment(AlignOfSlot)>(
+ &alloc_ref, old_ctrl_ - ControlOffset(had_infoz_),
+ AllocSize(old_capacity_, slot_size, AlignOfSlot, had_infoz_));
+ }
+
+ private:
+ // Returns true if `GrowSizeIntoSingleGroup` can be used for resizing.
+ static bool IsGrowingIntoSingleGroupApplicable(size_t old_capacity,
+ size_t new_capacity) {
+    // NOTE: `old_capacity < new_capacity` is required so that
+    // `old_capacity < Group::kWidth / 2`, which allows faster 8-byte copies.
+ return is_single_group(new_capacity) && old_capacity < new_capacity;
+ }
+
+ // Relocates control bytes and slots into new single group for
+ // transferable objects.
+ // Must be called only if IsGrowingIntoSingleGroupApplicable returned true.
+ void GrowSizeIntoSingleGroupTransferable(CommonFields& c, void* old_slots,
+ size_t slot_size);
+
+  // Shuffles control bytes deterministically into the new capacity.
+ //
+  // PRECONDITIONS:
+ // 1. new_ctrl is allocated for new_capacity,
+ // but not initialized.
+ // 2. new_capacity is a single group.
+ //
+  // All elements are transferred into the first `old_capacity + 1` positions
+  // of the new_ctrl. Elements are rotated by `old_capacity_ / 2 + 1` positions
+  // in order to change their order.
+  // Although the rotation itself is deterministic, the position of a newly
+  // added element is based on `H1` and is therefore not deterministic.
+ //
+ // Examples:
+ // S = kSentinel, E = kEmpty
+ //
+ // old_ctrl = SEEEEEEEE...
+ // new_ctrl = ESEEEEEEE...
+ //
+ // old_ctrl = 0SEEEEEEE...
+ // new_ctrl = E0ESE0EEE...
+ //
+ // old_ctrl = 012S012EEEEEEEEE...
+ // new_ctrl = 2E01EEES2E01EEE...
+ //
+ // old_ctrl = 0123456S0123456EEEEEEEEEEE...
+ // new_ctrl = 456E0123EEEEEES456E0123EEE...
+ void GrowIntoSingleGroupShuffleControlBytes(ctrl_t* new_ctrl,
+ size_t new_capacity) const;
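
The examples above can be reproduced with a small standalone model of the control-byte layout (not part of the patch; cloned bytes after the sentinel are left out):

    #include <cassert>
    #include <cstddef>
    #include <string>

    int main() {
      const std::size_t old_capacity = 3, new_capacity = 7;
      const std::string old_ctrl = "012S";  // 3 full slots + sentinel
      std::string new_ctrl(new_capacity + 1, 'E');
      // Rotate the first old_capacity + 1 bytes by old_capacity / 2 + 1;
      // the old sentinel becomes an empty slot in the new table.
      for (std::size_t i = 0; i <= old_capacity; ++i) {
        char c = old_ctrl[i] == 'S' ? 'E' : old_ctrl[i];
        new_ctrl[(i + old_capacity / 2 + 1) % (old_capacity + 1)] = c;
      }
      new_ctrl[new_capacity] = 'S';  // new sentinel terminates the table
      assert(new_ctrl == "2E01EEES");  // matches the documented example
      return 0;
    }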
+
+  // Shuffles trivially transferable slots in a way consistent with
+  // GrowIntoSingleGroupShuffleControlBytes.
+  //
+  // PRECONDITIONS:
+ // 1. old_capacity must be non-zero.
+ // 2. new_ctrl is fully initialized using
+ // GrowIntoSingleGroupShuffleControlBytes.
+ // 3. new_slots is allocated and *not* poisoned.
+ //
+ // POSTCONDITIONS:
+  //  1. new_slots are transferred from old_slots in a way consistent with
+ // GrowIntoSingleGroupShuffleControlBytes.
+ // 2. Empty new_slots are *not* poisoned.
+ void GrowIntoSingleGroupShuffleTransferableSlots(void* old_slots,
+ void* new_slots,
+ size_t slot_size) const;
+
+  // Poisons the slots that remain empty after elements were transferred using
+  // the deterministic algorithm described above.
+  // PRECONDITIONS:
+ // 1. new_ctrl is fully initialized using
+ // GrowIntoSingleGroupShuffleControlBytes.
+ // 2. new_slots is fully initialized consistent with
+ // GrowIntoSingleGroupShuffleControlBytes.
+ void PoisonSingleGroupEmptySlots(CommonFields& c, size_t slot_size) const {
+    // Poison all slots that are not full.
+ for (size_t i = 0; i < c.capacity(); ++i) {
+ if (!IsFull(c.control()[i])) {
+ SanitizerPoisonMemoryRegion(SlotAddress(c.slot_array(), i, slot_size),
+ slot_size);
+ }
+ }
+ }
+
+ ctrl_t* old_ctrl_;
+ size_t old_capacity_;
+ bool had_infoz_;
+};
+
// PolicyFunctions bundles together some information for a particular
// raw_hash_set<T, ...> instantiation. This information is passed to
// type-erased functions that want to do small amounts of type-specific
@@ -1627,6 +1854,11 @@ class raw_hash_set {
using AllocTraits = absl::allocator_traits<allocator_type>;
using SlotAlloc = typename absl::allocator_traits<
allocator_type>::template rebind_alloc<slot_type>;
+ // People are often sloppy with the exact type of their allocator (sometimes
+ // it has an extra const or is missing the pair, but rebinds made it work
+ // anyway).
+ using CharAlloc =
+ typename absl::allocator_traits<Alloc>::template rebind_alloc<char>;
using SlotAllocTraits = typename absl::allocator_traits<
allocator_type>::template rebind_traits<slot_type>;
@@ -1819,8 +2051,7 @@ class raw_hash_set {
const allocator_type& alloc = allocator_type())
: settings_(CommonFields{}, hash, eq, alloc) {
if (bucket_count) {
- common().set_capacity(NormalizeCapacity(bucket_count));
- initialize_slots();
+ resize(NormalizeCapacity(bucket_count));
}
}
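
As a usage-level illustration of this change (a sketch, not part of the patch, assuming the usual NormalizeCapacity behavior of rounding up to the next 2^k - 1):

    #include <cassert>
    #include "absl/container/flat_hash_set.h"

    int main() {
      // Requesting a bucket count at construction now routes through
      // resize(NormalizeCapacity(bucket_count)).
      absl::flat_hash_set<int> s(/*bucket_count=*/10);
      // NormalizeCapacity rounds the request up to the next 2^k - 1, i.e. 15.
      assert(s.bucket_count() == 15);
      return 0;
    }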
@@ -2616,52 +2847,63 @@ class raw_hash_set {
EraseMetaOnly(common(), it.control(), sizeof(slot_type));
}
- // Allocates a backing array for `self` and initializes its control bytes.
- // This reads `capacity` and updates all other fields based on the result of
- // the allocation.
+  // Resizes the table to the new capacity and moves all elements to their
+  // new positions.
//
- // This does not free the currently held array; `capacity` must be nonzero.
- inline void initialize_slots() {
- // People are often sloppy with the exact type of their allocator (sometimes
- // it has an extra const or is missing the pair, but rebinds made it work
- // anyway).
- using CharAlloc =
- typename absl::allocator_traits<Alloc>::template rebind_alloc<char>;
- InitializeSlots<CharAlloc, sizeof(slot_type), alignof(slot_type)>(
- common(), CharAlloc(alloc_ref()));
- }
-
+  // Note that for better performance,
+  //   HashSetResizeHelper::FindFirstNonFullAfterResize(
+  //       common(), old_capacity, hash)
+  // can be called right after `resize` instead of
+  //   find_first_non_full(common(), hash).
ABSL_ATTRIBUTE_NOINLINE void resize(size_t new_capacity) {
assert(IsValidCapacity(new_capacity));
- auto* old_ctrl = control();
+ HashSetResizeHelper resize_helper(common());
auto* old_slots = slot_array();
- const bool had_infoz = common().has_infoz();
- const size_t old_capacity = common().capacity();
common().set_capacity(new_capacity);
- initialize_slots();
-
- auto* new_slots = slot_array();
- size_t total_probe_length = 0;
- for (size_t i = 0; i != old_capacity; ++i) {
- if (IsFull(old_ctrl[i])) {
- size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
- PolicyTraits::element(old_slots + i));
- auto target = find_first_non_full(common(), hash);
- size_t new_i = target.offset;
- total_probe_length += target.probe_length;
- SetCtrl(common(), new_i, H2(hash), sizeof(slot_type));
- transfer(new_slots + new_i, old_slots + i);
- }
+    // Note that `InitializeSlots` performs a different number of
+    // initialization steps depending on the values of `transfer_uses_memcpy`
+    // and the capacities. Refer to the comment in `InitializeSlots` for more
+    // details.
+ const bool grow_single_group =
+ resize_helper.InitializeSlots<CharAlloc, sizeof(slot_type),
+ PolicyTraits::transfer_uses_memcpy(),
+ alignof(slot_type)>(
+ common(), const_cast<std::remove_const_t<slot_type>*>(old_slots),
+ CharAlloc(alloc_ref()));
+
+ if (resize_helper.old_capacity() == 0) {
+ // InitializeSlots did all the work including infoz().RecordRehash().
+ return;
}
- if (old_capacity) {
- SanitizerUnpoisonMemoryRegion(old_slots,
- sizeof(slot_type) * old_capacity);
- Deallocate<BackingArrayAlignment(alignof(slot_type))>(
- &alloc_ref(), old_ctrl - ControlOffset(had_infoz),
- AllocSize(old_capacity, sizeof(slot_type), alignof(slot_type),
- had_infoz));
+
+ if (grow_single_group) {
+ if (PolicyTraits::transfer_uses_memcpy()) {
+ // InitializeSlots did all the work.
+ return;
+ }
+ // We want GrowSizeIntoSingleGroup to be called here in order to make
+ // InitializeSlots not depend on PolicyTraits.
+ resize_helper.GrowSizeIntoSingleGroup<PolicyTraits>(common(), alloc_ref(),
+ old_slots);
+ } else {
+      // InitializeSlots prepares the control bytes to correspond to an
+      // empty table.
+ auto* new_slots = slot_array();
+ size_t total_probe_length = 0;
+ for (size_t i = 0; i != resize_helper.old_capacity(); ++i) {
+ if (IsFull(resize_helper.old_ctrl()[i])) {
+ size_t hash = PolicyTraits::apply(
+ HashElement{hash_ref()}, PolicyTraits::element(old_slots + i));
+ auto target = find_first_non_full(common(), hash);
+ size_t new_i = target.offset;
+ total_probe_length += target.probe_length;
+ SetCtrl(common(), new_i, H2(hash), sizeof(slot_type));
+ transfer(new_slots + new_i, old_slots + i);
+ }
+ }
+ infoz().RecordRehash(total_probe_length);
}
- infoz().RecordRehash(total_probe_length);
+ resize_helper.DeallocateOld<alignof(slot_type)>(
+ CharAlloc(alloc_ref()), sizeof(slot_type),
+ const_cast<std::remove_const_t<slot_type>*>(old_slots));
}
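
A behavioral sketch of what callers can rely on across these resize paths (not part of the patch): every element survives a grow, while iteration order stays unspecified, which is what permits the single-group shuffle:

    #include <cassert>
    #include "absl/container/flat_hash_set.h"

    int main() {
      absl::flat_hash_set<int> s;
      for (int i = 0; i < 20; ++i) s.insert(i);  // forces several resizes
      assert(s.size() == 20);
      for (int i = 0; i < 20; ++i) assert(s.contains(i));
      // Iteration order is unspecified and may change across resizes and runs.
      return 0;
    }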
// Prunes control bytes to remove as many tombstones as possible.
@@ -2830,8 +3072,17 @@ class raw_hash_set {
if (!rehash_for_bug_detection &&
ABSL_PREDICT_FALSE(growth_left() == 0 &&
!IsDeleted(control()[target.offset]))) {
+ size_t old_capacity = capacity();
rehash_and_grow_if_necessary();
- target = find_first_non_full(common(), hash);
+      // NOTE: It is safe to use `FindFirstNonFullAfterResize` here.
+      // `FindFirstNonFullAfterResize` must be called right after a resize.
+      // `rehash_and_grow_if_necessary` may skip `resize` and perform
+      // `drop_deletes_without_resize` instead, but this can happen only for
+      // big tables, and for big tables `FindFirstNonFullAfterResize` always
+      // falls back to the normal `find_first_non_full`, so it is safe to use.
+ target = HashSetResizeHelper::FindFirstNonFullAfterResize(
+ common(), old_capacity, hash);
}
common().increment_size();
set_growth_left(growth_left() - IsEmpty(control()[target.offset]));