summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2022-11-28 12:27:06 -0800
committerGravatar Copybara-Service <copybara-worker@google.com>2022-11-28 12:27:43 -0800
commite5a7979d36a84831652abf3138c4d76797c002b5 (patch)
treee7dc92ce03b1b04428b588f33bddf657ea97f554
parente3158086978de4ff15a2058f49ba525c89e14ce6 (diff)
Reduce flat_hash_{set,map} generated code size.
This CL makes a bunch of changes (mostly to raw_hash_set which underlies flat_hash_set and flat_hash_map). Techniques used: * Extract code that does not depend on the specific hash table type into common (non-inlined) functions. * Place ABSL_ATTRIBUTE_NOINLINE directives judiciously. * Out-of-line some slow paths. Reduces sizes of some large binaries by ~0.5%. Has no significant performance impact on a few performance critical binaries. ## Speed of fleetbench micro-benchmarks Following is a histogram of %-age changes in [fleetbench](https://github.com/google/fleetbench) hot_swissmap_benchmark results. Negative numbers indicate a speedup caused by this change. Statistically insignificant changes are mapped to zero. XXX Also run and merge in cold_swissmap_benchmark Across all 351 benchmarks, the average speedup is 0.38%. The best speedup was -25%, worst slowdown was +6.81%. ``` Count: 351 Average: -0.382764 StdDev: 3.77807 Min: -25 Median: 0.435135 Max: 6.81 --------------------------------------------- [ -25, -10) 16 4.558% 4.558% # [ -9, -8) 2 0.570% 5.128% [ -8, -7) 1 0.285% 5.413% [ -7, -6) 1 0.285% 5.698% [ -6, -5) 2 0.570% 6.268% [ -5, -4) 5 1.425% 7.692% [ -4, -3) 13 3.704% 11.396% # [ -3, -2) 15 4.274% 15.670% # [ -2, -1) 26 7.407% 23.077% ## [ -1, 0) 14 3.989% 27.066% # [ 0, 1) 185 52.707% 79.772% ############ [ 1, 2) 14 3.989% 83.761% # [ 2, 3) 8 2.279% 86.040% # [ 3, 4) 7 1.994% 88.034% [ 4, 5) 32 9.117% 97.151% ## [ 5, 6) 6 1.709% 98.860% [ 6, 7) 4 1.140% 100.000% ``` We looked at the slowdowns and they do not seem worth worrying about. E.g., the worst one was: ``` BM_FindHit_Hot<::absl::node_hash_set,64>/set_size:4096/density:0 2.61ns ± 1% 2.79ns ± 1% +6.81% (p=0.008 n=5+5) ``` ## Detailed changes * Out-of-line slow paths in hash table sampler methods. * Explicitly unregister from sampler instead of from destructor. * Introduced a non-templated CommonFields struct that holds some of the hash table fields (infoz, ctrl, slots, size, capacity). This struct can be passed to new non-templated helpers. The struct is a private base class of raw_hash_set. * Made non-inlined InitializeSlots<> that is only templated on allocator and size/alignment of the slot type so that we can share instantiations across types that have the same size/alignment. * Moved some infrequently called code paths into non-inlined type-erased. functions. Pass a suite of type-specific function pointers to these routines for when they need to operate on slots. * Marked some methods as non-inlined. * Avoid unnecessary reinitialization in destructor. * Introduce UpdateSpine type-erased helper that is called from clear() and rehash(). PiperOrigin-RevId: 491413386 Change-Id: Ia5495c5a6ec73622a785a0d260e406ddb9085a7c
-rw-r--r--absl/container/internal/hashtablez_sampler.cc46
-rw-r--r--absl/container/internal/hashtablez_sampler.h54
-rw-r--r--absl/container/internal/raw_hash_set.cc152
-rw-r--r--absl/container/internal/raw_hash_set.h652
-rw-r--r--absl/container/internal/raw_hash_set_benchmark.cc18
-rw-r--r--absl/container/internal/raw_hash_set_test.cc6
6 files changed, 579 insertions, 349 deletions
diff --git a/absl/container/internal/hashtablez_sampler.cc b/absl/container/internal/hashtablez_sampler.cc
index 5b8cf341..6b6d3491 100644
--- a/absl/container/internal/hashtablez_sampler.cc
+++ b/absl/container/internal/hashtablez_sampler.cc
@@ -14,6 +14,7 @@
#include "absl/container/internal/hashtablez_sampler.h"
+#include <algorithm>
#include <atomic>
#include <cassert>
#include <cmath>
@@ -158,6 +159,43 @@ void UnsampleSlow(HashtablezInfo* info) {
GlobalHashtablezSampler().Unregister(info);
}
+void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length) {
+#ifdef ABSL_INTERNAL_HAVE_SSE2
+ total_probe_length /= 16;
+#else
+ total_probe_length /= 8;
+#endif
+ info->total_probe_length.store(total_probe_length, std::memory_order_relaxed);
+ info->num_erases.store(0, std::memory_order_relaxed);
+ // There is only one concurrent writer, so `load` then `store` is sufficient
+ // instead of using `fetch_add`.
+ info->num_rehashes.store(
+ 1 + info->num_rehashes.load(std::memory_order_relaxed),
+ std::memory_order_relaxed);
+}
+
+void RecordReservationSlow(HashtablezInfo* info, size_t target_capacity) {
+ info->max_reserve.store(
+ (std::max)(info->max_reserve.load(std::memory_order_relaxed),
+ target_capacity),
+ std::memory_order_relaxed);
+}
+
+void RecordClearedReservationSlow(HashtablezInfo* info) {
+ info->max_reserve.store(0, std::memory_order_relaxed);
+}
+
+void RecordStorageChangedSlow(HashtablezInfo* info, size_t size,
+ size_t capacity) {
+ info->size.store(size, std::memory_order_relaxed);
+ info->capacity.store(capacity, std::memory_order_relaxed);
+ if (size == 0) {
+ // This is a clear, reset the total/num_erases too.
+ info->total_probe_length.store(0, std::memory_order_relaxed);
+ info->num_erases.store(0, std::memory_order_relaxed);
+ }
+}
+
void RecordInsertSlow(HashtablezInfo* info, size_t hash,
size_t distance_from_desired) {
// SwissTables probe in groups of 16, so scale this to count items probes and
@@ -180,6 +218,14 @@ void RecordInsertSlow(HashtablezInfo* info, size_t hash,
info->size.fetch_add(1, std::memory_order_relaxed);
}
+void RecordEraseSlow(HashtablezInfo* info) {
+ info->size.fetch_sub(1, std::memory_order_relaxed);
+ // There is only one concurrent writer, so `load` then `store` is sufficient
+ // instead of using `fetch_add`.
+ info->num_erases.store(1 + info->num_erases.load(std::memory_order_relaxed),
+ std::memory_order_relaxed);
+}
+
void SetHashtablezConfigListener(HashtablezConfigListener l) {
g_hashtablez_config_listener.store(l, std::memory_order_release);
}
diff --git a/absl/container/internal/hashtablez_sampler.h b/absl/container/internal/hashtablez_sampler.h
index a89518bb..d8fd8f34 100644
--- a/absl/container/internal/hashtablez_sampler.h
+++ b/absl/container/internal/hashtablez_sampler.h
@@ -95,55 +95,19 @@ struct HashtablezInfo : public profiling_internal::Sample<HashtablezInfo> {
size_t inline_element_size; // How big is the slot?
};
-inline void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length) {
-#ifdef ABSL_INTERNAL_HAVE_SSE2
- total_probe_length /= 16;
-#else
- total_probe_length /= 8;
-#endif
- info->total_probe_length.store(total_probe_length, std::memory_order_relaxed);
- info->num_erases.store(0, std::memory_order_relaxed);
- // There is only one concurrent writer, so `load` then `store` is sufficient
- // instead of using `fetch_add`.
- info->num_rehashes.store(
- 1 + info->num_rehashes.load(std::memory_order_relaxed),
- std::memory_order_relaxed);
-}
+void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length);
-inline void RecordReservationSlow(HashtablezInfo* info,
- size_t target_capacity) {
- info->max_reserve.store(
- (std::max)(info->max_reserve.load(std::memory_order_relaxed),
- target_capacity),
- std::memory_order_relaxed);
-}
+void RecordReservationSlow(HashtablezInfo* info, size_t target_capacity);
-inline void RecordClearedReservationSlow(HashtablezInfo* info) {
- info->max_reserve.store(0, std::memory_order_relaxed);
-}
+void RecordClearedReservationSlow(HashtablezInfo* info);
-inline void RecordStorageChangedSlow(HashtablezInfo* info, size_t size,
- size_t capacity) {
- info->size.store(size, std::memory_order_relaxed);
- info->capacity.store(capacity, std::memory_order_relaxed);
- if (size == 0) {
- // This is a clear, reset the total/num_erases too.
- info->total_probe_length.store(0, std::memory_order_relaxed);
- info->num_erases.store(0, std::memory_order_relaxed);
- }
-}
+void RecordStorageChangedSlow(HashtablezInfo* info, size_t size,
+ size_t capacity);
void RecordInsertSlow(HashtablezInfo* info, size_t hash,
size_t distance_from_desired);
-inline void RecordEraseSlow(HashtablezInfo* info) {
- info->size.fetch_sub(1, std::memory_order_relaxed);
- // There is only one concurrent writer, so `load` then `store` is sufficient
- // instead of using `fetch_add`.
- info->num_erases.store(
- 1 + info->num_erases.load(std::memory_order_relaxed),
- std::memory_order_relaxed);
-}
+void RecordEraseSlow(HashtablezInfo* info);
struct SamplingState {
int64_t next_sample;
@@ -165,7 +129,10 @@ class HashtablezInfoHandle {
public:
explicit HashtablezInfoHandle() : info_(nullptr) {}
explicit HashtablezInfoHandle(HashtablezInfo* info) : info_(info) {}
- ~HashtablezInfoHandle() {
+
+ // We do not have a destructor. Caller is responsible for calling Unregister
+ // before destroying the handle.
+ void Unregister() {
if (ABSL_PREDICT_TRUE(info_ == nullptr)) return;
UnsampleSlow(info_);
}
@@ -230,6 +197,7 @@ class HashtablezInfoHandle {
explicit HashtablezInfoHandle() = default;
explicit HashtablezInfoHandle(std::nullptr_t) {}
+ inline void Unregister() {}
inline void RecordStorageChanged(size_t /*size*/, size_t /*capacity*/) {}
inline void RecordRehash(size_t /*total_probe_length*/) {}
inline void RecordReservation(size_t /*target_capacity*/) {}
diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc
index c63a2e02..fae12793 100644
--- a/absl/container/internal/raw_hash_set.cc
+++ b/absl/container/internal/raw_hash_set.cc
@@ -63,8 +63,156 @@ void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) {
std::memcpy(ctrl + capacity + 1, ctrl, NumClonedBytes());
ctrl[capacity] = ctrl_t::kSentinel;
}
-// Extern template instantiotion for inline function.
-template FindInfo find_first_non_full(const ctrl_t*, size_t, size_t);
+// Extern template instantiation for inline function.
+template FindInfo find_first_non_full(const CommonFields&, size_t);
+
+FindInfo find_first_non_full_outofline(const CommonFields& common,
+ size_t hash) {
+ return find_first_non_full(common, hash);
+}
+
+// Return address of the ith slot in slots where each slot occupies slot_size.
+static inline void* SlotAddress(void* slot_array, size_t slot,
+ size_t slot_size) {
+ return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot_array) +
+ (slot * slot_size));
+}
+
+// Return the address of the slot just after slot assuming each slot
+// has the specified size.
+static inline void* NextSlot(void* slot, size_t slot_size) {
+ return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) + slot_size);
+}
+
+// Return the address of the slot just before slot assuming each slot
+// has the specified size.
+static inline void* PrevSlot(void* slot, size_t slot_size) {
+ return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) - slot_size);
+}
+
+void DropDeletesWithoutResize(CommonFields& common, size_t& growth_left,
+ const PolicyFunctions& policy, void* tmp_space) {
+ void* set = &common;
+ void* slot_array = common.slots_;
+ const size_t capacity = common.capacity_;
+ assert(IsValidCapacity(capacity));
+ assert(!is_small(capacity));
+ // Algorithm:
+ // - mark all DELETED slots as EMPTY
+ // - mark all FULL slots as DELETED
+ // - for each slot marked as DELETED
+ // hash = Hash(element)
+ // target = find_first_non_full(hash)
+ // if target is in the same group
+ // mark slot as FULL
+ // else if target is EMPTY
+ // transfer element to target
+ // mark slot as EMPTY
+ // mark target as FULL
+ // else if target is DELETED
+ // swap current element with target element
+ // mark target as FULL
+ // repeat procedure for current slot with moved from element (target)
+ ctrl_t* ctrl = common.control_;
+ ConvertDeletedToEmptyAndFullToDeleted(ctrl, capacity);
+ auto hasher = policy.hash_slot;
+ auto transfer = policy.transfer;
+ const size_t slot_size = policy.slot_size;
+
+ size_t total_probe_length = 0;
+ void* slot_ptr = SlotAddress(slot_array, 0, slot_size);
+ for (size_t i = 0; i != capacity;
+ ++i, slot_ptr = NextSlot(slot_ptr, slot_size)) {
+ assert(slot_ptr == SlotAddress(slot_array, i, slot_size));
+ if (!IsDeleted(ctrl[i])) continue;
+ const size_t hash = (*hasher)(set, slot_ptr);
+ const FindInfo target = find_first_non_full(common, hash);
+ const size_t new_i = target.offset;
+ total_probe_length += target.probe_length;
+
+ // Verify if the old and new i fall within the same group wrt the hash.
+ // If they do, we don't need to move the object as it falls already in the
+ // best probe we can.
+ const size_t probe_offset = probe(common, hash).offset();
+ const auto probe_index = [probe_offset, capacity](size_t pos) {
+ return ((pos - probe_offset) & capacity) / Group::kWidth;
+ };
+
+ // Element doesn't move.
+ if (ABSL_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) {
+ SetCtrl(common, i, H2(hash), slot_size);
+ continue;
+ }
+
+ void* new_slot_ptr = SlotAddress(slot_array, new_i, slot_size);
+ if (IsEmpty(ctrl[new_i])) {
+ // Transfer element to the empty spot.
+ // SetCtrl poisons/unpoisons the slots so we have to call it at the
+ // right time.
+ SetCtrl(common, new_i, H2(hash), slot_size);
+ (*transfer)(set, new_slot_ptr, slot_ptr);
+ SetCtrl(common, i, ctrl_t::kEmpty, slot_size);
+ } else {
+ assert(IsDeleted(ctrl[new_i]));
+ SetCtrl(common, new_i, H2(hash), slot_size);
+ // Until we are done rehashing, DELETED marks previously FULL slots.
+
+ // Swap i and new_i elements.
+ (*transfer)(set, tmp_space, new_slot_ptr);
+ (*transfer)(set, new_slot_ptr, slot_ptr);
+ (*transfer)(set, slot_ptr, tmp_space);
+
+ // repeat the processing of the ith slot
+ --i;
+ slot_ptr = PrevSlot(slot_ptr, slot_size);
+ }
+ }
+ ResetGrowthLeft(common, growth_left);
+ common.infoz().RecordRehash(total_probe_length);
+}
+
+void EraseMetaOnly(CommonFields& c, size_t& growth_left, ctrl_t* it,
+ size_t slot_size) {
+ assert(IsFull(*it) && "erasing a dangling iterator");
+ --c.size_;
+ const auto index = static_cast<size_t>(it - c.control_);
+ const size_t index_before = (index - Group::kWidth) & c.capacity_;
+ const auto empty_after = Group(it).MaskEmpty();
+ const auto empty_before = Group(c.control_ + index_before).MaskEmpty();
+
+ // We count how many consecutive non empties we have to the right and to the
+ // left of `it`. If the sum is >= kWidth then there is at least one probe
+ // window that might have seen a full group.
+ bool was_never_full =
+ empty_before && empty_after &&
+ static_cast<size_t>(empty_after.TrailingZeros() +
+ empty_before.LeadingZeros()) < Group::kWidth;
+
+ SetCtrl(c, index, was_never_full ? ctrl_t::kEmpty : ctrl_t::kDeleted,
+ slot_size);
+ growth_left += (was_never_full ? 1 : 0);
+ c.infoz().RecordErase();
+}
+
+void ClearBackingArray(CommonFields& c, size_t& growth_left,
+ const PolicyFunctions& policy, bool reuse) {
+ if (reuse) {
+ c.size_ = 0;
+ ResetCtrl(c, growth_left, policy.slot_size);
+ c.infoz().RecordStorageChanged(0, c.capacity_);
+ } else {
+ void* set = &c;
+ (*policy.dealloc)(set, policy, c.control_, c.slots_, c.capacity_);
+ c.control_ = EmptyGroup();
+ c.slots_ = nullptr;
+ c.size_ = 0;
+ c.capacity_ = 0;
+ growth_left = 0;
+ c.infoz().RecordClearedReservation();
+ assert(c.size_ == 0);
+ c.infoz().RecordStorageChanged(0, 0);
+ }
+}
} // namespace container_internal
ABSL_NAMESPACE_END
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index de455d6c..ab59856e 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -717,6 +717,61 @@ using Group = GroupAArch64Impl;
using Group = GroupPortableImpl;
#endif
+// CommonFields hold the fields in raw_hash_set that do not depend
+// on template parameters. This allows us to conveniently pass all
+// of this state to helper functions as a single argument.
+//
+// We make HashtablezInfoHandle a base class to take advantage of
+// the empty base-class optimization when sampling is turned off.
+class CommonFields : public HashtablezInfoHandle {
+ public:
+ CommonFields() = default;
+
+ // Not copyable
+ CommonFields(const CommonFields&) = delete;
+ CommonFields& operator=(const CommonFields&) = delete;
+
+ // Movable
+ CommonFields(CommonFields&& that)
+ : HashtablezInfoHandle(
+ std::move(static_cast<HashtablezInfoHandle&&>(that))),
+ // Explicitly copying fields into "this" and then resetting "that"
+ // fields generates less code then calling absl::exchange per field.
+ control_(that.control_),
+ slots_(that.slots_),
+ size_(that.size_),
+ capacity_(that.capacity_) {
+ that.control_ = EmptyGroup();
+ that.slots_ = nullptr;
+ that.size_ = 0;
+ that.capacity_ = 0;
+ }
+ CommonFields& operator=(CommonFields&&) = default;
+
+ // TODO(b/259599413): Investigate removing some of these fields:
+ // - control/slots can be derived from each other
+ // - size can be moved into the slot array
+
+ // The control bytes (and, also, a pointer to the base of the backing array).
+ //
+ // This contains `capacity + 1 + NumClonedBytes()` entries, even
+ // when the table is empty (hence EmptyGroup).
+ ctrl_t* control_ = EmptyGroup();
+
+ // The beginning of the slots, located at `SlotOffset()` bytes after
+ // `control`. May be null for empty tables.
+ void* slots_ = nullptr;
+
+ // The number of filled slots.
+ size_t size_ = 0;
+
+ // The total number of available slots.
+ size_t capacity_ = 0;
+
+ HashtablezInfoHandle& infoz() { return *this; }
+ const HashtablezInfoHandle& infoz() const { return *this; }
+};
+
// Returns he number of "cloned control bytes".
//
// This is the number of control bytes that are present both at the beginning
@@ -866,9 +921,10 @@ struct FindInfo {
// `ShouldInsertBackwards()` for small tables.
inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; }
-// Begins a probing operation on `ctrl`, using `hash`.
-inline probe_seq<Group::kWidth> probe(const ctrl_t* ctrl, size_t hash,
- size_t capacity) {
+// Begins a probing operation on `common.control`, using `hash`.
+inline probe_seq<Group::kWidth> probe(const CommonFields& common, size_t hash) {
+ const ctrl_t* ctrl = common.control_;
+ const size_t capacity = common.capacity_;
return probe_seq<Group::kWidth>(H1(hash, ctrl), capacity);
}
@@ -880,9 +936,9 @@ inline probe_seq<Group::kWidth> probe(const ctrl_t* ctrl, size_t hash,
// NOTE: this function must work with tables having both empty and deleted
// slots in the same group. Such tables appear during `erase()`.
template <typename = void>
-inline FindInfo find_first_non_full(const ctrl_t* ctrl, size_t hash,
- size_t capacity) {
- auto seq = probe(ctrl, hash, capacity);
+inline FindInfo find_first_non_full(const CommonFields& common, size_t hash) {
+ auto seq = probe(common, hash);
+ const ctrl_t* ctrl = common.control_;
while (true) {
Group g{ctrl + seq.offset()};
auto mask = g.MaskEmptyOrDeleted();
@@ -892,55 +948,68 @@ inline FindInfo find_first_non_full(const ctrl_t* ctrl, size_t hash,
// In debug build we will randomly insert in either the front or back of
// the group.
// TODO(kfm,sbenza): revisit after we do unconditional mixing
- if (!is_small(capacity) && ShouldInsertBackwards(hash, ctrl)) {
+ if (!is_small(common.capacity_) && ShouldInsertBackwards(hash, ctrl)) {
return {seq.offset(mask.HighestBitSet()), seq.index()};
}
#endif
return {seq.offset(mask.LowestBitSet()), seq.index()};
}
seq.next();
- assert(seq.index() <= capacity && "full table!");
+ assert(seq.index() <= common.capacity_ && "full table!");
}
}
// Extern template for inline function keep possibility of inlining.
// When compiler decided to not inline, no symbols will be added to the
// corresponding translation unit.
-extern template FindInfo find_first_non_full(const ctrl_t*, size_t, size_t);
+extern template FindInfo find_first_non_full(const CommonFields&, size_t);
+
+// Non-inlined version of find_first_non_full for use in less
+// performance critical routines.
+FindInfo find_first_non_full_outofline(const CommonFields&, size_t);
+
+inline void ResetGrowthLeft(CommonFields& common, size_t& growth_left) {
+ growth_left = CapacityToGrowth(common.capacity_) - common.size_;
+}
// Sets `ctrl` to `{kEmpty, kSentinel, ..., kEmpty}`, marking the entire
// array as marked as empty.
-inline void ResetCtrl(size_t capacity, ctrl_t* ctrl, const void* slot,
+inline void ResetCtrl(CommonFields& common, size_t& growth_left,
size_t slot_size) {
+ const size_t capacity = common.capacity_;
+ ctrl_t* ctrl = common.control_;
std::memset(ctrl, static_cast<int8_t>(ctrl_t::kEmpty),
capacity + 1 + NumClonedBytes());
ctrl[capacity] = ctrl_t::kSentinel;
- SanitizerPoisonMemoryRegion(slot, slot_size * capacity);
+ SanitizerPoisonMemoryRegion(common.slots_, slot_size * capacity);
+ ResetGrowthLeft(common, growth_left);
}
// Sets `ctrl[i]` to `h`.
//
// Unlike setting it directly, this function will perform bounds checks and
// mirror the value to the cloned tail if necessary.
-inline void SetCtrl(size_t i, ctrl_t h, size_t capacity, ctrl_t* ctrl,
- const void* slot, size_t slot_size) {
+inline void SetCtrl(const CommonFields& common, size_t i, ctrl_t h,
+ size_t slot_size) {
+ const size_t capacity = common.capacity_;
assert(i < capacity);
- auto* slot_i = static_cast<const char*>(slot) + i * slot_size;
+ auto* slot_i = static_cast<const char*>(common.slots_) + i * slot_size;
if (IsFull(h)) {
SanitizerUnpoisonMemoryRegion(slot_i, slot_size);
} else {
SanitizerPoisonMemoryRegion(slot_i, slot_size);
}
+ ctrl_t* ctrl = common.control_;
ctrl[i] = h;
ctrl[((i - NumClonedBytes()) & capacity) + (NumClonedBytes() & capacity)] = h;
}
// Overload for setting to an occupied `h2_t` rather than a special `ctrl_t`.
-inline void SetCtrl(size_t i, h2_t h, size_t capacity, ctrl_t* ctrl,
- const void* slot, size_t slot_size) {
- SetCtrl(i, static_cast<ctrl_t>(h), capacity, ctrl, slot, slot_size);
+inline void SetCtrl(const CommonFields& common, size_t i, h2_t h,
+ size_t slot_size) {
+ SetCtrl(common, i, static_cast<ctrl_t>(h), slot_size);
}
// Given the capacity of a table, computes the offset (from the start of the
@@ -957,6 +1026,82 @@ inline size_t AllocSize(size_t capacity, size_t slot_size, size_t slot_align) {
return SlotOffset(capacity, slot_align) + capacity * slot_size;
}
+template <typename Alloc, size_t SizeOfSlot, size_t AlignOfSlot>
+ABSL_ATTRIBUTE_NOINLINE void InitializeSlots(CommonFields& c,
+ size_t& growth_left, Alloc alloc) {
+ assert(c.capacity_);
+ // Folks with custom allocators often make unwarranted assumptions about the
+ // behavior of their classes vis-a-vis trivial destructability and what
+ // calls they will or won't make. Avoid sampling for people with custom
+ // allocators to get us out of this mess. This is not a hard guarantee but
+ // a workaround while we plan the exact guarantee we want to provide.
+ const size_t sample_size =
+ (std::is_same<Alloc, std::allocator<char>>::value && c.slots_ == nullptr)
+ ? SizeOfSlot
+ : 0;
+
+ const size_t cap = c.capacity_;
+ char* mem = static_cast<char*>(
+ Allocate<AlignOfSlot>(&alloc, AllocSize(cap, SizeOfSlot, AlignOfSlot)));
+ c.control_ = reinterpret_cast<ctrl_t*>(mem);
+ c.slots_ = mem + SlotOffset(cap, AlignOfSlot);
+ ResetCtrl(c, growth_left, SizeOfSlot);
+ if (sample_size) {
+ c.infoz() = Sample(sample_size);
+ }
+ c.infoz().RecordStorageChanged(c.size_, cap);
+}
+
+// PolicyFunctions bundles together some information for a particular
+// raw_hash_set<T, ...> instantiation. This information is passed to
+// type-erased functions that want to do small amounts of type-specific
+// work.
+struct PolicyFunctions {
+ size_t slot_size;
+ size_t slot_align;
+
+ // Return the hash of the pointed-to slot.
+ size_t (*hash_slot)(void* set, void* slot);
+
+ // Transfer the contents of src_slot to dst_slot.
+ void (*transfer)(void* set, void* dst_slot, void* src_slot);
+
+ // Deallocate the specified backing store which is sized for n slots.
+ void (*dealloc)(void* set, const PolicyFunctions& policy, ctrl_t* ctrl,
+ void* slot_array, size_t n);
+};
+
+// ClearBackingArray clears the backing array, either modifying it in place,
+// or creating a new one based on the value of "reuse".
+// REQUIRES: c.capacity > 0
+void ClearBackingArray(CommonFields& c, size_t& growth_left,
+ const PolicyFunctions& policy, bool reuse);
+
+// Type-erased version of raw_hash_set::erase_meta_only.
+void EraseMetaOnly(CommonFields& c, size_t& growth_left, ctrl_t* it,
+ size_t slot_size);
+
+// Function to place in PolicyFunctions::dealloc for raw_hash_sets
+// that are using std::allocator. This allows us to share the same
+// function body for raw_hash_set instantiations that have the
+// same slot alignment.
+template <size_t AlignOfSlot>
+ABSL_ATTRIBUTE_NOINLINE void DeallocateStandard(void*,
+ const PolicyFunctions& policy,
+ ctrl_t* ctrl, void* slot_array,
+ size_t n) {
+ // Unpoison before returning the memory to the allocator.
+ SanitizerUnpoisonMemoryRegion(slot_array, policy.slot_size * n);
+
+ std::allocator<char> alloc;
+ Deallocate<AlignOfSlot>(&alloc, ctrl,
+ AllocSize(n, policy.slot_size, AlignOfSlot));
+}
+
+// Type-erased version of raw_hash_set::drop_deletes_without_resize.
+void DropDeletesWithoutResize(CommonFields& common, size_t& growth_left,
+ const PolicyFunctions& policy, void* tmp_space);
+
// A SwissTable.
//
// Policy: a policy defines how to perform different operations on
@@ -978,7 +1123,7 @@ inline size_t AllocSize(size_t capacity, size_t slot_size, size_t slot_align) {
// the storage of the hashtable will be allocated and the elements will be
// constructed and destroyed.
template <class Policy, class Hash, class Eq, class Alloc>
-class raw_hash_set {
+class raw_hash_set : private CommonFields {
using PolicyTraits = hash_policy_traits<Policy>;
using KeyArgImpl =
KeyArg<IsTransparent<Eq>::value && IsTransparent<Hash>::value>;
@@ -1181,14 +1326,13 @@ class raw_hash_set {
std::is_nothrow_default_constructible<key_equal>::value&&
std::is_nothrow_default_constructible<allocator_type>::value) {}
- explicit raw_hash_set(size_t bucket_count,
- const hasher& hash = hasher(),
- const key_equal& eq = key_equal(),
- const allocator_type& alloc = allocator_type())
- : ctrl_(EmptyGroup()),
- settings_(0u, HashtablezInfoHandle(), hash, eq, alloc) {
+ ABSL_ATTRIBUTE_NOINLINE explicit raw_hash_set(
+ size_t bucket_count, const hasher& hash = hasher(),
+ const key_equal& eq = key_equal(),
+ const allocator_type& alloc = allocator_type())
+ : settings_(0u, hash, eq, alloc) {
if (bucket_count) {
- capacity_ = NormalizeCapacity(bucket_count);
+ common().capacity_ = NormalizeCapacity(bucket_count);
initialize_slots();
}
}
@@ -1295,47 +1439,31 @@ class raw_hash_set {
// than a full `insert`.
for (const auto& v : that) {
const size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, v);
- auto target = find_first_non_full(ctrl_, hash, capacity_);
- SetCtrl(target.offset, H2(hash), capacity_, ctrl_, slots_,
- sizeof(slot_type));
+ auto target = find_first_non_full_outofline(common(), hash);
+ SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type));
emplace_at(target.offset, v);
infoz().RecordInsert(hash, target.probe_length);
}
- size_ = that.size();
+ common().size_ = that.size();
growth_left() -= that.size();
}
- raw_hash_set(raw_hash_set&& that) noexcept(
+ ABSL_ATTRIBUTE_NOINLINE raw_hash_set(raw_hash_set&& that) noexcept(
std::is_nothrow_copy_constructible<hasher>::value&&
std::is_nothrow_copy_constructible<key_equal>::value&&
std::is_nothrow_copy_constructible<allocator_type>::value)
- : ctrl_(absl::exchange(that.ctrl_, EmptyGroup())),
- slots_(absl::exchange(that.slots_, nullptr)),
- size_(absl::exchange(that.size_, size_t{0})),
- capacity_(absl::exchange(that.capacity_, size_t{0})),
- // Hash, equality and allocator are copied instead of moved because
- // `that` must be left valid. If Hash is std::function<Key>, moving it
- // would create a nullptr functor that cannot be called.
+ : // Hash, equality and allocator are copied instead of moved because
+ // `that` must be left valid. If Hash is std::function<Key>, moving it
+ // would create a nullptr functor that cannot be called.
+ CommonFields(std::move(that)),
settings_(absl::exchange(that.growth_left(), size_t{0}),
- absl::exchange(that.infoz(), HashtablezInfoHandle()),
- that.hash_ref(),
- that.eq_ref(),
- that.alloc_ref()) {}
+ that.hash_ref(), that.eq_ref(), that.alloc_ref()) {}
raw_hash_set(raw_hash_set&& that, const allocator_type& a)
- : ctrl_(EmptyGroup()),
- slots_(nullptr),
- size_(0),
- capacity_(0),
- settings_(0, HashtablezInfoHandle(), that.hash_ref(), that.eq_ref(),
- a) {
+ : settings_(0, that.hash_ref(), that.eq_ref(), a) {
if (a == that.alloc_ref()) {
- std::swap(ctrl_, that.ctrl_);
- std::swap(slots_, that.slots_);
- std::swap(size_, that.size_);
- std::swap(capacity_, that.capacity_);
+ std::swap(common(), that.common());
std::swap(growth_left(), that.growth_left());
- std::swap(infoz(), that.infoz());
} else {
reserve(that.size());
// Note: this will copy elements of dense_set and unordered_set instead of
@@ -1364,7 +1492,19 @@ class raw_hash_set {
typename AllocTraits::propagate_on_container_move_assignment());
}
- ~raw_hash_set() { destroy_slots(/*reset=*/false); }
+ ~raw_hash_set() {
+ const size_t cap = capacity();
+ if (!cap) return;
+ destroy_slots();
+
+ // Unpoison before returning the memory to the allocator.
+ SanitizerUnpoisonMemoryRegion(slot_array(), sizeof(slot_type) * cap);
+ Deallocate<alignof(slot_type)>(
+ &alloc_ref(), control(),
+ AllocSize(cap, sizeof(slot_type), alignof(slot_type)));
+
+ infoz().Unregister();
+ }
iterator begin() {
auto it = iterator_at(0);
@@ -1381,8 +1521,8 @@ class raw_hash_set {
const_iterator cend() const { return end(); }
bool empty() const { return !size(); }
- size_t size() const { return size_; }
- size_t capacity() const { return capacity_; }
+ size_t size() const { return common().size_; }
+ size_t capacity() const { return common().capacity_; }
size_t max_size() const { return (std::numeric_limits<size_t>::max)(); }
ABSL_ATTRIBUTE_REINITIALIZES void clear() {
@@ -1393,22 +1533,25 @@ class raw_hash_set {
// compared to destruction of the elements of the container. So we pick the
// largest bucket_count() threshold for which iteration is still fast and
// past that we simply deallocate the array.
- if (capacity_ > 127) {
- destroy_slots(/*reset=*/true);
-
- infoz().RecordClearedReservation();
- } else if (capacity_) {
- for (size_t i = 0; i != capacity_; ++i) {
- if (IsFull(ctrl_[i])) {
- PolicyTraits::destroy(&alloc_ref(), slots_ + i);
- }
+ const size_t cap = capacity();
+ if (cap == 0) {
+ // Already guaranteed to be empty; so nothing to do.
+ } else {
+ destroy_slots();
+ ClearBackingArray(common(), growth_left(), kPolicyFunctions,
+ /*reuse=*/cap < 128);
+ }
+ }
+
+ inline void destroy_slots() {
+ const size_t cap = capacity();
+ const ctrl_t* ctrl = control();
+ slot_type* slot = slot_array();
+ for (size_t i = 0; i != cap; ++i) {
+ if (IsFull(ctrl[i])) {
+ PolicyTraits::destroy(&alloc_ref(), slot + i);
}
- size_ = 0;
- ResetCtrl(capacity_, ctrl_, slots_, sizeof(slot_type));
- reset_growth_left();
}
- assert(empty());
- infoz().RecordStorageChanged(0, capacity_);
}
// This overload kicks in when the argument is an rvalue of insertable and
@@ -1596,7 +1739,7 @@ class raw_hash_set {
iterator lazy_emplace(const key_arg<K>& key, F&& f) {
auto res = find_or_prepare_insert(key);
if (res.second) {
- slot_type* slot = slots_ + res.first;
+ slot_type* slot = slot_array() + res.first;
std::forward<F>(f)(constructor(&alloc_ref(), &slot));
assert(!slot);
}
@@ -1692,24 +1835,19 @@ class raw_hash_set {
IsNoThrowSwappable<allocator_type>(
typename AllocTraits::propagate_on_container_swap{})) {
using std::swap;
- swap(ctrl_, that.ctrl_);
- swap(slots_, that.slots_);
- swap(size_, that.size_);
- swap(capacity_, that.capacity_);
+ swap(common(), that.common());
swap(growth_left(), that.growth_left());
swap(hash_ref(), that.hash_ref());
swap(eq_ref(), that.eq_ref());
- swap(infoz(), that.infoz());
SwapAlloc(alloc_ref(), that.alloc_ref(),
typename AllocTraits::propagate_on_container_swap{});
}
void rehash(size_t n) {
- if (n == 0 && capacity_ == 0) return;
- if (n == 0 && size_ == 0) {
- destroy_slots(/*reset=*/true);
- infoz().RecordStorageChanged(0, 0);
- infoz().RecordClearedReservation();
+ if (n == 0 && capacity() == 0) return;
+ if (n == 0 && size() == 0) {
+ ClearBackingArray(common(), growth_left(), kPolicyFunctions,
+ /*reuse=*/false);
return;
}
@@ -1717,7 +1855,7 @@ class raw_hash_set {
// power-of-2-minus-1, so bitor is good enough.
auto m = NormalizeCapacity(n | GrowthToLowerboundCapacity(size()));
// n == 0 unconditionally rehashes as per the standard.
- if (n == 0 || m > capacity_) {
+ if (n == 0 || m > capacity()) {
resize(m);
// This is after resize, to ensure that we have completed the allocation
@@ -1762,9 +1900,9 @@ class raw_hash_set {
// Avoid probing if we won't be able to prefetch the addresses received.
#ifdef ABSL_INTERNAL_HAVE_PREFETCH
prefetch_heap_block();
- auto seq = probe(ctrl_, hash_ref()(key), capacity_);
- base_internal::PrefetchT0(ctrl_ + seq.offset());
- base_internal::PrefetchT0(slots_ + seq.offset());
+ auto seq = probe(common(), hash_ref()(key));
+ base_internal::PrefetchT0(control() + seq.offset());
+ base_internal::PrefetchT0(slot_array() + seq.offset());
#endif // ABSL_INTERNAL_HAVE_PREFETCH
}
@@ -1777,18 +1915,20 @@ class raw_hash_set {
// called heterogeneous key support.
template <class K = key_type>
iterator find(const key_arg<K>& key, size_t hash) {
- auto seq = probe(ctrl_, hash, capacity_);
+ auto seq = probe(common(), hash);
+ slot_type* slot_ptr = slot_array();
+ const ctrl_t* ctrl = control();
while (true) {
- Group g{ctrl_ + seq.offset()};
+ Group g{ctrl + seq.offset()};
for (uint32_t i : g.Match(H2(hash))) {
if (ABSL_PREDICT_TRUE(PolicyTraits::apply(
EqualElement<K>{key, eq_ref()},
- PolicyTraits::element(slots_ + seq.offset(i)))))
+ PolicyTraits::element(slot_ptr + seq.offset(i)))))
return iterator_at(seq.offset(i));
}
if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return end();
seq.next();
- assert(seq.index() <= capacity_ && "full table!");
+ assert(seq.index() <= capacity() && "full table!");
}
}
template <class K = key_type>
@@ -1826,9 +1966,9 @@ class raw_hash_set {
return {it, it};
}
- size_t bucket_count() const { return capacity_; }
+ size_t bucket_count() const { return capacity(); }
float load_factor() const {
- return capacity_ ? static_cast<double>(size()) / capacity_ : 0.0;
+ return capacity() ? static_cast<double>(size()) / capacity() : 0.0;
}
float max_load_factor() const { return 1.0f; }
void max_load_factor(float) {
@@ -1915,7 +2055,8 @@ class raw_hash_set {
std::pair<iterator, bool> operator()(const K& key, Args&&...) && {
auto res = s.find_or_prepare_insert(key);
if (res.second) {
- PolicyTraits::transfer(&s.alloc_ref(), s.slots_ + res.first, &slot);
+ PolicyTraits::transfer(&s.alloc_ref(), s.slot_array() + res.first,
+ &slot);
} else if (do_destroy) {
PolicyTraits::destroy(&s.alloc_ref(), &slot);
}
@@ -1931,104 +2072,43 @@ class raw_hash_set {
// This merely updates the pertinent control byte. This can be used in
// conjunction with Policy::transfer to move the object to another place.
void erase_meta_only(const_iterator it) {
- assert(IsFull(*it.inner_.ctrl_) && "erasing a dangling iterator");
- --size_;
- const size_t index = static_cast<size_t>(it.inner_.ctrl_ - ctrl_);
- const size_t index_before = (index - Group::kWidth) & capacity_;
- const auto empty_after = Group(it.inner_.ctrl_).MaskEmpty();
- const auto empty_before = Group(ctrl_ + index_before).MaskEmpty();
-
- // We count how many consecutive non empties we have to the right and to the
- // left of `it`. If the sum is >= kWidth then there is at least one probe
- // window that might have seen a full group.
- bool was_never_full =
- empty_before && empty_after &&
- static_cast<size_t>(empty_after.TrailingZeros() +
- empty_before.LeadingZeros()) < Group::kWidth;
-
- SetCtrl(index, was_never_full ? ctrl_t::kEmpty : ctrl_t::kDeleted,
- capacity_, ctrl_, slots_, sizeof(slot_type));
- growth_left() += was_never_full;
- infoz().RecordErase();
+ EraseMetaOnly(common(), growth_left(), it.inner_.ctrl_, sizeof(slot_type));
}
// Allocates a backing array for `self` and initializes its control bytes.
- // This reads `capacity_` and updates all other fields based on the result of
+ // This reads `capacity` and updates all other fields based on the result of
// the allocation.
//
- // This does not free the currently held array; `capacity_` must be nonzero.
- void initialize_slots() {
- assert(capacity_);
- // Folks with custom allocators often make unwarranted assumptions about the
- // behavior of their classes vis-a-vis trivial destructability and what
- // calls they will or wont make. Avoid sampling for people with custom
- // allocators to get us out of this mess. This is not a hard guarantee but
- // a workaround while we plan the exact guarantee we want to provide.
- //
+ // This does not free the currently held array; `capacity` must be nonzero.
+ inline void initialize_slots() {
// People are often sloppy with the exact type of their allocator (sometimes
// it has an extra const or is missing the pair, but rebinds made it work
- // anyway). To avoid the ambiguity, we work off SlotAlloc which we have
- // bound more carefully.
- if (std::is_same<SlotAlloc, std::allocator<slot_type>>::value &&
- slots_ == nullptr) {
- infoz() = Sample(sizeof(slot_type));
- }
-
- char* mem = static_cast<char*>(Allocate<alignof(slot_type)>(
- &alloc_ref(),
- AllocSize(capacity_, sizeof(slot_type), alignof(slot_type))));
- ctrl_ = reinterpret_cast<ctrl_t*>(mem);
- slots_ = reinterpret_cast<slot_type*>(
- mem + SlotOffset(capacity_, alignof(slot_type)));
- ResetCtrl(capacity_, ctrl_, slots_, sizeof(slot_type));
- reset_growth_left();
- infoz().RecordStorageChanged(size_, capacity_);
- }
-
- // Destroys all slots in the backing array, frees the backing array,
- // If reset is true, also clears all top-level book-keeping data.
- //
- // This essentially implements `map = raw_hash_set();`.
- void destroy_slots(bool reset) {
- if (!capacity_) return;
- for (size_t i = 0; i != capacity_; ++i) {
- if (IsFull(ctrl_[i])) {
- PolicyTraits::destroy(&alloc_ref(), slots_ + i);
- }
- }
-
- // Unpoison before returning the memory to the allocator.
- SanitizerUnpoisonMemoryRegion(slots_, sizeof(slot_type) * capacity_);
- Deallocate<alignof(slot_type)>(
- &alloc_ref(), ctrl_,
- AllocSize(capacity_, sizeof(slot_type), alignof(slot_type)));
- if (reset) {
- ctrl_ = EmptyGroup();
- slots_ = nullptr;
- size_ = 0;
- capacity_ = 0;
- growth_left() = 0;
- }
+ // anyway).
+ using CharAlloc =
+ typename absl::allocator_traits<Alloc>::template rebind_alloc<char>;
+ InitializeSlots<CharAlloc, sizeof(slot_type), alignof(slot_type)>(
+ common(), growth_left(), CharAlloc(alloc_ref()));
}
- void resize(size_t new_capacity) {
+ ABSL_ATTRIBUTE_NOINLINE void resize(size_t new_capacity) {
assert(IsValidCapacity(new_capacity));
- auto* old_ctrl = ctrl_;
- auto* old_slots = slots_;
- const size_t old_capacity = capacity_;
- capacity_ = new_capacity;
+ auto* old_ctrl = control();
+ auto* old_slots = slot_array();
+ const size_t old_capacity = common().capacity_;
+ common().capacity_ = new_capacity;
initialize_slots();
+ auto* new_slots = slot_array();
size_t total_probe_length = 0;
for (size_t i = 0; i != old_capacity; ++i) {
if (IsFull(old_ctrl[i])) {
size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
PolicyTraits::element(old_slots + i));
- auto target = find_first_non_full(ctrl_, hash, capacity_);
+ auto target = find_first_non_full(common(), hash);
size_t new_i = target.offset;
total_probe_length += target.probe_length;
- SetCtrl(new_i, H2(hash), capacity_, ctrl_, slots_, sizeof(slot_type));
- PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, old_slots + i);
+ SetCtrl(common(), new_i, H2(hash), sizeof(slot_type));
+ PolicyTraits::transfer(&alloc_ref(), new_slots + new_i, old_slots + i);
}
}
if (old_capacity) {
@@ -2044,70 +2124,10 @@ class raw_hash_set {
// Prunes control bytes to remove as many tombstones as possible.
//
// See the comment on `rehash_and_grow_if_necessary()`.
- void drop_deletes_without_resize() ABSL_ATTRIBUTE_NOINLINE {
- assert(IsValidCapacity(capacity_));
- assert(!is_small(capacity_));
- // Algorithm:
- // - mark all DELETED slots as EMPTY
- // - mark all FULL slots as DELETED
- // - for each slot marked as DELETED
- // hash = Hash(element)
- // target = find_first_non_full(hash)
- // if target is in the same group
- // mark slot as FULL
- // else if target is EMPTY
- // transfer element to target
- // mark slot as EMPTY
- // mark target as FULL
- // else if target is DELETED
- // swap current element with target element
- // mark target as FULL
- // repeat procedure for current slot with moved from element (target)
- ConvertDeletedToEmptyAndFullToDeleted(ctrl_, capacity_);
- alignas(slot_type) unsigned char raw[sizeof(slot_type)];
- size_t total_probe_length = 0;
- slot_type* slot = reinterpret_cast<slot_type*>(&raw);
- for (size_t i = 0; i != capacity_; ++i) {
- if (!IsDeleted(ctrl_[i])) continue;
- const size_t hash = PolicyTraits::apply(
- HashElement{hash_ref()}, PolicyTraits::element(slots_ + i));
- const FindInfo target = find_first_non_full(ctrl_, hash, capacity_);
- const size_t new_i = target.offset;
- total_probe_length += target.probe_length;
-
- // Verify if the old and new i fall within the same group wrt the hash.
- // If they do, we don't need to move the object as it falls already in the
- // best probe we can.
- const size_t probe_offset = probe(ctrl_, hash, capacity_).offset();
- const auto probe_index = [probe_offset, this](size_t pos) {
- return ((pos - probe_offset) & capacity_) / Group::kWidth;
- };
-
- // Element doesn't move.
- if (ABSL_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) {
- SetCtrl(i, H2(hash), capacity_, ctrl_, slots_, sizeof(slot_type));
- continue;
- }
- if (IsEmpty(ctrl_[new_i])) {
- // Transfer element to the empty spot.
- // SetCtrl poisons/unpoisons the slots so we have to call it at the
- // right time.
- SetCtrl(new_i, H2(hash), capacity_, ctrl_, slots_, sizeof(slot_type));
- PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slots_ + i);
- SetCtrl(i, ctrl_t::kEmpty, capacity_, ctrl_, slots_, sizeof(slot_type));
- } else {
- assert(IsDeleted(ctrl_[new_i]));
- SetCtrl(new_i, H2(hash), capacity_, ctrl_, slots_, sizeof(slot_type));
- // Until we are done rehashing, DELETED marks previously FULL slots.
- // Swap i and new_i elements.
- PolicyTraits::transfer(&alloc_ref(), slot, slots_ + i);
- PolicyTraits::transfer(&alloc_ref(), slots_ + i, slots_ + new_i);
- PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slot);
- --i; // repeat
- }
- }
- reset_growth_left();
- infoz().RecordRehash(total_probe_length);
+ inline void drop_deletes_without_resize() {
+ // Stack-allocate space for swapping elements.
+ alignas(slot_type) unsigned char tmp[sizeof(slot_type)];
+ DropDeletesWithoutResize(common(), growth_left(), kPolicyFunctions, tmp);
}
// Called whenever the table *might* need to conditionally grow.
@@ -2116,14 +2136,13 @@ class raw_hash_set {
// growth is unnecessary, because vacating tombstones is beneficial for
// performance in the long-run.
void rehash_and_grow_if_necessary() {
- if (capacity_ == 0) {
- resize(1);
- } else if (capacity_ > Group::kWidth &&
- // Do these calcuations in 64-bit to avoid overflow.
- size() * uint64_t{32} <= capacity_ * uint64_t{25}) {
+ const size_t cap = capacity();
+ if (cap > Group::kWidth &&
+ // Do these calcuations in 64-bit to avoid overflow.
+ size() * uint64_t{32} <= cap* uint64_t{25}) {
// Squash DELETED without growing if there is enough capacity.
//
- // Rehash in place if the current size is <= 25/32 of capacity_.
+ // Rehash in place if the current size is <= 25/32 of capacity.
// Rationale for such a high factor: 1) drop_deletes_without_resize() is
// faster than resize, and 2) it takes quite a bit of work to add
// tombstones. In the worst case, seems to take approximately 4
@@ -2141,8 +2160,8 @@ class raw_hash_set {
//
// Here is output of an experiment using the BM_CacheInSteadyState
// benchmark running the old case (where we rehash-in-place only if we can
- // reclaim at least 7/16*capacity_) vs. this code (which rehashes in place
- // if we can recover 3/32*capacity_).
+ // reclaim at least 7/16*capacity) vs. this code (which rehashes in place
+ // if we can recover 3/32*capacity).
//
// Note that although in the worst-case number of rehashes jumped up from
// 15 to 190, but the number of operations per second is almost the same.
@@ -2165,23 +2184,24 @@ class raw_hash_set {
drop_deletes_without_resize();
} else {
// Otherwise grow the container.
- resize(capacity_ * 2 + 1);
+ resize(cap * 2 + 1);
}
}
bool has_element(const value_type& elem) const {
size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, elem);
- auto seq = probe(ctrl_, hash, capacity_);
+ auto seq = probe(common(), hash);
+ const ctrl_t* ctrl = control();
while (true) {
- Group g{ctrl_ + seq.offset()};
+ Group g{ctrl + seq.offset()};
for (uint32_t i : g.Match(H2(hash))) {
- if (ABSL_PREDICT_TRUE(PolicyTraits::element(slots_ + seq.offset(i)) ==
- elem))
+ if (ABSL_PREDICT_TRUE(
+ PolicyTraits::element(slot_array() + seq.offset(i)) == elem))
return true;
}
if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return false;
seq.next();
- assert(seq.index() <= capacity_ && "full table!");
+ assert(seq.index() <= capacity() && "full table!");
}
return false;
}
@@ -2206,18 +2226,19 @@ class raw_hash_set {
std::pair<size_t, bool> find_or_prepare_insert(const K& key) {
prefetch_heap_block();
auto hash = hash_ref()(key);
- auto seq = probe(ctrl_, hash, capacity_);
+ auto seq = probe(common(), hash);
+ const ctrl_t* ctrl = control();
while (true) {
- Group g{ctrl_ + seq.offset()};
+ Group g{ctrl + seq.offset()};
for (uint32_t i : g.Match(H2(hash))) {
if (ABSL_PREDICT_TRUE(PolicyTraits::apply(
EqualElement<K>{key, eq_ref()},
- PolicyTraits::element(slots_ + seq.offset(i)))))
+ PolicyTraits::element(slot_array() + seq.offset(i)))))
return {seq.offset(i), false};
}
if (ABSL_PREDICT_TRUE(g.MaskEmpty())) break;
seq.next();
- assert(seq.index() <= capacity_ && "full table!");
+ assert(seq.index() <= capacity() && "full table!");
}
return {prepare_insert(hash), true};
}
@@ -2227,16 +2248,15 @@ class raw_hash_set {
//
// REQUIRES: At least one non-full slot available.
size_t prepare_insert(size_t hash) ABSL_ATTRIBUTE_NOINLINE {
- auto target = find_first_non_full(ctrl_, hash, capacity_);
+ auto target = find_first_non_full(common(), hash);
if (ABSL_PREDICT_FALSE(growth_left() == 0 &&
- !IsDeleted(ctrl_[target.offset]))) {
+ !IsDeleted(control()[target.offset]))) {
rehash_and_grow_if_necessary();
- target = find_first_non_full(ctrl_, hash, capacity_);
+ target = find_first_non_full(common(), hash);
}
- ++size_;
- growth_left() -= IsEmpty(ctrl_[target.offset]);
- SetCtrl(target.offset, H2(hash), capacity_, ctrl_, slots_,
- sizeof(slot_type));
+ ++common().size_;
+ growth_left() -= IsEmpty(control()[target.offset]);
+ SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type));
infoz().RecordInsert(hash, target.probe_length);
return target.offset;
}
@@ -2251,7 +2271,7 @@ class raw_hash_set {
// POSTCONDITION: *m.iterator_at(i) == value_type(forward<Args>(args)...).
template <class... Args>
void emplace_at(size_t i, Args&&... args) {
- PolicyTraits::construct(&alloc_ref(), slots_ + i,
+ PolicyTraits::construct(&alloc_ref(), slot_array() + i,
std::forward<Args>(args)...);
assert(PolicyTraits::apply(FindElement{*this}, *iterator_at(i)) ==
@@ -2259,16 +2279,14 @@ class raw_hash_set {
"constructed value does not match the lookup key");
}
- iterator iterator_at(size_t i) { return {ctrl_ + i, slots_ + i}; }
- const_iterator iterator_at(size_t i) const { return {ctrl_ + i, slots_ + i}; }
+ iterator iterator_at(size_t i) { return {control() + i, slot_array() + i}; }
+ const_iterator iterator_at(size_t i) const {
+ return {control() + i, slot_array() + i};
+ }
private:
friend struct RawHashSetTestOnlyAccess;
- void reset_growth_left() {
- growth_left() = CapacityToGrowth(capacity()) - size_;
- }
-
// The number of slots we can still fill without needing to rehash.
//
// This is stored separately due to tombstones: we do not include tombstones
@@ -2284,46 +2302,76 @@ class raw_hash_set {
// Prefetch the heap-allocated memory region to resolve potential TLB misses.
// This is intended to overlap with execution of calculating the hash for a
// key.
- void prefetch_heap_block() const {
- base_internal::PrefetchT2(ctrl_);
- }
+ void prefetch_heap_block() const { base_internal::PrefetchT2(control()); }
+
+ CommonFields& common() { return *this; }
+ const CommonFields& common() const { return *this; }
- HashtablezInfoHandle& infoz() { return settings_.template get<1>(); }
+ ctrl_t* control() const { return common().control_; }
+ slot_type* slot_array() const {
+ return static_cast<slot_type*>(common().slots_);
+ }
+ HashtablezInfoHandle& infoz() { return common().infoz(); }
- hasher& hash_ref() { return settings_.template get<2>(); }
- const hasher& hash_ref() const { return settings_.template get<2>(); }
- key_equal& eq_ref() { return settings_.template get<3>(); }
- const key_equal& eq_ref() const { return settings_.template get<3>(); }
- allocator_type& alloc_ref() { return settings_.template get<4>(); }
+ hasher& hash_ref() { return settings_.template get<1>(); }
+ const hasher& hash_ref() const { return settings_.template get<1>(); }
+ key_equal& eq_ref() { return settings_.template get<2>(); }
+ const key_equal& eq_ref() const { return settings_.template get<2>(); }
+ allocator_type& alloc_ref() { return settings_.template get<3>(); }
const allocator_type& alloc_ref() const {
- return settings_.template get<4>();
+ return settings_.template get<3>();
}
- // TODO(alkis): Investigate removing some of these fields:
- // - ctrl/slots can be derived from each other
- // - size can be moved into the slot array
+ // Make type-specific functions for this type's PolicyFunctions struct.
+ static size_t hash_slot_fn(void* set, void* slot) {
+ auto* h = static_cast<raw_hash_set*>(set);
+ return PolicyTraits::apply(
+ HashElement{h->hash_ref()},
+ PolicyTraits::element(static_cast<slot_type*>(slot)));
+ }
+ static void transfer_slot_fn(void* set, void* dst, void* src) {
+ auto* h = static_cast<raw_hash_set*>(set);
+ PolicyTraits::transfer(&h->alloc_ref(), static_cast<slot_type*>(dst),
+ static_cast<slot_type*>(src));
+ }
+ // Note: dealloc_fn will only be used if we have a non-standard allocator.
+ static void dealloc_fn(void* set, const PolicyFunctions&, ctrl_t* ctrl,
+ void* slot_mem, size_t n) {
+ auto* h = static_cast<raw_hash_set*>(set);
- // The control bytes (and, also, a pointer to the base of the backing array).
- //
- // This contains `capacity_ + 1 + NumClonedBytes()` entries, even
- // when the table is empty (hence EmptyGroup).
- ctrl_t* ctrl_ = EmptyGroup();
- // The beginning of the slots, located at `SlotOffset()` bytes after
- // `ctrl_`. May be null for empty tables.
- slot_type* slots_ = nullptr;
+ // Unpoison before returning the memory to the allocator.
+ SanitizerUnpoisonMemoryRegion(slot_mem, sizeof(slot_type) * n);
- // The number of filled slots.
- size_t size_ = 0;
+ Deallocate<alignof(slot_type)>(
+ &h->alloc_ref(), ctrl,
+ AllocSize(n, sizeof(slot_type), alignof(slot_type)));
+ }
+
+ static constexpr PolicyFunctions kPolicyFunctions = {
+ sizeof(slot_type),
+ alignof(slot_type),
+ &raw_hash_set::hash_slot_fn,
+ &raw_hash_set::transfer_slot_fn,
+ (std::is_same<SlotAlloc, std::allocator<slot_type>>::value
+ ? &DeallocateStandard<alignof(slot_type)>
+ : &raw_hash_set::dealloc_fn),
+ };
- // The total number of available slots.
- size_t capacity_ = 0;
- absl::container_internal::CompressedTuple<size_t /* growth_left */,
- HashtablezInfoHandle, hasher,
+ // Bundle together growth_left (number of slots that can be filled without
+ // rehashing) plus other objects which might be empty. CompressedTuple will
+ // ensure that sizeof is not affected by any of the empty fields that occur
+ // after growth_left.
+ absl::container_internal::CompressedTuple<size_t /* growth_left */, hasher,
key_equal, allocator_type>
- settings_{0u, HashtablezInfoHandle{}, hasher{}, key_equal{},
- allocator_type{}};
+ settings_{0u, hasher{}, key_equal{}, allocator_type{}};
};
+#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
+template <class Policy, class Hash, class Eq, class Alloc>
+constexpr PolicyFunctions
+ raw_hash_set<Policy, Hash, Eq, Alloc>::kPolicyFunctions;
+#endif
+
// Erases all elements that satisfy the predicate `pred` from the container `c`.
template <typename P, typename H, typename E, typename A, typename Predicate>
typename raw_hash_set<P, H, E, A>::size_type EraseIf(
@@ -2349,14 +2397,15 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
const typename Set::key_type& key) {
size_t num_probes = 0;
size_t hash = set.hash_ref()(key);
- auto seq = probe(set.ctrl_, hash, set.capacity_);
+ auto seq = probe(set.common(), hash);
+ const ctrl_t* ctrl = set.control();
while (true) {
- container_internal::Group g{set.ctrl_ + seq.offset()};
+ container_internal::Group g{ctrl + seq.offset()};
for (uint32_t i : g.Match(container_internal::H2(hash))) {
if (Traits::apply(
typename Set::template EqualElement<typename Set::key_type>{
key, set.eq_ref()},
- Traits::element(set.slots_ + seq.offset(i))))
+ Traits::element(set.slot_array() + seq.offset(i))))
return num_probes;
++num_probes;
}
@@ -2367,7 +2416,7 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
}
static size_t AllocatedByteSize(const Set& c) {
- size_t capacity = c.capacity_;
+ size_t capacity = c.capacity();
if (capacity == 0) return 0;
size_t m = AllocSize(capacity, sizeof(Slot), alignof(Slot));
@@ -2375,9 +2424,10 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
if (per_slot != ~size_t{}) {
m += per_slot * c.size();
} else {
+ const ctrl_t* ctrl = c.control();
for (size_t i = 0; i != capacity; ++i) {
- if (container_internal::IsFull(c.ctrl_[i])) {
- m += Traits::space_used(c.slots_ + i);
+ if (container_internal::IsFull(ctrl[i])) {
+ m += Traits::space_used(c.slot_array() + i);
}
}
}
diff --git a/absl/container/internal/raw_hash_set_benchmark.cc b/absl/container/internal/raw_hash_set_benchmark.cc
index e17ba9b4..15deddcf 100644
--- a/absl/container/internal/raw_hash_set_benchmark.cc
+++ b/absl/container/internal/raw_hash_set_benchmark.cc
@@ -477,6 +477,24 @@ void BM_DropDeletes(benchmark::State& state) {
}
BENCHMARK(BM_DropDeletes);
+void BM_Resize(benchmark::State& state) {
+ // For now just measure a small cheap hash table since we
+ // are mostly interested in the overhead of type-erasure
+ // in resize().
+ constexpr int kElements = 64;
+ const int kCapacity = kElements * 2;
+
+ IntTable table;
+ for (int i = 0; i < kElements; i++) {
+ table.insert(i);
+ }
+ for (auto unused : state) {
+ table.rehash(0);
+ table.rehash(kCapacity);
+ }
+}
+BENCHMARK(BM_Resize);
+
} // namespace
} // namespace container_internal
ABSL_NAMESPACE_END
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index daa32814..74351c09 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -56,8 +56,8 @@ namespace container_internal {
struct RawHashSetTestOnlyAccess {
template <typename C>
- static auto GetSlots(const C& c) -> decltype(c.slots_) {
- return c.slots_;
+ static auto GetSlots(const C& c) -> decltype(c.slot_array()) {
+ return c.slot_array();
}
};
@@ -455,12 +455,12 @@ TEST(Table, EmptyFunctorOptimization) {
static_assert(std::is_empty<std::allocator<int>>::value, "");
struct MockTable {
+ void* infoz;
void* ctrl;
void* slots;
size_t size;
size_t capacity;
size_t growth_left;
- void* infoz;
};
struct MockTableInfozDisabled {
void* ctrl;