path: root/absl/container/internal/raw_hash_set.cc
author     Abseil Team <absl-team@google.com>             2022-11-28 12:27:06 -0800
committer  Copybara-Service <copybara-worker@google.com>  2022-11-28 12:27:43 -0800
commit     e5a7979d36a84831652abf3138c4d76797c002b5 (patch)
tree       e7dc92ce03b1b04428b588f33bddf657ea97f554 /absl/container/internal/raw_hash_set.cc
parent     e3158086978de4ff15a2058f49ba525c89e14ce6 (diff)
Reduce flat_hash_{set,map} generated code size.
This CL makes a bunch of changes (mostly to raw_hash_set, which underlies flat_hash_set and flat_hash_map). Techniques used:

* Extract code that does not depend on the specific hash table type into common (non-inlined) functions.
* Place ABSL_ATTRIBUTE_NOINLINE directives judiciously.
* Out-of-line some slow paths.

Reduces the sizes of some large binaries by ~0.5%. Has no significant performance impact on a few performance-critical binaries.

## Speed of fleetbench micro-benchmarks

Following is a histogram of percentage changes in [fleetbench](https://github.com/google/fleetbench) hot_swissmap_benchmark results. Negative numbers indicate a speedup caused by this change. Statistically insignificant changes are mapped to zero.

XXX Also run and merge in cold_swissmap_benchmark

Across all 351 benchmarks, the average speedup is 0.38%. The best speedup was -25%; the worst slowdown was +6.81%.

```
Count: 351  Average: -0.382764  StdDev: 3.77807
Min: -25  Median: 0.435135  Max: 6.81
---------------------------------------------
[ -25, -10)  16   4.558%    4.558%  #
[  -9,  -8)   2   0.570%    5.128%
[  -8,  -7)   1   0.285%    5.413%
[  -7,  -6)   1   0.285%    5.698%
[  -6,  -5)   2   0.570%    6.268%
[  -5,  -4)   5   1.425%    7.692%
[  -4,  -3)  13   3.704%   11.396%  #
[  -3,  -2)  15   4.274%   15.670%  #
[  -2,  -1)  26   7.407%   23.077%  ##
[  -1,   0)  14   3.989%   27.066%  #
[   0,   1) 185  52.707%   79.772%  ############
[   1,   2)  14   3.989%   83.761%  #
[   2,   3)   8   2.279%   86.040%  #
[   3,   4)   7   1.994%   88.034%
[   4,   5)  32   9.117%   97.151%  ##
[   5,   6)   6   1.709%   98.860%
[   6,   7)   4   1.140%  100.000%
```

We looked at the slowdowns and they do not seem worth worrying about. E.g., the worst one was:

```
BM_FindHit_Hot<::absl::node_hash_set,64>/set_size:4096/density:0  2.61ns ± 1%  2.79ns ± 1%  +6.81% (p=0.008 n=5+5)
```

## Detailed changes

* Out-of-line slow paths in hash table sampler methods.
* Explicitly unregister from the sampler instead of doing so from the destructor.
* Introduced a non-templated CommonFields struct that holds some of the hash table fields (infoz, ctrl, slots, size, capacity). This struct can be passed to new non-templated helpers. The struct is a private base class of raw_hash_set.
* Made a non-inlined InitializeSlots<> that is templated only on the allocator and the size/alignment of the slot type, so that instantiations can be shared across slot types with the same size and alignment.
* Moved some infrequently called code paths into non-inlined type-erased functions. A suite of type-specific function pointers is passed to these routines for when they need to operate on slots.
* Marked some methods as non-inlined.
* Avoid unnecessary reinitialization in the destructor.
* Introduce an UpdateSpine type-erased helper that is called from clear() and rehash().

PiperOrigin-RevId: 491413386
Change-Id: Ia5495c5a6ec73622a785a0d260e406ddb9085a7c
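To make the type-erasure technique above concrete, here is a minimal, self-contained C++ sketch of the pattern, not the actual Abseil implementation: a non-templated struct holds the common fields, a table of function pointers supplies the type-specific operations, and a shared (non-inlined) helper works on raw slot bytes through that table. The names `CommonState`, `SlotPolicy`, `TransferSlot`, `HashSlotImpl`, `TransferImpl`, and `MakePolicy` are invented for illustration; the real `CommonFields` and `PolicyFunctions` in raw_hash_set.h carry more state (control bytes, sampling info, growth bookkeeping).

```
#include <cstddef>
#include <functional>
#include <iostream>
#include <new>
#include <utility>

// Hypothetical, simplified stand-in for CommonFields: the non-templated
// state shared by every hash table instantiation.
struct CommonState {
  void* slots = nullptr;
  std::size_t size = 0;
  std::size_t capacity = 0;
};

// Hypothetical stand-in for PolicyFunctions: a table of type-specific
// operations. Non-templated shared helpers receive this table, so only one
// copy of each helper is emitted no matter how many table types exist.
struct SlotPolicy {
  std::size_t slot_size;
  std::size_t (*hash_slot)(const void* set, void* slot);
  void (*transfer)(void* set, void* dst, void* src);
};

// A shared, type-erased helper: it manipulates raw bytes and calls back
// through the policy table whenever it needs type knowledge.
void TransferSlot(CommonState& common, const SlotPolicy& policy,
                  std::size_t dst_index, std::size_t src_index) {
  char* base = static_cast<char*>(common.slots);
  policy.transfer(&common, base + dst_index * policy.slot_size,
                  base + src_index * policy.slot_size);
}

// Per-type glue: the only templated pieces, kept tiny so that the bulk of
// the logic lives in the shared helper above.
template <class T>
std::size_t HashSlotImpl(const void* /*set*/, void* slot) {
  return std::hash<T>{}(*static_cast<T*>(slot));
}

template <class T>
void TransferImpl(void* /*set*/, void* dst, void* src) {
  ::new (dst) T(std::move(*static_cast<T*>(src)));  // move-construct at dst
  static_cast<T*>(src)->~T();                       // destroy the source
}

template <class T>
constexpr SlotPolicy MakePolicy() {
  return {sizeof(T), &HashSlotImpl<T>, &TransferImpl<T>};
}

int main() {
  int storage[4] = {7, 0, 0, 0};
  CommonState common{storage, /*size=*/1, /*capacity=*/4};
  constexpr SlotPolicy kIntPolicy = MakePolicy<int>();
  TransferSlot(common, kIntPolicy, /*dst_index=*/2, /*src_index=*/0);
  std::cout << "slot 2 now holds " << storage[2] << "\n";  // prints 7
}
```

Because `TransferSlot` is not a template, only one copy of it ends up in the binary; the remaining per-type glue is small enough for the compiler to fold away.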
Diffstat (limited to 'absl/container/internal/raw_hash_set.cc')
-rw-r--r--  absl/container/internal/raw_hash_set.cc  152
1 file changed, 150 insertions(+), 2 deletions(-)
diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc
index c63a2e02..fae12793 100644
--- a/absl/container/internal/raw_hash_set.cc
+++ b/absl/container/internal/raw_hash_set.cc
@@ -63,8 +63,156 @@ void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) {
std::memcpy(ctrl + capacity + 1, ctrl, NumClonedBytes());
ctrl[capacity] = ctrl_t::kSentinel;
}
-// Extern template instantiotion for inline function.
-template FindInfo find_first_non_full(const ctrl_t*, size_t, size_t);
+// Extern template instantiation for inline function.
+template FindInfo find_first_non_full(const CommonFields&, size_t);
+
+FindInfo find_first_non_full_outofline(const CommonFields& common,
+ size_t hash) {
+ return find_first_non_full(common, hash);
+}
+
+// Return address of the ith slot in slots where each slot occupies slot_size.
+static inline void* SlotAddress(void* slot_array, size_t slot,
+ size_t slot_size) {
+ return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot_array) +
+ (slot * slot_size));
+}
+
+// Return the address of the slot just after slot assuming each slot
+// has the specified size.
+static inline void* NextSlot(void* slot, size_t slot_size) {
+ return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) + slot_size);
+}
+
+// Return the address of the slot just before slot assuming each slot
+// has the specified size.
+static inline void* PrevSlot(void* slot, size_t slot_size) {
+ return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) - slot_size);
+}
+
+void DropDeletesWithoutResize(CommonFields& common, size_t& growth_left,
+ const PolicyFunctions& policy, void* tmp_space) {
+ void* set = &common;
+ void* slot_array = common.slots_;
+ const size_t capacity = common.capacity_;
+ assert(IsValidCapacity(capacity));
+ assert(!is_small(capacity));
+ // Algorithm:
+ // - mark all DELETED slots as EMPTY
+ // - mark all FULL slots as DELETED
+ // - for each slot marked as DELETED
+ // hash = Hash(element)
+ // target = find_first_non_full(hash)
+ // if target is in the same group
+ // mark slot as FULL
+ // else if target is EMPTY
+ // transfer element to target
+ // mark slot as EMPTY
+ // mark target as FULL
+ // else if target is DELETED
+ // swap current element with target element
+ // mark target as FULL
+ // repeat procedure for current slot with moved from element (target)
+ ctrl_t* ctrl = common.control_;
+ ConvertDeletedToEmptyAndFullToDeleted(ctrl, capacity);
+ auto hasher = policy.hash_slot;
+ auto transfer = policy.transfer;
+ const size_t slot_size = policy.slot_size;
+
+ size_t total_probe_length = 0;
+ void* slot_ptr = SlotAddress(slot_array, 0, slot_size);
+ for (size_t i = 0; i != capacity;
+ ++i, slot_ptr = NextSlot(slot_ptr, slot_size)) {
+ assert(slot_ptr == SlotAddress(slot_array, i, slot_size));
+ if (!IsDeleted(ctrl[i])) continue;
+ const size_t hash = (*hasher)(set, slot_ptr);
+ const FindInfo target = find_first_non_full(common, hash);
+ const size_t new_i = target.offset;
+ total_probe_length += target.probe_length;
+
+ // Verify if the old and new i fall within the same group wrt the hash.
+ // If they do, we don't need to move the object as it falls already in the
+ // best probe we can.
+ const size_t probe_offset = probe(common, hash).offset();
+ const auto probe_index = [probe_offset, capacity](size_t pos) {
+ return ((pos - probe_offset) & capacity) / Group::kWidth;
+ };
+
+ // Element doesn't move.
+ if (ABSL_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) {
+ SetCtrl(common, i, H2(hash), slot_size);
+ continue;
+ }
+
+ void* new_slot_ptr = SlotAddress(slot_array, new_i, slot_size);
+ if (IsEmpty(ctrl[new_i])) {
+ // Transfer element to the empty spot.
+ // SetCtrl poisons/unpoisons the slots so we have to call it at the
+ // right time.
+ SetCtrl(common, new_i, H2(hash), slot_size);
+ (*transfer)(set, new_slot_ptr, slot_ptr);
+ SetCtrl(common, i, ctrl_t::kEmpty, slot_size);
+ } else {
+ assert(IsDeleted(ctrl[new_i]));
+ SetCtrl(common, new_i, H2(hash), slot_size);
+ // Until we are done rehashing, DELETED marks previously FULL slots.
+
+ // Swap i and new_i elements.
+ (*transfer)(set, tmp_space, new_slot_ptr);
+ (*transfer)(set, new_slot_ptr, slot_ptr);
+ (*transfer)(set, slot_ptr, tmp_space);
+
+ // repeat the processing of the ith slot
+ --i;
+ slot_ptr = PrevSlot(slot_ptr, slot_size);
+ }
+ }
+ ResetGrowthLeft(common, growth_left);
+ common.infoz().RecordRehash(total_probe_length);
+}
+
+void EraseMetaOnly(CommonFields& c, size_t& growth_left, ctrl_t* it,
+ size_t slot_size) {
+ assert(IsFull(*it) && "erasing a dangling iterator");
+ --c.size_;
+ const auto index = static_cast<size_t>(it - c.control_);
+ const size_t index_before = (index - Group::kWidth) & c.capacity_;
+ const auto empty_after = Group(it).MaskEmpty();
+ const auto empty_before = Group(c.control_ + index_before).MaskEmpty();
+
+ // We count how many consecutive non empties we have to the right and to the
+ // left of `it`. If the sum is >= kWidth then there is at least one probe
+ // window that might have seen a full group.
+ bool was_never_full =
+ empty_before && empty_after &&
+ static_cast<size_t>(empty_after.TrailingZeros() +
+ empty_before.LeadingZeros()) < Group::kWidth;
+
+ SetCtrl(c, index, was_never_full ? ctrl_t::kEmpty : ctrl_t::kDeleted,
+ slot_size);
+ growth_left += (was_never_full ? 1 : 0);
+ c.infoz().RecordErase();
+}
+
+void ClearBackingArray(CommonFields& c, size_t& growth_left,
+ const PolicyFunctions& policy, bool reuse) {
+ if (reuse) {
+ c.size_ = 0;
+ ResetCtrl(c, growth_left, policy.slot_size);
+ c.infoz().RecordStorageChanged(0, c.capacity_);
+ } else {
+ void* set = &c;
+ (*policy.dealloc)(set, policy, c.control_, c.slots_, c.capacity_);
+ c.control_ = EmptyGroup();
+ c.slots_ = nullptr;
+ c.size_ = 0;
+ c.capacity_ = 0;
+ growth_left = 0;
+ c.infoz().RecordClearedReservation();
+ assert(c.size_ == 0);
+ c.infoz().RecordStorageChanged(0, 0);
+ }
+}
} // namespace container_internal
ABSL_NAMESPACE_END
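The helpers added in this file (`DropDeletesWithoutResize`, `EraseMetaOnly`, `ClearBackingArray`) are the type-erased half of the change. The other code-size lever named in the commit message is collapsing template instantiations by slot layout: InitializeSlots<> is templated on the allocator and on the slot's size/alignment rather than on the slot type. The sketch below illustrates only that sharing idea under simplified assumptions; `AllocateSlotArray` and `DeallocateSlotArray` are invented names, and the real code also threads through the allocator, control bytes, and sampling state.

```
#include <cstddef>
#include <new>
#include <utility>

// Allocation helpers templated only on the slot's size and alignment, never
// on the slot type itself. Any two tables whose slots have identical layout
// therefore reuse the same emitted instantiation.
template <std::size_t SizeOfSlot, std::size_t AlignOfSlot>
void* AllocateSlotArray(std::size_t capacity) {
  return ::operator new(SizeOfSlot * capacity, std::align_val_t{AlignOfSlot});
}

template <std::size_t SizeOfSlot, std::size_t AlignOfSlot>
void DeallocateSlotArray(void* p, std::size_t capacity) {
  ::operator delete(p, SizeOfSlot * capacity, std::align_val_t{AlignOfSlot});
}

int main() {
  // Two distinct slot types with the same size and alignment: both calls
  // below pick up the same AllocateSlotArray instantiation.
  using SlotA = std::pair<int, float>;
  using SlotB = std::pair<float, int>;
  static_assert(sizeof(SlotA) == sizeof(SlotB) &&
                    alignof(SlotA) == alignof(SlotB),
                "layouts must match for the instantiations to be shared");

  void* a = AllocateSlotArray<sizeof(SlotA), alignof(SlotA)>(16);
  void* b = AllocateSlotArray<sizeof(SlotB), alignof(SlotB)>(16);
  DeallocateSlotArray<sizeof(SlotA), alignof(SlotA)>(a, 16);
  DeallocateSlotArray<sizeof(SlotB), alignof(SlotB)>(b, 16);
}
```

Both calls in `main` resolve to the same instantiation because the two slot types have identical size and alignment, so the linker keeps a single copy of each helper.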