diff options
author | Abseil Team <absl-team@google.com> | 2024-02-07 15:26:28 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-02-07 15:27:13 -0800 |
commit | 0be9f99723eba44462245013d6a433c1ad9157ee (patch) | |
tree | 9de6a37384a7e9b28bd7f13fcf67f0e646d01839 /absl | |
parent | 3e59efa2ad1d1777257bd3b1845d5acc4a931687 (diff) |
Avoid hash computation and `Group::Match` in small tables copy and use `IterateOverFullSlots` for iterating for all tables.
PiperOrigin-RevId: 605116090
Change-Id: Ia65c664421f7630225b00f1c45c636b4681121ce
Diffstat (limited to 'absl')
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 78 | ||||
-rw-r--r-- | absl/container/internal/raw_hash_set_test.cc | 46 |
2 files changed, 102 insertions, 22 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 0bb77bda..dced5b2b 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1545,7 +1545,7 @@ inline void* SlotAddress(void* slot_array, size_t slot, size_t slot_size) { (slot * slot_size)); } -// Iterates over all full slots and calls `cb(SlotType*)`. +// Iterates over all full slots and calls `cb(const ctrl_t*, SlotType*)`. // NOTE: no erasure from this table allowed during Callback call. template <class SlotType, class Callback> ABSL_ATTRIBUTE_ALWAYS_INLINE inline void IterateOverFullSlots( @@ -1566,16 +1566,18 @@ ABSL_ATTRIBUTE_ALWAYS_INLINE inline void IterateOverFullSlots( "unexpected group width"); // Group starts from kSentinel slot, so indices in the mask will // be increased by 1. + const auto mask = GroupPortableImpl(ctrl + cap).MaskFull(); + --ctrl; --slot; - for (uint32_t i : GroupPortableImpl(ctrl + cap).MaskFull()) { - cb(slot + i); + for (uint32_t i : mask) { + cb(ctrl + i, slot + i); } return; } size_t remaining = c.size(); while (remaining != 0) { for (uint32_t i : GroupFullEmptyOrDeleted(ctrl).MaskFull()) { - cb(slot + i); + cb(ctrl + i, slot + i); --remaining; } slot += Group::kWidth; @@ -2260,19 +2262,55 @@ class raw_hash_set { that.alloc_ref())) {} raw_hash_set(const raw_hash_set& that, const allocator_type& a) - : raw_hash_set(0, that.hash_ref(), that.eq_ref(), a) { + : raw_hash_set(GrowthToLowerboundCapacity(that.size()), that.hash_ref(), + that.eq_ref(), a) { const size_t size = that.size(); - if (size == 0) return; - reserve(size); - // Because the table is guaranteed to be empty, we can do something faster - // than a full `insert`. - for (const auto& v : that) { - const size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, v); - auto target = find_first_non_full_outofline(common(), hash); - SetCtrl(common(), target.offset, H2(hash), sizeof(slot_type)); - emplace_at(target.offset, v); - common().maybe_increment_generation_on_insert(); - infoz().RecordInsert(hash, target.probe_length); + if (size == 0) { + return; + } + const size_t cap = capacity(); + // Note about single group tables: + // 1. It is correct to have any order of elements. + // 2. Order has to be non deterministic. + // 3. We are assigning elements with arbitrary `shift` starting from + // `capacity + shift` position. + // 4. `shift` must be coprime with `capacity + 1` in order to be able to use + // modular arithmetic to traverse all positions, instead if cycling + // through a subset of positions. Odd numbers are coprime with any + // `capacity + 1` (2^N). + size_t offset = cap; + const size_t shift = + is_single_group(cap) ? (PerTableSalt(control()) | 1) : 0; + IterateOverFullSlots( + that.common(), that.slot_array(), + [&](const ctrl_t* that_ctrl, + slot_type* that_slot) ABSL_ATTRIBUTE_ALWAYS_INLINE { + if (shift == 0) { + // Big tables case. Position must be searched via probing. + // The table is guaranteed to be empty, so we can do faster than + // a full `insert`. + const size_t hash = PolicyTraits::apply( + HashElement{hash_ref()}, PolicyTraits::element(that_slot)); + FindInfo target = find_first_non_full_outofline(common(), hash); + infoz().RecordInsert(hash, target.probe_length); + offset = target.offset; + } else { + // Small tables case. Next position is computed via shift. + offset = (offset + shift) & cap; + } + const h2_t h2 = static_cast<h2_t>(*that_ctrl); + assert( // We rely that hash is not changed for small tables. + H2(PolicyTraits::apply(HashElement{hash_ref()}, + PolicyTraits::element(that_slot))) == h2 && + "hash function value changed unexpectedly during the copy"); + SetCtrl(common(), offset, h2, sizeof(slot_type)); + emplace_at(offset, PolicyTraits::element(that_slot)); + common().maybe_increment_generation_on_insert(); + }); + if (shift != 0) { + // On small table copy we do not record individual inserts. + // RecordInsert requires hash, but it is unknown for small tables. + infoz().RecordStorageChanged(size, cap); } common().set_size(size); set_growth_left(growth_left() - size); @@ -2927,10 +2965,10 @@ class raw_hash_set { inline void destroy_slots() { if (PolicyTraits::template destroy_is_trivial<Alloc>()) return; - IterateOverFullSlots(common(), slot_array(), - [&](slot_type* slot) ABSL_ATTRIBUTE_ALWAYS_INLINE { - this->destroy(slot); - }); + IterateOverFullSlots( + common(), slot_array(), + [&](const ctrl_t*, slot_type* slot) + ABSL_ATTRIBUTE_ALWAYS_INLINE { this->destroy(slot); }); } inline void dealloc() { diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index 5852904f..1c428ea7 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -1765,6 +1765,40 @@ TEST(Table, CopyConstruct) { } } +TEST(Table, CopyDifferentSizes) { + IntTable t; + + for (int i = 0; i < 100; ++i) { + t.emplace(i); + IntTable c = t; + for (int j = 0; j <= i; ++j) { + ASSERT_TRUE(c.find(j) != c.end()) << "i=" << i << " j=" << j; + } + // Testing find miss to verify that table is not full. + ASSERT_TRUE(c.find(-1) == c.end()); + } +} + +TEST(Table, CopyDifferentCapacities) { + for (int cap = 1; cap < 100; cap = cap * 2 + 1) { + IntTable t; + t.reserve(static_cast<size_t>(cap)); + for (int i = 0; i <= cap; ++i) { + t.emplace(i); + if (i != cap && i % 5 != 0) { + continue; + } + IntTable c = t; + for (int j = 0; j <= i; ++j) { + ASSERT_TRUE(c.find(j) != c.end()) + << "cap=" << cap << " i=" << i << " j=" << j; + } + // Testing find miss to verify that table is not full. + ASSERT_TRUE(c.find(-1) == c.end()); + } + } +} + TEST(Table, CopyConstructWithAlloc) { StringTable t; t.emplace("a", "b"); @@ -2747,7 +2781,9 @@ TEST(Table, CountedHash) { TEST(Table, IterateOverFullSlotsEmpty) { IntTable t; - auto fail_if_any = [](int64_t* i) { FAIL() << "expected no slots " << i; }; + auto fail_if_any = [](const ctrl_t*, int64_t* i) { + FAIL() << "expected no slots " << i; + }; container_internal::IterateOverFullSlots( RawHashSetTestOnlyAccess::GetCommon(t), RawHashSetTestOnlyAccess::GetSlots(t), fail_if_any); @@ -2771,7 +2807,13 @@ TEST(Table, IterateOverFullSlotsFull) { container_internal::IterateOverFullSlots( RawHashSetTestOnlyAccess::GetCommon(t), RawHashSetTestOnlyAccess::GetSlots(t), - [&slots](int64_t* i) { slots.push_back(*i); }); + [&t, &slots](const ctrl_t* ctrl, int64_t* i) { + ptrdiff_t ctrl_offset = + ctrl - RawHashSetTestOnlyAccess::GetCommon(t).control(); + ptrdiff_t slot_offset = i - RawHashSetTestOnlyAccess::GetSlots(t); + ASSERT_EQ(ctrl_offset, slot_offset); + slots.push_back(*i); + }); EXPECT_THAT(slots, testing::UnorderedElementsAreArray(expected_slots)); } } |