path: root/absl/container/internal/raw_hash_set.h
author     Abseil Team <absl-team@google.com>   2020-11-17 11:25:13 -0800
committer  Derek Mauro <dmauro@google.com>      2020-11-17 14:40:39 -0500
commit  4ae6730677ea3c2984f8bb0e4919bd0d9dd04f73 (patch)
tree    ae9d6d041faaff022f90fb0132ec53850c75512b /absl/container/internal/raw_hash_set.h
parent  1b465af3bf865f588251470ea0dec60851a24041 (diff)
Export of internal Abseil changes
--
77e2a9c277721f23a8df983c1efc6ed97c167964 by Derek Mauro <dmauro@google.com>:

Simplify an internal piece of CityHash to remove the conflicting definition of uint128.

PiperOrigin-RevId: 342906008

--
593dbb6d5fd32cc5d31e3ba1eda02e8ddeaeaaf6 by Gennadiy Rozental <rogeeff@google.com>:

Skip retired flags in GetAllFlags output. This is a bug fix: this interface should never have been released producing retired flags. There should be no observable difference for users, who should not care about retired flags.

PiperOrigin-RevId: 342889378

--
bb77e07abff4dbd0a9c97eb85ee85cb39b84d04a by Abseil Team <absl-team@google.com>:

Extract `find_first_not_full` outside of raw_hash_set. This function is used in the following scenarios:

0. [relatively hot] insert, when an actual new element is added
1. [relatively cold] resize (explicit or on capacity growth)
2. [relatively cold] copy constructor
3. [cold] rehash on insert/erase (aka cache) use cases

Resizes are typically mitigated by `reserve` in performance-critical cases, and rehashing happens relatively rarely, when the hash table becomes polluted with deleted slots. We keep `find_first_not_full` in the header so that the compiler can still inline it when necessary (most notably in the insert use case). This reduces binary size, since only one copy of the function will be present in the binary for all tables in which it is not inlined (in at least one case).

PiperOrigin-RevId: 342736300
GitOrigin-RevId: 77e2a9c277721f23a8df983c1efc6ed97c167964
Change-Id: I3fe9d054c66049bb598ea35c45fc800b1cdaa9b6
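The binary-size argument in the last change is worth illustrating. Below is a minimal, hypothetical sketch (not the Abseil code; every name here is invented) of why hoisting shared logic out of a class template leaves at most one out-of-line copy in the binary:

#include <cstddef>

// Free helper: its machine code does not depend on the table's element type,
// so every instantiation of ToyTable<T> below shares this one definition
// (either inlined or as a single out-of-line copy).
inline size_t toy_probe_start(size_t hash, size_t capacity_mask) {
  return hash & capacity_mask;
}

template <class T>
class ToyTable {
 public:
  explicit ToyTable(size_t capacity_mask) : capacity_mask_(capacity_mask) {}
  // The member forwards to the free function instead of carrying the logic;
  // the real commit goes further and has call sites invoke the free
  // find_first_non_full(ctrl_, hash, capacity_) directly.
  size_t probe_start(size_t hash) const {
    return toy_probe_start(hash, capacity_mask_);
  }

 private:
  size_t capacity_mask_;
};

int main() {
  ToyTable<int> a(15);
  ToyTable<double> b(15);
  return a.probe_start(42) == b.probe_start(42) ? 0 : 1;
}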
Diffstat (limited to 'absl/container/internal/raw_hash_set.h')
-rw-r--r--  absl/container/internal/raw_hash_set.h  138
1 file changed, 71 insertions, 67 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 5fbe56b9..02158c4e 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -513,6 +513,64 @@ inline void AssertIsValid(ctrl_t* ctrl) {
"been erased, or the table might have rehashed.");
}
+struct FindInfo {
+ size_t offset;
+ size_t probe_length;
+};
+
+// The representation of the object has two modes:
+// - small: For capacities < kWidth-1
+// - large: For the rest.
+//
+// Differences:
+// - In small mode we are able to use the whole capacity. The extra control
+// bytes give us at least one "empty" control byte to stop the iteration.
+// This is important to make 1 a valid capacity.
+//
+// - In small mode only the first `capacity()` control bytes after the
+// sentinel are valid. The rest contain dummy kEmpty values that do not
+// represent a real slot. This is important to take into account on
+// find_first_non_full(), where we never try ShouldInsertBackwards() for
+// small tables.
+inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; }
+
+inline probe_seq<Group::kWidth> probe(ctrl_t* ctrl, size_t hash,
+ size_t capacity) {
+ return probe_seq<Group::kWidth>(H1(hash, ctrl), capacity);
+}
+
+// Probes the raw_hash_set with the probe sequence for hash and returns the
+// pointer to the first empty or deleted slot.
+// NOTE: this function must work with tables having both kEmpty and kDelete
+// in one group. Such tables appears during drop_deletes_without_resize.
+//
+// This function is very useful when insertions happen and:
+// - the input is already a set
+// - there are enough slots
+// - the element with the hash is not in the table
+inline FindInfo find_first_non_full(ctrl_t* ctrl, size_t hash,
+ size_t capacity) {
+ auto seq = probe(ctrl, hash, capacity);
+ while (true) {
+ Group g{ctrl + seq.offset()};
+ auto mask = g.MatchEmptyOrDeleted();
+ if (mask) {
+#if !defined(NDEBUG)
+ // We want to add entropy even when ASLR is not enabled.
+ // In debug build we will randomly insert in either the front or back of
+ // the group.
+ // TODO(kfm,sbenza): revisit after we do unconditional mixing
+ if (!is_small(capacity) && ShouldInsertBackwards(hash, ctrl)) {
+ return {seq.offset(mask.HighestBitSet()), seq.index()};
+ }
+#endif
+ return {seq.offset(mask.LowestBitSet()), seq.index()};
+ }
+ seq.next();
+ assert(seq.index() < capacity && "full table!");
+ }
+}
+
// Policy: a policy defines how to perform different operations on
// the slots of the hashtable (see hash_policy_traits.h for the full interface
// of policy).
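For readers who do not want to parse the SIMD Group machinery, here is a deliberately simplified, linear-probing sketch of what the added find_first_non_full computes. The encodings and names are toys; the real code probes group-by-group with probe_seq and also applies the debug-only backward insertion shown above.

#include <cassert>
#include <cstddef>
#include <vector>

// Toy control-value encoding; the real table uses signed control bytes
// (kEmpty, kDeleted, kSentinel, or a 7-bit H2 value for full slots).
enum ToyCtrl : int { kToyEmpty = -1, kToyDeleted = -2, kToyFull = 0 };

struct ToyFindInfo {
  size_t offset;
  size_t probe_length;
};

// Scan from the hash's home position and return the first slot whose control
// value is empty *or* deleted -- mirroring MatchEmptyOrDeleted(), which must
// accept tombstones so drop_deletes_without_resize can reuse them.
inline ToyFindInfo toy_find_first_non_full(const std::vector<int>& ctrl,
                                           size_t hash) {
  const size_t mask = ctrl.size() - 1;  // capacity assumed 2^k in this toy
  size_t pos = hash & mask;
  for (size_t probe_length = 0;; ++probe_length) {
    assert(probe_length <= mask && "full table!");
    if (ctrl[pos] != kToyFull) return {pos, probe_length};
    pos = (pos + 1) & mask;
  }
}

int main() {
  std::vector<int> ctrl = {kToyFull, kToyFull, kToyDeleted, kToyEmpty};
  // A hash landing on slot 0 walks past two full slots and stops at the
  // deleted slot, with a probe length of 2.
  ToyFindInfo info = toy_find_first_non_full(ctrl, 0);
  return (info.offset == 2 && info.probe_length == 2) ? 0 : 1;
}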
@@ -845,7 +903,7 @@ class raw_hash_set {
// than a full `insert`.
for (const auto& v : that) {
const size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, v);
- auto target = find_first_non_full(hash);
+ auto target = find_first_non_full(ctrl_, hash, capacity_);
set_ctrl(target.offset, H2(hash));
emplace_at(target.offset, v);
infoz_.RecordInsert(hash, target.probe_length);
@@ -1297,7 +1355,7 @@ class raw_hash_set {
void prefetch(const key_arg<K>& key) const {
(void)key;
#if defined(__GNUC__)
- auto seq = probe(hash_ref()(key));
+ auto seq = probe(ctrl_, hash_ref()(key), capacity_);
__builtin_prefetch(static_cast<const void*>(ctrl_ + seq.offset()));
__builtin_prefetch(static_cast<const void*>(slots_ + seq.offset()));
#endif // __GNUC__
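__builtin_prefetch is a GCC/Clang builtin, which is why the hunk above is guarded by __GNUC__. A self-contained sketch of that guard pattern (not Abseil code):

#include <cstddef>

// Issue a best-effort prefetch hint for an address we expect to read soon;
// on compilers without the builtin this compiles to nothing.
inline void toy_prefetch(const void* addr) {
#if defined(__GNUC__)
  __builtin_prefetch(addr);
#else
  (void)addr;
#endif
}

int main() {
  int data[64] = {};
  toy_prefetch(&data[32]);  // only a hint; correctness never depends on it
  return data[32];
}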
@@ -1312,7 +1370,7 @@ class raw_hash_set {
// called heterogeneous key support.
template <class K = key_type>
iterator find(const key_arg<K>& key, size_t hash) {
- auto seq = probe(hash);
+ auto seq = probe(ctrl_, hash, capacity_);
while (true) {
Group g{ctrl_ + seq.offset()};
for (int i : g.Match(H2(hash))) {
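The find overload above is templated on the key type K, which is what the comment calls heterogeneous key support: the public Swiss-table containers can look up a std::string element with a string_view key. A short usage sketch, assuming the default absl::flat_hash_set hasher and equality (which accept string_view):

#include <string>

#include "absl/container/flat_hash_set.h"
#include "absl/strings/string_view.h"

int main() {
  absl::flat_hash_set<std::string> names = {"alpha", "beta"};
  // The lookup key stays a string_view; hashing and comparison are applied to
  // it directly instead of materializing a temporary std::string first.
  absl::string_view key = "alpha";
  return names.find(key) != names.end() ? 0 : 1;
}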
@@ -1534,7 +1592,7 @@ class raw_hash_set {
if (IsFull(old_ctrl[i])) {
size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
PolicyTraits::element(old_slots + i));
- auto target = find_first_non_full(hash);
+ auto target = find_first_non_full(ctrl_, hash, capacity_);
size_t new_i = target.offset;
total_probe_length += target.probe_length;
set_ctrl(new_i, H2(hash));
@@ -1553,7 +1611,7 @@ class raw_hash_set {
void drop_deletes_without_resize() ABSL_ATTRIBUTE_NOINLINE {
assert(IsValidCapacity(capacity_));
- assert(!is_small());
+ assert(!is_small(capacity_));
// Algorithm:
// - mark all DELETED slots as EMPTY
// - mark all FULL slots as DELETED
@@ -1578,7 +1636,7 @@ class raw_hash_set {
if (!IsDeleted(ctrl_[i])) continue;
size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
PolicyTraits::element(slots_ + i));
- auto target = find_first_non_full(hash);
+ auto target = find_first_non_full(ctrl_, hash, capacity_);
size_t new_i = target.offset;
total_probe_length += target.probe_length;
@@ -1586,7 +1644,8 @@ class raw_hash_set {
// If they do, we don't need to move the object as it falls already in the
// best probe we can.
const auto probe_index = [&](size_t pos) {
- return ((pos - probe(hash).offset()) & capacity_) / Group::kWidth;
+ return ((pos - probe(ctrl_, hash, capacity_).offset()) & capacity_) /
+ Group::kWidth;
};
// Element doesn't move.
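The probe_index lambda above measures how far a position sits from the hash's home offset, in whole groups: if the current slot and the candidate slot fall into the same group, the element is already as well placed as it can be and is left where it is. A small arithmetic sketch of that computation, with hypothetical values (in the real table, capacity_ is a power-of-two-minus-one mask and the group width is Group::kWidth):

#include <cstddef>

constexpr size_t kToyGroupWidth = 16;

// Distance from the probe sequence's home offset, measured in groups of
// kToyGroupWidth slots, with wrap-around handled by the capacity mask.
inline size_t toy_probe_index(size_t pos, size_t home, size_t capacity_mask) {
  return ((pos - home) & capacity_mask) / kToyGroupWidth;
}

int main() {
  // Home offset 5, capacity mask 63: slots 5..20 all map to group 0, so an
  // element currently at slot 12 whose target is slot 17 would not be moved.
  bool same_group = toy_probe_index(12, 5, 63) == toy_probe_index(17, 5, 63);
  return same_group ? 0 : 1;
}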
@@ -1630,7 +1689,7 @@ class raw_hash_set {
bool has_element(const value_type& elem) const {
size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, elem);
- auto seq = probe(hash);
+ auto seq = probe(ctrl_, hash, capacity_);
while (true) {
Group g{ctrl_ + seq.offset()};
for (int i : g.Match(H2(hash))) {
@@ -1645,41 +1704,6 @@ class raw_hash_set {
return false;
}
- // Probes the raw_hash_set with the probe sequence for hash and returns the
- // pointer to the first empty or deleted slot.
- // NOTE: this function must work with tables having both kEmpty and kDelete
- // in one group. Such tables appears during drop_deletes_without_resize.
- //
- // This function is very useful when insertions happen and:
- // - the input is already a set
- // - there are enough slots
- // - the element with the hash is not in the table
- struct FindInfo {
- size_t offset;
- size_t probe_length;
- };
- FindInfo find_first_non_full(size_t hash) {
- auto seq = probe(hash);
- while (true) {
- Group g{ctrl_ + seq.offset()};
- auto mask = g.MatchEmptyOrDeleted();
- if (mask) {
-#if !defined(NDEBUG)
- // We want to add entropy even when ASLR is not enabled.
- // In debug build we will randomly insert in either the front or back of
- // the group.
- // TODO(kfm,sbenza): revisit after we do unconditional mixing
- if (!is_small() && ShouldInsertBackwards(hash, ctrl_)) {
- return {seq.offset(mask.HighestBitSet()), seq.index()};
- }
-#endif
- return {seq.offset(mask.LowestBitSet()), seq.index()};
- }
- seq.next();
- assert(seq.index() < capacity_ && "full table!");
- }
- }
-
// TODO(alkis): Optimize this assuming *this and that don't overlap.
raw_hash_set& move_assign(raw_hash_set&& that, std::true_type) {
raw_hash_set tmp(std::move(that));
@@ -1696,7 +1720,7 @@ class raw_hash_set {
template <class K>
std::pair<size_t, bool> find_or_prepare_insert(const K& key) {
auto hash = hash_ref()(key);
- auto seq = probe(hash);
+ auto seq = probe(ctrl_, hash, capacity_);
while (true) {
Group g{ctrl_ + seq.offset()};
for (int i : g.Match(H2(hash))) {
@@ -1713,11 +1737,11 @@ class raw_hash_set {
}
size_t prepare_insert(size_t hash) ABSL_ATTRIBUTE_NOINLINE {
- auto target = find_first_non_full(hash);
+ auto target = find_first_non_full(ctrl_, hash, capacity_);
if (ABSL_PREDICT_FALSE(growth_left() == 0 &&
!IsDeleted(ctrl_[target.offset]))) {
rehash_and_grow_if_necessary();
- target = find_first_non_full(hash);
+ target = find_first_non_full(ctrl_, hash, capacity_);
}
++size_;
growth_left() -= IsEmpty(ctrl_[target.offset]);
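prepare_insert in the hunk above grows the table only when the growth budget is exhausted and the chosen slot is not a tombstone; recycling a deleted slot never forces a rehash. A toy sketch of just that decision (invented names and inputs, not the real API):

#include <cstddef>

// Toy inputs standing in for growth_left() and IsDeleted(ctrl_[offset]).
// slot_before / slot_after_growth model find_first_non_full's answers before
// and after rehash_and_grow_if_necessary() would run.
inline size_t toy_prepare_insert(size_t slot_before, size_t slot_after_growth,
                                 size_t growth_left, bool slot_is_deleted) {
  if (growth_left == 0 && !slot_is_deleted) {
    // The whole table is reallocated here, so the slot must be found again.
    return slot_after_growth;
  }
  return slot_before;  // enough budget, or we are reusing a tombstone
}

int main() {
  // Out of budget and the target is a genuinely empty slot: grow and re-probe.
  bool grew = toy_prepare_insert(/*slot_before=*/3, /*slot_after_growth=*/9,
                                 /*growth_left=*/0,
                                 /*slot_is_deleted=*/false) == 9;
  return grew ? 0 : 1;
}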
@@ -1750,10 +1774,6 @@ class raw_hash_set {
private:
friend struct RawHashSetTestOnlyAccess;
- probe_seq<Group::kWidth> probe(size_t hash) const {
- return probe_seq<Group::kWidth>(H1(hash, ctrl_), capacity_);
- }
-
// Reset all ctrl bytes back to kEmpty, except the sentinel.
void reset_ctrl() {
std::memset(ctrl_, kEmpty, capacity_ + Group::kWidth);
@@ -1783,22 +1803,6 @@ class raw_hash_set {
size_t& growth_left() { return settings_.template get<0>(); }
- // The representation of the object has two modes:
- // - small: For capacities < kWidth-1
- // - large: For the rest.
- //
- // Differences:
- // - In small mode we are able to use the whole capacity. The extra control
- // bytes give us at least one "empty" control byte to stop the iteration.
- // This is important to make 1 a valid capacity.
- //
- // - In small mode only the first `capacity()` control bytes after the
- // sentinel are valid. The rest contain dummy kEmpty values that do not
- // represent a real slot. This is important to take into account on
- // find_first_non_full(), where we never try ShouldInsertBackwards() for
- // small tables.
- bool is_small() const { return capacity_ < Group::kWidth - 1; }
-
hasher& hash_ref() { return settings_.template get<1>(); }
const hasher& hash_ref() const { return settings_.template get<1>(); }
key_equal& eq_ref() { return settings_.template get<2>(); }
@@ -1842,7 +1846,7 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
const typename Set::key_type& key) {
size_t num_probes = 0;
size_t hash = set.hash_ref()(key);
- auto seq = set.probe(hash);
+ auto seq = probe(set.ctrl_, hash, set.capacity_);
while (true) {
container_internal::Group g{set.ctrl_ + seq.offset()};
for (int i : g.Match(container_internal::H2(hash))) {