Diffstat (limited to 'absl')
-rw-r--r--   absl/container/internal/raw_hash_set.h   | 138
-rw-r--r--   absl/flags/reflection.cc                  |   2
-rw-r--r--   absl/flags/reflection_test.cc             |   2
-rw-r--r--   absl/hash/internal/city.cc                |   9
-rw-r--r--   absl/hash/internal/city.h                 |  18
5 files changed, 78 insertions, 91 deletions
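
The raw_hash_set.h change below hoists probe(), is_small(), and find_first_non_full() out of the raw_hash_set class into namespace-scope helpers that take the table state (control array, hash, capacity) as explicit parameters. For readers new to these internals, here is a simplified, self-contained model of the probe_seq template those helpers drive; it mirrors the shape of the real one (Width-sized groups, capacity used as a 2^k-1 wrap mask) but is an illustrative sketch, not the actual header.

    #include <cstddef>
    #include <cstdio>

    // Simplified model of probe_seq<Width> from raw_hash_set.h. Capacity is
    // 2^k - 1, so it doubles as the wrap-around bit mask. Each next() advances
    // the offset by one more Width than the last step (Width, 2*Width,
    // 3*Width, ...), i.e. triangular probing, which touches every group once
    // per cycle.
    template <size_t Width>
    class ProbeSeq {
     public:
      ProbeSeq(size_t h1, size_t mask) : mask_(mask), offset_(h1 & mask) {}
      size_t offset() const { return offset_; }  // start of the current group
      size_t offset(size_t i) const { return (offset_ + i) & mask_; }
      size_t index() const { return index_; }    // total distance probed
      void next() {
        index_ += Width;
        offset_ = (offset_ + index_) & mask_;
      }

     private:
      size_t mask_;
      size_t offset_;
      size_t index_ = 0;
    };

    int main() {
      ProbeSeq<16> seq(/*h1=*/5, /*mask=*/63);  // 64 slots, 16-byte groups
      for (int i = 0; i < 4; ++i) {
        std::printf("group at %zu (index %zu)\n", seq.offset(), seq.index());
        seq.next();
      }
    }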
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 5fbe56b9..02158c4e 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -513,6 +513,64 @@ inline void AssertIsValid(ctrl_t* ctrl) {
          "been erased, or the table might have rehashed.");
 }
 
+struct FindInfo {
+  size_t offset;
+  size_t probe_length;
+};
+
+// The representation of the object has two modes:
+// - small: For capacities < kWidth-1
+// - large: For the rest.
+//
+// Differences:
+// - In small mode we are able to use the whole capacity. The extra control
+//   bytes give us at least one "empty" control byte to stop the iteration.
+//   This is important to make 1 a valid capacity.
+//
+// - In small mode only the first `capacity()` control bytes after the
+//   sentinel are valid. The rest contain dummy kEmpty values that do not
+//   represent a real slot. This is important to take into account on
+//   find_first_non_full(), where we never try ShouldInsertBackwards() for
+//   small tables.
+inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; }
+
+inline probe_seq<Group::kWidth> probe(ctrl_t* ctrl, size_t hash,
+                                      size_t capacity) {
+  return probe_seq<Group::kWidth>(H1(hash, ctrl), capacity);
+}
+
+// Probes the raw_hash_set with the probe sequence for hash and returns the
+// pointer to the first empty or deleted slot.
+// NOTE: this function must work with tables having both kEmpty and kDeleted
+// in one group. Such tables appear during drop_deletes_without_resize.
+//
+// This function is very useful when insertions happen and:
+// - the input is already a set
+// - there are enough slots
+// - the element with the hash is not in the table
+inline FindInfo find_first_non_full(ctrl_t* ctrl, size_t hash,
+                                    size_t capacity) {
+  auto seq = probe(ctrl, hash, capacity);
+  while (true) {
+    Group g{ctrl + seq.offset()};
+    auto mask = g.MatchEmptyOrDeleted();
+    if (mask) {
+#if !defined(NDEBUG)
+      // We want to add entropy even when ASLR is not enabled.
+      // In debug builds we will randomly insert in either the front or back
+      // of the group.
+      // TODO(kfm,sbenza): revisit after we do unconditional mixing
+      if (!is_small(capacity) && ShouldInsertBackwards(hash, ctrl)) {
+        return {seq.offset(mask.HighestBitSet()), seq.index()};
+      }
+#endif
+      return {seq.offset(mask.LowestBitSet()), seq.index()};
+    }
+    seq.next();
+    assert(seq.index() < capacity && "full table!");
+  }
+}
+
 // Policy: a policy defines how to perform different operations on
 // the slots of the hashtable (see hash_policy_traits.h for the full interface
 // of policy).
@@ -845,7 +903,7 @@ class raw_hash_set {
     // than a full `insert`.
     for (const auto& v : that) {
       const size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, v);
-      auto target = find_first_non_full(hash);
+      auto target = find_first_non_full(ctrl_, hash, capacity_);
       set_ctrl(target.offset, H2(hash));
       emplace_at(target.offset, v);
       infoz_.RecordInsert(hash, target.probe_length);
@@ -1297,7 +1355,7 @@ class raw_hash_set {
   void prefetch(const key_arg<K>& key) const {
     (void)key;
 #if defined(__GNUC__)
-    auto seq = probe(hash_ref()(key));
+    auto seq = probe(ctrl_, hash_ref()(key), capacity_);
     __builtin_prefetch(static_cast<const void*>(ctrl_ + seq.offset()));
     __builtin_prefetch(static_cast<const void*>(slots_ + seq.offset()));
 #endif  // __GNUC__
@@ -1312,7 +1370,7 @@ class raw_hash_set {
   // called heterogeneous key support.
   template <class K = key_type>
   iterator find(const key_arg<K>& key, size_t hash) {
-    auto seq = probe(hash);
+    auto seq = probe(ctrl_, hash, capacity_);
     while (true) {
       Group g{ctrl_ + seq.offset()};
       for (int i : g.Match(H2(hash))) {
@@ -1534,7 +1592,7 @@ class raw_hash_set {
       if (IsFull(old_ctrl[i])) {
         size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
                                           PolicyTraits::element(old_slots + i));
-        auto target = find_first_non_full(hash);
+        auto target = find_first_non_full(ctrl_, hash, capacity_);
         size_t new_i = target.offset;
         total_probe_length += target.probe_length;
         set_ctrl(new_i, H2(hash));
@@ -1553,7 +1611,7 @@ class raw_hash_set {
 
   void drop_deletes_without_resize() ABSL_ATTRIBUTE_NOINLINE {
     assert(IsValidCapacity(capacity_));
-    assert(!is_small());
+    assert(!is_small(capacity_));
     // Algorithm:
     // - mark all DELETED slots as EMPTY
     // - mark all FULL slots as DELETED
@@ -1578,7 +1636,7 @@ class raw_hash_set {
       if (!IsDeleted(ctrl_[i])) continue;
       size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
                                         PolicyTraits::element(slots_ + i));
-      auto target = find_first_non_full(hash);
+      auto target = find_first_non_full(ctrl_, hash, capacity_);
       size_t new_i = target.offset;
       total_probe_length += target.probe_length;
@@ -1586,7 +1644,8 @@
       // If they do, we don't need to move the object as it falls already in the
       // best probe we can.
       const auto probe_index = [&](size_t pos) {
-        return ((pos - probe(hash).offset()) & capacity_) / Group::kWidth;
+        return ((pos - probe(ctrl_, hash, capacity_).offset()) & capacity_) /
+               Group::kWidth;
       };
 
       // Element doesn't move.
@@ -1630,7 +1689,7 @@ class raw_hash_set {
 
   bool has_element(const value_type& elem) const {
     size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, elem);
-    auto seq = probe(hash);
+    auto seq = probe(ctrl_, hash, capacity_);
     while (true) {
       Group g{ctrl_ + seq.offset()};
       for (int i : g.Match(H2(hash))) {
@@ -1645,41 +1704,6 @@
     return false;
   }
 
-  // Probes the raw_hash_set with the probe sequence for hash and returns the
-  // pointer to the first empty or deleted slot.
-  // NOTE: this function must work with tables having both kEmpty and kDeleted
-  // in one group. Such tables appear during drop_deletes_without_resize.
-  //
-  // This function is very useful when insertions happen and:
-  // - the input is already a set
-  // - there are enough slots
-  // - the element with the hash is not in the table
-  struct FindInfo {
-    size_t offset;
-    size_t probe_length;
-  };
-  FindInfo find_first_non_full(size_t hash) {
-    auto seq = probe(hash);
-    while (true) {
-      Group g{ctrl_ + seq.offset()};
-      auto mask = g.MatchEmptyOrDeleted();
-      if (mask) {
-#if !defined(NDEBUG)
-        // We want to add entropy even when ASLR is not enabled.
-        // In debug builds we will randomly insert in either the front or back
-        // of the group.
-        // TODO(kfm,sbenza): revisit after we do unconditional mixing
-        if (!is_small() && ShouldInsertBackwards(hash, ctrl_)) {
-          return {seq.offset(mask.HighestBitSet()), seq.index()};
-        }
-#endif
-        return {seq.offset(mask.LowestBitSet()), seq.index()};
-      }
-      seq.next();
-      assert(seq.index() < capacity_ && "full table!");
-    }
-  }
-
   // TODO(alkis): Optimize this assuming *this and that don't overlap.
   raw_hash_set& move_assign(raw_hash_set&& that, std::true_type) {
     raw_hash_set tmp(std::move(that));
@@ -1696,7 +1720,7 @@ class raw_hash_set {
   template <class K>
   std::pair<size_t, bool> find_or_prepare_insert(const K& key) {
     auto hash = hash_ref()(key);
-    auto seq = probe(hash);
+    auto seq = probe(ctrl_, hash, capacity_);
     while (true) {
       Group g{ctrl_ + seq.offset()};
       for (int i : g.Match(H2(hash))) {
@@ -1713,11 +1737,11 @@
   }
 
   size_t prepare_insert(size_t hash) ABSL_ATTRIBUTE_NOINLINE {
-    auto target = find_first_non_full(hash);
+    auto target = find_first_non_full(ctrl_, hash, capacity_);
     if (ABSL_PREDICT_FALSE(growth_left() == 0 &&
                            !IsDeleted(ctrl_[target.offset]))) {
       rehash_and_grow_if_necessary();
-      target = find_first_non_full(hash);
+      target = find_first_non_full(ctrl_, hash, capacity_);
     }
     ++size_;
     growth_left() -= IsEmpty(ctrl_[target.offset]);
@@ -1750,10 +1774,6 @@ class raw_hash_set {
  private:
   friend struct RawHashSetTestOnlyAccess;
 
-  probe_seq<Group::kWidth> probe(size_t hash) const {
-    return probe_seq<Group::kWidth>(H1(hash, ctrl_), capacity_);
-  }
-
   // Reset all ctrl bytes back to kEmpty, except the sentinel.
   void reset_ctrl() {
     std::memset(ctrl_, kEmpty, capacity_ + Group::kWidth);
@@ -1783,22 +1803,6 @@
 
   size_t& growth_left() { return settings_.template get<0>(); }
 
-  // The representation of the object has two modes:
-  // - small: For capacities < kWidth-1
-  // - large: For the rest.
-  //
-  // Differences:
-  // - In small mode we are able to use the whole capacity. The extra control
-  //   bytes give us at least one "empty" control byte to stop the iteration.
-  //   This is important to make 1 a valid capacity.
-  //
-  // - In small mode only the first `capacity()` control bytes after the
-  //   sentinel are valid. The rest contain dummy kEmpty values that do not
-  //   represent a real slot. This is important to take into account on
-  //   find_first_non_full(), where we never try ShouldInsertBackwards() for
-  //   small tables.
-  bool is_small() const { return capacity_ < Group::kWidth - 1; }
-
   hasher& hash_ref() { return settings_.template get<1>(); }
   const hasher& hash_ref() const { return settings_.template get<1>(); }
   key_equal& eq_ref() { return settings_.template get<2>(); }
@@ -1842,7 +1846,7 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
       const typename Set::key_type& key) {
     size_t num_probes = 0;
     size_t hash = set.hash_ref()(key);
-    auto seq = set.probe(hash);
+    auto seq = probe(set.ctrl_, hash, set.capacity_);
     while (true) {
       container_internal::Group g{set.ctrl_ + seq.offset()};
       for (int i : g.Match(container_internal::H2(hash))) {
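
The mechanical part of the hunks above is the call-site rewrite: each probe(hash) or find_first_non_full(hash) member call becomes a free call passing ctrl_ and capacity_ explicitly, which is what lets HashtableDebugAccess::GetNumProbes (last hunk) run the same probe loop without going through the container. Below is a toy standalone analogue of the new function shape; it uses linear stepping and a made-up control-byte convention rather than Abseil's group-wide match, so it only illustrates the explicit-state signature and the full-table assert.

    #include <cassert>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Toy stand-in for ctrl_t: negative means empty/deleted, non-negative
    // means a full slot (the real encoding carries H2 bits and a sentinel).
    using ctrl_t = signed char;

    struct FindInfo {
      size_t offset;
      size_t probe_length;
    };

    // Same shape as the new free function: all table state arrives as
    // parameters. `capacity` is 2^k - 1 and doubles as the wrap mask.
    FindInfo FindFirstNonFull(const ctrl_t* ctrl, size_t hash, size_t capacity) {
      size_t offset = hash & capacity;
      size_t probe_length = 0;
      while (true) {
        if (ctrl[offset] < 0) return {offset, probe_length};
        offset = (offset + 1) & capacity;  // linear step; Abseil jumps by groups
        ++probe_length;
        assert(probe_length <= capacity && "full table!");
      }
    }

    int main() {
      // Mask 7 -> 8 control bytes in this toy layout; slot 3 is empty.
      std::vector<ctrl_t> ctrl = {1, 1, 1, -128, 1, 1, 1, 1};
      FindInfo info = FindFirstNonFull(ctrl.data(), /*hash=*/1, /*capacity=*/7);
      std::printf("offset=%zu probe_length=%zu\n", info.offset, info.probe_length);
    }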
diff --git a/absl/flags/reflection.cc b/absl/flags/reflection.cc
index c6bf8aab..c976d467 100644
--- a/absl/flags/reflection.cc
+++ b/absl/flags/reflection.cc
@@ -328,7 +328,7 @@ CommandLineFlag* FindCommandLineFlag(absl::string_view name) {
 absl::flat_hash_map<absl::string_view, absl::CommandLineFlag*> GetAllFlags() {
   absl::flat_hash_map<absl::string_view, absl::CommandLineFlag*> res;
   flags_internal::ForEachFlag([&](CommandLineFlag& flag) {
-    res.insert({flag.Name(), &flag});
+    if (!flag.IsRetired()) res.insert({flag.Name(), &flag});
   });
   return res;
 }
diff --git a/absl/flags/reflection_test.cc b/absl/flags/reflection_test.cc
index 1a1dcb4a..8620d140 100644
--- a/absl/flags/reflection_test.cc
+++ b/absl/flags/reflection_test.cc
@@ -70,7 +70,7 @@ TEST_F(ReflectionTest, TestGetAllFlags) {
   auto all_flags = absl::GetAllFlags();
 
   EXPECT_NE(all_flags.find("int_flag"), all_flags.end());
-  EXPECT_NE(all_flags.find("bool_retired_flag"), all_flags.end());
+  EXPECT_EQ(all_flags.find("bool_retired_flag"), all_flags.end());
   EXPECT_NE(all_flags.find("help"), all_flags.end());
   EXPECT_EQ(all_flags.find("some_undefined_flag"), all_flags.end());
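
The reflection change narrows GetAllFlags() to live flags: retired flags no longer appear during enumeration, and the test flips its expectation to match. A sketch of the resulting behavior follows; the two registrations are hypothetical stand-ins for what reflection_test.cc actually defines, and the last line assumes retired flags remain individually findable via FindCommandLineFlag, a path this diff does not touch.

    #include <cstdio>

    #include "absl/flags/flag.h"
    #include "absl/flags/reflection.h"

    // Hypothetical registrations mirroring the test fixture.
    ABSL_FLAG(int, int_flag, 1, "a live flag");
    ABSL_RETIRED_FLAG(bool, bool_retired_flag, false, "a retired flag");

    int main() {
      auto all_flags = absl::GetAllFlags();
      // Live flags are enumerated; retired ones are now filtered out.
      std::printf("int_flag listed: %d\n",
                  all_flags.find("int_flag") != all_flags.end());           // 1
      std::printf("bool_retired_flag listed: %d\n",
                  all_flags.find("bool_retired_flag") != all_flags.end());  // 0
      // Direct lookup is a separate code path (assumed unchanged here).
      std::printf("bool_retired_flag findable: %d\n",
                  absl::FindCommandLineFlag("bool_retired_flag") != nullptr);
    }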
diff --git a/absl/hash/internal/city.cc b/absl/hash/internal/city.cc
index 58d4bcb1..5460134e 100644
--- a/absl/hash/internal/city.cc
+++ b/absl/hash/internal/city.cc
@@ -200,10 +200,6 @@ static uint64_t Rotate(uint64_t val, int shift) {
 
 static uint64_t ShiftMix(uint64_t val) { return val ^ (val >> 47); }
 
-static uint64_t HashLen16(uint64_t u, uint64_t v) {
-  return Hash128to64(uint128(u, v));
-}
-
 static uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
   // Murmur-inspired hashing.
   uint64_t a = (u ^ v) * mul;
@@ -214,6 +210,11 @@ static uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
   return b;
 }
 
+static uint64_t HashLen16(uint64_t u, uint64_t v) {
+  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
+  return HashLen16(u, v, kMul);
+}
+
 static uint64_t HashLen0to16(const char *s, size_t len) {
   if (len >= 8) {
     uint64_t mul = k2 + len * 2;
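
The city.cc hunk is behavior-preserving: the deleted two-argument HashLen16 routed through Hash128to64 (removed from city.h below) with a fixed Murmur-style multiplier, and its replacement forwards to the existing three-argument overload with that same constant, so identical bits flow through the same mix. A standalone copy of both formulations, outside Abseil, checking that they agree bit for bit:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <utility>

    static uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
      uint64_t a = (u ^ v) * mul;
      a ^= (a >> 47);
      uint64_t b = (v ^ a) * mul;
      b ^= (b >> 47);
      b *= mul;
      return b;
    }

    // The removed path: Hash128to64 over a (low, high) pair, as city.h had it.
    static uint64_t OldHash128to64(std::pair<uint64_t, uint64_t> x) {
      const uint64_t kMul = 0x9ddfea08eb382d69ULL;
      uint64_t a = (x.first ^ x.second) * kMul;
      a ^= (a >> 47);
      uint64_t b = (x.second ^ a) * kMul;
      b ^= (b >> 47);
      b *= kMul;
      return b;
    }

    // The new path: forward to the three-argument overload with kMul.
    static uint64_t NewHashLen16(uint64_t u, uint64_t v) {
      const uint64_t kMul = 0x9ddfea08eb382d69ULL;
      return HashLen16(u, v, kMul);
    }

    int main() {
      for (uint64_t u : {uint64_t{0}, uint64_t{1}, uint64_t{0xdeadbeef}, ~uint64_t{0}}) {
        for (uint64_t v : {uint64_t{0}, uint64_t{42}, uint64_t{0x123456789abcdef}}) {
          assert(NewHashLen16(u, v) == OldHash128to64({u, v}));
        }
      }
      std::puts("HashLen16 refactor is behavior-preserving");
    }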
diff --git a/absl/hash/internal/city.h b/absl/hash/internal/city.h
index 9c1e7a57..393da0b9 100644
--- a/absl/hash/internal/city.h
+++ b/absl/hash/internal/city.h
@@ -56,11 +56,6 @@ namespace absl {
 ABSL_NAMESPACE_BEGIN
 namespace hash_internal {
 
-typedef std::pair<uint64_t, uint64_t> uint128;
-
-inline uint64_t Uint128Low64(const uint128 &x) { return x.first; }
-inline uint64_t Uint128High64(const uint128 &x) { return x.second; }
-
 // Hash function for a byte array.
 uint64_t CityHash64(const char *s, size_t len);
 
@@ -76,19 +71,6 @@
 // Hash function for a byte array. Most useful in 32-bit binaries.
 uint32_t CityHash32(const char *s, size_t len);
 
-// Hash 128 input bits down to 64 bits of output.
-// This is intended to be a reasonably good hash function.
-inline uint64_t Hash128to64(const uint128 &x) {
-  // Murmur-inspired hashing.
-  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
-  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
-  a ^= (a >> 47);
-  uint64_t b = (Uint128High64(x) ^ a) * kMul;
-  b ^= (b >> 47);
-  b *= kMul;
-  return b;
-}
-
 }  // namespace hash_internal
 ABSL_NAMESPACE_END
 }  // namespace absl