author     Abseil Team <absl-team@google.com>       2020-11-17 11:25:13 -0800
committer  Derek Mauro <dmauro@google.com>          2020-11-17 14:40:39 -0500
commit     4ae6730677ea3c2984f8bb0e4919bd0d9dd04f73  (patch)
tree       ae9d6d041faaff022f90fb0132ec53850c75512b
parent     1b465af3bf865f588251470ea0dec60851a24041  (diff)
Export of internal Abseil changes
--
77e2a9c277721f23a8df983c1efc6ed97c167964 by Derek Mauro <dmauro@google.com>:
Simplify an internal piece of CityHash to remove the conflicting
definition of uint128
PiperOrigin-RevId: 342906008
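In condensed form (the full city.cc/city.h diff is below): the two-argument `HashLen16` now delegates to the existing three-argument overload, using the multiplier that `Hash128to64` used internally, so the pair-based `uint128` typedef and `Hash128to64` can be removed from city.h.

```cpp
#include <cstdint>

// Murmur-inspired mix of two 64-bit values, parameterized on the multiplier.
static uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
  uint64_t a = (u ^ v) * mul;
  a ^= (a >> 47);
  uint64_t b = (v ^ a) * mul;
  b ^= (b >> 47);
  b *= mul;
  return b;
}

// Was: return Hash128to64(uint128(u, v));
static uint64_t HashLen16(uint64_t u, uint64_t v) {
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;  // Hash128to64's multiplier.
  return HashLen16(u, v, kMul);
}
```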
--
593dbb6d5fd32cc5d31e3ba1eda02e8ddeaeaaf6 by Gennadiy Rozental <rogeeff@google.com>:
Skip retired flags in GetAllFlags output.
This is a bug fix. This interface should never have been released with retired flags in its output. There should be no observable difference for users, who should not care about retired flags.
PiperOrigin-RevId: 342889378
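The fix is a one-line filter in `GetAllFlags()` (shown in context in the diff below); the function now reads:

```cpp
absl::flat_hash_map<absl::string_view, absl::CommandLineFlag*> GetAllFlags() {
  absl::flat_hash_map<absl::string_view, absl::CommandLineFlag*> res;
  flags_internal::ForEachFlag([&](CommandLineFlag& flag) {
    // Retired flags are placeholders for deleted flags; skip them.
    if (!flag.IsRetired()) res.insert({flag.Name(), &flag});
  });
  return res;
}
```

The reflection test's expectation for `bool_retired_flag` flips from `EXPECT_NE` to `EXPECT_EQ` accordingly.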
--
bb77e07abff4dbd0a9c97eb85ee85cb39b84d04a by Abseil Team <absl-team@google.com>:
Extract `find_first_non_full` outside of the raw_hash_set class.
This function is used in the following scenarios:
0. [relatively hot] insert, when an actual new element is added.
1. [relatively cold] resize (explicit or on capacity growth)
2. [relatively cold] copy constructor
3. [cold] rehash on insert/erase (aka cache) use cases
Resize is typically mitigated by `reserve` in performance-critical cases. Rehashing happens relatively rarely, when the hash table becomes polluted with deleted slots.
We keep `find_first_non_full` in the header so that the compiler can still inline it when necessary (most notably in the insert use case).
This reduces binary size, since only one copy of the function is present in the binary for all tables in which it is not inlined in at least one call site (see the condensed sketch below).
PiperOrigin-RevId: 342736300
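Condensed from the diff below: `is_small`, `probe`, and `find_first_non_full` become free functions in `container_internal` that take the control array and capacity explicitly, and call sites inside the class pass `ctrl_` and `capacity_`.

```cpp
// Previously private members of raw_hash_set; now free functions that every
// table instantiation can share.
inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; }

inline probe_seq<Group::kWidth> probe(ctrl_t* ctrl, size_t hash,
                                      size_t capacity) {
  return probe_seq<Group::kWidth>(H1(hash, ctrl), capacity);
}

// Finds the first empty or deleted slot for `hash`; kept in the header so it
// can still be inlined at hot call sites.
inline FindInfo find_first_non_full(ctrl_t* ctrl, size_t hash, size_t capacity);

// Call sites inside the class change from
//   auto target = find_first_non_full(hash);
// to
//   auto target = find_first_non_full(ctrl_, hash, capacity_);
```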
GitOrigin-RevId: 77e2a9c277721f23a8df983c1efc6ed97c167964
Change-Id: I3fe9d054c66049bb598ea35c45fc800b1cdaa9b6
-rw-r--r--  absl/container/internal/raw_hash_set.h  | 138
-rw-r--r--  absl/flags/reflection.cc                 |   2
-rw-r--r--  absl/flags/reflection_test.cc            |   2
-rw-r--r--  absl/hash/internal/city.cc               |   9
-rw-r--r--  absl/hash/internal/city.h                |  18
5 files changed, 78 insertions, 91 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 5fbe56b9..02158c4e 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -513,6 +513,64 @@ inline void AssertIsValid(ctrl_t* ctrl) {
                         "been erased, or the table might have rehashed.");
 }
 
+struct FindInfo {
+  size_t offset;
+  size_t probe_length;
+};
+
+// The representation of the object has two modes:
+// - small: For capacities < kWidth-1
+// - large: For the rest.
+//
+// Differences:
+// - In small mode we are able to use the whole capacity. The extra control
+//   bytes give us at least one "empty" control byte to stop the iteration.
+//   This is important to make 1 a valid capacity.
+//
+// - In small mode only the first `capacity()` control bytes after the
+//   sentinel are valid. The rest contain dummy kEmpty values that do not
+//   represent a real slot. This is important to take into account on
+//   find_first_non_full(), where we never try ShouldInsertBackwards() for
+//   small tables.
+inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; }
+
+inline probe_seq<Group::kWidth> probe(ctrl_t* ctrl, size_t hash,
+                                      size_t capacity) {
+  return probe_seq<Group::kWidth>(H1(hash, ctrl), capacity);
+}
+
+// Probes the raw_hash_set with the probe sequence for hash and returns the
+// pointer to the first empty or deleted slot.
+// NOTE: this function must work with tables having both kEmpty and kDelete
+// in one group. Such tables appears during drop_deletes_without_resize.
+//
+// This function is very useful when insertions happen and:
+// - the input is already a set
+// - there are enough slots
+// - the element with the hash is not in the table
+inline FindInfo find_first_non_full(ctrl_t* ctrl, size_t hash,
+                                    size_t capacity) {
+  auto seq = probe(ctrl, hash, capacity);
+  while (true) {
+    Group g{ctrl + seq.offset()};
+    auto mask = g.MatchEmptyOrDeleted();
+    if (mask) {
+#if !defined(NDEBUG)
+      // We want to add entropy even when ASLR is not enabled.
+      // In debug build we will randomly insert in either the front or back of
+      // the group.
+      // TODO(kfm,sbenza): revisit after we do unconditional mixing
+      if (!is_small(capacity) && ShouldInsertBackwards(hash, ctrl)) {
+        return {seq.offset(mask.HighestBitSet()), seq.index()};
+      }
+#endif
+      return {seq.offset(mask.LowestBitSet()), seq.index()};
+    }
+    seq.next();
+    assert(seq.index() < capacity && "full table!");
+  }
+}
+
 // Policy: a policy defines how to perform different operations on
 // the slots of the hashtable (see hash_policy_traits.h for the full interface
 // of policy).
@@ -845,7 +903,7 @@ class raw_hash_set {
     // than a full `insert`.
     for (const auto& v : that) {
       const size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, v);
-      auto target = find_first_non_full(hash);
+      auto target = find_first_non_full(ctrl_, hash, capacity_);
       set_ctrl(target.offset, H2(hash));
       emplace_at(target.offset, v);
       infoz_.RecordInsert(hash, target.probe_length);
@@ -1297,7 +1355,7 @@
   void prefetch(const key_arg<K>& key) const {
     (void)key;
 #if defined(__GNUC__)
-    auto seq = probe(hash_ref()(key));
+    auto seq = probe(ctrl_, hash_ref()(key), capacity_);
     __builtin_prefetch(static_cast<const void*>(ctrl_ + seq.offset()));
     __builtin_prefetch(static_cast<const void*>(slots_ + seq.offset()));
 #endif  // __GNUC__
@@ -1312,7 +1370,7 @@ class raw_hash_set {
   // called heterogeneous key support.
   template <class K = key_type>
   iterator find(const key_arg<K>& key, size_t hash) {
-    auto seq = probe(hash);
+    auto seq = probe(ctrl_, hash, capacity_);
     while (true) {
       Group g{ctrl_ + seq.offset()};
       for (int i : g.Match(H2(hash))) {
@@ -1534,7 +1592,7 @@ class raw_hash_set {
       if (IsFull(old_ctrl[i])) {
         size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
                                           PolicyTraits::element(old_slots + i));
-        auto target = find_first_non_full(hash);
+        auto target = find_first_non_full(ctrl_, hash, capacity_);
         size_t new_i = target.offset;
         total_probe_length += target.probe_length;
         set_ctrl(new_i, H2(hash));
@@ -1553,7 +1611,7 @@ class raw_hash_set {
 
   void drop_deletes_without_resize() ABSL_ATTRIBUTE_NOINLINE {
     assert(IsValidCapacity(capacity_));
-    assert(!is_small());
+    assert(!is_small(capacity_));
     // Algorithm:
     // - mark all DELETED slots as EMPTY
     // - mark all FULL slots as DELETED
@@ -1578,7 +1636,7 @@ class raw_hash_set {
      if (!IsDeleted(ctrl_[i])) continue;
      size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
                                        PolicyTraits::element(slots_ + i));
-      auto target = find_first_non_full(hash);
+      auto target = find_first_non_full(ctrl_, hash, capacity_);
       size_t new_i = target.offset;
       total_probe_length += target.probe_length;
 
@@ -1586,7 +1644,8 @@ class raw_hash_set {
       // If they do, we don't need to move the object as it falls already in the
       // best probe we can.
       const auto probe_index = [&](size_t pos) {
-        return ((pos - probe(hash).offset()) & capacity_) / Group::kWidth;
+        return ((pos - probe(ctrl_, hash, capacity_).offset()) & capacity_) /
+               Group::kWidth;
       };
 
       // Element doesn't move.
@@ -1630,7 +1689,7 @@ class raw_hash_set {
 
   bool has_element(const value_type& elem) const {
     size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, elem);
-    auto seq = probe(hash);
+    auto seq = probe(ctrl_, hash, capacity_);
     while (true) {
       Group g{ctrl_ + seq.offset()};
       for (int i : g.Match(H2(hash))) {
@@ -1645,41 +1704,6 @@ class raw_hash_set {
     return false;
   }
 
-  // Probes the raw_hash_set with the probe sequence for hash and returns the
-  // pointer to the first empty or deleted slot.
-  // NOTE: this function must work with tables having both kEmpty and kDelete
-  // in one group. Such tables appears during drop_deletes_without_resize.
-  //
-  // This function is very useful when insertions happen and:
-  // - the input is already a set
-  // - there are enough slots
-  // - the element with the hash is not in the table
-  struct FindInfo {
-    size_t offset;
-    size_t probe_length;
-  };
-  FindInfo find_first_non_full(size_t hash) {
-    auto seq = probe(hash);
-    while (true) {
-      Group g{ctrl_ + seq.offset()};
-      auto mask = g.MatchEmptyOrDeleted();
-      if (mask) {
-#if !defined(NDEBUG)
-        // We want to add entropy even when ASLR is not enabled.
-        // In debug build we will randomly insert in either the front or back of
-        // the group.
-        // TODO(kfm,sbenza): revisit after we do unconditional mixing
-        if (!is_small() && ShouldInsertBackwards(hash, ctrl_)) {
-          return {seq.offset(mask.HighestBitSet()), seq.index()};
-        }
-#endif
-        return {seq.offset(mask.LowestBitSet()), seq.index()};
-      }
-      seq.next();
-      assert(seq.index() < capacity_ && "full table!");
-    }
-  }
-
   // TODO(alkis): Optimize this assuming *this and that don't overlap.
   raw_hash_set& move_assign(raw_hash_set&& that, std::true_type) {
     raw_hash_set tmp(std::move(that));
@@ -1696,7 +1720,7 @@ class raw_hash_set {
   template <class K>
   std::pair<size_t, bool> find_or_prepare_insert(const K& key) {
     auto hash = hash_ref()(key);
-    auto seq = probe(hash);
+    auto seq = probe(ctrl_, hash, capacity_);
     while (true) {
       Group g{ctrl_ + seq.offset()};
       for (int i : g.Match(H2(hash))) {
@@ -1713,11 +1737,11 @@ class raw_hash_set {
   }
 
   size_t prepare_insert(size_t hash) ABSL_ATTRIBUTE_NOINLINE {
-    auto target = find_first_non_full(hash);
+    auto target = find_first_non_full(ctrl_, hash, capacity_);
     if (ABSL_PREDICT_FALSE(growth_left() == 0 &&
                            !IsDeleted(ctrl_[target.offset]))) {
       rehash_and_grow_if_necessary();
-      target = find_first_non_full(hash);
+      target = find_first_non_full(ctrl_, hash, capacity_);
     }
     ++size_;
     growth_left() -= IsEmpty(ctrl_[target.offset]);
@@ -1750,10 +1774,6 @@ class raw_hash_set {
  private:
   friend struct RawHashSetTestOnlyAccess;
 
-  probe_seq<Group::kWidth> probe(size_t hash) const {
-    return probe_seq<Group::kWidth>(H1(hash, ctrl_), capacity_);
-  }
-
   // Reset all ctrl bytes back to kEmpty, except the sentinel.
   void reset_ctrl() {
     std::memset(ctrl_, kEmpty, capacity_ + Group::kWidth);
@@ -1783,22 +1803,6 @@ class raw_hash_set {
 
   size_t& growth_left() { return settings_.template get<0>(); }
 
-  // The representation of the object has two modes:
-  // - small: For capacities < kWidth-1
-  // - large: For the rest.
-  //
-  // Differences:
-  // - In small mode we are able to use the whole capacity. The extra control
-  //   bytes give us at least one "empty" control byte to stop the iteration.
-  //   This is important to make 1 a valid capacity.
-  //
-  // - In small mode only the first `capacity()` control bytes after the
-  //   sentinel are valid. The rest contain dummy kEmpty values that do not
-  //   represent a real slot. This is important to take into account on
-  //   find_first_non_full(), where we never try ShouldInsertBackwards() for
-  //   small tables.
-  bool is_small() const { return capacity_ < Group::kWidth - 1; }
-
   hasher& hash_ref() { return settings_.template get<1>(); }
   const hasher& hash_ref() const { return settings_.template get<1>(); }
   key_equal& eq_ref() { return settings_.template get<2>(); }
@@ -1842,7 +1846,7 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
                                   const typename Set::key_type& key) {
     size_t num_probes = 0;
     size_t hash = set.hash_ref()(key);
-    auto seq = set.probe(hash);
+    auto seq = probe(set.ctrl_, hash, set.capacity_);
     while (true) {
       container_internal::Group g{set.ctrl_ + seq.offset()};
       for (int i : g.Match(container_internal::H2(hash))) {
diff --git a/absl/flags/reflection.cc b/absl/flags/reflection.cc
index c6bf8aab..c976d467 100644
--- a/absl/flags/reflection.cc
+++ b/absl/flags/reflection.cc
@@ -328,7 +328,7 @@ CommandLineFlag* FindCommandLineFlag(absl::string_view name) {
 absl::flat_hash_map<absl::string_view, absl::CommandLineFlag*> GetAllFlags() {
   absl::flat_hash_map<absl::string_view, absl::CommandLineFlag*> res;
   flags_internal::ForEachFlag([&](CommandLineFlag& flag) {
-    res.insert({flag.Name(), &flag});
+    if (!flag.IsRetired()) res.insert({flag.Name(), &flag});
   });
   return res;
 }
diff --git a/absl/flags/reflection_test.cc b/absl/flags/reflection_test.cc
index 1a1dcb4a..8620d140 100644
--- a/absl/flags/reflection_test.cc
+++ b/absl/flags/reflection_test.cc
@@ -70,7 +70,7 @@ TEST_F(ReflectionTest, TestGetAllFlags) {
   auto all_flags = absl::GetAllFlags();
   EXPECT_NE(all_flags.find("int_flag"), all_flags.end());
-  EXPECT_NE(all_flags.find("bool_retired_flag"), all_flags.end());
+  EXPECT_EQ(all_flags.find("bool_retired_flag"), all_flags.end());
   EXPECT_NE(all_flags.find("help"), all_flags.end());
   EXPECT_EQ(all_flags.find("some_undefined_flag"), all_flags.end());
diff --git a/absl/hash/internal/city.cc b/absl/hash/internal/city.cc
index 58d4bcb1..5460134e 100644
--- a/absl/hash/internal/city.cc
+++ b/absl/hash/internal/city.cc
@@ -200,10 +200,6 @@ static uint64_t Rotate(uint64_t val, int shift) {
 
 static uint64_t ShiftMix(uint64_t val) { return val ^ (val >> 47); }
 
-static uint64_t HashLen16(uint64_t u, uint64_t v) {
-  return Hash128to64(uint128(u, v));
-}
-
 static uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
   // Murmur-inspired hashing.
   uint64_t a = (u ^ v) * mul;
@@ -214,6 +210,11 @@ static uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
   return b;
 }
 
+static uint64_t HashLen16(uint64_t u, uint64_t v) {
+  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
+  return HashLen16(u, v, kMul);
+}
+
 static uint64_t HashLen0to16(const char *s, size_t len) {
   if (len >= 8) {
     uint64_t mul = k2 + len * 2;
diff --git a/absl/hash/internal/city.h b/absl/hash/internal/city.h
index 9c1e7a57..393da0b9 100644
--- a/absl/hash/internal/city.h
+++ b/absl/hash/internal/city.h
@@ -56,11 +56,6 @@ namespace absl {
 ABSL_NAMESPACE_BEGIN
 namespace hash_internal {
 
-typedef std::pair<uint64_t, uint64_t> uint128;
-
-inline uint64_t Uint128Low64(const uint128 &x) { return x.first; }
-inline uint64_t Uint128High64(const uint128 &x) { return x.second; }
-
 // Hash function for a byte array.
 uint64_t CityHash64(const char *s, size_t len);
 
@@ -76,19 +71,6 @@ uint64_t CityHash64WithSeeds(const char *s, size_t len, uint64_t seed0,
 // Hash function for a byte array. Most useful in 32-bit binaries.
 uint32_t CityHash32(const char *s, size_t len);
 
-// Hash 128 input bits down to 64 bits of output.
-// This is intended to be a reasonably good hash function.
-inline uint64_t Hash128to64(const uint128 &x) {
-  // Murmur-inspired hashing.
-  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
-  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
-  a ^= (a >> 47);
-  uint64_t b = (Uint128High64(x) ^ a) * kMul;
-  b ^= (b >> 47);
-  b *= kMul;
-  return b;
-}
-
 }  // namespace hash_internal
 ABSL_NAMESPACE_END
 }  // namespace absl