diff options
author | Abseil Team <absl-team@google.com> | 2022-01-19 11:04:50 -0800 |
---|---|---|
committer | dinord <dino.radakovich@gmail.com> | 2022-01-19 14:36:25 -0500 |
commit | fbbb5865a562c9a9167d71c1cf56b82025a8f065 (patch) | |
tree | 212c8694bb6cf1d76af2b4aabac02c4910f6beab | |
parent | 9bb42857112ad13b23de4333fbb75eb47db9de95 (diff) |
Export of internal Abseil changes
--
487c7a754a3b93bc0f9de14bdced48007a96ae55 by Greg Falcon <gfalcon@google.com>:
Add support for absl::Hash to hash unordered containers. These can now be hashed directly, as well as combined in AbslHashValue implementations.
This also adds a new method, `H::combine_unordered()`, to the public AbslHashValue hash state API. This allows users to implement hash specializations for their own unordered collection types.
A traits class, `H::is_hashable<T>`, is also added to the hash state API. H::is_hashable<T>::value reflects whether type T is considered hashable by the AbslHashValue framework. This allows users to properly SFINAE templated versions of AbslHashValue. (The AbslHashValue implementation added to raw_hash_set shows an example of its use.)
PiperOrigin-RevId: 422856706
GitOrigin-RevId: 487c7a754a3b93bc0f9de14bdced48007a96ae55
Change-Id: Id31fd4ccba282f8c9ae6fcee6ae0ad0f7879f456
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 8 | ||||
-rw-r--r-- | absl/hash/BUILD.bazel | 6 | ||||
-rw-r--r-- | absl/hash/CMakeLists.txt | 5 | ||||
-rw-r--r-- | absl/hash/hash.h | 85 | ||||
-rw-r--r-- | absl/hash/hash_benchmark.cc | 77 | ||||
-rw-r--r-- | absl/hash/hash_test.cc | 94 | ||||
-rw-r--r-- | absl/hash/internal/hash.h | 180 | ||||
-rw-r--r-- | absl/hash/internal/spy_hash_state.h | 35 |
8 files changed, 453 insertions, 37 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 1157d25a..93a3fa85 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1539,6 +1539,14 @@ class raw_hash_set { return !(a == b); } + template <typename H> + friend typename std::enable_if<H::template is_hashable<value_type>::value, + H>::type + AbslHashValue(H h, const raw_hash_set& s) { + return H::combine(H::combine_unordered(std::move(h), s.begin(), s.end()), + s.size()); + } + friend void swap(raw_hash_set& a, raw_hash_set& b) noexcept(noexcept(a.swap(b))) { a.swap(b); diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index f0640d34..bcc316f9 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -41,6 +41,7 @@ cc_library( "//absl/base:core_headers", "//absl/base:endian", "//absl/container:fixed_array", + "//absl/functional:function_ref", "//absl/meta:type_traits", "//absl/numeric:int128", "//absl/strings", @@ -74,7 +75,11 @@ cc_test( ":hash_testing", ":spy_hash_state", "//absl/base:core_headers", + "//absl/container:btree", + "//absl/container:flat_hash_map", "//absl/container:flat_hash_set", + "//absl/container:node_hash_map", + "//absl/container:node_hash_set", "//absl/meta:type_traits", "//absl/numeric:int128", "//absl/strings:cord_test_helpers", @@ -93,6 +98,7 @@ cc_binary( deps = [ ":hash", "//absl/base:core_headers", + "//absl/container:flat_hash_set", "//absl/random", "//absl/strings", "//absl/strings:cord", diff --git a/absl/hash/CMakeLists.txt b/absl/hash/CMakeLists.txt index 5916ae3c..34434fa0 100644 --- a/absl/hash/CMakeLists.txt +++ b/absl/hash/CMakeLists.txt @@ -30,6 +30,7 @@ absl_cc_library( absl::core_headers absl::endian absl::fixed_array + absl::function_ref absl::meta absl::int128 absl::strings @@ -68,7 +69,11 @@ absl_cc_test( absl::hash absl::hash_testing absl::core_headers + absl::btree + absl::flat_hash_map absl::flat_hash_set + absl::node_hash_map + absl::node_hash_set absl::spy_hash_state absl::meta absl::int128 diff --git a/absl/hash/hash.h b/absl/hash/hash.h index 8282ea53..f31fde40 100644 --- a/absl/hash/hash.h +++ b/absl/hash/hash.h @@ -26,9 +26,9 @@ // support Abseil hashing without requiring you to define a hashing // algorithm. // * `HashState`, a type-erased class which implements the manipulation of the -// hash state (H) itself, contains member functions `combine()` and -// `combine_contiguous()`, which you can use to contribute to an existing -// hash state when hashing your types. +// hash state (H) itself; contains member functions `combine()`, +// `combine_contiguous()`, and `combine_unordered()`; and which you can use +// to contribute to an existing hash state when hashing your types. // // Unlike `std::hash` or other hashing frameworks, the Abseil hashing framework // provides most of its utility by abstracting away the hash algorithm (and its @@ -74,7 +74,9 @@ #define ABSL_HASH_HASH_H_ #include <tuple> +#include <utility> +#include "absl/functional/function_ref.h" #include "absl/hash/internal/hash.h" namespace absl { @@ -107,14 +109,27 @@ ABSL_NAMESPACE_BEGIN // * std::string_view (as well as any instance of std::basic_string that // uses char and std::char_traits) // * All the standard sequence containers (provided the elements are hashable) -// * All the standard ordered associative containers (provided the elements are +// * All the standard associative containers (provided the elements are // hashable) // * absl types such as the following: // * absl::string_view -// * absl::InlinedVector -// * absl::FixedArray // * absl::uint128 // * absl::Time, absl::Duration, and absl::TimeZone +// * absl containers (provided the elements are hashable) such as the +// following: +// * absl::flat_hash_set, absl::node_hash_set, absl::btree_set +// * absl::flat_hash_map, absl::node_hash_map, absl::btree_map +// * absl::btree_multiset, absl::btree_multimap +// * absl::InlinedVector +// * absl::FixedArray +// +// When absl::Hash is used to hash an unordered container with a custom hash +// functor, the elements are hashed using default absl::Hash semantics, not +// the custom hash functor. This is consistent with the behavior of +// operator==() on unordered containers, which compares elements pairwise with +// operator==() rather than the custom equality functor. It is usually a +// mistake to use either operator==() or absl::Hash on unordered collections +// that use functors incompatible with operator==() equality. // // Note: the list above is not meant to be exhaustive. Additional type support // may be added, in which case the above list will be updated. @@ -153,7 +168,8 @@ ABSL_NAMESPACE_BEGIN // that are otherwise difficult to extend using `AbslHashValue()`. (See the // `HashState` class below.) // -// The "hash state" concept contains two member functions for mixing hash state: +// The "hash state" concept contains three member functions for mixing hash +// state: // // * `H::combine(state, values...)` // @@ -187,6 +203,15 @@ ABSL_NAMESPACE_BEGIN // (it may perform internal optimizations). If you need this guarantee, use a // loop instead. // +// * `H::combine_unordered(state, begin, end)` +// +// Combines a set of elements denoted by an iterator pair into a hash +// state, returning the updated state. Note that the existing hash +// state is move-only and must be passed by value. +// +// Unlike the other two methods, the hashing is order-independent. +// This can be used to hash unordered collections. +// // ----------------------------------------------------------------------------- // Adding Type Support to `absl::Hash` // ----------------------------------------------------------------------------- @@ -243,8 +268,9 @@ size_t HashOf(const Types&... values) { // classes, virtual functions, etc.). The type erasure adds overhead so it // should be avoided unless necessary. // -// Note: This wrapper will only erase calls to: +// Note: This wrapper will only erase calls to // combine_contiguous(H, const unsigned char*, size_t) +// RunCombineUnordered(H, CombinerF) // // All other calls will be handled internally and will not invoke overloads // provided by the wrapped class. @@ -318,6 +344,8 @@ class HashState : public hash_internal::HashStateBase<HashState> { private: HashState() = default; + friend class HashState::HashStateBase; + template <typename T> static void CombineContiguousImpl(void* p, const unsigned char* first, size_t size) { @@ -329,16 +357,57 @@ class HashState : public hash_internal::HashStateBase<HashState> { void Init(T* state) { state_ = state; combine_contiguous_ = &CombineContiguousImpl<T>; + run_combine_unordered_ = &RunCombineUnorderedImpl<T>; + } + + template <typename HS> + struct CombineUnorderedInvoker { + template <typename T, typename ConsumerT> + void operator()(T inner_state, ConsumerT inner_cb) { + f(HashState::Create(&inner_state), + [&](HashState& inner_erased) { inner_cb(inner_erased.Real<T>()); }); + } + + absl::FunctionRef<void(HS, absl::FunctionRef<void(HS&)>)> f; + }; + + template <typename T> + static HashState RunCombineUnorderedImpl( + HashState state, + absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)> + f) { + // Note that this implementation assumes that inner_state and outer_state + // are the same type. This isn't true in the SpyHash case, but SpyHash + // types are move-convertible to each other, so this still works. + T& real_state = state.Real<T>(); + real_state = T::RunCombineUnordered( + std::move(real_state), CombineUnorderedInvoker<HashState>{f}); + return state; + } + + template <typename CombinerT> + static HashState RunCombineUnordered(HashState state, CombinerT combiner) { + auto* run = state.run_combine_unordered_; + return run(std::move(state), std::ref(combiner)); } // Do not erase an already erased state. void Init(HashState* state) { state_ = state->state_; combine_contiguous_ = state->combine_contiguous_; + run_combine_unordered_ = state->run_combine_unordered_; + } + + template <typename T> + T& Real() { + return *static_cast<T*>(state_); } void* state_; void (*combine_contiguous_)(void*, const unsigned char*, size_t); + HashState (*run_combine_unordered_)( + HashState state, + absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)>); }; ABSL_NAMESPACE_END diff --git a/absl/hash/hash_benchmark.cc b/absl/hash/hash_benchmark.cc index d498ac29..8712a01c 100644 --- a/absl/hash/hash_benchmark.cc +++ b/absl/hash/hash_benchmark.cc @@ -19,6 +19,7 @@ #include <vector> #include "absl/base/attributes.h" +#include "absl/container/flat_hash_set.h" #include "absl/hash/hash.h" #include "absl/random/random.h" #include "absl/strings/cord.h" @@ -107,6 +108,44 @@ absl::Cord FragmentedCord(size_t size) { return result; } +template <typename T> +std::vector<T> Vector(size_t count) { + std::vector<T> result; + for (size_t v = 0; v < count; ++v) { + result.push_back(v); + } + return result; +} + +// Bogus type that replicates an unorderd_set's bit mixing, but with +// vector-speed iteration. This is intended to measure the overhead of unordered +// hashing without counting the speed of unordered_set iteration. +template <typename T> +struct FastUnorderedSet { + explicit FastUnorderedSet(size_t count) { + for (size_t v = 0; v < count; ++v) { + values.push_back(v); + } + } + std::vector<T> values; + + template <typename H> + friend H AbslHashValue(H h, const FastUnorderedSet& fus) { + return H::combine(H::combine_unordered(std::move(h), fus.values.begin(), + fus.values.end()), + fus.values.size()); + } +}; + +template <typename T> +absl::flat_hash_set<T> FlatHashSet(size_t count) { + absl::flat_hash_set<T> result; + for (size_t v = 0; v < count; ++v) { + result.insert(v); + } + return result; +} + // Generates a benchmark and a codegen method for the provided types. The // codegen method provides a well known entrypoint for dumping assembly. #define MAKE_BENCHMARK(hash, name, ...) \ @@ -145,10 +184,22 @@ MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200)); MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000)); MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200)); MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000)); -MAKE_BENCHMARK(AbslHash, VectorInt64_10, std::vector<int64_t>(10)); -MAKE_BENCHMARK(AbslHash, VectorInt64_100, std::vector<int64_t>(100)); -MAKE_BENCHMARK(AbslHash, VectorDouble_10, std::vector<double>(10, 1.1)); -MAKE_BENCHMARK(AbslHash, VectorDouble_100, std::vector<double>(100, 1.1)); +MAKE_BENCHMARK(AbslHash, VectorInt64_10, Vector<int64_t>(10)); +MAKE_BENCHMARK(AbslHash, VectorInt64_100, Vector<int64_t>(100)); +MAKE_BENCHMARK(AbslHash, VectorInt64_1000, Vector<int64_t>(1000)); +MAKE_BENCHMARK(AbslHash, VectorDouble_10, Vector<double>(10)); +MAKE_BENCHMARK(AbslHash, VectorDouble_100, Vector<double>(100)); +MAKE_BENCHMARK(AbslHash, VectorDouble_1000, Vector<double>(1000)); +MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_10, FlatHashSet<int64_t>(10)); +MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_100, FlatHashSet<int64_t>(100)); +MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_1000, FlatHashSet<int64_t>(1000)); +MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_10, FlatHashSet<double>(10)); +MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_100, FlatHashSet<double>(100)); +MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_1000, FlatHashSet<double>(1000)); +MAKE_BENCHMARK(AbslHash, FastUnorderedSetInt64_1000, + FastUnorderedSet<int64_t>(1000)); +MAKE_BENCHMARK(AbslHash, FastUnorderedSetDouble_1000, + FastUnorderedSet<double>(1000)); MAKE_BENCHMARK(AbslHash, PairStringString_0, std::make_pair(std::string(), std::string())); MAKE_BENCHMARK(AbslHash, PairStringString_10, @@ -180,6 +231,24 @@ MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10, std::vector<double>(10, 1.1)); MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100, std::vector<double>(100, 1.1)); +MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_1000, + std::vector<double>(1000, 1.1)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_10, + FlatHashSet<int64_t>(10)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_100, + FlatHashSet<int64_t>(100)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_1000, + FlatHashSet<int64_t>(1000)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_10, + FlatHashSet<double>(10)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_100, + FlatHashSet<double>(100)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_1000, + FlatHashSet<double>(1000)); +MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetInt64_1000, + FastUnorderedSet<int64_t>(1000)); +MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetDouble_1000, + FastUnorderedSet<double>(1000)); // The latency benchmark attempts to model the speed of the hash function in // production. When a hash function is used for hashtable lookups it is rarely diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index dbfacb2d..cea54dc0 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -34,12 +34,18 @@ #include <tuple> #include <type_traits> #include <unordered_map> +#include <unordered_set> #include <utility> #include <vector> #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/container/btree_map.h" +#include "absl/container/btree_set.h" +#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_map.h" +#include "absl/container/node_hash_set.h" #include "absl/hash/hash_testing.h" #include "absl/hash/internal/spy_hash_state.h" #include "absl/meta/type_traits.h" @@ -433,6 +439,52 @@ TEST(HashValueTest, StdBitset) { std::bitset<kNumBits>(bit_strings[5].c_str())})); } // namespace +// Dummy type with unordered equality and hashing semantics. This preserves +// input order internally, and is used below to ensure we get test coverage +// for equal sequences with different iteraton orders. +template <typename T> +class UnorderedSequence { + public: + UnorderedSequence() = default; + template <typename TT> + UnorderedSequence(std::initializer_list<TT> l) + : values_(l.begin(), l.end()) {} + template <typename ForwardIterator, + typename std::enable_if<!std::is_integral<ForwardIterator>::value, + bool>::type = true> + UnorderedSequence(ForwardIterator begin, ForwardIterator end) + : values_(begin, end) {} + // one-argument constructor of value type T, to appease older toolchains that + // get confused by one-element initializer lists in some contexts + explicit UnorderedSequence(const T& v) : values_(&v, &v + 1) {} + + using value_type = T; + + size_t size() const { return values_.size(); } + typename std::vector<T>::const_iterator begin() const { + return values_.begin(); + } + typename std::vector<T>::const_iterator end() const { return values_.end(); } + + friend bool operator==(const UnorderedSequence& lhs, + const UnorderedSequence& rhs) { + return lhs.size() == rhs.size() && + std::is_permutation(lhs.begin(), lhs.end(), rhs.begin()); + } + friend bool operator!=(const UnorderedSequence& lhs, + const UnorderedSequence& rhs) { + return !(lhs == rhs); + } + template <typename H> + friend H AbslHashValue(H h, const UnorderedSequence& u) { + return H::combine(H::combine_unordered(std::move(h), u.begin(), u.end()), + u.size()); + } + + private: + std::vector<T> values_; +}; + template <typename T> class HashValueSequenceTest : public testing::Test { }; @@ -456,10 +508,13 @@ TYPED_TEST_P(HashValueSequenceTest, BasicUsage) { } REGISTER_TYPED_TEST_CASE_P(HashValueSequenceTest, BasicUsage); -using IntSequenceTypes = - testing::Types<std::deque<int>, std::forward_list<int>, std::list<int>, - std::vector<int>, std::vector<bool>, TypeErasedVector<int>, - TypeErasedVector<bool>, std::set<int>, std::multiset<int>>; +using IntSequenceTypes = testing::Types< + std::deque<int>, std::forward_list<int>, std::list<int>, std::vector<int>, + std::vector<bool>, TypeErasedContainer<std::vector<int>>, std::set<int>, + std::multiset<int>, UnorderedSequence<int>, + TypeErasedContainer<UnorderedSequence<int>>, std::unordered_set<int>, + std::unordered_multiset<int>, absl::flat_hash_set<int>, + absl::node_hash_set<int>, absl::btree_set<int>>; INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueSequenceTest, IntSequenceTypes); template <typename T> @@ -487,11 +542,15 @@ TYPED_TEST_P(HashValueNestedSequenceTest, BasicUsage) { } REGISTER_TYPED_TEST_CASE_P(HashValueNestedSequenceTest, BasicUsage); -using NestedIntSequenceTypes = - testing::Types<std::vector<std::vector<int>>, - std::vector<TypeErasedVector<int>>, - TypeErasedVector<std::vector<int>>, - TypeErasedVector<TypeErasedVector<int>>>; +template <typename T> +using TypeErasedSet = TypeErasedContainer<UnorderedSequence<T>>; + +using NestedIntSequenceTypes = testing::Types< + std::vector<std::vector<int>>, std::vector<UnorderedSequence<int>>, + std::vector<TypeErasedSet<int>>, UnorderedSequence<std::vector<int>>, + UnorderedSequence<UnorderedSequence<int>>, + UnorderedSequence<TypeErasedSet<int>>, TypeErasedSet<std::vector<int>>, + TypeErasedSet<UnorderedSequence<int>>, TypeErasedSet<TypeErasedSet<int>>>; INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueNestedSequenceTest, NestedIntSequenceTypes); @@ -674,7 +733,11 @@ TYPED_TEST_P(HashValueAssociativeMapTest, BasicUsage) { } REGISTER_TYPED_TEST_CASE_P(HashValueAssociativeMapTest, BasicUsage); -using AssociativeMapTypes = testing::Types<std::map<int, std::string>>; +using AssociativeMapTypes = testing::Types< + std::map<int, std::string>, std::unordered_map<int, std::string>, + absl::flat_hash_map<int, std::string>, + absl::node_hash_map<int, std::string>, absl::btree_map<int, std::string>, + UnorderedSequence<std::pair<const int, std::string>>>; INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueAssociativeMapTest, AssociativeMapTypes); @@ -702,7 +765,8 @@ TYPED_TEST_P(HashValueAssociativeMultimapTest, BasicUsage) { REGISTER_TYPED_TEST_CASE_P(HashValueAssociativeMultimapTest, BasicUsage); using AssociativeMultimapTypes = - testing::Types<std::multimap<int, std::string>>; + testing::Types<std::multimap<int, std::string>, + std::unordered_multimap<int, std::string>>; INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueAssociativeMultimapTest, AssociativeMultimapTypes); @@ -1062,6 +1126,14 @@ TEST(HashTest, TypeErased) { EXPECT_EQ(SpyHash(std::make_pair(TypeErasedValue<size_t>(7), 17)), SpyHash(std::make_pair(size_t{7}, 17))); + + absl::flat_hash_set<absl::flat_hash_set<int>> ss = {{1, 2}, {3, 4}}; + TypeErasedContainer<absl::flat_hash_set<absl::flat_hash_set<int>>> es = { + absl::flat_hash_set<int>{1, 2}, {3, 4}}; + absl::flat_hash_set<TypeErasedContainer<absl::flat_hash_set<int>>> se = { + {1, 2}, {3, 4}}; + EXPECT_EQ(SpyHash(ss), SpyHash(es)); + EXPECT_EQ(SpyHash(ss), SpyHash(se)); } struct ValueWithBoolConversion { diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 97b68bad..a424e014 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -23,6 +23,7 @@ #include <array> #include <bitset> #include <cmath> +#include <cstddef> #include <cstring> #include <deque> #include <forward_list> @@ -36,6 +37,8 @@ #include <string> #include <tuple> #include <type_traits> +#include <unordered_map> +#include <unordered_set> #include <utility> #include <vector> @@ -54,6 +57,9 @@ namespace absl { ABSL_NAMESPACE_BEGIN + +class HashState; + namespace hash_internal { // Internal detail: Large buffers are hashed in smaller chunks. This function @@ -115,24 +121,66 @@ class PiecewiseCombiner { size_t position_; }; +// is_hashable() +// +// Trait class which returns true if T is hashable by the absl::Hash framework. +// Used for the AbslHashValue implementations for composite types below. +template <typename T> +struct is_hashable; + // HashStateBase // -// A hash state object represents an intermediate state in the computation -// of an unspecified hash algorithm. `HashStateBase` provides a CRTP style -// base class for hash state implementations. Developers adding type support -// for `absl::Hash` should not rely on any parts of the state object other than -// the following member functions: +// An internal implementation detail that contains common implementation details +// for all of the "hash state objects" objects generated by Abseil. This is not +// a public API; users should not create classes that inherit from this. +// +// A hash state object is the template argument `H` passed to `AbslHashValue`. +// It represents an intermediate state in the computation of an unspecified hash +// algorithm. `HashStateBase` provides a CRTP style base class for hash state +// implementations. Developers adding type support for `absl::Hash` should not +// rely on any parts of the state object other than the following member +// functions: // // * HashStateBase::combine() // * HashStateBase::combine_contiguous() +// * HashStateBase::combine_unordered() // -// A derived hash state class of type `H` must provide a static member function +// A derived hash state class of type `H` must provide a public member function // with a signature similar to the following: // // `static H combine_contiguous(H state, const unsigned char*, size_t)`. // +// It must also provide a private template method named RunCombineUnordered. +// +// A "consumer" is a 1-arg functor returning void. Its argument is a reference +// to an inner hash state object, and it may be called multiple times. When +// called, the functor consumes the entropy from the provided state object, +// and resets that object to its empty state. +// +// A "combiner" is a stateless 2-arg functor returning void. Its arguments are +// an inner hash state object and an ElementStateConsumer functor. A combiner +// uses the provided inner hash state object to hash each element of the +// container, passing the inner hash state object to the consumer after hashing +// each element. +// +// Given these definitions, a derived hash state class of type H +// must provide a private template method with a signature similar to the +// following: +// +// `template <typename CombinerT>` +// `static H RunCombineUnordered(H outer_state, CombinerT combiner)` +// +// This function is responsible for constructing the inner state object and +// providing a consumer to the combiner. It uses side effects of the consumer +// and combiner to mix the state of each element in an order-independent manner, +// and uses this to return an updated value of `outer_state`. +// +// This inside-out approach generates efficient object code in the normal case, +// but allows us to use stack storage to implement the absl::HashState type +// erasure mechanism (avoiding heap allocations while hashing). +// // `HashStateBase` will provide a complete implementation for a hash state -// object in terms of this method. +// object in terms of these two methods. // // Example: // @@ -141,6 +189,10 @@ class PiecewiseCombiner { // static H combine_contiguous(H state, const unsigned char*, size_t); // using MyHashState::HashStateBase::combine; // using MyHashState::HashStateBase::combine_contiguous; +// using MyHashState::HashStateBase::combine_unordered; +// private: +// template <typename CombinerT> +// static H RunCombineUnordered(H state, CombinerT combiner); // }; template <typename H> class HashStateBase { @@ -181,7 +233,30 @@ class HashStateBase { template <typename T> static H combine_contiguous(H state, const T* data, size_t size); + template <typename I> + static H combine_unordered(H state, I begin, I end); + using AbslInternalPiecewiseCombiner = PiecewiseCombiner; + + template <typename T> + using is_hashable = absl::hash_internal::is_hashable<T>; + + private: + // Common implementation of the iteration step of a "combiner", as described + // above. + template <typename I> + struct CombineUnorderedCallback { + I begin; + I end; + + template <typename InnerH, typename ElementStateConsumer> + void operator()(InnerH inner_state, ElementStateConsumer cb) { + for (; begin != end; ++begin) { + inner_state = H::combine(std::move(inner_state), *begin); + cb(inner_state); + } + } + }; }; // is_uniquely_represented @@ -350,13 +425,6 @@ H AbslHashValue(H hash_state, std::nullptr_t) { // AbslHashValue for Composite Types // ----------------------------------------------------------------------------- -// is_hashable() -// -// Trait class which returns true if T is hashable by the absl::Hash framework. -// Used for the AbslHashValue implementations for composite types below. -template <typename T> -struct is_hashable; - // AbslHashValue() for hashing pairs template <typename H, typename T1, typename T2> typename std::enable_if<is_hashable<T1>::value && is_hashable<T2>::value, @@ -503,8 +571,10 @@ AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) { } // AbslHashValue special cases for hashing std::vector<bool> + #if defined(ABSL_IS_BIG_ENDIAN) && \ (defined(__GLIBCXX__) || defined(__GLIBCPP__)) + // std::hash in libstdc++ does not work correctly with vector<bool> on Big // Endian platforms therefore we need to implement a custom AbslHashValue for // it. More details on the bug: @@ -588,6 +658,55 @@ typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( } // ----------------------------------------------------------------------------- +// AbslHashValue for Unordered Associative Containers +// ----------------------------------------------------------------------------- + +// AbslHashValue for hashing std::unordered_set +template <typename H, typename Key, typename Hash, typename KeyEqual, + typename Alloc> +typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( + H hash_state, const std::unordered_set<Key, Hash, KeyEqual, Alloc>& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// AbslHashValue for hashing std::unordered_multiset +template <typename H, typename Key, typename Hash, typename KeyEqual, + typename Alloc> +typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( + H hash_state, + const std::unordered_multiset<Key, Hash, KeyEqual, Alloc>& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// AbslHashValue for hashing std::unordered_set +template <typename H, typename Key, typename T, typename Hash, + typename KeyEqual, typename Alloc> +typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value, + H>::type +AbslHashValue(H hash_state, + const std::unordered_map<Key, T, Hash, KeyEqual, Alloc>& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// AbslHashValue for hashing std::unordered_multiset +template <typename H, typename Key, typename T, typename Hash, + typename KeyEqual, typename Alloc> +typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value, + H>::type +AbslHashValue(H hash_state, + const std::unordered_multimap<Key, T, Hash, KeyEqual, Alloc>& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// ----------------------------------------------------------------------------- // AbslHashValue for Wrapper Types // ----------------------------------------------------------------------------- @@ -830,6 +949,31 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> { // move-only ensures that there is only one non-moved-from object. MixingHashState() : state_(Seed()) {} + friend class MixingHashState::HashStateBase; + + template <typename CombinerT> + static MixingHashState RunCombineUnordered(MixingHashState state, + CombinerT combiner) { + uint64_t unordered_state = 0; + combiner(MixingHashState{}, [&](MixingHashState& inner_state) { + // Add the hash state of the element to the running total, but mix the + // carry bit back into the low bit. This in intended to avoid losing + // entropy to overflow, especially when unordered_multisets contain + // multiple copies of the same value. + auto element_state = inner_state.state_; + unordered_state += element_state; + if (unordered_state < element_state) { + ++unordered_state; + } + inner_state = MixingHashState{}; + }); + return MixingHashState::combine(std::move(state), unordered_state); + } + + // Allow the HashState type-erasure implementation to invoke + // RunCombinedUnordered() directly. + friend class absl::HashState; + // Workaround for MSVC bug. // We make the type copyable to fix the calling convention, even though we // never actually copy it. Keep it private to not affect the public API of the @@ -1064,6 +1208,14 @@ H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) { return hash_internal::hash_range_or_bytes(std::move(state), data, size); } +// HashStateBase::combine_unordered() +template <typename H> +template <typename I> +H HashStateBase<H>::combine_unordered(H state, I begin, I end) { + return H::RunCombineUnordered(std::move(state), + CombineUnorderedCallback<I>{begin, end}); +} + // HashStateBase::PiecewiseCombiner::add_buffer() template <typename H> H PiecewiseCombiner::add_buffer(H state, const unsigned char* data, diff --git a/absl/hash/internal/spy_hash_state.h b/absl/hash/internal/spy_hash_state.h index c0831208..09728266 100644 --- a/absl/hash/internal/spy_hash_state.h +++ b/absl/hash/internal/spy_hash_state.h @@ -15,6 +15,7 @@ #ifndef ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_ #define ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_ +#include <algorithm> #include <ostream> #include <string> #include <vector> @@ -167,6 +168,24 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> { using SpyHashStateImpl::HashStateBase::combine_contiguous; + template <typename CombinerT> + static SpyHashStateImpl RunCombineUnordered(SpyHashStateImpl state, + CombinerT combiner) { + UnorderedCombinerCallback cb; + + combiner(SpyHashStateImpl<void>{}, std::ref(cb)); + + std::sort(cb.element_hash_representations.begin(), + cb.element_hash_representations.end()); + state.hash_representation_.insert(state.hash_representation_.end(), + cb.element_hash_representations.begin(), + cb.element_hash_representations.end()); + if (cb.error && cb.error->has_value()) { + state.error_ = std::move(cb.error); + } + return state; + } + absl::optional<std::string> error() const { if (moved_from_) { return "Returned a moved-from instance of the hash state object."; @@ -178,6 +197,22 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> { template <typename U> friend class SpyHashStateImpl; + struct UnorderedCombinerCallback { + std::vector<std::string> element_hash_representations; + std::shared_ptr<absl::optional<std::string>> error; + + // The inner spy can have a different type. + template <typename U> + void operator()(SpyHashStateImpl<U>& inner) { + element_hash_representations.push_back( + absl::StrJoin(inner.hash_representation_, "")); + if (inner.error_->has_value()) { + error = std::move(inner.error_); + } + inner = SpyHashStateImpl<void>{}; + } + }; + // This is true if SpyHashStateImpl<T> has been passed to a call of // AbslHashValue with the wrong type. This detects that the user called // AbslHashValue directly (because the hash state type does not match). |