From fbbb5865a562c9a9167d71c1cf56b82025a8f065 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Wed, 19 Jan 2022 11:04:50 -0800 Subject: Export of internal Abseil changes -- 487c7a754a3b93bc0f9de14bdced48007a96ae55 by Greg Falcon : Add support for absl::Hash to hash unordered containers. These can now be hashed directly, as well as combined in AbslHashValue implementations. This also adds a new method, `H::combine_unordered()`, to the public AbslHashValue hash state API. This allows users to implement hash specializations for their own unordered collection types. A traits class, `H::is_hashable`, is also added to the hash state API. H::is_hashable::value reflects whether type T is considered hashable by the AbslHashValue framework. This allows users to properly SFINAE templated versions of AbslHashValue. (The AbslHashValue implementation added to raw_hash_set shows an example of its use.) PiperOrigin-RevId: 422856706 GitOrigin-RevId: 487c7a754a3b93bc0f9de14bdced48007a96ae55 Change-Id: Id31fd4ccba282f8c9ae6fcee6ae0ad0f7879f456 --- absl/container/internal/raw_hash_set.h | 8 ++ absl/hash/BUILD.bazel | 6 ++ absl/hash/CMakeLists.txt | 5 + absl/hash/hash.h | 85 ++++++++++++++-- absl/hash/hash_benchmark.cc | 77 +++++++++++++- absl/hash/hash_test.cc | 94 +++++++++++++++-- absl/hash/internal/hash.h | 180 ++++++++++++++++++++++++++++++--- absl/hash/internal/spy_hash_state.h | 35 +++++++ 8 files changed, 453 insertions(+), 37 deletions(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 1157d25a..93a3fa85 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1539,6 +1539,14 @@ class raw_hash_set { return !(a == b); } + template + friend typename std::enable_if::value, + H>::type + AbslHashValue(H h, const raw_hash_set& s) { + return H::combine(H::combine_unordered(std::move(h), s.begin(), s.end()), + s.size()); + } + friend void swap(raw_hash_set& a, raw_hash_set& b) noexcept(noexcept(a.swap(b))) { a.swap(b); diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index f0640d34..bcc316f9 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -41,6 +41,7 @@ cc_library( "//absl/base:core_headers", "//absl/base:endian", "//absl/container:fixed_array", + "//absl/functional:function_ref", "//absl/meta:type_traits", "//absl/numeric:int128", "//absl/strings", @@ -74,7 +75,11 @@ cc_test( ":hash_testing", ":spy_hash_state", "//absl/base:core_headers", + "//absl/container:btree", + "//absl/container:flat_hash_map", "//absl/container:flat_hash_set", + "//absl/container:node_hash_map", + "//absl/container:node_hash_set", "//absl/meta:type_traits", "//absl/numeric:int128", "//absl/strings:cord_test_helpers", @@ -93,6 +98,7 @@ cc_binary( deps = [ ":hash", "//absl/base:core_headers", + "//absl/container:flat_hash_set", "//absl/random", "//absl/strings", "//absl/strings:cord", diff --git a/absl/hash/CMakeLists.txt b/absl/hash/CMakeLists.txt index 5916ae3c..34434fa0 100644 --- a/absl/hash/CMakeLists.txt +++ b/absl/hash/CMakeLists.txt @@ -30,6 +30,7 @@ absl_cc_library( absl::core_headers absl::endian absl::fixed_array + absl::function_ref absl::meta absl::int128 absl::strings @@ -68,7 +69,11 @@ absl_cc_test( absl::hash absl::hash_testing absl::core_headers + absl::btree + absl::flat_hash_map absl::flat_hash_set + absl::node_hash_map + absl::node_hash_set absl::spy_hash_state absl::meta absl::int128 diff --git a/absl/hash/hash.h b/absl/hash/hash.h index 8282ea53..f31fde40 100644 --- a/absl/hash/hash.h +++ b/absl/hash/hash.h @@ -26,9 +26,9 @@ // support Abseil hashing without requiring you to define a hashing // algorithm. // * `HashState`, a type-erased class which implements the manipulation of the -// hash state (H) itself, contains member functions `combine()` and -// `combine_contiguous()`, which you can use to contribute to an existing -// hash state when hashing your types. +// hash state (H) itself; contains member functions `combine()`, +// `combine_contiguous()`, and `combine_unordered()`; and which you can use +// to contribute to an existing hash state when hashing your types. // // Unlike `std::hash` or other hashing frameworks, the Abseil hashing framework // provides most of its utility by abstracting away the hash algorithm (and its @@ -74,7 +74,9 @@ #define ABSL_HASH_HASH_H_ #include +#include +#include "absl/functional/function_ref.h" #include "absl/hash/internal/hash.h" namespace absl { @@ -107,14 +109,27 @@ ABSL_NAMESPACE_BEGIN // * std::string_view (as well as any instance of std::basic_string that // uses char and std::char_traits) // * All the standard sequence containers (provided the elements are hashable) -// * All the standard ordered associative containers (provided the elements are +// * All the standard associative containers (provided the elements are // hashable) // * absl types such as the following: // * absl::string_view -// * absl::InlinedVector -// * absl::FixedArray // * absl::uint128 // * absl::Time, absl::Duration, and absl::TimeZone +// * absl containers (provided the elements are hashable) such as the +// following: +// * absl::flat_hash_set, absl::node_hash_set, absl::btree_set +// * absl::flat_hash_map, absl::node_hash_map, absl::btree_map +// * absl::btree_multiset, absl::btree_multimap +// * absl::InlinedVector +// * absl::FixedArray +// +// When absl::Hash is used to hash an unordered container with a custom hash +// functor, the elements are hashed using default absl::Hash semantics, not +// the custom hash functor. This is consistent with the behavior of +// operator==() on unordered containers, which compares elements pairwise with +// operator==() rather than the custom equality functor. It is usually a +// mistake to use either operator==() or absl::Hash on unordered collections +// that use functors incompatible with operator==() equality. // // Note: the list above is not meant to be exhaustive. Additional type support // may be added, in which case the above list will be updated. @@ -153,7 +168,8 @@ ABSL_NAMESPACE_BEGIN // that are otherwise difficult to extend using `AbslHashValue()`. (See the // `HashState` class below.) // -// The "hash state" concept contains two member functions for mixing hash state: +// The "hash state" concept contains three member functions for mixing hash +// state: // // * `H::combine(state, values...)` // @@ -187,6 +203,15 @@ ABSL_NAMESPACE_BEGIN // (it may perform internal optimizations). If you need this guarantee, use a // loop instead. // +// * `H::combine_unordered(state, begin, end)` +// +// Combines a set of elements denoted by an iterator pair into a hash +// state, returning the updated state. Note that the existing hash +// state is move-only and must be passed by value. +// +// Unlike the other two methods, the hashing is order-independent. +// This can be used to hash unordered collections. +// // ----------------------------------------------------------------------------- // Adding Type Support to `absl::Hash` // ----------------------------------------------------------------------------- @@ -243,8 +268,9 @@ size_t HashOf(const Types&... values) { // classes, virtual functions, etc.). The type erasure adds overhead so it // should be avoided unless necessary. // -// Note: This wrapper will only erase calls to: +// Note: This wrapper will only erase calls to // combine_contiguous(H, const unsigned char*, size_t) +// RunCombineUnordered(H, CombinerF) // // All other calls will be handled internally and will not invoke overloads // provided by the wrapped class. @@ -318,6 +344,8 @@ class HashState : public hash_internal::HashStateBase { private: HashState() = default; + friend class HashState::HashStateBase; + template static void CombineContiguousImpl(void* p, const unsigned char* first, size_t size) { @@ -329,16 +357,57 @@ class HashState : public hash_internal::HashStateBase { void Init(T* state) { state_ = state; combine_contiguous_ = &CombineContiguousImpl; + run_combine_unordered_ = &RunCombineUnorderedImpl; + } + + template + struct CombineUnorderedInvoker { + template + void operator()(T inner_state, ConsumerT inner_cb) { + f(HashState::Create(&inner_state), + [&](HashState& inner_erased) { inner_cb(inner_erased.Real()); }); + } + + absl::FunctionRef)> f; + }; + + template + static HashState RunCombineUnorderedImpl( + HashState state, + absl::FunctionRef)> + f) { + // Note that this implementation assumes that inner_state and outer_state + // are the same type. This isn't true in the SpyHash case, but SpyHash + // types are move-convertible to each other, so this still works. + T& real_state = state.Real(); + real_state = T::RunCombineUnordered( + std::move(real_state), CombineUnorderedInvoker{f}); + return state; + } + + template + static HashState RunCombineUnordered(HashState state, CombinerT combiner) { + auto* run = state.run_combine_unordered_; + return run(std::move(state), std::ref(combiner)); } // Do not erase an already erased state. void Init(HashState* state) { state_ = state->state_; combine_contiguous_ = state->combine_contiguous_; + run_combine_unordered_ = state->run_combine_unordered_; + } + + template + T& Real() { + return *static_cast(state_); } void* state_; void (*combine_contiguous_)(void*, const unsigned char*, size_t); + HashState (*run_combine_unordered_)( + HashState state, + absl::FunctionRef)>); }; ABSL_NAMESPACE_END diff --git a/absl/hash/hash_benchmark.cc b/absl/hash/hash_benchmark.cc index d498ac29..8712a01c 100644 --- a/absl/hash/hash_benchmark.cc +++ b/absl/hash/hash_benchmark.cc @@ -19,6 +19,7 @@ #include #include "absl/base/attributes.h" +#include "absl/container/flat_hash_set.h" #include "absl/hash/hash.h" #include "absl/random/random.h" #include "absl/strings/cord.h" @@ -107,6 +108,44 @@ absl::Cord FragmentedCord(size_t size) { return result; } +template +std::vector Vector(size_t count) { + std::vector result; + for (size_t v = 0; v < count; ++v) { + result.push_back(v); + } + return result; +} + +// Bogus type that replicates an unorderd_set's bit mixing, but with +// vector-speed iteration. This is intended to measure the overhead of unordered +// hashing without counting the speed of unordered_set iteration. +template +struct FastUnorderedSet { + explicit FastUnorderedSet(size_t count) { + for (size_t v = 0; v < count; ++v) { + values.push_back(v); + } + } + std::vector values; + + template + friend H AbslHashValue(H h, const FastUnorderedSet& fus) { + return H::combine(H::combine_unordered(std::move(h), fus.values.begin(), + fus.values.end()), + fus.values.size()); + } +}; + +template +absl::flat_hash_set FlatHashSet(size_t count) { + absl::flat_hash_set result; + for (size_t v = 0; v < count; ++v) { + result.insert(v); + } + return result; +} + // Generates a benchmark and a codegen method for the provided types. The // codegen method provides a well known entrypoint for dumping assembly. #define MAKE_BENCHMARK(hash, name, ...) \ @@ -145,10 +184,22 @@ MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200)); MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000)); MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200)); MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000)); -MAKE_BENCHMARK(AbslHash, VectorInt64_10, std::vector(10)); -MAKE_BENCHMARK(AbslHash, VectorInt64_100, std::vector(100)); -MAKE_BENCHMARK(AbslHash, VectorDouble_10, std::vector(10, 1.1)); -MAKE_BENCHMARK(AbslHash, VectorDouble_100, std::vector(100, 1.1)); +MAKE_BENCHMARK(AbslHash, VectorInt64_10, Vector(10)); +MAKE_BENCHMARK(AbslHash, VectorInt64_100, Vector(100)); +MAKE_BENCHMARK(AbslHash, VectorInt64_1000, Vector(1000)); +MAKE_BENCHMARK(AbslHash, VectorDouble_10, Vector(10)); +MAKE_BENCHMARK(AbslHash, VectorDouble_100, Vector(100)); +MAKE_BENCHMARK(AbslHash, VectorDouble_1000, Vector(1000)); +MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_10, FlatHashSet(10)); +MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_100, FlatHashSet(100)); +MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_1000, FlatHashSet(1000)); +MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_10, FlatHashSet(10)); +MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_100, FlatHashSet(100)); +MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_1000, FlatHashSet(1000)); +MAKE_BENCHMARK(AbslHash, FastUnorderedSetInt64_1000, + FastUnorderedSet(1000)); +MAKE_BENCHMARK(AbslHash, FastUnorderedSetDouble_1000, + FastUnorderedSet(1000)); MAKE_BENCHMARK(AbslHash, PairStringString_0, std::make_pair(std::string(), std::string())); MAKE_BENCHMARK(AbslHash, PairStringString_10, @@ -180,6 +231,24 @@ MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10, std::vector(10, 1.1)); MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100, std::vector(100, 1.1)); +MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_1000, + std::vector(1000, 1.1)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_10, + FlatHashSet(10)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_100, + FlatHashSet(100)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_1000, + FlatHashSet(1000)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_10, + FlatHashSet(10)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_100, + FlatHashSet(100)); +MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_1000, + FlatHashSet(1000)); +MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetInt64_1000, + FastUnorderedSet(1000)); +MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetDouble_1000, + FastUnorderedSet(1000)); // The latency benchmark attempts to model the speed of the hash function in // production. When a hash function is used for hashtable lookups it is rarely diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index dbfacb2d..cea54dc0 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -34,12 +34,18 @@ #include #include #include +#include #include #include #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/container/btree_map.h" +#include "absl/container/btree_set.h" +#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_map.h" +#include "absl/container/node_hash_set.h" #include "absl/hash/hash_testing.h" #include "absl/hash/internal/spy_hash_state.h" #include "absl/meta/type_traits.h" @@ -433,6 +439,52 @@ TEST(HashValueTest, StdBitset) { std::bitset(bit_strings[5].c_str())})); } // namespace +// Dummy type with unordered equality and hashing semantics. This preserves +// input order internally, and is used below to ensure we get test coverage +// for equal sequences with different iteraton orders. +template +class UnorderedSequence { + public: + UnorderedSequence() = default; + template + UnorderedSequence(std::initializer_list l) + : values_(l.begin(), l.end()) {} + template ::value, + bool>::type = true> + UnorderedSequence(ForwardIterator begin, ForwardIterator end) + : values_(begin, end) {} + // one-argument constructor of value type T, to appease older toolchains that + // get confused by one-element initializer lists in some contexts + explicit UnorderedSequence(const T& v) : values_(&v, &v + 1) {} + + using value_type = T; + + size_t size() const { return values_.size(); } + typename std::vector::const_iterator begin() const { + return values_.begin(); + } + typename std::vector::const_iterator end() const { return values_.end(); } + + friend bool operator==(const UnorderedSequence& lhs, + const UnorderedSequence& rhs) { + return lhs.size() == rhs.size() && + std::is_permutation(lhs.begin(), lhs.end(), rhs.begin()); + } + friend bool operator!=(const UnorderedSequence& lhs, + const UnorderedSequence& rhs) { + return !(lhs == rhs); + } + template + friend H AbslHashValue(H h, const UnorderedSequence& u) { + return H::combine(H::combine_unordered(std::move(h), u.begin(), u.end()), + u.size()); + } + + private: + std::vector values_; +}; + template class HashValueSequenceTest : public testing::Test { }; @@ -456,10 +508,13 @@ TYPED_TEST_P(HashValueSequenceTest, BasicUsage) { } REGISTER_TYPED_TEST_CASE_P(HashValueSequenceTest, BasicUsage); -using IntSequenceTypes = - testing::Types, std::forward_list, std::list, - std::vector, std::vector, TypeErasedVector, - TypeErasedVector, std::set, std::multiset>; +using IntSequenceTypes = testing::Types< + std::deque, std::forward_list, std::list, std::vector, + std::vector, TypeErasedContainer>, std::set, + std::multiset, UnorderedSequence, + TypeErasedContainer>, std::unordered_set, + std::unordered_multiset, absl::flat_hash_set, + absl::node_hash_set, absl::btree_set>; INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueSequenceTest, IntSequenceTypes); template @@ -487,11 +542,15 @@ TYPED_TEST_P(HashValueNestedSequenceTest, BasicUsage) { } REGISTER_TYPED_TEST_CASE_P(HashValueNestedSequenceTest, BasicUsage); -using NestedIntSequenceTypes = - testing::Types>, - std::vector>, - TypeErasedVector>, - TypeErasedVector>>; +template +using TypeErasedSet = TypeErasedContainer>; + +using NestedIntSequenceTypes = testing::Types< + std::vector>, std::vector>, + std::vector>, UnorderedSequence>, + UnorderedSequence>, + UnorderedSequence>, TypeErasedSet>, + TypeErasedSet>, TypeErasedSet>>; INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueNestedSequenceTest, NestedIntSequenceTypes); @@ -674,7 +733,11 @@ TYPED_TEST_P(HashValueAssociativeMapTest, BasicUsage) { } REGISTER_TYPED_TEST_CASE_P(HashValueAssociativeMapTest, BasicUsage); -using AssociativeMapTypes = testing::Types>; +using AssociativeMapTypes = testing::Types< + std::map, std::unordered_map, + absl::flat_hash_map, + absl::node_hash_map, absl::btree_map, + UnorderedSequence>>; INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueAssociativeMapTest, AssociativeMapTypes); @@ -702,7 +765,8 @@ TYPED_TEST_P(HashValueAssociativeMultimapTest, BasicUsage) { REGISTER_TYPED_TEST_CASE_P(HashValueAssociativeMultimapTest, BasicUsage); using AssociativeMultimapTypes = - testing::Types>; + testing::Types, + std::unordered_multimap>; INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueAssociativeMultimapTest, AssociativeMultimapTypes); @@ -1062,6 +1126,14 @@ TEST(HashTest, TypeErased) { EXPECT_EQ(SpyHash(std::make_pair(TypeErasedValue(7), 17)), SpyHash(std::make_pair(size_t{7}, 17))); + + absl::flat_hash_set> ss = {{1, 2}, {3, 4}}; + TypeErasedContainer>> es = { + absl::flat_hash_set{1, 2}, {3, 4}}; + absl::flat_hash_set>> se = { + {1, 2}, {3, 4}}; + EXPECT_EQ(SpyHash(ss), SpyHash(es)); + EXPECT_EQ(SpyHash(ss), SpyHash(se)); } struct ValueWithBoolConversion { diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 97b68bad..a424e014 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -54,6 +57,9 @@ namespace absl { ABSL_NAMESPACE_BEGIN + +class HashState; + namespace hash_internal { // Internal detail: Large buffers are hashed in smaller chunks. This function @@ -115,24 +121,66 @@ class PiecewiseCombiner { size_t position_; }; +// is_hashable() +// +// Trait class which returns true if T is hashable by the absl::Hash framework. +// Used for the AbslHashValue implementations for composite types below. +template +struct is_hashable; + // HashStateBase // -// A hash state object represents an intermediate state in the computation -// of an unspecified hash algorithm. `HashStateBase` provides a CRTP style -// base class for hash state implementations. Developers adding type support -// for `absl::Hash` should not rely on any parts of the state object other than -// the following member functions: +// An internal implementation detail that contains common implementation details +// for all of the "hash state objects" objects generated by Abseil. This is not +// a public API; users should not create classes that inherit from this. +// +// A hash state object is the template argument `H` passed to `AbslHashValue`. +// It represents an intermediate state in the computation of an unspecified hash +// algorithm. `HashStateBase` provides a CRTP style base class for hash state +// implementations. Developers adding type support for `absl::Hash` should not +// rely on any parts of the state object other than the following member +// functions: // // * HashStateBase::combine() // * HashStateBase::combine_contiguous() +// * HashStateBase::combine_unordered() // -// A derived hash state class of type `H` must provide a static member function +// A derived hash state class of type `H` must provide a public member function // with a signature similar to the following: // // `static H combine_contiguous(H state, const unsigned char*, size_t)`. // +// It must also provide a private template method named RunCombineUnordered. +// +// A "consumer" is a 1-arg functor returning void. Its argument is a reference +// to an inner hash state object, and it may be called multiple times. When +// called, the functor consumes the entropy from the provided state object, +// and resets that object to its empty state. +// +// A "combiner" is a stateless 2-arg functor returning void. Its arguments are +// an inner hash state object and an ElementStateConsumer functor. A combiner +// uses the provided inner hash state object to hash each element of the +// container, passing the inner hash state object to the consumer after hashing +// each element. +// +// Given these definitions, a derived hash state class of type H +// must provide a private template method with a signature similar to the +// following: +// +// `template ` +// `static H RunCombineUnordered(H outer_state, CombinerT combiner)` +// +// This function is responsible for constructing the inner state object and +// providing a consumer to the combiner. It uses side effects of the consumer +// and combiner to mix the state of each element in an order-independent manner, +// and uses this to return an updated value of `outer_state`. +// +// This inside-out approach generates efficient object code in the normal case, +// but allows us to use stack storage to implement the absl::HashState type +// erasure mechanism (avoiding heap allocations while hashing). +// // `HashStateBase` will provide a complete implementation for a hash state -// object in terms of this method. +// object in terms of these two methods. // // Example: // @@ -141,6 +189,10 @@ class PiecewiseCombiner { // static H combine_contiguous(H state, const unsigned char*, size_t); // using MyHashState::HashStateBase::combine; // using MyHashState::HashStateBase::combine_contiguous; +// using MyHashState::HashStateBase::combine_unordered; +// private: +// template +// static H RunCombineUnordered(H state, CombinerT combiner); // }; template class HashStateBase { @@ -181,7 +233,30 @@ class HashStateBase { template static H combine_contiguous(H state, const T* data, size_t size); + template + static H combine_unordered(H state, I begin, I end); + using AbslInternalPiecewiseCombiner = PiecewiseCombiner; + + template + using is_hashable = absl::hash_internal::is_hashable; + + private: + // Common implementation of the iteration step of a "combiner", as described + // above. + template + struct CombineUnorderedCallback { + I begin; + I end; + + template + void operator()(InnerH inner_state, ElementStateConsumer cb) { + for (; begin != end; ++begin) { + inner_state = H::combine(std::move(inner_state), *begin); + cb(inner_state); + } + } + }; }; // is_uniquely_represented @@ -350,13 +425,6 @@ H AbslHashValue(H hash_state, std::nullptr_t) { // AbslHashValue for Composite Types // ----------------------------------------------------------------------------- -// is_hashable() -// -// Trait class which returns true if T is hashable by the absl::Hash framework. -// Used for the AbslHashValue implementations for composite types below. -template -struct is_hashable; - // AbslHashValue() for hashing pairs template typename std::enable_if::value && is_hashable::value, @@ -503,8 +571,10 @@ AbslHashValue(H hash_state, const std::vector& vector) { } // AbslHashValue special cases for hashing std::vector + #if defined(ABSL_IS_BIG_ENDIAN) && \ (defined(__GLIBCXX__) || defined(__GLIBCPP__)) + // std::hash in libstdc++ does not work correctly with vector on Big // Endian platforms therefore we need to implement a custom AbslHashValue for // it. More details on the bug: @@ -587,6 +657,55 @@ typename std::enable_if::value, H>::type AbslHashValue( return H::combine(std::move(hash_state), set.size()); } +// ----------------------------------------------------------------------------- +// AbslHashValue for Unordered Associative Containers +// ----------------------------------------------------------------------------- + +// AbslHashValue for hashing std::unordered_set +template +typename std::enable_if::value, H>::type AbslHashValue( + H hash_state, const std::unordered_set& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// AbslHashValue for hashing std::unordered_multiset +template +typename std::enable_if::value, H>::type AbslHashValue( + H hash_state, + const std::unordered_multiset& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// AbslHashValue for hashing std::unordered_set +template +typename std::enable_if::value && is_hashable::value, + H>::type +AbslHashValue(H hash_state, + const std::unordered_map& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// AbslHashValue for hashing std::unordered_multiset +template +typename std::enable_if::value && is_hashable::value, + H>::type +AbslHashValue(H hash_state, + const std::unordered_multimap& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + // ----------------------------------------------------------------------------- // AbslHashValue for Wrapper Types // ----------------------------------------------------------------------------- @@ -830,6 +949,31 @@ class ABSL_DLL MixingHashState : public HashStateBase { // move-only ensures that there is only one non-moved-from object. MixingHashState() : state_(Seed()) {} + friend class MixingHashState::HashStateBase; + + template + static MixingHashState RunCombineUnordered(MixingHashState state, + CombinerT combiner) { + uint64_t unordered_state = 0; + combiner(MixingHashState{}, [&](MixingHashState& inner_state) { + // Add the hash state of the element to the running total, but mix the + // carry bit back into the low bit. This in intended to avoid losing + // entropy to overflow, especially when unordered_multisets contain + // multiple copies of the same value. + auto element_state = inner_state.state_; + unordered_state += element_state; + if (unordered_state < element_state) { + ++unordered_state; + } + inner_state = MixingHashState{}; + }); + return MixingHashState::combine(std::move(state), unordered_state); + } + + // Allow the HashState type-erasure implementation to invoke + // RunCombinedUnordered() directly. + friend class absl::HashState; + // Workaround for MSVC bug. // We make the type copyable to fix the calling convention, even though we // never actually copy it. Keep it private to not affect the public API of the @@ -1064,6 +1208,14 @@ H HashStateBase::combine_contiguous(H state, const T* data, size_t size) { return hash_internal::hash_range_or_bytes(std::move(state), data, size); } +// HashStateBase::combine_unordered() +template +template +H HashStateBase::combine_unordered(H state, I begin, I end) { + return H::RunCombineUnordered(std::move(state), + CombineUnorderedCallback{begin, end}); +} + // HashStateBase::PiecewiseCombiner::add_buffer() template H PiecewiseCombiner::add_buffer(H state, const unsigned char* data, diff --git a/absl/hash/internal/spy_hash_state.h b/absl/hash/internal/spy_hash_state.h index c0831208..09728266 100644 --- a/absl/hash/internal/spy_hash_state.h +++ b/absl/hash/internal/spy_hash_state.h @@ -15,6 +15,7 @@ #ifndef ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_ #define ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_ +#include #include #include #include @@ -167,6 +168,24 @@ class SpyHashStateImpl : public HashStateBase> { using SpyHashStateImpl::HashStateBase::combine_contiguous; + template + static SpyHashStateImpl RunCombineUnordered(SpyHashStateImpl state, + CombinerT combiner) { + UnorderedCombinerCallback cb; + + combiner(SpyHashStateImpl{}, std::ref(cb)); + + std::sort(cb.element_hash_representations.begin(), + cb.element_hash_representations.end()); + state.hash_representation_.insert(state.hash_representation_.end(), + cb.element_hash_representations.begin(), + cb.element_hash_representations.end()); + if (cb.error && cb.error->has_value()) { + state.error_ = std::move(cb.error); + } + return state; + } + absl::optional error() const { if (moved_from_) { return "Returned a moved-from instance of the hash state object."; @@ -178,6 +197,22 @@ class SpyHashStateImpl : public HashStateBase> { template friend class SpyHashStateImpl; + struct UnorderedCombinerCallback { + std::vector element_hash_representations; + std::shared_ptr> error; + + // The inner spy can have a different type. + template + void operator()(SpyHashStateImpl& inner) { + element_hash_representations.push_back( + absl::StrJoin(inner.hash_representation_, "")); + if (inner.error_->has_value()) { + error = std::move(inner.error_); + } + inner = SpyHashStateImpl{}; + } + }; + // This is true if SpyHashStateImpl has been passed to a call of // AbslHashValue with the wrong type. This detects that the user called // AbslHashValue directly (because the hash state type does not match). -- cgit v1.2.3