summaryrefslogtreecommitdiff
path: root/absl/hash
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2022-01-19 11:04:50 -0800
committerGravatar dinord <dino.radakovich@gmail.com>2022-01-19 14:36:25 -0500
commitfbbb5865a562c9a9167d71c1cf56b82025a8f065 (patch)
tree212c8694bb6cf1d76af2b4aabac02c4910f6beab /absl/hash
parent9bb42857112ad13b23de4333fbb75eb47db9de95 (diff)
Export of internal Abseil changes
-- 487c7a754a3b93bc0f9de14bdced48007a96ae55 by Greg Falcon <gfalcon@google.com>: Add support for absl::Hash to hash unordered containers. These can now be hashed directly, as well as combined in AbslHashValue implementations. This also adds a new method, `H::combine_unordered()`, to the public AbslHashValue hash state API. This allows users to implement hash specializations for their own unordered collection types. A traits class, `H::is_hashable<T>`, is also added to the hash state API. H::is_hashable<T>::value reflects whether type T is considered hashable by the AbslHashValue framework. This allows users to properly SFINAE templated versions of AbslHashValue. (The AbslHashValue implementation added to raw_hash_set shows an example of its use.) PiperOrigin-RevId: 422856706 GitOrigin-RevId: 487c7a754a3b93bc0f9de14bdced48007a96ae55 Change-Id: Id31fd4ccba282f8c9ae6fcee6ae0ad0f7879f456
Diffstat (limited to 'absl/hash')
-rw-r--r--absl/hash/BUILD.bazel6
-rw-r--r--absl/hash/CMakeLists.txt5
-rw-r--r--absl/hash/hash.h85
-rw-r--r--absl/hash/hash_benchmark.cc77
-rw-r--r--absl/hash/hash_test.cc94
-rw-r--r--absl/hash/internal/hash.h180
-rw-r--r--absl/hash/internal/spy_hash_state.h35
7 files changed, 445 insertions, 37 deletions
diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel
index f0640d34..bcc316f9 100644
--- a/absl/hash/BUILD.bazel
+++ b/absl/hash/BUILD.bazel
@@ -41,6 +41,7 @@ cc_library(
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/container:fixed_array",
+ "//absl/functional:function_ref",
"//absl/meta:type_traits",
"//absl/numeric:int128",
"//absl/strings",
@@ -74,7 +75,11 @@ cc_test(
":hash_testing",
":spy_hash_state",
"//absl/base:core_headers",
+ "//absl/container:btree",
+ "//absl/container:flat_hash_map",
"//absl/container:flat_hash_set",
+ "//absl/container:node_hash_map",
+ "//absl/container:node_hash_set",
"//absl/meta:type_traits",
"//absl/numeric:int128",
"//absl/strings:cord_test_helpers",
@@ -93,6 +98,7 @@ cc_binary(
deps = [
":hash",
"//absl/base:core_headers",
+ "//absl/container:flat_hash_set",
"//absl/random",
"//absl/strings",
"//absl/strings:cord",
diff --git a/absl/hash/CMakeLists.txt b/absl/hash/CMakeLists.txt
index 5916ae3c..34434fa0 100644
--- a/absl/hash/CMakeLists.txt
+++ b/absl/hash/CMakeLists.txt
@@ -30,6 +30,7 @@ absl_cc_library(
absl::core_headers
absl::endian
absl::fixed_array
+ absl::function_ref
absl::meta
absl::int128
absl::strings
@@ -68,7 +69,11 @@ absl_cc_test(
absl::hash
absl::hash_testing
absl::core_headers
+ absl::btree
+ absl::flat_hash_map
absl::flat_hash_set
+ absl::node_hash_map
+ absl::node_hash_set
absl::spy_hash_state
absl::meta
absl::int128
diff --git a/absl/hash/hash.h b/absl/hash/hash.h
index 8282ea53..f31fde40 100644
--- a/absl/hash/hash.h
+++ b/absl/hash/hash.h
@@ -26,9 +26,9 @@
// support Abseil hashing without requiring you to define a hashing
// algorithm.
// * `HashState`, a type-erased class which implements the manipulation of the
-// hash state (H) itself, contains member functions `combine()` and
-// `combine_contiguous()`, which you can use to contribute to an existing
-// hash state when hashing your types.
+// hash state (H) itself; contains member functions `combine()`,
+// `combine_contiguous()`, and `combine_unordered()`; and which you can use
+// to contribute to an existing hash state when hashing your types.
//
// Unlike `std::hash` or other hashing frameworks, the Abseil hashing framework
// provides most of its utility by abstracting away the hash algorithm (and its
@@ -74,7 +74,9 @@
#define ABSL_HASH_HASH_H_
#include <tuple>
+#include <utility>
+#include "absl/functional/function_ref.h"
#include "absl/hash/internal/hash.h"
namespace absl {
@@ -107,14 +109,27 @@ ABSL_NAMESPACE_BEGIN
// * std::string_view (as well as any instance of std::basic_string that
// uses char and std::char_traits)
// * All the standard sequence containers (provided the elements are hashable)
-// * All the standard ordered associative containers (provided the elements are
+// * All the standard associative containers (provided the elements are
// hashable)
// * absl types such as the following:
// * absl::string_view
-// * absl::InlinedVector
-// * absl::FixedArray
// * absl::uint128
// * absl::Time, absl::Duration, and absl::TimeZone
+// * absl containers (provided the elements are hashable) such as the
+// following:
+// * absl::flat_hash_set, absl::node_hash_set, absl::btree_set
+// * absl::flat_hash_map, absl::node_hash_map, absl::btree_map
+// * absl::btree_multiset, absl::btree_multimap
+// * absl::InlinedVector
+// * absl::FixedArray
+//
+// When absl::Hash is used to hash an unordered container with a custom hash
+// functor, the elements are hashed using default absl::Hash semantics, not
+// the custom hash functor. This is consistent with the behavior of
+// operator==() on unordered containers, which compares elements pairwise with
+// operator==() rather than the custom equality functor. It is usually a
+// mistake to use either operator==() or absl::Hash on unordered collections
+// that use functors incompatible with operator==() equality.
//
// Note: the list above is not meant to be exhaustive. Additional type support
// may be added, in which case the above list will be updated.
@@ -153,7 +168,8 @@ ABSL_NAMESPACE_BEGIN
// that are otherwise difficult to extend using `AbslHashValue()`. (See the
// `HashState` class below.)
//
-// The "hash state" concept contains two member functions for mixing hash state:
+// The "hash state" concept contains three member functions for mixing hash
+// state:
//
// * `H::combine(state, values...)`
//
@@ -187,6 +203,15 @@ ABSL_NAMESPACE_BEGIN
// (it may perform internal optimizations). If you need this guarantee, use a
// loop instead.
//
+// * `H::combine_unordered(state, begin, end)`
+//
+// Combines a set of elements denoted by an iterator pair into a hash
+// state, returning the updated state. Note that the existing hash
+// state is move-only and must be passed by value.
+//
+// Unlike the other two methods, the hashing is order-independent.
+// This can be used to hash unordered collections.
+//
// -----------------------------------------------------------------------------
// Adding Type Support to `absl::Hash`
// -----------------------------------------------------------------------------
@@ -243,8 +268,9 @@ size_t HashOf(const Types&... values) {
// classes, virtual functions, etc.). The type erasure adds overhead so it
// should be avoided unless necessary.
//
-// Note: This wrapper will only erase calls to:
+// Note: This wrapper will only erase calls to
// combine_contiguous(H, const unsigned char*, size_t)
+// RunCombineUnordered(H, CombinerF)
//
// All other calls will be handled internally and will not invoke overloads
// provided by the wrapped class.
@@ -318,6 +344,8 @@ class HashState : public hash_internal::HashStateBase<HashState> {
private:
HashState() = default;
+ friend class HashState::HashStateBase;
+
template <typename T>
static void CombineContiguousImpl(void* p, const unsigned char* first,
size_t size) {
@@ -329,16 +357,57 @@ class HashState : public hash_internal::HashStateBase<HashState> {
void Init(T* state) {
state_ = state;
combine_contiguous_ = &CombineContiguousImpl<T>;
+ run_combine_unordered_ = &RunCombineUnorderedImpl<T>;
+ }
+
+ template <typename HS>
+ struct CombineUnorderedInvoker {
+ template <typename T, typename ConsumerT>
+ void operator()(T inner_state, ConsumerT inner_cb) {
+ f(HashState::Create(&inner_state),
+ [&](HashState& inner_erased) { inner_cb(inner_erased.Real<T>()); });
+ }
+
+ absl::FunctionRef<void(HS, absl::FunctionRef<void(HS&)>)> f;
+ };
+
+ template <typename T>
+ static HashState RunCombineUnorderedImpl(
+ HashState state,
+ absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)>
+ f) {
+ // Note that this implementation assumes that inner_state and outer_state
+ // are the same type. This isn't true in the SpyHash case, but SpyHash
+ // types are move-convertible to each other, so this still works.
+ T& real_state = state.Real<T>();
+ real_state = T::RunCombineUnordered(
+ std::move(real_state), CombineUnorderedInvoker<HashState>{f});
+ return state;
+ }
+
+ template <typename CombinerT>
+ static HashState RunCombineUnordered(HashState state, CombinerT combiner) {
+ auto* run = state.run_combine_unordered_;
+ return run(std::move(state), std::ref(combiner));
}
// Do not erase an already erased state.
void Init(HashState* state) {
state_ = state->state_;
combine_contiguous_ = state->combine_contiguous_;
+ run_combine_unordered_ = state->run_combine_unordered_;
+ }
+
+ template <typename T>
+ T& Real() {
+ return *static_cast<T*>(state_);
}
void* state_;
void (*combine_contiguous_)(void*, const unsigned char*, size_t);
+ HashState (*run_combine_unordered_)(
+ HashState state,
+ absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)>);
};
ABSL_NAMESPACE_END
diff --git a/absl/hash/hash_benchmark.cc b/absl/hash/hash_benchmark.cc
index d498ac29..8712a01c 100644
--- a/absl/hash/hash_benchmark.cc
+++ b/absl/hash/hash_benchmark.cc
@@ -19,6 +19,7 @@
#include <vector>
#include "absl/base/attributes.h"
+#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/random/random.h"
#include "absl/strings/cord.h"
@@ -107,6 +108,44 @@ absl::Cord FragmentedCord(size_t size) {
return result;
}
+template <typename T>
+std::vector<T> Vector(size_t count) {
+ std::vector<T> result;
+ for (size_t v = 0; v < count; ++v) {
+ result.push_back(v);
+ }
+ return result;
+}
+
+// Bogus type that replicates an unorderd_set's bit mixing, but with
+// vector-speed iteration. This is intended to measure the overhead of unordered
+// hashing without counting the speed of unordered_set iteration.
+template <typename T>
+struct FastUnorderedSet {
+ explicit FastUnorderedSet(size_t count) {
+ for (size_t v = 0; v < count; ++v) {
+ values.push_back(v);
+ }
+ }
+ std::vector<T> values;
+
+ template <typename H>
+ friend H AbslHashValue(H h, const FastUnorderedSet& fus) {
+ return H::combine(H::combine_unordered(std::move(h), fus.values.begin(),
+ fus.values.end()),
+ fus.values.size());
+ }
+};
+
+template <typename T>
+absl::flat_hash_set<T> FlatHashSet(size_t count) {
+ absl::flat_hash_set<T> result;
+ for (size_t v = 0; v < count; ++v) {
+ result.insert(v);
+ }
+ return result;
+}
+
// Generates a benchmark and a codegen method for the provided types. The
// codegen method provides a well known entrypoint for dumping assembly.
#define MAKE_BENCHMARK(hash, name, ...) \
@@ -145,10 +184,22 @@ MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000));
-MAKE_BENCHMARK(AbslHash, VectorInt64_10, std::vector<int64_t>(10));
-MAKE_BENCHMARK(AbslHash, VectorInt64_100, std::vector<int64_t>(100));
-MAKE_BENCHMARK(AbslHash, VectorDouble_10, std::vector<double>(10, 1.1));
-MAKE_BENCHMARK(AbslHash, VectorDouble_100, std::vector<double>(100, 1.1));
+MAKE_BENCHMARK(AbslHash, VectorInt64_10, Vector<int64_t>(10));
+MAKE_BENCHMARK(AbslHash, VectorInt64_100, Vector<int64_t>(100));
+MAKE_BENCHMARK(AbslHash, VectorInt64_1000, Vector<int64_t>(1000));
+MAKE_BENCHMARK(AbslHash, VectorDouble_10, Vector<double>(10));
+MAKE_BENCHMARK(AbslHash, VectorDouble_100, Vector<double>(100));
+MAKE_BENCHMARK(AbslHash, VectorDouble_1000, Vector<double>(1000));
+MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_10, FlatHashSet<int64_t>(10));
+MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_100, FlatHashSet<int64_t>(100));
+MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_1000, FlatHashSet<int64_t>(1000));
+MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_10, FlatHashSet<double>(10));
+MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_100, FlatHashSet<double>(100));
+MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_1000, FlatHashSet<double>(1000));
+MAKE_BENCHMARK(AbslHash, FastUnorderedSetInt64_1000,
+ FastUnorderedSet<int64_t>(1000));
+MAKE_BENCHMARK(AbslHash, FastUnorderedSetDouble_1000,
+ FastUnorderedSet<double>(1000));
MAKE_BENCHMARK(AbslHash, PairStringString_0,
std::make_pair(std::string(), std::string()));
MAKE_BENCHMARK(AbslHash, PairStringString_10,
@@ -180,6 +231,24 @@ MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10,
std::vector<double>(10, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100,
std::vector<double>(100, 1.1));
+MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_1000,
+ std::vector<double>(1000, 1.1));
+MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_10,
+ FlatHashSet<int64_t>(10));
+MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_100,
+ FlatHashSet<int64_t>(100));
+MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_1000,
+ FlatHashSet<int64_t>(1000));
+MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_10,
+ FlatHashSet<double>(10));
+MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_100,
+ FlatHashSet<double>(100));
+MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_1000,
+ FlatHashSet<double>(1000));
+MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetInt64_1000,
+ FastUnorderedSet<int64_t>(1000));
+MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetDouble_1000,
+ FastUnorderedSet<double>(1000));
// The latency benchmark attempts to model the speed of the hash function in
// production. When a hash function is used for hashtable lookups it is rarely
diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc
index dbfacb2d..cea54dc0 100644
--- a/absl/hash/hash_test.cc
+++ b/absl/hash/hash_test.cc
@@ -34,12 +34,18 @@
#include <tuple>
#include <type_traits>
#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "absl/container/btree_map.h"
+#include "absl/container/btree_set.h"
+#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
+#include "absl/container/node_hash_map.h"
+#include "absl/container/node_hash_set.h"
#include "absl/hash/hash_testing.h"
#include "absl/hash/internal/spy_hash_state.h"
#include "absl/meta/type_traits.h"
@@ -433,6 +439,52 @@ TEST(HashValueTest, StdBitset) {
std::bitset<kNumBits>(bit_strings[5].c_str())}));
} // namespace
+// Dummy type with unordered equality and hashing semantics. This preserves
+// input order internally, and is used below to ensure we get test coverage
+// for equal sequences with different iteraton orders.
+template <typename T>
+class UnorderedSequence {
+ public:
+ UnorderedSequence() = default;
+ template <typename TT>
+ UnorderedSequence(std::initializer_list<TT> l)
+ : values_(l.begin(), l.end()) {}
+ template <typename ForwardIterator,
+ typename std::enable_if<!std::is_integral<ForwardIterator>::value,
+ bool>::type = true>
+ UnorderedSequence(ForwardIterator begin, ForwardIterator end)
+ : values_(begin, end) {}
+ // one-argument constructor of value type T, to appease older toolchains that
+ // get confused by one-element initializer lists in some contexts
+ explicit UnorderedSequence(const T& v) : values_(&v, &v + 1) {}
+
+ using value_type = T;
+
+ size_t size() const { return values_.size(); }
+ typename std::vector<T>::const_iterator begin() const {
+ return values_.begin();
+ }
+ typename std::vector<T>::const_iterator end() const { return values_.end(); }
+
+ friend bool operator==(const UnorderedSequence& lhs,
+ const UnorderedSequence& rhs) {
+ return lhs.size() == rhs.size() &&
+ std::is_permutation(lhs.begin(), lhs.end(), rhs.begin());
+ }
+ friend bool operator!=(const UnorderedSequence& lhs,
+ const UnorderedSequence& rhs) {
+ return !(lhs == rhs);
+ }
+ template <typename H>
+ friend H AbslHashValue(H h, const UnorderedSequence& u) {
+ return H::combine(H::combine_unordered(std::move(h), u.begin(), u.end()),
+ u.size());
+ }
+
+ private:
+ std::vector<T> values_;
+};
+
template <typename T>
class HashValueSequenceTest : public testing::Test {
};
@@ -456,10 +508,13 @@ TYPED_TEST_P(HashValueSequenceTest, BasicUsage) {
}
REGISTER_TYPED_TEST_CASE_P(HashValueSequenceTest, BasicUsage);
-using IntSequenceTypes =
- testing::Types<std::deque<int>, std::forward_list<int>, std::list<int>,
- std::vector<int>, std::vector<bool>, TypeErasedVector<int>,
- TypeErasedVector<bool>, std::set<int>, std::multiset<int>>;
+using IntSequenceTypes = testing::Types<
+ std::deque<int>, std::forward_list<int>, std::list<int>, std::vector<int>,
+ std::vector<bool>, TypeErasedContainer<std::vector<int>>, std::set<int>,
+ std::multiset<int>, UnorderedSequence<int>,
+ TypeErasedContainer<UnorderedSequence<int>>, std::unordered_set<int>,
+ std::unordered_multiset<int>, absl::flat_hash_set<int>,
+ absl::node_hash_set<int>, absl::btree_set<int>>;
INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueSequenceTest, IntSequenceTypes);
template <typename T>
@@ -487,11 +542,15 @@ TYPED_TEST_P(HashValueNestedSequenceTest, BasicUsage) {
}
REGISTER_TYPED_TEST_CASE_P(HashValueNestedSequenceTest, BasicUsage);
-using NestedIntSequenceTypes =
- testing::Types<std::vector<std::vector<int>>,
- std::vector<TypeErasedVector<int>>,
- TypeErasedVector<std::vector<int>>,
- TypeErasedVector<TypeErasedVector<int>>>;
+template <typename T>
+using TypeErasedSet = TypeErasedContainer<UnorderedSequence<T>>;
+
+using NestedIntSequenceTypes = testing::Types<
+ std::vector<std::vector<int>>, std::vector<UnorderedSequence<int>>,
+ std::vector<TypeErasedSet<int>>, UnorderedSequence<std::vector<int>>,
+ UnorderedSequence<UnorderedSequence<int>>,
+ UnorderedSequence<TypeErasedSet<int>>, TypeErasedSet<std::vector<int>>,
+ TypeErasedSet<UnorderedSequence<int>>, TypeErasedSet<TypeErasedSet<int>>>;
INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueNestedSequenceTest,
NestedIntSequenceTypes);
@@ -674,7 +733,11 @@ TYPED_TEST_P(HashValueAssociativeMapTest, BasicUsage) {
}
REGISTER_TYPED_TEST_CASE_P(HashValueAssociativeMapTest, BasicUsage);
-using AssociativeMapTypes = testing::Types<std::map<int, std::string>>;
+using AssociativeMapTypes = testing::Types<
+ std::map<int, std::string>, std::unordered_map<int, std::string>,
+ absl::flat_hash_map<int, std::string>,
+ absl::node_hash_map<int, std::string>, absl::btree_map<int, std::string>,
+ UnorderedSequence<std::pair<const int, std::string>>>;
INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueAssociativeMapTest,
AssociativeMapTypes);
@@ -702,7 +765,8 @@ TYPED_TEST_P(HashValueAssociativeMultimapTest, BasicUsage) {
REGISTER_TYPED_TEST_CASE_P(HashValueAssociativeMultimapTest, BasicUsage);
using AssociativeMultimapTypes =
- testing::Types<std::multimap<int, std::string>>;
+ testing::Types<std::multimap<int, std::string>,
+ std::unordered_multimap<int, std::string>>;
INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueAssociativeMultimapTest,
AssociativeMultimapTypes);
@@ -1062,6 +1126,14 @@ TEST(HashTest, TypeErased) {
EXPECT_EQ(SpyHash(std::make_pair(TypeErasedValue<size_t>(7), 17)),
SpyHash(std::make_pair(size_t{7}, 17)));
+
+ absl::flat_hash_set<absl::flat_hash_set<int>> ss = {{1, 2}, {3, 4}};
+ TypeErasedContainer<absl::flat_hash_set<absl::flat_hash_set<int>>> es = {
+ absl::flat_hash_set<int>{1, 2}, {3, 4}};
+ absl::flat_hash_set<TypeErasedContainer<absl::flat_hash_set<int>>> se = {
+ {1, 2}, {3, 4}};
+ EXPECT_EQ(SpyHash(ss), SpyHash(es));
+ EXPECT_EQ(SpyHash(ss), SpyHash(se));
}
struct ValueWithBoolConversion {
diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h
index 97b68bad..a424e014 100644
--- a/absl/hash/internal/hash.h
+++ b/absl/hash/internal/hash.h
@@ -23,6 +23,7 @@
#include <array>
#include <bitset>
#include <cmath>
+#include <cstddef>
#include <cstring>
#include <deque>
#include <forward_list>
@@ -36,6 +37,8 @@
#include <string>
#include <tuple>
#include <type_traits>
+#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -54,6 +57,9 @@
namespace absl {
ABSL_NAMESPACE_BEGIN
+
+class HashState;
+
namespace hash_internal {
// Internal detail: Large buffers are hashed in smaller chunks. This function
@@ -115,24 +121,66 @@ class PiecewiseCombiner {
size_t position_;
};
+// is_hashable()
+//
+// Trait class which returns true if T is hashable by the absl::Hash framework.
+// Used for the AbslHashValue implementations for composite types below.
+template <typename T>
+struct is_hashable;
+
// HashStateBase
//
-// A hash state object represents an intermediate state in the computation
-// of an unspecified hash algorithm. `HashStateBase` provides a CRTP style
-// base class for hash state implementations. Developers adding type support
-// for `absl::Hash` should not rely on any parts of the state object other than
-// the following member functions:
+// An internal implementation detail that contains common implementation details
+// for all of the "hash state objects" objects generated by Abseil. This is not
+// a public API; users should not create classes that inherit from this.
+//
+// A hash state object is the template argument `H` passed to `AbslHashValue`.
+// It represents an intermediate state in the computation of an unspecified hash
+// algorithm. `HashStateBase` provides a CRTP style base class for hash state
+// implementations. Developers adding type support for `absl::Hash` should not
+// rely on any parts of the state object other than the following member
+// functions:
//
// * HashStateBase::combine()
// * HashStateBase::combine_contiguous()
+// * HashStateBase::combine_unordered()
//
-// A derived hash state class of type `H` must provide a static member function
+// A derived hash state class of type `H` must provide a public member function
// with a signature similar to the following:
//
// `static H combine_contiguous(H state, const unsigned char*, size_t)`.
//
+// It must also provide a private template method named RunCombineUnordered.
+//
+// A "consumer" is a 1-arg functor returning void. Its argument is a reference
+// to an inner hash state object, and it may be called multiple times. When
+// called, the functor consumes the entropy from the provided state object,
+// and resets that object to its empty state.
+//
+// A "combiner" is a stateless 2-arg functor returning void. Its arguments are
+// an inner hash state object and an ElementStateConsumer functor. A combiner
+// uses the provided inner hash state object to hash each element of the
+// container, passing the inner hash state object to the consumer after hashing
+// each element.
+//
+// Given these definitions, a derived hash state class of type H
+// must provide a private template method with a signature similar to the
+// following:
+//
+// `template <typename CombinerT>`
+// `static H RunCombineUnordered(H outer_state, CombinerT combiner)`
+//
+// This function is responsible for constructing the inner state object and
+// providing a consumer to the combiner. It uses side effects of the consumer
+// and combiner to mix the state of each element in an order-independent manner,
+// and uses this to return an updated value of `outer_state`.
+//
+// This inside-out approach generates efficient object code in the normal case,
+// but allows us to use stack storage to implement the absl::HashState type
+// erasure mechanism (avoiding heap allocations while hashing).
+//
// `HashStateBase` will provide a complete implementation for a hash state
-// object in terms of this method.
+// object in terms of these two methods.
//
// Example:
//
@@ -141,6 +189,10 @@ class PiecewiseCombiner {
// static H combine_contiguous(H state, const unsigned char*, size_t);
// using MyHashState::HashStateBase::combine;
// using MyHashState::HashStateBase::combine_contiguous;
+// using MyHashState::HashStateBase::combine_unordered;
+// private:
+// template <typename CombinerT>
+// static H RunCombineUnordered(H state, CombinerT combiner);
// };
template <typename H>
class HashStateBase {
@@ -181,7 +233,30 @@ class HashStateBase {
template <typename T>
static H combine_contiguous(H state, const T* data, size_t size);
+ template <typename I>
+ static H combine_unordered(H state, I begin, I end);
+
using AbslInternalPiecewiseCombiner = PiecewiseCombiner;
+
+ template <typename T>
+ using is_hashable = absl::hash_internal::is_hashable<T>;
+
+ private:
+ // Common implementation of the iteration step of a "combiner", as described
+ // above.
+ template <typename I>
+ struct CombineUnorderedCallback {
+ I begin;
+ I end;
+
+ template <typename InnerH, typename ElementStateConsumer>
+ void operator()(InnerH inner_state, ElementStateConsumer cb) {
+ for (; begin != end; ++begin) {
+ inner_state = H::combine(std::move(inner_state), *begin);
+ cb(inner_state);
+ }
+ }
+ };
};
// is_uniquely_represented
@@ -350,13 +425,6 @@ H AbslHashValue(H hash_state, std::nullptr_t) {
// AbslHashValue for Composite Types
// -----------------------------------------------------------------------------
-// is_hashable()
-//
-// Trait class which returns true if T is hashable by the absl::Hash framework.
-// Used for the AbslHashValue implementations for composite types below.
-template <typename T>
-struct is_hashable;
-
// AbslHashValue() for hashing pairs
template <typename H, typename T1, typename T2>
typename std::enable_if<is_hashable<T1>::value && is_hashable<T2>::value,
@@ -503,8 +571,10 @@ AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) {
}
// AbslHashValue special cases for hashing std::vector<bool>
+
#if defined(ABSL_IS_BIG_ENDIAN) && \
(defined(__GLIBCXX__) || defined(__GLIBCPP__))
+
// std::hash in libstdc++ does not work correctly with vector<bool> on Big
// Endian platforms therefore we need to implement a custom AbslHashValue for
// it. More details on the bug:
@@ -588,6 +658,55 @@ typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
}
// -----------------------------------------------------------------------------
+// AbslHashValue for Unordered Associative Containers
+// -----------------------------------------------------------------------------
+
+// AbslHashValue for hashing std::unordered_set
+template <typename H, typename Key, typename Hash, typename KeyEqual,
+ typename Alloc>
+typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
+ H hash_state, const std::unordered_set<Key, Hash, KeyEqual, Alloc>& s) {
+ return H::combine(
+ H::combine_unordered(std::move(hash_state), s.begin(), s.end()),
+ s.size());
+}
+
+// AbslHashValue for hashing std::unordered_multiset
+template <typename H, typename Key, typename Hash, typename KeyEqual,
+ typename Alloc>
+typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
+ H hash_state,
+ const std::unordered_multiset<Key, Hash, KeyEqual, Alloc>& s) {
+ return H::combine(
+ H::combine_unordered(std::move(hash_state), s.begin(), s.end()),
+ s.size());
+}
+
+// AbslHashValue for hashing std::unordered_set
+template <typename H, typename Key, typename T, typename Hash,
+ typename KeyEqual, typename Alloc>
+typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
+ H>::type
+AbslHashValue(H hash_state,
+ const std::unordered_map<Key, T, Hash, KeyEqual, Alloc>& s) {
+ return H::combine(
+ H::combine_unordered(std::move(hash_state), s.begin(), s.end()),
+ s.size());
+}
+
+// AbslHashValue for hashing std::unordered_multiset
+template <typename H, typename Key, typename T, typename Hash,
+ typename KeyEqual, typename Alloc>
+typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
+ H>::type
+AbslHashValue(H hash_state,
+ const std::unordered_multimap<Key, T, Hash, KeyEqual, Alloc>& s) {
+ return H::combine(
+ H::combine_unordered(std::move(hash_state), s.begin(), s.end()),
+ s.size());
+}
+
+// -----------------------------------------------------------------------------
// AbslHashValue for Wrapper Types
// -----------------------------------------------------------------------------
@@ -830,6 +949,31 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
// move-only ensures that there is only one non-moved-from object.
MixingHashState() : state_(Seed()) {}
+ friend class MixingHashState::HashStateBase;
+
+ template <typename CombinerT>
+ static MixingHashState RunCombineUnordered(MixingHashState state,
+ CombinerT combiner) {
+ uint64_t unordered_state = 0;
+ combiner(MixingHashState{}, [&](MixingHashState& inner_state) {
+ // Add the hash state of the element to the running total, but mix the
+ // carry bit back into the low bit. This in intended to avoid losing
+ // entropy to overflow, especially when unordered_multisets contain
+ // multiple copies of the same value.
+ auto element_state = inner_state.state_;
+ unordered_state += element_state;
+ if (unordered_state < element_state) {
+ ++unordered_state;
+ }
+ inner_state = MixingHashState{};
+ });
+ return MixingHashState::combine(std::move(state), unordered_state);
+ }
+
+ // Allow the HashState type-erasure implementation to invoke
+ // RunCombinedUnordered() directly.
+ friend class absl::HashState;
+
// Workaround for MSVC bug.
// We make the type copyable to fix the calling convention, even though we
// never actually copy it. Keep it private to not affect the public API of the
@@ -1064,6 +1208,14 @@ H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) {
return hash_internal::hash_range_or_bytes(std::move(state), data, size);
}
+// HashStateBase::combine_unordered()
+template <typename H>
+template <typename I>
+H HashStateBase<H>::combine_unordered(H state, I begin, I end) {
+ return H::RunCombineUnordered(std::move(state),
+ CombineUnorderedCallback<I>{begin, end});
+}
+
// HashStateBase::PiecewiseCombiner::add_buffer()
template <typename H>
H PiecewiseCombiner::add_buffer(H state, const unsigned char* data,
diff --git a/absl/hash/internal/spy_hash_state.h b/absl/hash/internal/spy_hash_state.h
index c0831208..09728266 100644
--- a/absl/hash/internal/spy_hash_state.h
+++ b/absl/hash/internal/spy_hash_state.h
@@ -15,6 +15,7 @@
#ifndef ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_
#define ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_
+#include <algorithm>
#include <ostream>
#include <string>
#include <vector>
@@ -167,6 +168,24 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
using SpyHashStateImpl::HashStateBase::combine_contiguous;
+ template <typename CombinerT>
+ static SpyHashStateImpl RunCombineUnordered(SpyHashStateImpl state,
+ CombinerT combiner) {
+ UnorderedCombinerCallback cb;
+
+ combiner(SpyHashStateImpl<void>{}, std::ref(cb));
+
+ std::sort(cb.element_hash_representations.begin(),
+ cb.element_hash_representations.end());
+ state.hash_representation_.insert(state.hash_representation_.end(),
+ cb.element_hash_representations.begin(),
+ cb.element_hash_representations.end());
+ if (cb.error && cb.error->has_value()) {
+ state.error_ = std::move(cb.error);
+ }
+ return state;
+ }
+
absl::optional<std::string> error() const {
if (moved_from_) {
return "Returned a moved-from instance of the hash state object.";
@@ -178,6 +197,22 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
template <typename U>
friend class SpyHashStateImpl;
+ struct UnorderedCombinerCallback {
+ std::vector<std::string> element_hash_representations;
+ std::shared_ptr<absl::optional<std::string>> error;
+
+ // The inner spy can have a different type.
+ template <typename U>
+ void operator()(SpyHashStateImpl<U>& inner) {
+ element_hash_representations.push_back(
+ absl::StrJoin(inner.hash_representation_, ""));
+ if (inner.error_->has_value()) {
+ error = std::move(inner.error_);
+ }
+ inner = SpyHashStateImpl<void>{};
+ }
+ };
+
// This is true if SpyHashStateImpl<T> has been passed to a call of
// AbslHashValue with the wrong type. This detects that the user called
// AbslHashValue directly (because the hash state type does not match).