diff options
author | Dennis Kormalev <denk@google.com> | 2024-02-07 08:39:23 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-02-07 08:40:46 -0800 |
commit | 643b48a3b4362a932c0e41afce62deb55adf825b (patch) | |
tree | 5e5b68000ddde6fe775e2379b18e7dfa0ad98e11 | |
parent | e22f9c1fdd00f2ad3cfdd1dd7a67145eadc3b21c (diff) |
Add absl_container_hash-based HashEq specialization
SwissTable provides support for heterogeneous lookup in associative
containers through transparent Hash and Eq types. However, it is not
possible for user types to provide additional specializations to allow
their types to use this functionality.
This CL brings ability for user types to specify their own transparent
absl_container_hash and (optionally) absl_container_eq inner types to
achieve the same functionality.
PiperOrigin-RevId: 604994859
Change-Id: I302486d292c9a18b7d4c77033227008f5539e354
-rw-r--r-- | absl/container/BUILD.bazel | 4 | ||||
-rw-r--r-- | absl/container/CMakeLists.txt | 4 | ||||
-rw-r--r-- | absl/container/flat_hash_map.h | 15 | ||||
-rw-r--r-- | absl/container/flat_hash_set.h | 15 | ||||
-rw-r--r-- | absl/container/internal/hash_function_defaults.h | 69 | ||||
-rw-r--r-- | absl/container/internal/hash_function_defaults_test.cc | 139 | ||||
-rw-r--r-- | absl/container/node_hash_map.h | 15 | ||||
-rw-r--r-- | absl/container/node_hash_set.h | 15 |
8 files changed, 270 insertions, 6 deletions
diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel index 5160ccea..0de45263 100644 --- a/absl/container/BUILD.bazel +++ b/absl/container/BUILD.bazel @@ -423,8 +423,10 @@ cc_library( "//visibility:private", ], deps = [ + ":common", "//absl/base:config", "//absl/hash", + "//absl/meta:type_traits", "//absl/strings", "//absl/strings:cord", ], @@ -437,6 +439,8 @@ cc_test( linkopts = ABSL_DEFAULT_LINKOPTS, tags = NOTEST_TAGS_MOBILE + ["no_test_loonix"], deps = [ + ":flat_hash_map", + ":flat_hash_set", ":hash_function_defaults", "//absl/hash", "//absl/random", diff --git a/absl/container/CMakeLists.txt b/absl/container/CMakeLists.txt index 85af0bd7..4b08e6a3 100644 --- a/absl/container/CMakeLists.txt +++ b/absl/container/CMakeLists.txt @@ -471,9 +471,11 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} DEPS absl::config + absl::container_common absl::cord absl::hash absl::strings + absl::type_traits PUBLIC ) @@ -487,6 +489,8 @@ absl_cc_test( DEPS absl::cord absl::cord_test_helpers + absl::flat_hash_map + absl::flat_hash_set absl::hash_function_defaults absl::hash absl::random_random diff --git a/absl/container/flat_hash_map.h b/absl/container/flat_hash_map.h index 2a1ad0fb..a33c794f 100644 --- a/absl/container/flat_hash_map.h +++ b/absl/container/flat_hash_map.h @@ -62,7 +62,7 @@ struct FlatHashMapPolicy; // * Requires values that are MoveConstructible // * Supports heterogeneous lookup, through `find()`, `operator[]()` and // `insert()`, provided that the map is provided a compatible heterogeneous -// hashing function and equality operator. +// hashing function and equality operator. See below for details. // * Invalidates any references and pointers to elements within the table after // `rehash()` and when the table is moved. // * Contains a `capacity()` member function indicating the number of element @@ -80,6 +80,19 @@ struct FlatHashMapPolicy; // libraries (e.g. .dll, .so) is unsupported due to way `absl::Hash` values may // be randomized across dynamically loaded libraries. // +// To achieve heterogeneous lookup for custom types either `Hash` and `Eq` type +// parameters can be used or `T` should have public inner types +// `absl_container_hash` and (optionally) `absl_container_eq`. In either case, +// `typename Hash::is_transparent` and `typename Eq::is_transparent` should be +// well-formed. Both types are basically functors: +// * `Hash` should support `size_t operator()(U val) const` that returns a hash +// for the given `val`. +// * `Eq` should support `bool operator()(U lhs, V rhs) const` that returns true +// if `lhs` is equal to `rhs`. +// +// In most cases `T` needs only to provide the `absl_container_hash`. In this +// case `std::equal_to<void>` will be used instead of `eq` part. +// // NOTE: A `flat_hash_map` stores its value types directly inside its // implementation array to avoid memory indirection. Because a `flat_hash_map` // is designed to move data when rehashed, map values will not retain pointer diff --git a/absl/container/flat_hash_set.h b/absl/container/flat_hash_set.h index 1be00195..5f72f954 100644 --- a/absl/container/flat_hash_set.h +++ b/absl/container/flat_hash_set.h @@ -60,7 +60,7 @@ struct FlatHashSetPolicy; // * Requires keys that are CopyConstructible // * Supports heterogeneous lookup, through `find()` and `insert()`, provided // that the set is provided a compatible heterogeneous hashing function and -// equality operator. +// equality operator. See below for details. // * Invalidates any references and pointers to elements within the table after // `rehash()` and when the table is moved. // * Contains a `capacity()` member function indicating the number of element @@ -78,6 +78,19 @@ struct FlatHashSetPolicy; // libraries (e.g. .dll, .so) is unsupported due to way `absl::Hash` values may // be randomized across dynamically loaded libraries. // +// To achieve heterogeneous lookup for custom types either `Hash` and `Eq` type +// parameters can be used or `T` should have public inner types +// `absl_container_hash` and (optionally) `absl_container_eq`. In either case, +// `typename Hash::is_transparent` and `typename Eq::is_transparent` should be +// well-formed. Both types are basically functors: +// * `Hash` should support `size_t operator()(U val) const` that returns a hash +// for the given `val`. +// * `Eq` should support `bool operator()(U lhs, V rhs) const` that returns true +// if `lhs` is equal to `rhs`. +// +// In most cases `T` needs only to provide the `absl_container_hash`. In this +// case `std::equal_to<void>` will be used instead of `eq` part. +// // NOTE: A `flat_hash_set` stores its keys directly inside its implementation // array to avoid memory indirection. Because a `flat_hash_set` is designed to // move data when rehashed, set keys will not retain pointer stability. If you diff --git a/absl/container/internal/hash_function_defaults.h b/absl/container/internal/hash_function_defaults.h index a3613b4b..0f07bcfe 100644 --- a/absl/container/internal/hash_function_defaults.h +++ b/absl/container/internal/hash_function_defaults.h @@ -45,14 +45,16 @@ #ifndef ABSL_CONTAINER_INTERNAL_HASH_FUNCTION_DEFAULTS_H_ #define ABSL_CONTAINER_INTERNAL_HASH_FUNCTION_DEFAULTS_H_ -#include <stdint.h> #include <cstddef> +#include <functional> #include <memory> #include <string> #include <type_traits> #include "absl/base/config.h" +#include "absl/container/internal/common.h" #include "absl/hash/hash.h" +#include "absl/meta/type_traits.h" #include "absl/strings/cord.h" #include "absl/strings/string_view.h" @@ -188,6 +190,71 @@ struct HashEq<std::unique_ptr<T, D>> : HashEq<T*> {}; template <class T> struct HashEq<std::shared_ptr<T>> : HashEq<T*> {}; +template <typename T, typename E = void> +struct HasAbslContainerHash : std::false_type {}; + +template <typename T> +struct HasAbslContainerHash<T, absl::void_t<typename T::absl_container_hash>> + : std::true_type {}; + +template <typename T, typename E = void> +struct HasAbslContainerEq : std::false_type {}; + +template <typename T> +struct HasAbslContainerEq<T, absl::void_t<typename T::absl_container_eq>> + : std::true_type {}; + +template <typename T, typename E = void> +struct AbslContainerEq { + using type = std::equal_to<>; +}; + +template <typename T> +struct AbslContainerEq< + T, typename std::enable_if_t<HasAbslContainerEq<T>::value>> { + using type = typename T::absl_container_eq; +}; + +template <typename T, typename E = void> +struct AbslContainerHash { + using type = void; +}; + +template <typename T> +struct AbslContainerHash< + T, typename std::enable_if_t<HasAbslContainerHash<T>::value>> { + using type = typename T::absl_container_hash; +}; + +// HashEq specialization for user types that provide `absl_container_hash` and +// (optionally) `absl_container_eq`. This specialization allows user types to +// provide heterogeneous lookup without requiring to explicitly specify Hash/Eq +// type arguments in unordered Abseil containers. +// +// Both `absl_container_hash` and `absl_container_eq` should be transparent +// (have inner is_transparent type). While there is no technical reason to +// restrict to transparent-only types, there is also no feasible use case when +// it shouldn't be transparent - it is easier to relax the requirement later if +// such a case arises rather than restricting it. +// +// If type provides only `absl_container_hash` then `eq` part will be +// `std::equal_to<void>`. +// +// User types are not allowed to provide only a `Eq` part as there is no +// feasible use case for this behavior - if Hash should be a default one then Eq +// should be an equivalent to the `std::equal_to<T>`. +template <typename T> +struct HashEq<T, typename std::enable_if_t<HasAbslContainerHash<T>::value>> { + using Hash = typename AbslContainerHash<T>::type; + using Eq = typename AbslContainerEq<T>::type; + static_assert(IsTransparent<Hash>::value, + "absl_container_hash must be transparent. To achieve it add a " + "`using is_transparent = void;` clause to this type."); + static_assert(IsTransparent<Eq>::value, + "absl_container_eq must be transparent. To achieve it add a " + "`using is_transparent = void;` clause to this type."); +}; + // This header's visibility is restricted. If you need to access the default // hasher please use the container's ::hasher alias instead. // diff --git a/absl/container/internal/hash_function_defaults_test.cc b/absl/container/internal/hash_function_defaults_test.cc index 7da0c86a..912d1190 100644 --- a/absl/container/internal/hash_function_defaults_test.cc +++ b/absl/container/internal/hash_function_defaults_test.cc @@ -14,11 +14,15 @@ #include "absl/container/internal/hash_function_defaults.h" +#include <cstddef> #include <functional> #include <type_traits> #include <utility> #include "gtest/gtest.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/hash/hash.h" #include "absl/random/random.h" #include "absl/strings/cord.h" #include "absl/strings/cord_test_helpers.h" @@ -495,13 +499,146 @@ TYPED_TEST(StringLikeTest, HashEq) { EXPECT_NE(this->hash(this->a1), this->hash(this->b2)); } +struct TypeWithAbslContainerHash { + struct absl_container_hash { + using is_transparent = void; + + size_t operator()(const TypeWithAbslContainerHash& foo) const { + return absl::HashOf(foo.value); + } + + // Extra overload to test that heterogeneity works for this hasher. + size_t operator()(int value) const { return absl::HashOf(value); } + }; + + friend bool operator==(const TypeWithAbslContainerHash& lhs, + const TypeWithAbslContainerHash& rhs) { + return lhs.value == rhs.value; + } + + friend bool operator==(const TypeWithAbslContainerHash& lhs, int rhs) { + return lhs.value == rhs; + } + + int value; + int noise; +}; + +struct TypeWithAbslContainerHashAndEq { + struct absl_container_hash { + using is_transparent = void; + + size_t operator()(const TypeWithAbslContainerHashAndEq& foo) const { + return absl::HashOf(foo.value); + } + + // Extra overload to test that heterogeneity works for this hasher. + size_t operator()(int value) const { return absl::HashOf(value); } + }; + + struct absl_container_eq { + using is_transparent = void; + + bool operator()(const TypeWithAbslContainerHashAndEq& lhs, + const TypeWithAbslContainerHashAndEq& rhs) const { + return lhs.value == rhs.value; + } + + // Extra overload to test that heterogeneity works for this eq. + bool operator()(const TypeWithAbslContainerHashAndEq& lhs, int rhs) const { + return lhs.value == rhs; + } + }; + + template <typename T> + bool operator==(T&& other) const = delete; + + int value; + int noise; +}; + +using AbslContainerHashTypes = + Types<TypeWithAbslContainerHash, TypeWithAbslContainerHashAndEq>; + +template <typename T> +using AbslContainerHashTest = ::testing::Test; + +TYPED_TEST_SUITE(AbslContainerHashTest, AbslContainerHashTypes); + +TYPED_TEST(AbslContainerHashTest, HasherWorks) { + hash_default_hash<TypeParam> hasher; + + TypeParam foo1{/*value=*/1, /*noise=*/100}; + TypeParam foo1_copy{/*value=*/1, /*noise=*/20}; + TypeParam foo2{/*value=*/2, /*noise=*/100}; + + EXPECT_EQ(hasher(foo1), absl::HashOf(1)); + EXPECT_EQ(hasher(foo2), absl::HashOf(2)); + EXPECT_EQ(hasher(foo1), hasher(foo1_copy)); + + // Heterogeneity works. + EXPECT_EQ(hasher(foo1), hasher(1)); + EXPECT_EQ(hasher(foo2), hasher(2)); +} + +TYPED_TEST(AbslContainerHashTest, EqWorks) { + hash_default_eq<TypeParam> eq; + + TypeParam foo1{/*value=*/1, /*noise=*/100}; + TypeParam foo1_copy{/*value=*/1, /*noise=*/20}; + TypeParam foo2{/*value=*/2, /*noise=*/100}; + + EXPECT_TRUE(eq(foo1, foo1_copy)); + EXPECT_FALSE(eq(foo1, foo2)); + + // Heterogeneity works. + EXPECT_TRUE(eq(foo1, 1)); + EXPECT_FALSE(eq(foo1, 2)); +} + +TYPED_TEST(AbslContainerHashTest, HeterogeneityInMapWorks) { + absl::flat_hash_map<TypeParam, int> map; + + TypeParam foo1{/*value=*/1, /*noise=*/100}; + TypeParam foo1_copy{/*value=*/1, /*noise=*/20}; + TypeParam foo2{/*value=*/2, /*noise=*/100}; + TypeParam foo3{/*value=*/3, /*noise=*/100}; + + map[foo1] = 1; + map[foo2] = 2; + + EXPECT_TRUE(map.contains(foo1_copy)); + EXPECT_EQ(map.at(foo1_copy), 1); + EXPECT_TRUE(map.contains(1)); + EXPECT_EQ(map.at(1), 1); + EXPECT_TRUE(map.contains(2)); + EXPECT_EQ(map.at(2), 2); + EXPECT_FALSE(map.contains(foo3)); + EXPECT_FALSE(map.contains(3)); +} + +TYPED_TEST(AbslContainerHashTest, HeterogeneityInSetWorks) { + absl::flat_hash_set<TypeParam> set; + + TypeParam foo1{/*value=*/1, /*noise=*/100}; + TypeParam foo1_copy{/*value=*/1, /*noise=*/20}; + TypeParam foo2{/*value=*/2, /*noise=*/100}; + + set.insert(foo1); + + EXPECT_TRUE(set.contains(foo1_copy)); + EXPECT_TRUE(set.contains(1)); + EXPECT_FALSE(set.contains(foo2)); + EXPECT_FALSE(set.contains(2)); +} + } // namespace } // namespace container_internal ABSL_NAMESPACE_END } // namespace absl enum Hash : size_t { - kStd = 0x1, // std::hash + kStd = 0x1, // std::hash #ifdef _MSC_VER kExtension = kStd, // In MSVC, std::hash == ::hash #else // _MSC_VER diff --git a/absl/container/node_hash_map.h b/absl/container/node_hash_map.h index 72e78958..cb41543c 100644 --- a/absl/container/node_hash_map.h +++ b/absl/container/node_hash_map.h @@ -67,7 +67,7 @@ class NodeHashMapPolicy; // // * Supports heterogeneous lookup, through `find()`, `operator[]()` and // `insert()`, provided that the map is provided a compatible heterogeneous -// hashing function and equality operator. +// hashing function and equality operator. See below for details. // * Contains a `capacity()` member function indicating the number of element // slots (open, deleted, and empty) within the hash map. // * Returns `void` from the `erase(iterator)` overload. @@ -83,6 +83,19 @@ class NodeHashMapPolicy; // libraries (e.g. .dll, .so) is unsupported due to way `absl::Hash` values may // be randomized across dynamically loaded libraries. // +// To achieve heterogeneous lookup for custom types either `Hash` and `Eq` type +// parameters can be used or `T` should have public inner types +// `absl_container_hash` and (optionally) `absl_container_eq`. In either case, +// `typename Hash::is_transparent` and `typename Eq::is_transparent` should be +// well-formed. Both types are basically functors: +// * `Hash` should support `size_t operator()(U val) const` that returns a hash +// for the given `val`. +// * `Eq` should support `bool operator()(U lhs, V rhs) const` that returns true +// if `lhs` is equal to `rhs`. +// +// In most cases `T` needs only to provide the `absl_container_hash`. In this +// case `std::equal_to<void>` will be used instead of `eq` part. +// // Example: // // // Create a node hash map of three strings (that map to strings) diff --git a/absl/container/node_hash_set.h b/absl/container/node_hash_set.h index ec9ab519..8cc4b624 100644 --- a/absl/container/node_hash_set.h +++ b/absl/container/node_hash_set.h @@ -64,7 +64,7 @@ struct NodeHashSetPolicy; // // * Supports heterogeneous lookup, through `find()`, `operator[]()` and // `insert()`, provided that the set is provided a compatible heterogeneous -// hashing function and equality operator. +// hashing function and equality operator. See below for details. // * Contains a `capacity()` member function indicating the number of element // slots (open, deleted, and empty) within the hash set. // * Returns `void` from the `erase(iterator)` overload. @@ -80,6 +80,19 @@ struct NodeHashSetPolicy; // libraries (e.g. .dll, .so) is unsupported due to way `absl::Hash` values may // be randomized across dynamically loaded libraries. // +// To achieve heterogeneous lookup for custom types either `Hash` and `Eq` type +// parameters can be used or `T` should have public inner types +// `absl_container_hash` and (optionally) `absl_container_eq`. In either case, +// `typename Hash::is_transparent` and `typename Eq::is_transparent` should be +// well-formed. Both types are basically functors: +// * `Hash` should support `size_t operator()(U val) const` that returns a hash +// for the given `val`. +// * `Eq` should support `bool operator()(U lhs, V rhs) const` that returns true +// if `lhs` is equal to `rhs`. +// +// In most cases `T` needs only to provide the `absl_container_hash`. In this +// case `std::equal_to<void>` will be used instead of `eq` part. +// // Example: // // // Create a node hash set of three strings |