From ed59f62f8bbc5f05bcba2f89ee16f107e03813f2 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Mon, 30 Jan 2023 14:59:39 -0800 Subject: In sanitizer mode, detect when references become invalidated by randomly rehashing on insertions when there is no reserved growth. PiperOrigin-RevId: 505807487 Change-Id: I9051a04f6a75e579d16e9ae8defd404bcc377fba --- absl/container/BUILD.bazel | 1 + absl/container/CMakeLists.txt | 1 + absl/container/internal/raw_hash_set.cc | 15 +++++++++++++ absl/container/internal/raw_hash_set.h | 33 ++++++++++++++++++++-------- absl/container/internal/raw_hash_set_test.cc | 31 ++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 9 deletions(-) diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel index 7a966d63..92031811 100644 --- a/absl/container/BUILD.bazel +++ b/absl/container/BUILD.bazel @@ -623,6 +623,7 @@ cc_library( "//absl/base:endian", "//absl/base:prefetch", "//absl/base:raw_logging_internal", + "//absl/hash", "//absl/memory", "//absl/meta:type_traits", "//absl/numeric:bits", diff --git a/absl/container/CMakeLists.txt b/absl/container/CMakeLists.txt index 416e3e38..2ebfd08e 100644 --- a/absl/container/CMakeLists.txt +++ b/absl/container/CMakeLists.txt @@ -705,6 +705,7 @@ absl_cc_library( absl::container_memory absl::core_headers absl::endian + absl::hash absl::hash_policy_traits absl::hashtable_debug_hooks absl::hashtablez_sampler diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index 3677ac59..5dc8b2fa 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -19,6 +19,7 @@ #include #include "absl/base/config.h" +#include "absl/hash/hash.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -51,6 +52,20 @@ inline size_t RandomSeed() { return value ^ static_cast(reinterpret_cast(&counter)); } +bool CommonFieldsGenerationInfoEnabled:: + should_rehash_for_bug_detection_on_insert(const ctrl_t* ctrl, + size_t capacity) const { + if (reserved_growth_ == kReservedGrowthJustRanOut) return true; + if (reserved_growth_ > 0) return false; + // Note: we can't use the abseil-random library because abseil-random + // depends on swisstable. We want to return true with probability + // `min(1, RehashProbabilityConstant() / capacity())`. In order to do this, + // we probe based on a random hash and see if the offset is less than + // RehashProbabilityConstant(). + return probe(ctrl, capacity, absl::HashOf(RandomSeed())).offset() < + RehashProbabilityConstant(); +} + bool ShouldInsertBackwards(size_t hash, const ctrl_t* ctrl) { // To avoid problems with weak hashes and single bit tests, we use % 13. // TODO(kfm,sbenza): revisit after we do unconditional mixing diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 09b55f66..65dc66c2 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -749,6 +749,15 @@ using Group = GroupAArch64Impl; using Group = GroupPortableImpl; #endif +// When there is an insertion with no reserved growth, we rehash with +// probability `min(1, RehashProbabilityConstant() / capacity())`. Using a +// constant divided by capacity ensures that inserting N elements is still O(N) +// in the average case. Using the constant 16 means that we expect to rehash ~8 +// times more often than when generations are disabled. We are adding expected +// rehash_probability * #insertions/capacity_growth = 16/capacity * ((7/8 - +// 7/16) * capacity)/capacity_growth = ~7 extra rehashes per capacity growth. +inline size_t RehashProbabilityConstant() { return 16; } + class CommonFieldsGenerationInfoEnabled { // A sentinel value for reserved_growth_ indicating that we just ran out of // reserved growth on the last insertion. When reserve is called and then @@ -769,12 +778,10 @@ class CommonFieldsGenerationInfoEnabled { // Whether we should rehash on insert in order to detect bugs of using invalid // references. We rehash on the first insertion after reserved_growth_ reaches - // 0 after a call to reserve. - // TODO(b/254649633): we could potentially do a rehash with low probability + // 0 after a call to reserve. We also do a rehash with low probability // whenever reserved_growth_ is zero. - bool should_rehash_for_bug_detection_on_insert() const { - return reserved_growth_ == kReservedGrowthJustRanOut; - } + bool should_rehash_for_bug_detection_on_insert(const ctrl_t* ctrl, + size_t capacity) const; void maybe_increment_generation_on_insert() { if (reserved_growth_ == kReservedGrowthJustRanOut) reserved_growth_ = 0; @@ -820,7 +827,9 @@ class CommonFieldsGenerationInfoDisabled { CommonFieldsGenerationInfoDisabled& operator=( CommonFieldsGenerationInfoDisabled&&) = default; - bool should_rehash_for_bug_detection_on_insert() const { return false; } + bool should_rehash_for_bug_detection_on_insert(const ctrl_t*, size_t) const { + return false; + } void maybe_increment_generation_on_insert() {} void reset_reserved_growth(size_t, size_t) {} size_t reserved_growth() const { return 0; } @@ -905,6 +914,10 @@ class CommonFields : public CommonFieldsGenerationInfo { return compressed_tuple_.template get<1>(); } + bool should_rehash_for_bug_detection_on_insert() const { + return CommonFieldsGenerationInfo:: + should_rehash_for_bug_detection_on_insert(control_, capacity_); + } void reset_reserved_growth(size_t reservation) { CommonFieldsGenerationInfo::reset_reserved_growth(reservation, size_); } @@ -1106,11 +1119,13 @@ struct FindInfo { inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; } // Begins a probing operation on `common.control`, using `hash`. -inline probe_seq probe(const CommonFields& common, size_t hash) { - const ctrl_t* ctrl = common.control_; - const size_t capacity = common.capacity_; +inline probe_seq probe(const ctrl_t* ctrl, const size_t capacity, + size_t hash) { return probe_seq(H1(hash, ctrl), capacity); } +inline probe_seq probe(const CommonFields& common, size_t hash) { + return probe(common.control_, common.capacity_, hash); +} // Probes an array of control bits using a probe sequence derived from `hash`, // and returns the offset corresponding to the first deleted or empty slot. diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index bdffb817..e33fda20 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -865,6 +865,10 @@ void TestDecompose(bool construct_three) { } TEST(Table, Decompose) { + if (SwisstableGenerationsEnabled()) { + GTEST_SKIP() << "Generations being enabled causes extra rehashes."; + } + TestDecompose(false); struct TransparentHashIntOverload { @@ -903,6 +907,10 @@ struct Modulo1000HashTable // Test that rehash with no resize happen in case of many deleted slots. TEST(Table, RehashWithNoResize) { + if (SwisstableGenerationsEnabled()) { + GTEST_SKIP() << "Generations being enabled causes extra rehashes."; + } + Modulo1000HashTable t; // Adding the same length (and the same hash) strings // to have at least kMinFullGroups groups @@ -996,6 +1004,10 @@ TEST(Table, EnsureNonQuadraticAsInRust) { } TEST(Table, ClearBug) { + if (SwisstableGenerationsEnabled()) { + GTEST_SKIP() << "Generations being enabled causes extra rehashes."; + } + IntTable t; constexpr size_t capacity = container_internal::Group::kWidth - 1; constexpr size_t max_size = capacity / 2 + 1; @@ -2318,6 +2330,25 @@ TEST(Table, ReservedGrowthUpdatesWhenTableDoesntGrow) { EXPECT_EQ(*it, 0); } +TEST(Table, InvalidReferenceUseCrashesWithSanitizers) { + if (!SwisstableGenerationsEnabled()) GTEST_SKIP() << "Generations disabled."; +#ifdef ABSL_HAVE_MEMORY_SANITIZER + GTEST_SKIP() << "MSan fails to detect some of these rehashes."; +#endif + + IntTable t; + t.insert(0); + // Rehashing is guaranteed on every insertion while capacity is less than + // RehashProbabilityConstant(). + int64_t i = 0; + while (t.capacity() <= RehashProbabilityConstant()) { + // ptr will become invalidated on rehash. + const int64_t* ptr = &*t.begin(); + t.insert(++i); + EXPECT_DEATH_IF_SUPPORTED(std::cout << *ptr, "heap-use-after-free") << i; + } +} + } // namespace } // namespace container_internal ABSL_NAMESPACE_END -- cgit v1.2.3