From d51d3cf3feacf836f1acfe6c97c10978077599ab Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Thu, 2 Mar 2023 10:03:38 -0800 Subject: Use multiple empty generations so that we can detect when iterators from different empty hashtables are compared. PiperOrigin-RevId: 513568915 Change-Id: I3f387d0bae0e86749dff540e4fdd5037304ac975 --- absl/container/internal/raw_hash_set.cc | 21 +++++++++++---- absl/container/internal/raw_hash_set.h | 38 ++++++++++++++++++---------- absl/container/internal/raw_hash_set_test.cc | 34 ++++++++++++++++++++----- 3 files changed, 68 insertions(+), 25 deletions(-) (limited to 'absl/container') diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index a6d9b7c0..b91d5a47 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -28,19 +28,18 @@ namespace container_internal { // A single block of empty control bytes for tables without any slots allocated. // This enables removing a branch in the hot path of find(). -// We have 17 bytes because there may be a generation counter. Any constant is -// fine for the generation counter. -alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[17] = { +alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[16] = { ctrl_t::kSentinel, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, - ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, - static_cast<ctrl_t>(0)}; + ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty}; #ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL constexpr size_t Group::kWidth; #endif +namespace { + // Returns "random" seed. 
inline size_t RandomSeed() { #ifdef ABSL_HAVE_THREAD_LOCAL @@ -58,6 +57,18 @@ inline size_t RandomSeed() { return value ^ static_cast<size_t>(reinterpret_cast<uintptr_t>(&counter)); } +} // namespace + +GenerationType* EmptyGeneration() { + if (SwisstableGenerationsEnabled()) { + constexpr size_t kNumEmptyGenerations = 1024; + static constexpr GenerationType kEmptyGenerations[kNumEmptyGenerations]{}; + return const_cast<GenerationType*>( + &kEmptyGenerations[RandomSeed() % kNumEmptyGenerations]); + } + return nullptr; +} + bool CommonFieldsGenerationInfoEnabled:: should_rehash_for_bug_detection_on_insert(const ctrl_t* ctrl, size_t capacity) const { diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index f4de2d65..f487e4d5 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -236,6 +236,14 @@ namespace container_internal { // We use uint8_t so we don't need to worry about padding. using GenerationType = uint8_t; +// A sentinel value for empty generations. Using 0 makes it easy to constexpr +// initialize an array of this value. +constexpr GenerationType SentinelEmptyGeneration() { return 0; } + +constexpr GenerationType NextGeneration(GenerationType generation) { + return ++generation == SentinelEmptyGeneration() ? ++generation : generation; +} + #ifdef ABSL_SWISSTABLE_ENABLE_GENERATIONS constexpr bool SwisstableGenerationsEnabled() { return true; } constexpr size_t NumGenerationBytes() { return sizeof(GenerationType); } @@ -476,7 +484,7 @@ static_assert(ctrl_t::kDeleted == static_cast<ctrl_t>(-2), "ctrl_t::kDeleted must be -2 to make the implementation of " "ConvertSpecialToEmptyAndFullToDeleted efficient"); -ABSL_DLL extern const ctrl_t kEmptyGroup[17]; +ABSL_DLL extern const ctrl_t kEmptyGroup[16]; // Returns a pointer to a control byte group that can be used by empty tables. 
inline ctrl_t* EmptyGroup() { @@ -485,10 +493,13 @@ inline ctrl_t* EmptyGroup() { return const_cast<ctrl_t*>(kEmptyGroup); } -// Returns a pointer to the generation byte at the end of the empty group, if it -// exists. -inline GenerationType* EmptyGeneration() { - return reinterpret_cast<GenerationType*>(EmptyGroup() + 16); +// Returns a pointer to a generation to use for an empty hashtable. +GenerationType* EmptyGeneration(); + +// Returns whether `generation` is a generation for an empty hashtable that +// could be returned by EmptyGeneration(). +inline bool IsEmptyGeneration(const GenerationType* generation) { + return *generation == SentinelEmptyGeneration(); } // Mixes a randomly generated per-process seed with `hash` and `ctrl` to @@ -790,7 +801,7 @@ class CommonFieldsGenerationInfoEnabled { if (reserved_growth_ > 0) { if (--reserved_growth_ == 0) reserved_growth_ = kReservedGrowthJustRanOut; } else { - ++*generation_; + *generation_ = NextGeneration(*generation_); } } void reset_reserved_growth(size_t reservation, size_t size) { @@ -818,10 +829,6 @@ class CommonFieldsGenerationInfoEnabled { // the code more complicated, and there's a benefit in having the sizes of // raw_hash_set in sanitizer mode and non-sanitizer mode a bit more different, // which is that tests are less likely to rely on the size remaining the same. - // TODO(b/254649633): Currently, we can't detect when end iterators from - // different empty tables are compared. If we allocate generations separately - // from control bytes, then we could do so. Another option would be to have N - // empty generations and use a random one for empty hashtables. 
GenerationType* generation_ = EmptyGeneration(); }; @@ -1152,14 +1159,19 @@ inline void AssertSameContainer(const ctrl_t* ctrl_a, const ctrl_t* ctrl_b, if (SwisstableGenerationsEnabled()) { if (generation_ptr_a == generation_ptr_b) return; - const bool a_is_empty = generation_ptr_a == EmptyGeneration(); - const bool b_is_empty = generation_ptr_b == EmptyGeneration(); + const bool a_is_empty = IsEmptyGeneration(generation_ptr_a); + const bool b_is_empty = IsEmptyGeneration(generation_ptr_b); if (a_is_empty != b_is_empty) { ABSL_INTERNAL_LOG(FATAL, "Invalid iterator comparison. Comparing iterator from " "a non-empty hashtable with an iterator from an empty " "hashtable."); } + if (a_is_empty && b_is_empty) { + ABSL_INTERNAL_LOG(FATAL, + "Invalid iterator comparison. Comparing iterators from " + "different empty hashtables."); + } const bool a_is_end = ctrl_a == nullptr; const bool b_is_end = ctrl_b == nullptr; if (a_is_end || b_is_end) { @@ -1332,7 +1344,7 @@ ABSL_ATTRIBUTE_NOINLINE void InitializeSlots(CommonFields& c, Alloc alloc) { const GenerationType old_generation = c.generation(); c.set_generation_ptr( reinterpret_cast<GenerationType*>(mem + GenerationOffset(cap))); - c.set_generation(old_generation + 1); + c.set_generation(NextGeneration(old_generation)); c.control_ = reinterpret_cast<ctrl_t*>(mem); c.slots_ = mem + SlotOffset(cap, AlignOfSlot); ResetCtrl(c, SizeOfSlot); diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index c13b6757..c46a5939 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -2309,6 +2309,7 @@ TEST(Iterator, InvalidUseWithReserveCrashesWithSanitizers) { } // ptr will become invalidated on rehash. const int64_t* ptr = &*it; + (void)ptr; // erase decreases size but does not decrease reserved growth so the next // insertion still invalidates iterators. 
@@ -2319,8 +2320,9 @@ TEST(Iterator, InvalidUseWithReserveCrashesWithSanitizers) { EXPECT_DEATH_IF_SUPPORTED(*it, kInvalidIteratorDeathMessage); EXPECT_DEATH_IF_SUPPORTED(void(it == t.begin()), kInvalidIteratorDeathMessage); - EXPECT_DEATH_IF_SUPPORTED(std::cout << *ptr, - "heap-use-after-free|use-of-uninitialized-value"); +#ifdef ABSL_HAVE_ADDRESS_SANITIZER + EXPECT_DEATH_IF_SUPPORTED(std::cout << *ptr, "heap-use-after-free"); +#endif } TEST(Table, ReservedGrowthUpdatesWhenTableDoesntGrow) { @@ -2338,6 +2340,22 @@ TEST(Table, ReservedGrowthUpdatesWhenTableDoesntGrow) { EXPECT_EQ(*it, 0); } +TEST(Table, GenerationInfoResetsOnClear) { + if (!SwisstableGenerationsEnabled()) GTEST_SKIP() << "Generations disabled."; + if (kMsvc) GTEST_SKIP() << "MSVC doesn't support | in regexp."; + + IntTable t; + for (int i = 0; i < 1000; ++i) t.insert(i); + t.reserve(t.size() + 100); + + t.clear(); + + t.insert(0); + auto it = t.begin(); + t.insert(1); + EXPECT_DEATH_IF_SUPPORTED(*it, kInvalidIteratorDeathMessage); +} + TEST(Table, InvalidReferenceUseCrashesWithSanitizers) { if (!SwisstableGenerationsEnabled()) GTEST_SKIP() << "Generations disabled."; #ifdef ABSL_HAVE_MEMORY_SANITIZER @@ -2362,11 +2380,13 @@ TEST(Iterator, InvalidComparisonDifferentTables) { IntTable t1, t2; IntTable::iterator default_constructed_iter; - // TODO(b/254649633): Currently, we can't detect when end iterators from - // different empty tables are compared. If we allocate generations separately - // from control bytes, then we could do so. - // EXPECT_DEATH_IF_SUPPORTED(void(t1.end() == t2.end()), - // "Invalid iterator comparison.*empty hashtable"); + // We randomly use one of N empty generations for generations from empty + // hashtables. In general, we won't always detect when iterators from + // different empty hashtables are compared, but in this test case, we + // should deterministically detect the error due to our randomness yielding + // consecutive random generations. 
+ EXPECT_DEATH_IF_SUPPORTED(void(t1.end() == t2.end()), + "Invalid iterator comparison.*empty hashtables"); EXPECT_DEATH_IF_SUPPORTED(void(t1.end() == default_constructed_iter), "Invalid iterator comparison.*default-constructed"); t1.insert(0); -- cgit v1.2.3