From 7fc3c7fe7283a8b04fe0f79fe6180b6e688a565f Mon Sep 17 00:00:00 2001
From: Evan Brown
Date: Wed, 26 Jul 2023 10:33:14 -0700
Subject: Change the API constraints of erase(const_iterator, const_iterator)
 so that calling erase(begin(), end()) resets reserved growth.

PiperOrigin-RevId: 551248712
Change-Id: I34755c63e3ee40da4ba7047e0d24eec567d28173
---
 absl/container/flat_hash_map.h               |  6 ++-
 absl/container/flat_hash_set.h               |  6 ++-
 absl/container/internal/raw_hash_set.h       | 27 ++++++++++++-
 absl/container/internal/raw_hash_set_test.cc | 58 ++++++++++++++++++++++++++++
 absl/container/node_hash_map.h               |  6 ++-
 absl/container/node_hash_set.h               |  6 ++-
 6 files changed, 103 insertions(+), 6 deletions(-)
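Note (editorial sketch, not part of this change): the sketch below illustrates
the guarantee the new doc comment describes, using only the public container
API. The container type, the constant 100, and the assert are arbitrary
choices for illustration; capacity() is the public capacity accessor on the
Abseil containers.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    #include "absl/container/flat_hash_set.h"

    void ReuseTableWithoutRehash() {
      absl::flat_hash_set<int64_t> s;
      s.reserve(100);  // pledge: the next 100 insertions won't rehash
      for (int64_t i = 0; i < 100; ++i) s.insert(i);

      const std::size_t cap = s.capacity();
      s.erase(s.begin(), s.end());  // with this patch: pledge restored in full

      // No intervening clear(), so refilling with 100 elements will not
      // rehash, and therefore will not allocate.
      for (int64_t i = 0; i < 100; ++i) s.insert(i);
      assert(s.capacity() == cap);
    }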
diff --git a/absl/container/flat_hash_map.h b/absl/container/flat_hash_map.h
index e6bdbd9e..8f4d9939 100644
--- a/absl/container/flat_hash_map.h
+++ b/absl/container/flat_hash_map.h
@@ -235,7 +235,11 @@ class flat_hash_map : public absl::container_internal::raw_hash_map<
   // iterator erase(const_iterator first, const_iterator last):
   //
   // Erases the elements in the half-open interval [`first`, `last`), returning an
-  // iterator pointing to `last`.
+  // iterator pointing to `last`. The special case of calling
+  // `erase(begin(), end())` resets the reserved growth such that if
+  // `reserve(N)` has previously been called and there has been no intervening
+  // call to `clear()`, then after calling `erase(begin(), end())`, it is safe
+  // to assume that inserting N elements will not cause a rehash.
   //
   // size_type erase(const key_type& key):
   //
diff --git a/absl/container/flat_hash_set.h b/absl/container/flat_hash_set.h
index 17bbf1a4..c789c7ef 100644
--- a/absl/container/flat_hash_set.h
+++ b/absl/container/flat_hash_set.h
@@ -227,7 +227,11 @@ class flat_hash_set
   // iterator erase(const_iterator first, const_iterator last):
   //
   // Erases the elements in the half-open interval [`first`, `last`), returning an
-  // iterator pointing to `last`.
+  // iterator pointing to `last`. The special case of calling
+  // `erase(begin(), end())` resets the reserved growth such that if
+  // `reserve(N)` has previously been called and there has been no intervening
+  // call to `clear()`, then after calling `erase(begin(), end())`, it is safe
+  // to assume that inserting N elements will not cause a rehash.
   //
   // size_type erase(const key_type& key):
   //
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index f0e107be..6da43026 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -786,8 +786,11 @@ class CommonFieldsGenerationInfoEnabled {
  public:
   CommonFieldsGenerationInfoEnabled() = default;
   CommonFieldsGenerationInfoEnabled(CommonFieldsGenerationInfoEnabled&& that)
-      : reserved_growth_(that.reserved_growth_), generation_(that.generation_) {
+      : reserved_growth_(that.reserved_growth_),
+        reservation_size_(that.reservation_size_),
+        generation_(that.generation_) {
     that.reserved_growth_ = 0;
+    that.reservation_size_ = 0;
     that.generation_ = EmptyGeneration();
   }
   CommonFieldsGenerationInfoEnabled& operator=(
@@ -813,6 +816,8 @@ class CommonFieldsGenerationInfoEnabled {
   }
   size_t reserved_growth() const { return reserved_growth_; }
   void set_reserved_growth(size_t r) { reserved_growth_ = r; }
+  size_t reservation_size() const { return reservation_size_; }
+  void set_reservation_size(size_t r) { reservation_size_ = r; }
   GenerationType generation() const { return *generation_; }
   void set_generation(GenerationType g) { *generation_ = g; }
   GenerationType* generation_ptr() const { return generation_; }
@@ -820,10 +825,14 @@ class CommonFieldsGenerationInfoEnabled {
 
  private:
   // The number of insertions remaining that are guaranteed to not rehash due to
-  // a prior call to reserve. Note: we store reserved growth rather than
+  // a prior call to reserve. Note: we store reserved growth in addition to
   // reservation size because calls to erase() decrease size_ but don't decrease
   // reserved growth.
   size_t reserved_growth_ = 0;
+  // The maximum argument to reserve() since the container was cleared. We need
+  // to keep track of this, in addition to reserved growth, because we reset
+  // reserved growth to this when erase(begin(), end()) is called.
+  size_t reservation_size_ = 0;
   // Pointer to the generation counter, which is used to validate iterators and
   // is stored in the backing array between the control bytes and the slots.
   // Note that we can't store the generation inside the container itself and
@@ -851,6 +860,8 @@ class CommonFieldsGenerationInfoDisabled {
   void reset_reserved_growth(size_t, size_t) {}
   size_t reserved_growth() const { return 0; }
   void set_reserved_growth(size_t) {}
+  size_t reservation_size() const { return 0; }
+  void set_reservation_size(size_t) {}
   GenerationType generation() const { return 0; }
   void set_generation(GenerationType) {}
   GenerationType* generation_ptr() const { return nullptr; }
@@ -971,6 +982,12 @@ class CommonFields : public CommonFieldsGenerationInfo {
     CommonFieldsGenerationInfo::reset_reserved_growth(reservation, size());
   }
 
+  // Returns the number of control bytes set to kDeleted. For testing only.
+  size_t TombstonesCount() const {
+    return static_cast<size_t>(
+        std::count(control(), control() + capacity(), ctrl_t::kDeleted));
+  }
+
  private:
   // TODO(b/259599413): Investigate removing some of these fields:
   // - control/slots can be derived from each other
@@ -1913,6 +1930,7 @@ class raw_hash_set {
       ClearBackingArray(common(), GetPolicyFunctions(), /*reuse=*/cap < 128);
     }
     common().set_reserved_growth(0);
+    common().set_reservation_size(0);
   }
 
   inline void destroy_slots() {
@@ -2168,8 +2186,12 @@
     // capacity() > 0 as a precondition.
     if (empty()) return end();
     if (first == begin() && last == end()) {
+      // TODO(ezb): we access control bytes in destroy_slots so it could make
+      // sense to combine destroy_slots and ClearBackingArray to avoid cache
+      // misses when the table is large. Note that we also do this in clear().
       destroy_slots();
       ClearBackingArray(common(), GetPolicyFunctions(), /*reuse=*/true);
+      common().set_reserved_growth(common().reservation_size());
       return end();
     }
     while (first != last) {
@@ -2258,6 +2280,7 @@
       infoz().RecordReservation(n);
     }
     common().reset_reserved_growth(n);
+    common().set_reservation_size(n);
   }
 
   // Extension API: support for heterogeneous keys.
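Note (editorial sketch, not part of this change): why two counters. Insertions
consume reserved growth, and single-element erase() deliberately leaves it
untouched (size_ shrinks, but the pledge does not regrow), so a table emptied
one element at a time may have partly or fully spent its pledge.
reservation_size_ remembers the original reserve() argument so that
erase(begin(), end()) can restore the pledge. Below is a toy model of the
bookkeeping only; the real logic lives in CommonFieldsGenerationInfoEnabled
and raw_hash_set, where reserved growth is also derived from the table's
current size and growth_left, both elided here.

    #include <cstddef>

    struct ReservationCounters {
      std::size_t reserved_growth = 0;   // insertions still covered by reserve()
      std::size_t reservation_size = 0;  // largest reserve() argument since clear()

      void OnReserve(std::size_t n) {  // simplified: assumes an empty table
        reserved_growth = n;
        reservation_size = n;
      }
      void OnInsert() {
        if (reserved_growth > 0) --reserved_growth;
      }
      // Single-element erase: size shrinks but the pledge is not regrown.
      void OnEraseOne() {}
      // erase(begin(), end()): the table is empty again, so the full pledge
      // can be restored. This is the behavior change in this patch.
      void OnEraseAll() { reserved_growth = reservation_size; }
      // clear() drops both counters entirely.
      void OnClear() { reserved_growth = reservation_size = 0; }
    };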
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index 66621cf0..f0947b97 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -60,6 +60,10 @@ struct RawHashSetTestOnlyAccess {
   static auto GetSlots(const C& c) -> decltype(c.slot_array()) {
     return c.slot_array();
   }
+  template <typename C>
+  static size_t CountTombstones(const C& c) {
+    return c.common().TombstonesCount();
+  }
 };
 
 namespace {
@@ -472,6 +476,28 @@ struct MinimumAlignmentUint8Table
   using Base::Base;
 };
 
+// Allows for freezing the allocator to expect no further allocations.
+template <typename T>
+struct FreezableAlloc : std::allocator<T> {
+  explicit FreezableAlloc(bool* f) : frozen(f) {}
+
+  template <typename U>
+  explicit FreezableAlloc(const FreezableAlloc<U>& other)
+      : frozen(other.frozen) {}
+
+  template <class U>
+  struct rebind {
+    using other = FreezableAlloc<U>;
+  };
+
+  T* allocate(size_t n) {
+    EXPECT_FALSE(*frozen);
+    return std::allocator<T>::allocate(n);
+  }
+
+  bool* frozen;
+};
+
 struct BadFastHash {
   template <class T>
   size_t operator()(const T&) const {
@@ -479,6 +505,13 @@ struct BadFastHash {
   }
 };
 
+struct BadHashFreezableIntTable
+    : raw_hash_set<IntPolicy, BadFastHash, std::equal_to<int64_t>,
+                   FreezableAlloc<int64_t>> {
+  using Base = typename BadHashFreezableIntTable::raw_hash_set;
+  using Base::Base;
+};
+
 struct BadTable : raw_hash_set<IntPolicy, BadFastHash, std::equal_to<int>,
                                std::allocator<int>> {
   using Base = typename BadTable::raw_hash_set;
@@ -512,6 +545,7 @@ TEST(Table, EmptyFunctorOptimization) {
 
 struct GenerationData {
   size_t reserved_growth;
+  size_t reservation_size;
   GenerationType* generation;
 };
 
@@ -2387,6 +2421,30 @@ TEST(Table, ReservedGrowthUpdatesWhenTableDoesntGrow) {
   EXPECT_EQ(*it, 0);
 }
 
+TEST(Table, EraseBeginEndResetsReservedGrowth) {
+  bool frozen = false;
+  BadHashFreezableIntTable t{FreezableAlloc<int64_t>(&frozen)};
+  t.reserve(100);
+  const size_t cap = t.capacity();
+  frozen = true;  // no further allocs allowed
+
+  for (int i = 0; i < 10; ++i) {
+    // Create a long run (hash function returns constant).
+    for (int j = 0; j < 100; ++j) t.insert(j);
+    // Erase elements from the middle of the long run, which creates tombstones.
+    for (int j = 30; j < 60; ++j) t.erase(j);
+    EXPECT_EQ(t.size(), 70);
+    EXPECT_EQ(t.capacity(), cap);
+    ASSERT_EQ(RawHashSetTestOnlyAccess::CountTombstones(t), 30);
+
+    t.erase(t.begin(), t.end());
+
+    EXPECT_EQ(t.size(), 0);
+    EXPECT_EQ(t.capacity(), cap);
+    ASSERT_EQ(RawHashSetTestOnlyAccess::CountTombstones(t), 0);
+  }
+}
+
 TEST(Table, GenerationInfoResetsOnClear) {
   if (!SwisstableGenerationsEnabled()) GTEST_SKIP() << "Generations disabled.";
   if (kMsvc) GTEST_SKIP() << "MSVC doesn't support | in regexp.";
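Note (editorial, not part of this change): two things make the test above
deterministic. BadFastHash maps every key to the same value, so all 100
elements land in one probe run and erasing from its middle must leave
tombstones rather than empty slots; and FreezableAlloc turns any allocation
after the freeze point into a test failure, so a rehash (which would allocate
a new backing array) cannot go unnoticed. The freeze-the-allocator trick is
reusable outside gtest; here is a minimal standalone sketch using assert()
and a hypothetical FreezeAlloc name to avoid clashing with the test's type:

    #include <cassert>
    #include <cstddef>
    #include <memory>
    #include <vector>

    template <typename T>
    struct FreezeAlloc : std::allocator<T> {
      explicit FreezeAlloc(bool* f) : frozen(f) {}
      template <typename U>
      explicit FreezeAlloc(const FreezeAlloc<U>& other) : frozen(other.frozen) {}
      template <class U>
      struct rebind {
        using other = FreezeAlloc<U>;
      };

      T* allocate(std::size_t n) {
        assert(!*frozen && "allocation after freeze");
        return std::allocator<T>::allocate(n);
      }

      bool* frozen;
    };

    int main() {
      bool frozen = false;
      std::vector<int, FreezeAlloc<int>> v{FreezeAlloc<int>(&frozen)};
      v.reserve(100);
      frozen = true;  // reserve() made the one allocation we allow
      for (int i = 0; i < 100; ++i) v.push_back(i);  // must not reallocate
      return 0;
    }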
diff --git a/absl/container/node_hash_map.h b/absl/container/node_hash_map.h
index dc8d7d9a..a396de2e 100644
--- a/absl/container/node_hash_map.h
+++ b/absl/container/node_hash_map.h
@@ -226,7 +226,11 @@ class node_hash_map
   // iterator erase(const_iterator first, const_iterator last):
   //
   // Erases the elements in the half-open interval [`first`, `last`), returning an
-  // iterator pointing to `last`.
+  // iterator pointing to `last`. The special case of calling
+  // `erase(begin(), end())` resets the reserved growth such that if
+  // `reserve(N)` has previously been called and there has been no intervening
+  // call to `clear()`, then after calling `erase(begin(), end())`, it is safe
+  // to assume that inserting N elements will not cause a rehash.
   //
   // size_type erase(const key_type& key):
   //
diff --git a/absl/container/node_hash_set.h b/absl/container/node_hash_set.h
index 905a93d8..421ff460 100644
--- a/absl/container/node_hash_set.h
+++ b/absl/container/node_hash_set.h
@@ -218,7 +218,11 @@ class node_hash_set
   // iterator erase(const_iterator first, const_iterator last):
   //
   // Erases the elements in the half-open interval [`first`, `last`), returning an
-  // iterator pointing to `last`.
+  // iterator pointing to `last`. The special case of calling
+  // `erase(begin(), end())` resets the reserved growth such that if
+  // `reserve(N)` has previously been called and there has been no intervening
+  // call to `clear()`, then after calling `erase(begin(), end())`, it is safe
+  // to assume that inserting N elements will not cause a rehash.
   //
   // size_type erase(const key_type& key):
   //
-- 
cgit v1.2.3
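Note (editorial sketch, not part of this change): after this patch, clear()
and erase(begin(), end()) make different promises. Per the clear() hunk above,
clear() zeroes both counters and may discard the backing array outright (it
reuses the array only when capacity < 128), whereas erase(begin(), end())
always keeps the array and restores the reserve() pledge:

    #include <cassert>
    #include <cstddef>

    #include "absl/container/flat_hash_map.h"

    int main() {
      absl::flat_hash_map<int, int> m;
      m.reserve(1000);
      for (int i = 0; i < 1000; ++i) m[i] = i;
      const std::size_t cap = m.capacity();

      m.erase(m.begin(), m.end());  // keeps the array; pledge restored
      for (int i = 0; i < 1000; ++i) m[i] = i;
      assert(m.capacity() == cap);  // guaranteed: no rehash occurred

      m.clear();  // pledge dropped; at this capacity the array is also freed
      return 0;
    }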