author    Evan Brown <ezb@google.com>  2023-07-26 10:33:14 -0700
committer Copybara-Service <copybara-worker@google.com>  2023-07-26 10:34:46 -0700
commit    7fc3c7fe7283a8b04fe0f79fe6180b6e688a565f
tree      e4c8a51e15a7d5d27597b3703f1ba5cb0dbfef52
parent    c108cd0382a3659eaf2981b22392b4d5fbc122db
Change the API constraints of erase(const_iterator, const_iterator) so that calling erase(begin(), end()) resets reserved growth.
PiperOrigin-RevId: 551248712
Change-Id: I34755c63e3ee40da4ba7047e0d24eec567d28173
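
In practice, the new guarantee looks like this (a minimal sketch; the capacity assertion is illustrative, since exact capacity values are an implementation detail):

    #include <cassert>

    #include "absl/container/flat_hash_set.h"

    void Example() {
      absl::flat_hash_set<int> s;
      s.reserve(100);                      // reserved growth becomes 100
      for (int i = 0; i < 100; ++i) s.insert(i);
      const auto cap = s.capacity();
      s.erase(s.begin(), s.end());         // resets reserved growth back to 100
      for (int i = 0; i < 100; ++i) s.insert(i);  // guaranteed not to rehash
      assert(s.capacity() == cap);         // the backing array was reused
    }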
 absl/container/flat_hash_map.h               |  6
 absl/container/flat_hash_set.h               |  6
 absl/container/internal/raw_hash_set.h       | 27
 absl/container/internal/raw_hash_set_test.cc | 58
 absl/container/node_hash_map.h               |  6
 absl/container/node_hash_set.h               |  6
 6 files changed, 103 insertions(+), 6 deletions(-)
diff --git a/absl/container/flat_hash_map.h b/absl/container/flat_hash_map.h
index e6bdbd9e..8f4d9939 100644
--- a/absl/container/flat_hash_map.h
+++ b/absl/container/flat_hash_map.h
@@ -235,7 +235,11 @@ class flat_hash_map : public absl::container_internal::raw_hash_map<
// iterator erase(const_iterator first, const_iterator last):
//
// Erases the elements in the half-open interval [`first`, `last`), returning an
- // iterator pointing to `last`.
+ // iterator pointing to `last`. The special case of calling
+ // `erase(begin(), end())` resets the reserved growth such that if
+ // `reserve(N)` has previously been called and there has been no intervening
+ // call to `clear()`, then after calling `erase(begin(), end())`, it is safe
+ // to assume that inserting N elements will not cause a rehash.
//
// size_type erase(const key_type& key):
//
diff --git a/absl/container/flat_hash_set.h b/absl/container/flat_hash_set.h
index 17bbf1a4..c789c7ef 100644
--- a/absl/container/flat_hash_set.h
+++ b/absl/container/flat_hash_set.h
@@ -227,7 +227,11 @@ class flat_hash_set
// iterator erase(const_iterator first, const_iterator last):
//
// Erases the elements in the half-open interval [`first`, `last`), returning an
- // iterator pointing to `last`.
+ // iterator pointing to `last`. The special case of calling
+ // `erase(begin(), end())` resets the reserved growth such that if
+ // `reserve(N)` has previously been called and there has been no intervening
+ // call to `clear()`, then after calling `erase(begin(), end())`, it is safe
+ // to assume that inserting N elements will not cause a rehash.
//
// size_type erase(const key_type& key):
//
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index f0e107be..6da43026 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -786,8 +786,11 @@ class CommonFieldsGenerationInfoEnabled {
public:
CommonFieldsGenerationInfoEnabled() = default;
CommonFieldsGenerationInfoEnabled(CommonFieldsGenerationInfoEnabled&& that)
- : reserved_growth_(that.reserved_growth_), generation_(that.generation_) {
+ : reserved_growth_(that.reserved_growth_),
+ reservation_size_(that.reservation_size_),
+ generation_(that.generation_) {
that.reserved_growth_ = 0;
+ that.reservation_size_ = 0;
that.generation_ = EmptyGeneration();
}
CommonFieldsGenerationInfoEnabled& operator=(
@@ -813,6 +816,8 @@ class CommonFieldsGenerationInfoEnabled {
}
size_t reserved_growth() const { return reserved_growth_; }
void set_reserved_growth(size_t r) { reserved_growth_ = r; }
+ size_t reservation_size() const { return reservation_size_; }
+ void set_reservation_size(size_t r) { reservation_size_ = r; }
GenerationType generation() const { return *generation_; }
void set_generation(GenerationType g) { *generation_ = g; }
GenerationType* generation_ptr() const { return generation_; }
@@ -820,10 +825,14 @@ class CommonFieldsGenerationInfoEnabled {
private:
// The number of insertions remaining that are guaranteed to not rehash due to
- // a prior call to reserve. Note: we store reserved growth rather than
+ // a prior call to reserve. Note: we store reserved growth in addition to
// reservation size because calls to erase() decrease size_ but don't decrease
// reserved growth.
size_t reserved_growth_ = 0;
+ // The maximum argument to reserve() since the container was cleared. We need
+ // to keep track of this, in addition to reserved growth, because we reset
+ // reserved growth to this when erase(begin(), end()) is called.
+ size_t reservation_size_ = 0;
// Pointer to the generation counter, which is used to validate iterators and
// is stored in the backing array between the control bytes and the slots.
// Note that we can't store the generation inside the container itself and
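
A toy model of how the two counters interact (an illustrative sketch with made-up names, not the real implementation):

    #include <cstddef>

    // Sketch of the two-counter bookkeeping described in the comments above.
    struct GrowthBookkeeping {
      size_t reserved_growth = 0;   // inserts guaranteed not to trigger a rehash
      size_t reservation_size = 0;  // reserve() argument since the last clear()

      void OnReserve(size_t n) { reserved_growth = n; reservation_size = n; }
      void OnInsert() { if (reserved_growth > 0) --reserved_growth; }
      void OnClear() { reserved_growth = 0; reservation_size = 0; }
      // erase(begin(), end()) keeps the allocation, so the full reserve()
      // guarantee can be restored:
      void OnEraseAll() { reserved_growth = reservation_size; }
    };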
@@ -851,6 +860,8 @@ class CommonFieldsGenerationInfoDisabled {
void reset_reserved_growth(size_t, size_t) {}
size_t reserved_growth() const { return 0; }
void set_reserved_growth(size_t) {}
+ size_t reservation_size() const { return 0; }
+ void set_reservation_size(size_t) {}
GenerationType generation() const { return 0; }
void set_generation(GenerationType) {}
GenerationType* generation_ptr() const { return nullptr; }
@@ -971,6 +982,12 @@ class CommonFields : public CommonFieldsGenerationInfo {
CommonFieldsGenerationInfo::reset_reserved_growth(reservation, size());
}
+ // Returns the number of control bytes set to kDeleted. For testing only.
+ size_t TombstonesCount() const {
+ return static_cast<size_t>(
+ std::count(control(), control() + capacity(), ctrl_t::kDeleted));
+ }
+
private:
// TODO(b/259599413): Investigate removing some of these fields:
// - control/slots can be derived from each other
@@ -1913,6 +1930,7 @@ class raw_hash_set {
ClearBackingArray(common(), GetPolicyFunctions(), /*reuse=*/cap < 128);
}
common().set_reserved_growth(0);
+ common().set_reservation_size(0);
}
inline void destroy_slots() {
@@ -2168,8 +2186,12 @@ class raw_hash_set {
// capacity() > 0 as a precondition.
if (empty()) return end();
if (first == begin() && last == end()) {
+ // TODO(ezb): we access control bytes in destroy_slots so it could make
+ // sense to combine destroy_slots and ClearBackingArray to avoid cache
+ // misses when the table is large. Note that we also do this in clear().
destroy_slots();
ClearBackingArray(common(), GetPolicyFunctions(), /*reuse=*/true);
+ common().set_reserved_growth(common().reservation_size());
return end();
}
while (first != last) {
@@ -2258,6 +2280,7 @@ class raw_hash_set {
infoz().RecordReservation(n);
}
common().reset_reserved_growth(n);
+ common().set_reservation_size(n);
}
// Extension API: support for heterogeneous keys.
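
Taken together, these hunks make `clear()` and `erase(begin(), end())` differ in how they treat a prior reservation (a sketch of the observable behavior, assuming the flat_hash_set header):

    #include "absl/container/flat_hash_set.h"

    void ContrastClearAndEraseAll() {
      absl::flat_hash_set<int> s;

      s.reserve(100);
      s.clear();                    // forgets the reservation (reservation_size = 0)

      s.reserve(100);
      s.erase(s.begin(), s.end());  // keeps the allocation and restores reserved
                                    // growth: the next 100 inserts won't rehash
    }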
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index 66621cf0..f0947b97 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -60,6 +60,10 @@ struct RawHashSetTestOnlyAccess {
static auto GetSlots(const C& c) -> decltype(c.slot_array()) {
return c.slot_array();
}
+ template <typename C>
+ static size_t CountTombstones(const C& c) {
+ return c.common().TombstonesCount();
+ }
};
namespace {
@@ -472,6 +476,28 @@ struct MinimumAlignmentUint8Table
using Base::Base;
};
+// Allows for freezing the allocator to expect no further allocations.
+template <typename T>
+struct FreezableAlloc : std::allocator<T> {
+ explicit FreezableAlloc(bool* f) : frozen(f) {}
+
+ template <typename U>
+ explicit FreezableAlloc(const FreezableAlloc<U>& other)
+ : frozen(other.frozen) {}
+
+ template <class U>
+ struct rebind {
+ using other = FreezableAlloc<U>;
+ };
+
+ T* allocate(size_t n) {
+ EXPECT_FALSE(*frozen);
+ return std::allocator<T>::allocate(n);
+ }
+
+ bool* frozen;
+};
+
struct BadFastHash {
template <class T>
size_t operator()(const T&) const {
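
The freezing pattern in FreezableAlloc above is not specific to raw_hash_set; as a usage sketch, the same allocator works with any allocator-aware container (a hypothetical snippet, not part of this patch, assuming the FreezableAlloc definition above and a gtest environment):

    #include <vector>

    void FreezeExample() {
      bool frozen = false;
      std::vector<int, FreezableAlloc<int>> v(FreezableAlloc<int>(&frozen));
      v.reserve(10);   // allocation happens while unfrozen
      frozen = true;   // any further allocation would fail an EXPECT
      for (int i = 0; i < 10; ++i) v.push_back(i);  // fits in reserved capacity
    }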
@@ -479,6 +505,13 @@ struct BadFastHash {
}
};
+struct BadHashFreezableIntTable
+ : raw_hash_set<IntPolicy, BadFastHash, std::equal_to<int64_t>,
+ FreezableAlloc<int64_t>> {
+ using Base = typename BadHashFreezableIntTable::raw_hash_set;
+ using Base::Base;
+};
+
struct BadTable : raw_hash_set<IntPolicy, BadFastHash, std::equal_to<int>,
std::allocator<int>> {
using Base = typename BadTable::raw_hash_set;
@@ -512,6 +545,7 @@ TEST(Table, EmptyFunctorOptimization) {
struct GenerationData {
size_t reserved_growth;
+ size_t reservation_size;
GenerationType* generation;
};
@@ -2387,6 +2421,30 @@ TEST(Table, ReservedGrowthUpdatesWhenTableDoesntGrow) {
EXPECT_EQ(*it, 0);
}
+TEST(Table, EraseBeginEndResetsReservedGrowth) {
+ bool frozen = false;
+ BadHashFreezableIntTable t{FreezableAlloc<int64_t>(&frozen)};
+ t.reserve(100);
+ const size_t cap = t.capacity();
+ frozen = true; // no further allocs allowed
+
+ for (int i = 0; i < 10; ++i) {
+ // Create a long run (hash function returns constant).
+ for (int j = 0; j < 100; ++j) t.insert(j);
+ // Erase elements from the middle of the long run, which creates tombstones.
+ for (int j = 30; j < 60; ++j) t.erase(j);
+ EXPECT_EQ(t.size(), 70);
+ EXPECT_EQ(t.capacity(), cap);
+ ASSERT_EQ(RawHashSetTestOnlyAccess::CountTombstones(t), 30);
+
+ t.erase(t.begin(), t.end());
+
+ EXPECT_EQ(t.size(), 0);
+ EXPECT_EQ(t.capacity(), cap);
+ ASSERT_EQ(RawHashSetTestOnlyAccess::CountTombstones(t), 0);
+ }
+}
+
TEST(Table, GenerationInfoResetsOnClear) {
if (!SwisstableGenerationsEnabled()) GTEST_SKIP() << "Generations disabled.";
if (kMsvc) GTEST_SKIP() << "MSVC doesn't support | in regexp.";
diff --git a/absl/container/node_hash_map.h b/absl/container/node_hash_map.h
index dc8d7d9a..a396de2e 100644
--- a/absl/container/node_hash_map.h
+++ b/absl/container/node_hash_map.h
@@ -226,7 +226,11 @@ class node_hash_map
// iterator erase(const_iterator first, const_iterator last):
//
// Erases the elements in the half-open interval [`first`, `last`), returning an
- // iterator pointing to `last`.
+ // iterator pointing to `last`. The special case of calling
+ // `erase(begin(), end())` resets the reserved growth such that if
+ // `reserve(N)` has previously been called and there has been no intervening
+ // call to `clear()`, then after calling `erase(begin(), end())`, it is safe
+ // to assume that inserting N elements will not cause a rehash.
//
// size_type erase(const key_type& key):
//
diff --git a/absl/container/node_hash_set.h b/absl/container/node_hash_set.h
index 905a93d8..421ff460 100644
--- a/absl/container/node_hash_set.h
+++ b/absl/container/node_hash_set.h
@@ -218,7 +218,11 @@ class node_hash_set
// iterator erase(const_iterator first, const_iterator last):
//
// Erases the elements in the half-open interval [`first`, `last`), returning an
- // iterator pointing to `last`.
+ // iterator pointing to `last`. The special case of calling
+ // `erase(begin(), end())` resets the reserved growth such that if
+ // `reserve(N)` has previously been called and there has been no intervening
+ // call to `clear()`, then after calling `erase(begin(), end())`, it is safe
+ // to assume that inserting N elements will not cause a rehash.
//
// size_type erase(const key_type& key):
//