diff options
author | Vitaly Goldshteyn <goldvitaly@google.com> | 2024-03-26 21:54:27 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-03-26 21:55:20 -0700 |
commit | b70ad841380f60e8e825019bdc1e63f7c071843f (patch) | |
tree | a5f0dcf0b8951912d4a5ee30f30fdcba84257475 /absl/container | |
parent | 1ccc2eb35ed685a5640cb80a26be4df535b9c7b9 (diff) |
Introduce GrowthInfo with tests, but without usage.
The motivation is to use presence of kDeleted slots for optimizing InsertMiss for tables without kDeleted slots.
PiperOrigin-RevId: 619411282
Change-Id: Icc30606374aba7ce60b41f93ee8d44894e1b8aa5
Diffstat (limited to 'absl/container')
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 82 | ||||
-rw-r--r-- | absl/container/internal/raw_hash_set_test.cc | 72 |
2 files changed, 154 insertions, 0 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index af75e7fb..7db1c304 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1054,6 +1054,88 @@ using CommonFieldsGenerationInfo = CommonFieldsGenerationInfoDisabled; using HashSetIteratorGenerationInfo = HashSetIteratorGenerationInfoDisabled; #endif +// Stored the information regarding number of slots we can still fill +// without needing to rehash. +// +// We want to ensure sufficient number of kEmpty slots in the table in order +// to keep probe sequences relatively short. kEmpty slot in the probe group +// is required to stop probing. +// +// Tombstones (kDeleted slots) are not included in the growth capacity, +// because we'd like to rehash when the table is filled with tombstones and/or +// full slots. +// +// GrowthInfo also stores a bit that encodes whether table may have any +// kDeleted slots. +// Most of the tables (>95%) have no kDeleted slots, so some functions can +// be more efficient with this information. +// +// Callers can also force a rehash via the standard `rehash(0)`, +// which will recompute this value as a side-effect. +// +// See also `CapacityToGrowth()`. +class GrowthInfo { + public: + // Leaves data member uninitialized. + GrowthInfo() = default; + + // Initializes the GrowthInfo assuming we can grow `growth_left` elements + // and there are no kDeleted slots in the table. + void InitGrowthLeftNoDeleted(size_t growth_left) { + growth_left_info_ = growth_left; + } + + // Overwrites single full slot with an empty slot. + void OverwriteFullAsEmpty() { ++growth_left_info_; } + + // Overwrites single empty slot with a full slot. + void OverwriteEmptyAsFull() { + assert(GetGrowthLeft() > 0); + --growth_left_info_; + } + + // Overwrites several empty slots with full slots. + void OverwriteManyEmptyAsFull(size_t cnt) { + assert(GetGrowthLeft() >= cnt); + growth_left_info_ -= cnt; + } + + // Overwrites specified control element with full slot. + void OverwriteControlAsFull(ctrl_t ctrl) { + assert(GetGrowthLeft() >= static_cast<size_t>(IsEmpty(ctrl))); + growth_left_info_ -= static_cast<size_t>(IsEmpty(ctrl)); + } + + // Overwrites single full slot with a deleted slot. + void OverwriteFullAsDeleted() { growth_left_info_ |= kDeletedBit; } + + // Returns true if table satisfies two properties: + // 1. Guaranteed to have no kDeleted slots. + // 2. There is a place for at least one element to grow. + bool HasNoDeletedAndGrowthLeft() const { + return static_cast<std::make_signed_t<size_t>>(growth_left_info_) > 0; + } + + // Returns true if table guaranteed to have no k + bool HasNoDeleted() const { + return static_cast<std::make_signed_t<size_t>>(growth_left_info_) >= 0; + } + + // Returns the number of elements left to grow. + size_t GetGrowthLeft() const { + return growth_left_info_ & kGrowthLeftMask; + } + + private: + static constexpr size_t kGrowthLeftMask = ((~size_t{}) >> 1); + static constexpr size_t kDeletedBit = ~kGrowthLeftMask; + // Topmost bit signal whenever there are deleted slots. + size_t growth_left_info_; +}; + +static_assert(sizeof(GrowthInfo) == sizeof(size_t), ""); +static_assert(alignof(GrowthInfo) == alignof(size_t), ""); + // Returns whether `n` is a valid capacity (i.e., number of slots). // // A valid capacity is a non-zero integer `2^m - 1`. diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index ca6656c8..876894f4 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -92,6 +92,78 @@ using ::testing::UnorderedElementsAre; // Convenience function to static cast to ctrl_t. ctrl_t CtrlT(int i) { return static_cast<ctrl_t>(i); } +TEST(GrowthInfoTest, GetGrowthLeft) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + EXPECT_EQ(gi.GetGrowthLeft(), 5); + gi.OverwriteFullAsDeleted(); + EXPECT_EQ(gi.GetGrowthLeft(), 5); +} + +TEST(GrowthInfoTest, HasNoDeleted) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + EXPECT_TRUE(gi.HasNoDeleted()); + gi.OverwriteFullAsDeleted(); + EXPECT_FALSE(gi.HasNoDeleted()); + // After reinitialization we have no deleted slots. + gi.InitGrowthLeftNoDeleted(5); + EXPECT_TRUE(gi.HasNoDeleted()); +} + +TEST(GrowthInfoTest, HasNoDeletedAndGrowthLeft) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + EXPECT_TRUE(gi.HasNoDeletedAndGrowthLeft()); + gi.OverwriteFullAsDeleted(); + EXPECT_FALSE(gi.HasNoDeletedAndGrowthLeft()); + gi.InitGrowthLeftNoDeleted(0); + EXPECT_FALSE(gi.HasNoDeletedAndGrowthLeft()); + gi.OverwriteFullAsDeleted(); + EXPECT_FALSE(gi.HasNoDeletedAndGrowthLeft()); + // After reinitialization we have no deleted slots. + gi.InitGrowthLeftNoDeleted(5); + EXPECT_TRUE(gi.HasNoDeletedAndGrowthLeft()); +} + +TEST(GrowthInfoTest, OverwriteFullAsEmpty) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + gi.OverwriteFullAsEmpty(); + EXPECT_EQ(gi.GetGrowthLeft(), 6); + gi.OverwriteFullAsDeleted(); + EXPECT_EQ(gi.GetGrowthLeft(), 6); + gi.OverwriteFullAsEmpty(); + EXPECT_EQ(gi.GetGrowthLeft(), 7); + EXPECT_FALSE(gi.HasNoDeleted()); +} + +TEST(GrowthInfoTest, OverwriteEmptyAsFull) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + gi.OverwriteEmptyAsFull(); + EXPECT_EQ(gi.GetGrowthLeft(), 4); + gi.OverwriteFullAsDeleted(); + EXPECT_EQ(gi.GetGrowthLeft(), 4); + gi.OverwriteEmptyAsFull(); + EXPECT_EQ(gi.GetGrowthLeft(), 3); + EXPECT_FALSE(gi.HasNoDeleted()); +} + +TEST(GrowthInfoTest, OverwriteControlAsFull) { + GrowthInfo gi; + gi.InitGrowthLeftNoDeleted(5); + gi.OverwriteControlAsFull(ctrl_t::kEmpty); + EXPECT_EQ(gi.GetGrowthLeft(), 4); + gi.OverwriteControlAsFull(ctrl_t::kDeleted); + EXPECT_EQ(gi.GetGrowthLeft(), 4); + gi.OverwriteFullAsDeleted(); + gi.OverwriteControlAsFull(ctrl_t::kDeleted); + // We do not count number of deleted, so the bit sticks till the next rehash. + EXPECT_FALSE(gi.HasNoDeletedAndGrowthLeft()); + EXPECT_FALSE(gi.HasNoDeleted()); +} + TEST(Util, NormalizeCapacity) { EXPECT_EQ(1, NormalizeCapacity(0)); EXPECT_EQ(1, NormalizeCapacity(1)); |