From 5be22f98733c674d532598454ae729253bc53e82 Mon Sep 17 00:00:00 2001
From: Evan Brown
Date: Mon, 17 Jul 2023 14:06:54 -0700
Subject: Move growth_left to the backing array.

PiperOrigin-RevId: 548794485
Change-Id: Ie82d5f8ad752518ef05b38144ca1e32b21c9def8
---
 absl/container/internal/raw_hash_set.cc      |  19 +++--
 absl/container/internal/raw_hash_set.h       | 107 +++++++++++++++++----------
 absl/container/internal/raw_hash_set_test.cc |  48 +++++++++++-
 3 files changed, 125 insertions(+), 49 deletions(-)

diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc
index a40e38db..1389e6e1 100644
--- a/absl/container/internal/raw_hash_set.cc
+++ b/absl/container/internal/raw_hash_set.cc
@@ -19,6 +19,7 @@
 #include
 #include
 
+#include "absl/base/attributes.h"
 #include "absl/base/config.h"
 #include "absl/base/dynamic_annotations.h"
 #include "absl/hash/hash.h"
@@ -27,9 +28,17 @@ namespace absl {
 ABSL_NAMESPACE_BEGIN
 namespace container_internal {
 
-// A single block of empty control bytes for tables without any slots allocated.
-// This enables removing a branch in the hot path of find().
-alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[16] = {
+// We have space for `growth_left` before a single block of control bytes. A
+// single block of empty control bytes for tables without any slots allocated.
+// This enables removing a branch in the hot path of find(). In order to ensure
+// that the control bytes are aligned to 16, we have 16 bytes before the control
+// bytes even though growth_left only needs 8.
+constexpr ctrl_t ZeroCtrlT() { return static_cast<ctrl_t>(0); }
+alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = {
+    ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
+    ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
+    ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
+    ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
     ctrl_t::kSentinel, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
     ctrl_t::kEmpty,    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
     ctrl_t::kEmpty,    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
@@ -239,12 +248,12 @@ void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
     c.infoz().RecordStorageChanged(0, c.capacity());
   } else {
     void* set = &c;
-    (*policy.dealloc)(set, policy, c.control(), c.slots_ptr(), c.capacity());
+    (*policy.dealloc)(set, policy, c.backing_array_start(), c.slots_ptr(),
+                      c.capacity());
     c.set_control(EmptyGroup());
     c.set_generation_ptr(EmptyGeneration());
     c.set_slots(nullptr);
     c.set_capacity(0);
-    c.set_growth_left(0);
     c.infoz().RecordClearedReservation();
     assert(c.size() == 0);
     c.infoz().RecordStorageChanged(0, 0);
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index c2fe242d..e99dc00c 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -62,6 +62,8 @@
 // pseudo-struct:
 //
 //   struct BackingArray {
+//     // The number of elements we can insert before growing the capacity.
+//     size_t growth_left;
 //     // Control bytes for the "real" slots.
 //     ctrl_t ctrl[capacity];
 //     // Always `ctrl_t::kSentinel`. This is used by iterators to find when to
@@ -174,6 +176,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -484,13 +487,14 @@ static_assert(ctrl_t::kDeleted == static_cast<ctrl_t>(-2),
               "ctrl_t::kDeleted must be -2 to make the implementation of "
               "ConvertSpecialToEmptyAndFullToDeleted efficient");
 
-ABSL_DLL extern const ctrl_t kEmptyGroup[16];
+// See definition comment for why this is size 32.
+ABSL_DLL extern const ctrl_t kEmptyGroup[32];
 
 // Returns a pointer to a control byte group that can be used by empty tables.
 inline ctrl_t* EmptyGroup() {
   // Const must be cast away here; no uses of this function will actually write
   // to it, because it is only used for empty tables.
-  return const_cast<ctrl_t*>(kEmptyGroup);
+  return const_cast<ctrl_t*>(kEmptyGroup + 16);
 }
 
 // Returns a pointer to a generation to use for an empty hashtable.
@@ -913,24 +917,32 @@ class CommonFields : public CommonFieldsGenerationInfo {
         // fields generates less code then calling absl::exchange per field.
         control_(that.control()),
         slots_(that.slots_ptr()),
-        size_(that.size()),
         capacity_(that.capacity()),
-        compressed_tuple_(that.growth_left(), std::move(that.infoz())) {
+        compressed_tuple_(that.size(), std::move(that.infoz())) {
     that.set_control(EmptyGroup());
     that.set_slots(nullptr);
-    that.set_size(0);
     that.set_capacity(0);
-    that.set_growth_left(0);
+    that.set_size(0);
   }
   CommonFields& operator=(CommonFields&&) = default;
 
   ctrl_t* control() const { return control_; }
   void set_control(ctrl_t* c) { control_ = c; }
+  void* backing_array_start() const {
+    // growth_left is stored before control bytes.
+    assert(reinterpret_cast<uintptr_t>(control()) % alignof(size_t) == 0);
+    return control() - sizeof(size_t);
+  }
+
   // Note: we can't use slots() because Qt defines "slots" as a macro.
   void* slots_ptr() const { return slots_; }
   void set_slots(void* s) { slots_ = s; }
-  size_t size() const { return size_; }
-  void set_size(size_t s) { size_ = s; }
+
+  // The number of filled slots.
+  size_t size() const { return compressed_tuple_.template get<0>(); }
+  void set_size(size_t s) { compressed_tuple_.template get<0>() = s; }
+
+  // The total number of available slots.
   size_t capacity() const { return capacity_; }
   void set_capacity(size_t c) {
     assert(c == 0 || IsValidCapacity(c));
@@ -938,8 +950,13 @@ class CommonFields : public CommonFieldsGenerationInfo {
   }
 
   // The number of slots we can still fill without needing to rehash.
-  size_t growth_left() const { return compressed_tuple_.template get<0>(); }
-  void set_growth_left(size_t gl) { compressed_tuple_.template get<0>() = gl; }
+  // This is stored in the heap allocation before the control bytes.
+  size_t growth_left() const {
+    return *reinterpret_cast<size_t*>(backing_array_start());
+  }
+  void set_growth_left(size_t gl) {
+    *reinterpret_cast<size_t*>(backing_array_start()) = gl;
+  }
 
   HashtablezInfoHandle& infoz() { return compressed_tuple_.template get<1>(); }
   const HashtablezInfoHandle& infoz() const {
@@ -951,32 +968,30 @@
         should_rehash_for_bug_detection_on_insert(control(), capacity());
   }
   void reset_reserved_growth(size_t reservation) {
-    CommonFieldsGenerationInfo::reset_reserved_growth(reservation, size_);
+    CommonFieldsGenerationInfo::reset_reserved_growth(reservation, size());
   }
 
  private:
  // TODO(b/259599413): Investigate removing some of these fields:
  // - control/slots can be derived from each other
- // - growth_left can be moved into the slot array
  // - we can use 6 bits for capacity since it's always a power of two minus 1
 
-  // The control bytes (and, also, a pointer to the base of the backing array).
+  // The control bytes (and, also, a pointer near to the base of the backing
+  // array).
   //
   // This contains `capacity + 1 + NumClonedBytes()` entries, even
   // when the table is empty (hence EmptyGroup).
+  //
+  // Note that growth_left is stored immediately before this pointer.
   ctrl_t* control_ = EmptyGroup();
 
   // The beginning of the slots, located at `SlotOffset()` bytes after
   // `control`. May be null for empty tables.
   void* slots_ = nullptr;
 
-  // The number of filled slots.
-  size_t size_ = 0;
-
-  // The total number of available slots.
   size_t capacity_ = 0;
 
-  // Bundle together growth_left and HashtablezInfoHandle to ensure EBO for
+  // Bundle together size and HashtablezInfoHandle to ensure EBO for
   // HashtablezInfoHandle when sampling is turned off.
   absl::container_internal::CompressedTuple<size_t, HashtablezInfoHandle>
       compressed_tuple_{0u, HashtablezInfoHandle{}};
@@ -1318,20 +1333,28 @@ inline void SetCtrl(const CommonFields& common, size_t i, h2_t h,
   SetCtrl(common, i, static_cast<ctrl_t>(h), slot_size);
 }
 
+// growth_left (which is a size_t) is stored with the backing array.
+constexpr size_t BackingArrayAlignment(size_t align_of_slot) {
+  return (std::max)(align_of_slot, alignof(size_t));
+}
+
+// Computes the offset from the start of the backing allocation of the control
+// bytes. growth_left is stored at the beginning of the backing array.
+inline size_t ControlOffset() { return sizeof(size_t); }
+
 // Given the capacity of a table, computes the offset (from the start of the
 // backing allocation) of the generation counter (if it exists).
 inline size_t GenerationOffset(size_t capacity) {
   assert(IsValidCapacity(capacity));
   const size_t num_control_bytes = capacity + 1 + NumClonedBytes();
-  return num_control_bytes;
+  return ControlOffset() + num_control_bytes;
 }
 
 // Given the capacity of a table, computes the offset (from the start of the
 // backing allocation) at which the slots begin.
 inline size_t SlotOffset(size_t capacity, size_t slot_align) {
   assert(IsValidCapacity(capacity));
-  const size_t num_control_bytes = capacity + 1 + NumClonedBytes();
-  return (num_control_bytes + NumGenerationBytes() + slot_align - 1) &
+  return (GenerationOffset(capacity) + NumGenerationBytes() + slot_align - 1) &
          (~slot_align + 1);
 }
 
@@ -1356,13 +1379,15 @@ ABSL_ATTRIBUTE_NOINLINE void InitializeSlots(CommonFields& c, Alloc alloc) {
           : 0;
 
   const size_t cap = c.capacity();
+  const size_t alloc_size = AllocSize(cap, SizeOfSlot, AlignOfSlot);
+  // growth_left (which is a size_t) is stored with the backing array.
   char* mem = static_cast<char*>(
-      Allocate<AlignOfSlot>(&alloc, AllocSize(cap, SizeOfSlot, AlignOfSlot)));
+      Allocate<BackingArrayAlignment(AlignOfSlot)>(&alloc, alloc_size));
   const GenerationType old_generation = c.generation();
   c.set_generation_ptr(
       reinterpret_cast<GenerationType*>(mem + GenerationOffset(cap)));
   c.set_generation(NextGeneration(old_generation));
-  c.set_control(reinterpret_cast<ctrl_t*>(mem));
+  c.set_control(reinterpret_cast<ctrl_t*>(mem + ControlOffset()));
   c.set_slots(mem + SlotOffset(cap, AlignOfSlot));
   ResetCtrl(c, SizeOfSlot);
   if (sample_size) {
@@ -1385,8 +1410,8 @@ struct PolicyFunctions {
   void (*transfer)(void* set, void* dst_slot, void* src_slot);
 
   // Deallocate the specified backing store which is sized for n slots.
-  void (*dealloc)(void* set, const PolicyFunctions& policy, ctrl_t* ctrl,
-                  void* slot_array, size_t n);
+  void (*dealloc)(void* set, const PolicyFunctions& policy,
+                  void* backing_array_start, void* slot_array, size_t n);
 };
 
 // ClearBackingArray clears the backing array, either modifying it in place,
@@ -1405,14 +1430,14 @@ void EraseMetaOnly(CommonFields& c, ctrl_t* it, size_t slot_size);
 template <size_t AlignOfSlot>
 ABSL_ATTRIBUTE_NOINLINE void DeallocateStandard(void*,
                                                 const PolicyFunctions& policy,
-                                                ctrl_t* ctrl, void* slot_array,
-                                                size_t n) {
+                                                void* backing_array_start,
+                                                void* slot_array, size_t n) {
   // Unpoison before returning the memory to the allocator.
   SanitizerUnpoisonMemoryRegion(slot_array, policy.slot_size * n);
 
   std::allocator<char> alloc;
-  Deallocate<AlignOfSlot>(&alloc, ctrl,
-                          AllocSize(n, policy.slot_size, AlignOfSlot));
+  Deallocate<BackingArrayAlignment(AlignOfSlot)>(
+      &alloc, backing_array_start, AllocSize(n, policy.slot_size, AlignOfSlot));
 }
 
 // For trivially relocatable types we use memcpy directly. This allows us to
@@ -1773,7 +1798,9 @@ class raw_hash_set {
 
   raw_hash_set(const raw_hash_set& that, const allocator_type& a)
       : raw_hash_set(0, that.hash_ref(), that.eq_ref(), a) {
-    reserve(that.size());
+    const size_t size = that.size();
+    if (size == 0) return;
+    reserve(size);
     // Because the table is guaranteed to be empty, we can do something faster
     // than a full `insert`.
     for (const auto& v : that) {
@@ -1784,8 +1811,8 @@ class raw_hash_set {
       common().maybe_increment_generation_on_insert();
       infoz().RecordInsert(hash, target.probe_length);
     }
-    common().set_size(that.size());
-    set_growth_left(growth_left() - that.size());
+    common().set_size(size);
+    set_growth_left(growth_left() - size);
   }
 
   ABSL_ATTRIBUTE_NOINLINE raw_hash_set(raw_hash_set&& that) noexcept(
@@ -1838,8 +1865,8 @@ class raw_hash_set {
 
     // Unpoison before returning the memory to the allocator.
     SanitizerUnpoisonMemoryRegion(slot_array(), sizeof(slot_type) * cap);
-    Deallocate<alignof(slot_type)>(
-        &alloc_ref(), control(),
+    Deallocate<BackingArrayAlignment(alignof(slot_type))>(
+        &alloc_ref(), common().backing_array_start(),
         AllocSize(cap, sizeof(slot_type), alignof(slot_type)));
 
     infoz().Unregister();
@@ -2470,8 +2497,8 @@ class raw_hash_set {
     if (old_capacity) {
       SanitizerUnpoisonMemoryRegion(old_slots,
                                     sizeof(slot_type) * old_capacity);
-      Deallocate<alignof(slot_type)>(
-          &alloc_ref(), old_ctrl,
+      Deallocate<BackingArrayAlignment(alignof(slot_type))>(
+          &alloc_ref(), old_ctrl - ControlOffset(),
          AllocSize(old_capacity, sizeof(slot_type), alignof(slot_type)));
     }
     infoz().RecordRehash(total_probe_length);
@@ -2707,15 +2734,15 @@ class raw_hash_set {
                                            static_cast<slot_type*>(src));
   }
 
   // Note: dealloc_fn will only be used if we have a non-standard allocator.
-  static void dealloc_fn(void* set, const PolicyFunctions&, ctrl_t* ctrl,
-                         void* slot_mem, size_t n) {
+  static void dealloc_fn(void* set, const PolicyFunctions&,
+                         void* backing_array_start, void* slot_mem, size_t n) {
     auto* h = static_cast<raw_hash_set*>(set);
 
     // Unpoison before returning the memory to the allocator.
     SanitizerUnpoisonMemoryRegion(slot_mem, sizeof(slot_type) * n);
 
-    Deallocate<alignof(slot_type)>(
-        &h->alloc_ref(), ctrl,
+    Deallocate<BackingArrayAlignment(alignof(slot_type))>(
+        &h->alloc_ref(), backing_array_start,
         AllocSize(n, sizeof(slot_type), alignof(slot_type)));
   }
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index ce1070ba..6cbf6dea 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -436,6 +437,41 @@ struct CustomAllocIntTable
   using Base::Base;
 };
 
+// Tries to allocate memory at the minimum alignment even when the default
+// allocator uses a higher alignment.
+template <class T>
+struct MinimumAlignmentAlloc : std::allocator<T> {
+  MinimumAlignmentAlloc() = default;
+
+  template <class U>
+  explicit MinimumAlignmentAlloc(const MinimumAlignmentAlloc<U>& /*other*/) {}
+
+  template <class U>
+  struct rebind {
+    using other = MinimumAlignmentAlloc<U>;
+  };
+
+  T* allocate(size_t n) {
+    T* ptr = std::allocator<T>::allocate(n + 1);
+    char* cptr = reinterpret_cast<char*>(ptr);
+    cptr += alignof(T);
+    return reinterpret_cast<T*>(cptr);
+  }
+
+  void deallocate(T* ptr, size_t n) {
+    char* cptr = reinterpret_cast<char*>(ptr);
+    cptr -= alignof(T);
+    std::allocator<T>::deallocate(reinterpret_cast<T*>(cptr), n + 1);
+  }
+};
+
+struct MinimumAlignmentUint8Table
+    : raw_hash_set<Uint8Policy, container_internal::hash_default_hash<uint8_t>,
+                   std::equal_to<uint8_t>, MinimumAlignmentAlloc<uint8_t>> {
+  using Base = typename MinimumAlignmentUint8Table::raw_hash_set;
+  using Base::Base;
+};
+
 struct BadFastHash {
   template <class T>
   size_t operator()(const T&) const {
@@ -460,14 +496,12 @@ TEST(Table, EmptyFunctorOptimization) {
     void* slots;
     size_t size;
     size_t capacity;
-    size_t growth_left;
   };
   struct MockTableInfozDisabled {
     void* ctrl;
     void* slots;
     size_t size;
     size_t capacity;
-    size_t growth_left;
   };
   struct StatelessHash {
     size_t operator()(absl::string_view) const { return 0; }
@@ -2247,11 +2281,17 @@ TEST(Sanitizer, PoisoningOnErase) {
 }
 #endif  // ABSL_HAVE_ADDRESS_SANITIZER
 
-TEST(Table, AlignOne) {
+template <class TableType>
+class AlignOneTest : public ::testing::Test {};
+using AlignOneTestTypes =
+    ::testing::Types<Uint8Table, MinimumAlignmentUint8Table>;
+TYPED_TEST_SUITE(AlignOneTest, AlignOneTestTypes);
+
+TYPED_TEST(AlignOneTest, AlignOne) {
   // We previously had a bug in which we were copying a control byte over the
   // first slot when alignof(value_type) is 1. We test repeated
   // insertions/erases and verify that the behavior is correct.
-  Uint8Table t;
+  TypeParam t;
   std::unordered_set<uint8_t> verifier;  // NOLINT
 
   // Do repeated insertions/erases from the table.
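For readers skimming the patch, the layout change can be summarized outside the diff. The sketch below mirrors the offset helpers the patch adds (BackingArrayAlignment, ControlOffset, GenerationOffset, SlotOffset): growth_left now occupies the first sizeof(size_t) bytes of the heap allocation, the control bytes follow it, and the slots are placed after the optional generation counter, rounded up to the slot alignment. This is a minimal standalone illustration, not the Abseil code itself; the kNumClonedBytes and kNumGenerationBytes constants are illustrative assumptions, and main() only prints the offsets for one example configuration.

// Standalone sketch of the backing-array layout introduced by this change.
// Assumptions (not taken from the patch): kNumClonedBytes = 15 (one SSE2
// group width minus the sentinel), kNumGenerationBytes = 0 (generations off).
#include <algorithm>
#include <cstddef>
#include <cstdio>

constexpr size_t kNumClonedBytes = 15;
constexpr size_t kNumGenerationBytes = 0;

// growth_left (a size_t) lives at the very start of the allocation, so the
// whole backing array must be aligned to at least alignof(size_t).
constexpr size_t BackingArrayAlignment(size_t align_of_slot) {
  return (std::max)(align_of_slot, alignof(size_t));
}

// Control bytes begin right after growth_left.
constexpr size_t ControlOffset() { return sizeof(size_t); }

// Generation counter (if any) follows the control bytes, sentinel, and
// cloned bytes.
constexpr size_t GenerationOffset(size_t capacity) {
  return ControlOffset() + capacity + 1 + kNumClonedBytes;
}

// Slots start at the next multiple of the slot alignment.
constexpr size_t SlotOffset(size_t capacity, size_t slot_align) {
  return (GenerationOffset(capacity) + kNumGenerationBytes + slot_align - 1) &
         (~slot_align + 1);
}

constexpr size_t AllocSize(size_t capacity, size_t slot_size,
                           size_t slot_align) {
  return SlotOffset(capacity, slot_align) + capacity * slot_size;
}

int main() {
  // Example: capacity 15 with 8-byte slots aligned to 8.
  constexpr size_t cap = 15, slot_size = 8, slot_align = 8;
  std::printf("alignment     : %zu\n", BackingArrayAlignment(slot_align));
  std::printf("control offset: %zu\n", ControlOffset());
  std::printf("gen offset    : %zu\n", GenerationOffset(cap));
  std::printf("slot offset   : %zu\n", SlotOffset(cap, slot_align));
  std::printf("alloc size    : %zu\n", AllocSize(cap, slot_size, slot_align));
}

Compared with the old layout, the allocation grows by sizeof(size_t) bytes (plus any alignment padding), which is why the patch also widens kEmptyGroup to 32 bytes so that empty tables still present 16-aligned control bytes at EmptyGroup().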