| author | Chris Kennelly <ckennelly@google.com> | 2024-03-25 13:16:00 -0700 |
|---|---|---|
| committer | Copybara-Service <copybara-worker@google.com> | 2024-03-25 13:16:58 -0700 |
| commit | 48abb9fe0eeaf0149f0351acb00201f07e79f293 (patch) | |
| tree | 144d19ed2fd646f882e8c967a5f006cda19260b2 | |
| parent | 06e119066162d7dfa21c4481446e580772ff51f1 (diff) | |
Record sizeof(key_type), sizeof(value_type) in hashtable profiles.
This can identify situations where flat_hash_* is suboptimal for large
elements.
PiperOrigin-RevId: 618937993
Change-Id: I2bde069bc3526b14ad1718ba6f50467002aeed16
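For context, the trade-off this is meant to surface (an illustrative sketch, not part of the commit): absl::flat_hash_map stores each std::pair<const K, V> directly in its slot array, so a large mapped type inflates every bucket, including empty ones, and forces rehashes to move large objects. Recording sizeof(key_type) and sizeof(value_type) in the sampled profile lets tooling flag such tables, where a node-based container (or indirection through a pointer-sized value) may be a better fit. The `BigValue` type and its 256-byte payload below are arbitrary example choices.

```cpp
#include <cstdint>

#include "absl/container/flat_hash_map.h"
#include "absl/container/node_hash_map.h"

// Arbitrary example of a large mapped type.
struct BigValue {
  char payload[256];
};

int main() {
  // Flat table: every slot holds a full std::pair<const int64_t, BigValue>,
  // so unused capacity and rehashes pay for 260+ bytes per bucket.
  absl::flat_hash_map<int64_t, BigValue> flat;
  flat[1] = BigValue{};

  // Node table: slots hold pointers and elements are heap-allocated, which
  // can be preferable for large elements (at the cost of indirection).
  absl::node_hash_map<int64_t, BigValue> node;
  node[1] = BigValue{};
  return 0;
}
```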
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | absl/container/internal/hashtablez_sampler.cc | 14 |
| -rw-r--r-- | absl/container/internal/hashtablez_sampler.h | 13 |
| -rw-r--r-- | absl/container/internal/hashtablez_sampler_test.cc | 89 |
| -rw-r--r-- | absl/container/internal/raw_hash_set.h | 22 |
| -rw-r--r-- | absl/container/internal/raw_hash_set_test.cc | 2 |

5 files changed, 117 insertions, 23 deletions
diff --git a/absl/container/internal/hashtablez_sampler.cc b/absl/container/internal/hashtablez_sampler.cc
index e17ba14d..fd21d966 100644
--- a/absl/container/internal/hashtablez_sampler.cc
+++ b/absl/container/internal/hashtablez_sampler.cc
@@ -79,6 +79,8 @@ HashtablezInfo::~HashtablezInfo() = default;
 
 void HashtablezInfo::PrepareForSampling(int64_t stride,
                                         size_t inline_element_size_value,
+                                        size_t key_size_value,
+                                        size_t value_size_value,
                                         uint16_t soo_capacity_value) {
   capacity.store(0, std::memory_order_relaxed);
   size.store(0, std::memory_order_relaxed);
@@ -99,6 +101,8 @@ void HashtablezInfo::PrepareForSampling(int64_t stride,
   depth = absl::GetStackTrace(stack, HashtablezInfo::kMaxStackDepth,
                               /* skip_count= */ 0);
   inline_element_size = inline_element_size_value;
+  key_size = key_size_value;
+  value_size = value_size_value;
   soo_capacity = soo_capacity_value;
 }
 
@@ -123,12 +127,13 @@ static bool ShouldForceSampling() {
 }
 
 HashtablezInfo* SampleSlow(SamplingState& next_sample,
-                           size_t inline_element_size, uint16_t soo_capacity) {
+                           size_t inline_element_size, size_t key_size,
+                           size_t value_size, uint16_t soo_capacity) {
   if (ABSL_PREDICT_FALSE(ShouldForceSampling())) {
     next_sample.next_sample = 1;
     const int64_t old_stride = exchange(next_sample.sample_stride, 1);
     HashtablezInfo* result = GlobalHashtablezSampler().Register(
-        old_stride, inline_element_size, soo_capacity);
+        old_stride, inline_element_size, key_size, value_size, soo_capacity);
     return result;
   }
 
@@ -158,11 +163,12 @@ HashtablezInfo* SampleSlow(SamplingState& next_sample,
   // that case.
   if (first) {
     if (ABSL_PREDICT_TRUE(--next_sample.next_sample > 0)) return nullptr;
-    return SampleSlow(next_sample, inline_element_size, soo_capacity);
+    return SampleSlow(next_sample, inline_element_size, key_size, value_size,
+                      soo_capacity);
   }
 
   return GlobalHashtablezSampler().Register(old_stride, inline_element_size,
-                                            soo_capacity);
+                                            key_size, value_size, soo_capacity);
 #endif
 }
 
diff --git a/absl/container/internal/hashtablez_sampler.h b/absl/container/internal/hashtablez_sampler.h
index 4e11968d..d74acf8c 100644
--- a/absl/container/internal/hashtablez_sampler.h
+++ b/absl/container/internal/hashtablez_sampler.h
@@ -73,6 +73,7 @@ struct HashtablezInfo : public profiling_internal::Sample<HashtablezInfo> {
   // Puts the object into a clean state, fills in the logically `const` members,
   // blocking for any readers that are currently sampling the object.
   void PrepareForSampling(int64_t stride, size_t inline_element_size_value,
+                          size_t key_size, size_t value_size,
                           uint16_t soo_capacity_value)
       ABSL_EXCLUSIVE_LOCKS_REQUIRED(init_mu);
 
@@ -104,6 +105,8 @@ struct HashtablezInfo : public profiling_internal::Sample<HashtablezInfo> {
   uint16_t soo_capacity;
   void* stack[kMaxStackDepth];
   size_t inline_element_size;  // How big is the slot in bytes?
+  size_t key_size;             // sizeof(key_type)
+  size_t value_size;           // sizeof(value_type)
 };
 
 void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length);
@@ -128,7 +131,8 @@ struct SamplingState {
 };
 
 HashtablezInfo* SampleSlow(SamplingState& next_sample,
-                           size_t inline_element_size, uint16_t soo_capacity);
+                           size_t inline_element_size, size_t key_size,
+                           size_t value_size, uint16_t soo_capacity);
 void UnsampleSlow(HashtablezInfo* info);
 
 #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE)
@@ -218,13 +222,16 @@ extern ABSL_PER_THREAD_TLS_KEYWORD SamplingState global_next_sample;
 // Returns a sampling handle.
 inline HashtablezInfoHandle Sample(
     ABSL_ATTRIBUTE_UNUSED size_t inline_element_size,
+    ABSL_ATTRIBUTE_UNUSED size_t key_size,
+    ABSL_ATTRIBUTE_UNUSED size_t value_size,
     ABSL_ATTRIBUTE_UNUSED uint16_t soo_capacity) {
 #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE)
   if (ABSL_PREDICT_TRUE(--global_next_sample.next_sample > 0)) {
     return HashtablezInfoHandle(nullptr);
   }
-  return HashtablezInfoHandle(
-      SampleSlow(global_next_sample, inline_element_size, soo_capacity));
+  return HashtablezInfoHandle(SampleSlow(global_next_sample,
+                                         inline_element_size, key_size,
+                                         value_size, soo_capacity));
 #else
   return HashtablezInfoHandle(nullptr);
 #endif  // !ABSL_PER_THREAD_TLS
diff --git a/absl/container/internal/hashtablez_sampler_test.cc b/absl/container/internal/hashtablez_sampler_test.cc
index a80f530d..24d3bc48 100644
--- a/absl/container/internal/hashtablez_sampler_test.cc
+++ b/absl/container/internal/hashtablez_sampler_test.cc
@@ -71,7 +71,11 @@ std::vector<size_t> GetSizes(HashtablezSampler* s) {
 HashtablezInfo* Register(HashtablezSampler* s, size_t size) {
   const int64_t test_stride = 123;
   const size_t test_element_size = 17;
-  auto* info = s->Register(test_stride, test_element_size, /*soo_capacity=*/0);
+  const size_t test_key_size = 3;
+  const size_t test_value_size = 5;
+  auto* info =
+      s->Register(test_stride, test_element_size, /*key_size=*/test_key_size,
+                  /*value_size=*/test_value_size, /*soo_capacity=*/0);
   assert(info != nullptr);
   info->size.store(size);
   return info;
@@ -81,9 +85,14 @@ TEST(HashtablezInfoTest, PrepareForSampling) {
   absl::Time test_start = absl::Now();
   const int64_t test_stride = 123;
   const size_t test_element_size = 17;
+  const size_t test_key_size = 15;
+  const size_t test_value_size = 13;
+
   HashtablezInfo info;
   absl::MutexLock l(&info.init_mu);
   info.PrepareForSampling(test_stride, test_element_size,
+                          /*key_size=*/test_key_size,
+                          /*value_size=*/test_value_size,
                           /*soo_capacity_value=*/1);
 
   EXPECT_EQ(info.capacity.load(), 0);
@@ -99,6 +108,8 @@ TEST(HashtablezInfoTest, PrepareForSampling) {
   EXPECT_GE(info.create_time, test_start);
   EXPECT_EQ(info.weight, test_stride);
   EXPECT_EQ(info.inline_element_size, test_element_size);
+  EXPECT_EQ(info.key_size, test_key_size);
+  EXPECT_EQ(info.value_size, test_value_size);
   EXPECT_EQ(info.soo_capacity, 1);
 
   info.capacity.store(1, std::memory_order_relaxed);
@@ -113,6 +124,8 @@ TEST(HashtablezInfoTest, PrepareForSampling) {
   info.create_time = test_start - absl::Hours(20);
 
   info.PrepareForSampling(test_stride * 2, test_element_size,
+                          /*key_size=*/test_key_size,
+                          /*value_size=*/test_value_size,
                           /*soo_capacity_value=*/0);
   EXPECT_EQ(info.capacity.load(), 0);
   EXPECT_EQ(info.size.load(), 0);
@@ -126,6 +139,8 @@ TEST(HashtablezInfoTest, PrepareForSampling) {
   EXPECT_EQ(info.max_reserve.load(), 0);
   EXPECT_EQ(info.weight, 2 * test_stride);
   EXPECT_EQ(info.inline_element_size, test_element_size);
+  EXPECT_EQ(info.key_size, test_key_size);
+  EXPECT_EQ(info.value_size, test_value_size);
   EXPECT_GE(info.create_time, test_start);
   EXPECT_EQ(info.soo_capacity, 0);
 }
@@ -135,7 +150,12 @@ TEST(HashtablezInfoTest, RecordStorageChanged) {
   absl::MutexLock l(&info.init_mu);
   const int64_t test_stride = 21;
   const size_t test_element_size = 19;
+  const size_t test_key_size = 17;
+  const size_t test_value_size = 15;
+
   info.PrepareForSampling(test_stride, test_element_size,
+                          /*key_size=*/test_key_size,
+                          /*value_size=*/test_value_size,
                           /*soo_capacity_value=*/0);
   RecordStorageChangedSlow(&info, 17, 47);
   EXPECT_EQ(info.size.load(), 17);
@@ -150,7 +170,12 @@ TEST(HashtablezInfoTest, RecordInsert) {
   absl::MutexLock l(&info.init_mu);
   const int64_t test_stride = 25;
   const size_t test_element_size = 23;
+  const size_t test_key_size = 21;
+  const size_t test_value_size = 19;
+
   info.PrepareForSampling(test_stride, test_element_size,
+                          /*key_size=*/test_key_size,
+                          /*value_size=*/test_value_size,
                           /*soo_capacity_value=*/0);
   EXPECT_EQ(info.max_probe_length.load(), 0);
   RecordInsertSlow(&info, 0x0000FF00, 6 * kProbeLength);
@@ -173,9 +198,14 @@ TEST(HashtablezInfoTest, RecordInsert) {
 TEST(HashtablezInfoTest, RecordErase) {
   const int64_t test_stride = 31;
   const size_t test_element_size = 29;
+  const size_t test_key_size = 27;
+  const size_t test_value_size = 25;
+
   HashtablezInfo info;
   absl::MutexLock l(&info.init_mu);
   info.PrepareForSampling(test_stride, test_element_size,
+                          /*key_size=*/test_key_size,
+                          /*value_size=*/test_value_size,
                           /*soo_capacity_value=*/1);
   EXPECT_EQ(info.num_erases.load(), 0);
   EXPECT_EQ(info.size.load(), 0);
@@ -185,15 +215,22 @@ TEST(HashtablezInfoTest, RecordErase) {
   EXPECT_EQ(info.size.load(), 0);
   EXPECT_EQ(info.num_erases.load(), 1);
   EXPECT_EQ(info.inline_element_size, test_element_size);
+  EXPECT_EQ(info.key_size, test_key_size);
+  EXPECT_EQ(info.value_size, test_value_size);
   EXPECT_EQ(info.soo_capacity, 1);
 }
 
 TEST(HashtablezInfoTest, RecordRehash) {
   const int64_t test_stride = 33;
   const size_t test_element_size = 31;
+  const size_t test_key_size = 29;
+  const size_t test_value_size = 27;
   HashtablezInfo info;
   absl::MutexLock l(&info.init_mu);
   info.PrepareForSampling(test_stride, test_element_size,
+                          /*key_size=*/test_key_size,
+                          /*value_size=*/test_value_size,
+
                           /*soo_capacity_value=*/0);
   RecordInsertSlow(&info, 0x1, 0);
   RecordInsertSlow(&info, 0x2, kProbeLength);
@@ -214,6 +251,8 @@ TEST(HashtablezInfoTest, RecordRehash) {
   EXPECT_EQ(info.num_erases.load(), 0);
   EXPECT_EQ(info.num_rehashes.load(), 1);
   EXPECT_EQ(info.inline_element_size, test_element_size);
+  EXPECT_EQ(info.key_size, test_key_size);
+  EXPECT_EQ(info.value_size, test_value_size);
   EXPECT_EQ(info.soo_capacity, 0);
 }
 
@@ -222,7 +261,13 @@ TEST(HashtablezInfoTest, RecordReservation) {
   absl::MutexLock l(&info.init_mu);
   const int64_t test_stride = 35;
   const size_t test_element_size = 33;
+  const size_t test_key_size = 31;
+  const size_t test_value_size = 29;
+
   info.PrepareForSampling(test_stride, test_element_size,
+                          /*key_size=*/test_key_size,
+                          /*value_size=*/test_value_size,
+
                           /*soo_capacity_value=*/0);
   RecordReservationSlow(&info, 3);
   EXPECT_EQ(info.max_reserve.load(), 3);
@@ -239,13 +284,19 @@ TEST(HashtablezInfoTest, RecordReservation) {
 #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE)
 TEST(HashtablezSamplerTest, SmallSampleParameter) {
   const size_t test_element_size = 31;
+  const size_t test_key_size = 33;
+  const size_t test_value_size = 35;
+
   SetHashtablezEnabled(true);
   SetHashtablezSampleParameter(100);
 
   for (int i = 0; i < 1000; ++i) {
     SamplingState next_sample = {0, 0};
-    HashtablezInfo* sample = SampleSlow(next_sample, test_element_size,
-                                        /*soo_capacity=*/0);
+    HashtablezInfo* sample =
+        SampleSlow(next_sample, test_element_size,
+                   /*key_size=*/test_key_size, /*value_size=*/test_value_size,
+
+                   /*soo_capacity=*/0);
     EXPECT_GT(next_sample.next_sample, 0);
     EXPECT_EQ(next_sample.next_sample, next_sample.sample_stride);
     EXPECT_NE(sample, nullptr);
@@ -255,13 +306,17 @@ TEST(HashtablezSamplerTest, SmallSampleParameter) {
 
 TEST(HashtablezSamplerTest, LargeSampleParameter) {
   const size_t test_element_size = 31;
+  const size_t test_key_size = 33;
+  const size_t test_value_size = 35;
   SetHashtablezEnabled(true);
   SetHashtablezSampleParameter(std::numeric_limits<int32_t>::max());
 
   for (int i = 0; i < 1000; ++i) {
     SamplingState next_sample = {0, 0};
-    HashtablezInfo* sample = SampleSlow(next_sample, test_element_size,
-                                        /*soo_capacity=*/0);
+    HashtablezInfo* sample =
+        SampleSlow(next_sample, test_element_size,
+                   /*key_size=*/test_key_size, /*value_size=*/test_value_size,
+                   /*soo_capacity=*/0);
     EXPECT_GT(next_sample.next_sample, 0);
     EXPECT_EQ(next_sample.next_sample, next_sample.sample_stride);
     EXPECT_NE(sample, nullptr);
@@ -271,14 +326,20 @@ TEST(HashtablezSamplerTest, LargeSampleParameter) {
 
 TEST(HashtablezSamplerTest, Sample) {
   const size_t test_element_size = 31;
+  const size_t test_key_size = 33;
+  const size_t test_value_size = 35;
   SetHashtablezEnabled(true);
   SetHashtablezSampleParameter(100);
   int64_t num_sampled = 0;
   int64_t total = 0;
   double sample_rate = 0.0;
   for (int i = 0; i < 1000000; ++i) {
-    HashtablezInfoHandle h = Sample(test_element_size,
-                                    /*soo_capacity=*/0);
+    HashtablezInfoHandle h =
+        Sample(test_element_size,
+               /*key_size=*/test_key_size, /*value_size=*/test_value_size,
+
+               /*soo_capacity=*/0);
+
     ++total;
     if (h.IsSampled()) {
       ++num_sampled;
@@ -293,7 +354,11 @@ TEST(HashtablezSamplerTest, Handle) {
   auto& sampler = GlobalHashtablezSampler();
   const int64_t test_stride = 41;
   const size_t test_element_size = 39;
+  const size_t test_key_size = 37;
+  const size_t test_value_size = 35;
   HashtablezInfoHandle h(sampler.Register(test_stride, test_element_size,
+                                          /*key_size=*/test_key_size,
+                                          /*value_size=*/test_value_size,
                                           /*soo_capacity=*/0));
   auto* info = HashtablezInfoHandlePeer::GetInfo(&h);
   info->hashes_bitwise_and.store(0x12345678, std::memory_order_relaxed);
@@ -370,7 +435,10 @@ TEST(HashtablezSamplerTest, MultiThreaded) {
   for (int i = 0; i < 10; ++i) {
     const int64_t sampling_stride = 11 + i % 3;
     const size_t elt_size = 10 + i % 2;
-    pool.Schedule([&sampler, &stop, sampling_stride, elt_size]() {
+    const size_t key_size = 12 + i % 4;
+    const size_t value_size = 13 + i % 5;
+    pool.Schedule([&sampler, &stop, sampling_stride, elt_size, key_size,
+                   value_size]() {
       std::random_device rd;
      std::mt19937 gen(rd());
 
@@ -378,11 +446,16 @@ TEST(HashtablezSamplerTest, MultiThreaded) {
       while (!stop.HasBeenNotified()) {
         if (infoz.empty()) {
           infoz.push_back(sampler.Register(sampling_stride, elt_size,
+                                           /*key_size=*/key_size,
+                                           /*value_size=*/value_size,
                                            /*soo_capacity=*/0));
         }
         switch (std::uniform_int_distribution<>(0, 2)(gen)) {
           case 0: {
             infoz.push_back(sampler.Register(sampling_stride, elt_size,
+                                             /*key_size=*/key_size,
+                                             /*value_size=*/value_size,
+
                                              /*soo_capacity=*/0));
             break;
           }
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 58188444..2b0337ef 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -1786,7 +1786,8 @@ constexpr bool ShouldSampleHashtablezInfo() {
 }
 
 template <bool kSooEnabled>
-HashtablezInfoHandle SampleHashtablezInfo(size_t sizeof_slot,
+HashtablezInfoHandle SampleHashtablezInfo(size_t sizeof_slot, size_t sizeof_key,
+                                          size_t sizeof_value,
                                           size_t old_capacity, bool was_soo,
                                           HashtablezInfoHandle forced_infoz,
                                           CommonFields& c) {
@@ -1794,12 +1795,12 @@ HashtablezInfoHandle SampleHashtablezInfo(size_t sizeof_slot,
   // In SOO, we sample on the first insertion so if this is an empty SOO case
   // (e.g. when reserve is called), then we still need to sample.
   if (kSooEnabled && was_soo && c.size() == 0) {
-    return Sample(sizeof_slot, SooCapacity());
+    return Sample(sizeof_slot, sizeof_key, sizeof_value, SooCapacity());
   }
   // For non-SOO cases, we sample whenever the capacity is increasing from zero
   // to non-zero.
   if (!kSooEnabled && old_capacity == 0) {
-    return Sample(sizeof_slot, 0);
+    return Sample(sizeof_slot, sizeof_key, sizeof_value, 0);
   }
   return c.infoz();
 }
@@ -1902,12 +1903,15 @@ class HashSetResizeHelper {
   template <typename Alloc, size_t SizeOfSlot, bool TransferUsesMemcpy,
             bool SooEnabled, size_t AlignOfSlot>
   ABSL_ATTRIBUTE_NOINLINE bool InitializeSlots(CommonFields& c, Alloc alloc,
-                                               ctrl_t soo_slot_h2) {
+                                               ctrl_t soo_slot_h2,
+                                               size_t key_size,
+                                               size_t value_size) {
     assert(c.capacity());
     HashtablezInfoHandle infoz =
         ShouldSampleHashtablezInfo<Alloc>()
-            ? SampleHashtablezInfo<SooEnabled>(SizeOfSlot, old_capacity_,
-                                               was_soo_, forced_infoz_, c)
+            ? SampleHashtablezInfo<SooEnabled>(SizeOfSlot, key_size, value_size,
+                                               old_capacity_, was_soo_,
+                                               forced_infoz_, c)
             : HashtablezInfoHandle{};
 
     const bool has_infoz = infoz.IsSampled();
@@ -3362,7 +3366,8 @@ class raw_hash_set {
   inline HashtablezInfoHandle try_sample_soo() {
     assert(is_soo());
     if (!ShouldSampleHashtablezInfo<CharAlloc>()) return HashtablezInfoHandle{};
-    return Sample(sizeof(slot_type), SooCapacity());
+    return Sample(sizeof(slot_type), sizeof(key_type), sizeof(value_type),
+                  SooCapacity());
   }
 
   inline void destroy_slots() {
@@ -3458,7 +3463,8 @@ class raw_hash_set {
         resize_helper.InitializeSlots<CharAlloc, sizeof(slot_type),
                                       PolicyTraits::transfer_uses_memcpy(),
                                       SooEnabled(), alignof(slot_type)>(
-            common(), CharAlloc(alloc_ref()), soo_slot_h2);
+            common(), CharAlloc(alloc_ref()), soo_slot_h2, sizeof(key_type),
+            sizeof(value_type));
 
     // In the SooEnabled() case, capacity is never 0 so we don't check.
     if (!SooEnabled() && resize_helper.old_capacity() == 0) {
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index f00cef8c..ca6656c8 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -2571,6 +2571,8 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) {
                      std::memory_order_relaxed)]++;
     reservations[info.max_reserve.load(std::memory_order_relaxed)]++;
     EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type));
+    EXPECT_EQ(info.key_size, sizeof(typename TypeParam::key_type));
+    EXPECT_EQ(info.value_size, sizeof(typename TypeParam::value_type));
     if (soo_enabled) {
       EXPECT_EQ(info.soo_capacity, SooCapacity());
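Downstream, the new fields surface through the sampled HashtablezInfo records. The sketch below is a hypothetical consumer, not part of this change: it walks the global sampler via the existing GlobalHashtablezSampler() and Iterate() API declared in hashtablez_sampler.h, assumes the library was built with ABSL_INTERNAL_HASHTABLEZ_SAMPLE enabled so that samples exist, and uses an arbitrary 64-byte cutoff purely for illustration.

```cpp
#include <cstdio>

#include "absl/container/internal/hashtablez_sampler.h"

// Hypothetical report: flag sampled tables whose flat slots are large.
void ReportLargeSlots() {
  using absl::container_internal::GlobalHashtablezSampler;
  using absl::container_internal::HashtablezInfo;

  GlobalHashtablezSampler().Iterate([](const HashtablezInfo& info) {
    // inline_element_size is sizeof(slot_type); key_size and value_size are
    // the sizeof(key_type) / sizeof(value_type) recorded by this change.
    if (info.inline_element_size >= 64) {
      std::printf("large slots: %zu bytes (key %zu, value %zu)\n",
                  info.inline_element_size, info.key_size, info.value_size);
    }
  });
}
```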