summaryrefslogtreecommitdiff
path: root/absl/container/internal/hashtablez_sampler.cc
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2022-11-28 12:27:06 -0800
committerGravatar Copybara-Service <copybara-worker@google.com>2022-11-28 12:27:43 -0800
commite5a7979d36a84831652abf3138c4d76797c002b5 (patch)
treee7dc92ce03b1b04428b588f33bddf657ea97f554 /absl/container/internal/hashtablez_sampler.cc
parente3158086978de4ff15a2058f49ba525c89e14ce6 (diff)
Reduce flat_hash_{set,map} generated code size.
This CL makes a bunch of changes (mostly to raw_hash_set which underlies flat_hash_set and flat_hash_map). Techniques used: * Extract code that does not depend on the specific hash table type into common (non-inlined) functions. * Place ABSL_ATTRIBUTE_NOINLINE directives judiciously. * Out-of-line some slow paths. Reduces sizes of some large binaries by ~0.5%. Has no significant performance impact on a few performance critical binaries. ## Speed of fleetbench micro-benchmarks Following is a histogram of %-age changes in [fleetbench](https://github.com/google/fleetbench) hot_swissmap_benchmark results. Negative numbers indicate a speedup caused by this change. Statistically insignificant changes are mapped to zero. XXX Also run and merge in cold_swissmap_benchmark Across all 351 benchmarks, the average speedup is 0.38%. The best speedup was -25%, worst slowdown was +6.81%. ``` Count: 351 Average: -0.382764 StdDev: 3.77807 Min: -25 Median: 0.435135 Max: 6.81 --------------------------------------------- [ -25, -10) 16 4.558% 4.558% # [ -9, -8) 2 0.570% 5.128% [ -8, -7) 1 0.285% 5.413% [ -7, -6) 1 0.285% 5.698% [ -6, -5) 2 0.570% 6.268% [ -5, -4) 5 1.425% 7.692% [ -4, -3) 13 3.704% 11.396% # [ -3, -2) 15 4.274% 15.670% # [ -2, -1) 26 7.407% 23.077% ## [ -1, 0) 14 3.989% 27.066% # [ 0, 1) 185 52.707% 79.772% ############ [ 1, 2) 14 3.989% 83.761% # [ 2, 3) 8 2.279% 86.040% # [ 3, 4) 7 1.994% 88.034% [ 4, 5) 32 9.117% 97.151% ## [ 5, 6) 6 1.709% 98.860% [ 6, 7) 4 1.140% 100.000% ``` We looked at the slowdowns and they do not seem worth worrying about. E.g., the worst one was: ``` BM_FindHit_Hot<::absl::node_hash_set,64>/set_size:4096/density:0 2.61ns ± 1% 2.79ns ± 1% +6.81% (p=0.008 n=5+5) ``` ## Detailed changes * Out-of-line slow paths in hash table sampler methods. * Explicitly unregister from sampler instead of from destructor. * Introduced a non-templated CommonFields struct that holds some of the hash table fields (infoz, ctrl, slots, size, capacity). This struct can be passed to new non-templated helpers. The struct is a private base class of raw_hash_set. * Made non-inlined InitializeSlots<> that is only templated on allocator and size/alignment of the slot type so that we can share instantiations across types that have the same size/alignment. * Moved some infrequently called code paths into non-inlined type-erased. functions. Pass a suite of type-specific function pointers to these routines for when they need to operate on slots. * Marked some methods as non-inlined. * Avoid unnecessary reinitialization in destructor. * Introduce UpdateSpine type-erased helper that is called from clear() and rehash(). PiperOrigin-RevId: 491413386 Change-Id: Ia5495c5a6ec73622a785a0d260e406ddb9085a7c
Diffstat (limited to 'absl/container/internal/hashtablez_sampler.cc')
-rw-r--r--absl/container/internal/hashtablez_sampler.cc46
1 files changed, 46 insertions, 0 deletions
diff --git a/absl/container/internal/hashtablez_sampler.cc b/absl/container/internal/hashtablez_sampler.cc
index 5b8cf341..6b6d3491 100644
--- a/absl/container/internal/hashtablez_sampler.cc
+++ b/absl/container/internal/hashtablez_sampler.cc
@@ -14,6 +14,7 @@
#include "absl/container/internal/hashtablez_sampler.h"
+#include <algorithm>
#include <atomic>
#include <cassert>
#include <cmath>
@@ -158,6 +159,43 @@ void UnsampleSlow(HashtablezInfo* info) {
GlobalHashtablezSampler().Unregister(info);
}
+void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length) {
+#ifdef ABSL_INTERNAL_HAVE_SSE2
+ total_probe_length /= 16;
+#else
+ total_probe_length /= 8;
+#endif
+ info->total_probe_length.store(total_probe_length, std::memory_order_relaxed);
+ info->num_erases.store(0, std::memory_order_relaxed);
+ // There is only one concurrent writer, so `load` then `store` is sufficient
+ // instead of using `fetch_add`.
+ info->num_rehashes.store(
+ 1 + info->num_rehashes.load(std::memory_order_relaxed),
+ std::memory_order_relaxed);
+}
+
+void RecordReservationSlow(HashtablezInfo* info, size_t target_capacity) {
+ info->max_reserve.store(
+ (std::max)(info->max_reserve.load(std::memory_order_relaxed),
+ target_capacity),
+ std::memory_order_relaxed);
+}
+
+void RecordClearedReservationSlow(HashtablezInfo* info) {
+ info->max_reserve.store(0, std::memory_order_relaxed);
+}
+
+void RecordStorageChangedSlow(HashtablezInfo* info, size_t size,
+ size_t capacity) {
+ info->size.store(size, std::memory_order_relaxed);
+ info->capacity.store(capacity, std::memory_order_relaxed);
+ if (size == 0) {
+ // This is a clear, reset the total/num_erases too.
+ info->total_probe_length.store(0, std::memory_order_relaxed);
+ info->num_erases.store(0, std::memory_order_relaxed);
+ }
+}
+
void RecordInsertSlow(HashtablezInfo* info, size_t hash,
size_t distance_from_desired) {
// SwissTables probe in groups of 16, so scale this to count items probes and
@@ -180,6 +218,14 @@ void RecordInsertSlow(HashtablezInfo* info, size_t hash,
info->size.fetch_add(1, std::memory_order_relaxed);
}
+void RecordEraseSlow(HashtablezInfo* info) {
+ info->size.fetch_sub(1, std::memory_order_relaxed);
+ // There is only one concurrent writer, so `load` then `store` is sufficient
+ // instead of using `fetch_add`.
+ info->num_erases.store(1 + info->num_erases.load(std::memory_order_relaxed),
+ std::memory_order_relaxed);
+}
+
void SetHashtablezConfigListener(HashtablezConfigListener l) {
g_hashtablez_config_listener.store(l, std::memory_order_release);
}