diff options
author | Connal de Souza <connaldesouza@google.com> | 2022-12-19 07:43:11 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2022-12-19 07:43:57 -0800 |
commit | 95505fbfbabc5240b623017d44093bb82f4912a9 (patch) | |
tree | 61e6b5a88a8da5038d0d7c45cde5307efc05ced4 /absl | |
parent | 2468b189eec992ff810f88d7dfbdc5c4dd9094ea (diff) |
Optimize raw_hash_set CountLeadingEmptyOrDeleted() on Arm
name old cpu/op new cpu/op delta
BM_Group_CountLeadingEmptyOrDeleted 0.98ns ± 0% 0.78ns ± 0% -20.51% (p=0.000 n=10+10)
PiperOrigin-RevId: 496397005
Change-Id: I1c6b325b14566da194f21d3387b6f4d838bf0b34
Diffstat (limited to 'absl')
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 14 |
1 files changed, 8 insertions, 6 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 8a33106f..fb945c6c 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -629,14 +629,16 @@ struct GroupAArch64Impl {
   }
 
   uint32_t CountLeadingEmptyOrDeleted() const {
-    uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(ctrl), 0);
-    // ctrl | ~(ctrl >> 7) will have the lowest bit set to zero for kEmpty and
-    // kDeleted. We lower all other bits and count number of trailing zeros.
+    uint64_t mask =
+        vget_lane_u64(vreinterpret_u64_u8(vcle_s8(
+                          vdup_n_s8(static_cast<int8_t>(ctrl_t::kSentinel)),
+                          vreinterpret_s8_u8(ctrl))),
+                      0);
+    // Similar to MaskEmptyorDeleted() but we invert the logic to invert the
+    // produced bitfield. We then count number of trailing zeros.
     // Clang and GCC optimize countr_zero to rbit+clz without any check for 0,
     // so we should be fine.
-    constexpr uint64_t bits = 0x0101010101010101ULL;
-    return static_cast<uint32_t>(countr_zero((mask | ~(mask >> 7)) & bits) >>
-                                 3);
+    return static_cast<uint32_t>(countr_zero(mask)) >> 3;
   }
 
   void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const {