summaryrefslogtreecommitdiff
path: root/absl/container/internal/raw_hash_set.h
diff options
context:
space:
mode:
authorGravatar Connal de Souza <connaldesouza@google.com>2022-12-19 07:43:11 -0800
committerGravatar Copybara-Service <copybara-worker@google.com>2022-12-19 07:43:57 -0800
commit95505fbfbabc5240b623017d44093bb82f4912a9 (patch)
tree61e6b5a88a8da5038d0d7c45cde5307efc05ced4 /absl/container/internal/raw_hash_set.h
parent2468b189eec992ff810f88d7dfbdc5c4dd9094ea (diff)
Optimize raw_hash_set CountLeadingEmptyOrDeleted() on Arm
name old cpu/op new cpu/op delta BM_Group_CountLeadingEmptyOrDeleted 0.98ns ± 0% 0.78ns ± 0% -20.51% (p=0.000 n=10+10) PiperOrigin-RevId: 496397005 Change-Id: I1c6b325b14566da194f21d3387b6f4d838bf0b34
Diffstat (limited to 'absl/container/internal/raw_hash_set.h')
-rw-r--r--absl/container/internal/raw_hash_set.h14
1 files changed, 8 insertions, 6 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 8a33106f..fb945c6c 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -629,14 +629,16 @@ struct GroupAArch64Impl {
}
uint32_t CountLeadingEmptyOrDeleted() const {
- uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(ctrl), 0);
- // ctrl | ~(ctrl >> 7) will have the lowest bit set to zero for kEmpty and
- // kDeleted. We lower all other bits and count number of trailing zeros.
+ uint64_t mask =
+ vget_lane_u64(vreinterpret_u64_u8(vcle_s8(
+ vdup_n_s8(static_cast<int8_t>(ctrl_t::kSentinel)),
+ vreinterpret_s8_u8(ctrl))),
+ 0);
+ // Similar to MaskEmptyorDeleted() but we invert the logic to invert the
+ // produced bitfield. We then count number of trailing zeros.
// Clang and GCC optimize countr_zero to rbit+clz without any check for 0,
// so we should be fine.
- constexpr uint64_t bits = 0x0101010101010101ULL;
- return static_cast<uint32_t>(countr_zero((mask | ~(mask >> 7)) & bits) >>
- 3);
+ return static_cast<uint32_t>(countr_zero(mask)) >> 3;
}
void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const {