From 95505fbfbabc5240b623017d44093bb82f4912a9 Mon Sep 17 00:00:00 2001 From: Connal de Souza Date: Mon, 19 Dec 2022 07:43:11 -0800 Subject: Optimize raw_hash_set CountLeadingEmptyOrDeleted() on Arm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit name old cpu/op new cpu/op delta BM_Group_CountLeadingEmptyOrDeleted 0.98ns ± 0% 0.78ns ± 0% -20.51% (p=0.000 n=10+10) PiperOrigin-RevId: 496397005 Change-Id: I1c6b325b14566da194f21d3387b6f4d838bf0b34 --- absl/container/internal/raw_hash_set.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'absl/container/internal/raw_hash_set.h') diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 8a33106f..fb945c6c 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -629,14 +629,16 @@ struct GroupAArch64Impl { } uint32_t CountLeadingEmptyOrDeleted() const { - uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(ctrl), 0); - // ctrl | ~(ctrl >> 7) will have the lowest bit set to zero for kEmpty and - // kDeleted. We lower all other bits and count number of trailing zeros. + uint64_t mask = + vget_lane_u64(vreinterpret_u64_u8(vcle_s8( + vdup_n_s8(static_cast(ctrl_t::kSentinel)), + vreinterpret_s8_u8(ctrl))), + 0); + // Similar to MaskEmptyorDeleted() but we invert the logic to invert the + // produced bitfield. We then count number of trailing zeros. // Clang and GCC optimize countr_zero to rbit+clz without any check for 0, // so we should be fine. - constexpr uint64_t bits = 0x0101010101010101ULL; - return static_cast(countr_zero((mask | ~(mask >> 7)) & bits) >> - 3); + return static_cast(countr_zero(mask)) >> 3; } void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const { -- cgit v1.2.3