 absl/container/internal/raw_hash_set.h | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 4bbb85f4..9d789b48 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -740,13 +740,13 @@ struct GroupPortableImpl {
NonIterableBitMask<uint64_t, kWidth, 3> MaskEmpty() const {
constexpr uint64_t msbs = 0x8080808080808080ULL;
- return NonIterableBitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 6)) &
+ return NonIterableBitMask<uint64_t, kWidth, 3>((ctrl & ~(ctrl << 6)) &
msbs);
}
NonIterableBitMask<uint64_t, kWidth, 3> MaskEmptyOrDeleted() const {
constexpr uint64_t msbs = 0x8080808080808080ULL;
- return NonIterableBitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 7)) &
+ return NonIterableBitMask<uint64_t, kWidth, 3>((ctrl & ~(ctrl << 7)) &
msbs);
}
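
Two notes on the hunk above. Correctness: ~ctrl << 6 parses as (~ctrl) << 6,
and under the msbs mask both forms leave the same bit 7 in each byte (the
complement of that byte's bit 1, or of bit 0 in the shift-by-7 case), so the
rewrite does not change results. Codegen: the new form a & ~(b << n) can map
to a single AArch64 BIC instruction with a shifted-register operand, which
fits the ARM-oriented tuning in the rest of this diff.

A minimal standalone sketch of the underlying bit trick, assuming the
SwissTable control-byte encodings (kEmpty = 0x80, kDeleted = 0xFE,
kSentinel = 0xFF, full slots = 0x00..0x7F); the function names here are
illustrative, not the library's:

#include <cstdint>
#include <cstdio>

// A control byte is non-full iff its MSB is set. Bit 1 separates kEmpty
// (bit 1 clear) from kDeleted/kSentinel; bit 0 separates kSentinel
// (bit 0 set) from kEmpty/kDeleted.
uint64_t mask_empty(uint64_t ctrl) {
  constexpr uint64_t msbs = 0x8080808080808080ULL;
  // ctrl << 6 moves each byte's bit 1 into that byte's MSB slot, so
  // ~(ctrl << 6) has the MSB set exactly where bit 1 was clear.
  return (ctrl & ~(ctrl << 6)) & msbs;
}

uint64_t mask_empty_or_deleted(uint64_t ctrl) {
  constexpr uint64_t msbs = 0x8080808080808080ULL;
  // ctrl << 7 moves bit 0 into the MSB slot, dropping kSentinel (0xFF)
  // while keeping kEmpty (0x80) and kDeleted (0xFE).
  return (ctrl & ~(ctrl << 7)) & msbs;
}

int main() {
  // Bytes, high to low: kEmpty, kSentinel, full, kDeleted, kEmpty, 3 fulls.
  uint64_t ctrl = 0x80FF17FE80170017ULL;
  printf("%016llx\n", (unsigned long long)mask_empty(ctrl));             // 8000000080000000
  printf("%016llx\n", (unsigned long long)mask_empty_or_deleted(ctrl));  // 8000008080000000
}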
@@ -771,10 +771,21 @@ struct GroupPortableImpl {
#ifdef ABSL_INTERNAL_HAVE_SSE2
using Group = GroupSse2Impl;
+using GroupEmptyOrDeleted = GroupSse2Impl;
#elif defined(ABSL_INTERNAL_HAVE_ARM_NEON) && defined(ABSL_IS_LITTLE_ENDIAN)
using Group = GroupAArch64Impl;
+// For AArch64, we use the portable implementation for counting and masking
+// empty or deleted group elements. This is to avoid the latency of moving
+// between data GPRs and Neon registers when it does not provide a benefit.
+// Using Neon is profitable when we call Match(), but is not when we don't,
+// which is the case when we do *EmptyOrDeleted operations. It is difficult to
+// make a similar approach beneficial on other architectures such as x86 since
+// they have much lower GPR <-> vector register transfer latency and 16-wide
+// Groups.
+using GroupEmptyOrDeleted = GroupPortableImpl;
#else
using Group = GroupPortableImpl;
+using GroupEmptyOrDeleted = GroupPortableImpl;
#endif
// When there is an insertion with no reserved growth, we rehash with
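
The aliases above split group access by workload: Group is for lookup paths
that call Match(), where loading the control bytes into a NEON register pays
off; GroupEmptyOrDeleted is for paths that only mask or count empty/deleted
slots, where the portable uint64_t version stays in general-purpose
registers and skips the GPR <-> vector round trip. A hedged, self-contained
sketch of the dispatch pattern (the structs are stand-ins, not the real
implementations):

// Stand-ins for the real group types in raw_hash_set.h.
struct GroupNeonStub {
  // Real version: 8 ctrl bytes in a NEON register; Match(h2) is cheap,
  // but moving a scalar mask back to a GPR has noticeable latency.
};
struct GroupPortableStub {
  // Real version: 8 ctrl bytes in a uint64_t; the *EmptyOrDeleted masks
  // and counts are a handful of ALU instructions, with no transfer.
};

#if defined(__aarch64__)
using Group = GroupNeonStub;                    // lookup: Match() amortizes the move
using GroupEmptyOrDeleted = GroupPortableStub;  // probe/skip: stay in GPRs
#else
using Group = GroupPortableStub;
using GroupEmptyOrDeleted = GroupPortableStub;
#endif

The next two hunks apply the call-site half of this split:
find_first_non_full() and the iterator's skip_empty_or_deleted() never call
Match(), so both move to GroupEmptyOrDeleted.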
@@ -1360,7 +1371,7 @@ inline FindInfo find_first_non_full(const CommonFields& common, size_t hash) {
auto seq = probe(common, hash);
const ctrl_t* ctrl = common.control();
while (true) {
- Group g{ctrl + seq.offset()};
+ GroupEmptyOrDeleted g{ctrl + seq.offset()};
auto mask = g.MaskEmptyOrDeleted();
if (mask) {
#if !defined(NDEBUG)
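
find_first_non_full() is the insertion-side probe: it scans group by group
for the first empty or deleted slot and never calls Match(), so switching it
to GroupEmptyOrDeleted is a pure win on AArch64. A simplified,
self-contained sketch of the loop's shape (linear probing over 8-byte groups
instead of the library's probe sequence; assumes a little-endian load, a
power-of-two capacity that is a multiple of 8, and that an empty or deleted
slot exists):

#include <bit>
#include <cstddef>
#include <cstdint>
#include <cstring>

size_t find_first_non_full_sketch(const uint8_t* ctrl, size_t capacity,
                                  size_t offset) {
  constexpr uint64_t msbs = 0x8080808080808080ULL;
  for (;;) {
    uint64_t group;
    std::memcpy(&group, ctrl + offset, sizeof(group));  // load 8 ctrl bytes
    uint64_t mask = (group & ~(group << 7)) & msbs;     // kEmpty/kDeleted MSBs
    if (mask != 0) {
      // The lowest set MSB sits at bit 8*j + 7 of the j-th byte (little
      // endian), so countr_zero / 8 is the byte index within the group.
      return offset + static_cast<size_t>(std::countr_zero(mask)) / 8;
    }
    offset = (offset + 8) & (capacity - 1);  // next group, wrapping
  }
}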
@@ -1699,7 +1710,8 @@ class raw_hash_set {
// If a sentinel is reached, we null `ctrl_` out instead.
void skip_empty_or_deleted() {
while (IsEmptyOrDeleted(*ctrl_)) {
- uint32_t shift = Group{ctrl_}.CountLeadingEmptyOrDeleted();
+ uint32_t shift =
+ GroupEmptyOrDeleted{ctrl_}.CountLeadingEmptyOrDeleted();
ctrl_ += shift;
slot_ += shift;
}
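
The last hunk is the iterator-side counterpart: skip_empty_or_deleted() only
needs to count how many empty or deleted control bytes lead the current
group, so the portable GPR implementation is the right tool on AArch64. A
hedged sketch of how that count can be computed over one 8-byte group
(written from scratch here to mirror the shape of GroupPortableImpl's
version; little-endian byte order assumed):

#include <bit>
#include <cstdint>

// Counts how many of the lowest-address control bytes in an 8-byte group
// are kEmpty (0x80) or kDeleted (0xFE); returns 8 if all of them are.
uint32_t count_leading_empty_or_deleted(uint64_t ctrl) {
  // For kEmpty and kDeleted the MSB is set and bit 0 is clear, so
  // ctrl | ~(ctrl >> 7) has bit 0 of each byte clear exactly there.
  constexpr uint64_t lsbs = 0x0101010101010101ULL;
  uint64_t stop = (ctrl | ~(ctrl >> 7)) & lsbs;
  // countr_zero finds the first "stop" byte; >> 3 converts bits to bytes.
  // countr_zero(0) == 64, so an all-empty/deleted group yields 8.
  return static_cast<uint32_t>(std::countr_zero(stop)) >> 3;
}

With that count in hand, the loop above adds the returned shift to both
ctrl_ and slot_ and re-tests, so long runs of tombstones are skipped up to
eight slots at a time.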