summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Connal de Souza <connaldesouza@google.com>2023-08-04 09:37:41 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2023-08-04 09:39:01 -0700
commit039d70f69b34b59d9696c655689316a94026fd0e (patch)
treebd6846859b243eaf41ac6cb2af7b60e15f759dfa
parent14a91eefa7a3f3bf0a949e82ce5854659588a5c0 (diff)
Optimize Swissmap Match on Arm.
Currently we require only a single bit to be set in each abstract bit for iterable bitmasks. However, in most cases, where we have a single match, or no matches in a group, iteration is not needed. We move the masking to the iteration function instead of having it as a requirement for iterable Bitmask construction. This is 4-8% faster for Find and Insert operations. This can hurt performance if we need to iterate many times (there are many matches in the same Group), however this is unlikely, even if we assume the table is completely full. If there are 0 or 1 matches in a group, or the first match is the correct item we are looking for, we save 1 instruction/cycle (most cases) If there are 2 matches in a group, and the first is a false positive, this is neutral (< 3%) If there are more than 2 matches in a group and the first two are false positives, this can be slower by 1 cycle/instruction per additional iteration (< 0.1%) No change to x86. PiperOrigin-RevId: 553831814 Change-Id: I08620899847eaf0086da989d829a1029ea24173a
-rw-r--r--absl/container/internal/raw_hash_set.h9
1 files changed, 6 insertions, 3 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 26cd2e54..4c1e564a 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -361,7 +361,7 @@ uint32_t TrailingZeros(T x) {
// width of an abstract bit in the representation.
// This mask provides operations for any number of real bits set in an abstract
// bit. To add iteration on top of that, implementation must guarantee no more
-// than one real bit is set in an abstract bit.
+// than the most significant real bit is set in a set abstract bit.
template <class T, int SignificantBits, int Shift = 0>
class NonIterableBitMask {
public:
@@ -418,6 +418,10 @@ class BitMask : public NonIterableBitMask<T, SignificantBits, Shift> {
using const_iterator = BitMask;
BitMask& operator++() {
+ if (Shift == 3) {
+ constexpr uint64_t msbs = 0x8080808080808080ULL;
+ this->mask_ &= msbs;
+ }
this->mask_ &= (this->mask_ - 1);
return *this;
}
@@ -651,9 +655,8 @@ struct GroupAArch64Impl {
BitMask<uint64_t, kWidth, 3> Match(h2_t hash) const {
uint8x8_t dup = vdup_n_u8(hash);
auto mask = vceq_u8(ctrl, dup);
- constexpr uint64_t msbs = 0x8080808080808080ULL;
return BitMask<uint64_t, kWidth, 3>(
- vget_lane_u64(vreinterpret_u64_u8(mask), 0) & msbs);
+ vget_lane_u64(vreinterpret_u64_u8(mask), 0));
}
NonIterableBitMask<uint64_t, kWidth, 3> MaskEmpty() const {