diff options
author | Connal de Souza <connaldesouza@google.com> | 2023-08-04 09:37:41 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2023-08-04 09:39:01 -0700 |
commit | 039d70f69b34b59d9696c655689316a94026fd0e (patch) | |
tree | bd6846859b243eaf41ac6cb2af7b60e15f759dfa | |
parent | 14a91eefa7a3f3bf0a949e82ce5854659588a5c0 (diff) |
Optimize Swissmap Match on Arm.
Currently we require only a single bit to be set in each abstract bit for iterable bitmasks. However, in most cases, where we have a single match, or no matches in a group, iteration is not needed. We move the masking to the iteration function instead of having it as a requirement for iterable Bitmask construction.
This is 4-8% faster for Find and Insert operations.
This can hurt performance if we need to iterate many times (there are many matches in the same Group), however this is unlikely, even if we assume the table is completely full.
If there are 0 or 1 matches in a group, or the first match is the correct item we are looking for, we save 1 instruction/cycle (most cases)
If there are 2 matches in a group, and the first is a false positive, this is neutral (< 3%)
If there are more than 2 matches in a group and the first two are false positives, this can be slower by 1 cycle/instruction per additional iteration (< 0.1%)
No change to x86.
PiperOrigin-RevId: 553831814
Change-Id: I08620899847eaf0086da989d829a1029ea24173a
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 9 |
1 files changed, 6 insertions, 3 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 26cd2e54..4c1e564a 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -361,7 +361,7 @@ uint32_t TrailingZeros(T x) { // width of an abstract bit in the representation. // This mask provides operations for any number of real bits set in an abstract // bit. To add iteration on top of that, implementation must guarantee no more -// than one real bit is set in an abstract bit. +// than the most significant real bit is set in a set abstract bit. template <class T, int SignificantBits, int Shift = 0> class NonIterableBitMask { public: @@ -418,6 +418,10 @@ class BitMask : public NonIterableBitMask<T, SignificantBits, Shift> { using const_iterator = BitMask; BitMask& operator++() { + if (Shift == 3) { + constexpr uint64_t msbs = 0x8080808080808080ULL; + this->mask_ &= msbs; + } this->mask_ &= (this->mask_ - 1); return *this; } @@ -651,9 +655,8 @@ struct GroupAArch64Impl { BitMask<uint64_t, kWidth, 3> Match(h2_t hash) const { uint8x8_t dup = vdup_n_u8(hash); auto mask = vceq_u8(ctrl, dup); - constexpr uint64_t msbs = 0x8080808080808080ULL; return BitMask<uint64_t, kWidth, 3>( - vget_lane_u64(vreinterpret_u64_u8(mask), 0) & msbs); + vget_lane_u64(vreinterpret_u64_u8(mask), 0)); } NonIterableBitMask<uint64_t, kWidth, 3> MaskEmpty() const { |