diff options
author | Abseil Team <absl-team@google.com> | 2024-02-15 23:54:17 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-02-15 23:55:05 -0800 |
commit | 8a3caf7dea955b513a6c1b572a2423c6b4213402 (patch) | |
tree | de68093898cdea318859c9e37dcb7eaf20c700b2 /absl | |
parent | 4580d86d07dbd7614aaedce0b519752334245db6 (diff) |
Introduce `Group::MaskNonFull` without usage.
It can be used instead of `MaskEmptyOrDeleted` in case of inserting to empty table.
`MaskNonFull` requires less operations, in particular it eliminates `_mm_set1_epi8` and `_mm_cmpgt_epi8` operations.
PiperOrigin-RevId: 607587394
Change-Id: Ia48f922d1ca6de38cc91e7ab0d608c45f8f2c446
Diffstat (limited to 'absl')
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 27 | ||||
-rw-r--r-- | absl/container/internal/raw_hash_set_benchmark.cc | 22 | ||||
-rw-r--r-- | absl/container/internal/raw_hash_set_test.cc | 19 |
3 files changed, 68 insertions, 0 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index d6950e61..cef042e7 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -657,6 +657,14 @@ struct GroupSse2Impl { static_cast<uint16_t>(_mm_movemask_epi8(ctrl) ^ 0xffff)); } + // Returns a bitmask representing the positions of non full slots. + // Note: this includes: kEmpty, kDeleted, kSentinel. + // It is useful in contexts when kSentinel is not present. + auto MaskNonFull() const { + return BitMask<uint16_t, kWidth>( + static_cast<uint16_t>(_mm_movemask_epi8(ctrl))); + } + // Returns a bitmask representing the positions of empty or deleted slots. NonIterableBitMask<uint16_t, kWidth> MaskEmptyOrDeleted() const { auto special = _mm_set1_epi8(static_cast<char>(ctrl_t::kSentinel)); @@ -725,6 +733,18 @@ struct GroupAArch64Impl { /*NullifyBitsOnIteration=*/true>(mask); } + // Returns a bitmask representing the positions of non full slots. + // Note: this includes: kEmpty, kDeleted, kSentinel. + // It is useful in contexts when kSentinel is not present. + auto MaskNonFull() const { + uint64_t mask = vget_lane_u64( + vreinterpret_u64_u8(vclt_s8(vreinterpret_s8_u8(ctrl), + vdup_n_s8(static_cast<int8_t>(0)))), + 0); + return BitMask<uint64_t, kWidth, /*Shift=*/3, + /*NullifyBitsOnIteration=*/true>(mask); + } + NonIterableBitMask<uint64_t, kWidth, 3> MaskEmptyOrDeleted() const { uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(vcgt_s8( @@ -797,6 +817,13 @@ struct GroupPortableImpl { return BitMask<uint64_t, kWidth, 3>((ctrl ^ kMsbs8Bytes) & kMsbs8Bytes); } + // Returns a bitmask representing the positions of non full slots. + // Note: this includes: kEmpty, kDeleted, kSentinel. + // It is useful in contexts when kSentinel is not present. + auto MaskNonFull() const { + return BitMask<uint64_t, kWidth, 3>(ctrl & kMsbs8Bytes); + } + NonIterableBitMask<uint64_t, kWidth, 3> MaskEmptyOrDeleted() const { return NonIterableBitMask<uint64_t, kWidth, 3>((ctrl & ~(ctrl << 7)) & kMsbs8Bytes); diff --git a/absl/container/internal/raw_hash_set_benchmark.cc b/absl/container/internal/raw_hash_set_benchmark.cc index bc8184d4..8cd43fb5 100644 --- a/absl/container/internal/raw_hash_set_benchmark.cc +++ b/absl/container/internal/raw_hash_set_benchmark.cc @@ -479,6 +479,17 @@ void BM_Group_MaskEmptyOrDeleted(benchmark::State& state) { } BENCHMARK(BM_Group_MaskEmptyOrDeleted); +void BM_Group_MaskNonFull(benchmark::State& state) { + std::array<ctrl_t, Group::kWidth> group; + Iota(group.begin(), group.end(), -4); + Group g{group.data()}; + for (auto _ : state) { + ::benchmark::DoNotOptimize(g); + ::benchmark::DoNotOptimize(g.MaskNonFull()); + } +} +BENCHMARK(BM_Group_MaskNonFull); + void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) { std::array<ctrl_t, Group::kWidth> group; Iota(group.begin(), group.end(), -2); @@ -501,6 +512,17 @@ void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) { } BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted); +void BM_Group_MatchFirstNonFull(benchmark::State& state) { + std::array<ctrl_t, Group::kWidth> group; + Iota(group.begin(), group.end(), -2); + Group g{group.data()}; + for (auto _ : state) { + ::benchmark::DoNotOptimize(g); + ::benchmark::DoNotOptimize(g.MaskNonFull().LowestBitSet()); + } +} +BENCHMARK(BM_Group_MatchFirstNonFull); + void BM_DropDeletes(benchmark::State& state) { constexpr size_t capacity = (1 << 20) - 1; std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth); diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index 1c428ea7..8e196fdf 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -307,6 +307,25 @@ TEST(Group, MaskFull) { } } +TEST(Group, MaskNonFull) { + if (Group::kWidth == 16) { + ctrl_t group[] = { + ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), + ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), + CtrlT(7), CtrlT(5), ctrl_t::kDeleted, CtrlT(1), + CtrlT(1), ctrl_t::kSentinel, ctrl_t::kEmpty, CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskNonFull(), + ElementsAre(0, 2, 4, 6, 10, 13, 14)); + } else if (Group::kWidth == 8) { + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, + ctrl_t::kDeleted, CtrlT(2), ctrl_t::kSentinel, + ctrl_t::kSentinel, CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskNonFull(), ElementsAre(0, 2, 3, 5, 6)); + } else { + FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + } +} + TEST(Group, MaskEmptyOrDeleted) { if (Group::kWidth == 16) { ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, CtrlT(3), |