From 6481443560a92d0a3a55a31807de0cd712cd4f88 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 6 Jun 2022 09:28:31 -0700 Subject: Optimize SwissMap for ARM by 3-8% for all operations https://pastebin.com/CmnzwUFN The key idea is to avoid using 16-byte NEON and instead use 8-byte NEON, which has lower latency for BitMask::Match. Even though 16-byte NEON achieves higher throughput, in SwissMap it's very important to catch these matches with low latency, as probing on average happens at most once. I also introduced NonIterableMask, as ARM has really great cbnz instructions and the additional AND on the scalar mask had 1 extra cycle of latency. PiperOrigin-RevId: 453216147 Change-Id: I842c50d323954f8383ae156491232ced55aacb78 --- absl/container/internal/raw_hash_set_benchmark.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'absl/container/internal/raw_hash_set_benchmark.cc') diff --git a/absl/container/internal/raw_hash_set_benchmark.cc b/absl/container/internal/raw_hash_set_benchmark.cc index 146ef433..47dc9048 100644 --- a/absl/container/internal/raw_hash_set_benchmark.cc +++ b/absl/container/internal/raw_hash_set_benchmark.cc @@ -336,27 +336,27 @@ void BM_Group_Match(benchmark::State& state) { } BENCHMARK(BM_Group_Match); -void BM_Group_MatchEmpty(benchmark::State& state) { +void BM_Group_MaskEmpty(benchmark::State& state) { std::array group; Iota(group.begin(), group.end(), -4); Group g{group.data()}; for (auto _ : state) { ::benchmark::DoNotOptimize(g); - ::benchmark::DoNotOptimize(g.MatchEmpty()); + ::benchmark::DoNotOptimize(g.MaskEmpty()); } } -BENCHMARK(BM_Group_MatchEmpty); +BENCHMARK(BM_Group_MaskEmpty); -void BM_Group_MatchEmptyOrDeleted(benchmark::State& state) { +void BM_Group_MaskEmptyOrDeleted(benchmark::State& state) { std::array group; Iota(group.begin(), group.end(), -4); Group g{group.data()}; for (auto _ : state) { ::benchmark::DoNotOptimize(g); - ::benchmark::DoNotOptimize(g.MatchEmptyOrDeleted()); + 
::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted()); } } -BENCHMARK(BM_Group_MatchEmptyOrDeleted); +BENCHMARK(BM_Group_MaskEmptyOrDeleted); void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) { std::array group; @@ -375,7 +375,7 @@ void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) { Group g{group.data()}; for (auto _ : state) { ::benchmark::DoNotOptimize(g); - ::benchmark::DoNotOptimize(*g.MatchEmptyOrDeleted()); + ::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted().LowestBitSet()); } } BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted); -- cgit v1.2.3