diff options
Diffstat (limited to 'absl/strings/internal/memutil_benchmark.cc')
-rw-r--r-- | absl/strings/internal/memutil_benchmark.cc | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/absl/strings/internal/memutil_benchmark.cc b/absl/strings/internal/memutil_benchmark.cc new file mode 100644 index 00000000..77915adb --- /dev/null +++ b/absl/strings/internal/memutil_benchmark.cc @@ -0,0 +1,323 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/memutil.h" + +#include <algorithm> +#include <cstdlib> + +#include "benchmark/benchmark.h" +#include "absl/strings/ascii.h" + +// We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab. +// That gives us: +// - an easy search: 'b' +// - a medium search: 'ab'. That means every letter is a possible match. +// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haytack) +// We benchmark case-sensitive and case-insensitive versions of +// three memmem implementations: +// - memmem() from memutil.h +// - search() from STL +// - memmatch(), a custom implementation using memchr and memcmp. +// Here are sample results: +// +// Run on (12 X 3800 MHz CPU s) +// CPU Caches: +// L1 Data 32K (x6) +// L1 Instruction 32K (x6) +// L2 Unified 256K (x6) +// L3 Unified 15360K (x1) +// ---------------------------------------------------------------- +// Benchmark Time CPU Iterations +// ---------------------------------------------------------------- +// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s +// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s +// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s +// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s +// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s +// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s +// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s +// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s +// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s +// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s +// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s +// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s +// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s +// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s +// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s +// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s +// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s +// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s +// BM_MemmemStartup 5 ns 5 ns 150821500 +// BM_SearchStartup 5 ns 5 ns 150644203 +// BM_MemmatchStartup 7 ns 7 ns 97068802 +// +// Conclusions: +// +// The following recommendations are based on the sample results above. However, +// we have found that the performance of STL search can vary significantly +// depending on compiler and standard library implementation. We recommend you +// run the benchmarks for yourself on relevant platforms. +// +// If you need case-insensitive, STL search is slightly better than memmem for +// all cases. +// +// Case-sensitive is more subtle: +// Custom memmatch is _very_ fast at scanning, so if you have very few possible +// matches in your haystack, that's the way to go. Performance drops +// significantly with more matches. +// +// STL search is slightly faster than memmem in the medium and pathological +// benchmarks. However, the performance of memmem is currently more dependable +// across platforms and build configurations. + +namespace { + +constexpr int kHaystackSize = 10000; +constexpr int64_t kHaystackSize64 = kHaystackSize; +const char* MakeHaystack() { + char* haystack = new char[kHaystackSize]; + for (int i = 0; i < kHaystackSize - 1; ++i) haystack[i] = 'a'; + haystack[kHaystackSize - 1] = 'b'; + return haystack; +} +const char* const kHaystack = MakeHaystack(); + +void BM_Memmem(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memmem); + +void BM_MemmemMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmemMedium); + +void BM_MemmemPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmem( + kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmemPathological); + +void BM_Memcasemem(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memcasemem); + +void BM_MemcasememMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasememMedium); + +void BM_MemcasememPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memcasemem( + kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasememPathological); + +bool case_eq(const char a, const char b) { + return absl::ascii_tolower(a) == absl::ascii_tolower(b); +} + +void BM_Search(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 1, + kHaystack + kHaystackSize)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Search); + +void BM_SearchMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 2, + kHaystack + kHaystackSize)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchMedium); + +void BM_SearchPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize / 2, + kHaystack + kHaystackSize)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchPathological); + +void BM_Searchcase(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 1, + kHaystack + kHaystackSize, case_eq)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Searchcase); + +void BM_SearchcaseMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 2, + kHaystack + kHaystackSize, case_eq)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchcaseMedium); + +void BM_SearchcasePathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize / 2, + kHaystack + kHaystackSize, case_eq)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchcasePathological); + +char* memcasechr(const char* s, int c, size_t slen) { + c = absl::ascii_tolower(c); + for (; slen; ++s, --slen) { + if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s); + } + return nullptr; +} + +const char* memcasematch(const char* phaystack, size_t haylen, + const char* pneedle, size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + while ((match = static_cast<char*>( + memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { + if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +void BM_Memmatch(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memmatch); + +void BM_MemmatchMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmatchMedium); + +void BM_MemmatchPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmatch( + kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmatchPathological); + +void BM_Memcasematch(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memcasematch); + +void BM_MemcasematchMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasematchMedium); + +void BM_MemcasematchPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, + kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasematchPathological); + +void BM_MemmemStartup(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmem( + kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); + } +} +BENCHMARK(BM_MemmemStartup); + +void BM_SearchStartup(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize)); + } +} +BENCHMARK(BM_SearchStartup); + +void BM_MemmatchStartup(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmatch( + kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); + } +} +BENCHMARK(BM_MemmatchStartup); + +} // namespace |