diff options
Diffstat (limited to 'absl')
-rw-r--r-- | absl/crc/BUILD.bazel | 1 | ||||
-rw-r--r-- | absl/crc/crc32c_benchmark.cc | 23 | ||||
-rw-r--r-- | absl/crc/internal/crc_internal.h | 2 | ||||
-rw-r--r-- | absl/crc/internal/crc_x86_arm_combined.cc | 6 |
4 files changed, 31 insertions, 1 deletion
diff --git a/absl/crc/BUILD.bazel b/absl/crc/BUILD.bazel index 29374560..1c58f46c 100644 --- a/absl/crc/BUILD.bazel +++ b/absl/crc/BUILD.bazel @@ -204,6 +204,7 @@ cc_binary( deps = [ ":crc32c", "//absl/memory", + "//absl/strings", "@com_github_google_benchmark//:benchmark_main", ], ) diff --git a/absl/crc/crc32c_benchmark.cc b/absl/crc/crc32c_benchmark.cc index df99d5cf..3b46ef36 100644 --- a/absl/crc/crc32c_benchmark.cc +++ b/absl/crc/crc32c_benchmark.cc @@ -17,6 +17,7 @@ #include "absl/crc/crc32c.h" #include "absl/crc/internal/crc32c.h" #include "absl/memory/memory.h" +#include "absl/strings/string_view.h" #include "benchmark/benchmark.h" namespace { @@ -52,7 +53,27 @@ void BM_Extend(benchmark::State& state) { benchmark::DoNotOptimize(crc); } } -BENCHMARK(BM_Extend)->Arg(0)->Arg(1)->Arg(100)->Arg(10000)->Arg(500000); +BENCHMARK(BM_Extend)->Arg(0)->Arg(1)->Arg(100)->Arg(10000)->Arg(500000)->Arg( + 100 * 1000 * 1000); + +// Make working set >> CPU cache size to benchmark prefetches better +void BM_ExtendCacheMiss(benchmark::State& state) { + int len = state.range(0); + constexpr int total = 300 * 1000 * 1000; + std::string extension = TestString(total); + absl::crc32c_t base = absl::crc32c_t{0xC99465AA}; // CRC32C of "Hello World" + for (auto s : state) { + for (int i = 0; i < total; i += len * 2) { + benchmark::DoNotOptimize(base); + benchmark::DoNotOptimize(extension); + absl::crc32c_t crc = + absl::ExtendCrc32c(base, absl::string_view(&extension[i], len)); + benchmark::DoNotOptimize(crc); + } + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * total / 2); +} +BENCHMARK(BM_ExtendCacheMiss)->Arg(10)->Arg(100)->Arg(1000)->Arg(100000); void BM_ExtendByZeroes(benchmark::State& state) { absl::crc32c_t base = absl::crc32c_t{0xC99465AA}; // CRC32C of "Hello World" diff --git a/absl/crc/internal/crc_internal.h b/absl/crc/internal/crc_internal.h index 7a503433..0611b383 100644 --- a/absl/crc/internal/crc_internal.h +++ 
b/absl/crc/internal/crc_internal.h @@ -29,6 +29,8 @@ namespace crc_internal { // Prefetch constants used in some Extend() implementations constexpr int kPrefetchHorizon = ABSL_CACHELINE_SIZE * 4; // Prefetch this far +// Shorter prefetch distance for smaller buffers +constexpr int kPrefetchHorizonMedium = ABSL_CACHELINE_SIZE * 1; static_assert(kPrefetchHorizon >= 64, "CRCPrefetchHorizon less than loop len"); // We require the Scramble() function: diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc index f6e6aacb..d71191e3 100644 --- a/absl/crc/internal/crc_x86_arm_combined.cc +++ b/absl/crc/internal/crc_x86_arm_combined.cc @@ -429,6 +429,12 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); + base_internal::PrefetchT0( + reinterpret_cast<const char*>(p + kPrefetchHorizonMedium)); + base_internal::PrefetchT0( + reinterpret_cast<const char*>(p1 + kPrefetchHorizonMedium)); + base_internal::PrefetchT0( + reinterpret_cast<const char*>(p2 + kPrefetchHorizonMedium)); } // Don't run crc on last 8 bytes. ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); |