summaryrefslogtreecommitdiff
path: root/absl
diff options
context:
space:
mode:
Diffstat (limited to 'absl')
-rw-r--r--absl/crc/BUILD.bazel1
-rw-r--r--absl/crc/crc32c_benchmark.cc23
-rw-r--r--absl/crc/internal/crc_internal.h2
-rw-r--r--absl/crc/internal/crc_x86_arm_combined.cc6
4 files changed, 31 insertions, 1 deletions
diff --git a/absl/crc/BUILD.bazel b/absl/crc/BUILD.bazel
index 29374560..1c58f46c 100644
--- a/absl/crc/BUILD.bazel
+++ b/absl/crc/BUILD.bazel
@@ -204,6 +204,7 @@ cc_binary(
deps = [
":crc32c",
"//absl/memory",
+ "//absl/strings",
"@com_github_google_benchmark//:benchmark_main",
],
)
diff --git a/absl/crc/crc32c_benchmark.cc b/absl/crc/crc32c_benchmark.cc
index df99d5cf..3b46ef36 100644
--- a/absl/crc/crc32c_benchmark.cc
+++ b/absl/crc/crc32c_benchmark.cc
@@ -17,6 +17,7 @@
#include "absl/crc/crc32c.h"
#include "absl/crc/internal/crc32c.h"
#include "absl/memory/memory.h"
+#include "absl/strings/string_view.h"
#include "benchmark/benchmark.h"
namespace {
@@ -52,7 +53,27 @@ void BM_Extend(benchmark::State& state) {
benchmark::DoNotOptimize(crc);
}
}
-BENCHMARK(BM_Extend)->Arg(0)->Arg(1)->Arg(100)->Arg(10000)->Arg(500000);
+BENCHMARK(BM_Extend)->Arg(0)->Arg(1)->Arg(100)->Arg(10000)->Arg(500000)->Arg(
+ 100 * 1000 * 1000);
+
+// Make the working set (a 300 MB buffer) >> CPU cache size so that prefetching is actually exercised
+void BM_ExtendCacheMiss(benchmark::State& state) {
+ int len = state.range(0);  // bytes fed to each ExtendCrc32c call (the benchmark Arg)
+ constexpr int total = 300 * 1000 * 1000;  // buffer size requested from TestString
+ std::string extension = TestString(total);
+ absl::crc32c_t base = absl::crc32c_t{0xC99465AA}; // CRC32C of "Hello World"
+ for (auto s : state) {
+ for (int i = 0; i < total; i += len * 2) {  // stride 2*len: every other len-sized chunk is skipped; NOTE(review): assumes len > 0 and that 2*len divides total (true for the registered Args)
+ benchmark::DoNotOptimize(base);
+ benchmark::DoNotOptimize(extension);
+ absl::crc32c_t crc =
+ absl::ExtendCrc32c(base, absl::string_view(&extension[i], len));  // each call extends the same fixed base; results are not chained
+ benchmark::DoNotOptimize(crc);
+ }
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * total / 2);  // each pass CRCs total/(2*len) chunks of len bytes, i.e. total/2 bytes
+}
+BENCHMARK(BM_ExtendCacheMiss)->Arg(10)->Arg(100)->Arg(1000)->Arg(100000);
void BM_ExtendByZeroes(benchmark::State& state) {
absl::crc32c_t base = absl::crc32c_t{0xC99465AA}; // CRC32C of "Hello World"
diff --git a/absl/crc/internal/crc_internal.h b/absl/crc/internal/crc_internal.h
index 7a503433..0611b383 100644
--- a/absl/crc/internal/crc_internal.h
+++ b/absl/crc/internal/crc_internal.h
@@ -29,6 +29,8 @@ namespace crc_internal {
// Prefetch constants used in some Extend() implementations
constexpr int kPrefetchHorizon = ABSL_CACHELINE_SIZE * 4; // Prefetch this far
+// Shorter prefetch distance (one cache line ahead) for smaller buffers
+constexpr int kPrefetchHorizonMedium = ABSL_CACHELINE_SIZE * 1;
static_assert(kPrefetchHorizon >= 64, "CRCPrefetchHorizon less than loop len");
// We require the Scramble() function:
diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc
index f6e6aacb..d71191e3 100644
--- a/absl/crc/internal/crc_x86_arm_combined.cc
+++ b/absl/crc/internal/crc_x86_arm_combined.cc
@@ -429,6 +429,12 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
+ base_internal::PrefetchT0(
+ reinterpret_cast<const char*>(p + kPrefetchHorizonMedium));
+ base_internal::PrefetchT0(
+ reinterpret_cast<const char*>(p1 + kPrefetchHorizonMedium));
+ base_internal::PrefetchT0(
+ reinterpret_cast<const char*>(p2 + kPrefetchHorizonMedium));
}
// Don't run crc on last 8 bytes.
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);