summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMake/AbseilDll.cmake1
-rw-r--r--absl/base/BUILD.bazel17
-rw-r--r--absl/base/CMakeLists.txt3
-rw-r--r--absl/base/prefetch.h196
-rw-r--r--absl/base/prefetch_test.cc64
5 files changed, 272 insertions, 9 deletions
diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake
index c4a41e6d..52a563cd 100644
--- a/CMake/AbseilDll.cmake
+++ b/CMake/AbseilDll.cmake
@@ -28,6 +28,7 @@ set(ABSL_INTERNAL_DLL_FILES
"base/internal/low_level_scheduling.h"
"base/internal/per_thread_tls.h"
"base/internal/prefetch.h"
+ "base/prefetch.h"
"base/internal/pretty_function.h"
"base/internal/raw_logging.cc"
"base/internal/raw_logging.h"
diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel
index ded26d6a..b4d1c218 100644
--- a/absl/base/BUILD.bazel
+++ b/absl/base/BUILD.bazel
@@ -732,21 +732,22 @@ cc_test(
cc_library(
name = "prefetch",
- hdrs = ["internal/prefetch.h"],
+ hdrs = [
+ "internal/prefetch.h",
+ "prefetch.h",
+ ],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
- visibility = [
- "//absl:__subpackages__",
- ],
- deps = [
- ":config",
- ],
+ deps = [":config"],
)
cc_test(
name = "prefetch_test",
size = "small",
- srcs = ["internal/prefetch_test.cc"],
+ srcs = [
+ "internal/prefetch_test.cc",
+ "prefetch_test.cc",
+ ],
copts = ABSL_TEST_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
deps = [
diff --git a/absl/base/CMakeLists.txt b/absl/base/CMakeLists.txt
index 26e2b48a..74495d01 100644
--- a/absl/base/CMakeLists.txt
+++ b/absl/base/CMakeLists.txt
@@ -645,11 +645,11 @@ absl_cc_test(
GTest::gtest_main
)
-# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
prefetch
HDRS
+ "prefetch.h"
"internal/prefetch.h"
COPTS
${ABSL_DEFAULT_COPTS}
@@ -663,6 +663,7 @@ absl_cc_test(
NAME
prefetch_test
SRCS
+ "prefetch_test.cc"
"internal/prefetch_test.cc"
COPTS
${ABSL_TEST_COPTS}
diff --git a/absl/base/prefetch.h b/absl/base/prefetch.h
new file mode 100644
index 00000000..4d428462
--- /dev/null
+++ b/absl/base/prefetch.h
@@ -0,0 +1,196 @@
+// Copyright 2023 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: prefetch.h
+// -----------------------------------------------------------------------------
+//
+// This header file defines prefetch functions to prefetch memory contents
+// into the first level cache (L1) for the current CPU. The prefetch logic
+// offered in this header is limited to prefetching first level cachelines
+// only, and is aimed at relatively 'simple' prefetching logic.
+//
+#ifndef ABSL_BASE_PREFETCH_H_
+#define ABSL_BASE_PREFETCH_H_
+
+#include "absl/base/config.h"
+
+#if defined(ABSL_INTERNAL_HAVE_SSE)
+#include <xmmintrin.h>
+#endif
+
+#if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE)
+#include <intrin.h>
+#pragma intrinsic(_mm_prefetch)
+#endif
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+
+// Moves data into the L1 cache before it is read, or "prefetches" it.
+//
+// The value of `addr` is the address of the memory to prefetch. If
+// the target and compiler support it, data prefetch instructions are
+// generated. If the prefetch is done some time before the memory is
+// read, it may be in the cache by the time the read occurs.
+//
+// This method prefetches data with the highest degree of temporal locality;
+// data is prefetched where possible into all levels of the cache.
+//
+// Incorrect or gratuitous use of this function can degrade performance.
+// Use this function only when representative benchmarks show an improvement.
+//
+// Example:
+//
+// // Computes incremental checksum for `data`.
+// int ComputeChecksum(int sum, absl::string_view data);
+//
+// // Computes cumulative checksum for all values in `data`
+// int ComputeChecksum(absl::Span<const std::string> data) {
+// int sum = 0;
+// auto it = data.begin();
+// auto pit = data.begin();
+// auto end = data.end();
+// for (int dist = 8; dist > 0 && pit != end; --dist, ++pit) {
+// absl::PrefetchToLocalCache(pit->data());
+// }
+// for (; pit != end; ++pit, ++it) {
+// sum = ComputeChecksum(sum, *it);
+// absl::PrefetchToLocalCache(pit->data());
+// }
+// for (; it != end; ++it) {
+// sum = ComputeChecksum(sum, *it);
+// }
+// return sum;
+// }
+//
+void PrefetchToLocalCache(const void* addr);
+
+// Moves data into the L1 cache before it is read, or "prefetches" it.
+//
+// This function is identical to `PrefetchToLocalCache()` except that it has
+// non-temporal locality: the fetched data should not be left in any of the
+// cache tiers. This is useful for cases where the data is used only once /
+// short term, for example, invoking a destructor on an object.
+//
+// Incorrect or gratuitous use of this function can degrade performance.
+// Use this function only when representative benchmarks show an improvement.
+//
+// Example:
+//
+//  template <typename Iterator>
+//  void DestroyPointers(Iterator begin, Iterator end) {
+//    // Prefetch (up to) the first 8 pointees before deleting anything.
+//
+//    int dist = 8;
+//    auto prefetch_it = begin;
+//    while (prefetch_it != end && dist-- > 0) {
+//      absl::PrefetchToLocalCacheNta(*prefetch_it++);
+//    }
+//    while (prefetch_it != end) {
+//      delete *begin++;
+//      absl::PrefetchToLocalCacheNta(*prefetch_it++);
+//    }
+//    while (begin != end) {
+//      delete *begin++;
+//    }
+//  }
+//
+void PrefetchToLocalCacheNta(const void* addr);
+
+// Moves data into the L1 cache with the intent to modify it.
+//
+// This function is similar to `PrefetchToLocalCache()` except that it
+// prefetches cachelines with an 'intent to modify'. This typically includes
+// invalidating cache entries for this address in all other cache tiers, and an
+// exclusive access intent.
+//
+// Incorrect or gratuitous use of this function can degrade performance. As this
+// function can invalidate cached cachelines on other caches and computer cores,
+// incorrect usage of this function can have an even greater negative impact
+// than incorrect regular prefetches.
+// Use this function only when representative benchmarks show an improvement.
+//
+// Example:
+//
+//  void* Arena::Allocate(size_t size) {
+//    void* ptr = AllocateBlock(size);
+//    absl::PrefetchToLocalCacheForWrite(ptr);
+//    return ptr;
+//  }
+//
+void PrefetchToLocalCacheForWrite(const void* addr);
+
+#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
+
+#define ABSL_HAVE_PREFETCH 1
+
+// See __builtin_prefetch:
+// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
+//
+inline void PrefetchToLocalCache(const void* addr) {
+ __builtin_prefetch(addr, 0, 3);  // rw=0 (read), locality=3 (all levels)
+}
+
+inline void PrefetchToLocalCacheNta(const void* addr) {
+ __builtin_prefetch(addr, 0, 0);  // rw=0 (read), locality=0 (non-temporal)
+}
+
+inline void PrefetchToLocalCacheForWrite(const void* addr) {
+  // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
+  // unless -march=broadwell or newer; this is not generally the default, so we
+  // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
+  // processors and has been present on AMD processors since the K6-2.
+#if defined(__x86_64__)
+  asm("prefetchw (%0)" : : "r"(addr));
+#else
+  // rw=1 (write), locality=3: same temporal locality as
+  // PrefetchToLocalCache(), only with write intent. Locality 0 would request a
+  // non-temporal prefetch, which is what the Nta variant is for.
+  __builtin_prefetch(addr, 1, 3);
+#endif
+}
+
+#elif defined(ABSL_INTERNAL_HAVE_SSE)
+
+#define ABSL_HAVE_PREFETCH 1
+
+inline void PrefetchToLocalCache(const void* addr) {
+  // _mm_prefetch() is called with a `const char*`; _MM_HINT_T0 is the hint
+  // used for the temporal (keep in cache) variant.
+  const char* const line = static_cast<const char*>(addr);
+  _mm_prefetch(line, _MM_HINT_T0);
+}
+
+inline void PrefetchToLocalCacheNta(const void* addr) {
+  // Same call as the temporal variant, but with the non-temporal hint.
+  const char* const line = static_cast<const char*>(addr);
+  _mm_prefetch(line, _MM_HINT_NTA);
+}
+
+inline void PrefetchToLocalCacheForWrite(const void* addr) {
+#if defined(_MM_HINT_ET0)
+  const char* const line = static_cast<const char*>(addr);
+  _mm_prefetch(line, _MM_HINT_ET0);
+#elif defined(__x86_64__)
+  // _MM_HINT_ET0 is not universally supported, so fall back to emitting
+  // PREFETCHW directly. PREFETCHW is recognized as a no-op on older Intel
+  // processors and has been present on AMD processors since the K6-2.
+  asm("prefetchw (%0)" : : "r"(addr));
+#endif
+}
+
+#else
+
+// No prefetch mechanism was detected for this toolchain: all three functions
+// are no-ops. The parameters are intentionally unnamed so that builds using
+// -Wunused-parameter (or MSVC C4100) stay warning-free.
+inline void PrefetchToLocalCache(const void*) {}
+inline void PrefetchToLocalCacheNta(const void*) {}
+inline void PrefetchToLocalCacheForWrite(const void*) {}
+
+#endif
+
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_BASE_PREFETCH_H_
diff --git a/absl/base/prefetch_test.cc b/absl/base/prefetch_test.cc
new file mode 100644
index 00000000..ee219897
--- /dev/null
+++ b/absl/base/prefetch_test.cc
@@ -0,0 +1,64 @@
+// Copyright 2023 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/prefetch.h"
+
+#include <cstring>
+#include <memory>
+
+#include "gtest/gtest.h"
+
+namespace {
+
+// Below tests exercise the functions only to guarantee they compile and execute
+// correctly. We make no attempt at verifying any prefetch instructions being
+// generated and executed: we assume the various implementation in terms of
+// __builtin_prefetch() or x86 intrinsics to be correct and well tested.
+
+// Calls all three prefetch variants on a zero-initialized 100-byte stack
+// buffer; the test passes if the calls simply return.
+TEST(PrefetchTest, PrefetchToLocalCache_StackA) {
+ char buf[100] = {};
+ absl::PrefetchToLocalCache(buf);
+ absl::PrefetchToLocalCacheNta(buf);
+ absl::PrefetchToLocalCacheForWrite(buf);
+}
+
+// Calls all three prefetch variants at several offsets into a zeroed 200 KiB
+// heap buffer; the test passes if every call simply returns.
+TEST(PrefetchTest, PrefetchToLocalCache_Heap) {
+  constexpr std::size_t kSize = 200 << 10;
+  constexpr std::size_t kStride = 50 << 10;
+  auto memory = std::make_unique<char[]>(kSize);
+  // <cstring> only guarantees the std-qualified name.
+  std::memset(memory.get(), 0, kSize);
+  // Visits offsets 0, 50 KiB, 100 KiB and 150 KiB, in that order.
+  for (std::size_t offset = 0; offset < kSize; offset += kStride) {
+    absl::PrefetchToLocalCache(memory.get() + offset);
+    absl::PrefetchToLocalCacheNta(memory.get() + offset);
+    absl::PrefetchToLocalCacheForWrite(memory.get() + offset);
+  }
+}
+
+// Calls all three prefetch variants with a null address; the test passes if
+// the calls simply return.
+TEST(PrefetchTest, PrefetchToLocalCache_Nullptr) {
+ absl::PrefetchToLocalCache(nullptr);
+ absl::PrefetchToLocalCacheNta(nullptr);
+ absl::PrefetchToLocalCacheForWrite(nullptr);
+}
+
+// Calls all three prefetch variants with integer constants cast to pointers
+// (addresses that do not come from any allocation made by the test); the test
+// passes if the calls simply return.
+TEST(PrefetchTest, PrefetchToLocalCache_InvalidPtr) {
+ absl::PrefetchToLocalCache(reinterpret_cast<const void*>(0x785326532L));
+ absl::PrefetchToLocalCacheNta(reinterpret_cast<const void*>(0x785326532L));
+ absl::PrefetchToLocalCacheForWrite(reinterpret_cast<const void*>(0x78532L));
+}
+
+} // namespace