From db51f68788bfc505e1d68a731b0175c593cf89cd Mon Sep 17 00:00:00 2001 From: Martijn Vels Date: Thu, 26 Jan 2023 14:22:33 -0800 Subject: Introduce Abseil Prefetch API PiperOrigin-RevId: 504941246 Change-Id: I94c1e85afd254e84948477b511d41eeb8285fdae --- CMake/AbseilDll.cmake | 1 + absl/base/BUILD.bazel | 17 ++-- absl/base/CMakeLists.txt | 3 +- absl/base/prefetch.h | 196 +++++++++++++++++++++++++++++++++++++++++++++ absl/base/prefetch_test.cc | 64 +++++++++++++++ 5 files changed, 272 insertions(+), 9 deletions(-) create mode 100644 absl/base/prefetch.h create mode 100644 absl/base/prefetch_test.cc diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index c4a41e6d..52a563cd 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake @@ -28,6 +28,7 @@ set(ABSL_INTERNAL_DLL_FILES "base/internal/low_level_scheduling.h" "base/internal/per_thread_tls.h" "base/internal/prefetch.h" + "base/prefetch.h" "base/internal/pretty_function.h" "base/internal/raw_logging.cc" "base/internal/raw_logging.h" diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel index ded26d6a..b4d1c218 100644 --- a/absl/base/BUILD.bazel +++ b/absl/base/BUILD.bazel @@ -732,21 +732,22 @@ cc_test( cc_library( name = "prefetch", - hdrs = ["internal/prefetch.h"], + hdrs = [ + "internal/prefetch.h", + "prefetch.h", + ], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = [ - "//absl:__subpackages__", - ], - deps = [ - ":config", - ], + deps = [":config"], ) cc_test( name = "prefetch_test", size = "small", - srcs = ["internal/prefetch_test.cc"], + srcs = [ + "internal/prefetch_test.cc", + "prefetch_test.cc", + ], copts = ABSL_TEST_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ diff --git a/absl/base/CMakeLists.txt b/absl/base/CMakeLists.txt index 26e2b48a..74495d01 100644 --- a/absl/base/CMakeLists.txt +++ b/absl/base/CMakeLists.txt @@ -645,11 +645,11 @@ absl_cc_test( GTest::gtest_main ) -# Internal-only target, do not depend on directly. 
absl_cc_library( NAME prefetch HDRS + "prefetch.h" "internal/prefetch.h" COPTS ${ABSL_DEFAULT_COPTS} @@ -663,6 +663,7 @@ absl_cc_test( NAME prefetch_test SRCS + "prefetch_test.cc" "internal/prefetch_test.cc" COPTS ${ABSL_TEST_COPTS} diff --git a/absl/base/prefetch.h b/absl/base/prefetch.h new file mode 100644 index 00000000..4d428462 --- /dev/null +++ b/absl/base/prefetch.h @@ -0,0 +1,196 @@ +// Copyright 2023 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: prefetch.h +// ----------------------------------------------------------------------------- +// +// This header file defines prefetch functions to prefetch memory contents +// into the first level cache (L1) for the current CPU. The prefetch logic +// offered in this header is limited to prefetching first level cachelines +// only, and is aimed at relatively 'simple' prefetching logic. +// +#ifndef ABSL_BASE_PREFETCH_H_ +#define ABSL_BASE_PREFETCH_H_ + +#include "absl/base/config.h" + +#if defined(ABSL_INTERNAL_HAVE_SSE) +#include <xmmintrin.h> +#endif + +#if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE) +#include <intrin.h> +#pragma intrinsic(_mm_prefetch) +#endif + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// Moves data into the L1 cache before it is read, or "prefetches" it. +// +// The value of `addr` is the address of the memory to prefetch. 
If +// the target and compiler support it, data prefetch instructions are +// generated. If the prefetch is done some time before the memory is +// read, it may be in the cache by the time the read occurs. +// +// This method prefetches data with the highest degree of temporal locality; +// data is prefetched where possible into all levels of the cache. +// +// Incorrect or gratuitous use of this function can degrade performance. +// Use this function only when representative benchmarks show an improvement. +// +// Example: +// +// // Computes incremental checksum for `data`. +// int ComputeChecksum(int sum, absl::string_view data); +// +// // Computes cumulative checksum for all values in `data`. +// int ComputeChecksum(absl::Span<const std::string> data) { +// int sum = 0; +// auto it = data.begin(); +// auto pit = data.begin(); +// auto end = data.end(); +// for (int dist = 8; dist > 0 && pit != end; --dist, ++pit) { +// absl::PrefetchToLocalCache(pit->data()); +// } +// for (; pit != end; ++pit, ++it) { +// sum = ComputeChecksum(sum, *it); +// absl::PrefetchToLocalCache(pit->data()); +// } +// for (; it != end; ++it) { +// sum = ComputeChecksum(sum, *it); +// } +// return sum; +// } +// +void PrefetchToLocalCache(const void* addr); + +// Moves data into the L1 cache before it is read, or "prefetches" it. +// +// This function is identical to `PrefetchToLocalCache()` except that it has +// non-temporal locality: the fetched data should not be left in any of the +// cache tiers. This is useful for cases where the data is used only once / +// short term, for example, invoking a destructor on an object. +// +// Incorrect or gratuitous use of this function can degrade performance. +// Use this function only when representative benchmarks show an improvement. 
+// +// Example: +// +// template <typename Iterator> +// void DestroyPointers(Iterator begin, Iterator end) { +// // Prefetch a few elements ahead of the one being destroyed. +// +// int dist = 8; +// auto prefetch_it = begin; +// while (prefetch_it != end && --dist) { +// absl::PrefetchToLocalCacheNta(*prefetch_it++); +// } +// while (prefetch_it != end) { +// delete *begin++; +// absl::PrefetchToLocalCacheNta(*prefetch_it++); +// } +// while (begin != end) { +// delete *begin++; +// } +// } +// +void PrefetchToLocalCacheNta(const void* addr); + +// Moves data into the L1 cache with the intent to modify it. +// +// This function is similar to `PrefetchToLocalCache()` except that it +// prefetches cachelines with an 'intent to modify'. This typically includes +// invalidating cache entries for this address in all other cache tiers, and an +// exclusive access intent. +// +// Incorrect or gratuitous use of this function can degrade performance. As this +// function can invalidate cached cachelines on other caches and computer cores, +// incorrect usage of this function can have an even greater negative impact +// than incorrect regular prefetches. +// Use this function only when representative benchmarks show an improvement. +// +// Example: +// +// void* Arena::Allocate(size_t size) { +// void* ptr = AllocateBlock(size); +// absl::PrefetchToLocalCacheForWrite(ptr); +// return ptr; +// } +// +void PrefetchToLocalCacheForWrite(const void* addr); + +#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__) + +#define ABSL_HAVE_PREFETCH 1 + +// See __builtin_prefetch: +// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html. 
+// +inline void PrefetchToLocalCache(const void* addr) { + __builtin_prefetch(addr, 0, 3); +} + +inline void PrefetchToLocalCacheNta(const void* addr) { + __builtin_prefetch(addr, 0, 0); +} + +inline void PrefetchToLocalCacheForWrite(const void* addr) { + // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1) + // unless -march=broadwell or newer; this is not generally the default, so we + // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel + // processors and has been present on AMD processors since the K6-2. +#if defined(__x86_64__) + asm("prefetchw (%0)" : : "r"(addr)); +#else + __builtin_prefetch(addr, 1, 0); +#endif +} + +#elif defined(ABSL_INTERNAL_HAVE_SSE) + +#define ABSL_HAVE_PREFETCH 1 + +inline void PrefetchToLocalCache(const void* addr) { + _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0); +} + +inline void PrefetchToLocalCacheNta(const void* addr) { + _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA); +} + +inline void PrefetchToLocalCacheForWrite(const void* addr) { +#if defined(_MM_HINT_ET0) + _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0); +#elif defined(__x86_64__) + // _MM_HINT_ET0 is not universally supported. 
As we commented further + // up, PREFETCHW is recognized as a no-op on older Intel processors + // and has been present on AMD processors since the K6-2. + asm("prefetchw (%0)" : : "r"(addr)); +#endif +} + +#else + +inline void PrefetchToLocalCache(const void* addr) {} +inline void PrefetchToLocalCacheNta(const void* addr) {} +inline void PrefetchToLocalCacheForWrite(const void* addr) {} + +#endif + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_BASE_PREFETCH_H_ diff --git a/absl/base/prefetch_test.cc b/absl/base/prefetch_test.cc new file mode 100644 index 00000000..ee219897 --- /dev/null +++ b/absl/base/prefetch_test.cc @@ -0,0 +1,64 @@ +// Copyright 2023 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/prefetch.h" + +#include <memory> + +#include "gtest/gtest.h" + +namespace { + +// The tests below exercise the functions only to guarantee they compile and +// execute correctly. We make no attempt at verifying any prefetch instructions being +// generated and executed: we assume the various implementations in terms of +// __builtin_prefetch() or x86 intrinsics to be correct and well tested. 
+ +TEST(PrefetchTest, PrefetchToLocalCache_StackA) { + char buf[100] = {}; + absl::PrefetchToLocalCache(buf); + absl::PrefetchToLocalCacheNta(buf); + absl::PrefetchToLocalCacheForWrite(buf); +} + +TEST(PrefetchTest, PrefetchToLocalCache_Heap) { + auto memory = std::make_unique<char[]>(200 << 10); + memset(memory.get(), 0, 200 << 10); + absl::PrefetchToLocalCache(memory.get()); + absl::PrefetchToLocalCacheNta(memory.get()); + absl::PrefetchToLocalCacheForWrite(memory.get()); + absl::PrefetchToLocalCache(memory.get() + (50 << 10)); + absl::PrefetchToLocalCacheNta(memory.get() + (50 << 10)); + absl::PrefetchToLocalCacheForWrite(memory.get() + (50 << 10)); + absl::PrefetchToLocalCache(memory.get() + (100 << 10)); + absl::PrefetchToLocalCacheNta(memory.get() + (100 << 10)); + absl::PrefetchToLocalCacheForWrite(memory.get() + (100 << 10)); + absl::PrefetchToLocalCache(memory.get() + (150 << 10)); + absl::PrefetchToLocalCacheNta(memory.get() + (150 << 10)); + absl::PrefetchToLocalCacheForWrite(memory.get() + (150 << 10)); +} + +TEST(PrefetchTest, PrefetchToLocalCache_Nullptr) { + absl::PrefetchToLocalCache(nullptr); + absl::PrefetchToLocalCacheNta(nullptr); + absl::PrefetchToLocalCacheForWrite(nullptr); +} + +TEST(PrefetchTest, PrefetchToLocalCache_InvalidPtr) { + absl::PrefetchToLocalCache(reinterpret_cast<const void*>(0x785326532L)); + absl::PrefetchToLocalCacheNta(reinterpret_cast<const void*>(0x785326532L)); + absl::PrefetchToLocalCacheForWrite(reinterpret_cast<const void*>(0x78532L)); +} + +} // namespace -- cgit v1.2.3