diff options
author | Martijn Vels <mvels@google.com> | 2023-01-26 14:22:33 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2023-01-26 14:23:41 -0800 |
commit | db51f68788bfc505e1d68a731b0175c593cf89cd (patch) | |
tree | 1f63206b09289d7784a93e49621b2f3eb3b2e3e5 /absl/base/prefetch.h | |
parent | c21bd95257bd0978824e12d91a6f8975a506877f (diff) |
Introduce Abseil Prefetch API
PiperOrigin-RevId: 504941246
Change-Id: I94c1e85afd254e84948477b511d41eeb8285fdae
Diffstat (limited to 'absl/base/prefetch.h')
-rw-r--r-- | absl/base/prefetch.h | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/absl/base/prefetch.h b/absl/base/prefetch.h new file mode 100644 index 00000000..4d428462 --- /dev/null +++ b/absl/base/prefetch.h @@ -0,0 +1,196 @@ +// Copyright 2023 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: prefetch.h +// ----------------------------------------------------------------------------- +// +// This header file defines prefetch functions to prefetch memory contents +// into the first level cache (L1) for the current CPU. The prefetch logic +// offered in this header is limited to prefetching first level cachelines +// only, and is aimed at relatively 'simple' prefetching logic. +// +#ifndef ABSL_BASE_PREFETCH_H_ +#define ABSL_BASE_PREFETCH_H_ + +#include "absl/base/config.h" + +#if defined(ABSL_INTERNAL_HAVE_SSE) +#include <xmmintrin.h> +#endif + +#if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE) +#include <intrin.h> +#pragma intrinsic(_mm_prefetch) +#endif + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// Moves data into the L1 cache before it is read, or "prefetches" it. +// +// The value of `addr` is the address of the memory to prefetch. If +// the target and compiler support it, data prefetch instructions are +// generated. If the prefetch is done some time before the memory is +// read, it may be in the cache by the time the read occurs. +// +// This method prefetches data with the highest degree of temporal locality; +// data is prefetched where possible into all levels of the cache. +// +// Incorrect or gratuitous use of this function can degrade performance. +// Use this function only when representative benchmarks show an improvement. +// +// Example: +// +// // Computes incremental checksum for `data`. +// int ComputeChecksum(int sum, absl::string_view data); +// +// // Computes cumulative checksum for all values in `data` +// int ComputeChecksum(absl::Span<const std::string> data) { +// int sum = 0; +// auto it = data.begin(); +// auto pit = data.begin(); +// auto end = data.end(); +// for (int dist = 8; dist > 0 && pit != data.end(); --dist, ++pit) { +// absl::PrefetchToLocalCache(pit->data()); +// } +// for (; pit != end; ++pit, ++it) { +// sum = ComputeChecksum(sum, *it); +// absl::PrefetchToLocalCache(pit->data()); +// } +// for (; it != end; ++it) { +// sum = ComputeChecksum(sum, *it); +// } +// return sum; +// } +// +void PrefetchToLocalCache(const void* addr); + +// Moves data into the L1 cache before it is read, or "prefetches" it. +// +// This function is identical to `PrefetchToLocalCache()` except that it has +// non-temporal locality: the fetched data should not be left in any of the +// cache tiers. This is useful for cases where the data is used only once / +// short term, for example, invoking a destructor on an object. +// +// Incorrect or gratuitous use of this function can degrade performance. +// Use this function only when representative benchmarks show an improvement. +// +// Example: +// +// template <typename Iterator> +// void DestroyPointers(Iterator begin, Iterator end) { +// size_t distance = std::min(8U, bars.size()); +// +// int dist = 8; +// auto prefetch_it = begin; +// while (prefetch_it != end && --dist;) { +// absl::PrefetchToLocalCacheNta(*prefetch_it++); +// } +// while (prefetch_it != end) { +// delete *begin++; +// absl::PrefetchToLocalCacheNta(*prefetch_it++); +// } +// while (begin != end) { +// delete *begin++; +// } +// } +// +void PrefetchToLocalCacheNta(const void* addr); + +// Moves data into the L1 cache with the intent to modify it. +// +// This function is similar to `PrefetchToLocalCache()` except that it +// prefetches cachelines with an 'intent to modify' This typically includes +// invalidating cache entries for this address in all other cache tiers, and an +// exclusive access intent. +// +// Incorrect or gratuitous use of this function can degrade performance. As this +// function can invalidate cached cachelines on other caches and computer cores, +// incorrect usage of this function can have an even greater negative impact +// than incorrect regular prefetches. +// Use this function only when representative benchmarks show an improvement. +// +// Example: +// +// void* Arena::Allocate(size_t size) { +// void* ptr = AllocateBlock(size); +// absl::PrefetchToLocalCacheForWrite(p); +// return ptr; +// } +// +void PrefetchToLocalCacheforWrite(const void* addr); + +#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__) + +#define ABSL_HAVE_PREFETCH 1 + +// See __builtin_prefetch: +// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html. +// +inline void PrefetchToLocalCache(const void* addr) { + __builtin_prefetch(addr, 0, 3); +} + +inline void PrefetchToLocalCacheNta(const void* addr) { + __builtin_prefetch(addr, 0, 0); +} + +inline void PrefetchToLocalCacheForWrite(const void* addr) { + // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1) + // unless -march=broadwell or newer; this is not generally the default, so we + // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel + // processors and has been present on AMD processors since the K6-2. +#if defined(__x86_64__) + asm("prefetchw (%0)" : : "r"(addr)); +#else + __builtin_prefetch(addr, 1, 0); +#endif +} + +#elif defined(ABSL_INTERNAL_HAVE_SSE) + +#define ABSL_HAVE_PREFETCH 1 + +inline void PrefetchToLocalCache(const void* addr) { + _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0); +} + +inline void PrefetchToLocalCacheNta(const void* addr) { + _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA); +} + +inline void PrefetchToLocalCacheForWrite(const void* addr) { +#if defined(_MM_HINT_ET0) + _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0); +#elif defined(__x86_64__) + // _MM_HINT_ET0 is not universally supported. As we commented further + // up, PREFETCHW is recognized as a no-op on older Intel processors + // and has been present on AMD processors since the K6-2 + asm("prefetchw (%0)" : : "r"(addr)); +#endif +} + +#else + +inline void PrefetchToLocalCache(const void* addr) {} +inline void PrefetchToLocalCacheNta(const void* addr) {} +inline void PrefetchToLocalCacheForWrite(const void* addr) {} + +#endif + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_BASE_PREFETCH_H_ |