Diffstat (limited to 'absl/base/prefetch.h')
-rw-r--r--  absl/base/prefetch.h  198
1 file changed, 198 insertions(+), 0 deletions(-)
diff --git a/absl/base/prefetch.h b/absl/base/prefetch.h
new file mode 100644
index 00000000..de7a180d
--- /dev/null
+++ b/absl/base/prefetch.h
@@ -0,0 +1,198 @@
+// Copyright 2023 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: prefetch.h
+// -----------------------------------------------------------------------------
+//
+// This header file defines functions that prefetch memory contents into the
+// first level cache (L1) for the current CPU. The functions offered in this
+// header are limited to prefetching single cachelines and are aimed at
+// relatively 'simple' prefetching needs.
+//
+#ifndef ABSL_BASE_PREFETCH_H_
+#define ABSL_BASE_PREFETCH_H_
+
+#include "absl/base/config.h"
+
+#if defined(ABSL_INTERNAL_HAVE_SSE)
+#include <xmmintrin.h>
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1900 && \
+ (defined(_M_X64) || defined(_M_IX86))
+#include <intrin.h>
+#pragma intrinsic(_mm_prefetch)
+#endif
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+
+// Moves data into the L1 cache before it is read, or "prefetches" it.
+//
+// The value of `addr` is the address of the memory to prefetch. If
+// the target and compiler support it, data prefetch instructions are
+// generated. If the prefetch is done some time before the memory is
+// read, it may be in the cache by the time the read occurs.
+//
+// This function prefetches data with the highest degree of temporal locality;
+// data is prefetched where possible into all levels of the cache.
+//
+// Incorrect or gratuitous use of this function can degrade performance.
+// Use this function only when representative benchmarks show an improvement.
+//
+// Example:
+//
+//   // Computes incremental checksum for `data`.
+//   int ComputeChecksum(int sum, absl::string_view data);
+//
+//   // Computes cumulative checksum for all values in `data`
+//   int ComputeChecksum(absl::Span<const std::string> data) {
+//     int sum = 0;
+//     auto it = data.begin();
+//     auto pit = data.begin();
+//     auto end = data.end();
+//     for (int dist = 8; dist > 0 && pit != data.end(); --dist, ++pit) {
+//       absl::PrefetchToLocalCache(pit->data());
+//     }
+//     for (; pit != end; ++pit, ++it) {
+//       sum = ComputeChecksum(sum, *it);
+//       absl::PrefetchToLocalCache(pit->data());
+//     }
+//     for (; it != end; ++it) {
+//       sum = ComputeChecksum(sum, *it);
+//     }
+//     return sum;
+//   }
+//
+void PrefetchToLocalCache(const void* addr);
+
+// Moves data into the L1 cache before it is read, or "prefetches" it.
+//
+// This function is identical to `PrefetchToLocalCache()` except that it has
+// non-temporal locality: the fetched data should not be left in any of the
+// cache tiers. This is useful for cases where the data is used only once or
+// for a short time, for example, when invoking a destructor on an object.
+//
+// Incorrect or gratuitous use of this function can degrade performance.
+// Use this function only when representative benchmarks show an improvement.
+//
+// Example:
+//
+//   template <typename Iterator>
+//   void DestroyPointers(Iterator begin, Iterator end) {
+//     // Prefetch (up to) the first 8 elements before destroying anything.
+//     int dist = 8;
+//     auto prefetch_it = begin;
+//     while (prefetch_it != end && dist-- > 0) {
+//       absl::PrefetchToLocalCacheNta(*prefetch_it++);
+//     }
+//     while (prefetch_it != end) {
+//       delete *begin++;
+//       absl::PrefetchToLocalCacheNta(*prefetch_it++);
+//     }
+//     while (begin != end) {
+//       delete *begin++;
+//     }
+//   }
+//
+void PrefetchToLocalCacheNta(const void* addr);
+
+// Moves data into the L1 cache with the intent to modify it.
+//
+// This function is similar to `PrefetchToLocalCache()` except that it
+// prefetches cachelines with an 'intent to modify'. This typically includes
+// invalidating cache entries for this address in all other cache tiers, and an
+// exclusive access intent.
+//
+// Incorrect or gratuitous use of this function can degrade performance.
+// Because this function can invalidate cachelines held by other caches and
+// CPU cores, incorrect usage can have an even greater negative impact than
+// incorrect regular prefetches.
+// Use this function only when representative benchmarks show an improvement.
+//
+// Example:
+//
+//   void* Arena::Allocate(size_t size) {
+//     void* ptr = AllocateBlock(size);
+//     absl::PrefetchToLocalCacheForWrite(ptr);
+//     return ptr;
+//   }
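+//
+//   A second, purely illustrative sketch (the `OverwriteRecord` helper below
+//   is hypothetical, not part of this API): prefetching a destination buffer
+//   for write access shortly before it is overwritten.
+//
+//   void OverwriteRecord(char* dst, absl::string_view src) {
+//     absl::PrefetchToLocalCacheForWrite(dst);
+//     memcpy(dst, src.data(), src.size());
+//   }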
+//
+void PrefetchToLocalCacheForWrite(const void* addr);
+
+#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
+
+#define ABSL_HAVE_PREFETCH 1
+
+// See __builtin_prefetch:
+// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
+//
+inline void PrefetchToLocalCache(const void* addr) {
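+ // rw = 0 requests a prefetch for reading; locality = 3 requests maximum
+ // temporal locality (keep the line in all levels of the cache).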
+ __builtin_prefetch(addr, 0, 3);
+}
+
+inline void PrefetchToLocalCacheNta(const void* addr) {
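+ // locality = 0 marks the data as non-temporal: it need not be retained in
+ // the cache after the access.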
+ __builtin_prefetch(addr, 0, 0);
+}
+
+inline void PrefetchToLocalCacheForWrite(const void* addr) {
+ // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
+ // unless -march=broadwell or newer; this is not generally the default, so we
+ // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
+ // processors and has been present on AMD processors since the K6-2.
+#if defined(__x86_64__)
+ asm("prefetchw (%0)" : : "r"(addr));
+#else
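+ // rw = 1 requests a prefetch with intent to write; locality = 3 requests
+ // maximum temporal locality.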
+ __builtin_prefetch(addr, 1, 3);
+#endif
+}
+
+#elif defined(ABSL_INTERNAL_HAVE_SSE)
+
+#define ABSL_HAVE_PREFETCH 1
+
+inline void PrefetchToLocalCache(const void* addr) {
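+ // _MM_HINT_T0 prefetches the line into all levels of the cache hierarchy.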
+ _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
+}
+
+inline void PrefetchToLocalCacheNta(const void* addr) {
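+ // _MM_HINT_NTA prefetches non-temporal data, minimizing cache pollution.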
+ _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
+}
+
+inline void PrefetchToLocalCacheForWrite(const void* addr) {
+#if defined(_MM_HINT_ET0)
+ _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
+#elif !defined(_MSC_VER) && defined(__x86_64__)
+ // _MM_HINT_ET0 is not universally supported. As we commented further
+ // up, PREFETCHW is recognized as a no-op on older Intel processors
+ // and has been present on AMD processors since the K6-2. We have this
+ // disabled for MSVC, as it miscompiles on older MSVC compilers.
+ asm("prefetchw (%0)" : : "r"(addr));
+#endif
+}
+
+#else
+
+inline void PrefetchToLocalCache(const void* addr) {}
+inline void PrefetchToLocalCacheNta(const void* addr) {}
+inline void PrefetchToLocalCacheForWrite(const void* addr) {}
+
+#endif
+
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_BASE_PREFETCH_H_