summary refs log tree commit diff
path: root/absl/base/internal/prefetch.h
diff options
context:
space:
mode:
Diffstat (limited to 'absl/base/internal/prefetch.h')
-rw-r--r-- absl/base/internal/prefetch.h 109
1 files changed, 109 insertions, 0 deletions
diff --git a/absl/base/internal/prefetch.h b/absl/base/internal/prefetch.h
new file mode 100644
index 00000000..a71b3897
--- /dev/null
+++ b/absl/base/internal/prefetch.h
@@ -0,0 +1,109 @@
+// Copyright 2022 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_BASE_INTERNAL_PREFETCH_H_
+#define ABSL_BASE_INTERNAL_PREFETCH_H_
+
+#include "absl/base/config.h"
+
+// Compatibility wrappers around __builtin_prefetch, to prefetch data
+// for read if supported by the toolchain.
+
+// Move data into the cache before it is read, or "prefetch" it.
+//
+// The value of `addr` is the address of the memory to prefetch. If
+// the target and compiler support it, data prefetch instructions are
+// generated. If the prefetch is done some time before the memory is
+// read, it may be in the cache by the time the read occurs.
+//
+// The function names specify the temporal locality heuristic applied,
+// using the names of Intel prefetch instructions:
+//
+// T0 - high degree of temporal locality; data should be left in as
+// many levels of the cache as possible
+// T1 - moderate degree of temporal locality
+// T2 - low degree of temporal locality
+// Nta - no temporal locality, data need not be left in the cache
+// after the read
+//
+// Incorrect or gratuitous use of these functions can degrade
+// performance, so use them only when representative benchmarks show
+// an improvement.
+//
+// Example usage:
+//
+// absl::base_internal::PrefetchT0(addr);
+//
+// Currently, the different prefetch calls behave on some Intel
+// architectures as follows:
+//
+//                   SNB..SKL   SKX
+// PrefetchT0()      L1/L2/L3   L1/L2
+// PrefetchT1()      L2/L3      L2
+// PrefetchT2()      L2/L3      L2
+// PrefetchNta()     L1/--/L3   L1*
+//
+// * On SKX, PrefetchNta() brings the line into L1 but evicts it from
+// the L3 cache. This might result in surprising behavior.
+//
+// SNB = Sandy Bridge, SKL = Skylake, SKX = Skylake Xeon.
+//
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace base_internal {
+
+void PrefetchT0(const void* addr);   // High temporal locality.
+void PrefetchT1(const void* addr);   // Moderate temporal locality.
+void PrefetchT2(const void* addr);   // Low temporal locality.
+void PrefetchNta(const void* addr);  // No temporal locality.
+
+// Implementation details follow.
+
+#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
+
+// See __builtin_prefetch:
+// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
+//
+// These functions speculatively load for read only. This is
+// safe for all currently supported platforms. However, prefetch for
+// store may have problems depending on the target platform.
+//
+inline void PrefetchT0(const void* addr) {
+ // Read prefetch (rw = 0) with locality 3; this uses prefetcht0 on Intel.
+ __builtin_prefetch(addr, 0, 3);
+}
+inline void PrefetchT1(const void* addr) {
+ // Read prefetch (rw = 0) with locality 2; this uses prefetcht1 on Intel.
+ __builtin_prefetch(addr, 0, 2);
+}
+inline void PrefetchT2(const void* addr) {
+ // Read prefetch (rw = 0) with locality 1; this uses prefetcht2 on Intel.
+ __builtin_prefetch(addr, 0, 1);
+}
+inline void PrefetchNta(const void* addr) {
+ // Read prefetch (rw = 0) with locality 0; this uses prefetchnta on Intel.
+ __builtin_prefetch(addr, 0, 0);
+}
+#else
+inline void PrefetchT0(const void*) {}   // No-op: no prefetch builtin.
+inline void PrefetchT1(const void*) {}   // No-op: no prefetch builtin.
+inline void PrefetchT2(const void*) {}   // No-op: no prefetch builtin.
+inline void PrefetchNta(const void*) {}  // No-op: no prefetch builtin.
+#endif
+
+} // namespace base_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_BASE_INTERNAL_PREFETCH_H_