author     Martijn Vels <mvels@google.com>               2023-01-27 12:36:55 -0800
committer  Copybara-Service <copybara-worker@google.com> 2023-01-27 12:37:47 -0800
commit     75d2525117c8da93840ab256f07b191086fd7cbb (patch)
tree       fabecc85b212f2448f600adfc1287a79f63e4127
parent     8a0693b2a75f21e508ffcd172efda3bbb638a275 (diff)
Replace absl::base_internal::Prefetch* calls with absl::Prefetch* calls
PiperOrigin-RevId: 505184961
Change-Id: I64482558a76abda6896bec4b2d323833b6cd7edf
-rw-r--r--  absl/base/BUILD.bazel                          |  5
-rw-r--r--  absl/base/CMakeLists.txt                       |  1
-rw-r--r--  absl/base/internal/prefetch.h                  | 35
-rw-r--r--  absl/base/prefetch.h                           | 13
-rw-r--r--  absl/container/internal/raw_hash_set.h         | 22
-rw-r--r--  absl/container/internal/raw_hash_set_test.cc   |  2
-rw-r--r--  absl/crc/internal/crc.cc                       |  4
-rw-r--r--  absl/crc/internal/crc_memcpy_x86_64.cc         |  8
-rw-r--r--  absl/crc/internal/crc_x86_arm_combined.cc      | 14
9 files changed, 58 insertions, 46 deletions
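As a quick orientation before the per-file hunks: the migration boils down to the mapping sketched below. This is a minimal sketch with a hypothetical call site named WarmUp; it is not code taken from the commit.

#include "absl/base/prefetch.h"

// Hypothetical call site, for illustration only.
void WarmUp(const void* ptr) {
  // Replaces absl::base_internal::PrefetchT0(ptr): prefetch for read into the cache.
  absl::PrefetchToLocalCache(ptr);
  // Replaces absl::base_internal::PrefetchNta(ptr): non-temporal prefetch for
  // data that will be read once.
  absl::PrefetchToLocalCacheNta(ptr);
}

The remaining internal helpers, PrefetchT1 and PrefetchT2, are kept for now but deprecated in favor of calling __builtin_prefetch directly, as the hunks below show.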
diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel
index b4d1c218..dd29daf6 100644
--- a/absl/base/BUILD.bazel
+++ b/absl/base/BUILD.bazel
@@ -738,7 +738,10 @@ cc_library(
],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
- deps = [":config"],
+ deps = [
+ ":config",
+ ":core_headers", # TODO(b/265984188): remove
+ ],
)
cc_test(
diff --git a/absl/base/CMakeLists.txt b/absl/base/CMakeLists.txt
index 74495d01..71b93795 100644
--- a/absl/base/CMakeLists.txt
+++ b/absl/base/CMakeLists.txt
@@ -657,6 +657,7 @@ absl_cc_library(
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::config
+ absl::core_headers # TODO(b/265984188): remove
)
absl_cc_test(
diff --git a/absl/base/internal/prefetch.h b/absl/base/internal/prefetch.h
index 06419283..aecfd877 100644
--- a/absl/base/internal/prefetch.h
+++ b/absl/base/internal/prefetch.h
@@ -12,10 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// TODO(b/265984188): remove all uses and delete this header.
+
#ifndef ABSL_BASE_INTERNAL_PREFETCH_H_
#define ABSL_BASE_INTERNAL_PREFETCH_H_
+#include "absl/base/attributes.h"
#include "absl/base/config.h"
+#include "absl/base/prefetch.h"
#ifdef __SSE__
#include <xmmintrin.h>
@@ -72,10 +76,21 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace base_internal {
-void PrefetchT0(const void* addr);
+ABSL_DEPRECATED("Use absl::PrefetchToLocalCache() instead")
+inline void PrefetchT0(const void* address) {
+ absl::PrefetchToLocalCache(address);
+}
+
+ABSL_DEPRECATED("Use absl::PrefetchToLocalCache() instead")
+inline void PrefetchNta(const void* address) {
+ absl::PrefetchToLocalCacheNta(address);
+}
+
+ABSL_DEPRECATED("Use __builtin_prefetch() for advanced prefetch logic instead")
void PrefetchT1(const void* addr);
+
+ABSL_DEPRECATED("Use __builtin_prefetch() for advanced prefetch logic instead")
void PrefetchT2(const void* addr);
-void PrefetchNta(const void* addr);
// Implementation details follow.
@@ -90,10 +105,6 @@ void PrefetchNta(const void* addr);
// safe for all currently supported platforms. However, prefetch for
// store may have problems depending on the target platform.
//
-inline void PrefetchT0(const void* addr) {
- // Note: this uses prefetcht0 on Intel.
- __builtin_prefetch(addr, 0, 3);
-}
inline void PrefetchT1(const void* addr) {
// Note: this uses prefetcht1 on Intel.
__builtin_prefetch(addr, 0, 2);
@@ -102,33 +113,21 @@ inline void PrefetchT2(const void* addr) {
// Note: this uses prefetcht2 on Intel.
__builtin_prefetch(addr, 0, 1);
}
-inline void PrefetchNta(const void* addr) {
- // Note: this uses prefetchtnta on Intel.
- __builtin_prefetch(addr, 0, 0);
-}
#elif defined(ABSL_INTERNAL_HAVE_SSE)
#define ABSL_INTERNAL_HAVE_PREFETCH 1
-inline void PrefetchT0(const void* addr) {
- _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
-}
inline void PrefetchT1(const void* addr) {
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T1);
}
inline void PrefetchT2(const void* addr) {
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T2);
}
-inline void PrefetchNta(const void* addr) {
- _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
-}
#else
-inline void PrefetchT0(const void*) {}
inline void PrefetchT1(const void*) {}
inline void PrefetchT2(const void*) {}
-inline void PrefetchNta(const void*) {}
#endif
} // namespace base_internal
diff --git a/absl/base/prefetch.h b/absl/base/prefetch.h
index 4d428462..6bc98637 100644
--- a/absl/base/prefetch.h
+++ b/absl/base/prefetch.h
@@ -30,9 +30,11 @@
#include <xmmintrin.h>
#endif
-#if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE)
+#if defined(_MSC_VER) && _MSC_VER >= 1900 && \
+ (defined(_M_X64) || defined(_M_IX86))
#include <intrin.h>
#pragma intrinsic(_mm_prefetch)
+#pragma intrinsic(_m_prefetchw)
#endif
namespace absl {
@@ -174,10 +176,15 @@ inline void PrefetchToLocalCacheNta(const void* addr) {
inline void PrefetchToLocalCacheForWrite(const void* addr) {
#if defined(_MM_HINT_ET0)
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
-#elif defined(__x86_64__)
+#elif defined(_MSC_VER) && _MSC_VER >= 1900 && \
+ (defined(_M_X64) || defined(_M_IX86))
+ // MSVC 2015 and up on x86/x64 supports prefetchw (feature listed as 3DNOW)
+ _m_prefetchw(const_cast<void*>(addr));
+#elif !defined(_MSC_VER) && defined(__x86_64__)
// _MM_HINT_ET0 is not universally supported. As we commented further
// up, PREFETCHW is recognized as a no-op on older Intel processors
- // and has been present on AMD processors since the K6-2
+ // and has been present on AMD processors since the K6-2. We have this
+ // disabled for MSVC compilers as this miscompiles on older MSVC compilers.
asm("prefetchw (%0)" : : "r"(addr));
#endif
}
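For reference, the function patched above is the public write-prefetch entry point; when none of the guarded branches apply it compiles to a no-op. A minimal usage sketch, assuming a caller that is about to overwrite a heap block (the helper name and the memcpy call are illustrative, not part of this commit):

#include <cstddef>
#include <cstring>
#include "absl/base/prefetch.h"

// Illustrative helper only.
void OverwriteBlock(void* dst, const void* src, std::size_t n) {
  // Hint that dst is about to be written; does nothing on targets without a
  // write-prefetch instruction.
  absl::PrefetchToLocalCacheForWrite(dst);
  std::memcpy(dst, src, n);
}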
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 61ef196d..09b55f66 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -185,10 +185,10 @@
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
-#include "absl/base/internal/prefetch.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h"
#include "absl/base/port.h"
+#include "absl/base/prefetch.h"
#include "absl/container/internal/common.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
@@ -2117,12 +2117,12 @@ class raw_hash_set {
void prefetch(const key_arg<K>& key) const {
(void)key;
// Avoid probing if we won't be able to prefetch the addresses received.
-#ifdef ABSL_INTERNAL_HAVE_PREFETCH
+#ifdef ABSL_HAVE_PREFETCH
prefetch_heap_block();
auto seq = probe(common(), hash_ref()(key));
- base_internal::PrefetchT0(control() + seq.offset());
- base_internal::PrefetchT0(slot_array() + seq.offset());
-#endif // ABSL_INTERNAL_HAVE_PREFETCH
+ PrefetchToLocalCache(control() + seq.offset());
+ PrefetchToLocalCache(slot_array() + seq.offset());
+#endif // ABSL_HAVE_PREFETCH
}
// The API of find() has two extensions.
@@ -2529,10 +2529,14 @@ class raw_hash_set {
// See `CapacityToGrowth()`.
size_t& growth_left() { return common().growth_left(); }
- // Prefetch the heap-allocated memory region to resolve potential TLB misses.
- // This is intended to overlap with execution of calculating the hash for a
- // key.
- void prefetch_heap_block() const { base_internal::PrefetchT2(control()); }
+ // Prefetch the heap-allocated memory region to resolve potential TLB and
+ // cache misses. This is intended to overlap with execution of calculating the
+ // hash for a key.
+ void prefetch_heap_block() const {
+#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
+ __builtin_prefetch(control(), 0, 1);
+#endif
+ }
CommonFields& common() { return settings_.template get<0>(); }
const CommonFields& common() const { return settings_.template get<0>(); }
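A note on the builtin used in the new prefetch_heap_block() body: __builtin_prefetch(addr, rw, locality) takes rw = 0 for a read prefetch and a locality hint from 0 (no temporal locality) to 3 (keep in all cache levels). Per the comments being deleted from internal/prefetch.h, locality 1 corresponds to prefetcht2 on x86, i.e. the same hint the removed base_internal::PrefetchT2(control()) call issued. A standalone sketch of the same hint (hypothetical free function, for illustration only):

#include "absl/base/config.h"  // ABSL_HAVE_BUILTIN

// Hypothetical free-function version of the member above.
inline void PrefetchCtrlBytes(const void* ctrl) {
#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
  // rw = 0 (read), locality = 1 (low temporal locality; prefetcht2 on x86).
  __builtin_prefetch(ctrl, 0, 1);
#endif
}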
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index 3d3b089c..bdffb817 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -40,8 +40,8 @@
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/cycleclock.h"
-#include "absl/base/internal/prefetch.h"
#include "absl/base/internal/raw_logging.h"
+#include "absl/base/prefetch.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/container/internal/container_memory.h"
diff --git a/absl/crc/internal/crc.cc b/absl/crc/internal/crc.cc
index bb8936e3..337a173f 100644
--- a/absl/crc/internal/crc.cc
+++ b/absl/crc/internal/crc.cc
@@ -44,8 +44,8 @@
#include <cstdint>
#include "absl/base/internal/endian.h"
-#include "absl/base/internal/prefetch.h"
#include "absl/base/internal/raw_logging.h"
+#include "absl/base/prefetch.h"
#include "absl/crc/internal/crc_internal.h"
namespace absl {
@@ -309,7 +309,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const {
// Process kStride interleaved swaths through the data in parallel.
while ((e - p) > kPrefetchHorizon) {
- base_internal::PrefetchNta(
+ PrefetchToLocalCacheNta(
reinterpret_cast<const void*>(p + kPrefetchHorizon));
// Process 64 bytes at a time
step_stride();
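The CRC loop above streams over the buffer exactly once, which is why it asks for the non-temporal variant: the hint is intended to avoid displacing data that will be reused from the cache. A minimal sketch of the same pattern (the function name and the 256-byte horizon are assumptions, not values from this commit):

#include <cstddef>
#include <cstdint>
#include "absl/base/prefetch.h"

// Illustrative only; not Abseil's CRC loop.
uint32_t SumWithNtaPrefetch(const uint8_t* p, std::size_t n) {
  constexpr std::size_t kHorizon = 256;  // assumed prefetch distance
  uint32_t acc = 0;
  for (std::size_t i = 0; i < n; ++i) {
    if (i % 64 == 0 && i + kHorizon < n) {
      // Non-temporal hint: the bytes are read once and not revisited.
      absl::PrefetchToLocalCacheNta(p + i + kHorizon);
    }
    acc += p[i];
  }
  return acc;
}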
diff --git a/absl/crc/internal/crc_memcpy_x86_64.cc b/absl/crc/internal/crc_memcpy_x86_64.cc
index 66f784de..0078f0e8 100644
--- a/absl/crc/internal/crc_memcpy_x86_64.cc
+++ b/absl/crc/internal/crc_memcpy_x86_64.cc
@@ -52,8 +52,8 @@
#include <type_traits>
#include "absl/base/dynamic_annotations.h"
-#include "absl/base/internal/prefetch.h"
#include "absl/base/optimization.h"
+#include "absl/base/prefetch.h"
#include "absl/crc/crc32c.h"
#include "absl/crc/internal/cpu_detect.h"
#include "absl/crc/internal/crc_memcpy.h"
@@ -242,10 +242,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute(
while (copy_rounds > kBlocksPerCacheLine) {
// Prefetch kPrefetchAhead bytes ahead of each pointer.
for (size_t i = 0; i < kRegions; i++) {
- absl::base_internal::PrefetchT0(src_bytes + kPrefetchAhead +
- region_size * i);
- absl::base_internal::PrefetchT0(dst_bytes + kPrefetchAhead +
- region_size * i);
+ absl::PrefetchToLocalCache(src_bytes + kPrefetchAhead + region_size * i);
+ absl::PrefetchToLocalCache(dst_bytes + kPrefetchAhead + region_size * i);
}
// Load and store data, computing CRC on the way.
diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc
index d71191e3..e482b37a 100644
--- a/absl/crc/internal/crc_x86_arm_combined.cc
+++ b/absl/crc/internal/crc_x86_arm_combined.cc
@@ -21,7 +21,7 @@
#include "absl/base/config.h"
#include "absl/base/dynamic_annotations.h"
#include "absl/base/internal/endian.h"
-#include "absl/base/internal/prefetch.h"
+#include "absl/base/prefetch.h"
#include "absl/crc/internal/cpu_detect.h"
#include "absl/crc/internal/crc.h"
#include "absl/crc/internal/crc32_x86_arm_combined_simd.h"
@@ -429,11 +429,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
- base_internal::PrefetchT0(
+ PrefetchToLocalCache(
reinterpret_cast<const char*>(p + kPrefetchHorizonMedium));
- base_internal::PrefetchT0(
+ PrefetchToLocalCache(
reinterpret_cast<const char*>(p1 + kPrefetchHorizonMedium));
- base_internal::PrefetchT0(
+ PrefetchToLocalCache(
reinterpret_cast<const char*>(p2 + kPrefetchHorizonMedium));
}
// Don't run crc on last 8 bytes.
@@ -517,12 +517,12 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams
for (size_t i = 1; i < bs; i++) {
// Prefetch data for next itterations.
for (size_t j = 0; j < num_crc_streams; j++) {
- base_internal::PrefetchT0(
+ PrefetchToLocalCache(
reinterpret_cast<const char*>(crc_streams[j] + kPrefetchHorizon));
}
for (size_t j = 0; j < num_pclmul_streams; j++) {
- base_internal::PrefetchT0(reinterpret_cast<const char*>(
- pclmul_streams[j] + kPrefetchHorizon));
+ PrefetchToLocalCache(reinterpret_cast<const char*>(pclmul_streams[j] +
+ kPrefetchHorizon));
}
// We process each stream in 64 byte blocks. This can be written as