diff options
author | Martijn Vels <mvels@google.com> | 2023-01-27 12:36:55 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2023-01-27 12:37:47 -0800 |
commit | 75d2525117c8da93840ab256f07b191086fd7cbb (patch) | |
tree | fabecc85b212f2448f600adfc1287a79f63e4127 | |
parent | 8a0693b2a75f21e508ffcd172efda3bbb638a275 (diff) |
Replace absl::base_internal::Prefetch* calls with absl::Prefetch* calls
PiperOrigin-RevId: 505184961
Change-Id: I64482558a76abda6896bec4b2d323833b6cd7edf
-rw-r--r-- | absl/base/BUILD.bazel | 5 | ||||
-rw-r--r-- | absl/base/CMakeLists.txt | 1 | ||||
-rw-r--r-- | absl/base/internal/prefetch.h | 35 | ||||
-rw-r--r-- | absl/base/prefetch.h | 13 | ||||
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 22 | ||||
-rw-r--r-- | absl/container/internal/raw_hash_set_test.cc | 2 | ||||
-rw-r--r-- | absl/crc/internal/crc.cc | 4 | ||||
-rw-r--r-- | absl/crc/internal/crc_memcpy_x86_64.cc | 8 | ||||
-rw-r--r-- | absl/crc/internal/crc_x86_arm_combined.cc | 14 |
9 files changed, 58 insertions, 46 deletions
diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel index b4d1c218..dd29daf6 100644 --- a/absl/base/BUILD.bazel +++ b/absl/base/BUILD.bazel @@ -738,7 +738,10 @@ cc_library( ], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - deps = [":config"], + deps = [ + ":config", + ":core_headers", # TODO(b/265984188): remove + ], ) cc_test( diff --git a/absl/base/CMakeLists.txt b/absl/base/CMakeLists.txt index 74495d01..71b93795 100644 --- a/absl/base/CMakeLists.txt +++ b/absl/base/CMakeLists.txt @@ -657,6 +657,7 @@ absl_cc_library( ${ABSL_DEFAULT_LINKOPTS} DEPS absl::config + absl::core_headers # TODO(b/265984188): remove ) absl_cc_test( diff --git a/absl/base/internal/prefetch.h b/absl/base/internal/prefetch.h index 06419283..aecfd877 100644 --- a/absl/base/internal/prefetch.h +++ b/absl/base/internal/prefetch.h @@ -12,10 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +// TODO(b/265984188): remove all uses and delete this header. + #ifndef ABSL_BASE_INTERNAL_PREFETCH_H_ #define ABSL_BASE_INTERNAL_PREFETCH_H_ +#include "absl/base/attributes.h" #include "absl/base/config.h" +#include "absl/base/prefetch.h" #ifdef __SSE__ #include <xmmintrin.h> @@ -72,10 +76,21 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace base_internal { -void PrefetchT0(const void* addr); +ABSL_DEPRECATED("Use absl::PrefetchToLocalCache() instead") +inline void PrefetchT0(const void* address) { + absl::PrefetchToLocalCache(address); +} + +ABSL_DEPRECATED("Use absl::PrefetchToLocalCache() instead") +inline void PrefetchNta(const void* address) { + absl::PrefetchToLocalCacheNta(address); +} + +ABSL_DEPRECATED("Use __builtin_prefetch() for advanced prefetch logic instead") void PrefetchT1(const void* addr); + +ABSL_DEPRECATED("Use __builtin_prefetch() for advanced prefetch logic instead") void PrefetchT2(const void* addr); -void PrefetchNta(const void* addr); // Implementation details follow. @@ -90,10 +105,6 @@ void PrefetchNta(const void* addr); // safe for all currently supported platforms. However, prefetch for // store may have problems depending on the target platform. // -inline void PrefetchT0(const void* addr) { - // Note: this uses prefetcht0 on Intel. - __builtin_prefetch(addr, 0, 3); -} inline void PrefetchT1(const void* addr) { // Note: this uses prefetcht1 on Intel. __builtin_prefetch(addr, 0, 2); @@ -102,33 +113,21 @@ inline void PrefetchT2(const void* addr) { // Note: this uses prefetcht2 on Intel. __builtin_prefetch(addr, 0, 1); } -inline void PrefetchNta(const void* addr) { - // Note: this uses prefetchtnta on Intel. - __builtin_prefetch(addr, 0, 0); -} #elif defined(ABSL_INTERNAL_HAVE_SSE) #define ABSL_INTERNAL_HAVE_PREFETCH 1 -inline void PrefetchT0(const void* addr) { - _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0); -} inline void PrefetchT1(const void* addr) { _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T1); } inline void PrefetchT2(const void* addr) { _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T2); } -inline void PrefetchNta(const void* addr) { - _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA); -} #else -inline void PrefetchT0(const void*) {} inline void PrefetchT1(const void*) {} inline void PrefetchT2(const void*) {} -inline void PrefetchNta(const void*) {} #endif } // namespace base_internal diff --git a/absl/base/prefetch.h b/absl/base/prefetch.h index 4d428462..6bc98637 100644 --- a/absl/base/prefetch.h +++ b/absl/base/prefetch.h @@ -30,9 +30,11 @@ #include <xmmintrin.h> #endif -#if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE) +#if defined(_MSC_VER) && _MSC_VER >= 1900 && \ + (defined(_M_X64) || defined(_M_IX86)) #include <intrin.h> #pragma intrinsic(_mm_prefetch) +#pragma intrinsic(_m_prefetchw) #endif namespace absl { @@ -174,10 +176,15 @@ inline void PrefetchToLocalCacheNta(const void* addr) { inline void PrefetchToLocalCacheForWrite(const void* addr) { #if defined(_MM_HINT_ET0) _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0); -#elif defined(__x86_64__) +#elif defined(_MSC_VER) && _MSC_VER >= 1900 && \ + (defined(_M_X64) || defined(_M_IX86)) + // MSVC 2015 and up on x86/x64 supports prefetchw (feature listed as 3DNOW) + _m_prefetchw(const_cast<void*>(addr)); +#elif !defined(_MSC_VER) && defined(__x86_64__) // _MM_HINT_ET0 is not universally supported. As we commented further // up, PREFETCHW is recognized as a no-op on older Intel processors - // and has been present on AMD processors since the K6-2 + // and has been present on AMD processors since the K6-2. We have this + // disabled for MSVC compilers as this miscompiles on older MSVC compilers. asm("prefetchw (%0)" : : "r"(addr)); #endif } diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 61ef196d..09b55f66 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -185,10 +185,10 @@ #include "absl/base/config.h" #include "absl/base/internal/endian.h" -#include "absl/base/internal/prefetch.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/optimization.h" #include "absl/base/port.h" +#include "absl/base/prefetch.h" #include "absl/container/internal/common.h" #include "absl/container/internal/compressed_tuple.h" #include "absl/container/internal/container_memory.h" @@ -2117,12 +2117,12 @@ class raw_hash_set { void prefetch(const key_arg<K>& key) const { (void)key; // Avoid probing if we won't be able to prefetch the addresses received. -#ifdef ABSL_INTERNAL_HAVE_PREFETCH +#ifdef ABSL_HAVE_PREFETCH prefetch_heap_block(); auto seq = probe(common(), hash_ref()(key)); - base_internal::PrefetchT0(control() + seq.offset()); - base_internal::PrefetchT0(slot_array() + seq.offset()); -#endif // ABSL_INTERNAL_HAVE_PREFETCH + PrefetchToLocalCache(control() + seq.offset()); + PrefetchToLocalCache(slot_array() + seq.offset()); +#endif // ABSL_HAVE_PREFETCH } // The API of find() has two extensions. @@ -2529,10 +2529,14 @@ class raw_hash_set { // See `CapacityToGrowth()`. size_t& growth_left() { return common().growth_left(); } - // Prefetch the heap-allocated memory region to resolve potential TLB misses. - // This is intended to overlap with execution of calculating the hash for a - // key. - void prefetch_heap_block() const { base_internal::PrefetchT2(control()); } + // Prefetch the heap-allocated memory region to resolve potential TLB and + // cache misses. This is intended to overlap with execution of calculating the + // hash for a key. + void prefetch_heap_block() const { +#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__) + __builtin_prefetch(control(), 0, 1); +#endif + } CommonFields& common() { return settings_.template get<0>(); } const CommonFields& common() const { return settings_.template get<0>(); } diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index 3d3b089c..bdffb817 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -40,8 +40,8 @@ #include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/cycleclock.h" -#include "absl/base/internal/prefetch.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/prefetch.h" #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/container/internal/container_memory.h" diff --git a/absl/crc/internal/crc.cc b/absl/crc/internal/crc.cc index bb8936e3..337a173f 100644 --- a/absl/crc/internal/crc.cc +++ b/absl/crc/internal/crc.cc @@ -44,8 +44,8 @@ #include <cstdint> #include "absl/base/internal/endian.h" -#include "absl/base/internal/prefetch.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/prefetch.h" #include "absl/crc/internal/crc_internal.h" namespace absl { @@ -309,7 +309,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const { // Process kStride interleaved swaths through the data in parallel. while ((e - p) > kPrefetchHorizon) { - base_internal::PrefetchNta( + PrefetchToLocalCacheNta( reinterpret_cast<const void*>(p + kPrefetchHorizon)); // Process 64 bytes at a time step_stride(); diff --git a/absl/crc/internal/crc_memcpy_x86_64.cc b/absl/crc/internal/crc_memcpy_x86_64.cc index 66f784de..0078f0e8 100644 --- a/absl/crc/internal/crc_memcpy_x86_64.cc +++ b/absl/crc/internal/crc_memcpy_x86_64.cc @@ -52,8 +52,8 @@ #include <type_traits> #include "absl/base/dynamic_annotations.h" -#include "absl/base/internal/prefetch.h" #include "absl/base/optimization.h" +#include "absl/base/prefetch.h" #include "absl/crc/crc32c.h" #include "absl/crc/internal/cpu_detect.h" #include "absl/crc/internal/crc_memcpy.h" @@ -242,10 +242,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( while (copy_rounds > kBlocksPerCacheLine) { // Prefetch kPrefetchAhead bytes ahead of each pointer. for (size_t i = 0; i < kRegions; i++) { - absl::base_internal::PrefetchT0(src_bytes + kPrefetchAhead + - region_size * i); - absl::base_internal::PrefetchT0(dst_bytes + kPrefetchAhead + - region_size * i); + absl::PrefetchToLocalCache(src_bytes + kPrefetchAhead + region_size * i); + absl::PrefetchToLocalCache(dst_bytes + kPrefetchAhead + region_size * i); } // Load and store data, computing CRC on the way. diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc index d71191e3..e482b37a 100644 --- a/absl/crc/internal/crc_x86_arm_combined.cc +++ b/absl/crc/internal/crc_x86_arm_combined.cc @@ -21,7 +21,7 @@ #include "absl/base/config.h" #include "absl/base/dynamic_annotations.h" #include "absl/base/internal/endian.h" -#include "absl/base/internal/prefetch.h" +#include "absl/base/prefetch.h" #include "absl/crc/internal/cpu_detect.h" #include "absl/crc/internal/crc.h" #include "absl/crc/internal/crc32_x86_arm_combined_simd.h" @@ -429,11 +429,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); - base_internal::PrefetchT0( + PrefetchToLocalCache( reinterpret_cast<const char*>(p + kPrefetchHorizonMedium)); - base_internal::PrefetchT0( + PrefetchToLocalCache( reinterpret_cast<const char*>(p1 + kPrefetchHorizonMedium)); - base_internal::PrefetchT0( + PrefetchToLocalCache( reinterpret_cast<const char*>(p2 + kPrefetchHorizonMedium)); } // Don't run crc on last 8 bytes. @@ -517,12 +517,12 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams for (size_t i = 1; i < bs; i++) { // Prefetch data for next itterations. for (size_t j = 0; j < num_crc_streams; j++) { - base_internal::PrefetchT0( + PrefetchToLocalCache( reinterpret_cast<const char*>(crc_streams[j] + kPrefetchHorizon)); } for (size_t j = 0; j < num_pclmul_streams; j++) { - base_internal::PrefetchT0(reinterpret_cast<const char*>( - pclmul_streams[j] + kPrefetchHorizon)); + PrefetchToLocalCache(reinterpret_cast<const char*>(pclmul_streams[j] + + kPrefetchHorizon)); } // We process each stream in 64 byte blocks. This can be written as |