diff options
author | Abseil Team <absl-team@google.com> | 2024-03-27 11:22:09 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-03-27 11:22:57 -0700 |
commit | 18018aa45dd40b123d0c86224bbcc26fa3269d73 (patch) | |
tree | 72fe4d682d1f89aa354f830f896da2fdc26807b0 /absl/crc/internal | |
parent | a1ced69bbc383ab84abbbb8d08e9178ab6885b81 (diff) |
Adjust conditional compilation in non_temporal_memcpy.h
This change will allow the AVX version of non-temporal memcpy to be compiled
even if the compiler isn't run with AVX support. This allows runtime dispatch
to select the AVX implementation for CPUs that are known to be compatible with
AVX instructions.
PiperOrigin-RevId: 619594422
Change-Id: Ia7d92404ef8d10d152030b29b71948ed954f28f5
Diffstat (limited to 'absl/crc/internal')
-rw-r--r-- | absl/crc/internal/non_temporal_memcpy.h | 35 |
1 files changed, 18 insertions, 17 deletions
diff --git a/absl/crc/internal/non_temporal_memcpy.h b/absl/crc/internal/non_temporal_memcpy.h
index b3d94bad..7ae83bdc 100644
--- a/absl/crc/internal/non_temporal_memcpy.h
+++ b/absl/crc/internal/non_temporal_memcpy.h
@@ -19,19 +19,8 @@
 #include <intrin.h>
 #endif
 
-#ifdef __SSE__
-#include <xmmintrin.h>
-#endif
-
-#ifdef __SSE2__
-#include <emmintrin.h>
-#endif
-
-#ifdef __SSE3__
-#include <pmmintrin.h>
-#endif
-
-#ifdef __AVX__
+#if defined(__SSE__) || defined(__AVX__)
+// Pulls in both SSE and AVX intrinsics.
 #include <immintrin.h>
 #endif
 
@@ -44,6 +33,7 @@
 #include <cstdint>
 #include <cstring>
 
+#include "absl/base/attributes.h"
 #include "absl/base/config.h"
 #include "absl/base/optimization.h"
 
@@ -57,7 +47,9 @@ namespace crc_internal {
 // memcpy can save 1 DRAM load of the destination cacheline.
 constexpr size_t kCacheLineSize = ABSL_CACHELINE_SIZE;
 
-// If the objects overlap, the behavior is undefined.
+// If the objects overlap, the behavior is undefined. Uses regular memcpy
+// instead of non-temporal memcpy if the required CPU intrinsics are unavailable
+// at compile time.
 inline void *non_temporal_store_memcpy(void *__restrict dst,
                                        const void *__restrict src, size_t len) {
 #if defined(__SSE3__) || defined(__aarch64__) || \
@@ -119,10 +111,20 @@ inline void *non_temporal_store_memcpy(void *__restrict dst,
 #endif  // __SSE3__ || __aarch64__ || (_MSC_VER && __AVX__)
 }
 
+// If the objects overlap, the behavior is undefined. Uses regular memcpy
+// instead of non-temporal memcpy if the required CPU intrinsics are unavailable
+// at compile time.
+#if ABSL_HAVE_CPP_ATTRIBUTE(gnu::target) && \
+    (defined(__x86_64__) || defined(__i386__))
+[[gnu::target("avx")]]
+#endif
 inline void *non_temporal_store_memcpy_avx(void *__restrict dst,
                                            const void *__restrict src,
                                            size_t len) {
-#ifdef __AVX__
+  // This function requires AVX. For clang and gcc we compile it with AVX even
+  // if the translation unit isn't built with AVX support. This works because we
+  // only select this implementation at runtime if the CPU supports AVX.
+#if defined(__SSE3__) || (defined(_MSC_VER) && defined(__AVX__))
   uint8_t *d = reinterpret_cast<uint8_t *>(dst);
   const uint8_t *s = reinterpret_cast<const uint8_t *>(src);
 
@@ -168,9 +170,8 @@ inline void *non_temporal_store_memcpy_avx(void *__restrict dst,
   }
   return dst;
 #else
-  // Fallback to regular memcpy when AVX is not available.
   return memcpy(dst, src, len);
-#endif  // __AVX__
+#endif  // __SSE3__ || (_MSC_VER && __AVX__)
 }
 
 }  // namespace crc_internal