diff options
Diffstat (limited to 'absl/crc/internal/crc_memcpy_x86_64.cc')
-rw-r--r-- | absl/crc/internal/crc_memcpy_x86_64.cc | 36 |
1 files changed, 17 insertions, 19 deletions
diff --git a/absl/crc/internal/crc_memcpy_x86_64.cc b/absl/crc/internal/crc_memcpy_x86_64.cc index 66f784de..d42b08dc 100644 --- a/absl/crc/internal/crc_memcpy_x86_64.cc +++ b/absl/crc/internal/crc_memcpy_x86_64.cc @@ -52,8 +52,8 @@ #include <type_traits> #include "absl/base/dynamic_annotations.h" -#include "absl/base/internal/prefetch.h" #include "absl/base/optimization.h" +#include "absl/base/prefetch.h" #include "absl/crc/crc32c.h" #include "absl/crc/internal/cpu_detect.h" #include "absl/crc/internal/crc_memcpy.h" @@ -242,10 +242,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( while (copy_rounds > kBlocksPerCacheLine) { // Prefetch kPrefetchAhead bytes ahead of each pointer. for (size_t i = 0; i < kRegions; i++) { - absl::base_internal::PrefetchT0(src_bytes + kPrefetchAhead + - region_size * i); - absl::base_internal::PrefetchT0(dst_bytes + kPrefetchAhead + - region_size * i); + absl::PrefetchToLocalCache(src_bytes + kPrefetchAhead + region_size * i); + absl::PrefetchToLocalCache(dst_bytes + kPrefetchAhead + region_size * i); } // Load and store data, computing CRC on the way. @@ -359,18 +357,18 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() { case CpuType::kIntelHaswell: case CpuType::kIntelIvybridge: return { - .temporal = new FallbackCrcMemcpyEngine(), - .non_temporal = new CrcNonTemporalMemcpyAVXEngine(), + /*.temporal=*/new FallbackCrcMemcpyEngine(), + /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(), }; // INTEL_SANDYBRIDGE performs better with SSE than AVX. case CpuType::kIntelSandybridge: return { - .temporal = new FallbackCrcMemcpyEngine(), - .non_temporal = new CrcNonTemporalMemcpyEngine(), + /*.temporal=*/new FallbackCrcMemcpyEngine(), + /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(), }; default: - return {.temporal = new FallbackCrcMemcpyEngine(), - .non_temporal = new FallbackCrcMemcpyEngine()}; + return {/*.temporal=*/new FallbackCrcMemcpyEngine(), + /*.non_temporal=*/new FallbackCrcMemcpyEngine()}; } #else // Get the underlying architecture. @@ -388,8 +386,8 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() { case CpuType::kAmdRome: case CpuType::kAmdNaples: return { - .temporal = new AcceleratedCrcMemcpyEngine<1, 2>(), - .non_temporal = new CrcNonTemporalMemcpyAVXEngine(), + /*.temporal=*/new AcceleratedCrcMemcpyEngine<1, 2>(), + /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(), }; // PCLMULQDQ is slow and we don't have wide enough issue width to take // advantage of it. For an unknown architecture, don't risk using CLMULs. @@ -400,18 +398,18 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() { case CpuType::kIntelHaswell: case CpuType::kIntelIvybridge: return { - .temporal = new AcceleratedCrcMemcpyEngine<3, 0>(), - .non_temporal = new CrcNonTemporalMemcpyAVXEngine(), + /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(), + /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(), }; // INTEL_SANDYBRIDGE performs better with SSE than AVX. case CpuType::kIntelSandybridge: return { - .temporal = new AcceleratedCrcMemcpyEngine<3, 0>(), - .non_temporal = new CrcNonTemporalMemcpyEngine(), + /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(), + /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(), }; default: - return {.temporal = new FallbackCrcMemcpyEngine(), - .non_temporal = new FallbackCrcMemcpyEngine()}; + return {/*.temporal=*/new FallbackCrcMemcpyEngine(), + /*.non_temporal=*/new FallbackCrcMemcpyEngine()}; } #endif // UNDEFINED_BEHAVIOR_SANITIZER } |