summaryrefslogtreecommitdiff
path: root/absl/crc/internal/crc_memcpy_x86_64.cc
diff options
context:
space:
mode:
Diffstat (limited to 'absl/crc/internal/crc_memcpy_x86_64.cc')
-rw-r--r--absl/crc/internal/crc_memcpy_x86_64.cc36
1 files changed, 17 insertions, 19 deletions
diff --git a/absl/crc/internal/crc_memcpy_x86_64.cc b/absl/crc/internal/crc_memcpy_x86_64.cc
index 66f784de..d42b08dc 100644
--- a/absl/crc/internal/crc_memcpy_x86_64.cc
+++ b/absl/crc/internal/crc_memcpy_x86_64.cc
@@ -52,8 +52,8 @@
#include <type_traits>
#include "absl/base/dynamic_annotations.h"
-#include "absl/base/internal/prefetch.h"
#include "absl/base/optimization.h"
+#include "absl/base/prefetch.h"
#include "absl/crc/crc32c.h"
#include "absl/crc/internal/cpu_detect.h"
#include "absl/crc/internal/crc_memcpy.h"
@@ -242,10 +242,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute(
while (copy_rounds > kBlocksPerCacheLine) {
// Prefetch kPrefetchAhead bytes ahead of each pointer.
for (size_t i = 0; i < kRegions; i++) {
- absl::base_internal::PrefetchT0(src_bytes + kPrefetchAhead +
- region_size * i);
- absl::base_internal::PrefetchT0(dst_bytes + kPrefetchAhead +
- region_size * i);
+ absl::PrefetchToLocalCache(src_bytes + kPrefetchAhead + region_size * i);
+ absl::PrefetchToLocalCache(dst_bytes + kPrefetchAhead + region_size * i);
}
// Load and store data, computing CRC on the way.
@@ -359,18 +357,18 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() {
case CpuType::kIntelHaswell:
case CpuType::kIntelIvybridge:
return {
- .temporal = new FallbackCrcMemcpyEngine(),
- .non_temporal = new CrcNonTemporalMemcpyAVXEngine(),
+ /*.temporal=*/new FallbackCrcMemcpyEngine(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(),
};
// INTEL_SANDYBRIDGE performs better with SSE than AVX.
case CpuType::kIntelSandybridge:
return {
- .temporal = new FallbackCrcMemcpyEngine(),
- .non_temporal = new CrcNonTemporalMemcpyEngine(),
+ /*.temporal=*/new FallbackCrcMemcpyEngine(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(),
};
default:
- return {.temporal = new FallbackCrcMemcpyEngine(),
- .non_temporal = new FallbackCrcMemcpyEngine()};
+ return {/*.temporal=*/new FallbackCrcMemcpyEngine(),
+ /*.non_temporal=*/new FallbackCrcMemcpyEngine()};
}
#else
// Get the underlying architecture.
@@ -388,8 +386,8 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() {
case CpuType::kAmdRome:
case CpuType::kAmdNaples:
return {
- .temporal = new AcceleratedCrcMemcpyEngine<1, 2>(),
- .non_temporal = new CrcNonTemporalMemcpyAVXEngine(),
+ /*.temporal=*/new AcceleratedCrcMemcpyEngine<1, 2>(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(),
};
// PCLMULQDQ is slow and we don't have wide enough issue width to take
// advantage of it. For an unknown architecture, don't risk using CLMULs.
@@ -400,18 +398,18 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() {
case CpuType::kIntelHaswell:
case CpuType::kIntelIvybridge:
return {
- .temporal = new AcceleratedCrcMemcpyEngine<3, 0>(),
- .non_temporal = new CrcNonTemporalMemcpyAVXEngine(),
+ /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(),
};
// INTEL_SANDYBRIDGE performs better with SSE than AVX.
case CpuType::kIntelSandybridge:
return {
- .temporal = new AcceleratedCrcMemcpyEngine<3, 0>(),
- .non_temporal = new CrcNonTemporalMemcpyEngine(),
+ /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(),
};
default:
- return {.temporal = new FallbackCrcMemcpyEngine(),
- .non_temporal = new FallbackCrcMemcpyEngine()};
+ return {/*.temporal=*/new FallbackCrcMemcpyEngine(),
+ /*.non_temporal=*/new FallbackCrcMemcpyEngine()};
}
#endif // UNDEFINED_BEHAVIOR_SANITIZER
}