diff options
Diffstat (limited to 'absl/crc/internal')
-rw-r--r-- | absl/crc/internal/cpu_detect.cc | 22 | ||||
-rw-r--r-- | absl/crc/internal/crc.cc | 39 | ||||
-rw-r--r-- | absl/crc/internal/crc.h | 12 | ||||
-rw-r--r-- | absl/crc/internal/crc32_x86_arm_combined_simd.h | 2 | ||||
-rw-r--r-- | absl/crc/internal/crc_cord_state.cc | 2 | ||||
-rw-r--r-- | absl/crc/internal/crc_cord_state.h | 8 | ||||
-rw-r--r-- | absl/crc/internal/crc_internal.h | 18 | ||||
-rw-r--r-- | absl/crc/internal/crc_memcpy_x86_64.cc | 36 | ||||
-rw-r--r-- | absl/crc/internal/crc_x86_arm_combined.cc | 16 |
9 files changed, 59 insertions, 96 deletions
diff --git a/absl/crc/internal/cpu_detect.cc b/absl/crc/internal/cpu_detect.cc index d61b7018..83838085 100644 --- a/absl/crc/internal/cpu_detect.cc +++ b/absl/crc/internal/cpu_detect.cc @@ -28,15 +28,12 @@ #include <intrin.h> #endif -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace crc_internal { - #if defined(__x86_64__) || defined(_M_X64) - -namespace { - -#if !defined(_WIN32) && !defined(_WIN64) +#if ABSL_HAVE_BUILTIN(__cpuid) +// MSVC-equivalent __cpuid intrinsic declaration for clang-like compilers +// for non-Windows build environments. +extern void __cpuid(int[4], int); +#elif !defined(_WIN32) && !defined(_WIN64) // MSVC defines this function for us. // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex static void __cpuid(int cpu_info[4], int info_type) { @@ -46,6 +43,15 @@ static void __cpuid(int cpu_info[4], int info_type) { : "a"(info_type), "c"(0)); } #endif // !defined(_WIN32) && !defined(_WIN64) +#endif // defined(__x86_64__) || defined(_M_X64) + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace crc_internal { + +#if defined(__x86_64__) || defined(_M_X64) + +namespace { enum class Vendor { kUnknown, diff --git a/absl/crc/internal/crc.cc b/absl/crc/internal/crc.cc index bb8936e3..22e91c53 100644 --- a/absl/crc/internal/crc.cc +++ b/absl/crc/internal/crc.cc @@ -44,8 +44,8 @@ #include <cstdint> #include "absl/base/internal/endian.h" -#include "absl/base/internal/prefetch.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/prefetch.h" #include "absl/crc/internal/crc_internal.h" namespace absl { @@ -176,9 +176,6 @@ CRCImpl* CRCImpl::NewInternal() { return result; } -// The CRC of the empty string is always the CRC polynomial itself. -void CRCImpl::Empty(uint32_t* crc) const { *crc = kCrc32cPoly; } - // The 32-bit implementation void CRC32::InitTables() { @@ -261,7 +258,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const { const uint8_t* e = p + length; uint32_t l = *crc; - auto step_one_byte = [this, &p, &l] () { + auto step_one_byte = [this, &p, &l]() { int c = (l & 0xff) ^ *p++; l = this->table0_[c] ^ (l >> 8); }; @@ -309,7 +306,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const { // Process kStride interleaved swaths through the data in parallel. while ((e - p) > kPrefetchHorizon) { - base_internal::PrefetchNta( + PrefetchToLocalCacheNta( reinterpret_cast<const void*>(p + kPrefetchHorizon)); // Process 64 bytes at a time step_stride(); @@ -359,7 +356,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const { void CRC32::ExtendByZeroesImpl(uint32_t* crc, size_t length, const uint32_t zeroes_table[256], - const uint32_t poly_table[256]) const { + const uint32_t poly_table[256]) { if (length != 0) { uint32_t l = *crc; // For each ZEROES_BASE_LG bits in length @@ -435,34 +432,6 @@ CRC* CRC::Crc32c() { return singleton; } -// This Concat implementation works for arbitrary polynomials. -void CRC::Concat(uint32_t* px, uint32_t y, size_t ylen) { - // https://en.wikipedia.org/wiki/Mathematics_of_cyclic_redundancy_checks - // The CRC of a message M is the remainder of polynomial divison modulo G, - // where the coefficient arithmetic is performed modulo 2 (so +/- are XOR): - // R(x) = M(x) x**n (mod G) - // (n is the degree of G) - // In practice, we use an initial value A and a bitmask B to get - // R = (A ^ B)x**|M| ^ Mx**n ^ B (mod G) - // If M is the concatenation of two strings S and T, and Z is the string of - // len(T) 0s, then the remainder CRC(ST) can be expressed as: - // R = (A ^ B)x**|ST| ^ STx**n ^ B - // = (A ^ B)x**|SZ| ^ SZx**n ^ B ^ Tx**n - // = CRC(SZ) ^ Tx**n - // CRC(Z) = (A ^ B)x**|T| ^ B - // CRC(T) = (A ^ B)x**|T| ^ Tx**n ^ B - // So R = CRC(SZ) ^ CRC(Z) ^ CRC(T) - // - // And further, since CRC(SZ) = Extend(CRC(S), Z), - // CRC(SZ) ^ CRC(Z) = Extend(CRC(S) ^ CRC(''), Z). - uint32_t z; - uint32_t t; - Empty(&z); - t = *px ^ z; - ExtendByZeroes(&t, ylen); - *px = t ^ y; -} - } // namespace crc_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/crc/internal/crc.h b/absl/crc/internal/crc.h index 72515b06..4efdd032 100644 --- a/absl/crc/internal/crc.h +++ b/absl/crc/internal/crc.h @@ -40,9 +40,6 @@ class CRC { public: virtual ~CRC(); - // Place the CRC of the empty string in "*crc" - virtual void Empty(uint32_t* crc) const = 0; - // If "*crc" is the CRC of bytestring A, place the CRC of // the bytestring formed from the concatenation of A and the "length" // bytes at "bytes" into "*crc". @@ -53,22 +50,17 @@ class CRC { // points to an array of "length" zero bytes. virtual void ExtendByZeroes(uint32_t* crc, size_t length) const = 0; - // Inverse opration of ExtendByZeroes. If `crc` is the CRC value of a string + // Inverse operation of ExtendByZeroes. If `crc` is the CRC value of a string // ending in `length` zero bytes, this returns a CRC value of that string // with those zero bytes removed. virtual void UnextendByZeroes(uint32_t* crc, size_t length) const = 0; - // If *px is the CRC (as defined by *crc) of some string X, - // and y is the CRC of some string Y that is ylen bytes long, set - // *px to the CRC of the concatenation of X followed by Y. - virtual void Concat(uint32_t* px, uint32_t y, size_t ylen); - // Apply a non-linear transformation to "*crc" so that // it is safe to CRC the result with the same polynomial without // any reduction of error-detection ability in the outer CRC. // Unscramble() performs the inverse transformation. // It is strongly recommended that CRCs be scrambled before storage or - // transmission, and unscrambled at the other end before futher manipulation. + // transmission, and unscrambled at the other end before further manipulation. virtual void Scramble(uint32_t* crc) const = 0; virtual void Unscramble(uint32_t* crc) const = 0; diff --git a/absl/crc/internal/crc32_x86_arm_combined_simd.h b/absl/crc/internal/crc32_x86_arm_combined_simd.h index fb643986..39e53dd0 100644 --- a/absl/crc/internal/crc32_x86_arm_combined_simd.h +++ b/absl/crc/internal/crc32_x86_arm_combined_simd.h @@ -33,7 +33,7 @@ #include <x86intrin.h> #define ABSL_CRC_INTERNAL_HAVE_X86_SIMD -#elif defined(_MSC_VER) && defined(__AVX__) +#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__) // MSVC AVX (/arch:AVX) implies SSE 4.2 and PCLMULQDQ. #include <intrin.h> diff --git a/absl/crc/internal/crc_cord_state.cc b/absl/crc/internal/crc_cord_state.cc index d0be0ddd..28d04dc4 100644 --- a/absl/crc/internal/crc_cord_state.cc +++ b/absl/crc/internal/crc_cord_state.cc @@ -121,7 +121,7 @@ void CrcCordState::Poison() { } } else { // Add a fake corrupt chunk. - rep->prefix_crc.push_back(PrefixCrc(0, crc32c_t{1})); + rep->prefix_crc.emplace_back(0, crc32c_t{1}); } } diff --git a/absl/crc/internal/crc_cord_state.h b/absl/crc/internal/crc_cord_state.h index d305424c..fbbb8c00 100644 --- a/absl/crc/internal/crc_cord_state.h +++ b/absl/crc/internal/crc_cord_state.h @@ -71,9 +71,9 @@ class CrcCordState { struct Rep { // `removed_prefix` is the crc and length of any prefix that has been // removed from the Cord (for example, by calling - // `CrcCord::RemovePrefix()`). To get the checkum of any prefix of the cord, - // this value must be subtracted from `prefix_crc`. See `Checksum()` for an - // example. + // `CrcCord::RemovePrefix()`). To get the checksum of any prefix of the + // cord, this value must be subtracted from `prefix_crc`. See `Checksum()` + // for an example. // // CrcCordState is said to be "normalized" if removed_prefix.length == 0. PrefixCrc removed_prefix; @@ -109,7 +109,7 @@ class CrcCordState { // Returns true if the chunked CRC32C cached is normalized. bool IsNormalized() const { return rep().removed_prefix.length == 0; } - // Normalizes the chunked CRC32C checksum cache by substracting any removed + // Normalizes the chunked CRC32C checksum cache by subtracting any removed // prefix from the chunks. void Normalize(); diff --git a/absl/crc/internal/crc_internal.h b/absl/crc/internal/crc_internal.h index 0611b383..4d3582d9 100644 --- a/absl/crc/internal/crc_internal.h +++ b/absl/crc/internal/crc_internal.h @@ -60,18 +60,16 @@ constexpr uint64_t kScrambleHi = (static_cast<uint64_t>(0x4f1bbcdcU) << 32) | constexpr uint64_t kScrambleLo = (static_cast<uint64_t>(0xf9ce6030U) << 32) | static_cast<uint64_t>(0x2e76e41bU); -class CRCImpl : public CRC { // Implemention of the abstract class CRC +class CRCImpl : public CRC { // Implementation of the abstract class CRC public: using Uint32By256 = uint32_t[256]; - CRCImpl() {} + CRCImpl() = default; ~CRCImpl() override = default; // The internal version of CRC::New(). static CRCImpl* NewInternal(); - void Empty(uint32_t* crc) const override; - // Fill in a table for updating a CRC by one word of 'word_size' bytes // [last_lo, last_hi] contains the answer if the last bit in the word // is set. @@ -96,8 +94,8 @@ class CRCImpl : public CRC { // Implemention of the abstract class CRC // This is the 32-bit implementation. It handles all sizes from 8 to 32. class CRC32 : public CRCImpl { public: - CRC32() {} - ~CRC32() override {} + CRC32() = default; + ~CRC32() override = default; void Extend(uint32_t* crc, const void* bytes, size_t length) const override; void ExtendByZeroes(uint32_t* crc, size_t length) const override; @@ -111,16 +109,16 @@ class CRC32 : public CRCImpl { // Common implementation guts for ExtendByZeroes and UnextendByZeroes(). // // zeroes_table is a table as returned by FillZeroesTable(), containing - // polynomials representing CRCs of strings-of-zeros of various lenghts, + // polynomials representing CRCs of strings-of-zeros of various lengths, // and which can be combined by polynomial multiplication. poly_table is // a table of CRC byte extension values. These tables are determined by // the generator polynomial. // // These will be set to reverse_zeroes_ and reverse_table0_ for Unextend, and // CRC32::zeroes_ and CRC32::table0_ for Extend. - void ExtendByZeroesImpl(uint32_t* crc, size_t length, - const uint32_t zeroes_table[256], - const uint32_t poly_table[256]) const; + static void ExtendByZeroesImpl(uint32_t* crc, size_t length, + const uint32_t zeroes_table[256], + const uint32_t poly_table[256]); uint32_t table0_[256]; // table of byte extensions uint32_t zeroes_[256]; // table of zero extensions diff --git a/absl/crc/internal/crc_memcpy_x86_64.cc b/absl/crc/internal/crc_memcpy_x86_64.cc index 66f784de..d42b08dc 100644 --- a/absl/crc/internal/crc_memcpy_x86_64.cc +++ b/absl/crc/internal/crc_memcpy_x86_64.cc @@ -52,8 +52,8 @@ #include <type_traits> #include "absl/base/dynamic_annotations.h" -#include "absl/base/internal/prefetch.h" #include "absl/base/optimization.h" +#include "absl/base/prefetch.h" #include "absl/crc/crc32c.h" #include "absl/crc/internal/cpu_detect.h" #include "absl/crc/internal/crc_memcpy.h" @@ -242,10 +242,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( while (copy_rounds > kBlocksPerCacheLine) { // Prefetch kPrefetchAhead bytes ahead of each pointer. for (size_t i = 0; i < kRegions; i++) { - absl::base_internal::PrefetchT0(src_bytes + kPrefetchAhead + - region_size * i); - absl::base_internal::PrefetchT0(dst_bytes + kPrefetchAhead + - region_size * i); + absl::PrefetchToLocalCache(src_bytes + kPrefetchAhead + region_size * i); + absl::PrefetchToLocalCache(dst_bytes + kPrefetchAhead + region_size * i); } // Load and store data, computing CRC on the way. @@ -359,18 +357,18 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() { case CpuType::kIntelHaswell: case CpuType::kIntelIvybridge: return { - .temporal = new FallbackCrcMemcpyEngine(), - .non_temporal = new CrcNonTemporalMemcpyAVXEngine(), + /*.temporal=*/new FallbackCrcMemcpyEngine(), + /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(), }; // INTEL_SANDYBRIDGE performs better with SSE than AVX. case CpuType::kIntelSandybridge: return { - .temporal = new FallbackCrcMemcpyEngine(), - .non_temporal = new CrcNonTemporalMemcpyEngine(), + /*.temporal=*/new FallbackCrcMemcpyEngine(), + /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(), }; default: - return {.temporal = new FallbackCrcMemcpyEngine(), - .non_temporal = new FallbackCrcMemcpyEngine()}; + return {/*.temporal=*/new FallbackCrcMemcpyEngine(), + /*.non_temporal=*/new FallbackCrcMemcpyEngine()}; } #else // Get the underlying architecture. @@ -388,8 +386,8 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() { case CpuType::kAmdRome: case CpuType::kAmdNaples: return { - .temporal = new AcceleratedCrcMemcpyEngine<1, 2>(), - .non_temporal = new CrcNonTemporalMemcpyAVXEngine(), + /*.temporal=*/new AcceleratedCrcMemcpyEngine<1, 2>(), + /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(), }; // PCLMULQDQ is slow and we don't have wide enough issue width to take // advantage of it. For an unknown architecture, don't risk using CLMULs. @@ -400,18 +398,18 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() { case CpuType::kIntelHaswell: case CpuType::kIntelIvybridge: return { - .temporal = new AcceleratedCrcMemcpyEngine<3, 0>(), - .non_temporal = new CrcNonTemporalMemcpyAVXEngine(), + /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(), + /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(), }; // INTEL_SANDYBRIDGE performs better with SSE than AVX. case CpuType::kIntelSandybridge: return { - .temporal = new AcceleratedCrcMemcpyEngine<3, 0>(), - .non_temporal = new CrcNonTemporalMemcpyEngine(), + /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(), + /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(), }; default: - return {.temporal = new FallbackCrcMemcpyEngine(), - .non_temporal = new FallbackCrcMemcpyEngine()}; + return {/*.temporal=*/new FallbackCrcMemcpyEngine(), + /*.non_temporal=*/new FallbackCrcMemcpyEngine()}; } #endif // UNDEFINED_BEHAVIOR_SANITIZER } diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc index d71191e3..ef521d22 100644 --- a/absl/crc/internal/crc_x86_arm_combined.cc +++ b/absl/crc/internal/crc_x86_arm_combined.cc @@ -21,7 +21,7 @@ #include "absl/base/config.h" #include "absl/base/dynamic_annotations.h" #include "absl/base/internal/endian.h" -#include "absl/base/internal/prefetch.h" +#include "absl/base/prefetch.h" #include "absl/crc/internal/cpu_detect.h" #include "absl/crc/internal/crc.h" #include "absl/crc/internal/crc32_x86_arm_combined_simd.h" @@ -429,11 +429,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2); - base_internal::PrefetchT0( + PrefetchToLocalCache( reinterpret_cast<const char*>(p + kPrefetchHorizonMedium)); - base_internal::PrefetchT0( + PrefetchToLocalCache( reinterpret_cast<const char*>(p1 + kPrefetchHorizonMedium)); - base_internal::PrefetchT0( + PrefetchToLocalCache( reinterpret_cast<const char*>(p2 + kPrefetchHorizonMedium)); } // Don't run crc on last 8 bytes. @@ -515,14 +515,14 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams } for (size_t i = 1; i < bs; i++) { - // Prefetch data for next itterations. + // Prefetch data for next iterations. for (size_t j = 0; j < num_crc_streams; j++) { - base_internal::PrefetchT0( + PrefetchToLocalCache( reinterpret_cast<const char*>(crc_streams[j] + kPrefetchHorizon)); } for (size_t j = 0; j < num_pclmul_streams; j++) { - base_internal::PrefetchT0(reinterpret_cast<const char*>( - pclmul_streams[j] + kPrefetchHorizon)); + PrefetchToLocalCache(reinterpret_cast<const char*>(pclmul_streams[j] + + kPrefetchHorizon)); } // We process each stream in 64 byte blocks. This can be written as |