summary | refs | log | tree | commit | diff
path: root/absl/crc/internal
diff options
context:
space:
mode:
Diffstat (limited to 'absl/crc/internal')
-rw-r--r-- absl/crc/internal/cpu_detect.cc 22
-rw-r--r-- absl/crc/internal/crc.cc 39
-rw-r--r-- absl/crc/internal/crc.h 12
-rw-r--r-- absl/crc/internal/crc32_x86_arm_combined_simd.h 2
-rw-r--r-- absl/crc/internal/crc_cord_state.cc 2
-rw-r--r-- absl/crc/internal/crc_cord_state.h 8
-rw-r--r-- absl/crc/internal/crc_internal.h 18
-rw-r--r-- absl/crc/internal/crc_memcpy_x86_64.cc 36
-rw-r--r-- absl/crc/internal/crc_x86_arm_combined.cc 16
9 files changed, 59 insertions, 96 deletions
diff --git a/absl/crc/internal/cpu_detect.cc b/absl/crc/internal/cpu_detect.cc
index d61b7018..83838085 100644
--- a/absl/crc/internal/cpu_detect.cc
+++ b/absl/crc/internal/cpu_detect.cc
@@ -28,15 +28,12 @@
#include <intrin.h>
#endif
-namespace absl {
-ABSL_NAMESPACE_BEGIN
-namespace crc_internal {
-
#if defined(__x86_64__) || defined(_M_X64)
-
-namespace {
-
-#if !defined(_WIN32) && !defined(_WIN64)
+#if ABSL_HAVE_BUILTIN(__cpuid)
+// MSVC-equivalent __cpuid intrinsic declaration for clang-like compilers
+// for non-Windows build environments.
+extern void __cpuid(int[4], int);
+#elif !defined(_WIN32) && !defined(_WIN64)
// MSVC defines this function for us.
// https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex
static void __cpuid(int cpu_info[4], int info_type) {
@@ -46,6 +43,15 @@ static void __cpuid(int cpu_info[4], int info_type) {
: "a"(info_type), "c"(0));
}
#endif // !defined(_WIN32) && !defined(_WIN64)
+#endif // defined(__x86_64__) || defined(_M_X64)
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace crc_internal {
+
+#if defined(__x86_64__) || defined(_M_X64)
+
+namespace {
enum class Vendor {
kUnknown,
diff --git a/absl/crc/internal/crc.cc b/absl/crc/internal/crc.cc
index bb8936e3..22e91c53 100644
--- a/absl/crc/internal/crc.cc
+++ b/absl/crc/internal/crc.cc
@@ -44,8 +44,8 @@
#include <cstdint>
#include "absl/base/internal/endian.h"
-#include "absl/base/internal/prefetch.h"
#include "absl/base/internal/raw_logging.h"
+#include "absl/base/prefetch.h"
#include "absl/crc/internal/crc_internal.h"
namespace absl {
@@ -176,9 +176,6 @@ CRCImpl* CRCImpl::NewInternal() {
return result;
}
-// The CRC of the empty string is always the CRC polynomial itself.
-void CRCImpl::Empty(uint32_t* crc) const { *crc = kCrc32cPoly; }
-
// The 32-bit implementation
void CRC32::InitTables() {
@@ -261,7 +258,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const {
const uint8_t* e = p + length;
uint32_t l = *crc;
- auto step_one_byte = [this, &p, &l] () {
+ auto step_one_byte = [this, &p, &l]() {
int c = (l & 0xff) ^ *p++;
l = this->table0_[c] ^ (l >> 8);
};
@@ -309,7 +306,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const {
// Process kStride interleaved swaths through the data in parallel.
while ((e - p) > kPrefetchHorizon) {
- base_internal::PrefetchNta(
+ PrefetchToLocalCacheNta(
reinterpret_cast<const void*>(p + kPrefetchHorizon));
// Process 64 bytes at a time
step_stride();
@@ -359,7 +356,7 @@ void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const {
void CRC32::ExtendByZeroesImpl(uint32_t* crc, size_t length,
const uint32_t zeroes_table[256],
- const uint32_t poly_table[256]) const {
+ const uint32_t poly_table[256]) {
if (length != 0) {
uint32_t l = *crc;
// For each ZEROES_BASE_LG bits in length
@@ -435,34 +432,6 @@ CRC* CRC::Crc32c() {
return singleton;
}
-// This Concat implementation works for arbitrary polynomials.
-void CRC::Concat(uint32_t* px, uint32_t y, size_t ylen) {
- // https://en.wikipedia.org/wiki/Mathematics_of_cyclic_redundancy_checks
- // The CRC of a message M is the remainder of polynomial divison modulo G,
- // where the coefficient arithmetic is performed modulo 2 (so +/- are XOR):
- // R(x) = M(x) x**n (mod G)
- // (n is the degree of G)
- // In practice, we use an initial value A and a bitmask B to get
- // R = (A ^ B)x**|M| ^ Mx**n ^ B (mod G)
- // If M is the concatenation of two strings S and T, and Z is the string of
- // len(T) 0s, then the remainder CRC(ST) can be expressed as:
- // R = (A ^ B)x**|ST| ^ STx**n ^ B
- // = (A ^ B)x**|SZ| ^ SZx**n ^ B ^ Tx**n
- // = CRC(SZ) ^ Tx**n
- // CRC(Z) = (A ^ B)x**|T| ^ B
- // CRC(T) = (A ^ B)x**|T| ^ Tx**n ^ B
- // So R = CRC(SZ) ^ CRC(Z) ^ CRC(T)
- //
- // And further, since CRC(SZ) = Extend(CRC(S), Z),
- // CRC(SZ) ^ CRC(Z) = Extend(CRC(S) ^ CRC(''), Z).
- uint32_t z;
- uint32_t t;
- Empty(&z);
- t = *px ^ z;
- ExtendByZeroes(&t, ylen);
- *px = t ^ y;
-}
-
} // namespace crc_internal
ABSL_NAMESPACE_END
} // namespace absl
diff --git a/absl/crc/internal/crc.h b/absl/crc/internal/crc.h
index 72515b06..4efdd032 100644
--- a/absl/crc/internal/crc.h
+++ b/absl/crc/internal/crc.h
@@ -40,9 +40,6 @@ class CRC {
public:
virtual ~CRC();
- // Place the CRC of the empty string in "*crc"
- virtual void Empty(uint32_t* crc) const = 0;
-
// If "*crc" is the CRC of bytestring A, place the CRC of
// the bytestring formed from the concatenation of A and the "length"
// bytes at "bytes" into "*crc".
@@ -53,22 +50,17 @@ class CRC {
// points to an array of "length" zero bytes.
virtual void ExtendByZeroes(uint32_t* crc, size_t length) const = 0;
- // Inverse opration of ExtendByZeroes. If `crc` is the CRC value of a string
+ // Inverse operation of ExtendByZeroes. If `crc` is the CRC value of a string
// ending in `length` zero bytes, this returns a CRC value of that string
// with those zero bytes removed.
virtual void UnextendByZeroes(uint32_t* crc, size_t length) const = 0;
- // If *px is the CRC (as defined by *crc) of some string X,
- // and y is the CRC of some string Y that is ylen bytes long, set
- // *px to the CRC of the concatenation of X followed by Y.
- virtual void Concat(uint32_t* px, uint32_t y, size_t ylen);
-
// Apply a non-linear transformation to "*crc" so that
// it is safe to CRC the result with the same polynomial without
// any reduction of error-detection ability in the outer CRC.
// Unscramble() performs the inverse transformation.
// It is strongly recommended that CRCs be scrambled before storage or
- // transmission, and unscrambled at the other end before futher manipulation.
+ // transmission, and unscrambled at the other end before further manipulation.
virtual void Scramble(uint32_t* crc) const = 0;
virtual void Unscramble(uint32_t* crc) const = 0;
diff --git a/absl/crc/internal/crc32_x86_arm_combined_simd.h b/absl/crc/internal/crc32_x86_arm_combined_simd.h
index fb643986..39e53dd0 100644
--- a/absl/crc/internal/crc32_x86_arm_combined_simd.h
+++ b/absl/crc/internal/crc32_x86_arm_combined_simd.h
@@ -33,7 +33,7 @@
#include <x86intrin.h>
#define ABSL_CRC_INTERNAL_HAVE_X86_SIMD
-#elif defined(_MSC_VER) && defined(__AVX__)
+#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__)
// MSVC AVX (/arch:AVX) implies SSE 4.2 and PCLMULQDQ.
#include <intrin.h>
diff --git a/absl/crc/internal/crc_cord_state.cc b/absl/crc/internal/crc_cord_state.cc
index d0be0ddd..28d04dc4 100644
--- a/absl/crc/internal/crc_cord_state.cc
+++ b/absl/crc/internal/crc_cord_state.cc
@@ -121,7 +121,7 @@ void CrcCordState::Poison() {
}
} else {
// Add a fake corrupt chunk.
- rep->prefix_crc.push_back(PrefixCrc(0, crc32c_t{1}));
+ rep->prefix_crc.emplace_back(0, crc32c_t{1});
}
}
diff --git a/absl/crc/internal/crc_cord_state.h b/absl/crc/internal/crc_cord_state.h
index d305424c..fbbb8c00 100644
--- a/absl/crc/internal/crc_cord_state.h
+++ b/absl/crc/internal/crc_cord_state.h
@@ -71,9 +71,9 @@ class CrcCordState {
struct Rep {
// `removed_prefix` is the crc and length of any prefix that has been
// removed from the Cord (for example, by calling
- // `CrcCord::RemovePrefix()`). To get the checkum of any prefix of the cord,
- // this value must be subtracted from `prefix_crc`. See `Checksum()` for an
- // example.
+ // `CrcCord::RemovePrefix()`). To get the checksum of any prefix of the
+ // cord, this value must be subtracted from `prefix_crc`. See `Checksum()`
+ // for an example.
//
// CrcCordState is said to be "normalized" if removed_prefix.length == 0.
PrefixCrc removed_prefix;
@@ -109,7 +109,7 @@ class CrcCordState {
// Returns true if the chunked CRC32C cached is normalized.
bool IsNormalized() const { return rep().removed_prefix.length == 0; }
- // Normalizes the chunked CRC32C checksum cache by substracting any removed
+ // Normalizes the chunked CRC32C checksum cache by subtracting any removed
// prefix from the chunks.
void Normalize();
diff --git a/absl/crc/internal/crc_internal.h b/absl/crc/internal/crc_internal.h
index 0611b383..4d3582d9 100644
--- a/absl/crc/internal/crc_internal.h
+++ b/absl/crc/internal/crc_internal.h
@@ -60,18 +60,16 @@ constexpr uint64_t kScrambleHi = (static_cast<uint64_t>(0x4f1bbcdcU) << 32) |
constexpr uint64_t kScrambleLo = (static_cast<uint64_t>(0xf9ce6030U) << 32) |
static_cast<uint64_t>(0x2e76e41bU);
-class CRCImpl : public CRC { // Implemention of the abstract class CRC
+class CRCImpl : public CRC { // Implementation of the abstract class CRC
public:
using Uint32By256 = uint32_t[256];
- CRCImpl() {}
+ CRCImpl() = default;
~CRCImpl() override = default;
// The internal version of CRC::New().
static CRCImpl* NewInternal();
- void Empty(uint32_t* crc) const override;
-
// Fill in a table for updating a CRC by one word of 'word_size' bytes
// [last_lo, last_hi] contains the answer if the last bit in the word
// is set.
@@ -96,8 +94,8 @@ class CRCImpl : public CRC { // Implemention of the abstract class CRC
// This is the 32-bit implementation. It handles all sizes from 8 to 32.
class CRC32 : public CRCImpl {
public:
- CRC32() {}
- ~CRC32() override {}
+ CRC32() = default;
+ ~CRC32() override = default;
void Extend(uint32_t* crc, const void* bytes, size_t length) const override;
void ExtendByZeroes(uint32_t* crc, size_t length) const override;
@@ -111,16 +109,16 @@ class CRC32 : public CRCImpl {
// Common implementation guts for ExtendByZeroes and UnextendByZeroes().
//
// zeroes_table is a table as returned by FillZeroesTable(), containing
- // polynomials representing CRCs of strings-of-zeros of various lenghts,
+ // polynomials representing CRCs of strings-of-zeros of various lengths,
// and which can be combined by polynomial multiplication. poly_table is
// a table of CRC byte extension values. These tables are determined by
// the generator polynomial.
//
// These will be set to reverse_zeroes_ and reverse_table0_ for Unextend, and
// CRC32::zeroes_ and CRC32::table0_ for Extend.
- void ExtendByZeroesImpl(uint32_t* crc, size_t length,
- const uint32_t zeroes_table[256],
- const uint32_t poly_table[256]) const;
+ static void ExtendByZeroesImpl(uint32_t* crc, size_t length,
+ const uint32_t zeroes_table[256],
+ const uint32_t poly_table[256]);
uint32_t table0_[256]; // table of byte extensions
uint32_t zeroes_[256]; // table of zero extensions
diff --git a/absl/crc/internal/crc_memcpy_x86_64.cc b/absl/crc/internal/crc_memcpy_x86_64.cc
index 66f784de..d42b08dc 100644
--- a/absl/crc/internal/crc_memcpy_x86_64.cc
+++ b/absl/crc/internal/crc_memcpy_x86_64.cc
@@ -52,8 +52,8 @@
#include <type_traits>
#include "absl/base/dynamic_annotations.h"
-#include "absl/base/internal/prefetch.h"
#include "absl/base/optimization.h"
+#include "absl/base/prefetch.h"
#include "absl/crc/crc32c.h"
#include "absl/crc/internal/cpu_detect.h"
#include "absl/crc/internal/crc_memcpy.h"
@@ -242,10 +242,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute(
while (copy_rounds > kBlocksPerCacheLine) {
// Prefetch kPrefetchAhead bytes ahead of each pointer.
for (size_t i = 0; i < kRegions; i++) {
- absl::base_internal::PrefetchT0(src_bytes + kPrefetchAhead +
- region_size * i);
- absl::base_internal::PrefetchT0(dst_bytes + kPrefetchAhead +
- region_size * i);
+ absl::PrefetchToLocalCache(src_bytes + kPrefetchAhead + region_size * i);
+ absl::PrefetchToLocalCache(dst_bytes + kPrefetchAhead + region_size * i);
}
// Load and store data, computing CRC on the way.
@@ -359,18 +357,18 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() {
case CpuType::kIntelHaswell:
case CpuType::kIntelIvybridge:
return {
- .temporal = new FallbackCrcMemcpyEngine(),
- .non_temporal = new CrcNonTemporalMemcpyAVXEngine(),
+ /*.temporal=*/new FallbackCrcMemcpyEngine(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(),
};
// INTEL_SANDYBRIDGE performs better with SSE than AVX.
case CpuType::kIntelSandybridge:
return {
- .temporal = new FallbackCrcMemcpyEngine(),
- .non_temporal = new CrcNonTemporalMemcpyEngine(),
+ /*.temporal=*/new FallbackCrcMemcpyEngine(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(),
};
default:
- return {.temporal = new FallbackCrcMemcpyEngine(),
- .non_temporal = new FallbackCrcMemcpyEngine()};
+ return {/*.temporal=*/new FallbackCrcMemcpyEngine(),
+ /*.non_temporal=*/new FallbackCrcMemcpyEngine()};
}
#else
// Get the underlying architecture.
@@ -388,8 +386,8 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() {
case CpuType::kAmdRome:
case CpuType::kAmdNaples:
return {
- .temporal = new AcceleratedCrcMemcpyEngine<1, 2>(),
- .non_temporal = new CrcNonTemporalMemcpyAVXEngine(),
+ /*.temporal=*/new AcceleratedCrcMemcpyEngine<1, 2>(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(),
};
// PCLMULQDQ is slow and we don't have wide enough issue width to take
// advantage of it. For an unknown architecture, don't risk using CLMULs.
@@ -400,18 +398,18 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() {
case CpuType::kIntelHaswell:
case CpuType::kIntelIvybridge:
return {
- .temporal = new AcceleratedCrcMemcpyEngine<3, 0>(),
- .non_temporal = new CrcNonTemporalMemcpyAVXEngine(),
+ /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyAVXEngine(),
};
// INTEL_SANDYBRIDGE performs better with SSE than AVX.
case CpuType::kIntelSandybridge:
return {
- .temporal = new AcceleratedCrcMemcpyEngine<3, 0>(),
- .non_temporal = new CrcNonTemporalMemcpyEngine(),
+ /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(),
+ /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(),
};
default:
- return {.temporal = new FallbackCrcMemcpyEngine(),
- .non_temporal = new FallbackCrcMemcpyEngine()};
+ return {/*.temporal=*/new FallbackCrcMemcpyEngine(),
+ /*.non_temporal=*/new FallbackCrcMemcpyEngine()};
}
#endif // UNDEFINED_BEHAVIOR_SANITIZER
}
diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc
index d71191e3..ef521d22 100644
--- a/absl/crc/internal/crc_x86_arm_combined.cc
+++ b/absl/crc/internal/crc_x86_arm_combined.cc
@@ -21,7 +21,7 @@
#include "absl/base/config.h"
#include "absl/base/dynamic_annotations.h"
#include "absl/base/internal/endian.h"
-#include "absl/base/internal/prefetch.h"
+#include "absl/base/prefetch.h"
#include "absl/crc/internal/cpu_detect.h"
#include "absl/crc/internal/crc.h"
#include "absl/crc/internal/crc32_x86_arm_combined_simd.h"
@@ -429,11 +429,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
- base_internal::PrefetchT0(
+ PrefetchToLocalCache(
reinterpret_cast<const char*>(p + kPrefetchHorizonMedium));
- base_internal::PrefetchT0(
+ PrefetchToLocalCache(
reinterpret_cast<const char*>(p1 + kPrefetchHorizonMedium));
- base_internal::PrefetchT0(
+ PrefetchToLocalCache(
reinterpret_cast<const char*>(p2 + kPrefetchHorizonMedium));
}
// Don't run crc on last 8 bytes.
@@ -515,14 +515,14 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams
}
for (size_t i = 1; i < bs; i++) {
- // Prefetch data for next itterations.
+ // Prefetch data for next iterations.
for (size_t j = 0; j < num_crc_streams; j++) {
- base_internal::PrefetchT0(
+ PrefetchToLocalCache(
reinterpret_cast<const char*>(crc_streams[j] + kPrefetchHorizon));
}
for (size_t j = 0; j < num_pclmul_streams; j++) {
- base_internal::PrefetchT0(reinterpret_cast<const char*>(
- pclmul_streams[j] + kPrefetchHorizon));
+ PrefetchToLocalCache(reinterpret_cast<const char*>(pclmul_streams[j] +
+ kPrefetchHorizon));
}
// We process each stream in 64 byte blocks. This can be written as