author     Pavel P <pavlov.pavel@gmail.com>  2024-05-07 10:32:12 -0700
committer  Copybara-Service <copybara-worker@google.com>  2024-05-07 10:33:09 -0700
commit     77224c28ff00abf1a8d233226921d72d27bbae7e (patch)
tree       33395b332e2b17a27a9a91d49d8d38f07a0a77ec /absl/crc/internal
parent     e0df4a721bba5f5bd10d68fc52568ca3ea6b0123 (diff)
PR #1662: Replace shift with addition in crc multiply
Imported from GitHub PR https://github.com/abseil/abseil-cpp/pull/1662
Merge 4b2c6c909b573d31a1cccba7cb72d4d8badeef8b into cba31a956209e68e4d4049e8a9bc03b1fd67320a
Merging this change closes #1662
COPYBARA_INTEGRATE_REVIEW=https://github.com/abseil/abseil-cpp/pull/1662 from pps83:crc-add 4b2c6c909b573d31a1cccba7cb72d4d8badeef8b
PiperOrigin-RevId: 631470883
Change-Id: I4a72be643ed341ddf0e0007418ab4a613a03db4b
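
[Editor's note] The change rests on a simple identity: for unsigned 64-bit lanes, x + x wraps modulo 2^64 and therefore produces exactly the same bit pattern as x << 1. A minimal standalone check of that identity (illustrative only, not part of this commit):

// Standalone check (not from the commit): x + x and x << 1 are
// bit-identical for uint64_t, including when the top bit is set,
// because unsigned arithmetic wraps modulo 2^64.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t samples[] = {0u, 1u, 0x8000000000000000u,
                              0xffffffffffffffffu, 0x123456789abcdef0u};
  for (uint64_t x : samples) {
    assert(x + x == (x << 1));
  }
  return 0;
}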
Diffstat (limited to 'absl/crc/internal')
 absl/crc/internal/crc32_x86_arm_combined_simd.h | 12 ++++++------
 absl/crc/internal/crc_x86_arm_combined.cc       | 10 +++++++---
 2 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/absl/crc/internal/crc32_x86_arm_combined_simd.h b/absl/crc/internal/crc32_x86_arm_combined_simd.h
index aa6a6595..0f6e3479 100644
--- a/absl/crc/internal/crc32_x86_arm_combined_simd.h
+++ b/absl/crc/internal/crc32_x86_arm_combined_simd.h
@@ -123,8 +123,8 @@ uint64_t V128_Extract64(const V128 l);
 // Extracts the low 64 bits from V128.
 int64_t V128_Low64(const V128 l);
 
-// Left-shifts packed 64-bit integers in l by r.
-V128 V128_ShiftLeft64(const V128 l, const V128 r);
+// Add packed 64-bit integers in |l| and |r|.
+V128 V128_Add64(const V128 l, const V128 r);
 
 #endif
 
@@ -193,8 +193,8 @@ inline uint64_t V128_Extract64(const V128 l) {
 
 inline int64_t V128_Low64(const V128 l) { return _mm_cvtsi128_si64(l); }
 
-inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
-  return _mm_sll_epi64(l, r);
+inline V128 V128_Add64(const V128 l, const V128 r) {
+  return _mm_add_epi64(l, r);
 }
 
 #elif defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD)
@@ -289,9 +289,7 @@ inline int64_t V128_Low64(const V128 l) {
   return vgetq_lane_s64(vreinterpretq_s64_u64(l), 0);
 }
 
-inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
-  return vshlq_u64(l, vreinterpretq_s64_u64(r));
-}
+inline V128 V128_Add64(const V128 l, const V128 r) { return vaddq_u64(l, r); }
 
 #endif
diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc
index 20dd3e01..79dace34 100644
--- a/absl/crc/internal/crc_x86_arm_combined.cc
+++ b/absl/crc/internal/crc_x86_arm_combined.cc
@@ -101,13 +101,17 @@ constexpr size_t kMediumCutoff = 2048;
 namespace {
 
 uint32_t multiply(uint32_t a, uint32_t b) {
-  V128 shifts = V128_From64WithZeroFill(1);
   V128 power = V128_From64WithZeroFill(a);
   V128 crc = V128_From64WithZeroFill(b);
   V128 res = V128_PMulLow(power, crc);
 
-  // Combine crc values
-  res = V128_ShiftLeft64(res, shifts);
+  // Combine crc values.
+  //
+  // Adding res to itself is equivalent to multiplying by 2,
+  // or shifting left by 1. Addition is used as not all compilers
+  // are able to generate optimal code without this hint.
+  // https://godbolt.org/z/rr3fMnf39
+  res = V128_Add64(res, res);
 
   return static_cast<uint32_t>(V128_Extract32<1>(res)) ^
          CRC32_u32(0, static_cast<uint32_t>(V128_Low64(res)));
 }
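
[Editor's note] For reference, the patched multiply() can be mirrored with raw x86 intrinsics to confirm that the old shift-based combine and the new add-based combine agree. This is a hypothetical sketch, not Abseil code: multiply_mirror and its use_add flag are invented for illustration, the V128_* wrappers are replaced by the underlying intrinsics, and an x86-64 target with SSE4.2 and PCLMUL is assumed (e.g. g++ -O2 -msse4.2 -mpclmul).

// Hypothetical mirror of multiply() (not Abseil's code): the V128_*
// wrappers are replaced with the underlying x86 intrinsics so the
// shift and add variants of the combine step can be compared directly.
// Assumes x86-64 with SSE4.2 + PCLMUL.
#include <cstdint>
#include <cstdio>
#include <nmmintrin.h>  // _mm_crc32_u32 (SSE4.2)
#include <wmmintrin.h>  // _mm_clmulepi64_si128 (PCLMUL)

static uint32_t multiply_mirror(uint32_t a, uint32_t b, bool use_add) {
  __m128i power = _mm_cvtsi64_si128(a);               // V128_From64WithZeroFill(a)
  __m128i crc = _mm_cvtsi64_si128(b);                 // V128_From64WithZeroFill(b)
  __m128i res = _mm_clmulepi64_si128(power, crc, 0);  // V128_PMulLow
  res = use_add ? _mm_add_epi64(res, res)                    // new: res + res
                : _mm_sll_epi64(res, _mm_cvtsi64_si128(1));  // old: res << 1
  uint32_t hi = static_cast<uint32_t>(_mm_extract_epi32(res, 1));  // V128_Extract32<1>
  uint32_t lo = static_cast<uint32_t>(_mm_cvtsi128_si64(res));     // V128_Low64
  return hi ^ _mm_crc32_u32(0, lo);                                // CRC32_u32
}

int main() {
  // Both combine strategies must produce the same result for all inputs.
  uint32_t vals[] = {0u, 1u, 0xdeadbeefu, 0xffffffffu, 0x12345678u};
  for (uint32_t a : vals) {
    for (uint32_t b : vals) {
      uint32_t with_shift = multiply_mirror(a, b, /*use_add=*/false);
      uint32_t with_add = multiply_mirror(a, b, /*use_add=*/true);
      if (with_shift != with_add) {
        std::printf("mismatch at a=%08x b=%08x\n", a, b);
        return 1;
      }
    }
  }
  std::printf("shift and add combines agree\n");
  return 0;
}

On NEON the same substitution replaces vshlq_u64 with vaddq_u64, as the header change above shows; the rationale in the diff is that the addition form lets more compilers pick the optimal instruction sequence.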