From 61e47a454c81eb07147b0315485f476513cc1230 Mon Sep 17 00:00:00 2001 From: Connal de Souza Date: Thu, 4 Apr 2024 13:08:45 -0700 Subject: Optimize crc32 V128_From2x64 on Arm This removes redundant vector-vector moves and results in Extend being up to 3% faster. PiperOrigin-RevId: 621948170 Change-Id: Id82816aa6e294d34140ff591103cb20feac79d9a --- absl/crc/internal/crc_x86_arm_combined.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'absl/crc/internal/crc_x86_arm_combined.cc') diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc index 51eff4ed..20dd3e01 100644 --- a/absl/crc/internal/crc_x86_arm_combined.cc +++ b/absl/crc/internal/crc_x86_arm_combined.cc @@ -101,9 +101,9 @@ constexpr size_t kMediumCutoff = 2048; namespace { uint32_t multiply(uint32_t a, uint32_t b) { - V128 shifts = V128_From2x64(0, 1); - V128 power = V128_From2x64(0, a); - V128 crc = V128_From2x64(0, b); + V128 shifts = V128_From64WithZeroFill(1); + V128 power = V128_From64WithZeroFill(a); + V128 crc = V128_From64WithZeroFill(b); V128 res = V128_PMulLow(power, crc); // Combine crc values @@ -444,11 +444,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams V128 magic = *(reinterpret_cast<const V128*>(kClmulConstants) + bs - 1); - V128 tmp = V128_From2x64(0, l64); + V128 tmp = V128_From64WithZeroFill(l64); V128 res1 = V128_PMulLow(tmp, magic); - tmp = V128_From2x64(0, l641); + tmp = V128_From64WithZeroFill(l641); V128 res2 = V128_PMul10(tmp, magic); V128 x = V128_Xor(res1, res2); -- cgit v1.2.3