Diffstat (limited to 'absl/crc/internal/crc32_x86_arm_combined_simd.h')
-rw-r--r--  absl/crc/internal/crc32_x86_arm_combined_simd.h | 36
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/absl/crc/internal/crc32_x86_arm_combined_simd.h b/absl/crc/internal/crc32_x86_arm_combined_simd.h
index 59995ae3..0f6e3479 100644
--- a/absl/crc/internal/crc32_x86_arm_combined_simd.h
+++ b/absl/crc/internal/crc32_x86_arm_combined_simd.h
@@ -33,14 +33,15 @@
#include <x86intrin.h>
#define ABSL_CRC_INTERNAL_HAVE_X86_SIMD
-#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__)
+#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__) && \
+ defined(_M_AMD64)
// MSVC AVX (/arch:AVX) implies SSE 4.2 and PCLMULQDQ.
#include <intrin.h>
#define ABSL_CRC_INTERNAL_HAVE_X86_SIMD
-#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) && \
- defined(__ARM_FEATURE_CRC32) && defined(ABSL_INTERNAL_HAVE_ARM_NEON) && \
+#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) && \
+ defined(__ARM_FEATURE_CRC32) && defined(ABSL_INTERNAL_HAVE_ARM_NEON) && \
defined(__ARM_FEATURE_CRYPTO)
#include <arm_acle.h>
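
Not part of the commit: a hypothetical probe showing which backend the guards above end up selecting, using only the two macros this header defines (ABSL_CRC_INTERNAL_HAVE_X86_SIMD and ABSL_CRC_INTERNAL_HAVE_ARM_SIMD); the include path and build flags are assumed.

// Hypothetical translation unit; assumes the header is reachable as below.
#include "absl/crc/internal/crc32_x86_arm_combined_simd.h"

#include <cstdio>

int main() {
#if defined(ABSL_CRC_INTERNAL_HAVE_X86_SIMD)
  std::puts("x86 path selected: SSE 4.2 + PCLMULQDQ assumed available");
#elif defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD)
  std::puts("AArch64 path selected: CRC32 + NEON + crypto extensions");
#else
  std::puts("no accelerated path selected by this header");
#endif
}
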
@@ -101,10 +102,11 @@ V128 V128_Xor(const V128 l, const V128 r);
// Produces an AND operation of |l| and |r|.
V128 V128_And(const V128 l, const V128 r);
-// Sets two 64 bit integers to one 128 bit vector. The order is reverse.
+// Sets the lower half of a 128 bit register to the given 64-bit value and
+// zeroes the upper half.
// dst[63:0] := |r|
-// dst[127:64] := |l|
-V128 V128_From2x64(const uint64_t l, const uint64_t r);
+// dst[127:64] := |0|
+V128 V128_From64WithZeroFill(const uint64_t r);
// Shift |l| right by |imm| bytes while shifting in zeros.
template <int imm>
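
Reading aid, not part of the diff: a minimal sketch of the documented V128_From64WithZeroFill contract (the low 64 bits take |r|, the high 64 bits are zero), written directly against the x86 intrinsics this header uses further down; the test value is arbitrary and SSE4.1 is assumed for _mm_extract_epi64.

// Sketch of the contract: dst[63:0] == r, dst[127:64] == 0.
#include <smmintrin.h>  // SSE4.1; pulls in SSE2

#include <cassert>
#include <cstdint>

void From64WithZeroFillContract() {
  const uint64_t r = 0x0123456789abcdefULL;                      // arbitrary value
  const __m128i v = _mm_set_epi64x(0, static_cast<int64_t>(r));  // same form as the new helper
  assert(static_cast<uint64_t>(_mm_cvtsi128_si64(v)) == r);      // low half carries |r|
  assert(_mm_extract_epi64(v, 1) == 0);                          // high half is zero-filled
}
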
@@ -121,8 +123,8 @@ uint64_t V128_Extract64(const V128 l);
// Extracts the low 64 bits from V128.
int64_t V128_Low64(const V128 l);
-// Left-shifts packed 64-bit integers in l by r.
-V128 V128_ShiftLeft64(const V128 l, const V128 r);
+// Add packed 64-bit integers in |l| and |r|.
+V128 V128_Add64(const V128 l, const V128 r);
#endif
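
Also not part of the diff: a hedged sketch of what the replacement helper computes, again via the x86 intrinsic used below. V128_Add64 performs an independent, wrapping 64-bit addition in each lane, with no carry between lanes; the lane values here are made up.

// Sketch of V128_Add64 semantics: per-lane 64-bit addition.
#include <smmintrin.h>  // SSE4.1 for _mm_extract_epi64

#include <cassert>

void Add64Semantics() {
  const __m128i l = _mm_set_epi64x(/*hi=*/20, /*lo=*/1);
  const __m128i r = _mm_set_epi64x(/*hi=*/22, /*lo=*/2);
  const __m128i sum = _mm_add_epi64(l, r);  // adds each 64-bit lane separately
  assert(_mm_cvtsi128_si64(sum) == 3);      // low lane:  1 + 2
  assert(_mm_extract_epi64(sum, 1) == 42);  // high lane: 20 + 22
}
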
@@ -170,8 +172,8 @@ inline V128 V128_Xor(const V128 l, const V128 r) { return _mm_xor_si128(l, r); }
inline V128 V128_And(const V128 l, const V128 r) { return _mm_and_si128(l, r); }
-inline V128 V128_From2x64(const uint64_t l, const uint64_t r) {
- return _mm_set_epi64x(static_cast<int64_t>(l), static_cast<int64_t>(r));
+inline V128 V128_From64WithZeroFill(const uint64_t r) {
+ return _mm_set_epi64x(static_cast<int64_t>(0), static_cast<int64_t>(r));
}
template <int imm>
@@ -191,8 +193,8 @@ inline uint64_t V128_Extract64(const V128 l) {
inline int64_t V128_Low64(const V128 l) { return _mm_cvtsi128_si64(l); }
-inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
- return _mm_sll_epi64(l, r);
+inline V128 V128_Add64(const V128 l, const V128 r) {
+ return _mm_add_epi64(l, r);
}
#elif defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD)
@@ -261,10 +263,12 @@ inline V128 V128_Xor(const V128 l, const V128 r) { return veorq_u64(l, r); }
inline V128 V128_And(const V128 l, const V128 r) { return vandq_u64(l, r); }
-inline V128 V128_From2x64(const uint64_t l, const uint64_t r) {
- return vcombine_u64(vcreate_u64(r), vcreate_u64(l));
+inline V128 V128_From64WithZeroFill(const uint64_t r) {
+ constexpr uint64x2_t kZero = {0, 0};
+ return vsetq_lane_u64(r, kZero, 0);
}
+
template <int imm>
inline V128 V128_ShiftRight(const V128 l) {
return vreinterpretq_u64_s8(
@@ -285,9 +289,7 @@ inline int64_t V128_Low64(const V128 l) {
return vgetq_lane_s64(vreinterpretq_s64_u64(l), 0);
}
-inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
- return vshlq_u64(l, vreinterpretq_s64_u64(r));
-}
+inline V128 V128_Add64(const V128 l, const V128 r) { return vaddq_u64(l, r); }
#endif
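
Finally, an equivalent AArch64-side sketch (not part of the commit) of the same two contracts, using the NEON intrinsics that appear in this hunk plus vgetq_lane_u64; the values are arbitrary.

// NEON sketch: zero-filled construction, then per-lane 64-bit addition.
#include <arm_neon.h>

#include <cassert>
#include <cstdint>

void NeonContracts() {
  constexpr uint64x2_t kZero = {0, 0};
  const uint64x2_t v = vsetq_lane_u64(0x0123456789abcdefULL, kZero, 0);
  assert(vgetq_lane_u64(v, 0) == 0x0123456789abcdefULL);  // low lane carries |r|
  assert(vgetq_lane_u64(v, 1) == 0);                       // high lane zero-filled

  const uint64x2_t sum = vaddq_u64(v, vsetq_lane_u64(1, kZero, 0));
  assert(vgetq_lane_u64(sum, 0) == 0x0123456789abcdf0ULL);  // low lane: r + 1
  assert(vgetq_lane_u64(sum, 1) == 0);                      // high lane: 0 + 0
}
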