diff options
Diffstat (limited to 'absl/crc/internal/non_temporal_arm_intrinsics.h')
-rw-r--r-- | absl/crc/internal/non_temporal_arm_intrinsics.h | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/absl/crc/internal/non_temporal_arm_intrinsics.h b/absl/crc/internal/non_temporal_arm_intrinsics.h new file mode 100644 index 00000000..92632a33 --- /dev/null +++ b/absl/crc/internal/non_temporal_arm_intrinsics.h @@ -0,0 +1,77 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_ +#define ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_ + +#ifdef __aarch64__ +#include <arm_neon.h> + +typedef int64x2_t __m128i; /* 128-bit vector containing integers */ +#define vreinterpretq_m128i_s32(x) vreinterpretq_s64_s32(x) +#define vreinterpretq_s64_m128i(x) (x) + +// Guarantees that every preceding store is globally visible before any +// subsequent store. +// https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx +static inline __attribute__((always_inline)) void _mm_sfence(void) { + __sync_synchronize(); +} + +// Load 128-bits of integer data from unaligned memory into dst. This intrinsic +// may perform better than _mm_loadu_si128 when the data crosses a cache line +// boundary. +// +// dst[127:0] := MEM[mem_addr+127:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128 +#define _mm_lddqu_si128 _mm_loadu_si128 + +// Loads 128-bit value. : +// https://msdn.microsoft.com/zh-cn/library/f4k12ae8(v=vs.90).aspx +static inline __attribute__((always_inline)) __m128i _mm_loadu_si128( + const __m128i *p) { + return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *)p)); +} + +// Stores the data in a to the address p without polluting the caches. If the +// cache line containing address p is already in the cache, the cache will be +// updated. +// https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx +static inline __attribute__((always_inline)) void _mm_stream_si128(__m128i *p, + __m128i a) { +#if __has_builtin(__builtin_nontemporal_store) + __builtin_nontemporal_store(a, p); +#else + vst1q_s64((int64_t *)p, vreinterpretq_s64_m128i(a)); +#endif +} + +// Sets the 16 signed 8-bit integer values. +// https://msdn.microsoft.com/en-us/library/x0cx8zd3(v=vs.90).aspx +static inline __attribute__((always_inline)) __m128i _mm_set_epi8( + signed char b15, signed char b14, signed char b13, signed char b12, + signed char b11, signed char b10, signed char b9, signed char b8, + signed char b7, signed char b6, signed char b5, signed char b4, + signed char b3, signed char b2, signed char b1, signed char b0) { + int8_t __attribute__((aligned(16))) + data[16] = {(int8_t)b0, (int8_t)b1, (int8_t)b2, (int8_t)b3, + (int8_t)b4, (int8_t)b5, (int8_t)b6, (int8_t)b7, + (int8_t)b8, (int8_t)b9, (int8_t)b10, (int8_t)b11, + (int8_t)b12, (int8_t)b13, (int8_t)b14, (int8_t)b15}; + return (__m128i)vld1q_s8(data); +} +#endif // __aarch64__ + +#endif // ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_ |