diff options
Diffstat (limited to 'absl/crc')
-rw-r--r-- | absl/crc/BUILD.bazel | 12 | ||||
-rw-r--r-- | absl/crc/CMakeLists.txt | 1 | ||||
-rw-r--r-- | absl/crc/internal/crc32_x86_arm_combined_simd.h | 36 | ||||
-rw-r--r-- | absl/crc/internal/crc_cord_state.cc | 7 | ||||
-rw-r--r-- | absl/crc/internal/crc_memcpy_fallback.cc | 3 | ||||
-rw-r--r-- | absl/crc/internal/crc_memcpy_x86_arm_combined.cc | 20 | ||||
-rw-r--r-- | absl/crc/internal/crc_non_temporal_memcpy.cc | 2 | ||||
-rw-r--r-- | absl/crc/internal/crc_x86_arm_combined.cc | 18 | ||||
-rw-r--r-- | absl/crc/internal/non_temporal_memcpy.h | 35 |
9 files changed, 76 insertions, 58 deletions
diff --git a/absl/crc/BUILD.bazel b/absl/crc/BUILD.bazel index f44c3f6b..890d637c 100644 --- a/absl/crc/BUILD.bazel +++ b/absl/crc/BUILD.bazel @@ -121,7 +121,9 @@ cc_library( hdrs = ["internal/non_temporal_arm_intrinsics.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = ["//visibility:private"], + visibility = [ + ":__pkg__", + ], deps = [ "//absl/base:config", ], @@ -132,7 +134,9 @@ cc_library( hdrs = ["internal/non_temporal_memcpy.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = ["//visibility:private"], + visibility = [ + ":__pkg__", + ], deps = [ ":non_temporal_arm_intrinsics", "//absl/base:config", @@ -182,8 +186,8 @@ cc_library( deps = [ ":crc32c", "//absl/base:config", + "//absl/base:no_destructor", "//absl/numeric:bits", - "//absl/strings", ], ) @@ -203,7 +207,7 @@ cc_test( cc_binary( name = "crc32c_benchmark", - testonly = 1, + testonly = True, srcs = ["crc32c_benchmark.cc"], copts = ABSL_TEST_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, diff --git a/absl/crc/CMakeLists.txt b/absl/crc/CMakeLists.txt index ec7b4512..d52a1bc4 100644 --- a/absl/crc/CMakeLists.txt +++ b/absl/crc/CMakeLists.txt @@ -159,6 +159,7 @@ absl_cc_library( absl::crc32c absl::config absl::strings + absl::no_destructor ) absl_cc_test( diff --git a/absl/crc/internal/crc32_x86_arm_combined_simd.h b/absl/crc/internal/crc32_x86_arm_combined_simd.h index 59995ae3..0f6e3479 100644 --- a/absl/crc/internal/crc32_x86_arm_combined_simd.h +++ b/absl/crc/internal/crc32_x86_arm_combined_simd.h @@ -33,14 +33,15 @@ #include <x86intrin.h> #define ABSL_CRC_INTERNAL_HAVE_X86_SIMD -#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__) +#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__) && \ + defined(_M_AMD64) // MSVC AVX (/arch:AVX) implies SSE 4.2 and PCLMULQDQ. #include <intrin.h> #define ABSL_CRC_INTERNAL_HAVE_X86_SIMD -#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) && \ - defined(__ARM_FEATURE_CRC32) && defined(ABSL_INTERNAL_HAVE_ARM_NEON) && \ +#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) && \ + defined(__ARM_FEATURE_CRC32) && defined(ABSL_INTERNAL_HAVE_ARM_NEON) && \ defined(__ARM_FEATURE_CRYPTO) #include <arm_acle.h> @@ -101,10 +102,11 @@ V128 V128_Xor(const V128 l, const V128 r); // Produces an AND operation of |l| and |r|. V128 V128_And(const V128 l, const V128 r); -// Sets two 64 bit integers to one 128 bit vector. The order is reverse. +// Sets the lower half of a 128 bit register to the given 64-bit value and +// zeroes the upper half. // dst[63:0] := |r| -// dst[127:64] := |l| -V128 V128_From2x64(const uint64_t l, const uint64_t r); +// dst[127:64] := |0| +V128 V128_From64WithZeroFill(const uint64_t r); // Shift |l| right by |imm| bytes while shifting in zeros. template <int imm> @@ -121,8 +123,8 @@ uint64_t V128_Extract64(const V128 l); // Extracts the low 64 bits from V128. int64_t V128_Low64(const V128 l); -// Left-shifts packed 64-bit integers in l by r. -V128 V128_ShiftLeft64(const V128 l, const V128 r); +// Add packed 64-bit integers in |l| and |r|. +V128 V128_Add64(const V128 l, const V128 r); #endif @@ -170,8 +172,8 @@ inline V128 V128_Xor(const V128 l, const V128 r) { return _mm_xor_si128(l, r); } inline V128 V128_And(const V128 l, const V128 r) { return _mm_and_si128(l, r); } -inline V128 V128_From2x64(const uint64_t l, const uint64_t r) { - return _mm_set_epi64x(static_cast<int64_t>(l), static_cast<int64_t>(r)); +inline V128 V128_From64WithZeroFill(const uint64_t r) { + return _mm_set_epi64x(static_cast<int64_t>(0), static_cast<int64_t>(r)); } template <int imm> @@ -191,8 +193,8 @@ inline uint64_t V128_Extract64(const V128 l) { inline int64_t V128_Low64(const V128 l) { return _mm_cvtsi128_si64(l); } -inline V128 V128_ShiftLeft64(const V128 l, const V128 r) { - return _mm_sll_epi64(l, r); +inline V128 V128_Add64(const V128 l, const V128 r) { + return _mm_add_epi64(l, r); } #elif defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD) @@ -261,10 +263,12 @@ inline V128 V128_Xor(const V128 l, const V128 r) { return veorq_u64(l, r); } inline V128 V128_And(const V128 l, const V128 r) { return vandq_u64(l, r); } -inline V128 V128_From2x64(const uint64_t l, const uint64_t r) { - return vcombine_u64(vcreate_u64(r), vcreate_u64(l)); +inline V128 V128_From64WithZeroFill(const uint64_t r){ + constexpr uint64x2_t kZero = {0, 0}; + return vsetq_lane_u64(r, kZero, 0); } + template <int imm> inline V128 V128_ShiftRight(const V128 l) { return vreinterpretq_u64_s8( @@ -285,9 +289,7 @@ inline int64_t V128_Low64(const V128 l) { return vgetq_lane_s64(vreinterpretq_s64_u64(l), 0); } -inline V128 V128_ShiftLeft64(const V128 l, const V128 r) { - return vshlq_u64(l, vreinterpretq_s64_u64(r)); -} +inline V128 V128_Add64(const V128 l, const V128 r) { return vaddq_u64(l, r); } #endif diff --git a/absl/crc/internal/crc_cord_state.cc b/absl/crc/internal/crc_cord_state.cc index 28d04dc4..303a5559 100644 --- a/absl/crc/internal/crc_cord_state.cc +++ b/absl/crc/internal/crc_cord_state.cc @@ -17,6 +17,7 @@ #include <cassert> #include "absl/base/config.h" +#include "absl/base/no_destructor.h" #include "absl/numeric/bits.h" namespace absl { @@ -24,14 +25,14 @@ ABSL_NAMESPACE_BEGIN namespace crc_internal { CrcCordState::RefcountedRep* CrcCordState::RefSharedEmptyRep() { - static CrcCordState::RefcountedRep* empty = new CrcCordState::RefcountedRep; + static absl::NoDestructor<CrcCordState::RefcountedRep> empty; assert(empty->count.load(std::memory_order_relaxed) >= 1); assert(empty->rep.removed_prefix.length == 0); assert(empty->rep.prefix_crc.empty()); - Ref(empty); - return empty; + Ref(empty.get()); + return empty.get(); } CrcCordState::CrcCordState() : refcounted_rep_(new RefcountedRep) {} diff --git a/absl/crc/internal/crc_memcpy_fallback.cc b/absl/crc/internal/crc_memcpy_fallback.cc index 07795504..e34249f0 100644 --- a/absl/crc/internal/crc_memcpy_fallback.cc +++ b/absl/crc/internal/crc_memcpy_fallback.cc @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include <cstdint> +#include <cstring> #include <memory> #include "absl/base/config.h" #include "absl/crc/crc32c.h" #include "absl/crc/internal/crc_memcpy.h" +#include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN diff --git a/absl/crc/internal/crc_memcpy_x86_arm_combined.cc b/absl/crc/internal/crc_memcpy_x86_arm_combined.cc index 968e9ae3..38f61e9b 100644 --- a/absl/crc/internal/crc_memcpy_x86_arm_combined.cc +++ b/absl/crc/internal/crc_memcpy_x86_arm_combined.cc @@ -52,6 +52,7 @@ #include <cstring> #include <memory> +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/optimization.h" #include "absl/base/prefetch.h" @@ -88,9 +89,11 @@ inline crc32c_t ShortCrcCopy(char* dst, const char* src, std::size_t length, constexpr size_t kIntLoadsPerVec = sizeof(V128) / sizeof(uint64_t); // Common function for copying the tails of multiple large regions. +// Disable ubsan for benign unaligned access. See b/254108538. template <size_t vec_regions, size_t int_regions> -inline void LargeTailCopy(crc32c_t* crcs, char** dst, const char** src, - size_t region_size, size_t copy_rounds) { +ABSL_ATTRIBUTE_NO_SANITIZE_UNDEFINED inline void LargeTailCopy( + crc32c_t* crcs, char** dst, const char** src, size_t region_size, + size_t copy_rounds) { std::array<V128, vec_regions> data; std::array<uint64_t, kIntLoadsPerVec * int_regions> int_data; @@ -127,8 +130,8 @@ inline void LargeTailCopy(crc32c_t* crcs, char** dst, const char** src, size_t data_index = i * kIntLoadsPerVec + j; int_data[data_index] = *(usrc + j); - crcs[region] = crc32c_t{static_cast<uint32_t>(CRC32_u64( - static_cast<uint32_t>(crcs[region]), int_data[data_index]))}; + crcs[region] = crc32c_t{CRC32_u64(static_cast<uint32_t>(crcs[region]), + int_data[data_index])}; *(udst + j) = int_data[data_index]; } @@ -155,8 +158,10 @@ class AcceleratedCrcMemcpyEngine : public CrcMemcpyEngine { std::size_t length, crc32c_t initial_crc) const override; }; +// Disable ubsan for benign unaligned access. See b/254108538. template <size_t vec_regions, size_t int_regions> -crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( +ABSL_ATTRIBUTE_NO_SANITIZE_UNDEFINED crc32c_t +AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( void* __restrict dst, const void* __restrict src, std::size_t length, crc32c_t initial_crc) const { constexpr std::size_t kRegions = vec_regions + int_regions; @@ -196,7 +201,6 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( // Start work on the CRC: undo the XOR from the previous calculation or set up // the initial value of the CRC. - // initial_crc ^= kCrcDataXor; initial_crc = crc32c_t{static_cast<uint32_t>(initial_crc) ^ kCrcDataXor}; // Do an initial alignment copy, so we can use aligned store instructions to @@ -295,8 +299,8 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( // Load and CRC the data. int_data[data_index] = *(usrc + i * kIntLoadsPerVec + k); - crcs[region] = crc32c_t{static_cast<uint32_t>(CRC32_u64( - static_cast<uint32_t>(crcs[region]), int_data[data_index]))}; + crcs[region] = crc32c_t{CRC32_u64(static_cast<uint32_t>(crcs[region]), + int_data[data_index])}; // Store the data. *(udst + i * kIntLoadsPerVec + k) = int_data[data_index]; diff --git a/absl/crc/internal/crc_non_temporal_memcpy.cc b/absl/crc/internal/crc_non_temporal_memcpy.cc index adc867f6..a56f1eb0 100644 --- a/absl/crc/internal/crc_non_temporal_memcpy.cc +++ b/absl/crc/internal/crc_non_temporal_memcpy.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include <cstdint> +#include <cstddef> #include "absl/base/config.h" #include "absl/crc/crc32c.h" diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc index 51eff4ed..79dace34 100644 --- a/absl/crc/internal/crc_x86_arm_combined.cc +++ b/absl/crc/internal/crc_x86_arm_combined.cc @@ -101,13 +101,17 @@ constexpr size_t kMediumCutoff = 2048; namespace { uint32_t multiply(uint32_t a, uint32_t b) { - V128 shifts = V128_From2x64(0, 1); - V128 power = V128_From2x64(0, a); - V128 crc = V128_From2x64(0, b); + V128 power = V128_From64WithZeroFill(a); + V128 crc = V128_From64WithZeroFill(b); V128 res = V128_PMulLow(power, crc); - // Combine crc values - res = V128_ShiftLeft64(res, shifts); + // Combine crc values. + // + // Adding res to itself is equivalent to multiplying by 2, + // or shifting left by 1. Addition is used as not all compilers + // are able to generate optimal code without this hint. + // https://godbolt.org/z/rr3fMnf39 + res = V128_Add64(res, res); return static_cast<uint32_t>(V128_Extract32<1>(res)) ^ CRC32_u32(0, static_cast<uint32_t>(V128_Low64(res))); } @@ -444,11 +448,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams V128 magic = *(reinterpret_cast<const V128*>(kClmulConstants) + bs - 1); - V128 tmp = V128_From2x64(0, l64); + V128 tmp = V128_From64WithZeroFill(l64); V128 res1 = V128_PMulLow(tmp, magic); - tmp = V128_From2x64(0, l641); + tmp = V128_From64WithZeroFill(l641); V128 res2 = V128_PMul10(tmp, magic); V128 x = V128_Xor(res1, res2); diff --git a/absl/crc/internal/non_temporal_memcpy.h b/absl/crc/internal/non_temporal_memcpy.h index b3d94bad..7ae83bdc 100644 --- a/absl/crc/internal/non_temporal_memcpy.h +++ b/absl/crc/internal/non_temporal_memcpy.h @@ -19,19 +19,8 @@ #include <intrin.h> #endif -#ifdef __SSE__ -#include <xmmintrin.h> -#endif - -#ifdef __SSE2__ -#include <emmintrin.h> -#endif - -#ifdef __SSE3__ -#include <pmmintrin.h> -#endif - -#ifdef __AVX__ +#if defined(__SSE__) || defined(__AVX__) +// Pulls in both SSE and AVX intrinsics. #include <immintrin.h> #endif @@ -44,6 +33,7 @@ #include <cstdint> #include <cstring> +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/optimization.h" @@ -57,7 +47,9 @@ namespace crc_internal { // memcpy can save 1 DRAM load of the destination cacheline. constexpr size_t kCacheLineSize = ABSL_CACHELINE_SIZE; -// If the objects overlap, the behavior is undefined. +// If the objects overlap, the behavior is undefined. Uses regular memcpy +// instead of non-temporal memcpy if the required CPU intrinsics are unavailable +// at compile time. inline void *non_temporal_store_memcpy(void *__restrict dst, const void *__restrict src, size_t len) { #if defined(__SSE3__) || defined(__aarch64__) || \ @@ -119,10 +111,20 @@ inline void *non_temporal_store_memcpy(void *__restrict dst, #endif // __SSE3__ || __aarch64__ || (_MSC_VER && __AVX__) } +// If the objects overlap, the behavior is undefined. Uses regular memcpy +// instead of non-temporal memcpy if the required CPU intrinsics are unavailable +// at compile time. +#if ABSL_HAVE_CPP_ATTRIBUTE(gnu::target) && \ + (defined(__x86_64__) || defined(__i386__)) +[[gnu::target("avx")]] +#endif inline void *non_temporal_store_memcpy_avx(void *__restrict dst, const void *__restrict src, size_t len) { -#ifdef __AVX__ + // This function requires AVX. For clang and gcc we compile it with AVX even + // if the translation unit isn't built with AVX support. This works because we + // only select this implementation at runtime if the CPU supports AVX. +#if defined(__SSE3__) || (defined(_MSC_VER) && defined(__AVX__)) uint8_t *d = reinterpret_cast<uint8_t *>(dst); const uint8_t *s = reinterpret_cast<const uint8_t *>(src); @@ -168,9 +170,8 @@ inline void *non_temporal_store_memcpy_avx(void *__restrict dst, } return dst; #else - // Fallback to regular memcpy when AVX is not available. return memcpy(dst, src, len); -#endif // __AVX__ +#endif // __SSE3__ || (_MSC_VER && __AVX__) } } // namespace crc_internal |