diff options
-rw-r--r-- | absl/crc/internal/crc_memcpy_test.cc | 7 | ||||
-rw-r--r-- | absl/crc/internal/crc_memcpy_x86_64.cc | 28 |
2 files changed, 25 insertions, 10 deletions
diff --git a/absl/crc/internal/crc_memcpy_test.cc b/absl/crc/internal/crc_memcpy_test.cc index bbdcd205..a4c6a012 100644 --- a/absl/crc/internal/crc_memcpy_test.cc +++ b/absl/crc/internal/crc_memcpy_test.cc @@ -107,6 +107,9 @@ TEST_P(x86ParamTest, SmallCorrectnessCheckSourceAlignment) { *(base_data + i) = static_cast<char>(absl::Uniform<unsigned char>(gen_)); } + SCOPED_TRACE(absl::StrCat("engine=<", GetParam().vector_lanes, ",", + GetParam().integer_lanes, ">, ", "size=", size, + ", source_alignment=", source_alignment)); absl::crc32c_t initial_crc = absl::crc32c_t{absl::Uniform<uint32_t>(gen_)}; absl::crc32c_t experiment_crc = @@ -138,6 +141,9 @@ TEST_P(x86ParamTest, SmallCorrectnessCheckDestAlignment) { *(base_data + i) = static_cast<char>(absl::Uniform<unsigned char>(gen_)); } + SCOPED_TRACE(absl::StrCat("engine=<", GetParam().vector_lanes, ",", + GetParam().integer_lanes, ">, ", "size=", size, + ", destination_alignment=", dest_alignment)); absl::crc32c_t initial_crc = absl::crc32c_t{absl::Uniform<uint32_t>(gen_)}; absl::crc32c_t experiment_crc = @@ -161,6 +167,7 @@ INSTANTIATE_TEST_SUITE_P(x86ParamTest, x86ParamTest, ::testing::Values( // Tests for configurations that may occur in prod. TestParams{X86, 3, 0}, TestParams{X86, 1, 2}, + TestParams{X86, 1, 0}, // Fallback test. TestParams{FALLBACK, 0, 0}, // Non Temporal diff --git a/absl/crc/internal/crc_memcpy_x86_64.cc b/absl/crc/internal/crc_memcpy_x86_64.cc index c984cf9a..c4ccd472 100644 --- a/absl/crc/internal/crc_memcpy_x86_64.cc +++ b/absl/crc/internal/crc_memcpy_x86_64.cc @@ -159,6 +159,7 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( void* __restrict dst, const void* __restrict src, std::size_t length, crc32c_t initial_crc) const { constexpr std::size_t kRegions = vec_regions + int_regions; + static_assert(kRegions > 0, "Must specify at least one region."); constexpr uint32_t kCrcDataXor = uint32_t{0xffffffff}; constexpr std::size_t kBlockSize = sizeof(__m128i); constexpr std::size_t kCopyRoundSize = kRegions * kBlockSize; @@ -314,6 +315,21 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( src_bytes += region_size * (kRegions - 1); dst_bytes += region_size * (kRegions - 1); + // Copy and CRC the tail through the XMM registers. + std::size_t tail_blocks = tail_size / kBlockSize; + LargeTailCopy<0, 1>(&crcs[kRegions - 1], &dst_bytes, &src_bytes, 0, + tail_blocks); + + // Final tail copy for under 16 bytes. + crcs[kRegions - 1] = + ShortCrcCopy(dst_bytes, src_bytes, tail_size - tail_blocks * kBlockSize, + crcs[kRegions - 1]); + + if (kRegions == 1) { + // If there is only one region, finalize and return its CRC. + return crc32c_t{static_cast<uint32_t>(crcs[0]) ^ kCrcDataXor}; + } + // Finalize the first CRCs: XOR the internal CRCs by the XOR mask to undo the // XOR done before doing block copy + CRCs. for (size_t i = 0; i + 1 < kRegions; i++) { @@ -326,16 +342,6 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute( full_crc = ConcatCrc32c(full_crc, crcs[i], region_size); } - // Copy and CRC the tail through the XMM registers. - std::size_t tail_blocks = tail_size / kBlockSize; - LargeTailCopy<0, 1>(&crcs[kRegions - 1], &dst_bytes, &src_bytes, 0, - tail_blocks); - - // Final tail copy for under 16 bytes. - crcs[kRegions - 1] = - ShortCrcCopy(dst_bytes, src_bytes, tail_size - tail_blocks * kBlockSize, - crcs[kRegions - 1]); - // Finalize and concatenate the final CRC, then return. crcs[kRegions - 1] = crc32c_t{static_cast<uint32_t>(crcs[kRegions - 1]) ^ kCrcDataXor}; @@ -427,6 +433,8 @@ std::unique_ptr<CrcMemcpyEngine> CrcMemcpy::GetTestEngine(int vector, return std::make_unique<AcceleratedCrcMemcpyEngine<3, 0>>(); } else if (vector == 1 && integer == 2) { return std::make_unique<AcceleratedCrcMemcpyEngine<1, 2>>(); + } else if (vector == 1 && integer == 0) { + return std::make_unique<AcceleratedCrcMemcpyEngine<1, 0>>(); } return nullptr; } |