From 1687dbf814eceb93de2d93f91b31acaab404091c Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Wed, 9 Nov 2022 13:08:29 -0800 Subject: Release the CRC library This implementation can advantage of hardware acceleration available on common CPUs when using GCC and Clang. A future update may enable this on MSVC as well. PiperOrigin-RevId: 487327024 Change-Id: I99a8f1bcbdf25297e776537e23bd0a902e0818a1 --- absl/crc/internal/non_temporal_memcpy_test.cc | 88 +++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 absl/crc/internal/non_temporal_memcpy_test.cc (limited to 'absl/crc/internal/non_temporal_memcpy_test.cc') diff --git a/absl/crc/internal/non_temporal_memcpy_test.cc b/absl/crc/internal/non_temporal_memcpy_test.cc new file mode 100644 index 00000000..f7a1c3db --- /dev/null +++ b/absl/crc/internal/non_temporal_memcpy_test.cc @@ -0,0 +1,88 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/crc/internal/non_temporal_memcpy.h" + +#include +#include +#include +#include + +#include "gtest/gtest.h" + +namespace { + +struct TestParam { + size_t copy_size; + uint32_t src_offset; + uint32_t dst_offset; +}; + +class NonTemporalMemcpyTest : public testing::TestWithParam { + protected: + void SetUp() override { + // Make buf_size multiple of 16 bytes. + size_t buf_size = ((std::max(GetParam().src_offset, GetParam().dst_offset) + + GetParam().copy_size) + + 15) / + 16 * 16; + a_.resize(buf_size); + b_.resize(buf_size); + for (size_t i = 0; i < buf_size; i++) { + a_[i] = i % 256; + b_[i] = ~a_[i]; + } + } + + std::vector a_, b_; +}; + +TEST_P(NonTemporalMemcpyTest, SSEEquality) { + uint8_t *src = a_.data() + GetParam().src_offset; + uint8_t *dst = b_.data() + GetParam().dst_offset; + absl::crc_internal::non_temporal_store_memcpy(dst, src, GetParam().copy_size); + for (size_t i = 0; i < GetParam().copy_size; i++) { + EXPECT_EQ(src[i], dst[i]); + } +} + +TEST_P(NonTemporalMemcpyTest, AVXEquality) { + uint8_t* src = a_.data() + GetParam().src_offset; + uint8_t* dst = b_.data() + GetParam().dst_offset; + + absl::crc_internal::non_temporal_store_memcpy_avx(dst, src, + GetParam().copy_size); + for (size_t i = 0; i < GetParam().copy_size; i++) { + EXPECT_EQ(src[i], dst[i]); + } +} + +// 63B is smaller than one cacheline operation thus the non-temporal routine +// will not be called. +// 4352B is sufficient for testing 4092B data copy with room for offsets. +constexpr TestParam params[] = { + {63, 0, 0}, {58, 5, 5}, {61, 2, 0}, {61, 0, 2}, + {58, 5, 2}, {4096, 0, 0}, {4096, 0, 1}, {4096, 0, 2}, + {4096, 0, 3}, {4096, 0, 4}, {4096, 0, 5}, {4096, 0, 6}, + {4096, 0, 7}, {4096, 0, 8}, {4096, 0, 9}, {4096, 0, 10}, + {4096, 0, 11}, {4096, 0, 12}, {4096, 0, 13}, {4096, 0, 14}, + {4096, 0, 15}, {4096, 7, 7}, {4096, 3, 0}, {4096, 1, 0}, + {4096, 9, 3}, {4096, 9, 11}, {8192, 0, 0}, {8192, 5, 2}, + {1024768, 7, 11}, {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, + {1, 1, 1}}; + +INSTANTIATE_TEST_SUITE_P(ParameterizedNonTemporalMemcpyTest, + NonTemporalMemcpyTest, testing::ValuesIn(params)); + +} // namespace -- cgit v1.2.3