summaryrefslogtreecommitdiff
path: root/absl/crc
diff options
context:
space:
mode:
authorGravatar Derek Mauro <dmauro@google.com>2022-12-11 16:43:28 -0800
committerGravatar Copybara-Service <copybara-worker@google.com>2022-12-11 16:44:18 -0800
commitff5644bb34333d2ad7f1abf421d57bda155398e7 (patch)
treee2fc6335996cb5d96d871e67c0411c8c104233d4 /absl/crc
parent0b8e676c1b83b157786b4766362fd647b5c59e0d (diff)
Allow Cord to store chunked checksums
PiperOrigin-RevId: 494587777 Change-Id: I41504edca6fcf750d52602fa84a33bc7fe5fbb48
Diffstat (limited to 'absl/crc')
-rw-r--r--absl/crc/BUILD.bazel28
-rw-r--r--absl/crc/CMakeLists.txt28
-rw-r--r--absl/crc/internal/crc32_x86_arm_combined_simd.h5
-rw-r--r--absl/crc/internal/crc_cord_state.cc130
-rw-r--r--absl/crc/internal/crc_cord_state.h159
-rw-r--r--absl/crc/internal/crc_cord_state_test.cc124
-rw-r--r--absl/crc/internal/crc_x86_arm_combined.cc9
7 files changed, 474 insertions, 9 deletions
diff --git a/absl/crc/BUILD.bazel b/absl/crc/BUILD.bazel
index bceb7258..29374560 100644
--- a/absl/crc/BUILD.bazel
+++ b/absl/crc/BUILD.bazel
@@ -163,6 +163,34 @@ cc_test(
],
)
+cc_library(
+ name = "crc_cord_state",
+ srcs = ["internal/crc_cord_state.cc"],
+ hdrs = ["internal/crc_cord_state.h"],
+ copts = ABSL_DEFAULT_COPTS,
+ linkopts = ABSL_DEFAULT_LINKOPTS,
+ visibility = ["//absl/strings:__pkg__"],
+ deps = [
+ ":crc32c",
+ "//absl/base:config",
+ "//absl/numeric:bits",
+ "//absl/strings",
+ ],
+)
+
+cc_test(
+ name = "crc_cord_state_test",
+ srcs = ["internal/crc_cord_state_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ linkopts = ABSL_DEFAULT_LINKOPTS,
+ visibility = ["//visibility:private"],
+ deps = [
+ ":crc32c",
+ ":crc_cord_state",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
cc_binary(
name = "crc32c_benchmark",
testonly = 1,
diff --git a/absl/crc/CMakeLists.txt b/absl/crc/CMakeLists.txt
index e1093c9f..72ea2094 100644
--- a/absl/crc/CMakeLists.txt
+++ b/absl/crc/CMakeLists.txt
@@ -146,3 +146,31 @@ absl_cc_test(
absl::non_temporal_memcpy
GTest::gtest_main
)
+
+absl_cc_library(
+ NAME
+ crc_cord_state
+ HDRS
+ "internal/crc_cord_state.h"
+ SRCS
+ "internal/crc_cord_state.cc"
+ COPTS
+ ${ABSL_DEFAULT_COPTS}
+ DEPS
+ absl::crc32c
+ absl::config
+ absl::strings
+)
+
+absl_cc_test(
+ NAME
+ crc_cord_state_test
+ SRCS
+ "internal/crc_cord_state_test.cc"
+ COPTS
+ ${ABSL_DEFAULT_COPTS}
+ DEPS
+ absl::crc_cord_state
+ absl::crc32c
+ GTest::gtest_main
+)
diff --git a/absl/crc/internal/crc32_x86_arm_combined_simd.h b/absl/crc/internal/crc32_x86_arm_combined_simd.h
index f6c2a21c..f23cd75e 100644
--- a/absl/crc/internal/crc32_x86_arm_combined_simd.h
+++ b/absl/crc/internal/crc32_x86_arm_combined_simd.h
@@ -38,7 +38,8 @@
#define ABSL_CRC_INTERNAL_HAVE_X86_SIMD
#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) && \
- defined(__ARM_FEATURE_CRC32) && defined(__ARM_NEON)
+ defined(__ARM_FEATURE_CRC32) && defined(__ARM_NEON) && \
+ defined(__ARM_FEATURE_CRYPTO)
#include <arm_acle.h>
#include <arm_neon.h>
@@ -254,7 +255,7 @@ inline int64_t V128_Low64(const V128 l) {
}
inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
- return vshlq_u64(l, r);
+ return vshlq_u64(l, vreinterpretq_s64_u64(r));
}
#endif
diff --git a/absl/crc/internal/crc_cord_state.cc b/absl/crc/internal/crc_cord_state.cc
new file mode 100644
index 00000000..d0be0ddd
--- /dev/null
+++ b/absl/crc/internal/crc_cord_state.cc
@@ -0,0 +1,130 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/crc/internal/crc_cord_state.h"
+
+#include <cassert>
+
+#include "absl/base/config.h"
+#include "absl/numeric/bits.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace crc_internal {
+
+CrcCordState::RefcountedRep* CrcCordState::RefSharedEmptyRep() {
+ static CrcCordState::RefcountedRep* empty = new CrcCordState::RefcountedRep;
+
+ assert(empty->count.load(std::memory_order_relaxed) >= 1);
+ assert(empty->rep.removed_prefix.length == 0);
+ assert(empty->rep.prefix_crc.empty());
+
+ Ref(empty);
+ return empty;
+}
+
+CrcCordState::CrcCordState() : refcounted_rep_(new RefcountedRep) {}
+
+CrcCordState::CrcCordState(const CrcCordState& other)
+ : refcounted_rep_(other.refcounted_rep_) {
+ Ref(refcounted_rep_);
+}
+
+CrcCordState::CrcCordState(CrcCordState&& other)
+ : refcounted_rep_(other.refcounted_rep_) {
+ // Make `other` valid for use after move.
+ other.refcounted_rep_ = RefSharedEmptyRep();
+}
+
+CrcCordState& CrcCordState::operator=(const CrcCordState& other) {
+ if (this != &other) {
+ Unref(refcounted_rep_);
+ refcounted_rep_ = other.refcounted_rep_;
+ Ref(refcounted_rep_);
+ }
+ return *this;
+}
+
+CrcCordState& CrcCordState::operator=(CrcCordState&& other) {
+ if (this != &other) {
+ Unref(refcounted_rep_);
+ refcounted_rep_ = other.refcounted_rep_;
+ // Make `other` valid for use after move.
+ other.refcounted_rep_ = RefSharedEmptyRep();
+ }
+ return *this;
+}
+
+CrcCordState::~CrcCordState() {
+ Unref(refcounted_rep_);
+}
+
+crc32c_t CrcCordState::Checksum() const {
+ if (rep().prefix_crc.empty()) {
+ return absl::crc32c_t{0};
+ }
+ if (IsNormalized()) {
+ return rep().prefix_crc.back().crc;
+ }
+ return absl::RemoveCrc32cPrefix(
+ rep().removed_prefix.crc, rep().prefix_crc.back().crc,
+ rep().prefix_crc.back().length - rep().removed_prefix.length);
+}
+
+CrcCordState::PrefixCrc CrcCordState::NormalizedPrefixCrcAtNthChunk(
+ size_t n) const {
+ assert(n < NumChunks());
+ if (IsNormalized()) {
+ return rep().prefix_crc[n];
+ }
+ size_t length = rep().prefix_crc[n].length - rep().removed_prefix.length;
+ return PrefixCrc(length,
+ absl::RemoveCrc32cPrefix(rep().removed_prefix.crc,
+ rep().prefix_crc[n].crc, length));
+}
+
+void CrcCordState::Normalize() {
+ if (IsNormalized() || rep().prefix_crc.empty()) {
+ return;
+ }
+
+ Rep* r = mutable_rep();
+ for (auto& prefix_crc : r->prefix_crc) {
+ size_t remaining = prefix_crc.length - r->removed_prefix.length;
+ prefix_crc.crc = absl::RemoveCrc32cPrefix(r->removed_prefix.crc,
+ prefix_crc.crc, remaining);
+ prefix_crc.length = remaining;
+ }
+ r->removed_prefix = PrefixCrc();
+}
+
+void CrcCordState::Poison() {
+ Rep* rep = mutable_rep();
+ if (NumChunks() > 0) {
+ for (auto& prefix_crc : rep->prefix_crc) {
+ // This is basically CRC32::Scramble().
+ uint32_t crc = static_cast<uint32_t>(prefix_crc.crc);
+ crc += 0x2e76e41b;
+ crc = absl::rotr(crc, 17);
+ prefix_crc.crc = crc32c_t{crc};
+ }
+ } else {
+ // Add a fake corrupt chunk.
+ rep->prefix_crc.push_back(PrefixCrc(0, crc32c_t{1}));
+ }
+}
+
+} // namespace crc_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/crc/internal/crc_cord_state.h b/absl/crc/internal/crc_cord_state.h
new file mode 100644
index 00000000..d305424c
--- /dev/null
+++ b/absl/crc/internal/crc_cord_state.h
@@ -0,0 +1,159 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
+#define ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
+
+#include <atomic>
+#include <cstddef>
+#include <deque>
+
+#include "absl/base/config.h"
+#include "absl/crc/crc32c.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace crc_internal {
+
+// CrcCordState is a copy-on-write class that holds the chunked CRC32C data
+// that allows CrcCord to perform efficient substring operations. CrcCordState
+// is used as a member variable in CrcCord. When a CrcCord is converted to a
+// Cord, the CrcCordState is shallow-copied into the root node of the Cord. If
+// the converted Cord is modified outside of CrcCord, the CrcCordState is
+// discarded from the Cord. If the Cord is converted back to a CrcCord, and the
+// Cord is still carrying the CrcCordState in its root node, the CrcCord can
+// re-use the CrcCordState, making the construction of the CrcCord cheap.
+//
+// CrcCordState does not try to encapsulate the CRC32C state (CrcCord requires
+// knowledge of how CrcCordState represents the CRC32C state). It does
+// encapsulate the copy-on-write nature of the state.
+class CrcCordState {
+ public:
+ // Constructors.
+ CrcCordState();
+ CrcCordState(const CrcCordState&);
+ CrcCordState(CrcCordState&&);
+
+ // Destructor. Atomically unreferences the data.
+ ~CrcCordState();
+
+ // Copy and move operators.
+ CrcCordState& operator=(const CrcCordState&);
+ CrcCordState& operator=(CrcCordState&&);
+
+ // A (length, crc) pair.
+ struct PrefixCrc {
+ PrefixCrc() = default;
+ PrefixCrc(size_t length_arg, absl::crc32c_t crc_arg)
+ : length(length_arg), crc(crc_arg) {}
+
+ size_t length = 0;
+
+ // TODO(absl-team): Memory stomping often zeros out memory. If this struct
+ // gets overwritten, we could end up with {0, 0}, which is the correct CRC
+ // for a string of length 0. Consider storing a scrambled value and
+ // unscrambling it before verifying it.
+ absl::crc32c_t crc = absl::crc32c_t{0};
+ };
+
+ // The representation of the chunked CRC32C data.
+ struct Rep {
+ // `removed_prefix` is the crc and length of any prefix that has been
+ // removed from the Cord (for example, by calling
+ // `CrcCord::RemovePrefix()`). To get the checkum of any prefix of the cord,
+ // this value must be subtracted from `prefix_crc`. See `Checksum()` for an
+ // example.
+ //
+ // CrcCordState is said to be "normalized" if removed_prefix.length == 0.
+ PrefixCrc removed_prefix;
+
+ // A deque of (length, crc) pairs, representing length and crc of a prefix
+ // of the Cord, before removed_prefix has been subtracted. The lengths of
+ // the prefixes are stored in increasing order. If the Cord is not empty,
+ // the last value in deque is the contains the CRC32C of the entire Cord
+ // when removed_prefix is subtracted from it.
+ std::deque<PrefixCrc> prefix_crc;
+ };
+
+ // Returns a reference to the representation of the chunked CRC32C data.
+ const Rep& rep() const { return refcounted_rep_->rep; }
+
+ // Returns a mutable reference to the representation of the chunked CRC32C
+ // data. Calling this function will copy the data if another instance also
+ // holds a reference to the data, so it is important to call rep() instead if
+ // the data may not be mutated.
+ Rep* mutable_rep() {
+ if (refcounted_rep_->count.load(std::memory_order_acquire) != 1) {
+ RefcountedRep* copy = new RefcountedRep;
+ copy->rep = refcounted_rep_->rep;
+ Unref(refcounted_rep_);
+ refcounted_rep_ = copy;
+ }
+ return &refcounted_rep_->rep;
+ }
+
+ // Returns the CRC32C of the entire Cord.
+ absl::crc32c_t Checksum() const;
+
+ // Returns true if the chunked CRC32C cached is normalized.
+ bool IsNormalized() const { return rep().removed_prefix.length == 0; }
+
+ // Normalizes the chunked CRC32C checksum cache by substracting any removed
+ // prefix from the chunks.
+ void Normalize();
+
+ // Returns the number of cached chunks.
+ size_t NumChunks() const { return rep().prefix_crc.size(); }
+
+ // Helper that returns the (length, crc) of the `n`-th cached chunked.
+ PrefixCrc NormalizedPrefixCrcAtNthChunk(size_t n) const;
+
+ // Poisons all chunks to so that Checksum() will likely be incorrect with high
+ // probability.
+ void Poison();
+
+ private:
+ struct RefcountedRep {
+ std::atomic<int32_t> count{1};
+ Rep rep;
+ };
+
+ // Adds a reference to the shared global empty `RefcountedRep`, and returns a
+ // pointer to the `RefcountedRep`. This is an optimization to avoid unneeded
+ // allocations when the allocation is unlikely to ever be used. The returned
+ // pointer can be `Unref()`ed when it is no longer needed. Since the returned
+ // instance will always have a reference counter greater than 1, attempts to
+ // modify it (by calling `mutable_rep()`) will create a new unshared copy.
+ static RefcountedRep* RefSharedEmptyRep();
+
+ static void Ref(RefcountedRep* r) {
+ assert(r != nullptr);
+ r->count.fetch_add(1, std::memory_order_relaxed);
+ }
+
+ static void Unref(RefcountedRep* r) {
+ assert(r != nullptr);
+ if (r->count.fetch_sub(1, std::memory_order_acq_rel) == 1) {
+ delete r;
+ }
+ }
+
+ RefcountedRep* refcounted_rep_;
+};
+
+} // namespace crc_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
diff --git a/absl/crc/internal/crc_cord_state_test.cc b/absl/crc/internal/crc_cord_state_test.cc
new file mode 100644
index 00000000..e2c8e3cd
--- /dev/null
+++ b/absl/crc/internal/crc_cord_state_test.cc
@@ -0,0 +1,124 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/crc/internal/crc_cord_state.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <string>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "absl/crc/crc32c.h"
+
+namespace {
+
+TEST(CrcCordState, Default) {
+ absl::crc_internal::CrcCordState state;
+ EXPECT_TRUE(state.IsNormalized());
+ EXPECT_EQ(state.Checksum(), absl::crc32c_t{0});
+ state.Normalize();
+ EXPECT_EQ(state.Checksum(), absl::crc32c_t{0});
+}
+
+TEST(CrcCordState, Normalize) {
+ absl::crc_internal::CrcCordState state;
+ auto* rep = state.mutable_rep();
+ rep->prefix_crc.push_back(
+ absl::crc_internal::CrcCordState::PrefixCrc(1000, absl::crc32c_t{1000}));
+ rep->prefix_crc.push_back(
+ absl::crc_internal::CrcCordState::PrefixCrc(2000, absl::crc32c_t{2000}));
+ rep->removed_prefix =
+ absl::crc_internal::CrcCordState::PrefixCrc(500, absl::crc32c_t{500});
+
+ // The removed_prefix means state is not normalized.
+ EXPECT_FALSE(state.IsNormalized());
+
+ absl::crc32c_t crc = state.Checksum();
+ state.Normalize();
+ EXPECT_TRUE(state.IsNormalized());
+
+ // The checksum should not change as a result of calling Normalize().
+ EXPECT_EQ(state.Checksum(), crc);
+ EXPECT_EQ(rep->removed_prefix.length, 0);
+}
+
+TEST(CrcCordState, Copy) {
+ absl::crc_internal::CrcCordState state;
+ auto* rep = state.mutable_rep();
+ rep->prefix_crc.push_back(
+ absl::crc_internal::CrcCordState::PrefixCrc(1000, absl::crc32c_t{1000}));
+
+ absl::crc_internal::CrcCordState copy = state;
+
+ EXPECT_EQ(state.Checksum(), absl::crc32c_t{1000});
+ EXPECT_EQ(copy.Checksum(), absl::crc32c_t{1000});
+}
+
+TEST(CrcCordState, UnsharedSelfCopy) {
+ absl::crc_internal::CrcCordState state;
+ auto* rep = state.mutable_rep();
+ rep->prefix_crc.push_back(
+ absl::crc_internal::CrcCordState::PrefixCrc(1000, absl::crc32c_t{1000}));
+
+ const absl::crc_internal::CrcCordState& ref = state;
+ state = ref;
+
+ EXPECT_EQ(state.Checksum(), absl::crc32c_t{1000});
+}
+
+TEST(CrcCordState, Move) {
+ absl::crc_internal::CrcCordState state;
+ auto* rep = state.mutable_rep();
+ rep->prefix_crc.push_back(
+ absl::crc_internal::CrcCordState::PrefixCrc(1000, absl::crc32c_t{1000}));
+
+ absl::crc_internal::CrcCordState moved = std::move(state);
+ EXPECT_EQ(moved.Checksum(), absl::crc32c_t{1000});
+}
+
+TEST(CrcCordState, UnsharedSelfMove) {
+ absl::crc_internal::CrcCordState state;
+ auto* rep = state.mutable_rep();
+ rep->prefix_crc.push_back(
+ absl::crc_internal::CrcCordState::PrefixCrc(1000, absl::crc32c_t{1000}));
+
+ absl::crc_internal::CrcCordState& ref = state;
+ state = std::move(ref);
+
+ EXPECT_EQ(state.Checksum(), absl::crc32c_t{1000});
+}
+
+TEST(CrcCordState, PoisonDefault) {
+ absl::crc_internal::CrcCordState state;
+ state.Poison();
+ EXPECT_NE(state.Checksum(), absl::crc32c_t{0});
+}
+
+TEST(CrcCordState, PoisonData) {
+ absl::crc_internal::CrcCordState state;
+ auto* rep = state.mutable_rep();
+ rep->prefix_crc.push_back(
+ absl::crc_internal::CrcCordState::PrefixCrc(1000, absl::crc32c_t{1000}));
+ rep->prefix_crc.push_back(
+ absl::crc_internal::CrcCordState::PrefixCrc(2000, absl::crc32c_t{2000}));
+ rep->removed_prefix =
+ absl::crc_internal::CrcCordState::PrefixCrc(500, absl::crc32c_t{500});
+
+ absl::crc32c_t crc = state.Checksum();
+ state.Poison();
+ EXPECT_NE(state.Checksum(), crc);
+}
+
+} // namespace
diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc
index 2112f609..f6e6aacb 100644
--- a/absl/crc/internal/crc_x86_arm_combined.cc
+++ b/absl/crc/internal/crc_x86_arm_combined.cc
@@ -29,13 +29,8 @@
#include "absl/memory/memory.h"
#include "absl/numeric/bits.h"
-#if defined(__aarch64__) && defined(__LITTLE_ENDIAN__) && \
- defined(__ARM_FEATURE_CRC32) && defined(__ARM_NEON)
-#define ABSL_INTERNAL_CAN_USE_SIMD_CRC32C
-#elif defined(__SSE4_2__) && defined(__PCLMUL__)
-#define ABSL_INTERNAL_CAN_USE_SIMD_CRC32C
-#elif defined(_MSC_VER) && defined(__AVX__)
-// MSVC AVX support (/arch:AVX) implies SSE 4.2 and PCLMUL support.
+#if defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD) || \
+ defined(ABSL_CRC_INTERNAL_HAVE_X86_SIMD)
#define ABSL_INTERNAL_CAN_USE_SIMD_CRC32C
#endif