diff options
22 files changed, 1123 insertions, 280 deletions
diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index 77c6f9e9..7c827251 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake @@ -210,6 +210,7 @@ set(ABSL_INTERNAL_DLL_FILES "strings/internal/cord_rep_btree_navigator.h" "strings/internal/cord_rep_btree_reader.cc" "strings/internal/cord_rep_btree_reader.h" + "strings/internal/cord_rep_concat.cc" "strings/internal/cord_rep_crc.cc" "strings/internal/cord_rep_crc.h" "strings/internal/cord_rep_consume.h" diff --git a/absl/container/inlined_vector.h b/absl/container/inlined_vector.h index 318c4679..5597d43e 100644 --- a/absl/container/inlined_vector.h +++ b/absl/container/inlined_vector.h @@ -36,7 +36,6 @@ #define ABSL_CONTAINER_INLINED_VECTOR_H_ #include <algorithm> -#include <cassert> #include <cstddef> #include <cstdlib> #include <cstring> diff --git a/absl/container/internal/inlined_vector.h b/absl/container/internal/inlined_vector.h index 26caa49a..98c26afc 100644 --- a/absl/container/internal/inlined_vector.h +++ b/absl/container/internal/inlined_vector.h @@ -430,7 +430,7 @@ class Storage { } void SubtractSize(SizeType<A> count) { - assert(count <= GetSize()); + ABSL_HARDENING_ASSERT(count <= GetSize()); GetSizeAndIsAllocated() -= count << static_cast<SizeType<A>>(1); } @@ -441,7 +441,8 @@ class Storage { } void MemcpyFrom(const Storage& other_storage) { - assert(IsMemcpyOk<A>::value || other_storage.GetIsAllocated()); + ABSL_HARDENING_ASSERT(IsMemcpyOk<A>::value || + other_storage.GetIsAllocated()); GetSizeAndIsAllocated() = other_storage.GetSizeAndIsAllocated(); data_ = other_storage.data_; @@ -490,7 +491,7 @@ void Storage<T, N, A>::DestroyContents() { template <typename T, size_t N, typename A> void Storage<T, N, A>::InitFrom(const Storage& other) { const SizeType<A> n = other.GetSize(); - assert(n > 0); // Empty sources handled handled in caller. + ABSL_HARDENING_ASSERT(n > 0); // Empty sources handled handled in caller. ConstPointer<A> src; Pointer<A> dst; if (!other.GetIsAllocated()) { @@ -522,8 +523,8 @@ template <typename ValueAdapter> auto Storage<T, N, A>::Initialize(ValueAdapter values, SizeType<A> new_size) -> void { // Only callable from constructors! - assert(!GetIsAllocated()); - assert(GetSize() == 0); + ABSL_HARDENING_ASSERT(!GetIsAllocated()); + ABSL_HARDENING_ASSERT(GetSize() == 0); Pointer<A> construct_data; if (new_size > GetInlinedCapacity()) { @@ -832,7 +833,7 @@ auto Storage<T, N, A>::Reserve(SizeType<A> requested_capacity) -> void { template <typename T, size_t N, typename A> auto Storage<T, N, A>::ShrinkToFit() -> void { // May only be called on allocated instances! - assert(GetIsAllocated()); + ABSL_HARDENING_ASSERT(GetIsAllocated()); StorageView<A> storage_view{GetAllocatedData(), GetSize(), GetAllocatedCapacity()}; @@ -881,7 +882,7 @@ auto Storage<T, N, A>::ShrinkToFit() -> void { template <typename T, size_t N, typename A> auto Storage<T, N, A>::Swap(Storage* other_storage_ptr) -> void { using std::swap; - assert(this != other_storage_ptr); + ABSL_HARDENING_ASSERT(this != other_storage_ptr); if (GetIsAllocated() && other_storage_ptr->GetIsAllocated()) { swap(data_.allocated, other_storage_ptr->data_.allocated); diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 1948fe32..0c47ca54 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -271,6 +271,7 @@ cc_library( "internal/cord_rep_btree.cc", "internal/cord_rep_btree_navigator.cc", "internal/cord_rep_btree_reader.cc", + "internal/cord_rep_concat.cc", "internal/cord_rep_consume.cc", "internal/cord_rep_crc.cc", "internal/cord_rep_ring.cc", @@ -308,12 +309,15 @@ cc_library( ) cc_test( - name = "cord_internal_test", - srcs = ["internal/cord_internal_test.cc"], + name = "cord_rep_concat_test", + size = "small", + srcs = ["internal/cord_rep_concat_test.cc"], copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ ":cord_internal", + ":cord_rep_test_util", + "//absl/base:config", "@com_google_googletest//:gtest_main", ], ) @@ -711,6 +715,7 @@ cc_test( "//absl/base:endian", "//absl/base:raw_logging_internal", "//absl/container:fixed_array", + "//absl/hash", "//absl/random", "@com_google_googletest//:gtest_main", ], diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 1ef3332c..aab97efd 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -568,6 +568,7 @@ absl_cc_library( "internal/cord_rep_btree.cc" "internal/cord_rep_btree_navigator.cc" "internal/cord_rep_btree_reader.cc" + "internal/cord_rep_concat.cc" "internal/cord_rep_crc.cc" "internal/cord_rep_consume.cc" "internal/cord_rep_ring.cc" @@ -922,6 +923,7 @@ absl_cc_test( absl::cordz_test_helpers absl::core_headers absl::endian + absl::hash absl::random_random absl::raw_logging_internal absl::fixed_array @@ -948,13 +950,17 @@ absl_cc_test( absl_cc_test( NAME - cord_internal_test + cord_rep_concat_test SRCS - "internal/cord_internal_test.cc" + "internal/cord_rep_concat_test.cc" COPTS ${ABSL_TEST_COPTS} DEPS + absl::base + absl::config absl::cord_internal + absl::cord_rep_test_util + absl::core_headers GTest::gmock_main ) diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 63a82133..0015bb9f 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -267,6 +267,7 @@ static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) { return nullptr; } else { CordRepSubstring* rep = new CordRepSubstring(); + assert(child->IsExternal() || child->IsFlat()); assert((offset + length) <= child->length); rep->length = length; rep->tag = cord_internal::SUBSTRING; @@ -343,7 +344,9 @@ inline void Cord::InlineRep::remove_prefix(size_t n) { // Returns `rep` converted into a CordRepBtree. // Directly returns `rep` if `rep` is already a CordRepBtree. static CordRepBtree* ForceBtree(CordRep* rep) { - return rep->IsBtree() ? rep->btree() : CordRepBtree::Create(rep); + return rep->IsBtree() + ? rep->btree() + : CordRepBtree::Create(cord_internal::RemoveCrcNode(rep)); } void Cord::InlineRep::AppendTreeToInlined(CordRep* tree, @@ -366,13 +369,14 @@ void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) { if (btree_enabled()) { tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree); } else { - tree = Concat(data_.as_tree(), tree); + tree = Concat(cord_internal::RemoveCrcNode(data_.as_tree()), tree); } SetTree(tree, scope); } void Cord::InlineRep::AppendTree(CordRep* tree, MethodIdentifier method) { if (tree == nullptr) return; + assert(!tree->IsCrc()); if (data_.is_tree()) { AppendTreeToTree(tree, method); } else { @@ -401,13 +405,14 @@ void Cord::InlineRep::PrependTreeToTree(CordRep* tree, if (btree_enabled()) { tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree); } else { - tree = Concat(tree, data_.as_tree()); + tree = Concat(tree, cord_internal::RemoveCrcNode(data_.as_tree())); } SetTree(tree, scope); } void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) { assert(tree != nullptr); + assert(!tree->IsCrc()); if (data_.is_tree()) { PrependTreeToTree(tree, method); } else { @@ -421,7 +426,7 @@ void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) { // written to region and the actual size increase will be written to size. static inline bool PrepareAppendRegion(CordRep* root, char** region, size_t* size, size_t max_length) { - if (root->IsBtree() && root->refcount.IsMutable()) { + if (root->IsBtree() && root->refcount.IsOne()) { Span<char> span = root->btree()->GetAppendBuffer(max_length); if (!span.empty()) { *region = span.data(); @@ -432,11 +437,11 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, // Search down the right-hand path for a non-full FLAT node. CordRep* dst = root; - while (dst->IsConcat() && dst->refcount.IsMutable()) { + while (dst->IsConcat() && dst->refcount.IsOne()) { dst = dst->concat()->right; } - if (!dst->IsFlat() || !dst->refcount.IsMutable()) { + if (!dst->IsFlat() || !dst->refcount.IsOne()) { *region = nullptr; *size = 0; return false; @@ -481,8 +486,9 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, } size_t extra = has_length ? length : (std::max)(sz, kMinFlatLength); - CordRep* rep = root ? root : MakeFlatWithExtraCapacity(extra); CordzUpdateScope scope(root ? data_.cordz_info() : nullptr, method); + CordRep* rep = root ? cord_internal::RemoveCrcNode(root) + : MakeFlatWithExtraCapacity(extra); if (PrepareAppendRegion(rep, region, size, length)) { CommitTree(root, rep, scope, method); return; @@ -651,7 +657,7 @@ Cord& Cord::operator=(absl::string_view src) { if (tree != nullptr) { CordzUpdateScope scope(contents_.cordz_info(), method); if (tree->IsFlat() && tree->flat()->Capacity() >= length && - tree->refcount.IsMutable()) { + tree->refcount.IsOne()) { // Copy in place if the existing FLAT node is reusable. memmove(tree->flat()->Data(), data, length); tree->length = length; @@ -677,6 +683,7 @@ void Cord::InlineRep::AppendArray(absl::string_view src, const CordRep* const root = rep; CordzUpdateScope scope(root ? cordz_info() : nullptr, method); if (root != nullptr) { + rep = cord_internal::RemoveCrcNode(rep); char* region; if (PrepareAppendRegion(rep, ®ion, &appended, src.size())) { memcpy(region, src.data(), appended); @@ -748,7 +755,8 @@ inline void Cord::AppendImpl(C&& src) { // Since destination is empty, we can avoid allocating a node, if (src.contents_.is_tree()) { // by taking the tree directly - CordRep* rep = std::forward<C>(src).TakeRep(); + CordRep* rep = + cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep()); contents_.EmplaceTree(rep, method); } else { // or copying over inline data @@ -784,7 +792,7 @@ inline void Cord::AppendImpl(C&& src) { } // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize) - CordRep* rep = std::forward<C>(src).TakeRep(); + CordRep* rep = cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep()); contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord); } @@ -812,7 +820,8 @@ void Cord::Prepend(const Cord& src) { CordRep* src_tree = src.contents_.tree(); if (src_tree != nullptr) { CordRep::Ref(src_tree); - contents_.PrependTree(src_tree, CordzUpdateTracker::kPrependCord); + contents_.PrependTree(cord_internal::RemoveCrcNode(src_tree), + CordzUpdateTracker::kPrependCord); return; } @@ -856,6 +865,7 @@ static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { if (n == 0) return CordRep::Ref(node); absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack; + assert(!node->IsCrc()); while (node->IsConcat()) { assert(n <= node->length); if (n < node->concat()->left->length) { @@ -896,7 +906,8 @@ static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { if (n >= node->length) return nullptr; if (n == 0) return CordRep::Ref(node); absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; - bool inplace_ok = node->refcount.IsMutable(); + bool inplace_ok = node->refcount.IsOne(); + assert(!node->IsCrc()); while (node->IsConcat()) { assert(n <= node->length); @@ -909,7 +920,7 @@ static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { n -= node->concat()->right->length; node = node->concat()->left; } - inplace_ok = inplace_ok && node->refcount.IsMutable(); + inplace_ok = inplace_ok && node->refcount.IsOne(); } assert(n <= node->length); @@ -946,6 +957,7 @@ void Cord::RemovePrefix(size_t n) { } else { auto constexpr method = CordzUpdateTracker::kRemovePrefix; CordzUpdateScope scope(contents_.cordz_info(), method); + tree = cord_internal::RemoveCrcNode(tree); if (tree->IsBtree()) { CordRep* old = tree; tree = tree->btree()->SubTree(n, tree->length - n); @@ -969,6 +981,7 @@ void Cord::RemoveSuffix(size_t n) { } else { auto constexpr method = CordzUpdateTracker::kRemoveSuffix; CordzUpdateScope scope(contents_.cordz_info(), method); + tree = cord_internal::RemoveCrcNode(tree); if (tree->IsBtree()) { tree = CordRepBtree::RemoveSuffix(tree->btree(), n); } else { @@ -992,6 +1005,7 @@ struct SubRange { static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) { absl::InlinedVector<CordRep*, kInlinedVectorSize> results; absl::InlinedVector<SubRange, kInlinedVectorSize> todo; + assert(!node->IsCrc()); todo.push_back(SubRange(node, pos, n)); do { const SubRange& sr = todo.back(); @@ -1062,6 +1076,7 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { return sub_cord; } + tree = cord_internal::SkipCrcNode(tree); if (tree->IsBtree()) { tree = tree->btree()->SubTree(pos, new_size); } else { @@ -1082,6 +1097,7 @@ class CordForest { void Build(CordRep* cord_root) { std::vector<CordRep*> pending = {cord_root}; + assert(cord_root->IsConcat()); while (!pending.empty()) { CordRep* node = pending.back(); @@ -1258,7 +1274,7 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { return absl::string_view(data_.as_chars(), data_.inline_size()); } - CordRep* node = tree(); + CordRep* node = cord_internal::SkipCrcNode(tree()); if (node->IsFlat()) { return absl::string_view(node->flat()->Data(), node->length); } @@ -1300,6 +1316,28 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { return absl::string_view(node->external()->base + offset, length); } +void Cord::SetExpectedChecksum(uint32_t crc) { + auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum; + if (empty()) return; + + if (!contents_.is_tree()) { + CordRep* rep = contents_.MakeFlatWithExtraCapacity(0); + rep = CordRepCrc::New(rep, crc); + contents_.EmplaceTree(rep, method); + } else { + const CordzUpdateScope scope(contents_.data_.cordz_info(), method); + CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), crc); + contents_.SetTree(rep, scope); + } +} + +absl::optional<uint32_t> Cord::ExpectedChecksum() const { + if (!contents_.is_tree() || !contents_.tree()->IsCrc()) { + return absl::nullopt; + } + return contents_.tree()->crc()->crc; +} + inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size, size_t size_to_compare) const { auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { @@ -1720,6 +1758,7 @@ char Cord::operator[](size_t i) const { if (rep == nullptr) { return contents_.data()[i]; } + rep = cord_internal::SkipCrcNode(rep); while (true) { assert(rep != nullptr); assert(offset < rep->length); @@ -1780,6 +1819,7 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { assert(rep != nullptr); + rep = cord_internal::SkipCrcNode(rep); if (rep->IsFlat()) { *fragment = absl::string_view(rep->flat()->Data(), rep->length); return true; @@ -1809,6 +1849,9 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ void Cord::ForEachChunkAux( absl::cord_internal::CordRep* rep, absl::FunctionRef<void(absl::string_view)> callback) { + assert(rep != nullptr); + rep = cord_internal::SkipCrcNode(rep); + if (rep->IsBtree()) { ChunkIterator it(rep), end; while (it != end) { @@ -1818,12 +1861,11 @@ absl::string_view Cord::FlattenSlowPath() { return; } - assert(rep != nullptr); int stack_pos = 0; constexpr int stack_max = 128; // Stack of right branches for tree traversal absl::cord_internal::CordRep* stack[stack_max]; - absl::cord_internal::CordRep* current_node = rep; + absl::cord_internal::CordRep* current_node = cord_internal::SkipCrcNode(rep); while (true) { if (current_node->IsConcat()) { if (stack_pos == stack_max) { diff --git a/absl/strings/cord.h b/absl/strings/cord.h index f0a19914..3f0633bd 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -81,6 +81,7 @@ #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_btree_reader.h" +#include "absl/strings/internal/cord_rep_crc.h" #include "absl/strings/internal/cord_rep_ring.h" #include "absl/strings/internal/cordz_functions.h" #include "absl/strings/internal/cordz_info.h" @@ -671,6 +672,29 @@ class Cord { cord->Append(part); } + // Cord::SetExpectedChecksum() + // + // Stores a checksum value with this non-empty cord instance, for later + // retrieval. + // + // The expected checksum is a number stored out-of-band, alongside the data. + // It is preserved across copies and assignments, but any mutations to a cord + // will cause it to lose its expected checksum. + // + // The expected checksum is not part of a Cord's value, and does not affect + // operations such as equality or hashing. + // + // This field is intended to store a CRC32C checksum for later validation, to + // help support end-to-end checksum workflows. However, the Cord API itself + // does no CRC validation, and assigns no meaning to this number. + // + // This call has no effect if this cord is empty. + void SetExpectedChecksum(uint32_t crc); + + // Returns this cord's expected checksum, if it has one. Otherwise, returns + // nullopt. + absl::optional<uint32_t> ExpectedChecksum() const; + template <typename H> friend H AbslHashValue(H hash_state, const absl::Cord& c) { absl::optional<absl::string_view> maybe_flat = c.TryFlat(); @@ -1274,6 +1298,7 @@ inline bool Cord::StartsWith(absl::string_view rhs) const { } inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) { + tree = cord_internal::SkipCrcNode(tree); if (tree->tag == cord_internal::BTREE) { current_chunk_ = btree_reader_.Init(tree->btree()); return; diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index cced9bba..188fbc2e 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -34,9 +34,11 @@ #include "absl/base/internal/raw_logging.h" #include "absl/base/macros.h" #include "absl/container/fixed_array.h" +#include "absl/hash/hash.h" #include "absl/random/random.h" #include "absl/strings/cord_test_helpers.h" #include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" @@ -192,10 +194,13 @@ class CordTestPeer { static Cord MakeSubstring(Cord src, size_t offset, size_t length) { ABSL_RAW_CHECK(src.contents_.is_tree(), "Can not be inlined"); + ABSL_RAW_CHECK(src.ExpectedChecksum() == absl::nullopt, + "Can not be hardened"); Cord cord; auto* rep = new cord_internal::CordRepSubstring; rep->tag = cord_internal::SUBSTRING; - rep->child = cord_internal::CordRep::Ref(src.contents_.tree()); + rep->child = cord_internal::CordRep::Ref( + cord_internal::SkipCrcNode(src.contents_.tree())); rep->start = offset; rep->length = length; cord.contents_.EmplaceTree(rep, @@ -207,8 +212,9 @@ class CordTestPeer { ABSL_NAMESPACE_END } // namespace absl -// The CordTest fixture runs all tests with and without Cord Btree enabled. -class CordTest : public testing::TestWithParam<bool> { +// The CordTest fixture runs all tests with and without Cord Btree enabled, +// and with our without expected CRCs being set on the subject Cords. +class CordTest : public testing::TestWithParam<int> { public: CordTest() : was_btree_(absl::cord_internal::cord_btree_enabled.load()) { absl::cord_internal::cord_btree_enabled.store(UseBtree()); @@ -218,18 +224,40 @@ class CordTest : public testing::TestWithParam<bool> { } // Returns true if test is running with btree enabled. - bool UseBtree() const { return GetParam(); } + bool UseBtree() const { return GetParam() == 1 || GetParam() == 3; } + bool UseCrc() const { return GetParam() == 2 || GetParam() == 3; } + void MaybeHarden(absl::Cord& c) { + if (UseCrc()) { + c.SetExpectedChecksum(1); + } + } + absl::Cord MaybeHardened(absl::Cord c) { + MaybeHarden(c); + return c; + } // Returns human readable string representation of the test parameter. - static std::string ToString(testing::TestParamInfo<bool> param) { - return param.param ? "Btree" : "Concat"; + static std::string ToString(testing::TestParamInfo<int> param) { + switch (param.param) { + case 0: + return "Concat"; + case 1: + return "Btree"; + case 2: + return "ConcatHardened"; + case 3: + return "BtreeHardened"; + default: + assert(false); + return "???"; + } } private: const bool was_btree_; }; -INSTANTIATE_TEST_SUITE_P(WithParam, CordTest, testing::Bool(), +INSTANTIATE_TEST_SUITE_P(WithParam, CordTest, testing::Values(0, 1, 2, 3), CordTest::ToString); TEST_P(CordTest, AllFlatSizes) { @@ -243,6 +271,7 @@ TEST_P(CordTest, AllFlatSizes) { } absl::Cord dst(src); + MaybeHarden(dst); EXPECT_EQ(std::string(dst), src) << s; } } @@ -274,6 +303,7 @@ TEST_P(CordTest, GigabyteCordFromExternal) { c.Append(from); c.Append(from); c.Append(from); + MaybeHarden(c); } for (int i = 0; i < 1024; ++i) { @@ -302,6 +332,8 @@ bool my_unique_true_boolean = true; TEST_P(CordTest, Assignment) { absl::Cord x(absl::string_view("hi there")); absl::Cord y(x); + MaybeHarden(y); + ASSERT_EQ(x.ExpectedChecksum(), absl::nullopt); ASSERT_EQ(std::string(x), "hi there"); ASSERT_EQ(std::string(y), "hi there"); ASSERT_TRUE(x == y); @@ -355,6 +387,7 @@ TEST_P(CordTest, Assignment) { TEST_P(CordTest, StartsEndsWith) { absl::Cord x(absl::string_view("abcde")); + MaybeHarden(x); absl::Cord empty(""); ASSERT_TRUE(x.StartsWith(absl::Cord("abcde"))); @@ -392,6 +425,7 @@ TEST_P(CordTest, Subcord) { absl::Cord a; AppendWithFragments(s, &rng, &a); + MaybeHarden(a); ASSERT_EQ(s, std::string(a)); // Check subcords of a, from a variety of interesting points. @@ -413,6 +447,9 @@ TEST_P(CordTest, Subcord) { ASSERT_EQ(absl::string_view(s).substr(pos, end_pos - pos), std::string(sa)) << a; + if (pos != 0 || end_pos != a.size()) { + ASSERT_EQ(sa.ExpectedChecksum(), absl::nullopt); + } } } @@ -452,10 +489,19 @@ TEST_P(CordTest, Swap) { absl::string_view b("Mandark"); absl::Cord x(a); absl::Cord y(b); + MaybeHarden(x); swap(x, y); + if (UseCrc()) { + ASSERT_EQ(x.ExpectedChecksum(), absl::nullopt); + ASSERT_EQ(y.ExpectedChecksum(), 1); + } ASSERT_EQ(x, absl::Cord(b)); ASSERT_EQ(y, absl::Cord(a)); x.swap(y); + if (UseCrc()) { + ASSERT_EQ(x.ExpectedChecksum(), 1); + ASSERT_EQ(y.ExpectedChecksum(), absl::nullopt); + } ASSERT_EQ(x, absl::Cord(a)); ASSERT_EQ(y, absl::Cord(b)); } @@ -480,11 +526,11 @@ static void VerifyCopyToString(const absl::Cord& cord) { } TEST_P(CordTest, CopyToString) { - VerifyCopyToString(absl::Cord()); - VerifyCopyToString(absl::Cord("small cord")); - VerifyCopyToString( + VerifyCopyToString(absl::Cord()); // empty cords cannot carry CRCs + VerifyCopyToString(MaybeHardened(absl::Cord("small cord"))); + VerifyCopyToString(MaybeHardened( absl::MakeFragmentedCord({"fragmented ", "cord ", "to ", "test ", - "copying ", "to ", "a ", "string."})); + "copying ", "to ", "a ", "string."}))); } TEST_P(CordTest, TryFlatEmpty) { @@ -494,40 +540,47 @@ TEST_P(CordTest, TryFlatEmpty) { TEST_P(CordTest, TryFlatFlat) { absl::Cord c("hello"); + MaybeHarden(c); EXPECT_EQ(c.TryFlat(), "hello"); } TEST_P(CordTest, TryFlatSubstrInlined) { absl::Cord c("hello"); c.RemovePrefix(1); + MaybeHarden(c); EXPECT_EQ(c.TryFlat(), "ello"); } TEST_P(CordTest, TryFlatSubstrFlat) { absl::Cord c("longer than 15 bytes"); absl::Cord sub = absl::CordTestPeer::MakeSubstring(c, 1, c.size() - 1); + MaybeHarden(sub); EXPECT_EQ(sub.TryFlat(), "onger than 15 bytes"); } TEST_P(CordTest, TryFlatConcat) { absl::Cord c = absl::MakeFragmentedCord({"hel", "lo"}); + MaybeHarden(c); EXPECT_EQ(c.TryFlat(), absl::nullopt); } TEST_P(CordTest, TryFlatExternal) { absl::Cord c = absl::MakeCordFromExternal("hell", [](absl::string_view) {}); + MaybeHarden(c); EXPECT_EQ(c.TryFlat(), "hell"); } TEST_P(CordTest, TryFlatSubstrExternal) { absl::Cord c = absl::MakeCordFromExternal("hell", [](absl::string_view) {}); absl::Cord sub = absl::CordTestPeer::MakeSubstring(c, 1, c.size() - 1); + MaybeHarden(sub); EXPECT_EQ(sub.TryFlat(), "ell"); } TEST_P(CordTest, TryFlatSubstrConcat) { absl::Cord c = absl::MakeFragmentedCord({"hello", " world"}); absl::Cord sub = absl::CordTestPeer::MakeSubstring(c, 1, c.size() - 1); + MaybeHarden(sub); EXPECT_EQ(sub.TryFlat(), absl::nullopt); c.RemovePrefix(1); EXPECT_EQ(c.TryFlat(), absl::nullopt); @@ -547,6 +600,7 @@ TEST_P(CordTest, TryFlatCommonlyAssumedInvariants) { "returned by the ", "iterator"}; absl::Cord c = absl::MakeFragmentedCord(fragments); + MaybeHarden(c); int fragment = 0; int offset = 0; absl::Cord::CharIterator itc = c.char_begin(); @@ -591,13 +645,15 @@ static void VerifyFlatten(absl::Cord c) { TEST_P(CordTest, Flatten) { VerifyFlatten(absl::Cord()); - VerifyFlatten(absl::Cord("small cord")); - VerifyFlatten(absl::Cord("larger than small buffer optimization")); - VerifyFlatten(absl::MakeFragmentedCord({"small ", "fragmented ", "cord"})); + VerifyFlatten(MaybeHardened(absl::Cord("small cord"))); + VerifyFlatten( + MaybeHardened(absl::Cord("larger than small buffer optimization"))); + VerifyFlatten(MaybeHardened( + absl::MakeFragmentedCord({"small ", "fragmented ", "cord"}))); // Test with a cord that is longer than the largest flat buffer RandomEngine rng(GTEST_FLAG_GET(random_seed)); - VerifyFlatten(absl::Cord(RandomLowercaseString(&rng, 8192))); + VerifyFlatten(MaybeHardened(absl::Cord(RandomLowercaseString(&rng, 8192)))); } // Test data @@ -651,22 +707,26 @@ TEST_P(CordTest, MultipleLengths) { { // Construct from Cord absl::Cord tmp(a); absl::Cord x(tmp); + MaybeHarden(x); EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; } { // Construct from absl::string_view absl::Cord x(a); + MaybeHarden(x); EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; } { // Append cord to self absl::Cord self(a); + MaybeHarden(self); self.Append(self); EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; } { // Prepend cord to self absl::Cord self(a); + MaybeHarden(self); self.Prepend(self); EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; } @@ -678,12 +738,14 @@ TEST_P(CordTest, MultipleLengths) { { // CopyFrom Cord absl::Cord x(a); absl::Cord y(b); + MaybeHarden(x); x = y; EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; } { // CopyFrom absl::string_view absl::Cord x(a); + MaybeHarden(x); x = b; EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; } @@ -691,12 +753,14 @@ TEST_P(CordTest, MultipleLengths) { { // Cord::Append(Cord) absl::Cord x(a); absl::Cord y(b); + MaybeHarden(x); x.Append(y); EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; } { // Cord::Append(absl::string_view) absl::Cord x(a); + MaybeHarden(x); x.Append(b); EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; } @@ -704,12 +768,14 @@ TEST_P(CordTest, MultipleLengths) { { // Cord::Prepend(Cord) absl::Cord x(a); absl::Cord y(b); + MaybeHarden(x); x.Prepend(y); EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; } { // Cord::Prepend(absl::string_view) absl::Cord x(a); + MaybeHarden(x); x.Prepend(b); EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; } @@ -722,13 +788,16 @@ namespace { TEST_P(CordTest, RemoveSuffixWithExternalOrSubstring) { absl::Cord cord = absl::MakeCordFromExternal( "foo bar baz", [](absl::string_view s) { DoNothing(s, nullptr); }); - EXPECT_EQ("foo bar baz", std::string(cord)); + MaybeHarden(cord); + // This RemoveSuffix() will wrap the EXTERNAL node in a SUBSTRING node. cord.RemoveSuffix(4); EXPECT_EQ("foo bar", std::string(cord)); + MaybeHarden(cord); + // This RemoveSuffix() will adjust the SUBSTRING node in-place. cord.RemoveSuffix(4); EXPECT_EQ("foo", std::string(cord)); @@ -738,6 +807,7 @@ TEST_P(CordTest, RemoveSuffixMakesZeroLengthNode) { absl::Cord c; c.Append(absl::Cord(std::string(100, 'x'))); absl::Cord other_ref = c; // Prevent inplace appends + MaybeHarden(c); c.Append(absl::Cord(std::string(200, 'y'))); c.RemoveSuffix(200); EXPECT_EQ(std::string(100, 'x'), std::string(c)); @@ -763,6 +833,7 @@ absl::Cord CordWithZedBlock(size_t size) { // Establish that ZedBlock does what we think it does. TEST_P(CordTest, CordSpliceTestZedBlock) { absl::Cord blob = CordWithZedBlock(10); + MaybeHarden(blob); EXPECT_EQ(10, blob.size()); std::string s; absl::CopyCordToString(blob, &s); @@ -771,6 +842,7 @@ TEST_P(CordTest, CordSpliceTestZedBlock) { TEST_P(CordTest, CordSpliceTestZedBlock0) { absl::Cord blob = CordWithZedBlock(0); + MaybeHarden(blob); EXPECT_EQ(0, blob.size()); std::string s; absl::CopyCordToString(blob, &s); @@ -779,6 +851,7 @@ TEST_P(CordTest, CordSpliceTestZedBlock0) { TEST_P(CordTest, CordSpliceTestZedBlockSuffix1) { absl::Cord blob = CordWithZedBlock(10); + MaybeHarden(blob); EXPECT_EQ(10, blob.size()); absl::Cord suffix(blob); suffix.RemovePrefix(9); @@ -791,6 +864,7 @@ TEST_P(CordTest, CordSpliceTestZedBlockSuffix1) { // Remove all of a prefix block TEST_P(CordTest, CordSpliceTestZedBlockSuffix0) { absl::Cord blob = CordWithZedBlock(10); + MaybeHarden(blob); EXPECT_EQ(10, blob.size()); absl::Cord suffix(blob); suffix.RemovePrefix(10); @@ -823,6 +897,7 @@ absl::Cord SpliceCord(const absl::Cord& blob, int64_t offset, // Taking an empty suffix of a block breaks appending. TEST_P(CordTest, CordSpliceTestRemoveEntireBlock1) { absl::Cord zero = CordWithZedBlock(10); + MaybeHarden(zero); absl::Cord suffix(zero); suffix.RemovePrefix(10); absl::Cord result; @@ -831,6 +906,7 @@ TEST_P(CordTest, CordSpliceTestRemoveEntireBlock1) { TEST_P(CordTest, CordSpliceTestRemoveEntireBlock2) { absl::Cord zero = CordWithZedBlock(10); + MaybeHarden(zero); absl::Cord prefix(zero); prefix.RemoveSuffix(10); absl::Cord suffix(zero); @@ -842,13 +918,19 @@ TEST_P(CordTest, CordSpliceTestRemoveEntireBlock2) { TEST_P(CordTest, CordSpliceTestRemoveEntireBlock3) { absl::Cord blob = CordWithZedBlock(10); absl::Cord block = BigCord(10, 'b'); + MaybeHarden(blob); + MaybeHarden(block); blob = SpliceCord(blob, 0, block); } struct CordCompareTestCase { template <typename LHS, typename RHS> - CordCompareTestCase(const LHS& lhs, const RHS& rhs) - : lhs_cord(lhs), rhs_cord(rhs) {} + CordCompareTestCase(const LHS& lhs, const RHS& rhs, bool use_crc) + : lhs_cord(lhs), rhs_cord(rhs) { + if (use_crc) { + lhs_cord.SetExpectedChecksum(1); + } + } absl::Cord lhs_cord; absl::Cord rhs_cord; @@ -885,47 +967,54 @@ TEST_P(CordTest, Compare) { concat2.Append("cccccccccccDDDDDDDDDDDDDD"); concat2.Append("DD"); + const bool use_crc = UseCrc(); + std::vector<CordCompareTestCase> test_cases = {{ // Inline cords - {"abcdef", "abcdef"}, - {"abcdef", "abcdee"}, - {"abcdef", "abcdeg"}, - {"bbcdef", "abcdef"}, - {"bbcdef", "abcdeg"}, - {"abcdefa", "abcdef"}, - {"abcdef", "abcdefa"}, + {"abcdef", "abcdef", use_crc}, + {"abcdef", "abcdee", use_crc}, + {"abcdef", "abcdeg", use_crc}, + {"bbcdef", "abcdef", use_crc}, + {"bbcdef", "abcdeg", use_crc}, + {"abcdefa", "abcdef", use_crc}, + {"abcdef", "abcdefa", use_crc}, // Small flat cords - {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDD"}, - {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBxccccDDDDD"}, - {"aaaaaBBBBBcxcccDDDDD", "aaaaaBBBBBcccccDDDDD"}, - {"aaaaaBBBBBxccccDDDDD", "aaaaaBBBBBcccccDDDDX"}, - {"aaaaaBBBBBcccccDDDDDa", "aaaaaBBBBBcccccDDDDD"}, - {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDDa"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDD", use_crc}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBxccccDDDDD", use_crc}, + {"aaaaaBBBBBcxcccDDDDD", "aaaaaBBBBBcccccDDDDD", use_crc}, + {"aaaaaBBBBBxccccDDDDD", "aaaaaBBBBBcccccDDDDX", use_crc}, + {"aaaaaBBBBBcccccDDDDDa", "aaaaaBBBBBcccccDDDDD", use_crc}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDDa", use_crc}, // Subcords - {subcord, subcord}, - {subcord, "aaBBBBBccc"}, - {subcord, "aaBBBBBccd"}, - {subcord, "aaBBBBBccb"}, - {subcord, "aaBBBBBxcb"}, - {subcord, "aaBBBBBccca"}, - {subcord, "aaBBBBBcc"}, + {subcord, subcord, use_crc}, + {subcord, "aaBBBBBccc", use_crc}, + {subcord, "aaBBBBBccd", use_crc}, + {subcord, "aaBBBBBccb", use_crc}, + {subcord, "aaBBBBBxcb", use_crc}, + {subcord, "aaBBBBBccca", use_crc}, + {subcord, "aaBBBBBcc", use_crc}, // Concats - {concat, concat}, + {concat, concat, use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDD"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDD", + use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBcccccccccccccccxDDDDDDDDDDDDDDDD"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBcccccccccccccccxDDDDDDDDDDDDDDDD", + use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBacccccccccccccccDDDDDDDDDDDDDDDD"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBacccccccccccccccDDDDDDDDDDDDDDDD", + use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDD"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDD", + use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDDe"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDDe", + use_crc}, - {concat, concat2}, + {concat, concat2, use_crc}, }}; for (const auto& tc : test_cases) { @@ -936,6 +1025,7 @@ TEST_P(CordTest, Compare) { TEST_P(CordTest, CompareAfterAssign) { absl::Cord a("aaaaaa1111111"); absl::Cord b("aaaaaa2222222"); + MaybeHarden(a); a = "cccccc"; b = "cccccc"; EXPECT_EQ(a, b); @@ -994,6 +1084,8 @@ TEST_P(CordTest, CompareRandomComparisons) { d.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); } std::bernoulli_distribution coin_flip(0.5); + MaybeHarden(c); + MaybeHarden(d); TestCompare(coin_flip(rng) ? c : absl::Cord(std::string(c)), coin_flip(rng) ? d : absl::Cord(std::string(d)), &rng); } @@ -1119,6 +1211,7 @@ TEST_P(CordTest, ConstructFromExternalCompareContents) { EXPECT_EQ(external->size(), sv.size()); delete external; }); + MaybeHarden(cord); EXPECT_EQ(data, cord); } } @@ -1134,7 +1227,7 @@ TEST_P(CordTest, ConstructFromExternalLargeReleaser) { EXPECT_EQ(data, absl::string_view(data_array.data(), data_array.size())); invoked = true; }; - (void)absl::MakeCordFromExternal(data, releaser); + (void)MaybeHardened(absl::MakeCordFromExternal(data, releaser)); EXPECT_TRUE(invoked); } @@ -1147,11 +1240,11 @@ TEST_P(CordTest, ConstructFromExternalFunctionPointerReleaser) { invoked = true; }); invoked = false; - (void)absl::MakeCordFromExternal(data, releaser); + (void)MaybeHardened(absl::MakeCordFromExternal(data, releaser)); EXPECT_TRUE(invoked); invoked = false; - (void)absl::MakeCordFromExternal(data, *releaser); + (void)MaybeHardened(absl::MakeCordFromExternal(data, *releaser)); EXPECT_TRUE(invoked); } @@ -1165,20 +1258,21 @@ TEST_P(CordTest, ConstructFromExternalMoveOnlyReleaser) { }; bool invoked = false; - (void)absl::MakeCordFromExternal("dummy", Releaser(&invoked)); + (void)MaybeHardened(absl::MakeCordFromExternal("dummy", Releaser(&invoked))); EXPECT_TRUE(invoked); } TEST_P(CordTest, ConstructFromExternalNoArgLambda) { bool invoked = false; - (void)absl::MakeCordFromExternal("dummy", [&invoked]() { invoked = true; }); + (void)MaybeHardened( + absl::MakeCordFromExternal("dummy", [&invoked]() { invoked = true; })); EXPECT_TRUE(invoked); } TEST_P(CordTest, ConstructFromExternalStringViewArgLambda) { bool invoked = false; - (void)absl::MakeCordFromExternal( - "dummy", [&invoked](absl::string_view) { invoked = true; }); + (void)MaybeHardened(absl::MakeCordFromExternal( + "dummy", [&invoked](absl::string_view) { invoked = true; })); EXPECT_TRUE(invoked); } @@ -1193,7 +1287,7 @@ TEST_P(CordTest, ConstructFromExternalNonTrivialReleaserDestructor) { bool destroyed = false; Releaser releaser(&destroyed); - (void)absl::MakeCordFromExternal("dummy", releaser); + (void)MaybeHardened(absl::MakeCordFromExternal("dummy", releaser)); EXPECT_TRUE(destroyed); } @@ -1209,18 +1303,18 @@ TEST_P(CordTest, ConstructFromExternalReferenceQualifierOverloads) { bool lvalue_invoked = false; bool rvalue_invoked = false; Releaser releaser = {&lvalue_invoked, &rvalue_invoked}; - (void)absl::MakeCordFromExternal("", releaser); + (void)MaybeHardened(absl::MakeCordFromExternal("", releaser)); EXPECT_FALSE(lvalue_invoked); EXPECT_TRUE(rvalue_invoked); rvalue_invoked = false; - (void)absl::MakeCordFromExternal("dummy", releaser); + (void)MaybeHardened(absl::MakeCordFromExternal("dummy", releaser)); EXPECT_FALSE(lvalue_invoked); EXPECT_TRUE(rvalue_invoked); rvalue_invoked = false; // NOLINTNEXTLINE: suppress clang-tidy std::move on trivially copyable type. - (void)absl::MakeCordFromExternal("dummy", std::move(releaser)); + (void)MaybeHardened(absl::MakeCordFromExternal("dummy", std::move(releaser))); EXPECT_FALSE(lvalue_invoked); EXPECT_TRUE(rvalue_invoked); } @@ -1229,7 +1323,9 @@ TEST_P(CordTest, ExternalMemoryBasicUsage) { static const char* strings[] = {"", "hello", "there"}; for (const char* str : strings) { absl::Cord dst("(prefix)"); + MaybeHarden(dst); AddExternalMemory(str, &dst); + MaybeHarden(dst); dst.Append("(suffix)"); EXPECT_EQ((std::string("(prefix)") + str + std::string("(suffix)")), std::string(dst)); @@ -1243,7 +1339,9 @@ TEST_P(CordTest, ExternalMemoryRemovePrefixSuffix) { for (int offset = 0; offset <= s.size(); offset++) { for (int length = 0; length <= s.size() - offset; length++) { absl::Cord result(cord); + MaybeHarden(result); result.RemovePrefix(offset); + MaybeHarden(result); result.RemoveSuffix(result.size() - length); EXPECT_EQ(s.substr(offset, length), std::string(result)) << offset << " " << length; @@ -1254,8 +1352,10 @@ TEST_P(CordTest, ExternalMemoryRemovePrefixSuffix) { TEST_P(CordTest, ExternalMemoryGet) { absl::Cord cord("hello"); AddExternalMemory(" world!", &cord); + MaybeHarden(cord); AddExternalMemory(" how are ", &cord); cord.Append(" you?"); + MaybeHarden(cord); std::string s = std::string(cord); for (int i = 0; i < s.size(); i++) { EXPECT_EQ(s[i], cord[i]); @@ -1354,11 +1454,13 @@ TEST_P(CordTest, CordMemoryUsageInlineRep) { TEST_P(CordTest, Concat_Append) { // Create a rep of type CONCAT absl::Cord s1("foobarbarbarbarbar"); + MaybeHarden(s1); s1.Append("abcdefgabcdefgabcdefgabcdefgabcdefgabcdefgabcdefg"); size_t size = s1.size(); // Create a copy of s1 and append to it. absl::Cord s2 = s1; + MaybeHarden(s2); s2.Append("x"); // 7465150 modifies s1 when it shouldn't. @@ -1378,6 +1480,7 @@ TEST_P(CordTest, DiabolicalGrowth) { for (char c : expected) { absl::Cord shared(cord); cord.Append(absl::string_view(&c, 1)); + MaybeHarden(cord); } std::string value; absl::CopyCordToString(cord, &value); @@ -1422,8 +1525,12 @@ static absl::Cord MakeHuge(absl::string_view prefix) { TEST_P(CordTest, HugeCord) { absl::Cord cord = MakeHuge("huge cord"); + MaybeHarden(cord); + + const size_t acceptable_delta = + 100 + (UseCrc() ? sizeof(absl::cord_internal::CordRepCrc) : 0); EXPECT_LE(cord.size(), cord.EstimatedMemoryUsage()); - EXPECT_GE(cord.size() + 100, cord.EstimatedMemoryUsage()); + EXPECT_GE(cord.size() + acceptable_delta, cord.EstimatedMemoryUsage()); } // Tests that Append() works ok when handed a self reference @@ -1433,6 +1540,7 @@ TEST_P(CordTest, AppendSelf) { std::string control_data = "Abc"; absl::Cord data(control_data); while (control_data.length() < 0x4000) { + MaybeHarden(data); data.Append(data); control_data.append(control_data); ASSERT_EQ(control_data, data); @@ -1443,6 +1551,8 @@ TEST_P(CordTest, MakeFragmentedCordFromInitializerList) { absl::Cord fragmented = absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); + MaybeHarden(fragmented); + EXPECT_EQ("A fragmented Cord", fragmented); auto chunk_it = fragmented.chunk_begin(); @@ -1463,6 +1573,8 @@ TEST_P(CordTest, MakeFragmentedCordFromVector) { std::vector<absl::string_view> chunks = {"A ", "fragmented ", "Cord"}; absl::Cord fragmented = absl::MakeFragmentedCord(chunks); + MaybeHarden(fragmented); + EXPECT_EQ("A fragmented Cord", fragmented); auto chunk_it = fragmented.chunk_begin(); @@ -1565,22 +1677,26 @@ TEST_P(CordTest, CordChunkIteratorOperations) { VerifyChunkIterator(empty_cord, 0); absl::Cord small_buffer_cord("small cord"); + MaybeHarden(small_buffer_cord); VerifyChunkIterator(small_buffer_cord, 1); absl::Cord flat_node_cord("larger than small buffer optimization"); + MaybeHarden(flat_node_cord); VerifyChunkIterator(flat_node_cord, 1); - VerifyChunkIterator( - absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", - "testing ", "chunk ", "iterations."}), - 8); + VerifyChunkIterator(MaybeHardened(absl::MakeFragmentedCord( + {"a ", "small ", "fragmented ", "cord ", "for ", + "testing ", "chunk ", "iterations."})), + 8); absl::Cord reused_nodes_cord(std::string(40, 'c')); reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'b'))); + MaybeHarden(reused_nodes_cord); reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'a'))); size_t expected_chunks = 3; for (int i = 0; i < 8; ++i) { reused_nodes_cord.Prepend(reused_nodes_cord); + MaybeHarden(reused_nodes_cord); expected_chunks *= 2; VerifyChunkIterator(reused_nodes_cord, expected_chunks); } @@ -1706,27 +1822,33 @@ TEST_P(CordTest, CharIteratorOperations) { VerifyCharIterator(empty_cord); absl::Cord small_buffer_cord("small cord"); + MaybeHarden(small_buffer_cord); VerifyCharIterator(small_buffer_cord); absl::Cord flat_node_cord("larger than small buffer optimization"); + MaybeHarden(flat_node_cord); VerifyCharIterator(flat_node_cord); - VerifyCharIterator( + VerifyCharIterator(MaybeHardened( absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", - "testing ", "character ", "iteration."})); + "testing ", "character ", "iteration."}))); absl::Cord reused_nodes_cord("ghi"); reused_nodes_cord.Prepend(absl::Cord("def")); reused_nodes_cord.Prepend(absl::Cord("abc")); for (int i = 0; i < 4; ++i) { reused_nodes_cord.Prepend(reused_nodes_cord); + MaybeHarden(reused_nodes_cord); VerifyCharIterator(reused_nodes_cord); } RandomEngine rng(GTEST_FLAG_GET(random_seed)); absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); absl::Cord subcords; - for (int i = 0; i < 4; ++i) subcords.Prepend(flat_cord.Subcord(16 * i, 128)); + for (int i = 0; i < 4; ++i) { + subcords.Prepend(flat_cord.Subcord(16 * i, 128)); + MaybeHarden(subcords); + } VerifyCharIterator(subcords); } @@ -1751,6 +1873,8 @@ TEST_P(CordTest, CharIteratorAdvanceAndRead) { cord.Append(absl::Cord(block)); } + MaybeHarden(cord); + for (size_t chunk_size : {kChunkSize1, kChunkSize2, kChunkSize3, kChunkSize4}) { absl::Cord::CharIterator it = cord.char_begin(); @@ -1768,6 +1892,7 @@ TEST_P(CordTest, CharIteratorAdvanceAndRead) { TEST_P(CordTest, StreamingOutput) { absl::Cord c = absl::MakeFragmentedCord({"A ", "small ", "fragmented ", "Cord", "."}); + MaybeHarden(c); std::stringstream output; output << c; EXPECT_EQ("A small fragmented Cord.", output.str()); @@ -1781,6 +1906,7 @@ TEST_P(CordTest, ForEachChunk) { cord_chunks.push_back(absl::StrCat("[", i, "]")); } absl::Cord c = absl::MakeFragmentedCord(cord_chunks); + MaybeHarden(c); std::vector<std::string> iterated_chunks; absl::CordTestPeer::ForEachChunk(c, @@ -1798,6 +1924,7 @@ TEST_P(CordTest, SmallBufferAssignFromOwnData) { for (size_t pos = 0; pos < contents.size(); ++pos) { for (size_t count = contents.size() - pos; count > 0; --count) { absl::Cord c(contents); + MaybeHarden(c); absl::string_view flat = c.Flatten(); c = flat.substr(pos, count); EXPECT_EQ(c, contents.substr(pos, count)) @@ -1810,12 +1937,16 @@ TEST_P(CordTest, Format) { absl::Cord c; absl::Format(&c, "There were %04d little %s.", 3, "pigs"); EXPECT_EQ(c, "There were 0003 little pigs."); + MaybeHarden(c); absl::Format(&c, "And %-3llx bad wolf!", 1); + MaybeHarden(c); EXPECT_EQ(c, "There were 0003 little pigs.And 1 bad wolf!"); } TEST_P(CordTest, Hardening) { absl::Cord cord("hello"); + MaybeHarden(cord); + // These statement should abort the program in all builds modes. EXPECT_DEATH_IF_SUPPORTED(cord.RemovePrefix(6), ""); EXPECT_DEATH_IF_SUPPORTED(cord.RemoveSuffix(6), ""); @@ -1855,6 +1986,7 @@ TEST_P(CordTest, BtreeHostileSplitInsertJoin) { } for (int j = 0; j < 1000; ++j) { + MaybeHarden(cord); size_t offset = absl::Uniform(bitgen, 0u, cord.size()); size_t length = absl::Uniform(bitgen, 100u, data.size()); if (cord.size() == offset) { @@ -1955,3 +2087,272 @@ TEST_P(CordTest, ConstinitConstructor) { TestConstinitConstructor( absl::strings_internal::MakeStringConstant(LongView{})); } + +namespace { + +// Test helper that generates a populated cord for future manipulation. +// +// By test convention, all generated cords begin with the characters "abcde" at +// the start of the first chunk. +class PopulatedCordFactory { + public: + constexpr PopulatedCordFactory(absl::string_view name, + absl::Cord (*generator)()) + : name_(name), generator_(generator) {} + + absl::string_view Name() const { return name_; } + absl::Cord Generate() const { return generator_(); } + + private: + absl::string_view name_; + absl::Cord (*generator_)(); +}; + +// clang-format off +// This array is constant-initialized in conformant compilers. +PopulatedCordFactory cord_factories[] = { + {"sso", [] { return absl::Cord("abcde"); }}, + {"flat", [] { + // Too large to live in SSO space, but small enough to be a simple FLAT. + absl::Cord flat(absl::StrCat("abcde", std::string(1000, 'x'))); + flat.Flatten(); + return flat; + }}, + {"external", [] { + // A cheat: we are using a string literal as the external storage, so a + // no-op releaser is correct here. + return absl::MakeCordFromExternal("abcde External!", []{}); + }}, + {"external substring", [] { + // A cheat: we are using a string literal as the external storage, so a + // no-op releaser is correct here. + absl::Cord ext = absl::MakeCordFromExternal("-abcde External!", []{}); + return absl::CordTestPeer::MakeSubstring(ext, 1, ext.size() - 1); + }}, + {"substring", [] { + absl::Cord flat(absl::StrCat("-abcde", std::string(1000, 'x'))); + flat.Flatten(); + return flat.Subcord(1, 998); + }}, + {"fragmented", [] { + std::string fragment = absl::StrCat("abcde", std::string(195, 'x')); + std::vector<std::string> fragments(200, fragment); + absl::Cord cord = absl::MakeFragmentedCord(fragments); + assert(cord.size() == 40000); + return cord; + }}, +}; +// clang-format on + +// Test helper that can mutate a cord, and possibly undo the mutation, for +// testing. +class CordMutator { + public: + constexpr CordMutator(absl::string_view name, void (*mutate)(absl::Cord&), + void (*undo)(absl::Cord&) = nullptr) + : name_(name), mutate_(mutate), undo_(undo) {} + + absl::string_view Name() const { return name_; } + void Mutate(absl::Cord& cord) const { mutate_(cord); } + bool CanUndo() const { return undo_ != nullptr; } + void Undo(absl::Cord& cord) const { undo_(cord); } + + private: + absl::string_view name_; + void (*mutate_)(absl::Cord&); + void (*undo_)(absl::Cord&); +}; + +// clang-format off +// This array is constant-initialized in conformant compilers. +CordMutator cord_mutators[] ={ + {"clear", [](absl::Cord& c) { c.Clear(); }}, + {"overwrite", [](absl::Cord& c) { c = "overwritten"; }}, + { + "append string", + [](absl::Cord& c) { c.Append("0123456789"); }, + [](absl::Cord& c) { c.RemoveSuffix(10); } + }, + { + "append cord", + [](absl::Cord& c) { + c.Append(absl::MakeFragmentedCord({"12345", "67890"})); + }, + [](absl::Cord& c) { c.RemoveSuffix(10); } + }, + { + "append checksummed cord", + [](absl::Cord& c) { + absl::Cord to_append = absl::MakeFragmentedCord({"12345", "67890"}); + to_append.SetExpectedChecksum(999); + c.Append(to_append); + }, + [](absl::Cord& c) { c.RemoveSuffix(10); } + }, + { + "append self", + [](absl::Cord& c) { c.Append(c); }, + [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); } + }, + { + "prepend string", + [](absl::Cord& c) { c.Prepend("9876543210"); }, + [](absl::Cord& c) { c.RemovePrefix(10); } + }, + { + "prepend cord", + [](absl::Cord& c) { + c.Prepend(absl::MakeFragmentedCord({"98765", "43210"})); + }, + [](absl::Cord& c) { c.RemovePrefix(10); } + }, + { + "prepend checksummed cord", + [](absl::Cord& c) { + absl::Cord to_prepend = absl::MakeFragmentedCord({"98765", "43210"}); + to_prepend.SetExpectedChecksum(999); + c.Prepend(to_prepend); + }, + [](absl::Cord& c) { c.RemovePrefix(10); } + }, + { + "prepend self", + [](absl::Cord& c) { c.Prepend(c); }, + [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); } + }, + {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(2); }}, + {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(2); }}, + {"subcord", [](absl::Cord& c) { c = c.Subcord(1, c.size() - 2); }}, + { + "swap inline", + [](absl::Cord& c) { + absl::Cord other("swap"); + c.swap(other); + } + }, + { + "swap tree", + [](absl::Cord& c) { + absl::Cord other(std::string(10000, 'x')); + c.swap(other); + } + }, +}; +// clang-format on +} // namespace + +TEST_P(CordTest, ExpectedChecksum) { + for (const PopulatedCordFactory& factory : cord_factories) { + SCOPED_TRACE(factory.Name()); + for (bool shared : {false, true}) { + SCOPED_TRACE(shared); + + absl::Cord shared_cord_source = factory.Generate(); + auto make_instance = [=] { + return shared ? shared_cord_source : factory.Generate(); + }; + + const absl::Cord base_value = factory.Generate(); + const std::string base_value_as_string(factory.Generate().Flatten()); + + absl::Cord c1 = make_instance(); + EXPECT_FALSE(c1.ExpectedChecksum().has_value()); + + // Setting an expected checksum works, and retains the cord's bytes + c1.SetExpectedChecksum(12345); + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + EXPECT_EQ(c1, base_value); + + // CRC persists through copies, assignments, and moves: + absl::Cord c1_copy_construct = c1; + EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_copy_assign; + c1_copy_assign = c1; + EXPECT_EQ(c1_copy_assign.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_move(std::move(c1_copy_assign)); + EXPECT_EQ(c1_move.ExpectedChecksum().value_or(0), 12345); + + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + + // A CRC Cord compares equal to its non-CRC value. + EXPECT_EQ(c1, make_instance()); + + for (const CordMutator& mutator : cord_mutators) { + SCOPED_TRACE(mutator.Name()); + + // Test that mutating a cord removes its stored checksum + absl::Cord c2 = make_instance(); + c2.SetExpectedChecksum(24680); + + mutator.Mutate(c2); + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); + + if (mutator.CanUndo()) { + // Undoing an operation should not restore the checksum + mutator.Undo(c2); + EXPECT_EQ(c2, base_value); + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); + } + } + + absl::Cord c3 = make_instance(); + c3.SetExpectedChecksum(999); + const absl::Cord& cc3 = c3; + + // Test that all cord reading operations function in the face of an + // expected checksum. + + // Test data precondition + ASSERT_TRUE(cc3.StartsWith("abcde")); + + EXPECT_EQ(cc3.size(), base_value_as_string.size()); + EXPECT_FALSE(cc3.empty()); + EXPECT_EQ(cc3.Compare(base_value), 0); + EXPECT_EQ(cc3.Compare(base_value_as_string), 0); + EXPECT_EQ(cc3.Compare("wxyz"), -1); + EXPECT_EQ(cc3.Compare(absl::Cord("wxyz")), -1); + EXPECT_EQ(cc3.Compare("aaaa"), 1); + EXPECT_EQ(cc3.Compare(absl::Cord("aaaa")), 1); + EXPECT_EQ(absl::Cord("wxyz").Compare(cc3), 1); + EXPECT_EQ(absl::Cord("aaaa").Compare(cc3), -1); + EXPECT_TRUE(cc3.StartsWith("abcd")); + EXPECT_EQ(std::string(cc3), base_value_as_string); + + std::string dest; + absl::CopyCordToString(cc3, &dest); + EXPECT_EQ(dest, base_value_as_string); + + bool first_pass = true; + for (absl::string_view chunk : cc3.Chunks()) { + if (first_pass) { + EXPECT_TRUE(absl::StartsWith(chunk, "abcde")); + } + first_pass = false; + } + first_pass = true; + for (char ch : cc3.Chars()) { + if (first_pass) { + EXPECT_EQ(ch, 'a'); + } + first_pass = false; + } + EXPECT_TRUE(absl::StartsWith(*cc3.chunk_begin(), "abcde")); + EXPECT_EQ(*cc3.char_begin(), 'a'); + + auto char_it = cc3.char_begin(); + absl::Cord::Advance(&char_it, 2); + EXPECT_EQ(absl::Cord::AdvanceAndRead(&char_it, 2), "cd"); + EXPECT_EQ(*char_it, 'e'); + char_it = cc3.char_begin(); + absl::Cord::Advance(&char_it, 2); + EXPECT_TRUE(absl::StartsWith(absl::Cord::ChunkRemaining(char_it), "cde")); + + EXPECT_EQ(cc3[0], 'a'); + EXPECT_EQ(cc3[4], 'e'); + EXPECT_EQ(absl::HashOf(cc3), absl::HashOf(base_value)); + EXPECT_EQ(absl::HashOf(cc3), absl::HashOf(base_value_as_string)); + } + } +} diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index bf135ae2..672bf178 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -87,9 +87,6 @@ class RefcountAndFlags { constexpr RefcountAndFlags() : count_{kRefIncrement} {} struct Immortal {}; explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {} - struct WithCrc {}; - explicit constexpr RefcountAndFlags(WithCrc) - : count_(kCrcFlag | kRefIncrement) {} // Increments the reference count. Imposes no memory ordering. inline void Increment() { @@ -125,32 +122,14 @@ class RefcountAndFlags { return count_.load(std::memory_order_acquire) >> kNumFlags; } - // Returns true if the referenced object carries a CRC value. - bool HasCrc() const { - return (count_.load(std::memory_order_relaxed) & kCrcFlag) != 0; - } - - // Returns true iff the atomic integer is 1 and this node does not store - // a CRC. When both these conditions are met, the current thread owns - // the reference and no other thread shares it, so its contents may be - // safely mutated. - // - // If the referenced item is shared, carries a CRC, or is immortal, - // it should not be modified in-place, and this function returns false. - // - // This call performs the memory barrier needed for the owning thread - // to act on the object, so that if it returns true, it may safely - // assume exclusive access to the object. - inline bool IsMutable() { - return (count_.load(std::memory_order_acquire)) == kRefIncrement; - } - - // Returns whether the atomic integer is 1. Similar to IsMutable(), - // but does not check for a stored CRC. (An unshared node with a CRC is not - // mutable, because changing its data would invalidate the CRC.) - // - // When this returns true, there are no other references, and data sinks - // may safely adopt the children of the CordRep. + // Returns whether the atomic integer is 1. + // If the reference count is used in the conventional way, a + // reference count of 1 implies that the current thread owns the + // reference and no other thread shares it. + // This call performs the test for a reference count of one, and + // performs the memory barrier needed for the owning thread + // to act on the object, knowing that it has exclusive access to the + // object. Always returns false when the immortal bit is set. inline bool IsOne() { return (count_.load(std::memory_order_acquire) & kRefcountMask) == kRefIncrement; @@ -170,14 +149,14 @@ class RefcountAndFlags { kNumFlags = 2, kImmortalFlag = 0x1, - kCrcFlag = 0x2, + kReservedFlag = 0x2, kRefIncrement = (1 << kNumFlags), // Bitmask to use when checking refcount by equality. This masks out // all flags except kImmortalFlag, which is part of the refcount for // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1 // if the immortal bit is set.) - kRefcountMask = ~kCrcFlag, + kRefcountMask = ~kReservedFlag, }; std::atomic<int32_t> count_; @@ -227,6 +206,18 @@ static_assert(EXTERNAL == RING + 1, "BTREE and EXTERNAL not consecutive"); static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive"); struct CordRep { + // Result from an `extract edge` operation. Contains the (possibly changed) + // tree node as well as the extracted edge, or {tree, nullptr} if no edge + // could be extracted. + // On success, the returned `tree` value is null if `extracted` was the only + // data edge inside the tree, a data edge if there were only two data edges in + // the tree, or the (possibly new / smaller) remaining tree with the extracted + // data edge removed. + struct ExtractResult { + CordRep* tree; + CordRep* extracted; + }; + CordRep() = default; constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l) : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} @@ -294,6 +285,13 @@ struct CordRepConcat : public CordRep { uint8_t depth() const { return storage[0]; } void set_depth(uint8_t depth) { storage[0] = depth; } + + // Extracts the right-most flat in the provided concat tree if the entire path + // to that flat is not shared, and the flat has the requested extra capacity. + // Returns the (potentially new) top level tree node and the extracted flat, + // or {tree, nullptr} if no flat was extracted. + static ExtractResult ExtractAppendBuffer(CordRepConcat* tree, + size_t extra_capacity); }; struct CordRepSubstring : public CordRep { diff --git a/absl/strings/internal/cord_internal_test.cc b/absl/strings/internal/cord_internal_test.cc deleted file mode 100644 index 0758dfef..00000000 --- a/absl/strings/internal/cord_internal_test.cc +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2021 The Abseil Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/strings/internal/cord_internal.h" - -#include "gmock/gmock.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace cord_internal { - -TEST(RefcountAndFlags, NormalRefcount) { - for (bool expect_high_refcount : {false, true}) { - SCOPED_TRACE(expect_high_refcount); - RefcountAndFlags refcount; - // count = 1 - - EXPECT_FALSE(refcount.HasCrc()); - EXPECT_TRUE(refcount.IsMutable()); - EXPECT_TRUE(refcount.IsOne()); - - refcount.Increment(); - // count = 2 - - EXPECT_FALSE(refcount.HasCrc()); - EXPECT_FALSE(refcount.IsMutable()); - EXPECT_FALSE(refcount.IsOne()); - - // Decrementing should return true, since a reference is outstanding. - if (expect_high_refcount) { - EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); - } else { - EXPECT_TRUE(refcount.Decrement()); - } - // count = 1 - - EXPECT_FALSE(refcount.HasCrc()); - EXPECT_TRUE(refcount.IsMutable()); - EXPECT_TRUE(refcount.IsOne()); - - // One more decremnt will return false, as no references remain. - if (expect_high_refcount) { - EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); - } else { - EXPECT_FALSE(refcount.Decrement()); - } - } -} - -TEST(RefcountAndFlags, CrcRefcount) { - for (bool expect_high_refcount : {false, true}) { - SCOPED_TRACE(expect_high_refcount); - RefcountAndFlags refcount(RefcountAndFlags::WithCrc{}); - // count = 1 - - // A CRC-carrying node is never mutable, but can be unshared - EXPECT_TRUE(refcount.HasCrc()); - EXPECT_FALSE(refcount.IsMutable()); - EXPECT_TRUE(refcount.IsOne()); - - refcount.Increment(); - // count = 2 - - EXPECT_TRUE(refcount.HasCrc()); - EXPECT_FALSE(refcount.IsMutable()); - EXPECT_FALSE(refcount.IsOne()); - - // Decrementing should return true, since a reference is outstanding. - if (expect_high_refcount) { - EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); - } else { - EXPECT_TRUE(refcount.Decrement()); - } - // count = 1 - - EXPECT_TRUE(refcount.HasCrc()); - EXPECT_FALSE(refcount.IsMutable()); - EXPECT_TRUE(refcount.IsOne()); - - // One more decremnt will return false, as no references remain. - if (expect_high_refcount) { - EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); - } else { - EXPECT_FALSE(refcount.Decrement()); - } - } -} - -TEST(RefcountAndFlags, ImmortalRefcount) { - RefcountAndFlags immortal_refcount(RefcountAndFlags::Immortal{}); - - for (int i = 0; i < 100; ++i) { - // An immortal refcount is never unshared, and decrementing never causes - // a collection. - EXPECT_FALSE(immortal_refcount.HasCrc()); - EXPECT_FALSE(immortal_refcount.IsMutable()); - EXPECT_FALSE(immortal_refcount.IsOne()); - EXPECT_TRUE(immortal_refcount.Decrement()); - EXPECT_TRUE(immortal_refcount.DecrementExpectHighRefcount()); - } -} - -} // namespace cord_internal -ABSL_NAMESPACE_END -} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc index 4404f33a..7cefcc52 100644 --- a/absl/strings/internal/cord_rep_btree.cc +++ b/absl/strings/internal/cord_rep_btree.cc @@ -216,8 +216,8 @@ void DeleteLeafEdge(CordRep* rep) { // propagate node changes up the stack. template <EdgeType edge_type> struct StackOperations { - // Returns true if the node at 'depth' is mutable, i.e. has a refcount - // of one, carries no CRC, and all of its parent nodes have a refcount of one. + // Returns true if the node at 'depth' is not shared, i.e. has a refcount + // of one and all of its parent nodes have a refcount of one. inline bool owned(int depth) const { return depth < share_depth; } // Returns the node at 'depth'. @@ -228,11 +228,11 @@ struct StackOperations { inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) { assert(depth <= tree->height()); int current_depth = 0; - while (current_depth < depth && tree->refcount.IsMutable()) { + while (current_depth < depth && tree->refcount.IsOne()) { stack[current_depth++] = tree; tree = tree->Edge(edge_type)->btree(); } - share_depth = current_depth + (tree->refcount.IsMutable() ? 1 : 0); + share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0); while (current_depth < depth) { stack[current_depth++] = tree; tree = tree->Edge(edge_type)->btree(); @@ -241,17 +241,17 @@ struct StackOperations { } // Builds a stack with the invariant that all nodes are private owned / not - // shared and carry no CRC data. This is used in iterative updates where a - // previous propagation guaranteed all nodes have this property. + // shared. This is used in iterative updates where a previous propagation + // guaranteed all nodes are owned / private. inline void BuildOwnedStack(CordRepBtree* tree, int height) { assert(height <= CordRepBtree::kMaxHeight); int depth = 0; while (depth < height) { - assert(tree->refcount.IsMutable()); + assert(tree->refcount.IsOne()); stack[depth++] = tree; tree = tree->Edge(edge_type)->btree(); } - assert(tree->refcount.IsMutable()); + assert(tree->refcount.IsOne()); share_depth = depth + 1; } @@ -336,12 +336,12 @@ struct StackOperations { return Unwind</*propagate=*/true>(tree, depth, length, result); } - // `share_depth` contains the depth at which the nodes in the stack cannot - // be mutated. I.e., if the top most level is shared (i.e.: - // `!refcount.IsMutable()`), then `share_depth` is 0. If the 2nd node - // is shared (and implicitly all nodes below that) then `share_depth` is 1, - // etc. A `share_depth` greater than the depth of the stack indicates that - // none of the nodes in the stack are shared. + // `share_depth` contains the depth at which the nodes in the stack become + // shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`), + // then `share_depth` is 0. If the 2nd node is shared (and implicitly all + // nodes below that) then `share_depth` is 1, etc. A `share_depth` greater + // than the depth of the stack indicates that none of the nodes in the stack + // are shared. int share_depth; NodeStack stack; @@ -773,7 +773,7 @@ CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) { CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) { CordRep* front = tree->Edge(tree->begin()); - if (tree->refcount.IsMutable()) { + if (tree->refcount.IsOne()) { Unref(tree->Edges(tree->begin() + 1, tree->end())); CordRepBtree::Delete(tree); } else { @@ -786,7 +786,7 @@ CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) { CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end, size_t new_length) { assert(end <= tree->end()); - if (tree->refcount.IsMutable()) { + if (tree->refcount.IsOne()) { Unref(tree->Edges(end, tree->end())); tree->set_end(end); tree->length = new_length; @@ -813,13 +813,13 @@ CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) { size_t length = len - n; int height = tree->height(); - bool is_mutable = tree->refcount.IsMutable(); + bool is_mutable = tree->refcount.IsOne(); // Extract all top nodes which are reduced to size = 1 Position pos = tree->IndexOfLength(length); while (pos.index == tree->begin()) { CordRep* edge = ExtractFront(tree); - is_mutable &= edge->refcount.IsMutable(); + is_mutable &= edge->refcount.IsOne(); if (height-- == 0) return ResizeEdge(edge, length, is_mutable); tree = edge->btree(); pos = tree->IndexOfLength(length); @@ -835,8 +835,8 @@ CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) { length = pos.n; while (length != edge->length) { // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy. - assert(tree->refcount.IsMutable()); - const bool edge_is_mutable = edge->refcount.IsMutable(); + assert(tree->refcount.IsOne()); + const bool edge_is_mutable = edge->refcount.IsOne(); if (height-- == 0) { tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable); @@ -973,7 +973,7 @@ char CordRepBtree::GetCharacter(size_t offset) const { Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) { // The inlined version in `GetAppendBuffer()` deals with all heights <= 3. assert(height() >= 4); - assert(refcount.IsMutable()); + assert(refcount.IsOne()); // Build a stack of nodes we may potentially need to update if we find a // non-shared FLAT with capacity at the leaf level. @@ -982,13 +982,13 @@ Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) { CordRepBtree* stack[kMaxDepth]; for (int i = 0; i < depth; ++i) { node = node->Edge(kBack)->btree(); - if (!node->refcount.IsMutable()) return {}; + if (!node->refcount.IsOne()) return {}; stack[i] = node; } // Must be a privately owned, mutable flat. CordRep* const edge = node->Edge(kBack); - if (!edge->refcount.IsMutable() || edge->tag < FLAT) return {}; + if (!edge->refcount.IsOne() || edge->tag < FLAT) return {}; // Must have capacity. const size_t avail = edge->flat()->Capacity() - edge->length; @@ -1123,6 +1123,79 @@ CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) { return nullptr; } +CordRepBtree::ExtractResult CordRepBtree::ExtractAppendBuffer( + CordRepBtree* tree, size_t extra_capacity) { + int depth = 0; + NodeStack stack; + + // Set up default 'no success' result which is {tree, nullptr}. + ExtractResult result; + result.tree = tree; + result.extracted = nullptr; + + // Dive down the right side of the tree, making sure no edges are shared. + while (tree->height() > 0) { + if (!tree->refcount.IsOne()) return result; + stack[depth++] = tree; + tree = tree->Edge(kBack)->btree(); + } + if (!tree->refcount.IsOne()) return result; + + // Validate we ended on a non shared flat. + CordRep* rep = tree->Edge(kBack); + if (!(rep->IsFlat() && rep->refcount.IsOne())) return result; + + // Verify it has at least the requested extra capacity. + CordRepFlat* flat = rep->flat(); + const size_t length = flat->length; + const size_t avail = flat->Capacity() - flat->length; + if (extra_capacity > avail) return result; + + // Set the extracted flat in the result. + result.extracted = flat; + + // Cascading delete all nodes that become empty. + while (tree->size() == 1) { + CordRepBtree::Delete(tree); + if (--depth < 0) { + // We consumed the entire tree: return nullptr for new tree. + result.tree = nullptr; + return result; + } + rep = tree; + tree = stack[depth]; + } + + // Remove the edge or cascaded up parent node. + tree->set_end(tree->end() - 1); + tree->length -= length; + + // Adjust lengths up the tree. + while (depth > 0) { + tree = stack[--depth]; + tree->length -= length; + } + + // Remove unnecessary top nodes with size = 1. This may iterate all the way + // down to the leaf node in which case we simply return the remaining last + // edge in that node and the extracted flat. + while (tree->size() == 1) { + int height = tree->height(); + rep = tree->Edge(kBack); + Delete(tree); + if (height == 0) { + // We consumed the leaf: return the sole data edge as the new tree. + result.tree = rep; + return result; + } + tree = rep->btree(); + } + + // Done: return the (new) top level node and extracted flat. + result.tree = tree; + return result; +} + } // namespace cord_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index bb38f0c3..0b44509e 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -240,11 +240,41 @@ class CordRepBtree : public CordRep { // length of the flat node and involved tree nodes have been increased by // `span.length()`. The caller is responsible for immediately assigning values // to all uninitialized data reference by the returned span. - // Requires `this->refcount.IsMutable()`: this function forces the - // caller to do this fast path check on the top level node, as this is the - // most commonly shared node of a cord tree. + // Requires `this->refcount.IsOne()`: this function forces the caller to do + // this fast path check on the top level node, as this is the most commonly + // shared node of a cord tree. Span<char> GetAppendBuffer(size_t size); + // Extracts the right-most data edge from this tree iff: + // - the tree and all internal edges to the right-most node are not shared. + // - the right-most node is a FLAT node and not shared. + // - the right-most node has at least the desired extra capacity. + // + // Returns {tree, nullptr} if any of the above conditions are not met. + // This method effectively removes data from the tree. The intent of this + // method is to allow applications appending small string data to use + // pre-existing capacity, and add the modified rep back to the tree. + // + // Simplified such code would look similar to this: + // void MyTreeBuilder::Append(string_view data) { + // ExtractResult result = CordRepBtree::ExtractAppendBuffer(tree_, 1); + // if (CordRep* rep = result.extracted) { + // size_t available = rep->Capacity() - rep->length; + // size_t n = std::min(data.size(), n); + // memcpy(rep->Data(), data.data(), n); + // rep->length += n; + // data.remove_prefix(n); + // if (!result.tree->IsBtree()) { + // tree_ = CordRepBtree::Create(result.tree); + // } + // tree_ = CordRepBtree::Append(tree_, rep); + // } + // ... + // // Remaining edge in `result.tree`. + // } + static ExtractResult ExtractAppendBuffer(CordRepBtree* tree, + size_t extra_capacity = 1); + // Returns the `height` of the tree. The height of a tree is limited to // kMaxHeight. `height` is implemented as an `int` as in some places we // use negative (-1) values for 'data edges'. @@ -849,7 +879,7 @@ inline CordRepBtree* CordRepBtree::Create(CordRep* rep) { } inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) { - assert(refcount.IsMutable()); + assert(refcount.IsOne()); CordRepBtree* tree = this; const int height = this->height(); CordRepBtree* n1 = tree; @@ -858,21 +888,21 @@ inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) { switch (height) { case 3: tree = tree->Edge(kBack)->btree(); - if (!tree->refcount.IsMutable()) return {}; + if (!tree->refcount.IsOne()) return {}; n2 = tree; ABSL_FALLTHROUGH_INTENDED; case 2: tree = tree->Edge(kBack)->btree(); - if (!tree->refcount.IsMutable()) return {}; + if (!tree->refcount.IsOne()) return {}; n1 = tree; ABSL_FALLTHROUGH_INTENDED; case 1: tree = tree->Edge(kBack)->btree(); - if (!tree->refcount.IsMutable()) return {}; + if (!tree->refcount.IsOne()) return {}; ABSL_FALLTHROUGH_INTENDED; case 0: CordRep* edge = tree->Edge(kBack); - if (!edge->refcount.IsMutable()) return {}; + if (!edge->refcount.IsOne()) return {}; if (edge->tag < FLAT) return {}; size_t avail = edge->flat()->Capacity() - edge->length; if (avail == 0) return {}; diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc index be9473d4..1d0c68b2 100644 --- a/absl/strings/internal/cord_rep_btree_test.cc +++ b/absl/strings/internal/cord_rep_btree_test.cc @@ -128,6 +128,16 @@ MATCHER_P2(IsSubstring, start, length, return true; } +MATCHER_P2(EqExtractResult, tree, rep, "Equals ExtractResult") { + if (arg.tree != tree || arg.extracted != rep) { + *result_listener << "Expected {" << static_cast<const void*>(tree) << ", " + << static_cast<const void*>(rep) << "}, got {" << arg.tree + << ", " << arg.extracted << "}"; + return false; + } + return true; +} + // DataConsumer is a simple helper class used by tests to 'consume' string // fragments from the provided input in forward or backward direction. class DataConsumer { @@ -1483,6 +1493,125 @@ TEST_P(CordRepBtreeTest, Rebuild) { } } +// Convenience helper for CordRepBtree::ExtractAppendBuffer +CordRepBtree::ExtractResult ExtractLast(CordRepBtree* input, size_t cap = 1) { + return CordRepBtree::ExtractAppendBuffer(input, cap); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferLeafSingleFlat) { + CordRep* flat = MakeFlat("Abc"); + CordRepBtree* leaf = CordRepBtree::Create(flat); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(nullptr, flat)); + CordRep::Unref(flat); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeSingleFlat) { + CordRep* flat = MakeFlat("Abc"); + CordRepBtree* leaf = CordRepBtree::Create(flat); + CordRepBtree* node = CordRepBtree::New(leaf); + EXPECT_THAT(ExtractLast(node), EqExtractResult(nullptr, flat)); + CordRep::Unref(flat); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferLeafTwoFlats) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf = CreateTree(flats); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(flats[0], flats[1])); + CordRep::Unref(flats[0]); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeTwoFlats) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf = CreateTree(flats); + CordRepBtree* node = CordRepBtree::New(leaf); + EXPECT_THAT(ExtractLast(node), EqExtractResult(flats[0], flats[1])); + CordRep::Unref(flats[0]); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeTwoFlatsInTwoLeafs) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf1 = CordRepBtree::Create(flats[0]); + CordRepBtree* leaf2 = CordRepBtree::Create(flats[1]); + CordRepBtree* node = CordRepBtree::New(leaf1, leaf2); + EXPECT_THAT(ExtractLast(node), EqExtractResult(flats[0], flats[1])); + CordRep::Unref(flats[0]); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferLeafThreeFlats) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdefghi", 3); + CordRepBtree* leaf = CreateTree(flats); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(leaf, flats[2])); + CordRep::Unref(flats[2]); + CordRep::Unref(leaf); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeThreeFlatsRightNoFolding) { + CordRep* flat = MakeFlat("Abc"); + std::vector<CordRep*> flats = CreateFlatsFromString("defghi", 3); + CordRepBtree* leaf1 = CordRepBtree::Create(flat); + CordRepBtree* leaf2 = CreateTree(flats); + CordRepBtree* node = CordRepBtree::New(leaf1, leaf2); + EXPECT_THAT(ExtractLast(node), EqExtractResult(node, flats[1])); + EXPECT_THAT(node->Edges(), ElementsAre(leaf1, leaf2)); + EXPECT_THAT(leaf1->Edges(), ElementsAre(flat)); + EXPECT_THAT(leaf2->Edges(), ElementsAre(flats[0])); + CordRep::Unref(node); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeThreeFlatsRightLeafFolding) { + CordRep* flat = MakeFlat("Abc"); + std::vector<CordRep*> flats = CreateFlatsFromString("defghi", 3); + CordRepBtree* leaf1 = CreateTree(flats); + CordRepBtree* leaf2 = CordRepBtree::Create(flat); + CordRepBtree* node = CordRepBtree::New(leaf1, leaf2); + EXPECT_THAT(ExtractLast(node), EqExtractResult(leaf1, flat)); + EXPECT_THAT(leaf1->Edges(), ElementsAreArray(flats)); + CordRep::Unref(leaf1); + CordRep::Unref(flat); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNoCapacity) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf = CreateTree(flats); + size_t avail = flats[1]->flat()->Capacity() - flats[1]->length; + EXPECT_THAT(ExtractLast(leaf, avail + 1), EqExtractResult(leaf, nullptr)); + EXPECT_THAT(ExtractLast(leaf, avail), EqExtractResult(flats[0], flats[1])); + CordRep::Unref(flats[0]); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNotFlat) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + auto substr = MakeSubstring(1, 2, flats[1]); + CordRepBtree* leaf = CreateTree({flats[0], substr}); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(leaf, nullptr)); + CordRep::Unref(leaf); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferShared) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf = CreateTree(flats); + + CordRep::Ref(flats[1]); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(leaf, nullptr)); + CordRep::Unref(flats[1]); + + CordRep::Ref(leaf); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(leaf, nullptr)); + CordRep::Unref(leaf); + + CordRepBtree* node = CordRepBtree::New(leaf); + CordRep::Ref(node); + EXPECT_THAT(ExtractLast(node), EqExtractResult(node, nullptr)); + CordRep::Unref(node); + + CordRep::Unref(node); +} + } // namespace } // namespace cord_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/cord_rep_concat.cc b/absl/strings/internal/cord_rep_concat.cc new file mode 100644 index 00000000..3457b55c --- /dev/null +++ b/absl/strings/internal/cord_rep_concat.cc @@ -0,0 +1,63 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +CordRepConcat::ExtractResult CordRepConcat::ExtractAppendBuffer( + CordRepConcat* tree, size_t extra_capacity) { + absl::InlinedVector<CordRepConcat*, kInlinedVectorSize> stack; + CordRepConcat* concat = tree; + CordRep* rep = concat->right; + + // Dive down the tree, making sure no edges are shared + while (concat->refcount.IsOne() && rep->IsConcat()) { + stack.push_back(concat); + concat = rep->concat(); + rep = concat->right; + } + // Validate we ended on a non shared flat. + if (concat->refcount.IsOne() && rep->IsFlat() && + rep->refcount.IsOne()) { + // Verify it has at least the requested extra capacity + CordRepFlat* flat = rep->flat(); + size_t remaining = flat->Capacity() - flat->length; + if (extra_capacity > remaining) return {tree, nullptr}; + + // Check if we have a parent to adjust, or if we must return the left node. + rep = concat->left; + if (!stack.empty()) { + stack.back()->right = rep; + for (CordRepConcat* parent : stack) { + parent->length -= flat->length; + } + rep = tree; + } + delete concat; + return {rep, flat}; + } + return {tree, nullptr}; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_concat_test.cc b/absl/strings/internal/cord_rep_concat_test.cc new file mode 100644 index 00000000..32bab975 --- /dev/null +++ b/absl/strings/internal/cord_rep_concat_test.cc @@ -0,0 +1,143 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdint> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_test_util.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; +using ::testing::Eq; + +MATCHER_P2(EqExtractResult, tree, rep, "Equals ExtractResult") { + if (arg.tree != tree || arg.extracted != rep) { + *result_listener << "Expected {" << static_cast<const void*>(tree) << ", " + << static_cast<const void*>(rep) << "}, got {" << arg.tree + << ", " << arg.extracted << "}"; + return false; + } + return true; +} + +CordRepConcat* MakeConcat(CordRep* left, CordRep* right, int depth) { + CordRepConcat* concat = new CordRepConcat; + concat->tag = CONCAT; + concat->set_depth(depth); + concat->length = left->length + right->length; + concat->left = left; + concat->right = right; + return concat; +} + +CordRepConcat::ExtractResult ExtractLast(CordRepConcat* concat, + size_t extra_capacity = 1) { + return CordRepConcat::ExtractAppendBuffer(concat, extra_capacity); +} + +TEST(CordRepConcatTest, ExtractAppendBufferTwoFlats) { + CordRepFlat* flat1 = MakeFlat("abc"); + CordRepFlat* flat2 = MakeFlat("defg"); + CordRepConcat* concat = MakeConcat(flat1, flat2, 0); + EXPECT_THAT(ExtractLast(concat), EqExtractResult(flat1, flat2)); + CordRep::Unref(flat1); + CordRep::Unref(flat2); +} + +TEST(CordRepConcatTest, ExtractAppendBufferThreeFlatsOne) { + CordRepFlat* flat1 = MakeFlat("abc"); + CordRepFlat* flat2 = MakeFlat("defg"); + CordRepFlat* flat3 = MakeFlat("hijkl"); + CordRepConcat* lconcat = MakeConcat(flat1, flat2, 0); + CordRepConcat* concat = MakeConcat(lconcat, flat3, 1); + EXPECT_THAT(ExtractLast(concat), EqExtractResult(lconcat, flat3)); + ASSERT_THAT(lconcat->length, Eq(7)); + CordRep::Unref(lconcat); + CordRep::Unref(flat3); +} + +TEST(CordRepConcatTest, ExtractAppendBufferThreeFlatsTwo) { + CordRepFlat* flat1 = MakeFlat("hijkl"); + CordRepFlat* flat2 = MakeFlat("abc"); + CordRepFlat* flat3 = MakeFlat("defg"); + CordRepConcat* rconcat = MakeConcat(flat2, flat3, 0); + CordRepConcat* concat = MakeConcat(flat1, rconcat, 1); + EXPECT_THAT(ExtractLast(concat), EqExtractResult(concat, flat3)); + ASSERT_THAT(concat->length, Eq(8)); + CordRep::Unref(concat); + CordRep::Unref(flat3); +} + +TEST(CordRepConcatTest, ExtractAppendBufferShared) { + CordRepFlat* flat1 = MakeFlat("hijkl"); + CordRepFlat* flat2 = MakeFlat("abc"); + CordRepFlat* flat3 = MakeFlat("defg"); + CordRepConcat* rconcat = MakeConcat(flat2, flat3, 0); + CordRepConcat* concat = MakeConcat(flat1, rconcat, 1); + + CordRep::Ref(concat); + EXPECT_THAT(ExtractLast(concat), EqExtractResult(concat, nullptr)); + CordRep::Unref(concat); + + CordRep::Ref(rconcat); + EXPECT_THAT(ExtractLast(concat), EqExtractResult(concat, nullptr)); + CordRep::Unref(rconcat); + + CordRep::Ref(flat3); + EXPECT_THAT(ExtractLast(concat), EqExtractResult(concat, nullptr)); + CordRep::Unref(flat3); + + CordRep::Unref(concat); +} + +TEST(CordRepConcatTest, ExtractAppendBufferNotFlat) { + CordRepFlat* flat1 = MakeFlat("hijkl"); + CordRepFlat* flat2 = MakeFlat("abc"); + CordRepFlat* flat3 = MakeFlat("defg"); + auto substr = MakeSubstring(1, 2, flat3); + CordRepConcat* rconcat = MakeConcat(flat2, substr, 0); + CordRepConcat* concat = MakeConcat(flat1, rconcat, 1); + EXPECT_THAT(ExtractLast(concat), EqExtractResult(concat, nullptr)); + CordRep::Unref(concat); +} + +TEST(CordRepConcatTest, ExtractAppendBufferNoCapacity) { + CordRepFlat* flat1 = MakeFlat("hijkl"); + CordRepFlat* flat2 = MakeFlat("abc"); + CordRepFlat* flat3 = MakeFlat("defg"); + size_t avail = flat3->Capacity() - flat3->length; + CordRepConcat* rconcat = MakeConcat(flat2, flat3, 0); + CordRepConcat* concat = MakeConcat(flat1, rconcat, 1); + + // Should fail if 1 byte over, success if exactly matching + EXPECT_THAT(ExtractLast(concat, avail + 1), EqExtractResult(concat, nullptr)); + EXPECT_THAT(ExtractLast(concat, avail), EqExtractResult(concat, flat3)); + + CordRep::Unref(concat); + CordRep::Unref(flat3); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_crc.h b/absl/strings/internal/cord_rep_crc.h index 3ead94a1..5294b0d1 100644 --- a/absl/strings/internal/cord_rep_crc.h +++ b/absl/strings/internal/cord_rep_crc.h @@ -76,6 +76,15 @@ inline CordRep* SkipCrcNode(CordRep* rep) { } } +inline const CordRep* SkipCrcNode(const CordRep* rep) { + assert(rep != nullptr); + if (ABSL_PREDICT_FALSE(rep->IsCrc())) { + return rep->crc()->child; + } else { + return rep; + } +} + inline CordRepCrc* CordRep::crc() { assert(IsCrc()); return static_cast<CordRepCrc*>(this); diff --git a/absl/strings/internal/cord_rep_ring.cc b/absl/strings/internal/cord_rep_ring.cc index 07c77eb3..db1f63fa 100644 --- a/absl/strings/internal/cord_rep_ring.cc +++ b/absl/strings/internal/cord_rep_ring.cc @@ -277,7 +277,7 @@ CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { // Get current number of entries, and check for max capacity. size_t entries = rep->entries(); - if (!rep->refcount.IsMutable()) { + if (!rep->refcount.IsOne()) { return Copy(rep, rep->head(), rep->tail(), extra); } else if (entries + extra > rep->capacity()) { const size_t min_grow = rep->capacity() + rep->capacity() / 2; @@ -292,10 +292,10 @@ CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { } Span<char> CordRepRing::GetAppendBuffer(size_t size) { - assert(refcount.IsMutable()); + assert(refcount.IsOne()); index_type back = retreat(tail_); CordRep* child = entry_child(back); - if (child->tag >= FLAT && child->refcount.IsMutable()) { + if (child->tag >= FLAT && child->refcount.IsOne()) { size_t capacity = child->flat()->Capacity(); pos_type end_pos = entry_end_pos(back); size_t data_offset = entry_data_offset(back); @@ -312,10 +312,10 @@ Span<char> CordRepRing::GetAppendBuffer(size_t size) { } Span<char> CordRepRing::GetPrependBuffer(size_t size) { - assert(refcount.IsMutable()); + assert(refcount.IsOne()); CordRep* child = entry_child(head_); size_t data_offset = entry_data_offset(head_); - if (data_offset && child->refcount.IsMutable() && child->tag >= FLAT) { + if (data_offset && child->refcount.IsOne() && child->tag >= FLAT) { size_t n = (std::min)(data_offset, size); this->length += n; begin_pos_ -= n; @@ -504,7 +504,7 @@ CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) { CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, size_t extra) { - if (rep->refcount.IsMutable()) { + if (rep->refcount.IsOne()) { Span<char> avail = rep->GetAppendBuffer(data.length()); if (!avail.empty()) { memcpy(avail.data(), data.data(), avail.length()); @@ -538,7 +538,7 @@ CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, CordRepRing* CordRepRing::Prepend(CordRepRing* rep, absl::string_view data, size_t extra) { - if (rep->refcount.IsMutable()) { + if (rep->refcount.IsOne()) { Span<char> avail = rep->GetPrependBuffer(data.length()); if (!avail.empty()) { const char* tail = data.data() + data.length() - avail.length(); @@ -678,7 +678,7 @@ CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, Position tail = rep->FindTail(head.index, offset + len); const size_t new_entries = rep->entries(head.index, tail.index); - if (rep->refcount.IsMutable() && extra <= (rep->capacity() - new_entries)) { + if (rep->refcount.IsOne() && extra <= (rep->capacity() - new_entries)) { // We adopt a privately owned rep and no extra entries needed. if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); @@ -715,7 +715,7 @@ CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len, } Position head = rep->Find(len); - if (rep->refcount.IsMutable()) { + if (rep->refcount.IsOne()) { if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); rep->head_ = head.index; } else { @@ -745,7 +745,7 @@ CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len, } Position tail = rep->FindTail(rep->length - len); - if (rep->refcount.IsMutable()) { + if (rep->refcount.IsOne()) { // We adopt a privately owned rep, scrub. if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); rep->tail_ = tail.index; diff --git a/absl/strings/internal/cordz_info.cc b/absl/strings/internal/cordz_info.cc index 5c18bbc5..4d52a802 100644 --- a/absl/strings/internal/cordz_info.cc +++ b/absl/strings/internal/cordz_info.cc @@ -20,6 +20,7 @@ #include "absl/debugging/stacktrace.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_crc.h" #include "absl/strings/internal/cord_rep_ring.h" #include "absl/strings/internal/cordz_handle.h" #include "absl/strings/internal/cordz_statistics.h" @@ -81,6 +82,14 @@ class CordRepAnalyzer { size_t refcount = rep->refcount.Get(); RepRef repref{rep, (refcount > 1) ? refcount - 1 : 1}; + // Process the top level CRC node, if present. + if (repref.rep->tag == CRC) { + statistics_.node_count++; + statistics_.node_counts.crc++; + memory_usage_.Add(sizeof(CordRepCrc), repref.refcount); + repref = repref.Child(repref.rep->crc()->child); + } + // Process all top level linear nodes (substrings and flats). repref = CountLinearReps(repref, memory_usage_); diff --git a/absl/strings/internal/cordz_info_statistics_test.cc b/absl/strings/internal/cordz_info_statistics_test.cc index 7430d281..5277c3c1 100644 --- a/absl/strings/internal/cordz_info_statistics_test.cc +++ b/absl/strings/internal/cordz_info_statistics_test.cc @@ -22,6 +22,7 @@ #include "absl/strings/cord.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_crc.h" #include "absl/strings/internal/cord_rep_flat.h" #include "absl/strings/internal/cord_rep_ring.h" #include "absl/strings/internal/cordz_info.h" @@ -535,6 +536,27 @@ TEST(CordzInfoStatisticsTest, BtreeNodeShared) { EXPECT_THAT(SampleCord(tree), EqStatistics(expected)); } +TEST(CordzInfoStatisticsTest, Crc) { + RefHelper ref; + auto* left = Flat(1000); + auto* right = Flat(1000); + auto* concat = Concat(left, right); + auto* crc = ref.NeedsUnref(CordRepCrc::New(concat, 12345)); + + CordzStatistics expected; + expected.size = concat->length; + expected.estimated_memory_usage = + SizeOf(crc) + SizeOf(concat) + SizeOf(left) + SizeOf(right); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 4; + expected.node_counts.flat = 2; + expected.node_counts.flat_1k = 2; + expected.node_counts.concat = 1; + expected.node_counts.crc = 1; + + EXPECT_THAT(SampleCord(crc), EqStatistics(expected)); +} + TEST(CordzInfoStatisticsTest, ThreadSafety) { Notification stop; static constexpr int kNumThreads = 8; diff --git a/absl/strings/internal/cordz_statistics.h b/absl/strings/internal/cordz_statistics.h index da4c7dbb..57071905 100644 --- a/absl/strings/internal/cordz_statistics.h +++ b/absl/strings/internal/cordz_statistics.h @@ -41,6 +41,7 @@ struct CordzStatistics { size_t concat = 0; // #concat reps size_t ring = 0; // #ring buffer reps size_t btree = 0; // #btree reps + size_t crc = 0; // #crc reps }; // The size of the cord in bytes. This matches the result of Cord::size(). diff --git a/absl/strings/internal/cordz_update_tracker.h b/absl/strings/internal/cordz_update_tracker.h index 1f764486..7a41c180 100644 --- a/absl/strings/internal/cordz_update_tracker.h +++ b/absl/strings/internal/cordz_update_tracker.h @@ -60,6 +60,7 @@ class CordzUpdateTracker { kPrependString, kRemovePrefix, kRemoveSuffix, + kSetExpectedChecksum, kSubCord, // kNumMethods defines the number of entries: must be the last entry. diff --git a/absl/strings/internal/cordz_update_tracker_test.cc b/absl/strings/internal/cordz_update_tracker_test.cc index 2348a175..8eda529e 100644 --- a/absl/strings/internal/cordz_update_tracker_test.cc +++ b/absl/strings/internal/cordz_update_tracker_test.cc @@ -58,6 +58,7 @@ Methods AllMethods() { Method::kPrependString, Method::kRemovePrefix, Method::kRemoveSuffix, + Method::kSetExpectedChecksum, Method::kSubCord}; } |