diff options
author | Derek Mauro <dmauro@google.com> | 2022-10-28 11:21:06 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2022-10-28 11:22:03 -0700 |
commit | 1db72eb03e3685742c62abea9d13ddab2adcdf01 (patch) | |
tree | b2e9c4182d1ba593b924152d24a8e9e1e8569c9e /absl/strings | |
parent | fd9fbe74511fb2a060a3b15a1572b52fe16a6a43 (diff) |
Support empty Cords with an expected checksum
PiperOrigin-RevId: 484578104
Change-Id: Ie4be3e4de27dc28d88395e16fd075fb10ab7a302
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/cord.cc | 47 | ||||
-rw-r--r-- | absl/strings/cord.h | 29 | ||||
-rw-r--r-- | absl/strings/cord_test.cc | 148 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_crc.cc | 9 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_crc.h | 2 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_crc_test.cc | 15 |
6 files changed, 226 insertions, 24 deletions
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 66f45fef..57082c5f 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -420,6 +420,7 @@ Cord& Cord::operator=(absl::string_view src) { // we keep it here to make diffs easier. void Cord::InlineRep::AppendArray(absl::string_view src, MethodIdentifier method) { + MaybeRemoveEmptyCrcNode(); if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. size_t appended = 0; @@ -479,6 +480,10 @@ inline CordRep* Cord::TakeRep() && { template <typename C> inline void Cord::AppendImpl(C&& src) { auto constexpr method = CordzUpdateTracker::kAppendCord; + + contents_.MaybeRemoveEmptyCrcNode(); + if (src.empty()) return; + if (empty()) { // Since destination is empty, we can avoid allocating a node, if (src.contents_.is_tree()) { @@ -591,6 +596,9 @@ void Cord::Append(T&& src) { template void Cord::Append(std::string&& src); void Cord::Prepend(const Cord& src) { + contents_.MaybeRemoveEmptyCrcNode(); + if (src.empty()) return; + CordRep* src_tree = src.contents_.tree(); if (src_tree != nullptr) { CordRep::Ref(src_tree); @@ -605,7 +613,9 @@ void Cord::Prepend(const Cord& src) { } void Cord::PrependArray(absl::string_view src, MethodIdentifier method) { + contents_.MaybeRemoveEmptyCrcNode(); if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + if (!contents_.is_tree()) { size_t cur_size = contents_.inline_size(); if (cur_size + src.size() <= InlineRep::kMaxInline) { @@ -665,6 +675,7 @@ void Cord::RemovePrefix(size_t n) { ABSL_INTERNAL_CHECK(n <= size(), absl::StrCat("Requested prefix size ", n, " exceeds Cord's size ", size())); + contents_.MaybeRemoveEmptyCrcNode(); CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.remove_prefix(n); @@ -695,6 +706,7 @@ void Cord::RemoveSuffix(size_t n) { ABSL_INTERNAL_CHECK(n <= size(), absl::StrCat("Requested suffix size ", n, " exceeds Cord's size ", size())); + contents_.MaybeRemoveEmptyCrcNode(); CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.reduce_size(n); @@ -844,9 +856,10 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { void Cord::SetExpectedChecksum(uint32_t crc) { auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum; - if (empty()) return; - - if (!contents_.is_tree()) { + if (empty()) { + CordRep* rep = CordRepCrc::New(nullptr, crc); + contents_.EmplaceTree(rep, method); + } else if (!contents_.is_tree()) { CordRep* rep = contents_.MakeFlatWithExtraCapacity(0); rep = CordRepCrc::New(rep, crc); contents_.EmplaceTree(rep, method); @@ -929,6 +942,7 @@ inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size, } inline absl::string_view Cord::GetFirstChunk(const Cord& c) { + if (c.empty()) return {}; return c.contents_.FindFlatStartPiece(); } inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) { @@ -1166,6 +1180,10 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { assert(rep != nullptr); + if (rep->length == 0) { + *fragment = absl::string_view(); + return true; + } rep = cord_internal::SkipCrcNode(rep); if (rep->IsFlat()) { *fragment = absl::string_view(rep->flat()->Data(), rep->length); @@ -1197,6 +1215,7 @@ absl::string_view Cord::FlattenSlowPath() { absl::cord_internal::CordRep* rep, absl::FunctionRef<void(absl::string_view)> callback) { assert(rep != nullptr); + if (rep->length == 0) return; rep = cord_internal::SkipCrcNode(rep); if (rep->IsBtree()) { @@ -1230,7 +1249,11 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, if (include_data) *os << static_cast<void*>(rep); *os << "]"; *os << " " << std::setw(indent) << ""; - if (rep->IsCrc()) { + bool leaf = false; + if (rep == nullptr) { + *os << "NULL\n"; + leaf = true; + } else if (rep->IsCrc()) { *os << "CRC crc=" << rep->crc()->crc << "\n"; indent += kIndentStep; rep = rep->crc()->child; @@ -1239,6 +1262,7 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, indent += kIndentStep; rep = rep->substring()->child; } else { // Leaf or ring + leaf = true; if (rep->IsExternal()) { *os << "EXTERNAL ["; if (include_data) @@ -1252,6 +1276,8 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, } else { CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os); } + } + if (leaf) { if (stack.empty()) break; rep = stack.back(); stack.pop_back(); @@ -1297,11 +1323,14 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, node->substring()->child->length, ReportError(root, node)); } else if (node->IsCrc()) { - ABSL_INTERNAL_CHECK(node->crc()->child != nullptr, - ReportError(root, node)); - ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length, - ReportError(root, node)); - worklist.push_back(node->crc()->child); + ABSL_INTERNAL_CHECK( + node->crc()->child != nullptr || node->crc()->length == 0, + ReportError(root, node)); + if (node->crc()->child != nullptr) { + ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length, + ReportError(root, node)); + worklist.push_back(node->crc()->child); + } } } while (!worklist.empty()); return true; diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 88e1c85d..6e3da89e 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -926,6 +926,13 @@ class Cord { void set_inline_size(size_t size) { data_.set_inline_size(size); } size_t inline_size() const { return data_.inline_size(); } + // Empty cords that carry a checksum have a CordRepCrc node with a null + // child node. The code can avoid lots of special cases where it would + // otherwise transition from tree to inline storage if we just remove the + // CordRepCrc node before mutations. Must never be called inside a + // CordzUpdateScope since it untracks the cordz info. + void MaybeRemoveEmptyCrcNode(); + cord_internal::InlineData data_; }; InlineRep contents_; @@ -1236,6 +1243,18 @@ inline void Cord::InlineRep::CopyToArray(char* dst) const { cord_internal::SmallMemmove(dst, data_.as_chars(), n); } +inline void Cord::InlineRep::MaybeRemoveEmptyCrcNode() { + CordRep* rep = tree(); + if (rep == nullptr || ABSL_PREDICT_TRUE(rep->length > 0)) { + return; + } + assert(rep->IsCrc()); + assert(rep->crc()->child == nullptr); + CordzInfo::MaybeUntrackCord(cordz_info()); + CordRep::Unref(rep); + ResetToEmpty(); +} + constexpr inline Cord::Cord() noexcept {} inline Cord::Cord(absl::string_view src) @@ -1285,7 +1304,7 @@ inline size_t Cord::size() const { return contents_.size(); } -inline bool Cord::empty() const { return contents_.empty(); } +inline bool Cord::empty() const { return size() == 0; } inline size_t Cord::EstimatedMemoryUsage( CordMemoryAccounting accounting_method) const { @@ -1411,7 +1430,11 @@ inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) { inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) { if (CordRep* tree = cord->contents_.tree()) { bytes_remaining_ = tree->length; - InitTree(tree); + if (ABSL_PREDICT_TRUE(bytes_remaining_ != 0)) { + InitTree(tree); + } else { + current_chunk_ = {}; + } } else { bytes_remaining_ = cord->contents_.inline_size(); current_chunk_ = {cord->contents_.data(), bytes_remaining_}; @@ -1580,7 +1603,7 @@ inline void Cord::ForEachChunk( if (rep == nullptr) { callback(absl::string_view(contents_.data(), contents_.size())); } else { - return ForEachChunkAux(rep, callback); + ForEachChunkAux(rep, callback); } } diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 1fc4be6e..9a72f7be 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -1988,6 +1988,12 @@ TEST_P(CordTest, HugeCord) { // Tests that Append() works ok when handed a self reference TEST_P(CordTest, AppendSelf) { + // Test the empty case. + absl::Cord empty; + MaybeHarden(empty); + empty.Append(empty); + ASSERT_EQ(empty, ""); + // We run the test until data is ~16K // This guarantees it covers small, medium and large data. std::string control_data = "Abc"; @@ -2712,7 +2718,7 @@ class CordMutator { // clang-format off // This array is constant-initialized in conformant compilers. -CordMutator cord_mutators[] ={ +CordMutator cord_mutators[] = { {"clear", [](absl::Cord& c) { c.Clear(); }}, {"overwrite", [](absl::Cord& c) { c = "overwritten"; }}, { @@ -2742,6 +2748,25 @@ CordMutator cord_mutators[] ={ [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); } }, { + "append empty string", + [](absl::Cord& c) { c.Append(""); }, + [](absl::Cord& c) { } + }, + { + "append empty cord", + [](absl::Cord& c) { c.Append(absl::Cord()); }, + [](absl::Cord& c) { } + }, + { + "append empty checksummed cord", + [](absl::Cord& c) { + absl::Cord to_append; + to_append.SetExpectedChecksum(999); + c.Append(to_append); + }, + [](absl::Cord& c) { } + }, + { "prepend string", [](absl::Cord& c) { c.Prepend("9876543210"); }, [](absl::Cord& c) { c.RemovePrefix(10); } @@ -2763,12 +2788,33 @@ CordMutator cord_mutators[] ={ [](absl::Cord& c) { c.RemovePrefix(10); } }, { + "prepend empty string", + [](absl::Cord& c) { c.Prepend(""); }, + [](absl::Cord& c) { } + }, + { + "prepend empty cord", + [](absl::Cord& c) { c.Prepend(absl::Cord()); }, + [](absl::Cord& c) { } + }, + { + "prepend empty checksummed cord", + [](absl::Cord& c) { + absl::Cord to_prepend; + to_prepend.SetExpectedChecksum(999); + c.Prepend(to_prepend); + }, + [](absl::Cord& c) { } + }, + { "prepend self", [](absl::Cord& c) { c.Prepend(c); }, [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); } }, - {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(2); }}, - {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(2); }}, + {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); }}, + {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); }}, + {"remove 0-prefix", [](absl::Cord& c) { c.RemovePrefix(0); }}, + {"remove 0-suffix", [](absl::Cord& c) { c.RemoveSuffix(0); }}, {"subcord", [](absl::Cord& c) { c = c.Subcord(1, c.size() - 2); }}, { "swap inline", @@ -2834,6 +2880,13 @@ TEST_P(CordTest, ExpectedChecksum) { c2.SetExpectedChecksum(24680); mutator.Mutate(c2); + + if (c1 == c2) { + // Not a mutation (for example, appending the empty string). + // Whether the checksum is removed is not defined. + continue; + } + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); if (mutator.CanUndo()) { @@ -2903,3 +2956,92 @@ TEST_P(CordTest, ExpectedChecksum) { } } } + +// Test the special cases encountered with an empty checksummed cord. +TEST_P(CordTest, ChecksummedEmptyCord) { + absl::Cord c1; + EXPECT_FALSE(c1.ExpectedChecksum().has_value()); + + // Setting an expected checksum works. + c1.SetExpectedChecksum(12345); + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + EXPECT_EQ(c1, ""); + EXPECT_TRUE(c1.empty()); + + // CRC persists through copies, assignments, and moves: + absl::Cord c1_copy_construct = c1; + EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_copy_assign; + c1_copy_assign = c1; + EXPECT_EQ(c1_copy_assign.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_move(std::move(c1_copy_assign)); + EXPECT_EQ(c1_move.ExpectedChecksum().value_or(0), 12345); + + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + + // A CRC Cord compares equal to its non-CRC value. + EXPECT_EQ(c1, absl::Cord()); + + for (const CordMutator& mutator : cord_mutators) { + SCOPED_TRACE(mutator.Name()); + + // Exercise mutating an empty checksummed cord to catch crashes and exercise + // memory sanitizers. + absl::Cord c2; + c2.SetExpectedChecksum(24680); + mutator.Mutate(c2); + + if (c2.empty()) { + // Not a mutation + continue; + } + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); + + if (mutator.CanUndo()) { + mutator.Undo(c2); + } + } + + absl::Cord c3; + c3.SetExpectedChecksum(999); + const absl::Cord& cc3 = c3; + + // Test that all cord reading operations function in the face of an + // expected checksum. + EXPECT_TRUE(cc3.StartsWith("")); + EXPECT_TRUE(cc3.EndsWith("")); + EXPECT_TRUE(cc3.empty()); + EXPECT_EQ(cc3, ""); + EXPECT_EQ(cc3, absl::Cord()); + EXPECT_EQ(cc3.size(), 0); + EXPECT_EQ(cc3.Compare(absl::Cord()), 0); + EXPECT_EQ(cc3.Compare(c1), 0); + EXPECT_EQ(cc3.Compare(cc3), 0); + EXPECT_EQ(cc3.Compare(""), 0); + EXPECT_EQ(cc3.Compare("wxyz"), -1); + EXPECT_EQ(cc3.Compare(absl::Cord("wxyz")), -1); + EXPECT_EQ(absl::Cord("wxyz").Compare(cc3), 1); + EXPECT_EQ(std::string(cc3), ""); + + std::string dest; + absl::CopyCordToString(cc3, &dest); + EXPECT_EQ(dest, ""); + + for (absl::string_view chunk : cc3.Chunks()) { // NOLINT(unreachable loop) + static_cast<void>(chunk); + GTEST_FAIL() << "no chunks expected"; + } + EXPECT_TRUE(cc3.chunk_begin() == cc3.chunk_end()); + + for (char ch : cc3.Chars()) { // NOLINT(unreachable loop) + static_cast<void>(ch); + GTEST_FAIL() << "no chars expected"; + } + EXPECT_TRUE(cc3.char_begin() == cc3.char_end()); + + EXPECT_EQ(cc3.TryFlat(), ""); + EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::Cord())); + EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::string_view())); +} diff --git a/absl/strings/internal/cord_rep_crc.cc b/absl/strings/internal/cord_rep_crc.cc index ee140354..7d7273ef 100644 --- a/absl/strings/internal/cord_rep_crc.cc +++ b/absl/strings/internal/cord_rep_crc.cc @@ -25,8 +25,7 @@ ABSL_NAMESPACE_BEGIN namespace cord_internal { CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { - assert(child != nullptr); - if (child->IsCrc()) { + if (child != nullptr && child->IsCrc()) { if (child->refcount.IsOne()) { child->crc()->crc = crc; return child->crc(); @@ -37,7 +36,7 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { CordRep::Unref(old); } auto* new_cordrep = new CordRepCrc; - new_cordrep->length = child->length; + new_cordrep->length = child != nullptr ? child->length : 0; new_cordrep->tag = cord_internal::CRC; new_cordrep->child = child; new_cordrep->crc = crc; @@ -45,7 +44,9 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { } void CordRepCrc::Destroy(CordRepCrc* node) { - CordRep::Unref(node->child); + if (node->child != nullptr) { + CordRep::Unref(node->child); + } delete node; } diff --git a/absl/strings/internal/cord_rep_crc.h b/absl/strings/internal/cord_rep_crc.h index 5294b0d1..455a1127 100644 --- a/absl/strings/internal/cord_rep_crc.h +++ b/absl/strings/internal/cord_rep_crc.h @@ -40,7 +40,7 @@ struct CordRepCrc : public CordRep { // If the specified `child` is itself a CordRepCrc node, then this method // either replaces the existing node, or directly updates the crc value in it // depending on the node being shared or not, i.e.: refcount.IsOne(). - // `child` must not be null. Never returns null. + // `child` must only be null if the Cord is empty. Never returns null. static CordRepCrc* New(CordRep* child, uint32_t crc); // Destroys (deletes) the provided node. `node` must not be null. diff --git a/absl/strings/internal/cord_rep_crc_test.cc b/absl/strings/internal/cord_rep_crc_test.cc index d73ea7b3..42a9110b 100644 --- a/absl/strings/internal/cord_rep_crc_test.cc +++ b/absl/strings/internal/cord_rep_crc_test.cc @@ -27,14 +27,11 @@ namespace { using ::absl::cordrep_testing::MakeFlat; using ::testing::Eq; +using ::testing::IsNull; using ::testing::Ne; #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST -TEST(CordRepCrc, NewWithNullPtr) { - EXPECT_DEATH(CordRepCrc::New(nullptr, 0), ""); -} - TEST(CordRepCrc, RemoveCrcWithNullptr) { EXPECT_DEATH(RemoveCrcNode(nullptr), ""); } @@ -82,6 +79,16 @@ TEST(CordRepCrc, NewExistingCrcShared) { CordRep::Unref(new_crc); } +TEST(CordRepCrc, NewEmpty) { + CordRepCrc* crc = CordRepCrc::New(nullptr, 12345); + EXPECT_TRUE(crc->refcount.IsOne()); + EXPECT_THAT(crc->child, IsNull()); + EXPECT_THAT(crc->length, Eq(0u)); + EXPECT_THAT(crc->crc, Eq(12345u)); + EXPECT_TRUE(crc->refcount.IsOne()); + CordRepCrc::Destroy(crc); +} + TEST(CordRepCrc, RemoveCrcNotCrc) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); CordRep* nocrc = RemoveCrcNode(rep); |