summaryrefslogtreecommitdiff
path: root/absl/strings
diff options
context:
space:
mode:
authorGravatar Derek Mauro <dmauro@google.com>2022-10-28 11:21:06 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2022-10-28 11:22:03 -0700
commit1db72eb03e3685742c62abea9d13ddab2adcdf01 (patch)
treeb2e9c4182d1ba593b924152d24a8e9e1e8569c9e /absl/strings
parentfd9fbe74511fb2a060a3b15a1572b52fe16a6a43 (diff)
Support empty Cords with an expected checksum
PiperOrigin-RevId: 484578104 Change-Id: Ie4be3e4de27dc28d88395e16fd075fb10ab7a302
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/cord.cc47
-rw-r--r--absl/strings/cord.h29
-rw-r--r--absl/strings/cord_test.cc148
-rw-r--r--absl/strings/internal/cord_rep_crc.cc9
-rw-r--r--absl/strings/internal/cord_rep_crc.h2
-rw-r--r--absl/strings/internal/cord_rep_crc_test.cc15
6 files changed, 226 insertions, 24 deletions
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc
index 66f45fef..57082c5f 100644
--- a/absl/strings/cord.cc
+++ b/absl/strings/cord.cc
@@ -420,6 +420,7 @@ Cord& Cord::operator=(absl::string_view src) {
// we keep it here to make diffs easier.
void Cord::InlineRep::AppendArray(absl::string_view src,
MethodIdentifier method) {
+ MaybeRemoveEmptyCrcNode();
if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined.
size_t appended = 0;
@@ -479,6 +480,10 @@ inline CordRep* Cord::TakeRep() && {
template <typename C>
inline void Cord::AppendImpl(C&& src) {
auto constexpr method = CordzUpdateTracker::kAppendCord;
+
+ contents_.MaybeRemoveEmptyCrcNode();
+ if (src.empty()) return;
+
if (empty()) {
// Since destination is empty, we can avoid allocating a node,
if (src.contents_.is_tree()) {
@@ -591,6 +596,9 @@ void Cord::Append(T&& src) {
template void Cord::Append(std::string&& src);
void Cord::Prepend(const Cord& src) {
+ contents_.MaybeRemoveEmptyCrcNode();
+ if (src.empty()) return;
+
CordRep* src_tree = src.contents_.tree();
if (src_tree != nullptr) {
CordRep::Ref(src_tree);
@@ -605,7 +613,9 @@ void Cord::Prepend(const Cord& src) {
}
void Cord::PrependArray(absl::string_view src, MethodIdentifier method) {
+ contents_.MaybeRemoveEmptyCrcNode();
if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined.
+
if (!contents_.is_tree()) {
size_t cur_size = contents_.inline_size();
if (cur_size + src.size() <= InlineRep::kMaxInline) {
@@ -665,6 +675,7 @@ void Cord::RemovePrefix(size_t n) {
ABSL_INTERNAL_CHECK(n <= size(),
absl::StrCat("Requested prefix size ", n,
" exceeds Cord's size ", size()));
+ contents_.MaybeRemoveEmptyCrcNode();
CordRep* tree = contents_.tree();
if (tree == nullptr) {
contents_.remove_prefix(n);
@@ -695,6 +706,7 @@ void Cord::RemoveSuffix(size_t n) {
ABSL_INTERNAL_CHECK(n <= size(),
absl::StrCat("Requested suffix size ", n,
" exceeds Cord's size ", size()));
+ contents_.MaybeRemoveEmptyCrcNode();
CordRep* tree = contents_.tree();
if (tree == nullptr) {
contents_.reduce_size(n);
@@ -844,9 +856,10 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
void Cord::SetExpectedChecksum(uint32_t crc) {
auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum;
- if (empty()) return;
-
- if (!contents_.is_tree()) {
+ if (empty()) {
+ CordRep* rep = CordRepCrc::New(nullptr, crc);
+ contents_.EmplaceTree(rep, method);
+ } else if (!contents_.is_tree()) {
CordRep* rep = contents_.MakeFlatWithExtraCapacity(0);
rep = CordRepCrc::New(rep, crc);
contents_.EmplaceTree(rep, method);
@@ -929,6 +942,7 @@ inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
}
inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
+ if (c.empty()) return {};
return c.contents_.FindFlatStartPiece();
}
inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
@@ -1166,6 +1180,10 @@ absl::string_view Cord::FlattenSlowPath() {
/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
assert(rep != nullptr);
+ if (rep->length == 0) {
+ *fragment = absl::string_view();
+ return true;
+ }
rep = cord_internal::SkipCrcNode(rep);
if (rep->IsFlat()) {
*fragment = absl::string_view(rep->flat()->Data(), rep->length);
@@ -1197,6 +1215,7 @@ absl::string_view Cord::FlattenSlowPath() {
absl::cord_internal::CordRep* rep,
absl::FunctionRef<void(absl::string_view)> callback) {
assert(rep != nullptr);
+ if (rep->length == 0) return;
rep = cord_internal::SkipCrcNode(rep);
if (rep->IsBtree()) {
@@ -1230,7 +1249,11 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
if (include_data) *os << static_cast<void*>(rep);
*os << "]";
*os << " " << std::setw(indent) << "";
- if (rep->IsCrc()) {
+ bool leaf = false;
+ if (rep == nullptr) {
+ *os << "NULL\n";
+ leaf = true;
+ } else if (rep->IsCrc()) {
*os << "CRC crc=" << rep->crc()->crc << "\n";
indent += kIndentStep;
rep = rep->crc()->child;
@@ -1239,6 +1262,7 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
indent += kIndentStep;
rep = rep->substring()->child;
} else { // Leaf or ring
+ leaf = true;
if (rep->IsExternal()) {
*os << "EXTERNAL [";
if (include_data)
@@ -1252,6 +1276,8 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
} else {
CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os);
}
+ }
+ if (leaf) {
if (stack.empty()) break;
rep = stack.back();
stack.pop_back();
@@ -1297,11 +1323,14 @@ static bool VerifyNode(CordRep* root, CordRep* start_node,
node->substring()->child->length,
ReportError(root, node));
} else if (node->IsCrc()) {
- ABSL_INTERNAL_CHECK(node->crc()->child != nullptr,
- ReportError(root, node));
- ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length,
- ReportError(root, node));
- worklist.push_back(node->crc()->child);
+ ABSL_INTERNAL_CHECK(
+ node->crc()->child != nullptr || node->crc()->length == 0,
+ ReportError(root, node));
+ if (node->crc()->child != nullptr) {
+ ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length,
+ ReportError(root, node));
+ worklist.push_back(node->crc()->child);
+ }
}
} while (!worklist.empty());
return true;
diff --git a/absl/strings/cord.h b/absl/strings/cord.h
index 88e1c85d..6e3da89e 100644
--- a/absl/strings/cord.h
+++ b/absl/strings/cord.h
@@ -926,6 +926,13 @@ class Cord {
void set_inline_size(size_t size) { data_.set_inline_size(size); }
size_t inline_size() const { return data_.inline_size(); }
+ // Empty cords that carry a checksum have a CordRepCrc node with a null
+ // child node. The code can avoid lots of special cases where it would
+ // otherwise transition from tree to inline storage if we just remove the
+ // CordRepCrc node before mutations. Must never be called inside a
+ // CordzUpdateScope since it untracks the cordz info.
+ void MaybeRemoveEmptyCrcNode();
+
cord_internal::InlineData data_;
};
InlineRep contents_;
@@ -1236,6 +1243,18 @@ inline void Cord::InlineRep::CopyToArray(char* dst) const {
cord_internal::SmallMemmove(dst, data_.as_chars(), n);
}
+inline void Cord::InlineRep::MaybeRemoveEmptyCrcNode() {
+ CordRep* rep = tree();
+ if (rep == nullptr || ABSL_PREDICT_TRUE(rep->length > 0)) {
+ return;
+ }
+ assert(rep->IsCrc());
+ assert(rep->crc()->child == nullptr);
+ CordzInfo::MaybeUntrackCord(cordz_info());
+ CordRep::Unref(rep);
+ ResetToEmpty();
+}
+
constexpr inline Cord::Cord() noexcept {}
inline Cord::Cord(absl::string_view src)
@@ -1285,7 +1304,7 @@ inline size_t Cord::size() const {
return contents_.size();
}
-inline bool Cord::empty() const { return contents_.empty(); }
+inline bool Cord::empty() const { return size() == 0; }
inline size_t Cord::EstimatedMemoryUsage(
CordMemoryAccounting accounting_method) const {
@@ -1411,7 +1430,11 @@ inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) {
inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) {
if (CordRep* tree = cord->contents_.tree()) {
bytes_remaining_ = tree->length;
- InitTree(tree);
+ if (ABSL_PREDICT_TRUE(bytes_remaining_ != 0)) {
+ InitTree(tree);
+ } else {
+ current_chunk_ = {};
+ }
} else {
bytes_remaining_ = cord->contents_.inline_size();
current_chunk_ = {cord->contents_.data(), bytes_remaining_};
@@ -1580,7 +1603,7 @@ inline void Cord::ForEachChunk(
if (rep == nullptr) {
callback(absl::string_view(contents_.data(), contents_.size()));
} else {
- return ForEachChunkAux(rep, callback);
+ ForEachChunkAux(rep, callback);
}
}
diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc
index 1fc4be6e..9a72f7be 100644
--- a/absl/strings/cord_test.cc
+++ b/absl/strings/cord_test.cc
@@ -1988,6 +1988,12 @@ TEST_P(CordTest, HugeCord) {
// Tests that Append() works ok when handed a self reference
TEST_P(CordTest, AppendSelf) {
+ // Test the empty case.
+ absl::Cord empty;
+ MaybeHarden(empty);
+ empty.Append(empty);
+ ASSERT_EQ(empty, "");
+
// We run the test until data is ~16K
// This guarantees it covers small, medium and large data.
std::string control_data = "Abc";
@@ -2712,7 +2718,7 @@ class CordMutator {
// clang-format off
// This array is constant-initialized in conformant compilers.
-CordMutator cord_mutators[] ={
+CordMutator cord_mutators[] = {
{"clear", [](absl::Cord& c) { c.Clear(); }},
{"overwrite", [](absl::Cord& c) { c = "overwritten"; }},
{
@@ -2742,6 +2748,25 @@ CordMutator cord_mutators[] ={
[](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); }
},
{
+ "append empty string",
+ [](absl::Cord& c) { c.Append(""); },
+ [](absl::Cord& c) { }
+ },
+ {
+ "append empty cord",
+ [](absl::Cord& c) { c.Append(absl::Cord()); },
+ [](absl::Cord& c) { }
+ },
+ {
+ "append empty checksummed cord",
+ [](absl::Cord& c) {
+ absl::Cord to_append;
+ to_append.SetExpectedChecksum(999);
+ c.Append(to_append);
+ },
+ [](absl::Cord& c) { }
+ },
+ {
"prepend string",
[](absl::Cord& c) { c.Prepend("9876543210"); },
[](absl::Cord& c) { c.RemovePrefix(10); }
@@ -2763,12 +2788,33 @@ CordMutator cord_mutators[] ={
[](absl::Cord& c) { c.RemovePrefix(10); }
},
{
+ "prepend empty string",
+ [](absl::Cord& c) { c.Prepend(""); },
+ [](absl::Cord& c) { }
+ },
+ {
+ "prepend empty cord",
+ [](absl::Cord& c) { c.Prepend(absl::Cord()); },
+ [](absl::Cord& c) { }
+ },
+ {
+ "prepend empty checksummed cord",
+ [](absl::Cord& c) {
+ absl::Cord to_prepend;
+ to_prepend.SetExpectedChecksum(999);
+ c.Prepend(to_prepend);
+ },
+ [](absl::Cord& c) { }
+ },
+ {
"prepend self",
[](absl::Cord& c) { c.Prepend(c); },
[](absl::Cord& c) { c.RemovePrefix(c.size() / 2); }
},
- {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(2); }},
- {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(2); }},
+ {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); }},
+ {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); }},
+ {"remove 0-prefix", [](absl::Cord& c) { c.RemovePrefix(0); }},
+ {"remove 0-suffix", [](absl::Cord& c) { c.RemoveSuffix(0); }},
{"subcord", [](absl::Cord& c) { c = c.Subcord(1, c.size() - 2); }},
{
"swap inline",
@@ -2834,6 +2880,13 @@ TEST_P(CordTest, ExpectedChecksum) {
c2.SetExpectedChecksum(24680);
mutator.Mutate(c2);
+
+ if (c1 == c2) {
+ // Not a mutation (for example, appending the empty string).
+ // Whether the checksum is removed is not defined.
+ continue;
+ }
+
EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt);
if (mutator.CanUndo()) {
@@ -2903,3 +2956,92 @@ TEST_P(CordTest, ExpectedChecksum) {
}
}
}
+
+// Test the special cases encountered with an empty checksummed cord.
+TEST_P(CordTest, ChecksummedEmptyCord) {
+ absl::Cord c1;
+ EXPECT_FALSE(c1.ExpectedChecksum().has_value());
+
+ // Setting an expected checksum works.
+ c1.SetExpectedChecksum(12345);
+ EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345);
+ EXPECT_EQ(c1, "");
+ EXPECT_TRUE(c1.empty());
+
+ // CRC persists through copies, assignments, and moves:
+ absl::Cord c1_copy_construct = c1;
+ EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345);
+
+ absl::Cord c1_copy_assign;
+ c1_copy_assign = c1;
+ EXPECT_EQ(c1_copy_assign.ExpectedChecksum().value_or(0), 12345);
+
+ absl::Cord c1_move(std::move(c1_copy_assign));
+ EXPECT_EQ(c1_move.ExpectedChecksum().value_or(0), 12345);
+
+ EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345);
+
+ // A CRC Cord compares equal to its non-CRC value.
+ EXPECT_EQ(c1, absl::Cord());
+
+ for (const CordMutator& mutator : cord_mutators) {
+ SCOPED_TRACE(mutator.Name());
+
+ // Exercise mutating an empty checksummed cord to catch crashes and exercise
+ // memory sanitizers.
+ absl::Cord c2;
+ c2.SetExpectedChecksum(24680);
+ mutator.Mutate(c2);
+
+ if (c2.empty()) {
+ // Not a mutation
+ continue;
+ }
+ EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt);
+
+ if (mutator.CanUndo()) {
+ mutator.Undo(c2);
+ }
+ }
+
+ absl::Cord c3;
+ c3.SetExpectedChecksum(999);
+ const absl::Cord& cc3 = c3;
+
+ // Test that all cord reading operations function in the face of an
+ // expected checksum.
+ EXPECT_TRUE(cc3.StartsWith(""));
+ EXPECT_TRUE(cc3.EndsWith(""));
+ EXPECT_TRUE(cc3.empty());
+ EXPECT_EQ(cc3, "");
+ EXPECT_EQ(cc3, absl::Cord());
+ EXPECT_EQ(cc3.size(), 0);
+ EXPECT_EQ(cc3.Compare(absl::Cord()), 0);
+ EXPECT_EQ(cc3.Compare(c1), 0);
+ EXPECT_EQ(cc3.Compare(cc3), 0);
+ EXPECT_EQ(cc3.Compare(""), 0);
+ EXPECT_EQ(cc3.Compare("wxyz"), -1);
+ EXPECT_EQ(cc3.Compare(absl::Cord("wxyz")), -1);
+ EXPECT_EQ(absl::Cord("wxyz").Compare(cc3), 1);
+ EXPECT_EQ(std::string(cc3), "");
+
+ std::string dest;
+ absl::CopyCordToString(cc3, &dest);
+ EXPECT_EQ(dest, "");
+
+ for (absl::string_view chunk : cc3.Chunks()) { // NOLINT(unreachable loop)
+ static_cast<void>(chunk);
+ GTEST_FAIL() << "no chunks expected";
+ }
+ EXPECT_TRUE(cc3.chunk_begin() == cc3.chunk_end());
+
+ for (char ch : cc3.Chars()) { // NOLINT(unreachable loop)
+ static_cast<void>(ch);
+ GTEST_FAIL() << "no chars expected";
+ }
+ EXPECT_TRUE(cc3.char_begin() == cc3.char_end());
+
+ EXPECT_EQ(cc3.TryFlat(), "");
+ EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::Cord()));
+ EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::string_view()));
+}
diff --git a/absl/strings/internal/cord_rep_crc.cc b/absl/strings/internal/cord_rep_crc.cc
index ee140354..7d7273ef 100644
--- a/absl/strings/internal/cord_rep_crc.cc
+++ b/absl/strings/internal/cord_rep_crc.cc
@@ -25,8 +25,7 @@ ABSL_NAMESPACE_BEGIN
namespace cord_internal {
CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) {
- assert(child != nullptr);
- if (child->IsCrc()) {
+ if (child != nullptr && child->IsCrc()) {
if (child->refcount.IsOne()) {
child->crc()->crc = crc;
return child->crc();
@@ -37,7 +36,7 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) {
CordRep::Unref(old);
}
auto* new_cordrep = new CordRepCrc;
- new_cordrep->length = child->length;
+ new_cordrep->length = child != nullptr ? child->length : 0;
new_cordrep->tag = cord_internal::CRC;
new_cordrep->child = child;
new_cordrep->crc = crc;
@@ -45,7 +44,9 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) {
}
void CordRepCrc::Destroy(CordRepCrc* node) {
- CordRep::Unref(node->child);
+ if (node->child != nullptr) {
+ CordRep::Unref(node->child);
+ }
delete node;
}
diff --git a/absl/strings/internal/cord_rep_crc.h b/absl/strings/internal/cord_rep_crc.h
index 5294b0d1..455a1127 100644
--- a/absl/strings/internal/cord_rep_crc.h
+++ b/absl/strings/internal/cord_rep_crc.h
@@ -40,7 +40,7 @@ struct CordRepCrc : public CordRep {
// If the specified `child` is itself a CordRepCrc node, then this method
// either replaces the existing node, or directly updates the crc value in it
// depending on the node being shared or not, i.e.: refcount.IsOne().
- // `child` must not be null. Never returns null.
+ // `child` must only be null if the Cord is empty. Never returns null.
static CordRepCrc* New(CordRep* child, uint32_t crc);
// Destroys (deletes) the provided node. `node` must not be null.
diff --git a/absl/strings/internal/cord_rep_crc_test.cc b/absl/strings/internal/cord_rep_crc_test.cc
index d73ea7b3..42a9110b 100644
--- a/absl/strings/internal/cord_rep_crc_test.cc
+++ b/absl/strings/internal/cord_rep_crc_test.cc
@@ -27,14 +27,11 @@ namespace {
using ::absl::cordrep_testing::MakeFlat;
using ::testing::Eq;
+using ::testing::IsNull;
using ::testing::Ne;
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
-TEST(CordRepCrc, NewWithNullPtr) {
- EXPECT_DEATH(CordRepCrc::New(nullptr, 0), "");
-}
-
TEST(CordRepCrc, RemoveCrcWithNullptr) {
EXPECT_DEATH(RemoveCrcNode(nullptr), "");
}
@@ -82,6 +79,16 @@ TEST(CordRepCrc, NewExistingCrcShared) {
CordRep::Unref(new_crc);
}
+TEST(CordRepCrc, NewEmpty) {
+ CordRepCrc* crc = CordRepCrc::New(nullptr, 12345);
+ EXPECT_TRUE(crc->refcount.IsOne());
+ EXPECT_THAT(crc->child, IsNull());
+ EXPECT_THAT(crc->length, Eq(0u));
+ EXPECT_THAT(crc->crc, Eq(12345u));
+ EXPECT_TRUE(crc->refcount.IsOne());
+ CordRepCrc::Destroy(crc);
+}
+
TEST(CordRepCrc, RemoveCrcNotCrc) {
CordRep* rep = cordrep_testing::MakeFlat("Hello world");
CordRep* nocrc = RemoveCrcNode(rep);