path: root/absl/strings/cord.cc
author     Abseil Team <absl-team@google.com>    2021-04-30 10:41:56 -0700
committer  Derek Mauro <dmauro@google.com>       2021-04-30 13:55:34 -0400
commit     5dd240724366295970c613ed23d0092bcf392f18 (patch)
tree       6f4f75dacce4a7e10f0fe354474fca849aaa446c /absl/strings/cord.cc
parent     f14d5f4af12eb521a5142151d6ea3addbbed42bb (diff)
Export of internal Abseil changes
-- 60b8e77be4bab1bbd3b4c3b70054879229634511 by Derek Mauro <dmauro@google.com>:

Use _MSVC_LANG for some C++ dialect checks since MSVC doesn't set __cplusplus
accurately by default.
https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/
See GitHub #722.

PiperOrigin-RevId: 371362181

-- 5d736accdff04db0e722f377c0d79f2d3ed53263 by Martijn Vels <mvels@google.com>:

Fix the estimated memory size for CordRepExternal

PiperOrigin-RevId: 371350380

-- eaaa1d8a167aeca67a2aa3a098a2b61a9d72172f by Martijn Vels <mvels@google.com>:

Remove flakes by not enforcing that re-allocated pointers never match the original

Tests that do multiple updates could end up with the original allocated pointer
on a 2nd resize, so 'EqIfPrivate' should not assume that, if we do not have the
capacity, all subsequent reallocations will never match the original. We only
care about 'pointer unchanged if private and there is capacity'; trying to
establish 'pointer changed at some point due to re-allocation' is pointless.

PiperOrigin-RevId: 371338965

-- d1837bee6bade1902b095c1cbf64231668bb84c5 by Martijn Vels <mvels@google.com>:

Undo inline of small data copy in cord

This leads to a performance regression as the code is not inlined (absent hard
FDO inputs), and there are no suitable tail call options.

PiperOrigin-RevId: 371332332

-- 06dc64b833069efc7d18b11df607c8c22be690da by Martijn Vels <mvels@google.com>:

Add final instrumentation for Cordz and remove 'old' cordz logic.

This change instruments the last cord function for cordz. It removes the 'old'
functions: set_tree, replace_tree, UpdateCordzStatistics and RecordMetrics.

PiperOrigin-RevId: 371219909

-- a5e0be538579c603052feec03e6d9910c43ea787 by Martijn Vels <mvels@google.com>:

Extend the life of CordRep* if inside a snapshot

If a snapshot (potentially) includes the current CordzInfo, we need to extend
the lifetime of the CordRep*, as the snapshot's 'point in time' observation of
the cord should ideally be preserved.

PiperOrigin-RevId: 371146151

-- 74d77a89774cd6c8ecdeebee0193b294a39383d6 by Martijn Vels <mvels@google.com>:

Instrument std::string consuming methods: ctor, operator=, Append and Prepend

This change moves the 'steal into CordRep' logic into a separate function so we
can use it directly in the ctor, assignment operator, Append and Prepend,
allowing Cordz instrumentation with the proper method attributes. The
assignment operator is implemented in AssignLargeString, leaving the dispatch
inlined in cord.h (which, as a side effect, also allows clean tail calls in the
AssignLargeString method).

PiperOrigin-RevId: 371094756

-- b39effc45266b7ce2e7f96caa3b16cb6e3acc2dd by Martijn Vels <mvels@google.com>:

Add Cordz instrumentation to CordReader

PiperOrigin-RevId: 370990181

GitOrigin-RevId: 60b8e77be4bab1bbd3b4c3b70054879229634511
Change-Id: I96af62e6f1a643e8b1228ae01e6c84e33706bb05
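For context on the first change above: by default MSVC keeps __cplusplus at 199711L
unless /Zc:__cplusplus is passed, while _MSVC_LANG always reflects the selected
dialect, so dialect checks on MSVC should consult _MSVC_LANG. A minimal sketch of
that check pattern (the macro name ABSL_INTERNAL_CPLUSPLUS_LANG is used here for
illustration only and is not taken from this diff):

// Sketch: derive a reliable C++ dialect value on MSVC and elsewhere.
#if defined(_MSVC_LANG)
#define ABSL_INTERNAL_CPLUSPLUS_LANG _MSVC_LANG
#elif defined(__cplusplus)
#define ABSL_INTERNAL_CPLUSPLUS_LANG __cplusplus
#endif

// Example use: gate a C++17-only code path.
#if defined(ABSL_INTERNAL_CPLUSPLUS_LANG) && ABSL_INTERNAL_CPLUSPLUS_LANG >= 201703L
#define EXAMPLE_HAVE_CPP17 1
#endif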
Diffstat (limited to 'absl/strings/cord.cc')
-rw-r--r--  absl/strings/cord.cc  154
1 file changed, 82 insertions, 72 deletions
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc
index 9262aee6..1c2ff9f2 100644
--- a/absl/strings/cord.cc
+++ b/absl/strings/cord.cc
@@ -56,6 +56,7 @@ using ::absl::cord_internal::CordRepExternal;
using ::absl::cord_internal::CordRepFlat;
using ::absl::cord_internal::CordRepRing;
using ::absl::cord_internal::CordRepSubstring;
+using ::absl::cord_internal::CordzUpdateTracker;
using ::absl::cord_internal::InlineData;
using ::absl::cord_internal::kMaxFlatLength;
using ::absl::cord_internal::kMinFlatLength;
@@ -281,6 +282,35 @@ static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
}
}
+// Creates a CordRep from the provided string. If the string is large enough,
+// and not wasteful, we move the string into an external cord rep, preserving
+// the already allocated string contents.
+// Requires the provided string length to be larger than `kMaxInline`.
+static CordRep* CordRepFromString(std::string&& src) {
+ assert(src.length() > cord_internal::kMaxInline);
+ if (
+ // String is short: copy data to avoid external block overhead.
+ src.size() <= kMaxBytesToCopy ||
+ // String is wasteful: copy data to avoid pinning too much unused memory.
+ src.size() < src.capacity() / 2
+ ) {
+ return NewTree(src.data(), src.size(), 0);
+ }
+
+ struct StringReleaser {
+ void operator()(absl::string_view /* data */) {}
+ std::string data;
+ };
+ const absl::string_view original_data = src;
+ auto* rep =
+ static_cast<::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
+ absl::cord_internal::NewExternalRep(original_data,
+ StringReleaser{std::move(src)}));
+ // Moving src may have invalidated its data pointer, so adjust it.
+ rep->base = rep->template get<0>().data.data();
+ return rep;
+}
+
// --------------------------------------------------------------------
// Cord::InlineRep functions
@@ -486,17 +516,17 @@ static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
return true;
}
if (rep->tag == EXTERNAL) {
- *total_mem_usage += sizeof(CordRepConcat) + rep->length;
+ // We don't know anything about the embedded / bound data, but we can safely
+ // assume it is 'at least' a word / pointer to data. In the future we may
+ // choose to use the 'data' byte as a tag to identify the types of some
+ // well-known externals, such as a std::string instance.
+ *total_mem_usage +=
+ sizeof(cord_internal::CordRepExternalImpl<intptr_t>) + rep->length;
return true;
}
return false;
}
-void Cord::InlineRep::UpdateCordzStatisticsSlow() {
- CordRep* tree = as_tree();
- data_.cordz_info()->RecordMetrics(tree->length);
-}
-
void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
assert(&src != this);
assert(is_tree() || src.is_tree());
@@ -525,42 +555,24 @@ void Cord::InlineRep::UnrefTree() {
// --------------------------------------------------------------------
// Constructors and destructors
-Cord::Cord(absl::string_view src) : contents_(InlineData::kDefaultInit) {
+Cord::Cord(absl::string_view src, MethodIdentifier method)
+ : contents_(InlineData::kDefaultInit) {
const size_t n = src.size();
if (n <= InlineRep::kMaxInline) {
contents_.set_data(src.data(), n, true);
} else {
CordRep* rep = NewTree(src.data(), n, 0);
- contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString);
+ contents_.EmplaceTree(rep, method);
}
}
template <typename T, Cord::EnableIfString<T>>
-Cord::Cord(T&& src) {
- if (
- // String is short: copy data to avoid external block overhead.
- src.size() <= kMaxBytesToCopy ||
- // String is wasteful: copy data to avoid pinning too much unused memory.
- src.size() < src.capacity() / 2
- ) {
- if (src.size() <= InlineRep::kMaxInline) {
- contents_.set_data(src.data(), src.size(), false);
- } else {
- contents_.set_tree(NewTree(src.data(), src.size(), 0));
- }
+Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) {
+ if (src.size() <= InlineRep::kMaxInline) {
+ contents_.set_data(src.data(), src.size(), true);
} else {
- struct StringReleaser {
- void operator()(absl::string_view /* data */) {}
- std::string data;
- };
- const absl::string_view original_data = src;
- auto* rep = static_cast<
- ::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
- absl::cord_internal::NewExternalRep(
- original_data, StringReleaser{std::forward<T>(src)}));
- // Moving src may have invalidated its data pointer, so adjust it.
- rep->base = rep->template get<0>().data.data();
- contents_.set_tree(rep);
+ CordRep* rep = CordRepFromString(std::forward<T>(src));
+ contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString);
}
}
@@ -583,6 +595,20 @@ void Cord::Clear() {
}
}
+Cord& Cord::AssignLargeString(std::string&& src) {
+ auto constexpr method = CordzUpdateTracker::kAssignString;
+ assert(src.size() > kMaxBytesToCopy);
+ CordRep* rep = CordRepFromString(std::move(src));
+ if (CordRep* tree = contents_.tree()) {
+ CordzUpdateScope scope(contents_.cordz_info(), method);
+ contents_.SetTree(rep, scope);
+ CordRep::Unref(tree);
+ } else {
+ contents_.EmplaceTree(rep, method);
+ }
+ return *this;
+}
+
Cord& Cord::operator=(absl::string_view src) {
auto constexpr method = CordzUpdateTracker::kAssignString;
const char* data = src.data();
@@ -616,18 +642,6 @@ Cord& Cord::operator=(absl::string_view src) {
return *this;
}
-template <typename T, Cord::EnableIfString<T>>
-Cord& Cord::operator=(T&& src) {
- if (src.size() <= kMaxBytesToCopy) {
- *this = absl::string_view(src);
- } else {
- *this = Cord(std::forward<T>(src));
- }
- return *this;
-}
-
-template Cord& Cord::operator=(std::string&& src);
-
// TODO(sanjay): Move to Cord::InlineRep section of file. For now,
// we keep it here to make diffs easier.
void Cord::InlineRep::AppendArray(absl::string_view src,
@@ -653,10 +667,8 @@ void Cord::InlineRep::AppendArray(absl::string_view src,
return;
}
- // It is possible that src.data() == data_, but when we transition from an
- // InlineRep to a tree we need to assign data_ = root via set_tree. To
- // avoid corrupting the source data before we copy it, delay calling
- // set_tree until after we've copied data.
+ // Note: we don't concern ourselves if src aliases data stored in the
+ // inlined data of 'this', as we update the InlineData only at the end.
// We are going from an inline size to beyond inline size. Make the new size
// either double the inlined size, or the added size + 10%.
const size_t size1 = inline_length * 2 + src.size();
@@ -762,7 +774,8 @@ void Cord::Append(T&& src) {
if (src.size() <= kMaxBytesToCopy) {
Append(absl::string_view(src));
} else {
- Append(Cord(std::forward<T>(src)));
+ CordRep* rep = CordRepFromString(std::forward<T>(src));
+ contents_.AppendTree(rep, CordzUpdateTracker::kAppendString);
}
}
@@ -804,7 +817,8 @@ inline void Cord::Prepend(T&& src) {
if (src.size() <= kMaxBytesToCopy) {
Prepend(absl::string_view(src));
} else {
- Prepend(Cord(std::forward<T>(src)));
+ CordRep* rep = CordRepFromString(std::forward<T>(src));
+ contents_.PrependTree(rep, CordzUpdateTracker::kPrependString);
}
}
@@ -988,22 +1002,6 @@ static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
return results[0];
}
-void Cord::CopyDataAtPosition(size_t pos, size_t new_size, char* dest) const {
- assert(new_size <= cord_internal::kMaxInline);
- assert(pos <= size());
- assert(new_size <= size() - pos);
- Cord::ChunkIterator it = chunk_begin();
- it.AdvanceBytes(pos);
- size_t remaining_size = new_size;
- while (remaining_size > it->size()) {
- cord_internal::SmallMemmove(dest, it->data(), it->size());
- remaining_size -= it->size();
- dest += it->size();
- ++it;
- }
- cord_internal::SmallMemmove(dest, it->data(), remaining_size);
-}
-
Cord Cord::Subcord(size_t pos, size_t new_size) const {
Cord sub_cord;
size_t length = size();
@@ -1020,7 +1018,17 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const {
}
if (new_size <= InlineRep::kMaxInline) {
- CopyDataAtPosition(pos, new_size, sub_cord.contents_.data_.as_chars());
+ char* dest = sub_cord.contents_.data_.as_chars();
+ Cord::ChunkIterator it = chunk_begin();
+ it.AdvanceBytes(pos);
+ size_t remaining_size = new_size;
+ while (remaining_size > it->size()) {
+ cord_internal::SmallMemmove(dest, it->data(), it->size());
+ remaining_size -= it->size();
+ dest += it->size();
+ ++it;
+ }
+ cord_internal::SmallMemmove(dest, it->data(), remaining_size);
sub_cord.contents_.set_inline_size(new_size);
return sub_cord;
}
@@ -1474,6 +1482,7 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
"Attempted to iterate past `end()`");
Cord subcord;
+ auto constexpr method = CordzUpdateTracker::kCordReader;
if (n <= InlineRep::kMaxInline) {
// Range to read fits in inline data. Flatten it.
@@ -1496,11 +1505,12 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
if (ring_reader_) {
size_t chunk_size = current_chunk_.size();
if (n <= chunk_size && n <= kMaxBytesToCopy) {
- subcord = Cord(current_chunk_.substr(0, n));
+ subcord = Cord(current_chunk_.substr(0, n), method);
} else {
auto* ring = CordRep::Ref(ring_reader_.ring())->ring();
size_t offset = ring_reader_.length() - bytes_remaining_;
- subcord.contents_.set_tree(CordRepRing::SubRing(ring, offset, n));
+ CordRep* rep = CordRepRing::SubRing(ring, offset, n);
+ subcord.contents_.EmplaceTree(rep, method);
}
if (n < chunk_size) {
bytes_remaining_ -= n;
@@ -1519,7 +1529,7 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
const char* data = subnode->tag == EXTERNAL ? subnode->external()->base
: subnode->flat()->Data();
subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
- subcord.contents_.set_tree(VerifyTree(subnode));
+ subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
RemoveChunkPrefix(n);
return subcord;
}
@@ -1562,7 +1572,7 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
if (node == nullptr) {
// We have reached the end of the Cord.
assert(bytes_remaining_ == 0);
- subcord.contents_.set_tree(VerifyTree(subnode));
+ subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
return subcord;
}
@@ -1602,7 +1612,7 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
current_chunk_ = absl::string_view(data + offset + n, length - n);
current_leaf_ = node;
bytes_remaining_ -= n;
- subcord.contents_.set_tree(VerifyTree(subnode));
+ subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
return subcord;
}
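As a usage-level illustration of the string-consuming paths instrumented above
(a sketch against the public absl::Cord API, not part of this commit): a large
std::string rvalue is routed through CordRepFromString and its buffer is adopted
as an external rep rather than copied, while small strings are still copied.

#include <string>
#include <utility>

#include "absl/strings/cord.h"

int main() {
  // Large string: moved into the Cord; CordRepFromString adopts the buffer as
  // an external rep, and the ctor path is tagged kConstructorString for Cordz.
  std::string big(1 << 20, 'x');
  absl::Cord cord(std::move(big));

  // Small string: at most kMaxBytesToCopy, so the data is copied instead;
  // here it even fits in the Cord's inline storage.
  cord = std::string("short");

  return cord.empty() ? 1 : 0;
}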