summary | refs | log | tree | commit | diff
path: root/absl/strings/cord.cc
diff options
context:
space:
mode:
author: Abseil Team <absl-team@google.com> 2022-02-17 12:07:55 -0800
committer: vslashg <gfalcon@google.com> 2022-02-17 16:24:45 -0500
commit: 7f850b3167fb38e6b4a9ce1824e6fabd733b5d62 (patch)
tree: 862004f447a80f89c31957cf3f254aec1aa4a5be /absl/strings/cord.cc
parent: c2ef7033380a3d8661fee76465097422170fb653 (diff)
Export of internal Abseil changes
--
ed829ac612f090375427c3488827c6e74deb2e3f by Derek Mauro <dmauro@google.com>:

Update latest GCC/Clang Linux tests to Bazel 5.0.0 and CMake 3.22.2

PiperOrigin-RevId: 429369775

--
76952303c4d942288c4e7657ffb5893cec54a132 by Martijn Vels <mvels@google.com>:

Optimize Cord::ChunkIterator now that CordRepConcat is removed

PiperOrigin-RevId: 429321455

--
dcd0d287793649aba9b98268c5783e449a34749f by Martijn Vels <mvels@google.com>:

Add IsDataEdge() and DataEdgeValue() helper functions.

This moves repetitive logic accessing data edges into its own header, and more strongly defines the notion of what a data edge is, enforcing the internal invariants. This will also be incorporated in optimized Cord iteration logic once CordRepConcat is totally removed from the Cord code.

PiperOrigin-RevId: 429307248

--
6a0903962155988085bf8656743fda9c4cdcba6c by Abseil Team <absl-team@google.com>:

Make it clear that the probability function given for the zipf distribution is unnormalized, i.e., sum(p(x) for x = 0..k) != 100%.

Quoting Section 7 of the paper cited in the comments, where this formula comes from (emphasis mine):

"We will consider the two parameter generalization as defined in Dagpunar [1988] with the *unnormalized* probability function ..."

PiperOrigin-RevId: 429068258

--
3899ff6d444ba755148bc521a6ee031d9e9d4485 by Abseil Team <absl-team@google.com>:

Internal Changes

PiperOrigin-RevId: 428644856

--
319de702d2b537cbb76c4c71277ae89b349b162e by Benjamin Barenblat <bbaren@google.com>:

Support symbolization on PA-RISC

Null out supervisor bits in PA-RISC addresses before symbolizing, and handle function descriptor tables correctly.

Change symbolize_test.cc to use 32-bit aligned addresses, allowing that test to pass on PA-RISC.

PiperOrigin-RevId: 428590564

GitOrigin-RevId: ed829ac612f090375427c3488827c6e74deb2e3f
Change-Id: Ie01ff3b9365fd45e5a55f858038552679f3180d3
Diffstat (limited to 'absl/strings/cord.cc')
-rw-r--r-- absl/strings/cord.cc 173
1 file changed, 24 insertions, 149 deletions
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc
index 4ee722da..6547c2da 100644
--- a/absl/strings/cord.cc
+++ b/absl/strings/cord.cc
@@ -35,6 +35,7 @@
#include "absl/container/fixed_array.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/escaping.h"
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
@@ -1094,35 +1095,6 @@ void Cord::CopyToArraySlowPath(char* dst) const {
}
}
-Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() {
- auto& stack_of_right_children = stack_of_right_children_;
- if (stack_of_right_children.empty()) {
- assert(!current_chunk_.empty()); // Called on invalid iterator.
- // We have reached the end of the Cord.
- return *this;
- }
-
- // Process the next node on the stack.
- CordRep* node = stack_of_right_children.back();
- stack_of_right_children.pop_back();
-
- // Get the child node if we encounter a SUBSTRING.
- size_t offset = 0;
- size_t length = node->length;
- if (node->IsSubstring()) {
- offset = node->substring()->start;
- node = node->substring()->child;
- }
-
- assert(node->IsExternal() || node->IsFlat());
- assert(length != 0);
- const char* data =
- node->IsExternal() ? node->external()->base : node->flat()->Data();
- current_chunk_ = absl::string_view(data + offset, length);
- current_leaf_ = node;
- return *this;
-}
-
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
"Attempted to iterate past `end()`");
@@ -1165,133 +1137,36 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
return subcord;
}
- auto& stack_of_right_children = stack_of_right_children_;
- if (n < current_chunk_.size()) {
- // Range to read is a proper subrange of the current chunk.
- assert(current_leaf_ != nullptr);
- CordRep* subnode = CordRep::Ref(current_leaf_);
- const char* data = subnode->IsExternal() ? subnode->external()->base
- : subnode->flat()->Data();
- subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
- subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
- RemoveChunkPrefix(n);
- return subcord;
- }
-
- // Range to read begins with a proper subrange of the current chunk.
- assert(!current_chunk_.empty());
+ // Short circuit if reading the entire data edge.
assert(current_leaf_ != nullptr);
- CordRep* subnode = CordRep::Ref(current_leaf_);
- if (current_chunk_.size() < subnode->length) {
- const char* data = subnode->IsExternal() ? subnode->external()->base
- : subnode->flat()->Data();
- subnode = NewSubstring(subnode, current_chunk_.data() - data,
- current_chunk_.size());
- }
- n -= current_chunk_.size();
- bytes_remaining_ -= current_chunk_.size();
-
- // Process the next node(s) on the stack, reading whole subtrees depending on
- // their length and how many bytes we are advancing.
- CordRep* node = nullptr;
- while (!stack_of_right_children.empty()) {
- node = stack_of_right_children.back();
- stack_of_right_children.pop_back();
- if (node->length > n) break;
- // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
- // Avoiding that would need pretty complicated logic (instead of
- // current_leaf, keep current_subtree_ which points to the highest node
- // such that the current leaf can be found on the path of left children
- // starting from current_subtree_; delay creating subnode while node is
- // below current_subtree_; find the proper node along the path of left
- // children starting from current_subtree_ if this loop exits while staying
- // below current_subtree_; etc.; alternatively, push parents instead of
- // right children on the stack).
- subnode = Concat(subnode, CordRep::Ref(node));
- n -= node->length;
- bytes_remaining_ -= node->length;
- node = nullptr;
- }
-
- if (node == nullptr) {
- // We have reached the end of the Cord.
- assert(bytes_remaining_ == 0);
- subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
+ if (n == current_leaf_->length) {
+ bytes_remaining_ = 0;
+ current_chunk_ = {};
+ CordRep* tree = CordRep::Ref(current_leaf_);
+ subcord.contents_.EmplaceTree(VerifyTree(tree), method);
return subcord;
}
- // Get the child node if we encounter a SUBSTRING.
- size_t offset = 0;
- size_t length = node->length;
- if (node->IsSubstring()) {
- offset = node->substring()->start;
- node = node->substring()->child;
- }
-
- // Range to read ends with a proper (possibly empty) subrange of the current
- // chunk.
- assert(node->IsExternal() || node->IsFlat());
- assert(length > n);
- if (n > 0) {
- subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n));
- }
- const char* data =
- node->IsExternal() ? node->external()->base : node->flat()->Data();
- current_chunk_ = absl::string_view(data + offset + n, length - n);
- current_leaf_ = node;
- bytes_remaining_ -= n;
- subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
- return subcord;
-}
-
-void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
- assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
- assert(n >= current_chunk_.size()); // This should only be called when
- // iterating to a new node.
-
- n -= current_chunk_.size();
- bytes_remaining_ -= current_chunk_.size();
-
- if (stack_of_right_children_.empty()) {
- // We have reached the end of the Cord.
- assert(bytes_remaining_ == 0);
- return;
- }
-
- // Process the next node(s) on the stack, skipping whole subtrees depending on
- // their length and how many bytes we are advancing.
- CordRep* node = nullptr;
- auto& stack_of_right_children = stack_of_right_children_;
- while (!stack_of_right_children.empty()) {
- node = stack_of_right_children.back();
- stack_of_right_children.pop_back();
- if (node->length > n) break;
- n -= node->length;
- bytes_remaining_ -= node->length;
- node = nullptr;
- }
-
- if (node == nullptr) {
- // We have reached the end of the Cord.
- assert(bytes_remaining_ == 0);
- return;
- }
+ // From this point on, we need a partial substring node.
+ // Get pointer to the underlying flat or external data payload and
+ // compute data pointer and offset into current flat or external.
+ CordRep* payload = current_leaf_->IsSubstring()
+ ? current_leaf_->substring()->child
+ : current_leaf_;
+ const char* data = payload->IsExternal() ? payload->external()->base
+ : payload->flat()->Data();
+ const size_t offset = current_chunk_.data() - data;
- // Get the child node if we encounter a SUBSTRING.
- size_t offset = 0;
- size_t length = node->length;
- if (node->IsSubstring()) {
- offset = node->substring()->start;
- node = node->substring()->child;
- }
+ CordRepSubstring* tree = new CordRepSubstring();
+ tree->tag = cord_internal::SUBSTRING;
+ tree->length = n;
+ tree->start = offset;
+ tree->child = CordRep::Ref(payload);
- assert(node->IsExternal() || node->IsFlat());
- assert(length > n);
- const char* data =
- node->IsExternal() ? node->external()->base : node->flat()->Data();
- current_chunk_ = absl::string_view(data + offset + n, length - n);
- current_leaf_ = node;
+ subcord.contents_.EmplaceTree(VerifyTree(tree), method);
bytes_remaining_ -= n;
+ current_chunk_.remove_prefix(n);
+ return subcord;
}
char Cord::operator[](size_t i) const {