diff options
author | Abseil Team <absl-team@google.com> | 2022-02-17 12:07:55 -0800 |
---|---|---|
committer | vslashg <gfalcon@google.com> | 2022-02-17 16:24:45 -0500 |
commit | 7f850b3167fb38e6b4a9ce1824e6fabd733b5d62 (patch) | |
tree | 862004f447a80f89c31957cf3f254aec1aa4a5be /absl/strings/cord.cc | |
parent | c2ef7033380a3d8661fee76465097422170fb653 (diff) |
Export of internal Abseil changes
--
ed829ac612f090375427c3488827c6e74deb2e3f by Derek Mauro <dmauro@google.com>:
Update latest GCC/Clang Linux tests to Bazel 5.0.0 and CMake 3.22.2
PiperOrigin-RevId: 429369775
--
76952303c4d942288c4e7657ffb5893cec54a132 by Martijn Vels <mvels@google.com>:
Optimize Cord::ChunkIterator now that CordRepConcat is removed
PiperOrigin-RevId: 429321455
--
dcd0d287793649aba9b98268c5783e449a34749f by Martijn Vels <mvels@google.com>:
Add IsDataEdge() and DataEdgeValue() helper functions.
This moves repetitive logic accessing data edges into its own header, and more strongly defines the notion of what a data edge is, enforcing the internal invariants. This will also be incorporated in optimized Cord iteration logic once CordRepConcat is totally removed from the Cord code.
PiperOrigin-RevId: 429307248
--
6a0903962155988085bf8656743fda9c4cdcba6c by Abseil Team <absl-team@google.com>:
Make it clear that the probability function given for the zipf distribution is unnormalized, i.e., sum(p(x) for x = 0..k) != 100%.
Quoting Section 7 of the paper cited in the comments, where this formula comes from (emphasis mine): "We will consider the two parameter generalization as defined in Dagpunar [1988] with the *unnormalized* probability function ..."
PiperOrigin-RevId: 429068258
--
3899ff6d444ba755148bc521a6ee031d9e9d4485 by Abseil Team <absl-team@google.com>:
Internal Changes
PiperOrigin-RevId: 428644856
--
319de702d2b537cbb76c4c71277ae89b349b162e by Benjamin Barenblat <bbaren@google.com>:
Support symbolization on PA-RISC
Null out supervisor bits in PA-RISC addresses before symbolizing, and
handle function descriptor tables correctly.
Change symbolize_test.cc to use 32-bit aligned addresses, allowing that
test to pass on PA-RISC.
PiperOrigin-RevId: 428590564
GitOrigin-RevId: ed829ac612f090375427c3488827c6e74deb2e3f
Change-Id: Ie01ff3b9365fd45e5a55f858038552679f3180d3
Diffstat (limited to 'absl/strings/cord.cc')
-rw-r--r-- | absl/strings/cord.cc | 173 |
1 files changed, 24 insertions, 149 deletions
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 4ee722da..6547c2da 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -35,6 +35,7 @@ #include "absl/container/fixed_array.h" #include "absl/container/inlined_vector.h" #include "absl/strings/escaping.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_crc.h" @@ -1094,35 +1095,6 @@ void Cord::CopyToArraySlowPath(char* dst) const { } } -Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() { - auto& stack_of_right_children = stack_of_right_children_; - if (stack_of_right_children.empty()) { - assert(!current_chunk_.empty()); // Called on invalid iterator. - // We have reached the end of the Cord. - return *this; - } - - // Process the next node on the stack. - CordRep* node = stack_of_right_children.back(); - stack_of_right_children.pop_back(); - - // Get the child node if we encounter a SUBSTRING. - size_t offset = 0; - size_t length = node->length; - if (node->IsSubstring()) { - offset = node->substring()->start; - node = node->substring()->child; - } - - assert(node->IsExternal() || node->IsFlat()); - assert(length != 0); - const char* data = - node->IsExternal() ? node->external()->base : node->flat()->Data(); - current_chunk_ = absl::string_view(data + offset, length); - current_leaf_ = node; - return *this; -} - Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { ABSL_HARDENING_ASSERT(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); @@ -1165,133 +1137,36 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { return subcord; } - auto& stack_of_right_children = stack_of_right_children_; - if (n < current_chunk_.size()) { - // Range to read is a proper subrange of the current chunk. - assert(current_leaf_ != nullptr); - CordRep* subnode = CordRep::Ref(current_leaf_); - const char* data = subnode->IsExternal() ? subnode->external()->base - : subnode->flat()->Data(); - subnode = NewSubstring(subnode, current_chunk_.data() - data, n); - subcord.contents_.EmplaceTree(VerifyTree(subnode), method); - RemoveChunkPrefix(n); - return subcord; - } - - // Range to read begins with a proper subrange of the current chunk. - assert(!current_chunk_.empty()); + // Short circuit if reading the entire data edge. assert(current_leaf_ != nullptr); - CordRep* subnode = CordRep::Ref(current_leaf_); - if (current_chunk_.size() < subnode->length) { - const char* data = subnode->IsExternal() ? subnode->external()->base - : subnode->flat()->Data(); - subnode = NewSubstring(subnode, current_chunk_.data() - data, - current_chunk_.size()); - } - n -= current_chunk_.size(); - bytes_remaining_ -= current_chunk_.size(); - - // Process the next node(s) on the stack, reading whole subtrees depending on - // their length and how many bytes we are advancing. - CordRep* node = nullptr; - while (!stack_of_right_children.empty()) { - node = stack_of_right_children.back(); - stack_of_right_children.pop_back(); - if (node->length > n) break; - // TODO(qrczak): This might unnecessarily recreate existing concat nodes. - // Avoiding that would need pretty complicated logic (instead of - // current_leaf, keep current_subtree_ which points to the highest node - // such that the current leaf can be found on the path of left children - // starting from current_subtree_; delay creating subnode while node is - // below current_subtree_; find the proper node along the path of left - // children starting from current_subtree_ if this loop exits while staying - // below current_subtree_; etc.; alternatively, push parents instead of - // right children on the stack). - subnode = Concat(subnode, CordRep::Ref(node)); - n -= node->length; - bytes_remaining_ -= node->length; - node = nullptr; - } - - if (node == nullptr) { - // We have reached the end of the Cord. - assert(bytes_remaining_ == 0); - subcord.contents_.EmplaceTree(VerifyTree(subnode), method); + if (n == current_leaf_->length) { + bytes_remaining_ = 0; + current_chunk_ = {}; + CordRep* tree = CordRep::Ref(current_leaf_); + subcord.contents_.EmplaceTree(VerifyTree(tree), method); return subcord; } - // Get the child node if we encounter a SUBSTRING. - size_t offset = 0; - size_t length = node->length; - if (node->IsSubstring()) { - offset = node->substring()->start; - node = node->substring()->child; - } - - // Range to read ends with a proper (possibly empty) subrange of the current - // chunk. - assert(node->IsExternal() || node->IsFlat()); - assert(length > n); - if (n > 0) { - subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n)); - } - const char* data = - node->IsExternal() ? node->external()->base : node->flat()->Data(); - current_chunk_ = absl::string_view(data + offset + n, length - n); - current_leaf_ = node; - bytes_remaining_ -= n; - subcord.contents_.EmplaceTree(VerifyTree(subnode), method); - return subcord; -} - -void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { - assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); - assert(n >= current_chunk_.size()); // This should only be called when - // iterating to a new node. - - n -= current_chunk_.size(); - bytes_remaining_ -= current_chunk_.size(); - - if (stack_of_right_children_.empty()) { - // We have reached the end of the Cord. - assert(bytes_remaining_ == 0); - return; - } - - // Process the next node(s) on the stack, skipping whole subtrees depending on - // their length and how many bytes we are advancing. - CordRep* node = nullptr; - auto& stack_of_right_children = stack_of_right_children_; - while (!stack_of_right_children.empty()) { - node = stack_of_right_children.back(); - stack_of_right_children.pop_back(); - if (node->length > n) break; - n -= node->length; - bytes_remaining_ -= node->length; - node = nullptr; - } - - if (node == nullptr) { - // We have reached the end of the Cord. - assert(bytes_remaining_ == 0); - return; - } + // From this point on, we need a partial substring node. + // Get pointer to the underlying flat or external data payload and + // compute data pointer and offset into current flat or external. + CordRep* payload = current_leaf_->IsSubstring() + ? current_leaf_->substring()->child + : current_leaf_; + const char* data = payload->IsExternal() ? payload->external()->base + : payload->flat()->Data(); + const size_t offset = current_chunk_.data() - data; - // Get the child node if we encounter a SUBSTRING. - size_t offset = 0; - size_t length = node->length; - if (node->IsSubstring()) { - offset = node->substring()->start; - node = node->substring()->child; - } + CordRepSubstring* tree = new CordRepSubstring(); + tree->tag = cord_internal::SUBSTRING; + tree->length = n; + tree->start = offset; + tree->child = CordRep::Ref(payload); - assert(node->IsExternal() || node->IsFlat()); - assert(length > n); - const char* data = - node->IsExternal() ? node->external()->base : node->flat()->Data(); - current_chunk_ = absl::string_view(data + offset + n, length - n); - current_leaf_ = node; + subcord.contents_.EmplaceTree(VerifyTree(tree), method); bytes_remaining_ -= n; + current_chunk_.remove_prefix(n); + return subcord; } char Cord::operator[](size_t i) const { |