summaryrefslogtreecommitdiff
path: root/absl/strings
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2022-02-17 12:07:55 -0800
committerGravatar vslashg <gfalcon@google.com>2022-02-17 16:24:45 -0500
commit7f850b3167fb38e6b4a9ce1824e6fabd733b5d62 (patch)
tree862004f447a80f89c31957cf3f254aec1aa4a5be /absl/strings
parentc2ef7033380a3d8661fee76465097422170fb653 (diff)
Export of internal Abseil changes
-- ed829ac612f090375427c3488827c6e74deb2e3f by Derek Mauro <dmauro@google.com>: Update latest GCC/Clang Linux tests to Bazel 5.0.0 and CMake 3.22.2 PiperOrigin-RevId: 429369775 -- 76952303c4d942288c4e7657ffb5893cec54a132 by Martijn Vels <mvels@google.com>: Optimize Cord::ChunkIterator now that CordRepConcat is removed PiperOrigin-RevId: 429321455 -- dcd0d287793649aba9b98268c5783e449a34749f by Martijn Vels <mvels@google.com>: Add IsDataEdge() and DataEdgeValue() helper functions. This moves repetitive logic accessing data edges into its own header, and more strongly defines the notion of what a data edge is, enforcing the internal invariants. This will also be incorporated in optimized Cord iteration logic once CordRepConcat is totally removed from the Cord code. PiperOrigin-RevId: 429307248 -- 6a0903962155988085bf8656743fda9c4cdcba6c by Abseil Team <absl-team@google.com>: Make it clear that the probability function given for the zipf distribution is unnormalized, i.e., sum(p(x) for x = 0..k) != 100%. Quoting Section 7 of the paper cited in the comments, where this formula comes from (emphasis mine): "We will consider the two parameter generalization as defined in Dagpunar [1988] with the *unnormalized* probability function ..." PiperOrigin-RevId: 429068258 -- 3899ff6d444ba755148bc521a6ee031d9e9d4485 by Abseil Team <absl-team@google.com>: Internal Changes PiperOrigin-RevId: 428644856 -- 319de702d2b537cbb76c4c71277ae89b349b162e by Benjamin Barenblat <bbaren@google.com>: Support symbolization on PA-RISC Null out supervisor bits in PA-RISC addresses before symbolizing, and handle function descriptor tables correctly. Change symbolize_test.cc to use 32-bit aligned addresses, allowing that test to pass on PA-RISC. PiperOrigin-RevId: 428590564 GitOrigin-RevId: ed829ac612f090375427c3488827c6e74deb2e3f Change-Id: Ie01ff3b9365fd45e5a55f858038552679f3180d3
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/BUILD.bazel16
-rw-r--r--absl/strings/CMakeLists.txt18
-rw-r--r--absl/strings/cord.cc173
-rw-r--r--absl/strings/cord.h56
-rw-r--r--absl/strings/cord_analysis.cc14
-rw-r--r--absl/strings/cord_test.cc72
-rw-r--r--absl/strings/internal/cord_data_edge.h63
-rw-r--r--absl/strings/internal/cord_data_edge_test.cc130
-rw-r--r--absl/strings/internal/cord_rep_btree.cc7
-rw-r--r--absl/strings/internal/cord_rep_btree.h31
-rw-r--r--absl/strings/internal/cord_rep_btree_navigator.cc3
-rw-r--r--absl/strings/internal/cord_rep_btree_reader.cc5
-rw-r--r--absl/strings/internal/cord_rep_btree_reader.h9
-rw-r--r--absl/strings/internal/cord_rep_btree_test.cc33
14 files changed, 380 insertions, 250 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 129affec..5f122ee9 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -276,6 +276,7 @@ cc_library(
"internal/cord_rep_ring.cc",
],
hdrs = [
+ "internal/cord_data_edge.h",
"internal/cord_internal.h",
"internal/cord_rep_btree.h",
"internal/cord_rep_btree_navigator.h",
@@ -308,6 +309,21 @@ cc_library(
)
cc_test(
+ name = "cord_data_edge_test",
+ size = "small",
+ srcs = ["internal/cord_data_edge_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ visibility = ["//visibility:private"],
+ deps = [
+ ":cord_internal",
+ ":cord_rep_test_util",
+ ":strings",
+ "//absl/base:config",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
name = "cord_rep_btree_test",
size = "medium",
srcs = ["internal/cord_rep_btree_test.cc"],
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index f31eef4d..5d418c86 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -554,6 +554,7 @@ absl_cc_library(
NAME
cord_internal
HDRS
+ "internal/cord_data_edge.h"
"internal/cord_internal.h"
"internal/cord_rep_btree.h"
"internal/cord_rep_btree_navigator.h"
@@ -934,6 +935,23 @@ absl_cc_test(
absl_cc_test(
NAME
+ cord_data_edge_test
+ SRCS
+ "internal/cord_data_edge_test.cc"
+ COPTS
+ ${ABSL_TEST_COPTS}
+ DEPS
+ absl::base
+ absl::config
+ absl::cord_internal
+ absl::cord_rep_test_util
+ absl::core_headers
+ absl::strings
+ GTest::gmock_main
+)
+
+absl_cc_test(
+ NAME
cord_rep_btree_test
SRCS
"internal/cord_rep_btree_test.cc"
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc
index 4ee722da..6547c2da 100644
--- a/absl/strings/cord.cc
+++ b/absl/strings/cord.cc
@@ -35,6 +35,7 @@
#include "absl/container/fixed_array.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/escaping.h"
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
@@ -1094,35 +1095,6 @@ void Cord::CopyToArraySlowPath(char* dst) const {
}
}
-Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() {
- auto& stack_of_right_children = stack_of_right_children_;
- if (stack_of_right_children.empty()) {
- assert(!current_chunk_.empty()); // Called on invalid iterator.
- // We have reached the end of the Cord.
- return *this;
- }
-
- // Process the next node on the stack.
- CordRep* node = stack_of_right_children.back();
- stack_of_right_children.pop_back();
-
- // Get the child node if we encounter a SUBSTRING.
- size_t offset = 0;
- size_t length = node->length;
- if (node->IsSubstring()) {
- offset = node->substring()->start;
- node = node->substring()->child;
- }
-
- assert(node->IsExternal() || node->IsFlat());
- assert(length != 0);
- const char* data =
- node->IsExternal() ? node->external()->base : node->flat()->Data();
- current_chunk_ = absl::string_view(data + offset, length);
- current_leaf_ = node;
- return *this;
-}
-
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
"Attempted to iterate past `end()`");
@@ -1165,133 +1137,36 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
return subcord;
}
- auto& stack_of_right_children = stack_of_right_children_;
- if (n < current_chunk_.size()) {
- // Range to read is a proper subrange of the current chunk.
- assert(current_leaf_ != nullptr);
- CordRep* subnode = CordRep::Ref(current_leaf_);
- const char* data = subnode->IsExternal() ? subnode->external()->base
- : subnode->flat()->Data();
- subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
- subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
- RemoveChunkPrefix(n);
- return subcord;
- }
-
- // Range to read begins with a proper subrange of the current chunk.
- assert(!current_chunk_.empty());
+ // Short circuit if reading the entire data edge.
assert(current_leaf_ != nullptr);
- CordRep* subnode = CordRep::Ref(current_leaf_);
- if (current_chunk_.size() < subnode->length) {
- const char* data = subnode->IsExternal() ? subnode->external()->base
- : subnode->flat()->Data();
- subnode = NewSubstring(subnode, current_chunk_.data() - data,
- current_chunk_.size());
- }
- n -= current_chunk_.size();
- bytes_remaining_ -= current_chunk_.size();
-
- // Process the next node(s) on the stack, reading whole subtrees depending on
- // their length and how many bytes we are advancing.
- CordRep* node = nullptr;
- while (!stack_of_right_children.empty()) {
- node = stack_of_right_children.back();
- stack_of_right_children.pop_back();
- if (node->length > n) break;
- // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
- // Avoiding that would need pretty complicated logic (instead of
- // current_leaf, keep current_subtree_ which points to the highest node
- // such that the current leaf can be found on the path of left children
- // starting from current_subtree_; delay creating subnode while node is
- // below current_subtree_; find the proper node along the path of left
- // children starting from current_subtree_ if this loop exits while staying
- // below current_subtree_; etc.; alternatively, push parents instead of
- // right children on the stack).
- subnode = Concat(subnode, CordRep::Ref(node));
- n -= node->length;
- bytes_remaining_ -= node->length;
- node = nullptr;
- }
-
- if (node == nullptr) {
- // We have reached the end of the Cord.
- assert(bytes_remaining_ == 0);
- subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
+ if (n == current_leaf_->length) {
+ bytes_remaining_ = 0;
+ current_chunk_ = {};
+ CordRep* tree = CordRep::Ref(current_leaf_);
+ subcord.contents_.EmplaceTree(VerifyTree(tree), method);
return subcord;
}
- // Get the child node if we encounter a SUBSTRING.
- size_t offset = 0;
- size_t length = node->length;
- if (node->IsSubstring()) {
- offset = node->substring()->start;
- node = node->substring()->child;
- }
-
- // Range to read ends with a proper (possibly empty) subrange of the current
- // chunk.
- assert(node->IsExternal() || node->IsFlat());
- assert(length > n);
- if (n > 0) {
- subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n));
- }
- const char* data =
- node->IsExternal() ? node->external()->base : node->flat()->Data();
- current_chunk_ = absl::string_view(data + offset + n, length - n);
- current_leaf_ = node;
- bytes_remaining_ -= n;
- subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
- return subcord;
-}
-
-void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
- assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
- assert(n >= current_chunk_.size()); // This should only be called when
- // iterating to a new node.
-
- n -= current_chunk_.size();
- bytes_remaining_ -= current_chunk_.size();
-
- if (stack_of_right_children_.empty()) {
- // We have reached the end of the Cord.
- assert(bytes_remaining_ == 0);
- return;
- }
-
- // Process the next node(s) on the stack, skipping whole subtrees depending on
- // their length and how many bytes we are advancing.
- CordRep* node = nullptr;
- auto& stack_of_right_children = stack_of_right_children_;
- while (!stack_of_right_children.empty()) {
- node = stack_of_right_children.back();
- stack_of_right_children.pop_back();
- if (node->length > n) break;
- n -= node->length;
- bytes_remaining_ -= node->length;
- node = nullptr;
- }
-
- if (node == nullptr) {
- // We have reached the end of the Cord.
- assert(bytes_remaining_ == 0);
- return;
- }
+ // From this point on, we need a partial substring node.
+ // Get pointer to the underlying flat or external data payload and
+ // compute data pointer and offset into current flat or external.
+ CordRep* payload = current_leaf_->IsSubstring()
+ ? current_leaf_->substring()->child
+ : current_leaf_;
+ const char* data = payload->IsExternal() ? payload->external()->base
+ : payload->flat()->Data();
+ const size_t offset = current_chunk_.data() - data;
- // Get the child node if we encounter a SUBSTRING.
- size_t offset = 0;
- size_t length = node->length;
- if (node->IsSubstring()) {
- offset = node->substring()->start;
- node = node->substring()->child;
- }
+ CordRepSubstring* tree = new CordRepSubstring();
+ tree->tag = cord_internal::SUBSTRING;
+ tree->length = n;
+ tree->start = offset;
+ tree->child = CordRep::Ref(payload);
- assert(node->IsExternal() || node->IsFlat());
- assert(length > n);
- const char* data =
- node->IsExternal() ? node->external()->base : node->flat()->Data();
- current_chunk_ = absl::string_view(data + offset + n, length - n);
- current_leaf_ = node;
+ subcord.contents_.EmplaceTree(VerifyTree(tree), method);
bytes_remaining_ -= n;
+ current_chunk_.remove_prefix(n);
+ return subcord;
}
char Cord::operator[](size_t i) const {
diff --git a/absl/strings/cord.h b/absl/strings/cord.h
index 7f34ef48..081b6311 100644
--- a/absl/strings/cord.h
+++ b/absl/strings/cord.h
@@ -80,6 +80,7 @@
#include "absl/functional/function_ref.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/cord_analysis.h"
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_reader.h"
@@ -388,12 +389,6 @@ class Cord {
using CordRepBtree = absl::cord_internal::CordRepBtree;
using CordRepBtreeReader = absl::cord_internal::CordRepBtreeReader;
- // Stack of right children of concat nodes that we have to visit.
- // Keep this at the end of the structure to avoid cache-thrashing.
- // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
- // the inlined vector size (47 exists for backward compatibility).
- using Stack = absl::InlinedVector<absl::cord_internal::CordRep*, 47>;
-
// Constructs a `begin()` iterator from `tree`. `tree` must not be null.
explicit ChunkIterator(cord_internal::CordRep* tree);
@@ -409,17 +404,10 @@ class Cord {
Cord AdvanceAndReadBytes(size_t n);
void AdvanceBytes(size_t n);
- // Stack specific operator++
- ChunkIterator& AdvanceStack();
-
// Btree specific operator++
ChunkIterator& AdvanceBtree();
void AdvanceBytesBtree(size_t n);
- // Iterates `n` bytes, where `n` is expected to be greater than or equal to
- // `current_chunk_.size()`.
- void AdvanceBytesSlowPath(size_t n);
-
// A view into bytes of the current `CordRep`. It may only be a view to a
// suffix of bytes if this is being used by `CharIterator`.
absl::string_view current_chunk_;
@@ -432,9 +420,6 @@ class Cord {
// Cord reader for cord btrees. Empty if not traversing a btree.
CordRepBtreeReader btree_reader_;
-
- // See 'Stack' alias definition.
- Stack stack_of_right_children_;
};
// Cord::ChunkIterator::chunk_begin()
@@ -725,7 +710,8 @@ class Cord {
// be used by spelling absl::strings_internal::MakeStringConstant, which is
// also an internal API.
template <typename T>
- explicit constexpr Cord(strings_internal::StringConstant<T>);
+ // NOLINTNEXTLINE(google-explicit-constructor)
+ constexpr Cord(strings_internal::StringConstant<T>);
private:
using CordRep = absl::cord_internal::CordRep;
@@ -1321,25 +1307,24 @@ inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) {
tree = cord_internal::SkipCrcNode(tree);
if (tree->tag == cord_internal::BTREE) {
current_chunk_ = btree_reader_.Init(tree->btree());
- return;
+ } else {
+ current_leaf_ = tree;
+ current_chunk_ = cord_internal::EdgeData(tree);
}
-
- stack_of_right_children_.push_back(tree);
- operator++();
}
-inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree)
- : bytes_remaining_(tree->length) {
+inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) {
+ bytes_remaining_ = tree->length;
InitTree(tree);
}
-inline Cord::ChunkIterator::ChunkIterator(const Cord* cord)
- : bytes_remaining_(cord->size()) {
- if (cord->contents_.is_tree()) {
- InitTree(cord->contents_.as_tree());
+inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) {
+ if (CordRep* tree = cord->contents_.tree()) {
+ bytes_remaining_ = tree->length;
+ InitTree(tree);
} else {
- current_chunk_ =
- absl::string_view(cord->contents_.data(), bytes_remaining_);
+ bytes_remaining_ = cord->contents_.inline_size();
+ current_chunk_ = {cord->contents_.data(), bytes_remaining_};
}
}
@@ -1369,8 +1354,11 @@ inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() {
assert(bytes_remaining_ >= current_chunk_.size());
bytes_remaining_ -= current_chunk_.size();
if (bytes_remaining_ > 0) {
- return btree_reader_ ? AdvanceBtree() : AdvanceStack();
- } else {
+ if (btree_reader_) {
+ return AdvanceBtree();
+ } else {
+ assert(!current_chunk_.empty()); // Called on invalid iterator.
+ }
current_chunk_ = {};
}
return *this;
@@ -1411,7 +1399,11 @@ inline void Cord::ChunkIterator::AdvanceBytes(size_t n) {
if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) {
RemoveChunkPrefix(n);
} else if (n != 0) {
- btree_reader_ ? AdvanceBytesBtree(n) : AdvanceBytesSlowPath(n);
+ if (btree_reader_) {
+ AdvanceBytesBtree(n);
+ } else {
+ bytes_remaining_ = 0;
+ }
}
}
diff --git a/absl/strings/cord_analysis.cc b/absl/strings/cord_analysis.cc
index 3fa15b01..73d3c4e6 100644
--- a/absl/strings/cord_analysis.cc
+++ b/absl/strings/cord_analysis.cc
@@ -12,13 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include "absl/strings/cord_analysis.h"
+
#include <cstddef>
#include <cstdint>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/container/inlined_vector.h"
-#include "absl/strings/cord_analysis.h"
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
@@ -98,16 +100,6 @@ struct RawUsage<Mode::kFairShare> {
}
};
-// Returns true if the provided rep is a valid data edge.
-bool IsDataEdge(const CordRep* rep) {
- // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
- // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
- // check in the slow path the SUBSTRING check to optimize for the hot path.
- if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
- if (rep->tag == SUBSTRING) rep = rep->substring()->child;
- return rep->tag == EXTERNAL || rep->tag >= FLAT;
-}
-
// Computes the estimated memory size of the provided data edge.
// External reps are assumed 'heap allocated at their exact size'.
template <Mode mode>
diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc
index c26e506d..9dcc4ce5 100644
--- a/absl/strings/cord_test.cc
+++ b/absl/strings/cord_test.cc
@@ -1866,6 +1866,78 @@ TEST_P(CordTest, CordChunkIteratorOperations) {
VerifyChunkIterator(subcords, 128);
}
+
+TEST_P(CordTest, AdvanceAndReadOnDataEdge) {
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
+ const std::string data = RandomLowercaseString(&rng, 2000);
+ for (bool as_flat : {true, false}) {
+ SCOPED_TRACE(as_flat ? "Flat" : "External");
+
+ absl::Cord cord =
+ as_flat ? absl::Cord(data)
+ : absl::MakeCordFromExternal(data, [](absl::string_view) {});
+ auto it = cord.Chars().begin();
+#if !defined(NDEBUG) || ABSL_OPTION_HARDENED
+ EXPECT_DEATH_IF_SUPPORTED(cord.AdvanceAndRead(&it, 2001), ".*");
+#endif
+
+ it = cord.Chars().begin();
+ absl::Cord frag = cord.AdvanceAndRead(&it, 2000);
+ EXPECT_EQ(frag, data);
+ EXPECT_TRUE(it == cord.Chars().end());
+
+ it = cord.Chars().begin();
+ frag = cord.AdvanceAndRead(&it, 200);
+ EXPECT_EQ(frag, data.substr(0, 200));
+ EXPECT_FALSE(it == cord.Chars().end());
+
+ frag = cord.AdvanceAndRead(&it, 1500);
+ EXPECT_EQ(frag, data.substr(200, 1500));
+ EXPECT_FALSE(it == cord.Chars().end());
+
+ frag = cord.AdvanceAndRead(&it, 300);
+ EXPECT_EQ(frag, data.substr(1700, 300));
+ EXPECT_TRUE(it == cord.Chars().end());
+ }
+}
+
+TEST_P(CordTest, AdvanceAndReadOnSubstringDataEdge) {
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
+ const std::string data = RandomLowercaseString(&rng, 2500);
+ for (bool as_flat : {true, false}) {
+ SCOPED_TRACE(as_flat ? "Flat" : "External");
+
+ absl::Cord cord =
+ as_flat ? absl::Cord(data)
+ : absl::MakeCordFromExternal(data, [](absl::string_view) {});
+ cord = cord.Subcord(200, 2000);
+ const std::string substr = data.substr(200, 2000);
+
+ auto it = cord.Chars().begin();
+#if !defined(NDEBUG) || ABSL_OPTION_HARDENED
+ EXPECT_DEATH_IF_SUPPORTED(cord.AdvanceAndRead(&it, 2001), ".*");
+#endif
+
+ it = cord.Chars().begin();
+ absl::Cord frag = cord.AdvanceAndRead(&it, 2000);
+ EXPECT_EQ(frag, substr);
+ EXPECT_TRUE(it == cord.Chars().end());
+
+ it = cord.Chars().begin();
+ frag = cord.AdvanceAndRead(&it, 200);
+ EXPECT_EQ(frag, substr.substr(0, 200));
+ EXPECT_FALSE(it == cord.Chars().end());
+
+ frag = cord.AdvanceAndRead(&it, 1500);
+ EXPECT_EQ(frag, substr.substr(200, 1500));
+ EXPECT_FALSE(it == cord.Chars().end());
+
+ frag = cord.AdvanceAndRead(&it, 300);
+ EXPECT_EQ(frag, substr.substr(1700, 300));
+ EXPECT_TRUE(it == cord.Chars().end());
+ }
+}
+
TEST_P(CordTest, CharIteratorTraits) {
static_assert(std::is_copy_constructible<absl::Cord::CharIterator>::value,
"");
diff --git a/absl/strings/internal/cord_data_edge.h b/absl/strings/internal/cord_data_edge.h
new file mode 100644
index 00000000..e18b33e1
--- /dev/null
+++ b/absl/strings/internal/cord_data_edge.h
@@ -0,0 +1,63 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_
+#define ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_
+
+#include <cassert>
+#include <cstddef>
+
+#include "absl/base/config.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
+// holding a FLAT or EXTERNAL child rep. Requires `rep != nullptr`.
+inline bool IsDataEdge(const CordRep* edge) {
+ assert(edge != nullptr);
+
+ // The fast path is that `edge` is an EXTERNAL or FLAT node, making the below
+ // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
+ // check in the slow path of the SUBSTRING check to optimize for the hot path.
+ if (edge->tag == EXTERNAL || edge->tag >= FLAT) return true;
+ if (edge->tag == SUBSTRING) edge = edge->substring()->child;
+ return edge->tag == EXTERNAL || edge->tag >= FLAT;
+}
+
+// Returns the `absl::string_view` data reference for the provided data edge.
+// Requires 'IsDataEdge(edge) == true`.
+inline absl::string_view EdgeData(const CordRep* edge) {
+ assert(IsDataEdge(edge));
+
+ size_t offset = 0;
+ const size_t length = edge->length;
+ if (edge->IsSubstring()) {
+ offset = edge->substring()->start;
+ edge = edge->substring()->child;
+ }
+ return edge->tag >= FLAT
+ ? absl::string_view{edge->flat()->Data() + offset, length}
+ : absl::string_view{edge->external()->base + offset, length};
+}
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_
diff --git a/absl/strings/internal/cord_data_edge_test.cc b/absl/strings/internal/cord_data_edge_test.cc
new file mode 100644
index 00000000..8fce3bc6
--- /dev/null
+++ b/absl/strings/internal/cord_data_edge_test.cc
@@ -0,0 +1,130 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/cord_data_edge.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_test_util.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+namespace {
+
+using ::absl::cordrep_testing::MakeExternal;
+using ::absl::cordrep_testing::MakeFlat;
+using ::absl::cordrep_testing::MakeSubstring;
+
+TEST(CordDataEdgeTest, IsDataEdgeOnFlat) {
+ CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
+ EXPECT_TRUE(IsDataEdge(rep));
+ CordRep::Unref(rep);
+}
+
+TEST(CordDataEdgeTest, IsDataEdgeOnExternal) {
+ CordRep* rep = MakeExternal("Lorem ipsum dolor sit amet, consectetur ...");
+ EXPECT_TRUE(IsDataEdge(rep));
+ CordRep::Unref(rep);
+}
+
+TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfFlat) {
+ CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
+ CordRep* substr = MakeSubstring(1, 20, rep);
+ EXPECT_TRUE(IsDataEdge(substr));
+ CordRep::Unref(substr);
+}
+
+TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfExternal) {
+ CordRep* rep = MakeExternal("Lorem ipsum dolor sit amet, consectetur ...");
+ CordRep* substr = MakeSubstring(1, 20, rep);
+ EXPECT_TRUE(IsDataEdge(substr));
+ CordRep::Unref(substr);
+}
+
+TEST(CordDataEdgeTest, IsDataEdgeOnBtree) {
+ CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
+ CordRepBtree* tree = CordRepBtree::New(rep);
+ EXPECT_FALSE(IsDataEdge(tree));
+ CordRep::Unref(tree);
+}
+
+TEST(CordDataEdgeTest, IsDataEdgeOnBadSubstr) {
+ CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
+ CordRep* substr = MakeSubstring(1, 18, MakeSubstring(1, 20, rep));
+ EXPECT_FALSE(IsDataEdge(substr));
+ CordRep::Unref(substr);
+}
+
+TEST(CordDataEdgeTest, EdgeDataOnFlat) {
+ absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ...";
+ CordRep* rep = MakeFlat(value);
+ EXPECT_EQ(EdgeData(rep), value);
+ CordRep::Unref(rep);
+}
+
+TEST(CordDataEdgeTest, EdgeDataOnExternal) {
+ absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ...";
+ CordRep* rep = MakeExternal(value);
+ EXPECT_EQ(EdgeData(rep), value);
+ CordRep::Unref(rep);
+}
+
+TEST(CordDataEdgeTest, EdgeDataOnSubstringOfFlat) {
+ absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ...";
+ CordRep* rep = MakeFlat(value);
+ CordRep* substr = MakeSubstring(1, 20, rep);
+ EXPECT_EQ(EdgeData(substr), value.substr(1, 20));
+ CordRep::Unref(substr);
+}
+
+TEST(CordDataEdgeTest, EdgeDataOnSubstringOfExternal) {
+ absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ...";
+ CordRep* rep = MakeExternal(value);
+ CordRep* substr = MakeSubstring(1, 20, rep);
+ EXPECT_EQ(EdgeData(substr), value.substr(1, 20));
+ CordRep::Unref(substr);
+}
+
+#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+
+TEST(CordDataEdgeTest, IsDataEdgeOnNullPtr) {
+ EXPECT_DEATH(IsDataEdge(nullptr), ".*");
+}
+
+TEST(CordDataEdgeTest, EdgeDataOnNullPtr) {
+ EXPECT_DEATH(EdgeData(nullptr), ".*");
+}
+
+TEST(CordDataEdgeTest, EdgeDataOnBtree) {
+ CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
+ CordRepBtree* tree = CordRepBtree::New(rep);
+ EXPECT_DEATH(EdgeData(tree), ".*");
+ CordRep::Unref(tree);
+}
+
+TEST(CordDataEdgeTest, EdgeDataOnBadSubstr) {
+ CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
+ CordRep* substr = MakeSubstring(1, 18, MakeSubstring(1, 20, rep));
+ EXPECT_DEATH(EdgeData(substr), ".*");
+ CordRep::Unref(substr);
+}
+
+#endif // GTEST_HAS_DEATH_TEST && !NDEBUG
+
+} // namespace
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc
index 3ebd9f6d..2b592b47 100644
--- a/absl/strings/internal/cord_rep_btree.cc
+++ b/absl/strings/internal/cord_rep_btree.cc
@@ -22,6 +22,7 @@
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_consume.h"
#include "absl/strings/internal/cord_rep_flat.h"
@@ -69,7 +70,7 @@ void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
// indentation and prefix / labels keeps us within roughly 80-100 wide.
constexpr size_t kMaxDataLength = 60;
stream << ", data = \""
- << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
+ << EdgeData(r).substr(0, kMaxDataLength)
<< (r->length > kMaxDataLength ? "\"..." : "\"");
}
stream << '\n';
@@ -150,7 +151,7 @@ inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) {
assert(length > 0);
assert(length <= edge->length);
- assert(CordRepBtree::IsDataEdge(edge));
+ assert(IsDataEdge(edge));
if (length >= edge->length) return edge;
if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) {
@@ -207,7 +208,7 @@ void DeleteSubstring(CordRepSubstring* substring) {
// Deletes a leaf node data edge. Requires `IsDataEdge(rep)`.
void DeleteLeafEdge(CordRep* rep) {
- assert(CordRepBtree::IsDataEdge(rep));
+ assert(IsDataEdge(rep));
if (rep->tag >= FLAT) {
CordRepFlat::Delete(rep->flat());
} else if (rep->tag == EXTERNAL) {
diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h
index df5c994c..0e78e12c 100644
--- a/absl/strings/internal/cord_rep_btree.h
+++ b/absl/strings/internal/cord_rep_btree.h
@@ -22,6 +22,7 @@
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h"
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
@@ -310,13 +311,6 @@ class CordRepBtree : public CordRep {
// Requires this instance to be a leaf node, and `index` to be valid index.
inline absl::string_view Data(size_t index) const;
- static const char* EdgeDataPtr(const CordRep* r);
- static absl::string_view EdgeData(const CordRep* r);
-
- // Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
- // holding a FLAT or EXTERNAL child rep.
- static bool IsDataEdge(const CordRep* rep);
-
// Diagnostics: returns true if `tree` is valid and internally consistent.
// If `shallow` is false, then the provided top level node and all child nodes
// below it are recursively checked. If `shallow` is true, only the provided
@@ -631,34 +625,11 @@ inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
return {edges_ + begin, static_cast<size_t>(end - begin)};
}
-inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) {
- assert(IsDataEdge(r));
- size_t offset = 0;
- if (r->tag == SUBSTRING) {
- offset = r->substring()->start;
- r = r->substring()->child;
- }
- return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset;
-}
-
-inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) {
- return absl::string_view(EdgeDataPtr(r), r->length);
-}
-
inline absl::string_view CordRepBtree::Data(size_t index) const {
assert(height() == 0);
return EdgeData(Edge(index));
}
-inline bool CordRepBtree::IsDataEdge(const CordRep* rep) {
- // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
- // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
- // check in the slow path the SUBSTRING check to optimize for the hot path.
- if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
- if (rep->tag == SUBSTRING) rep = rep->substring()->child;
- return rep->tag == EXTERNAL || rep->tag >= FLAT;
-}
-
inline CordRepBtree* CordRepBtree::New(int height) {
CordRepBtree* tree = new CordRepBtree;
tree->length = 0;
diff --git a/absl/strings/internal/cord_rep_btree_navigator.cc b/absl/strings/internal/cord_rep_btree_navigator.cc
index 19afbe90..9b896a3d 100644
--- a/absl/strings/internal/cord_rep_btree_navigator.cc
+++ b/absl/strings/internal/cord_rep_btree_navigator.cc
@@ -16,6 +16,7 @@
#include <cassert>
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
@@ -39,7 +40,7 @@ inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) {
assert(n <= rep->length);
assert(offset < rep->length);
assert(offset <= rep->length - n);
- assert(CordRepBtree::IsDataEdge(rep));
+ assert(IsDataEdge(rep));
if (n == 0) return nullptr;
if (n == rep->length) return CordRep::Ref(rep);
diff --git a/absl/strings/internal/cord_rep_btree_reader.cc b/absl/strings/internal/cord_rep_btree_reader.cc
index 5dc76966..0d0e8601 100644
--- a/absl/strings/internal/cord_rep_btree_reader.cc
+++ b/absl/strings/internal/cord_rep_btree_reader.cc
@@ -17,6 +17,7 @@
#include <cassert>
#include "absl/base/config.h"
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_navigator.h"
@@ -44,7 +45,7 @@ absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size,
// can directly return the substring into the current data edge as the next
// chunk. We can easily establish from the above code that `navigator_.Next()`
// has not been called as that requires `chunk_size` to be zero.
- if (n < chunk_size) return CordRepBtree::EdgeData(edge).substr(result.n);
+ if (n < chunk_size) return EdgeData(edge).substr(result.n);
// The amount of data taken from the last edge is `chunk_size` and `result.n`
// contains the offset into the current edge trailing the read data (which can
@@ -60,7 +61,7 @@ absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size,
// We did not read all data, return remaining data from current edge.
edge = navigator_.Current();
remaining_ -= consumed_by_read + edge->length;
- return CordRepBtree::EdgeData(edge).substr(result.n);
+ return EdgeData(edge).substr(result.n);
}
} // namespace cord_internal
diff --git a/absl/strings/internal/cord_rep_btree_reader.h b/absl/strings/internal/cord_rep_btree_reader.h
index 7aa79dbf..8db8f8dd 100644
--- a/absl/strings/internal/cord_rep_btree_reader.h
+++ b/absl/strings/internal/cord_rep_btree_reader.h
@@ -18,6 +18,7 @@
#include <cassert>
#include "absl/base/config.h"
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_navigator.h"
@@ -167,7 +168,7 @@ inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) {
assert(tree != nullptr);
const CordRep* edge = navigator_.InitFirst(tree);
remaining_ = tree->length - edge->length;
- return CordRepBtree::EdgeData(edge);
+ return EdgeData(edge);
}
inline absl::string_view CordRepBtreeReader::Next() {
@@ -175,7 +176,7 @@ inline absl::string_view CordRepBtreeReader::Next() {
const CordRep* edge = navigator_.Next();
assert(edge != nullptr);
remaining_ -= edge->length;
- return CordRepBtree::EdgeData(edge);
+ return EdgeData(edge);
}
inline absl::string_view CordRepBtreeReader::Skip(size_t skip) {
@@ -190,7 +191,7 @@ inline absl::string_view CordRepBtreeReader::Skip(size_t skip) {
// The combined length of all edges skipped before `pos.edge` is `skip -
// pos.offset`, all of which are 'consumed', as well as the current edge.
remaining_ -= skip - pos.offset + pos.edge->length;
- return CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
+ return EdgeData(pos.edge).substr(pos.offset);
}
inline absl::string_view CordRepBtreeReader::Seek(size_t offset) {
@@ -199,7 +200,7 @@ inline absl::string_view CordRepBtreeReader::Seek(size_t offset) {
remaining_ = 0;
return {};
}
- absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
+ absl::string_view chunk = EdgeData(pos.edge).substr(pos.offset);
remaining_ = length() - offset - chunk.length();
return chunk;
}
diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc
index 1d56b252..51b90db1 100644
--- a/absl/strings/internal/cord_rep_btree_test.cc
+++ b/absl/strings/internal/cord_rep_btree_test.cc
@@ -25,6 +25,7 @@
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/cleanup/cleanup.h"
+#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_test_util.h"
#include "absl/strings/str_cat.h"
@@ -323,30 +324,26 @@ TEST(CordRepBtreeTest, EdgeData) {
CordRep* substr2 = MakeSubstring(1, 6, CordRep::Ref(external));
CordRep* bad_substr = MakeSubstring(1, 2, CordRep::Ref(substr1));
- EXPECT_TRUE(CordRepBtree::IsDataEdge(flat));
- EXPECT_THAT(CordRepBtree::EdgeDataPtr(flat),
- TypedEq<const void*>(flat->Data()));
- EXPECT_THAT(CordRepBtree::EdgeData(flat), Eq("Hello world"));
+ EXPECT_TRUE(IsDataEdge(flat));
+ EXPECT_THAT(EdgeData(flat).data(), TypedEq<const void*>(flat->Data()));
+ EXPECT_THAT(EdgeData(flat), Eq("Hello world"));
- EXPECT_TRUE(CordRepBtree::IsDataEdge(external));
- EXPECT_THAT(CordRepBtree::EdgeDataPtr(external),
- TypedEq<const void*>(external->base));
- EXPECT_THAT(CordRepBtree::EdgeData(external), Eq("Hello external"));
+ EXPECT_TRUE(IsDataEdge(external));
+ EXPECT_THAT(EdgeData(external).data(), TypedEq<const void*>(external->base));
+ EXPECT_THAT(EdgeData(external), Eq("Hello external"));
- EXPECT_TRUE(CordRepBtree::IsDataEdge(substr1));
- EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr1),
- TypedEq<const void*>(flat->Data() + 1));
- EXPECT_THAT(CordRepBtree::EdgeData(substr1), Eq("ello w"));
+ EXPECT_TRUE(IsDataEdge(substr1));
+ EXPECT_THAT(EdgeData(substr1).data(), TypedEq<const void*>(flat->Data() + 1));
+ EXPECT_THAT(EdgeData(substr1), Eq("ello w"));
- EXPECT_TRUE(CordRepBtree::IsDataEdge(substr2));
- EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr2),
+ EXPECT_TRUE(IsDataEdge(substr2));
+ EXPECT_THAT(EdgeData(substr2).data(),
TypedEq<const void*>(external->base + 1));
- EXPECT_THAT(CordRepBtree::EdgeData(substr2), Eq("ello e"));
+ EXPECT_THAT(EdgeData(substr2), Eq("ello e"));
- EXPECT_FALSE(CordRepBtree::IsDataEdge(bad_substr));
+ EXPECT_FALSE(IsDataEdge(bad_substr));
#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
- EXPECT_DEATH(CordRepBtree::EdgeData(bad_substr), ".*");
- EXPECT_DEATH(CordRepBtree::EdgeDataPtr(bad_substr), ".*");
+ EXPECT_DEATH(EdgeData(bad_substr), ".*");
#endif
CordRep::Unref(bad_substr);