diff options
author | Abseil Team <absl-team@google.com> | 2022-02-17 12:07:55 -0800 |
---|---|---|
committer | vslashg <gfalcon@google.com> | 2022-02-17 16:24:45 -0500 |
commit | 7f850b3167fb38e6b4a9ce1824e6fabd733b5d62 (patch) | |
tree | 862004f447a80f89c31957cf3f254aec1aa4a5be /absl/strings | |
parent | c2ef7033380a3d8661fee76465097422170fb653 (diff) |
Export of internal Abseil changes
--
ed829ac612f090375427c3488827c6e74deb2e3f by Derek Mauro <dmauro@google.com>:
Update latest GCC/Clang Linux tests to Bazel 5.0.0 and CMake 3.22.2
PiperOrigin-RevId: 429369775
--
76952303c4d942288c4e7657ffb5893cec54a132 by Martijn Vels <mvels@google.com>:
Optimize Cord::ChunkIterator now that CordRepConcat is removed
PiperOrigin-RevId: 429321455
--
dcd0d287793649aba9b98268c5783e449a34749f by Martijn Vels <mvels@google.com>:
Add IsDataEdge() and DataEdgeValue() helper functions.
This moves repetitive logic accessing data edges into its own header, and more strongly defines the notion of what a data edge is, enforcing the internal invariants. This will also be incorporated in optimized Cord iteration logic once CordRepConcat is totally removed from the Cord code.
PiperOrigin-RevId: 429307248
--
6a0903962155988085bf8656743fda9c4cdcba6c by Abseil Team <absl-team@google.com>:
Make it clear that the probability function given for the zipf distribution is unnormalized, i.e., sum(p(x) for x = 0..k) != 100%.
Quoting Section 7 of the paper cited in the comments, where this formula comes from (emphasis mine): "We will consider the two parameter generalization as defined in Dagpunar [1988] with the *unnormalized* probability function ..."
PiperOrigin-RevId: 429068258
--
3899ff6d444ba755148bc521a6ee031d9e9d4485 by Abseil Team <absl-team@google.com>:
Internal Changes
PiperOrigin-RevId: 428644856
--
319de702d2b537cbb76c4c71277ae89b349b162e by Benjamin Barenblat <bbaren@google.com>:
Support symbolization on PA-RISC
Null out supervisor bits in PA-RISC addresses before symbolizing, and
handle function descriptor tables correctly.
Change symbolize_test.cc to use 32-bit aligned addresses, allowing that
test to pass on PA-RISC.
PiperOrigin-RevId: 428590564
GitOrigin-RevId: ed829ac612f090375427c3488827c6e74deb2e3f
Change-Id: Ie01ff3b9365fd45e5a55f858038552679f3180d3
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/BUILD.bazel | 16 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 18 | ||||
-rw-r--r-- | absl/strings/cord.cc | 173 | ||||
-rw-r--r-- | absl/strings/cord.h | 56 | ||||
-rw-r--r-- | absl/strings/cord_analysis.cc | 14 | ||||
-rw-r--r-- | absl/strings/cord_test.cc | 72 | ||||
-rw-r--r-- | absl/strings/internal/cord_data_edge.h | 63 | ||||
-rw-r--r-- | absl/strings/internal/cord_data_edge_test.cc | 130 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree.cc | 7 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree.h | 31 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree_navigator.cc | 3 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree_reader.cc | 5 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree_reader.h | 9 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree_test.cc | 33 |
14 files changed, 380 insertions, 250 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 129affec..5f122ee9 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -276,6 +276,7 @@ cc_library( "internal/cord_rep_ring.cc", ], hdrs = [ + "internal/cord_data_edge.h", "internal/cord_internal.h", "internal/cord_rep_btree.h", "internal/cord_rep_btree_navigator.h", @@ -308,6 +309,21 @@ cc_library( ) cc_test( + name = "cord_data_edge_test", + size = "small", + srcs = ["internal/cord_data_edge_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( name = "cord_rep_btree_test", size = "medium", srcs = ["internal/cord_rep_btree_test.cc"], diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index f31eef4d..5d418c86 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -554,6 +554,7 @@ absl_cc_library( NAME cord_internal HDRS + "internal/cord_data_edge.h" "internal/cord_internal.h" "internal/cord_rep_btree.h" "internal/cord_rep_btree_navigator.h" @@ -934,6 +935,23 @@ absl_cc_test( absl_cc_test( NAME + cord_data_edge_test + SRCS + "internal/cord_data_edge_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::cord_rep_test_util + absl::core_headers + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME cord_rep_btree_test SRCS "internal/cord_rep_btree_test.cc" diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 4ee722da..6547c2da 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -35,6 +35,7 @@ #include "absl/container/fixed_array.h" #include "absl/container/inlined_vector.h" #include "absl/strings/escaping.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_crc.h" @@ -1094,35 +1095,6 @@ void Cord::CopyToArraySlowPath(char* dst) const { } } -Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() { - auto& stack_of_right_children = stack_of_right_children_; - if (stack_of_right_children.empty()) { - assert(!current_chunk_.empty()); // Called on invalid iterator. - // We have reached the end of the Cord. - return *this; - } - - // Process the next node on the stack. - CordRep* node = stack_of_right_children.back(); - stack_of_right_children.pop_back(); - - // Get the child node if we encounter a SUBSTRING. - size_t offset = 0; - size_t length = node->length; - if (node->IsSubstring()) { - offset = node->substring()->start; - node = node->substring()->child; - } - - assert(node->IsExternal() || node->IsFlat()); - assert(length != 0); - const char* data = - node->IsExternal() ? node->external()->base : node->flat()->Data(); - current_chunk_ = absl::string_view(data + offset, length); - current_leaf_ = node; - return *this; -} - Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { ABSL_HARDENING_ASSERT(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); @@ -1165,133 +1137,36 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { return subcord; } - auto& stack_of_right_children = stack_of_right_children_; - if (n < current_chunk_.size()) { - // Range to read is a proper subrange of the current chunk. - assert(current_leaf_ != nullptr); - CordRep* subnode = CordRep::Ref(current_leaf_); - const char* data = subnode->IsExternal() ? subnode->external()->base - : subnode->flat()->Data(); - subnode = NewSubstring(subnode, current_chunk_.data() - data, n); - subcord.contents_.EmplaceTree(VerifyTree(subnode), method); - RemoveChunkPrefix(n); - return subcord; - } - - // Range to read begins with a proper subrange of the current chunk. - assert(!current_chunk_.empty()); + // Short circuit if reading the entire data edge. assert(current_leaf_ != nullptr); - CordRep* subnode = CordRep::Ref(current_leaf_); - if (current_chunk_.size() < subnode->length) { - const char* data = subnode->IsExternal() ? subnode->external()->base - : subnode->flat()->Data(); - subnode = NewSubstring(subnode, current_chunk_.data() - data, - current_chunk_.size()); - } - n -= current_chunk_.size(); - bytes_remaining_ -= current_chunk_.size(); - - // Process the next node(s) on the stack, reading whole subtrees depending on - // their length and how many bytes we are advancing. - CordRep* node = nullptr; - while (!stack_of_right_children.empty()) { - node = stack_of_right_children.back(); - stack_of_right_children.pop_back(); - if (node->length > n) break; - // TODO(qrczak): This might unnecessarily recreate existing concat nodes. - // Avoiding that would need pretty complicated logic (instead of - // current_leaf, keep current_subtree_ which points to the highest node - // such that the current leaf can be found on the path of left children - // starting from current_subtree_; delay creating subnode while node is - // below current_subtree_; find the proper node along the path of left - // children starting from current_subtree_ if this loop exits while staying - // below current_subtree_; etc.; alternatively, push parents instead of - // right children on the stack). - subnode = Concat(subnode, CordRep::Ref(node)); - n -= node->length; - bytes_remaining_ -= node->length; - node = nullptr; - } - - if (node == nullptr) { - // We have reached the end of the Cord. - assert(bytes_remaining_ == 0); - subcord.contents_.EmplaceTree(VerifyTree(subnode), method); + if (n == current_leaf_->length) { + bytes_remaining_ = 0; + current_chunk_ = {}; + CordRep* tree = CordRep::Ref(current_leaf_); + subcord.contents_.EmplaceTree(VerifyTree(tree), method); return subcord; } - // Get the child node if we encounter a SUBSTRING. - size_t offset = 0; - size_t length = node->length; - if (node->IsSubstring()) { - offset = node->substring()->start; - node = node->substring()->child; - } - - // Range to read ends with a proper (possibly empty) subrange of the current - // chunk. - assert(node->IsExternal() || node->IsFlat()); - assert(length > n); - if (n > 0) { - subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n)); - } - const char* data = - node->IsExternal() ? node->external()->base : node->flat()->Data(); - current_chunk_ = absl::string_view(data + offset + n, length - n); - current_leaf_ = node; - bytes_remaining_ -= n; - subcord.contents_.EmplaceTree(VerifyTree(subnode), method); - return subcord; -} - -void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { - assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); - assert(n >= current_chunk_.size()); // This should only be called when - // iterating to a new node. - - n -= current_chunk_.size(); - bytes_remaining_ -= current_chunk_.size(); - - if (stack_of_right_children_.empty()) { - // We have reached the end of the Cord. - assert(bytes_remaining_ == 0); - return; - } - - // Process the next node(s) on the stack, skipping whole subtrees depending on - // their length and how many bytes we are advancing. - CordRep* node = nullptr; - auto& stack_of_right_children = stack_of_right_children_; - while (!stack_of_right_children.empty()) { - node = stack_of_right_children.back(); - stack_of_right_children.pop_back(); - if (node->length > n) break; - n -= node->length; - bytes_remaining_ -= node->length; - node = nullptr; - } - - if (node == nullptr) { - // We have reached the end of the Cord. - assert(bytes_remaining_ == 0); - return; - } + // From this point on, we need a partial substring node. + // Get pointer to the underlying flat or external data payload and + // compute data pointer and offset into current flat or external. + CordRep* payload = current_leaf_->IsSubstring() + ? current_leaf_->substring()->child + : current_leaf_; + const char* data = payload->IsExternal() ? payload->external()->base + : payload->flat()->Data(); + const size_t offset = current_chunk_.data() - data; - // Get the child node if we encounter a SUBSTRING. - size_t offset = 0; - size_t length = node->length; - if (node->IsSubstring()) { - offset = node->substring()->start; - node = node->substring()->child; - } + CordRepSubstring* tree = new CordRepSubstring(); + tree->tag = cord_internal::SUBSTRING; + tree->length = n; + tree->start = offset; + tree->child = CordRep::Ref(payload); - assert(node->IsExternal() || node->IsFlat()); - assert(length > n); - const char* data = - node->IsExternal() ? node->external()->base : node->flat()->Data(); - current_chunk_ = absl::string_view(data + offset + n, length - n); - current_leaf_ = node; + subcord.contents_.EmplaceTree(VerifyTree(tree), method); bytes_remaining_ -= n; + current_chunk_.remove_prefix(n); + return subcord; } char Cord::operator[](size_t i) const { diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 7f34ef48..081b6311 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -80,6 +80,7 @@ #include "absl/functional/function_ref.h" #include "absl/meta/type_traits.h" #include "absl/strings/cord_analysis.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_btree_reader.h" @@ -388,12 +389,6 @@ class Cord { using CordRepBtree = absl::cord_internal::CordRepBtree; using CordRepBtreeReader = absl::cord_internal::CordRepBtreeReader; - // Stack of right children of concat nodes that we have to visit. - // Keep this at the end of the structure to avoid cache-thrashing. - // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for - // the inlined vector size (47 exists for backward compatibility). - using Stack = absl::InlinedVector<absl::cord_internal::CordRep*, 47>; - // Constructs a `begin()` iterator from `tree`. `tree` must not be null. explicit ChunkIterator(cord_internal::CordRep* tree); @@ -409,17 +404,10 @@ class Cord { Cord AdvanceAndReadBytes(size_t n); void AdvanceBytes(size_t n); - // Stack specific operator++ - ChunkIterator& AdvanceStack(); - // Btree specific operator++ ChunkIterator& AdvanceBtree(); void AdvanceBytesBtree(size_t n); - // Iterates `n` bytes, where `n` is expected to be greater than or equal to - // `current_chunk_.size()`. - void AdvanceBytesSlowPath(size_t n); - // A view into bytes of the current `CordRep`. It may only be a view to a // suffix of bytes if this is being used by `CharIterator`. absl::string_view current_chunk_; @@ -432,9 +420,6 @@ class Cord { // Cord reader for cord btrees. Empty if not traversing a btree. CordRepBtreeReader btree_reader_; - - // See 'Stack' alias definition. - Stack stack_of_right_children_; }; // Cord::ChunkIterator::chunk_begin() @@ -725,7 +710,8 @@ class Cord { // be used by spelling absl::strings_internal::MakeStringConstant, which is // also an internal API. template <typename T> - explicit constexpr Cord(strings_internal::StringConstant<T>); + // NOLINTNEXTLINE(google-explicit-constructor) + constexpr Cord(strings_internal::StringConstant<T>); private: using CordRep = absl::cord_internal::CordRep; @@ -1321,25 +1307,24 @@ inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) { tree = cord_internal::SkipCrcNode(tree); if (tree->tag == cord_internal::BTREE) { current_chunk_ = btree_reader_.Init(tree->btree()); - return; + } else { + current_leaf_ = tree; + current_chunk_ = cord_internal::EdgeData(tree); } - - stack_of_right_children_.push_back(tree); - operator++(); } -inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) - : bytes_remaining_(tree->length) { +inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) { + bytes_remaining_ = tree->length; InitTree(tree); } -inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) - : bytes_remaining_(cord->size()) { - if (cord->contents_.is_tree()) { - InitTree(cord->contents_.as_tree()); +inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) { + if (CordRep* tree = cord->contents_.tree()) { + bytes_remaining_ = tree->length; + InitTree(tree); } else { - current_chunk_ = - absl::string_view(cord->contents_.data(), bytes_remaining_); + bytes_remaining_ = cord->contents_.inline_size(); + current_chunk_ = {cord->contents_.data(), bytes_remaining_}; } } @@ -1369,8 +1354,11 @@ inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() { assert(bytes_remaining_ >= current_chunk_.size()); bytes_remaining_ -= current_chunk_.size(); if (bytes_remaining_ > 0) { - return btree_reader_ ? AdvanceBtree() : AdvanceStack(); - } else { + if (btree_reader_) { + return AdvanceBtree(); + } else { + assert(!current_chunk_.empty()); // Called on invalid iterator. + } current_chunk_ = {}; } return *this; @@ -1411,7 +1399,11 @@ inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { RemoveChunkPrefix(n); } else if (n != 0) { - btree_reader_ ? AdvanceBytesBtree(n) : AdvanceBytesSlowPath(n); + if (btree_reader_) { + AdvanceBytesBtree(n); + } else { + bytes_remaining_ = 0; + } } } diff --git a/absl/strings/cord_analysis.cc b/absl/strings/cord_analysis.cc index 3fa15b01..73d3c4e6 100644 --- a/absl/strings/cord_analysis.cc +++ b/absl/strings/cord_analysis.cc @@ -12,13 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "absl/strings/cord_analysis.h" + #include <cstddef> #include <cstdint> #include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/container/inlined_vector.h" -#include "absl/strings/cord_analysis.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_crc.h" @@ -98,16 +100,6 @@ struct RawUsage<Mode::kFairShare> { } }; -// Returns true if the provided rep is a valid data edge. -bool IsDataEdge(const CordRep* rep) { - // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below - // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL - // check in the slow path the SUBSTRING check to optimize for the hot path. - if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true; - if (rep->tag == SUBSTRING) rep = rep->substring()->child; - return rep->tag == EXTERNAL || rep->tag >= FLAT; -} - // Computes the estimated memory size of the provided data edge. // External reps are assumed 'heap allocated at their exact size'. template <Mode mode> diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index c26e506d..9dcc4ce5 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -1866,6 +1866,78 @@ TEST_P(CordTest, CordChunkIteratorOperations) { VerifyChunkIterator(subcords, 128); } + +TEST_P(CordTest, AdvanceAndReadOnDataEdge) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); + const std::string data = RandomLowercaseString(&rng, 2000); + for (bool as_flat : {true, false}) { + SCOPED_TRACE(as_flat ? "Flat" : "External"); + + absl::Cord cord = + as_flat ? absl::Cord(data) + : absl::MakeCordFromExternal(data, [](absl::string_view) {}); + auto it = cord.Chars().begin(); +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED + EXPECT_DEATH_IF_SUPPORTED(cord.AdvanceAndRead(&it, 2001), ".*"); +#endif + + it = cord.Chars().begin(); + absl::Cord frag = cord.AdvanceAndRead(&it, 2000); + EXPECT_EQ(frag, data); + EXPECT_TRUE(it == cord.Chars().end()); + + it = cord.Chars().begin(); + frag = cord.AdvanceAndRead(&it, 200); + EXPECT_EQ(frag, data.substr(0, 200)); + EXPECT_FALSE(it == cord.Chars().end()); + + frag = cord.AdvanceAndRead(&it, 1500); + EXPECT_EQ(frag, data.substr(200, 1500)); + EXPECT_FALSE(it == cord.Chars().end()); + + frag = cord.AdvanceAndRead(&it, 300); + EXPECT_EQ(frag, data.substr(1700, 300)); + EXPECT_TRUE(it == cord.Chars().end()); + } +} + +TEST_P(CordTest, AdvanceAndReadOnSubstringDataEdge) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); + const std::string data = RandomLowercaseString(&rng, 2500); + for (bool as_flat : {true, false}) { + SCOPED_TRACE(as_flat ? "Flat" : "External"); + + absl::Cord cord = + as_flat ? absl::Cord(data) + : absl::MakeCordFromExternal(data, [](absl::string_view) {}); + cord = cord.Subcord(200, 2000); + const std::string substr = data.substr(200, 2000); + + auto it = cord.Chars().begin(); +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED + EXPECT_DEATH_IF_SUPPORTED(cord.AdvanceAndRead(&it, 2001), ".*"); +#endif + + it = cord.Chars().begin(); + absl::Cord frag = cord.AdvanceAndRead(&it, 2000); + EXPECT_EQ(frag, substr); + EXPECT_TRUE(it == cord.Chars().end()); + + it = cord.Chars().begin(); + frag = cord.AdvanceAndRead(&it, 200); + EXPECT_EQ(frag, substr.substr(0, 200)); + EXPECT_FALSE(it == cord.Chars().end()); + + frag = cord.AdvanceAndRead(&it, 1500); + EXPECT_EQ(frag, substr.substr(200, 1500)); + EXPECT_FALSE(it == cord.Chars().end()); + + frag = cord.AdvanceAndRead(&it, 300); + EXPECT_EQ(frag, substr.substr(1700, 300)); + EXPECT_TRUE(it == cord.Chars().end()); + } +} + TEST_P(CordTest, CharIteratorTraits) { static_assert(std::is_copy_constructible<absl::Cord::CharIterator>::value, ""); diff --git a/absl/strings/internal/cord_data_edge.h b/absl/strings/internal/cord_data_edge.h new file mode 100644 index 00000000..e18b33e1 --- /dev/null +++ b/absl/strings/internal/cord_data_edge.h @@ -0,0 +1,63 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_ +#define ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_ + +#include <cassert> +#include <cstddef> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node +// holding a FLAT or EXTERNAL child rep. Requires `rep != nullptr`. +inline bool IsDataEdge(const CordRep* edge) { + assert(edge != nullptr); + + // The fast path is that `edge` is an EXTERNAL or FLAT node, making the below + // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL + // check in the slow path of the SUBSTRING check to optimize for the hot path. + if (edge->tag == EXTERNAL || edge->tag >= FLAT) return true; + if (edge->tag == SUBSTRING) edge = edge->substring()->child; + return edge->tag == EXTERNAL || edge->tag >= FLAT; +} + +// Returns the `absl::string_view` data reference for the provided data edge. +// Requires 'IsDataEdge(edge) == true`. +inline absl::string_view EdgeData(const CordRep* edge) { + assert(IsDataEdge(edge)); + + size_t offset = 0; + const size_t length = edge->length; + if (edge->IsSubstring()) { + offset = edge->substring()->start; + edge = edge->substring()->child; + } + return edge->tag >= FLAT + ? absl::string_view{edge->flat()->Data() + offset, length} + : absl::string_view{edge->external()->base + offset, length}; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_ diff --git a/absl/strings/internal/cord_data_edge_test.cc b/absl/strings/internal/cord_data_edge_test.cc new file mode 100644 index 00000000..8fce3bc6 --- /dev/null +++ b/absl/strings/internal/cord_data_edge_test.cc @@ -0,0 +1,130 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_data_edge.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_test_util.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::absl::cordrep_testing::MakeExternal; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; + +TEST(CordDataEdgeTest, IsDataEdgeOnFlat) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + EXPECT_TRUE(IsDataEdge(rep)); + CordRep::Unref(rep); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnExternal) { + CordRep* rep = MakeExternal("Lorem ipsum dolor sit amet, consectetur ..."); + EXPECT_TRUE(IsDataEdge(rep)); + CordRep::Unref(rep); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfFlat) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRep* substr = MakeSubstring(1, 20, rep); + EXPECT_TRUE(IsDataEdge(substr)); + CordRep::Unref(substr); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfExternal) { + CordRep* rep = MakeExternal("Lorem ipsum dolor sit amet, consectetur ..."); + CordRep* substr = MakeSubstring(1, 20, rep); + EXPECT_TRUE(IsDataEdge(substr)); + CordRep::Unref(substr); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnBtree) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRepBtree* tree = CordRepBtree::New(rep); + EXPECT_FALSE(IsDataEdge(tree)); + CordRep::Unref(tree); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnBadSubstr) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRep* substr = MakeSubstring(1, 18, MakeSubstring(1, 20, rep)); + EXPECT_FALSE(IsDataEdge(substr)); + CordRep::Unref(substr); +} + +TEST(CordDataEdgeTest, EdgeDataOnFlat) { + absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ..."; + CordRep* rep = MakeFlat(value); + EXPECT_EQ(EdgeData(rep), value); + CordRep::Unref(rep); +} + +TEST(CordDataEdgeTest, EdgeDataOnExternal) { + absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ..."; + CordRep* rep = MakeExternal(value); + EXPECT_EQ(EdgeData(rep), value); + CordRep::Unref(rep); +} + +TEST(CordDataEdgeTest, EdgeDataOnSubstringOfFlat) { + absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ..."; + CordRep* rep = MakeFlat(value); + CordRep* substr = MakeSubstring(1, 20, rep); + EXPECT_EQ(EdgeData(substr), value.substr(1, 20)); + CordRep::Unref(substr); +} + +TEST(CordDataEdgeTest, EdgeDataOnSubstringOfExternal) { + absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ..."; + CordRep* rep = MakeExternal(value); + CordRep* substr = MakeSubstring(1, 20, rep); + EXPECT_EQ(EdgeData(substr), value.substr(1, 20)); + CordRep::Unref(substr); +} + +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + +TEST(CordDataEdgeTest, IsDataEdgeOnNullPtr) { + EXPECT_DEATH(IsDataEdge(nullptr), ".*"); +} + +TEST(CordDataEdgeTest, EdgeDataOnNullPtr) { + EXPECT_DEATH(EdgeData(nullptr), ".*"); +} + +TEST(CordDataEdgeTest, EdgeDataOnBtree) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRepBtree* tree = CordRepBtree::New(rep); + EXPECT_DEATH(EdgeData(tree), ".*"); + CordRep::Unref(tree); +} + +TEST(CordDataEdgeTest, EdgeDataOnBadSubstr) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRep* substr = MakeSubstring(1, 18, MakeSubstring(1, 20, rep)); + EXPECT_DEATH(EdgeData(substr), ".*"); + CordRep::Unref(substr); +} + +#endif // GTEST_HAS_DEATH_TEST && !NDEBUG + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc index 3ebd9f6d..2b592b47 100644 --- a/absl/strings/internal/cord_rep_btree.cc +++ b/absl/strings/internal/cord_rep_btree.cc @@ -22,6 +22,7 @@ #include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_consume.h" #include "absl/strings/internal/cord_rep_flat.h" @@ -69,7 +70,7 @@ void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream, // indentation and prefix / labels keeps us within roughly 80-100 wide. constexpr size_t kMaxDataLength = 60; stream << ", data = \"" - << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength) + << EdgeData(r).substr(0, kMaxDataLength) << (r->length > kMaxDataLength ? "\"..." : "\""); } stream << '\n'; @@ -150,7 +151,7 @@ inline CordRep* MakeSubstring(CordRep* rep, size_t offset) { CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) { assert(length > 0); assert(length <= edge->length); - assert(CordRepBtree::IsDataEdge(edge)); + assert(IsDataEdge(edge)); if (length >= edge->length) return edge; if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) { @@ -207,7 +208,7 @@ void DeleteSubstring(CordRepSubstring* substring) { // Deletes a leaf node data edge. Requires `IsDataEdge(rep)`. void DeleteLeafEdge(CordRep* rep) { - assert(CordRepBtree::IsDataEdge(rep)); + assert(IsDataEdge(rep)); if (rep->tag >= FLAT) { CordRepFlat::Delete(rep->flat()); } else if (rep->tag == EXTERNAL) { diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index df5c994c..0e78e12c 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -22,6 +22,7 @@ #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/optimization.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_flat.h" #include "absl/strings/string_view.h" @@ -310,13 +311,6 @@ class CordRepBtree : public CordRep { // Requires this instance to be a leaf node, and `index` to be valid index. inline absl::string_view Data(size_t index) const; - static const char* EdgeDataPtr(const CordRep* r); - static absl::string_view EdgeData(const CordRep* r); - - // Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node - // holding a FLAT or EXTERNAL child rep. - static bool IsDataEdge(const CordRep* rep); - // Diagnostics: returns true if `tree` is valid and internally consistent. // If `shallow` is false, then the provided top level node and all child nodes // below it are recursively checked. If `shallow` is true, only the provided @@ -631,34 +625,11 @@ inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin, return {edges_ + begin, static_cast<size_t>(end - begin)}; } -inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) { - assert(IsDataEdge(r)); - size_t offset = 0; - if (r->tag == SUBSTRING) { - offset = r->substring()->start; - r = r->substring()->child; - } - return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset; -} - -inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) { - return absl::string_view(EdgeDataPtr(r), r->length); -} - inline absl::string_view CordRepBtree::Data(size_t index) const { assert(height() == 0); return EdgeData(Edge(index)); } -inline bool CordRepBtree::IsDataEdge(const CordRep* rep) { - // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below - // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL - // check in the slow path the SUBSTRING check to optimize for the hot path. - if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true; - if (rep->tag == SUBSTRING) rep = rep->substring()->child; - return rep->tag == EXTERNAL || rep->tag >= FLAT; -} - inline CordRepBtree* CordRepBtree::New(int height) { CordRepBtree* tree = new CordRepBtree; tree->length = 0; diff --git a/absl/strings/internal/cord_rep_btree_navigator.cc b/absl/strings/internal/cord_rep_btree_navigator.cc index 19afbe90..9b896a3d 100644 --- a/absl/strings/internal/cord_rep_btree_navigator.cc +++ b/absl/strings/internal/cord_rep_btree_navigator.cc @@ -16,6 +16,7 @@ #include <cassert> +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" @@ -39,7 +40,7 @@ inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) { assert(n <= rep->length); assert(offset < rep->length); assert(offset <= rep->length - n); - assert(CordRepBtree::IsDataEdge(rep)); + assert(IsDataEdge(rep)); if (n == 0) return nullptr; if (n == rep->length) return CordRep::Ref(rep); diff --git a/absl/strings/internal/cord_rep_btree_reader.cc b/absl/strings/internal/cord_rep_btree_reader.cc index 5dc76966..0d0e8601 100644 --- a/absl/strings/internal/cord_rep_btree_reader.cc +++ b/absl/strings/internal/cord_rep_btree_reader.cc @@ -17,6 +17,7 @@ #include <cassert> #include "absl/base/config.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_btree_navigator.h" @@ -44,7 +45,7 @@ absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size, // can directly return the substring into the current data edge as the next // chunk. We can easily establish from the above code that `navigator_.Next()` // has not been called as that requires `chunk_size` to be zero. - if (n < chunk_size) return CordRepBtree::EdgeData(edge).substr(result.n); + if (n < chunk_size) return EdgeData(edge).substr(result.n); // The amount of data taken from the last edge is `chunk_size` and `result.n` // contains the offset into the current edge trailing the read data (which can @@ -60,7 +61,7 @@ absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size, // We did not read all data, return remaining data from current edge. edge = navigator_.Current(); remaining_ -= consumed_by_read + edge->length; - return CordRepBtree::EdgeData(edge).substr(result.n); + return EdgeData(edge).substr(result.n); } } // namespace cord_internal diff --git a/absl/strings/internal/cord_rep_btree_reader.h b/absl/strings/internal/cord_rep_btree_reader.h index 7aa79dbf..8db8f8dd 100644 --- a/absl/strings/internal/cord_rep_btree_reader.h +++ b/absl/strings/internal/cord_rep_btree_reader.h @@ -18,6 +18,7 @@ #include <cassert> #include "absl/base/config.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_btree_navigator.h" @@ -167,7 +168,7 @@ inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) { assert(tree != nullptr); const CordRep* edge = navigator_.InitFirst(tree); remaining_ = tree->length - edge->length; - return CordRepBtree::EdgeData(edge); + return EdgeData(edge); } inline absl::string_view CordRepBtreeReader::Next() { @@ -175,7 +176,7 @@ inline absl::string_view CordRepBtreeReader::Next() { const CordRep* edge = navigator_.Next(); assert(edge != nullptr); remaining_ -= edge->length; - return CordRepBtree::EdgeData(edge); + return EdgeData(edge); } inline absl::string_view CordRepBtreeReader::Skip(size_t skip) { @@ -190,7 +191,7 @@ inline absl::string_view CordRepBtreeReader::Skip(size_t skip) { // The combined length of all edges skipped before `pos.edge` is `skip - // pos.offset`, all of which are 'consumed', as well as the current edge. remaining_ -= skip - pos.offset + pos.edge->length; - return CordRepBtree::EdgeData(pos.edge).substr(pos.offset); + return EdgeData(pos.edge).substr(pos.offset); } inline absl::string_view CordRepBtreeReader::Seek(size_t offset) { @@ -199,7 +200,7 @@ inline absl::string_view CordRepBtreeReader::Seek(size_t offset) { remaining_ = 0; return {}; } - absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset); + absl::string_view chunk = EdgeData(pos.edge).substr(pos.offset); remaining_ = length() - offset - chunk.length(); return chunk; } diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc index 1d56b252..51b90db1 100644 --- a/absl/strings/internal/cord_rep_btree_test.cc +++ b/absl/strings/internal/cord_rep_btree_test.cc @@ -25,6 +25,7 @@ #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" #include "absl/cleanup/cleanup.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_test_util.h" #include "absl/strings/str_cat.h" @@ -323,30 +324,26 @@ TEST(CordRepBtreeTest, EdgeData) { CordRep* substr2 = MakeSubstring(1, 6, CordRep::Ref(external)); CordRep* bad_substr = MakeSubstring(1, 2, CordRep::Ref(substr1)); - EXPECT_TRUE(CordRepBtree::IsDataEdge(flat)); - EXPECT_THAT(CordRepBtree::EdgeDataPtr(flat), - TypedEq<const void*>(flat->Data())); - EXPECT_THAT(CordRepBtree::EdgeData(flat), Eq("Hello world")); + EXPECT_TRUE(IsDataEdge(flat)); + EXPECT_THAT(EdgeData(flat).data(), TypedEq<const void*>(flat->Data())); + EXPECT_THAT(EdgeData(flat), Eq("Hello world")); - EXPECT_TRUE(CordRepBtree::IsDataEdge(external)); - EXPECT_THAT(CordRepBtree::EdgeDataPtr(external), - TypedEq<const void*>(external->base)); - EXPECT_THAT(CordRepBtree::EdgeData(external), Eq("Hello external")); + EXPECT_TRUE(IsDataEdge(external)); + EXPECT_THAT(EdgeData(external).data(), TypedEq<const void*>(external->base)); + EXPECT_THAT(EdgeData(external), Eq("Hello external")); - EXPECT_TRUE(CordRepBtree::IsDataEdge(substr1)); - EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr1), - TypedEq<const void*>(flat->Data() + 1)); - EXPECT_THAT(CordRepBtree::EdgeData(substr1), Eq("ello w")); + EXPECT_TRUE(IsDataEdge(substr1)); + EXPECT_THAT(EdgeData(substr1).data(), TypedEq<const void*>(flat->Data() + 1)); + EXPECT_THAT(EdgeData(substr1), Eq("ello w")); - EXPECT_TRUE(CordRepBtree::IsDataEdge(substr2)); - EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr2), + EXPECT_TRUE(IsDataEdge(substr2)); + EXPECT_THAT(EdgeData(substr2).data(), TypedEq<const void*>(external->base + 1)); - EXPECT_THAT(CordRepBtree::EdgeData(substr2), Eq("ello e")); + EXPECT_THAT(EdgeData(substr2), Eq("ello e")); - EXPECT_FALSE(CordRepBtree::IsDataEdge(bad_substr)); + EXPECT_FALSE(IsDataEdge(bad_substr)); #if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) - EXPECT_DEATH(CordRepBtree::EdgeData(bad_substr), ".*"); - EXPECT_DEATH(CordRepBtree::EdgeDataPtr(bad_substr), ".*"); + EXPECT_DEATH(EdgeData(bad_substr), ".*"); #endif CordRep::Unref(bad_substr); |