diff options
-rw-r--r-- | absl/flags/flag.h | 8 | ||||
-rw-r--r-- | absl/strings/BUILD.bazel | 12 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 13 | ||||
-rw-r--r-- | absl/strings/cord.cc | 20 | ||||
-rw-r--r-- | absl/strings/cord.h | 28 | ||||
-rw-r--r-- | absl/strings/cord_test.cc | 43 | ||||
-rw-r--r-- | absl/strings/internal/cord_internal.h | 41 | ||||
-rw-r--r-- | absl/strings/internal/cord_internal_test.cc | 116 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree.cc | 227 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree.h | 95 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree_test.cc | 136 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_ring.cc | 20 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_ring.h | 4 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_test_util.h | 35 | ||||
-rw-r--r-- | absl/strings/internal/cordz_update_tracker.h | 2 | ||||
-rw-r--r-- | absl/strings/internal/cordz_update_tracker_test.cc | 2 | ||||
-rw-r--r-- | absl/strings/numbers.h | 34 | ||||
-rw-r--r-- | absl/strings/numbers_test.cc | 143 | ||||
-rw-r--r-- | absl/strings/str_split_test.cc | 8 | ||||
-rw-r--r-- | absl/strings/substitute.cc | 3 |
20 files changed, 890 insertions, 100 deletions
diff --git a/absl/flags/flag.h b/absl/flags/flag.h index 1a7e4d4d..a724ccc9 100644 --- a/absl/flags/flag.h +++ b/absl/flags/flag.h @@ -241,8 +241,8 @@ ABSL_NAMESPACE_END /* default value argument. That keeps temporaries alive */ \ /* long enough for NonConst to work correctly. */ \ static constexpr absl::string_view Value( \ - absl::string_view v = ABSL_FLAG_IMPL_FLAGHELP(txt)) { \ - return v; \ + absl::string_view absl_flag_help = ABSL_FLAG_IMPL_FLAGHELP(txt)) { \ + return absl_flag_help; \ } \ static std::string NonConst() { return std::string(Value()); } \ }; \ @@ -254,8 +254,8 @@ ABSL_NAMESPACE_END #define ABSL_FLAG_IMPL_DECLARE_DEF_VAL_WRAPPER(name, Type, default_value) \ struct AbslFlagDefaultGenFor##name { \ Type value = absl::flags_internal::InitDefaultValue<Type>(default_value); \ - static void Gen(void* p) { \ - new (p) Type(AbslFlagDefaultGenFor##name{}.value); \ + static void Gen(void* absl_flag_default_loc) { \ + new (absl_flag_default_loc) Type(AbslFlagDefaultGenFor##name{}.value); \ } \ }; diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index c046366c..090fc58a 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -306,6 +306,17 @@ cc_library( ) cc_test( + name = "cord_internal_test", + srcs = ["internal/cord_internal_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( name = "cord_rep_btree_test", size = "medium", srcs = ["internal/cord_rep_btree_test.cc"], @@ -684,6 +695,7 @@ cc_test( "//absl/base:endian", "//absl/base:raw_logging_internal", "//absl/container:fixed_array", + "//absl/random", "@com_google_googletest//:gtest_main", ], ) diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 8ad5f9c7..d6801fe6 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -920,6 +920,7 @@ absl_cc_test( absl::cordz_test_helpers absl::core_headers absl::endian + absl::random_random absl::raw_logging_internal absl::fixed_array GTest::gmock_main @@ -945,6 +946,18 @@ absl_cc_test( absl_cc_test( NAME + cord_internal_test + SRCS + "internal/cord_internal_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord_internal + GTest::gmock_main +) + +absl_cc_test( + NAME cord_rep_btree_test SRCS "internal/cord_rep_btree_test.cc" diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 29af9782..854047ca 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -419,7 +419,7 @@ void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) { // written to region and the actual size increase will be written to size. static inline bool PrepareAppendRegion(CordRep* root, char** region, size_t* size, size_t max_length) { - if (root->IsBtree() && root->refcount.IsOne()) { + if (root->IsBtree() && root->refcount.IsMutable()) { Span<char> span = root->btree()->GetAppendBuffer(max_length); if (!span.empty()) { *region = span.data(); @@ -430,11 +430,11 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, // Search down the right-hand path for a non-full FLAT node. CordRep* dst = root; - while (dst->IsConcat() && dst->refcount.IsOne()) { + while (dst->IsConcat() && dst->refcount.IsMutable()) { dst = dst->concat()->right; } - if (!dst->IsFlat() || !dst->refcount.IsOne()) { + if (!dst->IsFlat() || !dst->refcount.IsMutable()) { *region = nullptr; *size = 0; return false; @@ -649,7 +649,7 @@ Cord& Cord::operator=(absl::string_view src) { if (tree != nullptr) { CordzUpdateScope scope(contents_.cordz_info(), method); if (tree->IsFlat() && tree->flat()->Capacity() >= length && - tree->refcount.IsOne()) { + tree->refcount.IsMutable()) { // Copy in place if the existing FLAT node is reusable. memmove(tree->flat()->Data(), data, length); tree->length = length; @@ -819,7 +819,7 @@ void Cord::Prepend(const Cord& src) { return Prepend(src_contents); } -void Cord::Prepend(absl::string_view src) { +void Cord::PrependArray(absl::string_view src, MethodIdentifier method) { if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. if (!contents_.is_tree()) { size_t cur_size = contents_.inline_size(); @@ -834,7 +834,7 @@ void Cord::Prepend(absl::string_view src) { } } CordRep* rep = NewTree(src.data(), src.size(), 0); - contents_.PrependTree(rep, CordzUpdateTracker::kPrependString); + contents_.PrependTree(rep, method); } template <typename T, Cord::EnableIfString<T>> @@ -894,7 +894,7 @@ static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { if (n >= node->length) return nullptr; if (n == 0) return CordRep::Ref(node); absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; - bool inplace_ok = node->refcount.IsOne(); + bool inplace_ok = node->refcount.IsMutable(); while (node->IsConcat()) { assert(n <= node->length); @@ -907,7 +907,7 @@ static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { n -= node->concat()->right->length; node = node->concat()->left; } - inplace_ok = inplace_ok && node->refcount.IsOne(); + inplace_ok = inplace_ok && node->refcount.IsMutable(); } assert(n <= node->length); @@ -968,9 +968,7 @@ void Cord::RemoveSuffix(size_t n) { auto constexpr method = CordzUpdateTracker::kRemoveSuffix; CordzUpdateScope scope(contents_.cordz_info(), method); if (tree->IsBtree()) { - CordRep* old = tree; - tree = tree->btree()->SubTree(0, tree->length - n); - CordRep::Unref(old); + tree = CordRepBtree::RemoveSuffix(tree->btree(), n); } else { CordRep* newrep = RemoveSuffixFrom(tree, n); CordRep::Unref(tree); diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 175d7b90..f0a19914 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -460,6 +460,16 @@ class Cord { // `Cord::chunk_begin()` and `Cord::chunk_end()`. class ChunkRange { public: + // Fulfill minimum c++ container requirements [container.requirements] + // Theses (partial) container type definitions allow ChunkRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = absl::string_view; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = ChunkIterator; + using const_iterator = ChunkIterator; + explicit ChunkRange(const Cord* cord) : cord_(cord) {} ChunkIterator begin() const; @@ -591,6 +601,16 @@ class Cord { // `Cord::char_begin()` and `Cord::char_end()`. class CharRange { public: + // Fulfill minimum c++ container requirements [container.requirements] + // Theses (partial) container type definitions allow CharRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = char; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = CharIterator; + using const_iterator = CharIterator; + explicit CharRange(const Cord* cord) : cord_(cord) {} CharIterator begin() const; @@ -881,6 +901,10 @@ class Cord { template <typename C> void AppendImpl(C&& src); + // Prepends the provided data to this instance. `method` contains the public + // API method for this action which is tracked for Cordz sampling purposes. + void PrependArray(absl::string_view src, MethodIdentifier method); + // Assigns the value in 'src' to this instance, 'stealing' its contents. // Requires src.length() > kMaxBytesToCopy. Cord& AssignLargeString(std::string&& src); @@ -1220,6 +1244,10 @@ inline void Cord::Append(absl::string_view src) { contents_.AppendArray(src, CordzUpdateTracker::kAppendString); } +inline void Cord::Prepend(absl::string_view src) { + PrependArray(src, CordzUpdateTracker::kPrependString); +} + extern template void Cord::Append(std::string&& src); extern template void Cord::Prepend(std::string&& src); diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 50079b7c..cced9bba 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -34,6 +34,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/base/macros.h" #include "absl/container/fixed_array.h" +#include "absl/random/random.h" #include "absl/strings/cord_test_helpers.h" #include "absl/strings/cordz_test_helpers.h" #include "absl/strings/str_cat.h" @@ -1833,6 +1834,48 @@ TEST_P(CordTest, Hardening) { EXPECT_DEATH_IF_SUPPORTED(++cord.chunk_end(), ""); } +// This test mimics a specific (and rare) application repeatedly splitting a +// cord, inserting (overwriting) a string value, and composing a new cord from +// the three pieces. This is hostile towards a Btree implementation: A split of +// a node at any level is likely to have the right-most edge of the left split, +// and the left-most edge of the right split shared. For example, splitting a +// leaf node with 6 edges will result likely in a 1-6, 2-5, 3-4, etc. split, +// sharing the 'split node'. When recomposing such nodes, we 'injected' an edge +// in that node. As this happens with some probability on each level of the +// tree, this will quickly grow the tree until it reaches maximum height. +TEST_P(CordTest, BtreeHostileSplitInsertJoin) { + absl::BitGen bitgen; + + // Start with about 1GB of data + std::string data(1 << 10, 'x'); + absl::Cord buffer(data); + absl::Cord cord; + for (int i = 0; i < 1000000; ++i) { + cord.Append(buffer); + } + + for (int j = 0; j < 1000; ++j) { + size_t offset = absl::Uniform(bitgen, 0u, cord.size()); + size_t length = absl::Uniform(bitgen, 100u, data.size()); + if (cord.size() == offset) { + cord.Append(absl::string_view(data.data(), length)); + } else { + absl::Cord suffix; + if (offset + length < cord.size()) { + suffix = cord; + suffix.RemovePrefix(offset + length); + } + if (cord.size() > offset) { + cord.RemoveSuffix(cord.size() - offset); + } + cord.Append(absl::string_view(data.data(), length)); + if (!suffix.empty()) { + cord.Append(suffix); + } + } + } +} + class AfterExitCordTester { public: bool Set(absl::Cord* cord, absl::string_view expected) { diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index 0300ac40..bfe5564e 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -87,6 +87,9 @@ class RefcountAndFlags { constexpr RefcountAndFlags() : count_{kRefIncrement} {} struct Immortal {}; explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {} + struct WithCrc {}; + explicit constexpr RefcountAndFlags(WithCrc) + : count_(kCrcFlag | kRefIncrement) {} // Increments the reference count. Imposes no memory ordering. inline void Increment() { @@ -122,14 +125,32 @@ class RefcountAndFlags { return count_.load(std::memory_order_acquire) >> kNumFlags; } - // Returns whether the atomic integer is 1. - // If the reference count is used in the conventional way, a - // reference count of 1 implies that the current thread owns the - // reference and no other thread shares it. - // This call performs the test for a reference count of one, and - // performs the memory barrier needed for the owning thread - // to act on the object, knowing that it has exclusive access to the - // object. Always returns false when the immortal bit is set. + // Returns true if the referenced object carries a CRC value. + bool HasCrc() const { + return (count_.load(std::memory_order_relaxed) & kCrcFlag) != 0; + } + + // Returns true iff the atomic integer is 1 and this node does not store + // a CRC. When both these conditions are met, the current thread owns + // the reference and no other thread shares it, so its contents may be + // safely mutated. + // + // If the referenced item is shared, carries a CRC, or is immortal, + // it should not be modified in-place, and this function returns false. + // + // This call performs the memory barrier needed for the owning thread + // to act on the object, so that if it returns true, it may safely + // assume exclusive access to the object. + inline bool IsMutable() { + return (count_.load(std::memory_order_acquire)) == kRefIncrement; + } + + // Returns whether the atomic integer is 1. Similar to IsMutable(), + // but does not check for a stored CRC. (An unshared node with a CRC is not + // mutable, because changing its data would invalidate the CRC.) + // + // When this returns true, there are no other references, and data sinks + // may safely adopt the children of the CordRep. inline bool IsOne() { return (count_.load(std::memory_order_acquire) & kRefcountMask) == kRefIncrement; @@ -149,14 +170,14 @@ class RefcountAndFlags { kNumFlags = 2, kImmortalFlag = 0x1, - kReservedFlag = 0x2, + kCrcFlag = 0x2, kRefIncrement = (1 << kNumFlags), // Bitmask to use when checking refcount by equality. This masks out // all flags except kImmortalFlag, which is part of the refcount for // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1 // if the immortal bit is set.) - kRefcountMask = ~kReservedFlag, + kRefcountMask = ~kCrcFlag, }; std::atomic<int32_t> count_; diff --git a/absl/strings/internal/cord_internal_test.cc b/absl/strings/internal/cord_internal_test.cc new file mode 100644 index 00000000..0758dfef --- /dev/null +++ b/absl/strings/internal/cord_internal_test.cc @@ -0,0 +1,116 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_internal.h" + +#include "gmock/gmock.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +TEST(RefcountAndFlags, NormalRefcount) { + for (bool expect_high_refcount : {false, true}) { + SCOPED_TRACE(expect_high_refcount); + RefcountAndFlags refcount; + // count = 1 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_TRUE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + refcount.Increment(); + // count = 2 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_FALSE(refcount.IsOne()); + + // Decrementing should return true, since a reference is outstanding. + if (expect_high_refcount) { + EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_TRUE(refcount.Decrement()); + } + // count = 1 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_TRUE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + // One more decremnt will return false, as no references remain. + if (expect_high_refcount) { + EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_FALSE(refcount.Decrement()); + } + } +} + +TEST(RefcountAndFlags, CrcRefcount) { + for (bool expect_high_refcount : {false, true}) { + SCOPED_TRACE(expect_high_refcount); + RefcountAndFlags refcount(RefcountAndFlags::WithCrc{}); + // count = 1 + + // A CRC-carrying node is never mutable, but can be unshared + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + refcount.Increment(); + // count = 2 + + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_FALSE(refcount.IsOne()); + + // Decrementing should return true, since a reference is outstanding. + if (expect_high_refcount) { + EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_TRUE(refcount.Decrement()); + } + // count = 1 + + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + // One more decremnt will return false, as no references remain. + if (expect_high_refcount) { + EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_FALSE(refcount.Decrement()); + } + } +} + +TEST(RefcountAndFlags, ImmortalRefcount) { + RefcountAndFlags immortal_refcount(RefcountAndFlags::Immortal{}); + + for (int i = 0; i < 100; ++i) { + // An immortal refcount is never unshared, and decrementing never causes + // a collection. + EXPECT_FALSE(immortal_refcount.HasCrc()); + EXPECT_FALSE(immortal_refcount.IsMutable()); + EXPECT_FALSE(immortal_refcount.IsOne()); + EXPECT_TRUE(immortal_refcount.Decrement()); + EXPECT_TRUE(immortal_refcount.DecrementExpectHighRefcount()); + } +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc index 6d53ab61..4404f33a 100644 --- a/absl/strings/internal/cord_rep_btree.cc +++ b/absl/strings/internal/cord_rep_btree.cc @@ -140,6 +140,26 @@ inline CordRep* MakeSubstring(CordRep* rep, size_t offset) { return CreateSubstring(rep, offset, rep->length - offset); } +// Resizes `edge` to the provided `length`. Adopts a reference on `edge`. +// This method directly returns `edge` if `length` equals `edge->length`. +// If `is_mutable` is set to true, this function may return `edge` with +// `edge->length` set to the new length depending on the type and size of +// `edge`. Otherwise, this function returns a new CordRepSubstring value. +// Requires `length > 0 && length <= edge->length`. +CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) { + assert(length > 0); + assert(length <= edge->length); + assert(CordRepBtree::IsDataEdge(edge)); + if (length >= edge->length) return edge; + + if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) { + edge->length = length; + return edge; + } + + return CreateSubstring(edge, 0, length); +} + template <EdgeType edge_type> inline absl::string_view Consume(absl::string_view s, size_t n) { return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n); @@ -196,8 +216,8 @@ void DeleteLeafEdge(CordRep* rep) { // propagate node changes up the stack. template <EdgeType edge_type> struct StackOperations { - // Returns true if the node at 'depth' is not shared, i.e. has a refcount - // of one and all of its parent nodes have a refcount of one. + // Returns true if the node at 'depth' is mutable, i.e. has a refcount + // of one, carries no CRC, and all of its parent nodes have a refcount of one. inline bool owned(int depth) const { return depth < share_depth; } // Returns the node at 'depth'. @@ -208,11 +228,11 @@ struct StackOperations { inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) { assert(depth <= tree->height()); int current_depth = 0; - while (current_depth < depth && tree->refcount.IsOne()) { + while (current_depth < depth && tree->refcount.IsMutable()) { stack[current_depth++] = tree; tree = tree->Edge(edge_type)->btree(); } - share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0); + share_depth = current_depth + (tree->refcount.IsMutable() ? 1 : 0); while (current_depth < depth) { stack[current_depth++] = tree; tree = tree->Edge(edge_type)->btree(); @@ -221,17 +241,17 @@ struct StackOperations { } // Builds a stack with the invariant that all nodes are private owned / not - // shared. This is used in iterative updates where a previous propagation - // guaranteed all nodes are owned / private. + // shared and carry no CRC data. This is used in iterative updates where a + // previous propagation guaranteed all nodes have this property. inline void BuildOwnedStack(CordRepBtree* tree, int height) { assert(height <= CordRepBtree::kMaxHeight); int depth = 0; while (depth < height) { - assert(tree->refcount.IsOne()); + assert(tree->refcount.IsMutable()); stack[depth++] = tree; tree = tree->Edge(edge_type)->btree(); } - assert(tree->refcount.IsOne()); + assert(tree->refcount.IsMutable()); share_depth = depth + 1; } @@ -240,11 +260,14 @@ struct StackOperations { static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) { switch (result.action) { case CordRepBtree::kPopped: - if (ABSL_PREDICT_FALSE(tree->height() >= CordRepBtree::kMaxHeight)) { - ABSL_RAW_LOG(FATAL, "Max height exceeded"); - } - return edge_type == kBack ? CordRepBtree::New(tree, result.tree) + tree = edge_type == kBack ? CordRepBtree::New(tree, result.tree) : CordRepBtree::New(result.tree, tree); + if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) { + tree = CordRepBtree::Rebuild(tree); + ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight, + "Max height exceeded"); + } + return tree; case CordRepBtree::kCopied: CordRep::Unref(tree); ABSL_FALLTHROUGH_INTENDED; @@ -313,12 +336,12 @@ struct StackOperations { return Unwind</*propagate=*/true>(tree, depth, length, result); } - // `share_depth` contains the depth at which the nodes in the stack become - // shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`), - // then `share_depth` is 0. If the 2nd node is shared (and implicitly all - // nodes below that) then `share_depth` is 1, etc. A `share_depth` greater - // than the depth of the stack indicates that none of the nodes in the stack - // are shared. + // `share_depth` contains the depth at which the nodes in the stack cannot + // be mutated. I.e., if the top most level is shared (i.e.: + // `!refcount.IsMutable()`), then `share_depth` is 0. If the 2nd node + // is shared (and implicitly all nodes below that) then `share_depth` is 1, + // etc. A `share_depth` greater than the depth of the stack indicates that + // none of the nodes in the stack are shared. int share_depth; NodeStack stack; @@ -692,7 +715,7 @@ CopyResult CordRepBtree::CopySuffix(size_t offset) { return result; } -CopyResult CordRepBtree::CopyPrefix(size_t n) { +CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) { assert(n > 0); assert(n <= this->length); @@ -704,10 +727,12 @@ CopyResult CordRepBtree::CopyPrefix(size_t n) { int height = this->height(); CordRepBtree* node = this; CordRep* front = node->Edge(kFront); - while (front->length >= n) { - if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1}; - node = front->btree(); - front = node->Edge(kFront); + if (allow_folding) { + while (front->length >= n) { + if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1}; + node = front->btree(); + front = node->Edge(kFront); + } } if (node->length == n) return {CordRep::Ref(node), height}; @@ -746,6 +771,97 @@ CopyResult CordRepBtree::CopyPrefix(size_t n) { return result; } +CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) { + CordRep* front = tree->Edge(tree->begin()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(tree->begin() + 1, tree->end())); + CordRepBtree::Delete(tree); + } else { + CordRep::Ref(front); + CordRep::Unref(tree); + } + return front; +} + +CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length) { + assert(end <= tree->end()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(end, tree->end())); + tree->set_end(end); + tree->length = new_length; + } else { + CordRepBtree* old = tree; + tree = tree->CopyBeginTo(end, new_length); + CordRep::Unref(old); + } + return tree; +} + +CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) { + // Check input and deal with trivial cases 'Remove all/none' + assert(tree != nullptr); + assert(n <= tree->length); + const size_t len = tree->length; + if (ABSL_PREDICT_FALSE(n == 0)) { + return tree; + } + if (ABSL_PREDICT_FALSE(n >= len)) { + CordRepBtree::Unref(tree); + return nullptr; + } + + size_t length = len - n; + int height = tree->height(); + bool is_mutable = tree->refcount.IsMutable(); + + // Extract all top nodes which are reduced to size = 1 + Position pos = tree->IndexOfLength(length); + while (pos.index == tree->begin()) { + CordRep* edge = ExtractFront(tree); + is_mutable &= edge->refcount.IsMutable(); + if (height-- == 0) return ResizeEdge(edge, length, is_mutable); + tree = edge->btree(); + pos = tree->IndexOfLength(length); + } + + // Repeat the following sequence traversing down the tree: + // - Crop the top node to the 'last remaining edge' adjusting length. + // - Set the length for down edges to the partial length in that last edge. + // - Repeat this until the last edge is 'included in full' + // - If we hit the data edge level, resize and return the last data edge + CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length); + CordRep* edge = tree->Edge(pos.index); + length = pos.n; + while (length != edge->length) { + // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy. + assert(tree->refcount.IsMutable()); + const bool edge_is_mutable = edge->refcount.IsMutable(); + + if (height-- == 0) { + tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable); + return AssertValid(top); + } + + if (!edge_is_mutable) { + // We can't 'in place' remove any suffixes down this edge. + // Replace this edge with a prefix copy instead. + tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge; + CordRep::Unref(edge); + return AssertValid(top); + } + + // Move down one level, rinse repeat. + tree = edge->btree(); + pos = tree->IndexOfLength(length); + tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length); + edge = tree->Edge(pos.index); + length = pos.n; + } + + return AssertValid(top); +} + CordRep* CordRepBtree::SubTree(size_t offset, size_t n) { assert(n <= this->length); assert(offset <= this->length - n); @@ -857,7 +973,7 @@ char CordRepBtree::GetCharacter(size_t offset) const { Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) { // The inlined version in `GetAppendBuffer()` deals with all heights <= 3. assert(height() >= 4); - assert(refcount.IsOne()); + assert(refcount.IsMutable()); // Build a stack of nodes we may potentially need to update if we find a // non-shared FLAT with capacity at the leaf level. @@ -866,13 +982,13 @@ Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) { CordRepBtree* stack[kMaxDepth]; for (int i = 0; i < depth; ++i) { node = node->Edge(kBack)->btree(); - if (!node->refcount.IsOne()) return {}; + if (!node->refcount.IsMutable()) return {}; stack[i] = node; } - // Must be a privately owned flat. + // Must be a privately owned, mutable flat. CordRep* const edge = node->Edge(kBack); - if (!edge->refcount.IsOne() || edge->tag < FLAT) return {}; + if (!edge->refcount.IsMutable() || edge->tag < FLAT) return {}; // Must have capacity. const size_t avail = edge->flat()->Capacity() - edge->length; @@ -950,6 +1066,63 @@ template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree, absl::string_view data, size_t extra); +void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree, + bool consume) { + bool owned = consume && tree->refcount.IsOne(); + if (tree->height() == 0) { + for (CordRep* edge : tree->Edges()) { + if (!owned) edge = CordRep::Ref(edge); + size_t height = 0; + size_t length = edge->length; + CordRepBtree* node = stack[0]; + OpResult result = node->AddEdge<kBack>(true, edge, length); + while (result.action == CordRepBtree::kPopped) { + stack[height] = result.tree; + if (stack[++height] == nullptr) { + result.action = CordRepBtree::kSelf; + stack[height] = CordRepBtree::New(node, result.tree); + } else { + node = stack[height]; + result = node->AddEdge<kBack>(true, result.tree, length); + } + } + while (stack[++height] != nullptr) { + stack[height]->length += length; + } + } + } else { + for (CordRep* rep : tree->Edges()) { + Rebuild(stack, rep->btree(), owned); + } + } + if (consume) { + if (owned) { + CordRepBtree::Delete(tree); + } else { + CordRepBtree::Unref(tree); + } + } +} + +CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) { + // Set up initial stack with empty leaf node. + CordRepBtree* node = CordRepBtree::New(); + CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node}; + + // Recursively build the tree, consuming the input tree. + Rebuild(stack, tree, /* consume reference */ true); + + // Return top most node + for (CordRepBtree* parent : stack) { + if (parent == nullptr) return node; + node = parent; + } + + // Unreachable + assert(false); + return nullptr; +} + } // namespace cord_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index bbaa7934..bb38f0c3 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -163,6 +163,12 @@ class CordRepBtree : public CordRep { // typically after a ref_count.Decrement() on the last reference count. static void Destroy(CordRepBtree* tree); + // Use CordRep::Unref() as we overload for absl::Span<CordRep* const>. + using CordRep::Unref; + + // Unrefs all edges in `edges` which are assumed to be 'likely one'. + static void Unref(absl::Span<CordRep* const> edges); + // Appends / Prepends an existing CordRep instance to this tree. // The below methods accept three types of input: // 1) `rep` is a data node (See `IsDataNode` for valid data edges). @@ -198,6 +204,19 @@ class CordRepBtree : public CordRep { // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero. CordRep* SubTree(size_t offset, size_t n); + // Removes `n` trailing bytes from `tree`, and returns the resulting tree + // or data edge. Returns `tree` if n is zero, and nullptr if n == length. + // This function is logically identical to: + // result = tree->SubTree(0, tree->length - n); + // Unref(tree); + // return result; + // However, the actual implementation will as much as possible perform 'in + // place' modifications on the tree on all nodes and edges that are mutable. + // For example, in a fully privately owned tree with the last edge being a + // flat of length 12, RemoveSuffix(1) will simply set the length of that data + // edge to 11, and reduce the length of all nodes on the edge path by 1. + static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n); + // Returns the character at the given offset. char GetCharacter(size_t offset) const; @@ -221,9 +240,9 @@ class CordRepBtree : public CordRep { // length of the flat node and involved tree nodes have been increased by // `span.length()`. The caller is responsible for immediately assigning values // to all uninitialized data reference by the returned span. - // Requires `this->refcount.IsOne()`: this function forces the caller to do - // this fast path check on the top level node, as this is the most commonly - // shared node of a cord tree. + // Requires `this->refcount.IsMutable()`: this function forces the + // caller to do this fast path check on the top level node, as this is the + // most commonly shared node of a cord tree. Span<char> GetAppendBuffer(size_t size); // Returns the `height` of the tree. The height of a tree is limited to @@ -324,6 +343,11 @@ class CordRepBtree : public CordRep { // `front.height() + 1`. Requires `back.height() == front.height()`. static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back); + // Creates a fully balanced tree from the provided tree by rebuilding a new + // tree from all data edges in the input. This function is automatically + // invoked internally when the tree exceeds the maximum height. + static CordRepBtree* Rebuild(CordRepBtree* tree); + private: CordRepBtree() = default; ~CordRepBtree() = default; @@ -368,6 +392,12 @@ class CordRepBtree : public CordRep { // Requires 0 < `offset` <= length. Position IndexBefore(size_t offset) const; + // Returns the index of the edge ending at (or on) length `length`, and the + // number of bytes inside that edge up to `length`. For example, if we have a + // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27) + // will return {1, 17}, and IndexOfLength(10) will return {0, 10}. + Position IndexOfLength(size_t n) const; + // Identical to the above function except starting from the position `front`. // This function is equivalent to `IndexBefore(front.n + offset)`, with // the difference that this function is optimized to start at `front.index`. @@ -406,11 +436,28 @@ class CordRepBtree : public CordRep { // created copy to `new_length`. CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const; + // Returns a tree containing the edges [tree->begin(), end) and length + // of `new_length`. This method consumes a reference on the provided + // tree, and logically performs the following operation: + // result = tree->CopyBeginTo(end, new_length); + // CordRep::Unref(tree); + // return result; + static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length); + // Creates a partial copy of this Btree node, copying all edges starting at // `begin`, adding a reference on each copied edge, and sets the length of // the newly created copy to `new_length`. CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const; + // Extracts and returns the front edge from the provided tree. + // This method consumes a reference on the provided tree, and logically + // performs the following operation: + // edge = CordRep::Ref(tree->Edge(kFront)); + // CordRep::Unref(tree); + // return edge; + static CordRep* ExtractFront(CordRepBtree* tree); + // Returns a tree containing the result of appending `right` to `left`. static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right); @@ -420,6 +467,12 @@ class CordRepBtree : public CordRep { static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep); static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep); + // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the + // function will consume a reference on `tree`. `stack` is a null terminated + // array containing the new tree's state, with the current leaf node at + // stack[0], and parent nodes above that, or null for 'top of tree'. + static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume); + // Aligns existing edges to start at index 0, to allow for a new edge to be // added to the back of the current edges. inline void AlignBegin(); @@ -459,11 +512,11 @@ class CordRepBtree : public CordRep { // Returns a partial copy of the current tree containing the first `n` bytes // of data. `CopyResult` contains both the resulting edge and its height. The // resulting tree may be less high than the current tree, or even be a single - // matching data edge. For example, if `n == 1`, then the result will be the - // single data edge, and height will be set to -1 (one below the owning leaf - // node). If n == 0, this function returns null. - // Requires `n <= length` - CopyResult CopyPrefix(size_t n); + // matching data edge if `allow_folding` is set to true. + // For example, if `n == 1`, then the result will be the single data edge, and + // height will be set to -1 (one below the owning leaf node). If n == 0, this + // function returns null. Requires `n <= length` + CopyResult CopyPrefix(size_t n, bool allow_folding = true); // Returns a partial copy of the current tree containing all data starting // after `offset`. `CopyResult` contains both the resulting edge and its @@ -619,6 +672,14 @@ inline void CordRepBtree::Destroy(CordRepBtree* tree) { DestroyTree(tree, tree->begin(), tree->end()); } +inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) { + for (CordRep* edge : edges) { + if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) { + CordRep::Destroy(edge); + } + } +} + inline CordRepBtree* CordRepBtree::CopyRaw() const { auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree))); memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree)); @@ -762,6 +823,14 @@ inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front, return {index, offset}; } +inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const { + assert(n <= length); + size_t index = back(); + size_t strip = length - n; + while (strip >= edges_[index]->length) strip -= edges_[index--]->length; + return {index, edges_[index]->length - strip}; +} + inline CordRepBtree::Position CordRepBtree::IndexBeyond( const size_t offset) const { // We need to find the edge which `starting offset` is beyond (>=)`offset`. @@ -780,7 +849,7 @@ inline CordRepBtree* CordRepBtree::Create(CordRep* rep) { } inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) { - assert(refcount.IsOne()); + assert(refcount.IsMutable()); CordRepBtree* tree = this; const int height = this->height(); CordRepBtree* n1 = tree; @@ -789,21 +858,21 @@ inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) { switch (height) { case 3: tree = tree->Edge(kBack)->btree(); - if (!tree->refcount.IsOne()) return {}; + if (!tree->refcount.IsMutable()) return {}; n2 = tree; ABSL_FALLTHROUGH_INTENDED; case 2: tree = tree->Edge(kBack)->btree(); - if (!tree->refcount.IsOne()) return {}; + if (!tree->refcount.IsMutable()) return {}; n1 = tree; ABSL_FALLTHROUGH_INTENDED; case 1: tree = tree->Edge(kBack)->btree(); - if (!tree->refcount.IsOne()) return {}; + if (!tree->refcount.IsMutable()) return {}; ABSL_FALLTHROUGH_INTENDED; case 0: CordRep* edge = tree->Edge(kBack); - if (!edge->refcount.IsOne()) return {}; + if (!edge->refcount.IsMutable()) return {}; if (edge->tag < FLAT) return {}; size_t avail = edge->flat()->Capacity() - edge->length; if (avail == 0) return {}; diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc index 073a7d45..be9473d4 100644 --- a/absl/strings/internal/cord_rep_btree_test.cc +++ b/absl/strings/internal/cord_rep_btree_test.cc @@ -47,7 +47,9 @@ class CordRepBtreeTestPeer { namespace { using ::absl::cordrep_testing::AutoUnref; +using ::absl::cordrep_testing::CordCollectRepsIf; using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CordVisitReps; using ::absl::cordrep_testing::CreateFlatsFromString; using ::absl::cordrep_testing::CreateRandomString; using ::absl::cordrep_testing::MakeConcat; @@ -62,6 +64,7 @@ using ::testing::ElementsAre; using ::testing::ElementsAreArray; using ::testing::Eq; using ::testing::HasSubstr; +using ::testing::Le; using ::testing::Ne; using ::testing::Not; using ::testing::SizeIs; @@ -205,14 +208,17 @@ CordRepBtree* MakeTree(size_t size, bool append = true) { return tree; } -CordRepBtree* CreateTree(absl::string_view data, size_t chunk_size) { - std::vector<CordRep*> flats = CreateFlatsFromString(data, chunk_size); - auto it = flats.begin(); +CordRepBtree* CreateTree(absl::Span<CordRep* const> reps) { + auto it = reps.begin(); CordRepBtree* tree = CordRepBtree::Create(*it); - while (++it != flats.end()) tree = CordRepBtree::Append(tree, *it); + while (++it != reps.end()) tree = CordRepBtree::Append(tree, *it); return tree; } +CordRepBtree* CreateTree(absl::string_view data, size_t chunk_size) { + return CreateTree(CreateFlatsFromString(data, chunk_size)); +} + CordRepBtree* CreateTreeReverse(absl::string_view data, size_t chunk_size) { std::vector<CordRep*> flats = CreateFlatsFromString(data, chunk_size); auto rit = flats.rbegin(); @@ -759,6 +765,63 @@ TEST(CordRepBtreeTest, MergeFuzzTest) { } } +TEST_P(CordRepBtreeTest, RemoveSuffix) { + // Create tree of 1, 2 and 3 levels high + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + for (size_t cap : {max_cap - 1, max_cap * 2, max_cap * max_cap * 2}) { + const std::string data = CreateRandomString(cap * 512); + + { + // Verify RemoveSuffix(<all>) + AutoUnref refs; + CordRepBtree* node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, data.length()), Eq(nullptr)); + + // Verify RemoveSuffix(<none>) + node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, 0), Eq(node)); + CordRep::Unref(node); + } + + for (int n = 1; n < data.length(); ++n) { + AutoUnref refs; + auto flats = CreateFlatsFromString(data, 512); + CordRepBtree* node = refs.RefIf(shared(), CreateTree(flats)); + CordRep* rep = refs.Add(CordRepBtree::RemoveSuffix(node, n)); + EXPECT_THAT(CordToString(rep), Eq(data.substr(0, data.length() - n))); + + // Collect all flats + auto is_flat = [](CordRep* rep) { return rep->tag >= FLAT; }; + std::vector<CordRep*> edges = CordCollectRepsIf(is_flat, rep); + ASSERT_THAT(edges.size(), Le(flats.size())); + + // Isolate last edge + CordRep* last_edge = edges.back(); + edges.pop_back(); + const size_t last_length = rep->length - edges.size() * 512; + + // All flats except the last edge must be kept or copied 'as is' + int index = 0; + for (CordRep* edge : edges) { + ASSERT_THAT(edge, Eq(flats[index++])); + ASSERT_THAT(edge->length, Eq(512)); + } + + // CordRepBtree may optimize small substrings to avoid waste, so only + // check for flat sharing / updates where the code should always do this. + if (last_length >= 500) { + EXPECT_THAT(last_edge, Eq(flats[index++])); + if (shared()) { + EXPECT_THAT(last_edge->length, Eq(512)); + } else { + EXPECT_TRUE(last_edge->refcount.IsOne()); + EXPECT_THAT(last_edge->length, Eq(last_length)); + } + } + } + } +} + TEST(CordRepBtreeTest, SubTree) { // Create tree of at least 2 levels high constexpr size_t max_cap = CordRepBtree::kMaxCapacity; @@ -986,23 +1049,6 @@ TEST_P(CordRepBtreeTest, CreateFromTreeReturnsTree) { CordRep::Unref(result); } -TEST_P(CordRepBtreeTest, ExceedMaxHeight) { - AutoUnref refs; - CordRepBtree* tree = MakeLeaf(); - for (int h = 1; h <= CordRepBtree::kMaxHeight; ++h) { - CordRepBtree* newtree = CordRepBtree::New(tree); - for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { - newtree = CordRepBtree::Append(newtree, CordRep::Ref(tree)); - } - tree = newtree; - } - refs.RefIf(shared(), tree); -#if defined(GTEST_HAS_DEATH_TEST) - EXPECT_DEATH(tree = CordRepBtree::Append(tree, MakeFlat("Boom")), ".*"); -#endif - CordRep::Unref(tree); -} - TEST(CordRepBtreeTest, GetCharacter) { size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2; std::string data = CreateRandomString(n * 3); @@ -1389,6 +1435,54 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { CordRep::Unref(tree); } +TEST_P(CordRepBtreeTest, Rebuild) { + for (size_t size : {3, 8, 100, 10000, 1000000}) { + SCOPED_TRACE(absl::StrCat("Rebuild @", size)); + + std::vector<CordRepFlat*> flats; + for (int i = 0; i < size; ++i) { + flats.push_back(CordRepFlat::New(2)); + flats.back()->Data()[0] = 'x'; + flats.back()->length = 1; + } + + // Build the tree into 'right', and each so many 'split_limit' edges, + // combine 'left' + 'right' into a new 'left', and start a new 'right'. + // This guarantees we get a reasonable amount of chaos in the tree. + size_t split_count = 0; + size_t split_limit = 3; + auto it = flats.begin(); + CordRepBtree* left = nullptr; + CordRepBtree* right = CordRepBtree::New(*it); + while (++it != flats.end()) { + if (++split_count >= split_limit) { + split_limit += split_limit / 16; + left = left ? CordRepBtree::Append(left, right) : right; + right = CordRepBtree::New(*it); + } else { + right = CordRepBtree::Append(right, *it); + } + } + + // Finalize tree + left = left ? CordRepBtree::Append(left, right) : right; + + // Rebuild + AutoUnref ref; + left = ref.Add(CordRepBtree::Rebuild(ref.RefIf(shared(), left))); + ASSERT_TRUE(CordRepBtree::IsValid(left)); + + // Verify we have the exact same edges in the exact same order. + bool ok = true; + it = flats.begin(); + CordVisitReps(left, [&](CordRep* edge) { + if (edge->tag < FLAT) return; + ok = ok && (it != flats.end() && *it++ == edge); + }); + EXPECT_TRUE(ok && it == flats.end()) << "Rebuild edges mismatch"; + } +} + } // namespace } // namespace cord_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/cord_rep_ring.cc b/absl/strings/internal/cord_rep_ring.cc index db1f63fa..07c77eb3 100644 --- a/absl/strings/internal/cord_rep_ring.cc +++ b/absl/strings/internal/cord_rep_ring.cc @@ -277,7 +277,7 @@ CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { // Get current number of entries, and check for max capacity. size_t entries = rep->entries(); - if (!rep->refcount.IsOne()) { + if (!rep->refcount.IsMutable()) { return Copy(rep, rep->head(), rep->tail(), extra); } else if (entries + extra > rep->capacity()) { const size_t min_grow = rep->capacity() + rep->capacity() / 2; @@ -292,10 +292,10 @@ CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { } Span<char> CordRepRing::GetAppendBuffer(size_t size) { - assert(refcount.IsOne()); + assert(refcount.IsMutable()); index_type back = retreat(tail_); CordRep* child = entry_child(back); - if (child->tag >= FLAT && child->refcount.IsOne()) { + if (child->tag >= FLAT && child->refcount.IsMutable()) { size_t capacity = child->flat()->Capacity(); pos_type end_pos = entry_end_pos(back); size_t data_offset = entry_data_offset(back); @@ -312,10 +312,10 @@ Span<char> CordRepRing::GetAppendBuffer(size_t size) { } Span<char> CordRepRing::GetPrependBuffer(size_t size) { - assert(refcount.IsOne()); + assert(refcount.IsMutable()); CordRep* child = entry_child(head_); size_t data_offset = entry_data_offset(head_); - if (data_offset && child->refcount.IsOne() && child->tag >= FLAT) { + if (data_offset && child->refcount.IsMutable() && child->tag >= FLAT) { size_t n = (std::min)(data_offset, size); this->length += n; begin_pos_ -= n; @@ -504,7 +504,7 @@ CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) { CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, size_t extra) { - if (rep->refcount.IsOne()) { + if (rep->refcount.IsMutable()) { Span<char> avail = rep->GetAppendBuffer(data.length()); if (!avail.empty()) { memcpy(avail.data(), data.data(), avail.length()); @@ -538,7 +538,7 @@ CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, CordRepRing* CordRepRing::Prepend(CordRepRing* rep, absl::string_view data, size_t extra) { - if (rep->refcount.IsOne()) { + if (rep->refcount.IsMutable()) { Span<char> avail = rep->GetPrependBuffer(data.length()); if (!avail.empty()) { const char* tail = data.data() + data.length() - avail.length(); @@ -678,7 +678,7 @@ CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, Position tail = rep->FindTail(head.index, offset + len); const size_t new_entries = rep->entries(head.index, tail.index); - if (rep->refcount.IsOne() && extra <= (rep->capacity() - new_entries)) { + if (rep->refcount.IsMutable() && extra <= (rep->capacity() - new_entries)) { // We adopt a privately owned rep and no extra entries needed. if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); @@ -715,7 +715,7 @@ CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len, } Position head = rep->Find(len); - if (rep->refcount.IsOne()) { + if (rep->refcount.IsMutable()) { if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); rep->head_ = head.index; } else { @@ -745,7 +745,7 @@ CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len, } Position tail = rep->FindTail(rep->length - len); - if (rep->refcount.IsOne()) { + if (rep->refcount.IsMutable()) { // We adopt a privately owned rep, scrub. if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); rep->tail_ = tail.index; diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h index 44db8494..2000e21e 100644 --- a/absl/strings/internal/cord_rep_ring.h +++ b/absl/strings/internal/cord_rep_ring.h @@ -383,8 +383,8 @@ class CordRepRing : public CordRep { // Destroys the provided ring buffer, decrementing the reference count of all // contained child CordReps. The provided 1\`rep` should have a ref count of - // one (pre decrement destroy call observing `refcount.IsOne()`) or zero (post - // decrement destroy call observing `!refcount.Decrement()`). + // one (pre decrement destroy call observing `refcount.IsOne()`) or zero + // (post decrement destroy call observing `!refcount.Decrement()`). static void Destroy(CordRepRing* rep); // Returns a mutable reference to the logical end position array. diff --git a/absl/strings/internal/cord_rep_test_util.h b/absl/strings/internal/cord_rep_test_util.h index bc500064..ad828af2 100644 --- a/absl/strings/internal/cord_rep_test_util.h +++ b/absl/strings/internal/cord_rep_test_util.h @@ -115,6 +115,41 @@ inline cord_internal::CordRepBtree* CordRepBtreeFromFlats( return node; } +template <typename Fn> +inline void CordVisitReps(cord_internal::CordRep* rep, Fn&& fn) { + fn(rep); + while (rep->tag == cord_internal::SUBSTRING) { + rep = rep->substring()->child; + fn(rep); + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordVisitReps(edge, fn); + } + } else if (rep->tag == cord_internal::CONCAT) { + CordVisitReps(rep->concat()->left, fn); + CordVisitReps(rep->concat()->right, fn); + } +} + +template <typename Predicate> +inline std::vector<cord_internal::CordRep*> CordCollectRepsIf( + Predicate&& predicate, cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + CordVisitReps(rep, [&reps, &predicate](cord_internal::CordRep* rep) { + if (predicate(rep)) reps.push_back(rep); + }); + return reps; +} + +inline std::vector<cord_internal::CordRep*> CordCollectReps( + cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + auto fn = [&reps](cord_internal::CordRep* rep) { reps.push_back(rep); }; + CordVisitReps(rep, fn); + return reps; +} + inline void CordToString(cord_internal::CordRep* rep, std::string& s) { size_t offset = 0; size_t length = rep->length; diff --git a/absl/strings/internal/cordz_update_tracker.h b/absl/strings/internal/cordz_update_tracker.h index 02efcc3a..1f764486 100644 --- a/absl/strings/internal/cordz_update_tracker.h +++ b/absl/strings/internal/cordz_update_tracker.h @@ -39,6 +39,7 @@ class CordzUpdateTracker { // Tracked update methods. enum MethodIdentifier { kUnknown, + kAppendBuffer, kAppendCord, kAppendExternalMemory, kAppendString, @@ -54,6 +55,7 @@ class CordzUpdateTracker { kMoveAppendCord, kMoveAssignCord, kMovePrependCord, + kPrependBuffer, kPrependCord, kPrependString, kRemovePrefix, diff --git a/absl/strings/internal/cordz_update_tracker_test.cc b/absl/strings/internal/cordz_update_tracker_test.cc index fcd17df7..2348a175 100644 --- a/absl/strings/internal/cordz_update_tracker_test.cc +++ b/absl/strings/internal/cordz_update_tracker_test.cc @@ -37,6 +37,7 @@ using Methods = std::array<Method, Method::kNumMethods>; // Returns an array of all methods defined in `MethodIdentifier` Methods AllMethods() { return Methods{Method::kUnknown, + Method::kAppendBuffer, Method::kAppendCord, Method::kAppendExternalMemory, Method::kAppendString, @@ -52,6 +53,7 @@ Methods AllMethods() { Method::kMoveAppendCord, Method::kMoveAssignCord, Method::kMovePrependCord, + Method::kPrependBuffer, Method::kPrependCord, Method::kPrependString, Method::kRemovePrefix, diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h index 1780bb44..4ae07c2d 100644 --- a/absl/strings/numbers.h +++ b/absl/strings/numbers.h @@ -96,6 +96,25 @@ ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out); // unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out); +// SimpleHexAtoi() +// +// Converts a hexadecimal string (optionally followed or preceded by ASCII +// whitespace) to an integer, returning `true` if successful. Only valid base-16 +// hexadecimal integers whose value falls within the range of the integer type +// (optionally preceded by a `+` or `-`) can be converted. A valid hexadecimal +// value may include both upper and lowercase character symbols, and may +// optionally include a leading "0x" (or "0X") number prefix, which is ignored +// by this function. If any errors are encountered, this function returns +// `false`, leaving `out` in an unspecified state. +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out); + +// Overloads of SimpleHexAtoi() for 128 bit integers. +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::int128* out); +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::uint128* out); + ABSL_NAMESPACE_END } // namespace absl @@ -260,6 +279,21 @@ ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str, return numbers_internal::safe_strtou128_base(str, out, 10); } +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out) { + return numbers_internal::safe_strtoi_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::int128* out) { + return numbers_internal::safe_strto128_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::uint128* out) { + return numbers_internal::safe_strtou128_base(str, out, 16); +} + ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index f3103106..498c210d 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -47,6 +47,7 @@ namespace { using absl::SimpleAtoi; +using absl::SimpleHexAtoi; using absl::numbers_internal::kSixDigitsToBufferSize; using absl::numbers_internal::safe_strto32_base; using absl::numbers_internal::safe_strto64_base; @@ -468,6 +469,148 @@ TEST(NumbersTest, Atoenum) { VerifySimpleAtoiGood<E_biguint>(E_biguint_max32, E_biguint_max32); } +template <typename int_type, typename in_val_type> +void VerifySimpleHexAtoiGood(in_val_type in_value, int_type exp_value) { + std::string s; + // uint128 can be streamed but not StrCat'd + absl::strings_internal::OStringStream strm(&s); + if (in_value >= 0) { + strm << std::hex << in_value; + } else { + // Inefficient for small integers, but works with all integral types. + strm << "-" << std::hex << -absl::uint128(in_value); + } + int_type x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleHexAtoi(s, &x)) + << "in_value=" << std::hex << in_value << " s=" << s << " x=" << x; + EXPECT_EQ(exp_value, x); + x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleHexAtoi( + s.c_str(), &x)); // NOLINT: readability-redundant-string-conversions + EXPECT_EQ(exp_value, x); +} + +template <typename int_type, typename in_val_type> +void VerifySimpleHexAtoiBad(in_val_type in_value) { + std::string s; + // uint128 can be streamed but not StrCat'd + absl::strings_internal::OStringStream strm(&s); + if (in_value >= 0) { + strm << std::hex << in_value; + } else { + // Inefficient for small integers, but works with all integral types. + strm << "-" << std::hex << -absl::uint128(in_value); + } + int_type x; + EXPECT_FALSE(SimpleHexAtoi(s, &x)); + EXPECT_FALSE(SimpleHexAtoi( + s.c_str(), &x)); // NOLINT: readability-redundant-string-conversions +} + +TEST(NumbersTest, HexAtoi) { + // SimpleHexAtoi(absl::string_view, int32_t) + VerifySimpleHexAtoiGood<int32_t>(0, 0); + VerifySimpleHexAtoiGood<int32_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<int32_t>(-0x42, -0x42); + + VerifySimpleHexAtoiGood<int32_t>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<int32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + + // SimpleHexAtoi(absl::string_view, uint32_t) + VerifySimpleHexAtoiGood<uint32_t>(0, 0); + VerifySimpleHexAtoiGood<uint32_t>(0x42, 0x42); + VerifySimpleHexAtoiBad<uint32_t>(-0x42); + + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<uint32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<uint32_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleHexAtoi(absl::string_view, int64_t) + VerifySimpleHexAtoiGood<int64_t>(0, 0); + VerifySimpleHexAtoiGood<int64_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<int64_t>(-0x42, -0x42); + + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiBad<int64_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleHexAtoi(absl::string_view, uint64_t) + VerifySimpleHexAtoiGood<uint64_t>(0, 0); + VerifySimpleHexAtoiGood<uint64_t>(0x42, 0x42); + VerifySimpleHexAtoiBad<uint64_t>(-0x42); + + VerifySimpleHexAtoiBad<uint64_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiBad<uint64_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + + // SimpleHexAtoi(absl::string_view, absl::uint128) + VerifySimpleHexAtoiGood<absl::uint128>(0, 0); + VerifySimpleHexAtoiGood<absl::uint128>(0x42, 0x42); + VerifySimpleHexAtoiBad<absl::uint128>(-0x42); + + VerifySimpleHexAtoiBad<absl::uint128>(std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiBad<absl::uint128>(std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + VerifySimpleHexAtoiGood<absl::uint128>( + std::numeric_limits<absl::uint128>::max(), + std::numeric_limits<absl::uint128>::max()); + + // Some other types + VerifySimpleHexAtoiGood<int>(-0x42, -0x42); + VerifySimpleHexAtoiGood<int32_t>(-0x42, -0x42); + VerifySimpleHexAtoiGood<uint32_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<unsigned int>(0x42, 0x42); + VerifySimpleHexAtoiGood<int64_t>(-0x42, -0x42); + VerifySimpleHexAtoiGood<long>(-0x42, -0x42); // NOLINT: runtime-int + VerifySimpleHexAtoiGood<uint64_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<size_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<std::string::size_type>(0x42, 0x42); + + // Number prefix + int32_t value; + EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("0X34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + // ASCII whitespace + EXPECT_TRUE(safe_strto32_base(" \t\n 34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("34234324 \t\n ", &value, 16)); + EXPECT_EQ(0x34234324, value); +} + TEST(stringtest, safe_strto32_base) { int32_t value; EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16)); diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc index f472f9ed..1b4427b8 100644 --- a/absl/strings/str_split_test.cc +++ b/absl/strings/str_split_test.cc @@ -943,8 +943,14 @@ TEST(Delimiter, ByLength) { } TEST(Split, WorksWithLargeStrings) { +#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER) + constexpr size_t kSize = (uint32_t{1} << 26) + 1; // 64M + 1 byte +#else + constexpr size_t kSize = (uint32_t{1} << 31) + 1; // 2G + 1 byte +#endif if (sizeof(size_t) > 4) { - std::string s((uint32_t{1} << 31) + 1, 'x'); // 2G + 1 byte + std::string s(kSize, 'x'); s.back() = '-'; std::vector<absl::string_view> v = absl::StrSplit(s, '-'); EXPECT_EQ(2, v.size()); diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc index 1f3c7409..8980b198 100644 --- a/absl/strings/substitute.cc +++ b/absl/strings/substitute.cc @@ -75,7 +75,8 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, // Build the string. size_t original_size = output->size(); - strings_internal::STLStringResizeUninitialized(output, original_size + size); + strings_internal::STLStringResizeUninitializedAmortized(output, + original_size + size); char* target = &(*output)[original_size]; for (size_t i = 0; i < format.size(); i++) { if (format[i] == '$') { |