From b6b62c72b4209f953923354bfb1eb40ed867b5c3 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 1 Nov 2021 16:01:36 -0700 Subject: Export of internal Abseil changes -- f49e405201d2ffd5955503fa8ad0f08ec0cdfb2b by Martijn Vels: Add common [container.requirements] type definitions to `CharRange` and `ChunkRange` The presence of these allows these range classes to be used in various utility functions which require some minimum type of container. For example, this change allows tests to use `EXPECT_THAT(cord.Chunks(), ElementsAre(...))` PiperOrigin-RevId: 406941278 -- 0c195f073632e21d9a4bce158047b2ba8551c2d1 by Evan Brown: Use explicit exponential growth in SubstituteAndAppendArray. PiperOrigin-RevId: 406931952 -- afb043bccd809a55cab78abadb7548a057d9eda0 by Jorg Brown: Use longer var names in macro to avoid clang-tidy warning PiperOrigin-RevId: 406930978 -- 80397e2604e6b3d929a34742c3a32581b34d3ac4 by Martijn Vels: Add future kAppendBuffer and kPrependBuffer API trackers for Cordz sampling PiperOrigin-RevId: 406912759 -- e910ce919ef83933f08a690e8b7325c7cc5b6d5d by Martijn Vels: Implement Prepend(string_view) in terms of PrependArray(string_view, MethodIdentifier). PiperOrigin-RevId: 406891665 -- c9cff43d4c0568ed01f2fca0f6ef038ae03112b5 by Martijn Vels: Add 'Rebuild' logic to CordRepBtree There are btree-hostile scenarios where an application could perform repeated split/insert/merge operations on a cord, leading to a tree exceeding the maximum height. While this should be rare in practice, this change adds a Rebuild() method that will rebuild a tree with a 100% fill factor, and we will invoke this rebuild when a tree exceeds the maximum height. This basically follows the 'balance' logic in Concat trees (although such rebalancing is common in Concat uses). PiperOrigin-RevId: 406875739 -- 5b2b8fb88f1ebfdc1c670088152da2cb2ea4c376 by Martijn Vels: Add 'in place' enabled RemoveSuffix An in-place RemoveSuffix is more efficient than SubTree(), as it can directly modify privately owned nodes and flats, allowing easy re-use of free capacity in right-most flats that would otherwise turn into Substring edges when using SubTree(). PiperOrigin-RevId: 406431230 -- f09903c0a3d7344f59aaf1380a16ea10829217d4 by Derek Mauro: Internal change PiperOrigin-RevId: 406430373 -- 9957af575c33bb18dc170572a4ee8cc5901df6b2 by Greg Falcon: Initial groundwork to allow storing checksum data inside CordRep instances. This uses a RefcountAndFlags bit that was reserved for this purpose, and will be leveraged in a follow-up change to allow attaching checksums to a Cord's value. This change splits RefcountAndFlags::IsOne() into two distinct operations: * IsOne(): This returns true when the associated CordRep is not shared with other threads. This is useful for functions that consume CordRep instances; for example, code that consumes an unshared CordRep can assume ownership of its children without modifying those refcounts. * IsMutable(): This returns true when the associated CordRep reference is not shared with other threads, *and* does not store an associated checksum value. This is useful for functions that modify a CordRep's contents: code may modify the bytes of a mutable-unshared CordRep without fear of races with other threads, or of invalidating a stored checksum. The tricky part of this CL is ensuring that the correct choice between IsMutable() and IsOne() was made at each point. An incorrect application of IsOne() could lead to correctness bugs in the future.
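For illustration, a minimal sketch of how a consumer might choose between the two predicates; the helper functions named here are hypothetical and not part of this change:

  void ConsumeOrCopy(CordRep* rep) {
    if (rep->refcount.IsMutable()) {
      // Unshared and CRC-free: the bytes may be modified in place.
      MutateInPlace(rep);    // hypothetical helper
    } else if (rep->refcount.IsOne()) {
      // Unshared, but possibly carrying a CRC: the rep may be deleted or its
      // children adopted, but its data must not be modified without
      // adjusting the stored CRC.
      AdoptChildren(rep);    // hypothetical helper
    } else {
      // Shared with other threads: work on a copy instead.
      MutateCopy(rep);       // hypothetical helper
    }
  }
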
Code conditioned on IsOne() may delete the CordRep in question, or assume ownership of its children, but must not modify the CordRep's data without explicitly adjusting the CRC. PiperOrigin-RevId: 406191103 -- 686544814079e5ab6d4593cca0c068b510be400a by Martijn Vels: Reduce the size in the LargeString test when running with Sanitizers PiperOrigin-RevId: 406186945 -- 735b4490bdb695c35731f06ce4b8de14ce2be6ed by Alex Strelnikov: Release absl::SimpleHexAtoi. PiperOrigin-RevId: 406143188 GitOrigin-RevId: f49e405201d2ffd5955503fa8ad0f08ec0cdfb2b Change-Id: Ic6527ac40fa03ea02ca813e8bb7868a219544de4 --- absl/flags/flag.h | 8 +- absl/strings/BUILD.bazel | 12 ++ absl/strings/CMakeLists.txt | 13 ++ absl/strings/cord.cc | 20 +- absl/strings/cord.h | 28 +++ absl/strings/cord_test.cc | 43 ++++ absl/strings/internal/cord_internal.h | 41 +++- absl/strings/internal/cord_internal_test.cc | 116 +++++++++++ absl/strings/internal/cord_rep_btree.cc | 227 ++++++++++++++++++--- absl/strings/internal/cord_rep_btree.h | 95 +++++++-- absl/strings/internal/cord_rep_btree_test.cc | 136 ++++++++++-- absl/strings/internal/cord_rep_ring.cc | 20 +- absl/strings/internal/cord_rep_ring.h | 4 +- absl/strings/internal/cord_rep_test_util.h | 35 ++++ absl/strings/internal/cordz_update_tracker.h | 2 + absl/strings/internal/cordz_update_tracker_test.cc | 2 + absl/strings/numbers.h | 34 +++ absl/strings/numbers_test.cc | 143 +++++++++++++ absl/strings/str_split_test.cc | 8 +- absl/strings/substitute.cc | 3 +- 20 files changed, 890 insertions(+), 100 deletions(-) create mode 100644 absl/strings/internal/cord_internal_test.cc diff --git a/absl/flags/flag.h b/absl/flags/flag.h index 1a7e4d4d..a724ccc9 100644 --- a/absl/flags/flag.h +++ b/absl/flags/flag.h @@ -241,8 +241,8 @@ ABSL_NAMESPACE_END /* default value argument. That keeps temporaries alive */ \ /* long enough for NonConst to work correctly.
*/ \ static constexpr absl::string_view Value( \ - absl::string_view v = ABSL_FLAG_IMPL_FLAGHELP(txt)) { \ - return v; \ + absl::string_view absl_flag_help = ABSL_FLAG_IMPL_FLAGHELP(txt)) { \ + return absl_flag_help; \ } \ static std::string NonConst() { return std::string(Value()); } \ }; \ @@ -254,8 +254,8 @@ ABSL_NAMESPACE_END #define ABSL_FLAG_IMPL_DECLARE_DEF_VAL_WRAPPER(name, Type, default_value) \ struct AbslFlagDefaultGenFor##name { \ Type value = absl::flags_internal::InitDefaultValue(default_value); \ - static void Gen(void* p) { \ - new (p) Type(AbslFlagDefaultGenFor##name{}.value); \ + static void Gen(void* absl_flag_default_loc) { \ + new (absl_flag_default_loc) Type(AbslFlagDefaultGenFor##name{}.value); \ } \ }; diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index c046366c..090fc58a 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -305,6 +305,17 @@ cc_library( ], ) +cc_test( + name = "cord_internal_test", + srcs = ["internal/cord_internal_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "cord_rep_btree_test", size = "medium", @@ -684,6 +695,7 @@ cc_test( "//absl/base:endian", "//absl/base:raw_logging_internal", "//absl/container:fixed_array", + "//absl/random", "@com_google_googletest//:gtest_main", ], ) diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 8ad5f9c7..d6801fe6 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -920,6 +920,7 @@ absl_cc_test( absl::cordz_test_helpers absl::core_headers absl::endian + absl::random_random absl::raw_logging_internal absl::fixed_array GTest::gmock_main @@ -943,6 +944,18 @@ absl_cc_test( GTest::gmock_main ) +absl_cc_test( + NAME + cord_internal_test + SRCS + "internal/cord_internal_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord_internal + GTest::gmock_main +) + absl_cc_test( NAME cord_rep_btree_test diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 29af9782..854047ca 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -419,7 +419,7 @@ void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) { // written to region and the actual size increase will be written to size. static inline bool PrepareAppendRegion(CordRep* root, char** region, size_t* size, size_t max_length) { - if (root->IsBtree() && root->refcount.IsOne()) { + if (root->IsBtree() && root->refcount.IsMutable()) { Span span = root->btree()->GetAppendBuffer(max_length); if (!span.empty()) { *region = span.data(); @@ -430,11 +430,11 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, // Search down the right-hand path for a non-full FLAT node. CordRep* dst = root; - while (dst->IsConcat() && dst->refcount.IsOne()) { + while (dst->IsConcat() && dst->refcount.IsMutable()) { dst = dst->concat()->right; } - if (!dst->IsFlat() || !dst->refcount.IsOne()) { + if (!dst->IsFlat() || !dst->refcount.IsMutable()) { *region = nullptr; *size = 0; return false; @@ -649,7 +649,7 @@ Cord& Cord::operator=(absl::string_view src) { if (tree != nullptr) { CordzUpdateScope scope(contents_.cordz_info(), method); if (tree->IsFlat() && tree->flat()->Capacity() >= length && - tree->refcount.IsOne()) { + tree->refcount.IsMutable()) { // Copy in place if the existing FLAT node is reusable. 
memmove(tree->flat()->Data(), data, length); tree->length = length; @@ -819,7 +819,7 @@ void Cord::Prepend(const Cord& src) { return Prepend(src_contents); } -void Cord::Prepend(absl::string_view src) { +void Cord::PrependArray(absl::string_view src, MethodIdentifier method) { if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. if (!contents_.is_tree()) { size_t cur_size = contents_.inline_size(); @@ -834,7 +834,7 @@ void Cord::Prepend(absl::string_view src) { } } CordRep* rep = NewTree(src.data(), src.size(), 0); - contents_.PrependTree(rep, CordzUpdateTracker::kPrependString); + contents_.PrependTree(rep, method); } template > @@ -894,7 +894,7 @@ static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { if (n >= node->length) return nullptr; if (n == 0) return CordRep::Ref(node); absl::InlinedVector lhs_stack; - bool inplace_ok = node->refcount.IsOne(); + bool inplace_ok = node->refcount.IsMutable(); while (node->IsConcat()) { assert(n <= node->length); @@ -907,7 +907,7 @@ static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { n -= node->concat()->right->length; node = node->concat()->left; } - inplace_ok = inplace_ok && node->refcount.IsMutable(); } assert(n <= node->length); @@ -968,9 +968,7 @@ void Cord::RemoveSuffix(size_t n) { auto constexpr method = CordzUpdateTracker::kRemoveSuffix; CordzUpdateScope scope(contents_.cordz_info(), method); if (tree->IsBtree()) { - CordRep* old = tree; - tree = tree->btree()->SubTree(0, tree->length - n); - CordRep::Unref(old); + tree = CordRepBtree::RemoveSuffix(tree->btree(), n); } else { CordRep* newrep = RemoveSuffixFrom(tree, n); CordRep::Unref(tree); diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 175d7b90..f0a19914 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -460,6 +460,16 @@ class Cord { // `Cord::chunk_begin()` and `Cord::chunk_end()`. class ChunkRange { public: + // Fulfill minimum C++ container requirements [container.requirements] + // These (partial) container type definitions allow ChunkRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = absl::string_view; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = ChunkIterator; + using const_iterator = ChunkIterator; + explicit ChunkRange(const Cord* cord) : cord_(cord) {} ChunkIterator begin() const; @@ -591,6 +601,16 @@ class Cord { // `Cord::char_begin()` and `Cord::char_end()`. class CharRange { public: + // Fulfill minimum C++ container requirements [container.requirements] + // These (partial) container type definitions allow CharRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = char; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = CharIterator; + using const_iterator = CharIterator; + explicit CharRange(const Cord* cord) : cord_(cord) {} CharIterator begin() const; @@ -881,6 +901,10 @@ class Cord { template void AppendImpl(C&& src); + // Prepends the provided data to this instance. `method` contains the public + // API method for this action which is tracked for Cordz sampling purposes. + void PrependArray(absl::string_view src, MethodIdentifier method); + // Assigns the value in 'src' to this instance, 'stealing' its contents.
// Requires src.length() > kMaxBytesToCopy. Cord& AssignLargeString(std::string&& src); @@ -1220,6 +1244,10 @@ inline void Cord::Append(absl::string_view src) { contents_.AppendArray(src, CordzUpdateTracker::kAppendString); } +inline void Cord::Prepend(absl::string_view src) { + PrependArray(src, CordzUpdateTracker::kPrependString); +} + extern template void Cord::Append(std::string&& src); extern template void Cord::Prepend(std::string&& src); diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 50079b7c..cced9bba 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -34,6 +34,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/base/macros.h" #include "absl/container/fixed_array.h" +#include "absl/random/random.h" #include "absl/strings/cord_test_helpers.h" #include "absl/strings/cordz_test_helpers.h" #include "absl/strings/str_cat.h" @@ -1833,6 +1834,48 @@ TEST_P(CordTest, Hardening) { EXPECT_DEATH_IF_SUPPORTED(++cord.chunk_end(), ""); } +// This test mimics a specific (and rare) application repeatedly splitting a +// cord, inserting (overwriting) a string value, and composing a new cord from +// the three pieces. This is hostile towards a Btree implementation: A split of +// a node at any level is likely to have the right-most edge of the left split, +// and the left-most edge of the right split shared. For example, splitting a +// leaf node with 6 edges will likely result in a 1-6, 2-5, 3-4, etc. split, +// sharing the 'split node'. When recomposing such nodes, we 'inject' an edge +// into that node. As this happens with some probability on each level of the +// tree, this will quickly grow the tree until it reaches maximum height. +TEST_P(CordTest, BtreeHostileSplitInsertJoin) { + absl::BitGen bitgen; + + // Start with about 1GB of data + std::string data(1 << 10, 'x'); + absl::Cord buffer(data); + absl::Cord cord; + for (int i = 0; i < 1000000; ++i) { + cord.Append(buffer); + } + + for (int j = 0; j < 1000; ++j) { + size_t offset = absl::Uniform(bitgen, 0u, cord.size()); + size_t length = absl::Uniform(bitgen, 100u, data.size()); + if (cord.size() == offset) { + cord.Append(absl::string_view(data.data(), length)); + } else { + absl::Cord suffix; + if (offset + length < cord.size()) { + suffix = cord; + suffix.RemovePrefix(offset + length); + } + if (cord.size() > offset) { + cord.RemoveSuffix(cord.size() - offset); + } + cord.Append(absl::string_view(data.data(), length)); + if (!suffix.empty()) { + cord.Append(suffix); + } + } + } +} + class AfterExitCordTester { public: bool Set(absl::Cord* cord, absl::string_view expected) { diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index 0300ac40..bfe5564e 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -87,6 +87,9 @@ class RefcountAndFlags { constexpr RefcountAndFlags() : count_{kRefIncrement} {} struct Immortal {}; explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {} + struct WithCrc {}; + explicit constexpr RefcountAndFlags(WithCrc) + : count_(kCrcFlag | kRefIncrement) {} // Increments the reference count. Imposes no memory ordering. inline void Increment() { @@ -122,14 +125,32 @@ class RefcountAndFlags { return count_.load(std::memory_order_acquire) >> kNumFlags; } - // Returns whether the atomic integer is 1.
- // If the reference count is used in the conventional way, a - // reference count of 1 implies that the current thread owns the - // reference and no other thread shares it. - // This call performs the test for a reference count of one, and - // performs the memory barrier needed for the owning thread - // to act on the object, knowing that it has exclusive access to the - // object. Always returns false when the immortal bit is set. + // Returns true if the referenced object carries a CRC value. + bool HasCrc() const { + return (count_.load(std::memory_order_relaxed) & kCrcFlag) != 0; + } + + // Returns true iff the atomic integer is 1 and this node does not store + // a CRC. When both these conditions are met, the current thread owns + // the reference and no other thread shares it, so its contents may be + // safely mutated. + // + // If the referenced item is shared, carries a CRC, or is immortal, + // it should not be modified in-place, and this function returns false. + // + // This call performs the memory barrier needed for the owning thread + // to act on the object, so that if it returns true, it may safely + // assume exclusive access to the object. + inline bool IsMutable() { + return (count_.load(std::memory_order_acquire)) == kRefIncrement; + } + + // Returns whether the atomic integer is 1. Similar to IsMutable(), + // but does not check for a stored CRC. (An unshared node with a CRC is not + // mutable, because changing its data would invalidate the CRC.) + // + // When this returns true, there are no other references, and data sinks + // may safely adopt the children of the CordRep. inline bool IsOne() { return (count_.load(std::memory_order_acquire) & kRefcountMask) == kRefIncrement; @@ -149,14 +170,14 @@ class RefcountAndFlags { kNumFlags = 2, kImmortalFlag = 0x1, - kReservedFlag = 0x2, + kCrcFlag = 0x2, kRefIncrement = (1 << kNumFlags), // Bitmask to use when checking refcount by equality. This masks out // all flags except kImmortalFlag, which is part of the refcount for // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1 // if the immortal bit is set.) - kRefcountMask = ~kReservedFlag, + kRefcountMask = ~kCrcFlag, }; std::atomic count_; diff --git a/absl/strings/internal/cord_internal_test.cc b/absl/strings/internal/cord_internal_test.cc new file mode 100644 index 00000000..0758dfef --- /dev/null +++ b/absl/strings/internal/cord_internal_test.cc @@ -0,0 +1,116 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/cord_internal.h" + +#include "gmock/gmock.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +TEST(RefcountAndFlags, NormalRefcount) { + for (bool expect_high_refcount : {false, true}) { + SCOPED_TRACE(expect_high_refcount); + RefcountAndFlags refcount; + // count = 1 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_TRUE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + refcount.Increment(); + // count = 2 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_FALSE(refcount.IsOne()); + + // Decrementing should return true, since a reference is outstanding. + if (expect_high_refcount) { + EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_TRUE(refcount.Decrement()); + } + // count = 1 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_TRUE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + // One more decrement will return false, as no references remain. + if (expect_high_refcount) { + EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_FALSE(refcount.Decrement()); + } + } +} + +TEST(RefcountAndFlags, CrcRefcount) { + for (bool expect_high_refcount : {false, true}) { + SCOPED_TRACE(expect_high_refcount); + RefcountAndFlags refcount(RefcountAndFlags::WithCrc{}); + // count = 1 + + // A CRC-carrying node is never mutable, but can be unshared. + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + refcount.Increment(); + // count = 2 + + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_FALSE(refcount.IsOne()); + + // Decrementing should return true, since a reference is outstanding. + if (expect_high_refcount) { + EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_TRUE(refcount.Decrement()); + } + // count = 1 + + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + // One more decrement will return false, as no references remain. + if (expect_high_refcount) { + EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_FALSE(refcount.Decrement()); + } + } +} + +TEST(RefcountAndFlags, ImmortalRefcount) { + RefcountAndFlags immortal_refcount(RefcountAndFlags::Immortal{}); + + for (int i = 0; i < 100; ++i) { + // An immortal refcount is never unshared, and decrementing never causes + // a collection. + EXPECT_FALSE(immortal_refcount.HasCrc()); + EXPECT_FALSE(immortal_refcount.IsMutable()); + EXPECT_FALSE(immortal_refcount.IsOne()); + EXPECT_TRUE(immortal_refcount.Decrement()); + EXPECT_TRUE(immortal_refcount.DecrementExpectHighRefcount()); + } +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc index 6d53ab61..4404f33a 100644 --- a/absl/strings/internal/cord_rep_btree.cc +++ b/absl/strings/internal/cord_rep_btree.cc @@ -140,6 +140,26 @@ inline CordRep* MakeSubstring(CordRep* rep, size_t offset) { return CreateSubstring(rep, offset, rep->length - offset); } +// Resizes `edge` to the provided `length`. Adopts a reference on `edge`. +// This method directly returns `edge` if `length` equals `edge->length`. +// If `is_mutable` is set to true, this function may return `edge` with +// `edge->length` set to the new length depending on the type and size of +// `edge`. Otherwise, this function returns a new CordRepSubstring value.
+// Requires `length > 0 && length <= edge->length`. +CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) { + assert(length > 0); + assert(length <= edge->length); + assert(CordRepBtree::IsDataEdge(edge)); + if (length >= edge->length) return edge; + + if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) { + edge->length = length; + return edge; + } + + return CreateSubstring(edge, 0, length); +} + template inline absl::string_view Consume(absl::string_view s, size_t n) { return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n); @@ -196,8 +216,8 @@ void DeleteLeafEdge(CordRep* rep) { // propagate node changes up the stack. template struct StackOperations { - // Returns true if the node at 'depth' is not shared, i.e. has a refcount - // of one and all of its parent nodes have a refcount of one. + // Returns true if the node at 'depth' is mutable, i.e. has a refcount + // of one, carries no CRC, and all of its parent nodes have a refcount of one. inline bool owned(int depth) const { return depth < share_depth; } // Returns the node at 'depth'. @@ -208,11 +228,11 @@ struct StackOperations { inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) { assert(depth <= tree->height()); int current_depth = 0; - while (current_depth < depth && tree->refcount.IsOne()) { + while (current_depth < depth && tree->refcount.IsMutable()) { stack[current_depth++] = tree; tree = tree->Edge(edge_type)->btree(); } - share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0); + share_depth = current_depth + (tree->refcount.IsMutable() ? 1 : 0); while (current_depth < depth) { stack[current_depth++] = tree; tree = tree->Edge(edge_type)->btree(); @@ -221,17 +241,17 @@ struct StackOperations { } // Builds a stack with the invariant that all nodes are private owned / not - // shared. This is used in iterative updates where a previous propagation - // guaranteed all nodes are owned / private. + // shared and carry no CRC data. This is used in iterative updates where a + // previous propagation guaranteed all nodes have this property. inline void BuildOwnedStack(CordRepBtree* tree, int height) { assert(height <= CordRepBtree::kMaxHeight); int depth = 0; while (depth < height) { - assert(tree->refcount.IsOne()); + assert(tree->refcount.IsMutable()); stack[depth++] = tree; tree = tree->Edge(edge_type)->btree(); } - assert(tree->refcount.IsOne()); + assert(tree->refcount.IsMutable()); share_depth = depth + 1; } @@ -240,11 +260,14 @@ struct StackOperations { static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) { switch (result.action) { case CordRepBtree::kPopped: - if (ABSL_PREDICT_FALSE(tree->height() >= CordRepBtree::kMaxHeight)) { - ABSL_RAW_LOG(FATAL, "Max height exceeded"); - } - return edge_type == kBack ? CordRepBtree::New(tree, result.tree) + tree = edge_type == kBack ? CordRepBtree::New(tree, result.tree) : CordRepBtree::New(result.tree, tree); + if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) { + tree = CordRepBtree::Rebuild(tree); + ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight, + "Max height exceeded"); + } + return tree; case CordRepBtree::kCopied: CordRep::Unref(tree); ABSL_FALLTHROUGH_INTENDED; @@ -313,12 +336,12 @@ struct StackOperations { return Unwind(tree, depth, length, result); } - // `share_depth` contains the depth at which the nodes in the stack become - // shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`), - // then `share_depth` is 0. 
If the 2nd node is shared (and implicitly all - // nodes below that) then `share_depth` is 1, etc. A `share_depth` greater - // than the depth of the stack indicates that none of the nodes in the stack - // are shared. + // `share_depth` contains the depth at which the nodes in the stack cannot + // be mutated. I.e., if the top most level is shared (i.e.: + // `!refcount.IsMutable()`), then `share_depth` is 0. If the 2nd node + // is shared (and implicitly all nodes below that) then `share_depth` is 1, + // etc. A `share_depth` greater than the depth of the stack indicates that + // none of the nodes in the stack are shared. int share_depth; NodeStack stack; @@ -692,7 +715,7 @@ CopyResult CordRepBtree::CopySuffix(size_t offset) { return result; } -CopyResult CordRepBtree::CopyPrefix(size_t n) { +CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) { assert(n > 0); assert(n <= this->length); @@ -704,10 +727,12 @@ CopyResult CordRepBtree::CopyPrefix(size_t n) { int height = this->height(); CordRepBtree* node = this; CordRep* front = node->Edge(kFront); - while (front->length >= n) { - if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1}; - node = front->btree(); - front = node->Edge(kFront); + if (allow_folding) { + while (front->length >= n) { + if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1}; + node = front->btree(); + front = node->Edge(kFront); + } } if (node->length == n) return {CordRep::Ref(node), height}; @@ -746,6 +771,97 @@ CopyResult CordRepBtree::CopyPrefix(size_t n) { return result; } +CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) { + CordRep* front = tree->Edge(tree->begin()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(tree->begin() + 1, tree->end())); + CordRepBtree::Delete(tree); + } else { + CordRep::Ref(front); + CordRep::Unref(tree); + } + return front; +} + +CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length) { + assert(end <= tree->end()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(end, tree->end())); + tree->set_end(end); + tree->length = new_length; + } else { + CordRepBtree* old = tree; + tree = tree->CopyBeginTo(end, new_length); + CordRep::Unref(old); + } + return tree; +} + +CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) { + // Check input and deal with trivial cases 'Remove all/none' + assert(tree != nullptr); + assert(n <= tree->length); + const size_t len = tree->length; + if (ABSL_PREDICT_FALSE(n == 0)) { + return tree; + } + if (ABSL_PREDICT_FALSE(n >= len)) { + CordRepBtree::Unref(tree); + return nullptr; + } + + size_t length = len - n; + int height = tree->height(); + bool is_mutable = tree->refcount.IsMutable(); + + // Extract all top nodes which are reduced to size = 1 + Position pos = tree->IndexOfLength(length); + while (pos.index == tree->begin()) { + CordRep* edge = ExtractFront(tree); + is_mutable &= edge->refcount.IsMutable(); + if (height-- == 0) return ResizeEdge(edge, length, is_mutable); + tree = edge->btree(); + pos = tree->IndexOfLength(length); + } + + // Repeat the following sequence traversing down the tree: + // - Crop the top node to the 'last remaining edge' adjusting length. + // - Set the length for down edges to the partial length in that last edge. 
+ // - Repeat this until the last edge is 'included in full' + // - If we hit the data edge level, resize and return the last data edge + CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length); + CordRep* edge = tree->Edge(pos.index); + length = pos.n; + while (length != edge->length) { + // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy. + assert(tree->refcount.IsMutable()); + const bool edge_is_mutable = edge->refcount.IsMutable(); + + if (height-- == 0) { + tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable); + return AssertValid(top); + } + + if (!edge_is_mutable) { + // We can't 'in place' remove any suffixes down this edge. + // Replace this edge with a prefix copy instead. + tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge; + CordRep::Unref(edge); + return AssertValid(top); + } + + // Move down one level, rinse repeat. + tree = edge->btree(); + pos = tree->IndexOfLength(length); + tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length); + edge = tree->Edge(pos.index); + length = pos.n; + } + + return AssertValid(top); +} + CordRep* CordRepBtree::SubTree(size_t offset, size_t n) { assert(n <= this->length); assert(offset <= this->length - n); @@ -857,7 +973,7 @@ char CordRepBtree::GetCharacter(size_t offset) const { Span CordRepBtree::GetAppendBufferSlow(size_t size) { // The inlined version in `GetAppendBuffer()` deals with all heights <= 3. assert(height() >= 4); - assert(refcount.IsOne()); + assert(refcount.IsMutable()); // Build a stack of nodes we may potentially need to update if we find a // non-shared FLAT with capacity at the leaf level. @@ -866,13 +982,13 @@ Span CordRepBtree::GetAppendBufferSlow(size_t size) { CordRepBtree* stack[kMaxDepth]; for (int i = 0; i < depth; ++i) { node = node->Edge(kBack)->btree(); - if (!node->refcount.IsOne()) return {}; + if (!node->refcount.IsMutable()) return {}; stack[i] = node; } - // Must be a privately owned flat. + // Must be a privately owned, mutable flat. CordRep* const edge = node->Edge(kBack); - if (!edge->refcount.IsOne() || edge->tag < FLAT) return {}; + if (!edge->refcount.IsMutable() || edge->tag < FLAT) return {}; // Must have capacity. const size_t avail = edge->flat()->Capacity() - edge->length; @@ -950,6 +1066,63 @@ template CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data, size_t extra); +void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree, + bool consume) { + bool owned = consume && tree->refcount.IsOne(); + if (tree->height() == 0) { + for (CordRep* edge : tree->Edges()) { + if (!owned) edge = CordRep::Ref(edge); + size_t height = 0; + size_t length = edge->length; + CordRepBtree* node = stack[0]; + OpResult result = node->AddEdge(true, edge, length); + while (result.action == CordRepBtree::kPopped) { + stack[height] = result.tree; + if (stack[++height] == nullptr) { + result.action = CordRepBtree::kSelf; + stack[height] = CordRepBtree::New(node, result.tree); + } else { + node = stack[height]; + result = node->AddEdge(true, result.tree, length); + } + } + while (stack[++height] != nullptr) { + stack[height]->length += length; + } + } + } else { + for (CordRep* rep : tree->Edges()) { + Rebuild(stack, rep->btree(), owned); + } + } + if (consume) { + if (owned) { + CordRepBtree::Delete(tree); + } else { + CordRepBtree::Unref(tree); + } + } +} + +CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) { + // Set up initial stack with empty leaf node. 
+ CordRepBtree* node = CordRepBtree::New(); + CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node}; + + // Recursively build the tree, consuming the input tree. + Rebuild(stack, tree, /* consume reference */ true); + + // Return top most node + for (CordRepBtree* parent : stack) { + if (parent == nullptr) return node; + node = parent; + } + + // Unreachable + assert(false); + return nullptr; +} + } // namespace cord_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index bbaa7934..bb38f0c3 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -163,6 +163,12 @@ class CordRepBtree : public CordRep { // typically after a ref_count.Decrement() on the last reference count. static void Destroy(CordRepBtree* tree); + // Use CordRep::Unref() as we overload for absl::Span. + using CordRep::Unref; + + // Unrefs all edges in `edges` which are assumed to be 'likely one'. + static void Unref(absl::Span edges); + // Appends / Prepends an existing CordRep instance to this tree. // The below methods accept three types of input: // 1) `rep` is a data node (See `IsDataNode` for valid data edges). @@ -198,6 +204,19 @@ class CordRepBtree : public CordRep { // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero. CordRep* SubTree(size_t offset, size_t n); + // Removes `n` trailing bytes from `tree`, and returns the resulting tree + // or data edge. Returns `tree` if n is zero, and nullptr if n == length. + // This function is logically identical to: + // result = tree->SubTree(0, tree->length - n); + // Unref(tree); + // return result; + // However, the actual implementation will as much as possible perform 'in + // place' modifications on the tree on all nodes and edges that are mutable. + // For example, in a fully privately owned tree with the last edge being a + // flat of length 12, RemoveSuffix(1) will simply set the length of that data + // edge to 11, and reduce the length of all nodes on the edge path by 1. + static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n); + // Returns the character at the given offset. char GetCharacter(size_t offset) const; @@ -221,9 +240,9 @@ class CordRepBtree : public CordRep { // length of the flat node and involved tree nodes have been increased by // `span.length()`. The caller is responsible for immediately assigning values // to all uninitialized data reference by the returned span. - // Requires `this->refcount.IsOne()`: this function forces the caller to do - // this fast path check on the top level node, as this is the most commonly - // shared node of a cord tree. + // Requires `this->refcount.IsMutable()`: this function forces the + // caller to do this fast path check on the top level node, as this is the + // most commonly shared node of a cord tree. Span GetAppendBuffer(size_t size); // Returns the `height` of the tree. The height of a tree is limited to @@ -324,6 +343,11 @@ class CordRepBtree : public CordRep { // `front.height() + 1`. Requires `back.height() == front.height()`. static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back); + // Creates a fully balanced tree from the provided tree by rebuilding a new + // tree from all data edges in the input. This function is automatically + // invoked internally when the tree exceeds the maximum height. 
+ static CordRepBtree* Rebuild(CordRepBtree* tree); + private: CordRepBtree() = default; ~CordRepBtree() = default; @@ -368,6 +392,12 @@ class CordRepBtree : public CordRep { // Requires 0 < `offset` <= length. Position IndexBefore(size_t offset) const; + // Returns the index of the edge ending at (or on) length `length`, and the + // number of bytes inside that edge up to `length`. For example, if we have a + // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27) + // will return {1, 17}, and IndexOfLength(10) will return {0, 10}. + Position IndexOfLength(size_t n) const; + // Identical to the above function except starting from the position `front`. // This function is equivalent to `IndexBefore(front.n + offset)`, with // the difference that this function is optimized to start at `front.index`. @@ -406,11 +436,28 @@ class CordRepBtree : public CordRep { // created copy to `new_length`. CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const; + // Returns a tree containing the edges [tree->begin(), end) and length + // of `new_length`. This method consumes a reference on the provided + // tree, and logically performs the following operation: + // result = tree->CopyBeginTo(end, new_length); + // CordRep::Unref(tree); + // return result; + static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length); + // Creates a partial copy of this Btree node, copying all edges starting at // `begin`, adding a reference on each copied edge, and sets the length of // the newly created copy to `new_length`. CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const; + // Extracts and returns the front edge from the provided tree. + // This method consumes a reference on the provided tree, and logically + // performs the following operation: + // edge = CordRep::Ref(tree->Edge(kFront)); + // CordRep::Unref(tree); + // return edge; + static CordRep* ExtractFront(CordRepBtree* tree); + // Returns a tree containing the result of appending `right` to `left`. static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right); @@ -420,6 +467,12 @@ class CordRepBtree : public CordRep { static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep); static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep); + // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the + // function will consume a reference on `tree`. `stack` is a null terminated + // array containing the new tree's state, with the current leaf node at + // stack[0], and parent nodes above that, or null for 'top of tree'. + static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume); + // Aligns existing edges to start at index 0, to allow for a new edge to be // added to the back of the current edges. inline void AlignBegin(); @@ -459,11 +512,11 @@ class CordRepBtree : public CordRep { // Returns a partial copy of the current tree containing the first `n` bytes // of data. `CopyResult` contains both the resulting edge and its height. The // resulting tree may be less high than the current tree, or even be a single - // matching data edge. For example, if `n == 1`, then the result will be the - // single data edge, and height will be set to -1 (one below the owning leaf - // node). If n == 0, this function returns null. - // Requires `n <= length` - CopyResult CopyPrefix(size_t n); + // matching data edge if `allow_folding` is set to true. 
+ // For example, if `n == 1`, then the result will be the single data edge, and + // height will be set to -1 (one below the owning leaf node). If n == 0, this + // function returns null. Requires `n <= length` + CopyResult CopyPrefix(size_t n, bool allow_folding = true); // Returns a partial copy of the current tree containing all data starting // after `offset`. `CopyResult` contains both the resulting edge and its @@ -619,6 +672,14 @@ inline void CordRepBtree::Destroy(CordRepBtree* tree) { DestroyTree(tree, tree->begin(), tree->end()); } +inline void CordRepBtree::Unref(absl::Span edges) { + for (CordRep* edge : edges) { + if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) { + CordRep::Destroy(edge); + } + } +} + inline CordRepBtree* CordRepBtree::CopyRaw() const { auto* tree = static_cast(::operator new(sizeof(CordRepBtree))); memcpy(static_cast(tree), this, sizeof(CordRepBtree)); @@ -762,6 +823,14 @@ inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front, return {index, offset}; } +inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const { + assert(n <= length); + size_t index = back(); + size_t strip = length - n; + while (strip >= edges_[index]->length) strip -= edges_[index--]->length; + return {index, edges_[index]->length - strip}; +} + inline CordRepBtree::Position CordRepBtree::IndexBeyond( const size_t offset) const { // We need to find the edge which `starting offset` is beyond (>=)`offset`. @@ -780,7 +849,7 @@ inline CordRepBtree* CordRepBtree::Create(CordRep* rep) { } inline Span CordRepBtree::GetAppendBuffer(size_t size) { - assert(refcount.IsOne()); + assert(refcount.IsMutable()); CordRepBtree* tree = this; const int height = this->height(); CordRepBtree* n1 = tree; @@ -789,21 +858,21 @@ inline Span CordRepBtree::GetAppendBuffer(size_t size) { switch (height) { case 3: tree = tree->Edge(kBack)->btree(); - if (!tree->refcount.IsOne()) return {}; + if (!tree->refcount.IsMutable()) return {}; n2 = tree; ABSL_FALLTHROUGH_INTENDED; case 2: tree = tree->Edge(kBack)->btree(); - if (!tree->refcount.IsOne()) return {}; + if (!tree->refcount.IsMutable()) return {}; n1 = tree; ABSL_FALLTHROUGH_INTENDED; case 1: tree = tree->Edge(kBack)->btree(); - if (!tree->refcount.IsOne()) return {}; + if (!tree->refcount.IsMutable()) return {}; ABSL_FALLTHROUGH_INTENDED; case 0: CordRep* edge = tree->Edge(kBack); - if (!edge->refcount.IsOne()) return {}; + if (!edge->refcount.IsMutable()) return {}; if (edge->tag < FLAT) return {}; size_t avail = edge->flat()->Capacity() - edge->length; if (avail == 0) return {}; diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc index 073a7d45..be9473d4 100644 --- a/absl/strings/internal/cord_rep_btree_test.cc +++ b/absl/strings/internal/cord_rep_btree_test.cc @@ -47,7 +47,9 @@ class CordRepBtreeTestPeer { namespace { using ::absl::cordrep_testing::AutoUnref; +using ::absl::cordrep_testing::CordCollectRepsIf; using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CordVisitReps; using ::absl::cordrep_testing::CreateFlatsFromString; using ::absl::cordrep_testing::CreateRandomString; using ::absl::cordrep_testing::MakeConcat; @@ -62,6 +64,7 @@ using ::testing::ElementsAre; using ::testing::ElementsAreArray; using ::testing::Eq; using ::testing::HasSubstr; +using ::testing::Le; using ::testing::Ne; using ::testing::Not; using ::testing::SizeIs; @@ -205,14 +208,17 @@ CordRepBtree* MakeTree(size_t size, bool append = true) { return tree; } -CordRepBtree* 
CreateTree(absl::string_view data, size_t chunk_size) { - std::vector flats = CreateFlatsFromString(data, chunk_size); - auto it = flats.begin(); +CordRepBtree* CreateTree(absl::Span reps) { + auto it = reps.begin(); CordRepBtree* tree = CordRepBtree::Create(*it); - while (++it != flats.end()) tree = CordRepBtree::Append(tree, *it); + while (++it != reps.end()) tree = CordRepBtree::Append(tree, *it); return tree; } +CordRepBtree* CreateTree(absl::string_view data, size_t chunk_size) { + return CreateTree(CreateFlatsFromString(data, chunk_size)); +} + CordRepBtree* CreateTreeReverse(absl::string_view data, size_t chunk_size) { std::vector flats = CreateFlatsFromString(data, chunk_size); auto rit = flats.rbegin(); @@ -759,6 +765,63 @@ TEST(CordRepBtreeTest, MergeFuzzTest) { } } +TEST_P(CordRepBtreeTest, RemoveSuffix) { + // Create tree of 1, 2 and 3 levels high + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + for (size_t cap : {max_cap - 1, max_cap * 2, max_cap * max_cap * 2}) { + const std::string data = CreateRandomString(cap * 512); + + { + // Verify RemoveSuffix() + AutoUnref refs; + CordRepBtree* node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, data.length()), Eq(nullptr)); + + // Verify RemoveSuffix() + node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, 0), Eq(node)); + CordRep::Unref(node); + } + + for (int n = 1; n < data.length(); ++n) { + AutoUnref refs; + auto flats = CreateFlatsFromString(data, 512); + CordRepBtree* node = refs.RefIf(shared(), CreateTree(flats)); + CordRep* rep = refs.Add(CordRepBtree::RemoveSuffix(node, n)); + EXPECT_THAT(CordToString(rep), Eq(data.substr(0, data.length() - n))); + + // Collect all flats + auto is_flat = [](CordRep* rep) { return rep->tag >= FLAT; }; + std::vector edges = CordCollectRepsIf(is_flat, rep); + ASSERT_THAT(edges.size(), Le(flats.size())); + + // Isolate last edge + CordRep* last_edge = edges.back(); + edges.pop_back(); + const size_t last_length = rep->length - edges.size() * 512; + + // All flats except the last edge must be kept or copied 'as is' + int index = 0; + for (CordRep* edge : edges) { + ASSERT_THAT(edge, Eq(flats[index++])); + ASSERT_THAT(edge->length, Eq(512)); + } + + // CordRepBtree may optimize small substrings to avoid waste, so only + // check for flat sharing / updates where the code should always do this. 
+ if (last_length >= 500) { + EXPECT_THAT(last_edge, Eq(flats[index++])); + if (shared()) { + EXPECT_THAT(last_edge->length, Eq(512)); + } else { + EXPECT_TRUE(last_edge->refcount.IsOne()); + EXPECT_THAT(last_edge->length, Eq(last_length)); + } + } + } + } +} + TEST(CordRepBtreeTest, SubTree) { // Create tree of at least 2 levels high constexpr size_t max_cap = CordRepBtree::kMaxCapacity; @@ -986,23 +1049,6 @@ TEST_P(CordRepBtreeTest, CreateFromTreeReturnsTree) { CordRep::Unref(result); } -TEST_P(CordRepBtreeTest, ExceedMaxHeight) { - AutoUnref refs; - CordRepBtree* tree = MakeLeaf(); - for (int h = 1; h <= CordRepBtree::kMaxHeight; ++h) { - CordRepBtree* newtree = CordRepBtree::New(tree); - for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { - newtree = CordRepBtree::Append(newtree, CordRep::Ref(tree)); - } - tree = newtree; - } - refs.RefIf(shared(), tree); -#if defined(GTEST_HAS_DEATH_TEST) - EXPECT_DEATH(tree = CordRepBtree::Append(tree, MakeFlat("Boom")), ".*"); -#endif - CordRep::Unref(tree); -} - TEST(CordRepBtreeTest, GetCharacter) { size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2; std::string data = CreateRandomString(n * 3); @@ -1389,6 +1435,54 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { CordRep::Unref(tree); } +TEST_P(CordRepBtreeTest, Rebuild) { + for (size_t size : {3, 8, 100, 10000, 1000000}) { + SCOPED_TRACE(absl::StrCat("Rebuild @", size)); + + std::vector flats; + for (int i = 0; i < size; ++i) { + flats.push_back(CordRepFlat::New(2)); + flats.back()->Data()[0] = 'x'; + flats.back()->length = 1; + } + + // Build the tree into 'right', and each so many 'split_limit' edges, + // combine 'left' + 'right' into a new 'left', and start a new 'right'. + // This guarantees we get a reasonable amount of chaos in the tree. + size_t split_count = 0; + size_t split_limit = 3; + auto it = flats.begin(); + CordRepBtree* left = nullptr; + CordRepBtree* right = CordRepBtree::New(*it); + while (++it != flats.end()) { + if (++split_count >= split_limit) { + split_limit += split_limit / 16; + left = left ? CordRepBtree::Append(left, right) : right; + right = CordRepBtree::New(*it); + } else { + right = CordRepBtree::Append(right, *it); + } + } + + // Finalize tree + left = left ? CordRepBtree::Append(left, right) : right; + + // Rebuild + AutoUnref ref; + left = ref.Add(CordRepBtree::Rebuild(ref.RefIf(shared(), left))); + ASSERT_TRUE(CordRepBtree::IsValid(left)); + + // Verify we have the exact same edges in the exact same order. + bool ok = true; + it = flats.begin(); + CordVisitReps(left, [&](CordRep* edge) { + if (edge->tag < FLAT) return; + ok = ok && (it != flats.end() && *it++ == edge); + }); + EXPECT_TRUE(ok && it == flats.end()) << "Rebuild edges mismatch"; + } +} + } // namespace } // namespace cord_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/cord_rep_ring.cc b/absl/strings/internal/cord_rep_ring.cc index db1f63fa..07c77eb3 100644 --- a/absl/strings/internal/cord_rep_ring.cc +++ b/absl/strings/internal/cord_rep_ring.cc @@ -277,7 +277,7 @@ CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { // Get current number of entries, and check for max capacity. 
size_t entries = rep->entries(); - if (!rep->refcount.IsOne()) { + if (!rep->refcount.IsMutable()) { return Copy(rep, rep->head(), rep->tail(), extra); } else if (entries + extra > rep->capacity()) { const size_t min_grow = rep->capacity() + rep->capacity() / 2; @@ -292,10 +292,10 @@ CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { } Span CordRepRing::GetAppendBuffer(size_t size) { - assert(refcount.IsOne()); + assert(refcount.IsMutable()); index_type back = retreat(tail_); CordRep* child = entry_child(back); - if (child->tag >= FLAT && child->refcount.IsOne()) { + if (child->tag >= FLAT && child->refcount.IsMutable()) { size_t capacity = child->flat()->Capacity(); pos_type end_pos = entry_end_pos(back); size_t data_offset = entry_data_offset(back); @@ -312,10 +312,10 @@ Span CordRepRing::GetAppendBuffer(size_t size) { } Span CordRepRing::GetPrependBuffer(size_t size) { - assert(refcount.IsOne()); + assert(refcount.IsMutable()); CordRep* child = entry_child(head_); size_t data_offset = entry_data_offset(head_); - if (data_offset && child->refcount.IsOne() && child->tag >= FLAT) { + if (data_offset && child->refcount.IsMutable() && child->tag >= FLAT) { size_t n = (std::min)(data_offset, size); this->length += n; begin_pos_ -= n; @@ -504,7 +504,7 @@ CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) { CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, size_t extra) { - if (rep->refcount.IsOne()) { + if (rep->refcount.IsMutable()) { Span avail = rep->GetAppendBuffer(data.length()); if (!avail.empty()) { memcpy(avail.data(), data.data(), avail.length()); @@ -538,7 +538,7 @@ CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, CordRepRing* CordRepRing::Prepend(CordRepRing* rep, absl::string_view data, size_t extra) { - if (rep->refcount.IsOne()) { + if (rep->refcount.IsMutable()) { Span avail = rep->GetPrependBuffer(data.length()); if (!avail.empty()) { const char* tail = data.data() + data.length() - avail.length(); @@ -678,7 +678,7 @@ CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, Position tail = rep->FindTail(head.index, offset + len); const size_t new_entries = rep->entries(head.index, tail.index); - if (rep->refcount.IsOne() && extra <= (rep->capacity() - new_entries)) { + if (rep->refcount.IsMutable() && extra <= (rep->capacity() - new_entries)) { // We adopt a privately owned rep and no extra entries needed. if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); @@ -715,7 +715,7 @@ CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len, } Position head = rep->Find(len); - if (rep->refcount.IsOne()) { + if (rep->refcount.IsMutable()) { if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); rep->head_ = head.index; } else { @@ -745,7 +745,7 @@ CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len, } Position tail = rep->FindTail(rep->length - len); - if (rep->refcount.IsOne()) { + if (rep->refcount.IsMutable()) { // We adopt a privately owned rep, scrub. 
if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); rep->tail_ = tail.index; diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h index 44db8494..2000e21e 100644 --- a/absl/strings/internal/cord_rep_ring.h +++ b/absl/strings/internal/cord_rep_ring.h @@ -383,8 +383,8 @@ class CordRepRing : public CordRep { // Destroys the provided ring buffer, decrementing the reference count of all // contained child CordReps. The provided `rep` should have a ref count of - one (pre decrement destroy call observing `refcount.IsOne()`) or zero (post - decrement destroy call observing `!refcount.Decrement()`). + one (pre decrement destroy call observing `refcount.IsOne()`) or zero + (post decrement destroy call observing `!refcount.Decrement()`). static void Destroy(CordRepRing* rep); // Returns a mutable reference to the logical end position array. diff --git a/absl/strings/internal/cord_rep_test_util.h b/absl/strings/internal/cord_rep_test_util.h index bc500064..ad828af2 100644 --- a/absl/strings/internal/cord_rep_test_util.h +++ b/absl/strings/internal/cord_rep_test_util.h @@ -115,6 +115,41 @@ inline cord_internal::CordRepBtree* CordRepBtreeFromFlats( return node; } +template +inline void CordVisitReps(cord_internal::CordRep* rep, Fn&& fn) { + fn(rep); + while (rep->tag == cord_internal::SUBSTRING) { + rep = rep->substring()->child; + fn(rep); + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordVisitReps(edge, fn); + } + } else if (rep->tag == cord_internal::CONCAT) { + CordVisitReps(rep->concat()->left, fn); + CordVisitReps(rep->concat()->right, fn); + } +} + +template +inline std::vector CordCollectRepsIf( + Predicate&& predicate, cord_internal::CordRep* rep) { + std::vector reps; + CordVisitReps(rep, [&reps, &predicate](cord_internal::CordRep* rep) { + if (predicate(rep)) reps.push_back(rep); + }); + return reps; +} + +inline std::vector CordCollectReps( + cord_internal::CordRep* rep) { + std::vector reps; + auto fn = [&reps](cord_internal::CordRep* rep) { reps.push_back(rep); }; + CordVisitReps(rep, fn); + return reps; +} + inline void CordToString(cord_internal::CordRep* rep, std::string& s) { size_t offset = 0; size_t length = rep->length; diff --git a/absl/strings/internal/cordz_update_tracker.h b/absl/strings/internal/cordz_update_tracker.h index 02efcc3a..1f764486 100644 --- a/absl/strings/internal/cordz_update_tracker.h +++ b/absl/strings/internal/cordz_update_tracker.h @@ -39,6 +39,7 @@ class CordzUpdateTracker { // Tracked update methods.
enum MethodIdentifier { kUnknown, + kAppendBuffer, kAppendCord, kAppendExternalMemory, kAppendString, @@ -54,6 +55,7 @@ class CordzUpdateTracker { kMoveAppendCord, kMoveAssignCord, kMovePrependCord, + kPrependBuffer, kPrependCord, kPrependString, kRemovePrefix, diff --git a/absl/strings/internal/cordz_update_tracker_test.cc b/absl/strings/internal/cordz_update_tracker_test.cc index fcd17df7..2348a175 100644 --- a/absl/strings/internal/cordz_update_tracker_test.cc +++ b/absl/strings/internal/cordz_update_tracker_test.cc @@ -37,6 +37,7 @@ using Methods = std::array; // Returns an array of all methods defined in `MethodIdentifier` Methods AllMethods() { return Methods{Method::kUnknown, + Method::kAppendBuffer, Method::kAppendCord, Method::kAppendExternalMemory, Method::kAppendString, @@ -52,6 +53,7 @@ Methods AllMethods() { Method::kMoveAppendCord, Method::kMoveAssignCord, Method::kMovePrependCord, + Method::kPrependBuffer, Method::kPrependCord, Method::kPrependString, Method::kRemovePrefix, diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h index 1780bb44..4ae07c2d 100644 --- a/absl/strings/numbers.h +++ b/absl/strings/numbers.h @@ -96,6 +96,25 @@ ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out); // unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out); +// SimpleHexAtoi() +// +// Converts a hexadecimal string (optionally followed or preceded by ASCII +// whitespace) to an integer, returning `true` if successful. Only valid base-16 +// hexadecimal integers whose value falls within the range of the integer type +// (optionally preceded by a `+` or `-`) can be converted. A valid hexadecimal +// value may include both upper and lowercase character symbols, and may +// optionally include a leading "0x" (or "0X") number prefix, which is ignored +// by this function. If any errors are encountered, this function returns +// `false`, leaving `out` in an unspecified state. +template +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out); + +// Overloads of SimpleHexAtoi() for 128 bit integers. 
+ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::int128* out); +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::uint128* out); + ABSL_NAMESPACE_END } // namespace absl @@ -260,6 +279,21 @@ ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str, return numbers_internal::safe_strtou128_base(str, out, 10); } +template +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out) { + return numbers_internal::safe_strtoi_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::int128* out) { + return numbers_internal::safe_strto128_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::uint128* out) { + return numbers_internal::safe_strtou128_base(str, out, 16); +} + ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index f3103106..498c210d 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -47,6 +47,7 @@ namespace { using absl::SimpleAtoi; +using absl::SimpleHexAtoi; using absl::numbers_internal::kSixDigitsToBufferSize; using absl::numbers_internal::safe_strto32_base; using absl::numbers_internal::safe_strto64_base; @@ -468,6 +469,148 @@ TEST(NumbersTest, Atoenum) { VerifySimpleAtoiGood(E_biguint_max32, E_biguint_max32); } +template +void VerifySimpleHexAtoiGood(in_val_type in_value, int_type exp_value) { + std::string s; + // uint128 can be streamed but not StrCat'd + absl::strings_internal::OStringStream strm(&s); + if (in_value >= 0) { + strm << std::hex << in_value; + } else { + // Inefficient for small integers, but works with all integral types. + strm << "-" << std::hex << -absl::uint128(in_value); + } + int_type x = static_cast(~exp_value); + EXPECT_TRUE(SimpleHexAtoi(s, &x)) + << "in_value=" << std::hex << in_value << " s=" << s << " x=" << x; + EXPECT_EQ(exp_value, x); + x = static_cast(~exp_value); + EXPECT_TRUE(SimpleHexAtoi( + s.c_str(), &x)); // NOLINT: readability-redundant-string-conversions + EXPECT_EQ(exp_value, x); +} + +template +void VerifySimpleHexAtoiBad(in_val_type in_value) { + std::string s; + // uint128 can be streamed but not StrCat'd + absl::strings_internal::OStringStream strm(&s); + if (in_value >= 0) { + strm << std::hex << in_value; + } else { + // Inefficient for small integers, but works with all integral types. 
diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc
index f3103106..498c210d 100644
--- a/absl/strings/numbers_test.cc
+++ b/absl/strings/numbers_test.cc
@@ -47,6 +47,7 @@ namespace {
 
 using absl::SimpleAtoi;
+using absl::SimpleHexAtoi;
 using absl::numbers_internal::kSixDigitsToBufferSize;
 using absl::numbers_internal::safe_strto32_base;
 using absl::numbers_internal::safe_strto64_base;
@@ -468,6 +469,148 @@ TEST(NumbersTest, Atoenum) {
   VerifySimpleAtoiGood<E_biguint>(E_biguint_max32, E_biguint_max32);
 }
 
+template <typename int_type, typename in_val_type>
+void VerifySimpleHexAtoiGood(in_val_type in_value, int_type exp_value) {
+  std::string s;
+  // uint128 can be streamed but not StrCat'd
+  absl::strings_internal::OStringStream strm(&s);
+  if (in_value >= 0) {
+    strm << std::hex << in_value;
+  } else {
+    // Inefficient for small integers, but works with all integral types.
+    strm << "-" << std::hex << -absl::uint128(in_value);
+  }
+  int_type x = static_cast<int_type>(~exp_value);
+  EXPECT_TRUE(SimpleHexAtoi(s, &x))
+      << "in_value=" << std::hex << in_value << " s=" << s << " x=" << x;
+  EXPECT_EQ(exp_value, x);
+  x = static_cast<int_type>(~exp_value);
+  EXPECT_TRUE(SimpleHexAtoi(
+      s.c_str(), &x));  // NOLINT: readability-redundant-string-conversions
+  EXPECT_EQ(exp_value, x);
+}
+
+template <typename int_type, typename in_val_type>
+void VerifySimpleHexAtoiBad(in_val_type in_value) {
+  std::string s;
+  // uint128 can be streamed but not StrCat'd
+  absl::strings_internal::OStringStream strm(&s);
+  if (in_value >= 0) {
+    strm << std::hex << in_value;
+  } else {
+    // Inefficient for small integers, but works with all integral types.
+    strm << "-" << std::hex << -absl::uint128(in_value);
+  }
+  int_type x;
+  EXPECT_FALSE(SimpleHexAtoi(s, &x));
+  EXPECT_FALSE(SimpleHexAtoi(
+      s.c_str(), &x));  // NOLINT: readability-redundant-string-conversions
+}
+
+TEST(NumbersTest, HexAtoi) {
+  // SimpleHexAtoi(absl::string_view, int32_t)
+  VerifySimpleHexAtoiGood<int32_t>(0, 0);
+  VerifySimpleHexAtoiGood<int32_t>(0x42, 0x42);
+  VerifySimpleHexAtoiGood<int32_t>(-0x42, -0x42);
+
+  VerifySimpleHexAtoiGood<int32_t>(std::numeric_limits<int32_t>::min(),
+                                   std::numeric_limits<int32_t>::min());
+  VerifySimpleHexAtoiGood<int32_t>(std::numeric_limits<int32_t>::max(),
+                                   std::numeric_limits<int32_t>::max());
+
+  // SimpleHexAtoi(absl::string_view, uint32_t)
+  VerifySimpleHexAtoiGood<uint32_t>(0, 0);
+  VerifySimpleHexAtoiGood<uint32_t>(0x42, 0x42);
+  VerifySimpleHexAtoiBad<uint32_t>(-0x42);
+
+  VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int32_t>::min());
+  VerifySimpleHexAtoiGood<uint32_t>(std::numeric_limits<int32_t>::max(),
+                                    std::numeric_limits<int32_t>::max());
+  VerifySimpleHexAtoiGood<uint32_t>(std::numeric_limits<uint32_t>::max(),
+                                    std::numeric_limits<uint32_t>::max());
+  VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int64_t>::min());
+  VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int64_t>::max());
+  VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<uint64_t>::max());
+
+  // SimpleHexAtoi(absl::string_view, int64_t)
+  VerifySimpleHexAtoiGood<int64_t>(0, 0);
+  VerifySimpleHexAtoiGood<int64_t>(0x42, 0x42);
+  VerifySimpleHexAtoiGood<int64_t>(-0x42, -0x42);
+
+  VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int32_t>::min(),
+                                   std::numeric_limits<int32_t>::min());
+  VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int32_t>::max(),
+                                   std::numeric_limits<int32_t>::max());
+  VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<uint32_t>::max(),
+                                   std::numeric_limits<uint32_t>::max());
+  VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int64_t>::min(),
+                                   std::numeric_limits<int64_t>::min());
+  VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int64_t>::max(),
+                                   std::numeric_limits<int64_t>::max());
+  VerifySimpleHexAtoiBad<int64_t>(std::numeric_limits<uint64_t>::max());
+
+  // SimpleHexAtoi(absl::string_view, uint64_t)
+  VerifySimpleHexAtoiGood<uint64_t>(0, 0);
+  VerifySimpleHexAtoiGood<uint64_t>(0x42, 0x42);
+  VerifySimpleHexAtoiBad<uint64_t>(-0x42);
+
+  VerifySimpleHexAtoiBad<uint64_t>(std::numeric_limits<int32_t>::min());
+  VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<int32_t>::max(),
+                                    std::numeric_limits<int32_t>::max());
+  VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<uint32_t>::max(),
+                                    std::numeric_limits<uint32_t>::max());
+  VerifySimpleHexAtoiBad<uint64_t>(std::numeric_limits<int64_t>::min());
+  VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<int64_t>::max(),
+                                    std::numeric_limits<int64_t>::max());
+  VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<uint64_t>::max(),
+                                    std::numeric_limits<uint64_t>::max());
+
+  // SimpleHexAtoi(absl::string_view, absl::uint128)
+  VerifySimpleHexAtoiGood<absl::uint128>(0, 0);
+  VerifySimpleHexAtoiGood<absl::uint128>(0x42, 0x42);
+  VerifySimpleHexAtoiBad<absl::uint128>(-0x42);
+
+  VerifySimpleHexAtoiBad<absl::uint128>(std::numeric_limits<int32_t>::min());
+  VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<int32_t>::max(),
+                                         std::numeric_limits<int32_t>::max());
+  VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<uint32_t>::max(),
+                                         std::numeric_limits<uint32_t>::max());
+  VerifySimpleHexAtoiBad<absl::uint128>(std::numeric_limits<int64_t>::min());
+  VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<int64_t>::max(),
+                                         std::numeric_limits<int64_t>::max());
+  VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<uint64_t>::max(),
+                                         std::numeric_limits<uint64_t>::max());
+  VerifySimpleHexAtoiGood<absl::uint128>(
+      std::numeric_limits<absl::uint128>::max(),
+      std::numeric_limits<absl::uint128>::max());
+
+  // Some other types
+  VerifySimpleHexAtoiGood<int>(-0x42, -0x42);
+  VerifySimpleHexAtoiGood<int32_t>(-0x42, -0x42);
+  VerifySimpleHexAtoiGood<uint32_t>(0x42, 0x42);
+  VerifySimpleHexAtoiGood<unsigned int>(0x42, 0x42);
+  VerifySimpleHexAtoiGood<int64_t>(-0x42, -0x42);
+  VerifySimpleHexAtoiGood<long>(-0x42, -0x42);  // NOLINT: runtime-int
+  VerifySimpleHexAtoiGood<uint64_t>(0x42, 0x42);
+  VerifySimpleHexAtoiGood<size_t>(0x42, 0x42);
+  VerifySimpleHexAtoiGood<std::string::size_type>(0x42, 0x42);
+
+  // Number prefix
+  int32_t value;
+  EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16));
+  EXPECT_EQ(0x34234324, value);
+
+  EXPECT_TRUE(safe_strto32_base("0X34234324", &value, 16));
+  EXPECT_EQ(0x34234324, value);
+
+  // ASCII whitespace
+  EXPECT_TRUE(safe_strto32_base(" \t\n 34234324", &value, 16));
+  EXPECT_EQ(0x34234324, value);
+
+  EXPECT_TRUE(safe_strto32_base("34234324 \t\n ", &value, 16));
+  EXPECT_EQ(0x34234324, value);
+}
+
 TEST(stringtest, safe_strto32_base) {
   int32_t value;
   EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16));
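[Editorial note, not part of the patch: one subtlety in the verifier helpers
above is the negative-value path. Converting a negative integer to
absl::uint128 wraps modulo 2^128, and negating that unsigned value recovers
the magnitude without the signed-overflow UB that plain negation would hit
for extreme inputs such as INT32_MIN. A standalone sketch of the round trip,
using std::ostringstream in place of the internal OStringStream and an
illustrative value:]

    #include <cstdint>
    #include <iostream>
    #include <sstream>
    #include <string>

    #include "absl/numeric/int128.h"
    #include "absl/strings/numbers.h"

    int main() {
      int32_t in_value = -0x42;
      std::ostringstream strm;
      // Widen to uint128 before negating: uint128(-0x42) wraps to
      // 2^128 - 0x42, and unary minus then yields the magnitude 0x42.
      strm << "-" << std::hex << -absl::uint128(in_value);
      std::string s = strm.str();  // "-42"
      int32_t out = 0;
      // SimpleHexAtoi accepts the leading '-' and parses the hex magnitude.
      if (absl::SimpleHexAtoi(s, &out)) {
        std::cout << s << " -> " << out << "\n";  // "-42 -> -66"
      }
      return 0;
    }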
diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc
index f472f9ed..1b4427b8 100644
--- a/absl/strings/str_split_test.cc
+++ b/absl/strings/str_split_test.cc
@@ -943,8 +943,14 @@ TEST(Delimiter, ByLength) {
 }
 
 TEST(Split, WorksWithLargeStrings) {
+#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
+    defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER)
+  constexpr size_t kSize = (uint32_t{1} << 26) + 1;  // 64M + 1 byte
+#else
+  constexpr size_t kSize = (uint32_t{1} << 31) + 1;  // 2G + 1 byte
+#endif
   if (sizeof(size_t) > 4) {
-    std::string s((uint32_t{1} << 31) + 1, 'x');  // 2G + 1 byte
+    std::string s(kSize, 'x');
     s.back() = '-';
     std::vector<absl::string_view> v = absl::StrSplit(s, '-');
     EXPECT_EQ(2, v.size());
diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc
index 1f3c7409..8980b198 100644
--- a/absl/strings/substitute.cc
+++ b/absl/strings/substitute.cc
@@ -75,7 +75,8 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format,
 
   // Build the string.
   size_t original_size = output->size();
-  strings_internal::STLStringResizeUninitialized(output, original_size + size);
+  strings_internal::STLStringResizeUninitializedAmortized(output,
+                                                          original_size + size);
   char* target = &(*output)[original_size];
   for (size_t i = 0; i < format.size(); i++) {
     if (format[i] == '$') {
-- 
cgit v1.2.3
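[Editorial note on the substitute.cc hunk above, not part of the patch: the
amortized variant grows the target string geometrically, so a loop of
SubstituteAndAppend calls into the same output performs O(log n)
reallocations instead of one per call. A rough sketch of the idea using only
public std::string operations; ResizeAmortized is a hypothetical stand-in
for the internal STLStringResizeUninitializedAmortized, which additionally
skips zero-filling the new tail:]

    #include <algorithm>
    #include <cstddef>
    #include <string>

    // Hypothetical stand-in for the internal amortized-resize helper: before
    // growing to `new_size`, reserve at least double the current capacity so
    // repeated small appends cost amortized linear time, not quadratic.
    void ResizeAmortized(std::string* s, size_t new_size) {
      if (new_size > s->capacity()) {
        s->reserve(std::max<size_t>(new_size, 2 * s->capacity()));
      }
      s->resize(new_size);  // the real helper leaves the new bytes uninitialized
    }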