From bf31a10b65d945665cecfb9d8807702ae4a7fde1 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Fri, 6 Aug 2021 06:19:26 -0700 Subject: Export of internal Abseil changes -- 93c607726d663800b4bfa472cba043fd3f5d0e97 by Derek Mauro : Internal change PiperOrigin-RevId: 389158822 -- 55b3bb50bbc168567c6ba25d07df2c2c39e864af by Martijn Vels : Change CordRepRing alternative implementation to CordRepBtree alternative. This changes makes CordRepBtree (BTREE) the alternative to CordRepConcat (CONCAT) trees, enabled through the internal / experimental 'cord_btree_enabled' latch. PiperOrigin-RevId: 389030571 -- d6fc346143606c096bca8eb5029e4c429ac6e305 by Todd Lipcon : Fix a small typo in SequenceLock doc comment PiperOrigin-RevId: 388972936 -- e46f9245dce8b4150e3ca2664e0cf42b75f90a83 by Martijn Vels : Add 'shallow' validation mode to CordRepBtree which will be the default for internal assertions. PiperOrigin-RevId: 388753606 -- b5e74f163b490beb006f848ace67bb650433fe13 by Martijn Vels : Add btree statistics to CordzInfo, and reduce rounding errors PiperOrigin-RevId: 388715878 -- 105bcbf80de649937e693b29b18220f9e6841a51 by Evan Brown : Skip length checking when constructing absl::string_view from `const char*`. The length check causes unnecessary code bloat. PiperOrigin-RevId: 388271741 -- bed595158f24839efe49c65ae483f797d79fe0ae by Derek Mauro : Internal change PiperOrigin-RevId: 387713428 GitOrigin-RevId: 93c607726d663800b4bfa472cba043fd3f5d0e97 Change-Id: I2a4840f5ffcd7f70b7d7d45cce66f23c42cf565f --- absl/strings/BUILD.bazel | 1 + absl/strings/CMakeLists.txt | 1 + absl/strings/cord.cc | 180 +++++++++++---------- absl/strings/cord.h | 37 ++--- absl/strings/cord_test.cc | 2 +- absl/strings/internal/cord_internal.cc | 1 + absl/strings/internal/cord_internal.h | 6 + absl/strings/internal/cord_rep_btree.cc | 21 ++- absl/strings/internal/cord_rep_btree.h | 32 +++- absl/strings/internal/cord_rep_btree_test.cc | 43 +++++ absl/strings/internal/cordz_info.cc | 51 +++--- .../strings/internal/cordz_info_statistics_test.cc | 133 ++++++++++++++- absl/strings/internal/cordz_statistics.h | 3 + absl/strings/string_view.h | 4 +- 14 files changed, 368 insertions(+), 147 deletions(-) (limited to 'absl/strings') diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 9720b683..f9735b4b 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -318,6 +318,7 @@ cc_test( ":strings", "//absl/base:config", "//absl/base:raw_logging_internal", + "//absl/cleanup", "@com_google_googletest//:gtest_main", ], ) diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 88f076a8..8ad5f9c7 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -952,6 +952,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::base + absl::cleanup absl::config absl::cord_internal absl::cord_rep_test_util diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index f5aa6e47..ecbd2dbd 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -36,8 +36,8 @@ #include "absl/container/inlined_vector.h" #include "absl/strings/escaping.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_flat.h" -#include "absl/strings/internal/cord_rep_ring.h" #include "absl/strings/internal/cordz_statistics.h" #include "absl/strings/internal/cordz_update_scope.h" #include "absl/strings/internal/cordz_update_tracker.h" @@ -51,20 +51,20 @@ namespace absl { ABSL_NAMESPACE_BEGIN using ::absl::cord_internal::CordRep; +using ::absl::cord_internal::CordRepBtree; using ::absl::cord_internal::CordRepConcat; using ::absl::cord_internal::CordRepExternal; using ::absl::cord_internal::CordRepFlat; -using ::absl::cord_internal::CordRepRing; using ::absl::cord_internal::CordRepSubstring; using ::absl::cord_internal::CordzUpdateTracker; using ::absl::cord_internal::InlineData; using ::absl::cord_internal::kMaxFlatLength; using ::absl::cord_internal::kMinFlatLength; +using ::absl::cord_internal::BTREE; using ::absl::cord_internal::CONCAT; using ::absl::cord_internal::EXTERNAL; using ::absl::cord_internal::FLAT; -using ::absl::cord_internal::RING; using ::absl::cord_internal::SUBSTRING; using ::absl::cord_internal::kInlinedVectorSize; @@ -100,8 +100,8 @@ static constexpr uint64_t min_length[] = { static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length); -static inline bool cord_ring_enabled() { - return cord_internal::cord_ring_buffer_enabled.load( +static inline bool btree_enabled() { + return cord_internal::cord_btree_enabled.load( std::memory_order_relaxed); } @@ -218,27 +218,25 @@ static CordRepFlat* CreateFlat(const char* data, size_t length, return flat; } -// Creates a new flat or ringbuffer out of the specified array. +// Creates a new flat or Btree out of the specified array. // The returned node has a refcount of 1. -static CordRep* RingNewTree(const char* data, size_t length, - size_t alloc_hint) { +static CordRep* NewBtree(const char* data, size_t length, size_t alloc_hint) { if (length <= kMaxFlatLength) { return CreateFlat(data, length, alloc_hint); } CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0); data += kMaxFlatLength; length -= kMaxFlatLength; - size_t extra = (length - 1) / kMaxFlatLength + 1; - auto* root = CordRepRing::Create(flat, extra); - return CordRepRing::Append(root, {data, length}, alloc_hint); + auto* root = CordRepBtree::Create(flat); + return CordRepBtree::Append(root, {data, length}, alloc_hint); } // Create a new tree out of the specified array. // The returned node has a refcount of 1. static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) { if (length == 0) return nullptr; - if (cord_ring_enabled()) { - return RingNewTree(data, length, alloc_hint); + if (btree_enabled()) { + return NewBtree(data, length, alloc_hint); } absl::FixedArray reps((length - 1) / kMaxFlatLength + 1); size_t n = 0; @@ -346,10 +344,10 @@ inline void Cord::InlineRep::remove_prefix(size_t n) { reduce_size(n); } -// Returns `rep` converted into a CordRepRing. -// Directly returns `rep` if `rep` is already a CordRepRing. -static CordRepRing* ForceRing(CordRep* rep, size_t extra) { - return (rep->tag == RING) ? rep->ring() : CordRepRing::Create(rep, extra); +// Returns `rep` converted into a CordRepBtree. +// Directly returns `rep` if `rep` is already a CordRepBtree. +static CordRepBtree* ForceBtree(CordRep* rep) { + return rep->tag == BTREE ? rep->btree() : CordRepBtree::Create(rep); } void Cord::InlineRep::AppendTreeToInlined(CordRep* tree, @@ -357,8 +355,8 @@ void Cord::InlineRep::AppendTreeToInlined(CordRep* tree, assert(!is_tree()); if (!data_.is_empty()) { CordRepFlat* flat = MakeFlatWithExtraCapacity(0); - if (cord_ring_enabled()) { - tree = CordRepRing::Append(CordRepRing::Create(flat, 1), tree); + if (btree_enabled()) { + tree = CordRepBtree::Append(CordRepBtree::Create(flat), tree); } else { tree = Concat(flat, tree); } @@ -369,8 +367,8 @@ void Cord::InlineRep::AppendTreeToInlined(CordRep* tree, void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) { assert(is_tree()); const CordzUpdateScope scope(data_.cordz_info(), method); - if (cord_ring_enabled()) { - tree = CordRepRing::Append(ForceRing(data_.as_tree(), 1), tree); + if (btree_enabled()) { + tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree); } else { tree = Concat(data_.as_tree(), tree); } @@ -391,8 +389,8 @@ void Cord::InlineRep::PrependTreeToInlined(CordRep* tree, assert(!is_tree()); if (!data_.is_empty()) { CordRepFlat* flat = MakeFlatWithExtraCapacity(0); - if (cord_ring_enabled()) { - tree = CordRepRing::Prepend(CordRepRing::Create(flat, 1), tree); + if (btree_enabled()) { + tree = CordRepBtree::Prepend(CordRepBtree::Create(flat), tree); } else { tree = Concat(tree, flat); } @@ -404,8 +402,8 @@ void Cord::InlineRep::PrependTreeToTree(CordRep* tree, MethodIdentifier method) { assert(is_tree()); const CordzUpdateScope scope(data_.cordz_info(), method); - if (cord_ring_enabled()) { - tree = CordRepRing::Prepend(ForceRing(data_.as_tree(), 1), tree); + if (btree_enabled()) { + tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree); } else { tree = Concat(tree, data_.as_tree()); } @@ -427,8 +425,8 @@ void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) { // written to region and the actual size increase will be written to size. static inline bool PrepareAppendRegion(CordRep* root, char** region, size_t* size, size_t max_length) { - if (root->tag == RING && root->refcount.IsOne()) { - Span span = root->ring()->GetAppendBuffer(max_length); + if (root->tag == BTREE && root->refcount.IsOne()) { + Span span = root->btree()->GetAppendBuffer(max_length); if (!span.empty()) { *region = span.data(); *size = span.size(); @@ -500,8 +498,8 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, *region = new_node->Data(); *size = new_node->length; - if (cord_ring_enabled()) { - rep = CordRepRing::Append(ForceRing(rep, 1), new_node); + if (btree_enabled()) { + rep = CordRepBtree::Append(ForceBtree(rep), new_node); } else { rep = Concat(rep, new_node); } @@ -511,8 +509,13 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, // If the rep is a leaf, this will increment the value at total_mem_usage and // will return true. static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { + size_t maybe_sub_size = 0; + if (rep->tag == SUBSTRING) { + maybe_sub_size = sizeof(cord_internal::CordRepSubstring); + rep = rep->substring()->child; + } if (rep->tag >= FLAT) { - *total_mem_usage += rep->flat()->AllocatedSize(); + *total_mem_usage += maybe_sub_size + rep->flat()->AllocatedSize(); return true; } if (rep->tag == EXTERNAL) { @@ -520,8 +523,9 @@ static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { // assume it is 'at least' a word / pointer to data. In the future we may // choose to use the 'data' byte as a tag to identify the types of some // well-known externals, such as a std::string instance. - *total_mem_usage += - sizeof(cord_internal::CordRepExternalImpl) + rep->length; + *total_mem_usage += maybe_sub_size + + sizeof(cord_internal::CordRepExternalImpl) + + rep->length; return true; } return false; @@ -690,9 +694,11 @@ void Cord::InlineRep::AppendArray(absl::string_view src, return; } - if (cord_ring_enabled()) { - rep = ForceRing(rep, (src.size() - 1) / kMaxFlatLength + 1); - rep = CordRepRing::Append(rep->ring(), src); + if (btree_enabled()) { + // TODO(b/192061034): keep legacy 10% growth rate: consider other rates. + rep = ForceBtree(rep); + const size_t alloc_hint = (std::min)(kMaxFlatLength, rep->length / 10); + rep = CordRepBtree::Append(rep->btree(), src, alloc_hint); } else { // Use new block(s) for any remaining bytes that were not handled above. // Alloc extra memory only if the right child of the root of the new tree @@ -769,9 +775,13 @@ inline void Cord::AppendImpl(C&& src) { contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord); } -void Cord::Append(const Cord& src) { AppendImpl(src); } +void Cord::Append(const Cord& src) { + AppendImpl(src); +} -void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); } +void Cord::Append(Cord&& src) { + AppendImpl(std::move(src)); +} template > void Cord::Append(T&& src) { @@ -923,8 +933,10 @@ void Cord::RemovePrefix(size_t n) { } else { auto constexpr method = CordzUpdateTracker::kRemovePrefix; CordzUpdateScope scope(contents_.cordz_info(), method); - if (tree->tag == RING) { - tree = CordRepRing::RemovePrefix(tree->ring(), n); + if (tree->tag == BTREE) { + CordRep* old = tree; + tree = tree->btree()->SubTree(n, tree->length - n); + CordRep::Unref(old); } else { CordRep* newrep = RemovePrefixFrom(tree, n); CordRep::Unref(tree); @@ -944,8 +956,10 @@ void Cord::RemoveSuffix(size_t n) { } else { auto constexpr method = CordzUpdateTracker::kRemoveSuffix; CordzUpdateScope scope(contents_.cordz_info(), method); - if (tree->tag == RING) { - tree = CordRepRing::RemoveSuffix(tree->ring(), n); + if (tree->tag == BTREE) { + CordRep* old = tree; + tree = tree->btree()->SubTree(0, tree->length - n); + CordRep::Unref(old); } else { CordRep* newrep = RemoveSuffixFrom(tree, n); CordRep::Unref(tree); @@ -1037,9 +1051,8 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { return sub_cord; } - if (tree->tag == RING) { - CordRepRing* ring = CordRep::Ref(tree)->ring(); - tree = CordRepRing::SubRing(ring, pos, new_size); + if (tree->tag == BTREE) { + tree = tree->btree()->SubTree(pos, new_size); } else { tree = NewSubRange(tree, pos, new_size); } @@ -1227,8 +1240,8 @@ bool ComputeCompareResult(int memcmp_res) { } // namespace -// Helper routine. Locates the first flat chunk of the Cord without -// initializing the iterator. +// Helper routine. Locates the first flat or external chunk of the Cord without +// initializing the iterator, and returns a string_view referencing the data. inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { if (!is_tree()) { return absl::string_view(data_.as_chars(), data_.inline_size()); @@ -1243,8 +1256,13 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { return absl::string_view(node->external()->base, node->length); } - if (node->tag == RING) { - return node->ring()->entry_data(node->ring()->head()); + if (node->tag == BTREE) { + CordRepBtree* tree = node->btree(); + int height = tree->height(); + while (--height >= 0) { + tree = tree->Edge(CordRepBtree::kFront)->btree(); + } + return tree->Data(tree->begin()); } // Walk down the left branches until we hit a non-CONCAT node. @@ -1506,22 +1524,21 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { return subcord; } - if (ring_reader_) { + if (btree_reader_) { size_t chunk_size = current_chunk_.size(); if (n <= chunk_size && n <= kMaxBytesToCopy) { subcord = Cord(current_chunk_.substr(0, n), method); + if (n < chunk_size) { + current_chunk_.remove_prefix(n); + } else { + current_chunk_ = btree_reader_.Next(); + } } else { - auto* ring = CordRep::Ref(ring_reader_.ring())->ring(); - size_t offset = ring_reader_.length() - bytes_remaining_; - CordRep* rep = CordRepRing::SubRing(ring, offset, n); + CordRep* rep; + current_chunk_ = btree_reader_.Read(n, chunk_size, rep); subcord.contents_.EmplaceTree(rep, method); } - if (n < chunk_size) { - bytes_remaining_ -= n; - current_chunk_.remove_prefix(n); - } else { - AdvanceBytesRing(n); - } + bytes_remaining_ -= n; return subcord; } @@ -1698,8 +1715,8 @@ char Cord::operator[](size_t i) const { if (rep->tag >= FLAT) { // Get the "i"th character directly from the flat array. return rep->flat()->Data()[offset]; - } else if (rep->tag == RING) { - return rep->ring()->GetCharacter(offset); + } else if (rep->tag == BTREE) { + return rep->btree()->GetCharacter(offset); } else if (rep->tag == EXTERNAL) { // Get the "i"th character from the external array. return rep->external()->base[offset]; @@ -1758,8 +1775,8 @@ absl::string_view Cord::FlattenSlowPath() { } else if (rep->tag == EXTERNAL) { *fragment = absl::string_view(rep->external()->base, rep->length); return true; - } else if (rep->tag == RING) { - return rep->ring()->IsFlat(fragment); + } else if (rep->tag == BTREE) { + return rep->btree()->IsFlat(fragment); } else if (rep->tag == SUBSTRING) { CordRep* child = rep->substring()->child; if (child->tag >= FLAT) { @@ -1770,9 +1787,9 @@ absl::string_view Cord::FlattenSlowPath() { *fragment = absl::string_view( child->external()->base + rep->substring()->start, rep->length); return true; - } else if (child->tag == RING) { - return child->ring()->IsFlat(rep->substring()->start, rep->length, - fragment); + } else if (child->tag == BTREE) { + return child->btree()->IsFlat(rep->substring()->start, rep->length, + fragment); } } return false; @@ -1781,7 +1798,7 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ void Cord::ForEachChunkAux( absl::cord_internal::CordRep* rep, absl::FunctionRef callback) { - if (rep->tag == RING) { + if (rep->tag == BTREE) { ChunkIterator it(rep), end; while (it != end) { callback(*it); @@ -1866,15 +1883,7 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, *os << absl::CEscape(std::string(rep->flat()->Data(), rep->length)); *os << "]\n"; } else { - assert(rep->tag == RING); - auto* ring = rep->ring(); - *os << "RING, entries = " << ring->entries() << "\n"; - CordRepRing::index_type head = ring->head(); - do { - DumpNode(ring->entry_child(head), include_data, os, - indent + kIndentStep); - head = ring->advance(head); - } while (head != ring->tail()); + CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os); } if (stack.empty()) break; rep = stack.back(); @@ -1967,15 +1976,18 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, } next_node = right; } - } else if (cur_node->tag == RING) { - total_mem_usage += CordRepRing::AllocSize(cur_node->ring()->capacity()); - const CordRepRing* ring = cur_node->ring(); - CordRepRing::index_type pos = ring->head(), tail = ring->tail(); - do { - CordRep* node = ring->entry_child(pos); - assert(node->tag >= FLAT || node->tag == EXTERNAL); - RepMemoryUsageLeaf(node, &total_mem_usage); - } while ((pos = ring->advance(pos)) != tail); + } else if (cur_node->tag == BTREE) { + total_mem_usage += sizeof(CordRepBtree); + const CordRepBtree* node = cur_node->btree(); + if (node->height() == 0) { + for (const CordRep* edge : node->Edges()) { + RepMemoryUsageLeaf(edge, &total_mem_usage); + } + } else { + for (const CordRep* edge : node->Edges()) { + tree_stack.push_back(edge); + } + } } else { // Since cur_node is not a leaf or a concat node it must be a substring. assert(cur_node->tag == SUBSTRING); diff --git a/absl/strings/cord.h b/absl/strings/cord.h index e758f1cd..ac1832f0 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -79,8 +79,9 @@ #include "absl/functional/function_ref.h" #include "absl/meta/type_traits.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_reader.h" #include "absl/strings/internal/cord_rep_ring.h" -#include "absl/strings/internal/cord_rep_ring_reader.h" #include "absl/strings/internal/cordz_functions.h" #include "absl/strings/internal/cordz_info.h" #include "absl/strings/internal/cordz_statistics.h" @@ -370,8 +371,8 @@ class Cord { private: using CordRep = absl::cord_internal::CordRep; - using CordRepRing = absl::cord_internal::CordRepRing; - using CordRepRingReader = absl::cord_internal::CordRepRingReader; + using CordRepBtree = absl::cord_internal::CordRepBtree; + using CordRepBtreeReader = absl::cord_internal::CordRepBtreeReader; // Stack of right children of concat nodes that we have to visit. // Keep this at the end of the structure to avoid cache-thrashing. @@ -397,9 +398,9 @@ class Cord { // Stack specific operator++ ChunkIterator& AdvanceStack(); - // Ring buffer specific operator++ - ChunkIterator& AdvanceRing(); - void AdvanceBytesRing(size_t n); + // Btree specific operator++ + ChunkIterator& AdvanceBtree(); + void AdvanceBytesBtree(size_t n); // Iterates `n` bytes, where `n` is expected to be greater than or equal to // `current_chunk_.size()`. @@ -415,8 +416,8 @@ class Cord { // The number of bytes left in the `Cord` over which we are iterating. size_t bytes_remaining_ = 0; - // Cord reader for ring buffers. Empty if not traversing a ring buffer. - CordRepRingReader ring_reader_; + // Cord reader for cord btrees. Empty if not traversing a btree. + CordRepBtreeReader btree_reader_; // See 'Stack' alias definition. Stack stack_of_right_children_; @@ -1247,8 +1248,8 @@ inline bool Cord::StartsWith(absl::string_view rhs) const { } inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) { - if (tree->tag == cord_internal::RING) { - current_chunk_ = ring_reader_.Reset(tree->ring()); + if (tree->tag == cord_internal::BTREE) { + current_chunk_ = btree_reader_.Init(tree->btree()); return; } @@ -1271,20 +1272,20 @@ inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) } } -inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceRing() { - current_chunk_ = ring_reader_.Next(); +inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceBtree() { + current_chunk_ = btree_reader_.Next(); return *this; } -inline void Cord::ChunkIterator::AdvanceBytesRing(size_t n) { +inline void Cord::ChunkIterator::AdvanceBytesBtree(size_t n) { assert(n >= current_chunk_.size()); bytes_remaining_ -= n; if (bytes_remaining_) { if (n == current_chunk_.size()) { - current_chunk_ = ring_reader_.Next(); + current_chunk_ = btree_reader_.Next(); } else { - size_t offset = ring_reader_.length() - bytes_remaining_; - current_chunk_ = ring_reader_.Seek(offset); + size_t offset = btree_reader_.length() - bytes_remaining_; + current_chunk_ = btree_reader_.Seek(offset); } } else { current_chunk_ = {}; @@ -1297,7 +1298,7 @@ inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() { assert(bytes_remaining_ >= current_chunk_.size()); bytes_remaining_ -= current_chunk_.size(); if (bytes_remaining_ > 0) { - return ring_reader_ ? AdvanceRing() : AdvanceStack(); + return btree_reader_ ? AdvanceBtree() : AdvanceStack(); } else { current_chunk_ = {}; } @@ -1339,7 +1340,7 @@ inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { RemoveChunkPrefix(n); } else if (n != 0) { - ring_reader_ ? AdvanceBytesRing(n) : AdvanceBytesSlowPath(n); + btree_reader_ ? AdvanceBytesBtree(n) : AdvanceBytesSlowPath(n); } } diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 597378cc..0c0a8a7a 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -366,7 +366,7 @@ TEST(Cord, Subcord) { absl::Cord a; AppendWithFragments(s, &rng, &a); - ASSERT_EQ(s.size(), a.size()); + ASSERT_EQ(s, std::string(a)); // Check subcords of a, from a variety of interesting points. std::set positions; diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc index f79cb628..1767e6fc 100644 --- a/absl/strings/internal/cord_internal.cc +++ b/absl/strings/internal/cord_internal.cc @@ -31,6 +31,7 @@ ABSL_CONST_INIT std::atomic cord_ring_buffer_enabled( kCordEnableRingBufferDefault); ABSL_CONST_INIT std::atomic shallow_subcords_enabled( kCordShallowSubcordsDefault); +ABSL_CONST_INIT std::atomic cord_btree_exhaustive_validation(false); void CordRep::Destroy(CordRep* rep) { assert(rep != nullptr); diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index 371a7f9c..8ae644ba 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -46,6 +46,12 @@ extern std::atomic cord_btree_enabled; extern std::atomic cord_ring_buffer_enabled; extern std::atomic shallow_subcords_enabled; +// `cord_btree_exhaustive_validation` can be set to force exhaustive validation +// in debug assertions, and code that calls `IsValid()` explicitly. By default, +// assertions should be relatively cheap and AssertValid() can easily lead to +// O(n^2) complexity as recursive / full tree validation is O(n). +extern std::atomic cord_btree_exhaustive_validation; + inline void enable_cord_btree(bool enable) { cord_btree_enabled.store(enable, std::memory_order_relaxed); } diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc index 6978cfd2..fd3a0045 100644 --- a/absl/strings/internal/cord_rep_btree.cc +++ b/absl/strings/internal/cord_rep_btree.cc @@ -32,6 +32,8 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { +constexpr size_t CordRepBtree::kMaxCapacity; // NOLINT: needed for c++ < c++17 + namespace { using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth]; @@ -42,6 +44,10 @@ using CopyResult = CordRepBtree::CopyResult; constexpr auto kFront = CordRepBtree::kFront; constexpr auto kBack = CordRepBtree::kBack; +inline bool exhaustive_validation() { + return cord_btree_exhaustive_validation.load(std::memory_order_relaxed); +} + // Implementation of the various 'Dump' functions. // Prints the entire tree structure or 'rep'. External callers should // not specify 'depth' and leave it to its default (0) value. @@ -357,7 +363,7 @@ void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin, Delete(tree); } -bool CordRepBtree::IsValid(const CordRepBtree* tree) { +bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) { #define NODE_CHECK_VALID(x) \ if (!(x)) { \ ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \ @@ -389,9 +395,9 @@ bool CordRepBtree::IsValid(const CordRepBtree* tree) { child_length += edge->length; } NODE_CHECK_EQ(child_length, tree->length); - if (tree->height() > 0) { + if ((!shallow || exhaustive_validation()) && tree->height() > 0) { for (CordRep* edge : tree->Edges()) { - if (!IsValid(edge->btree())) return false; + if (!IsValid(edge->btree(), shallow)) return false; } } return true; @@ -402,16 +408,17 @@ bool CordRepBtree::IsValid(const CordRepBtree* tree) { #ifndef NDEBUG -CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) { - if (!IsValid(tree)) { +CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) { + if (!IsValid(tree, shallow)) { Dump(tree, "CordRepBtree validation failed:", false, std::cout); ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); } return tree; } -const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) { - if (!IsValid(tree)) { +const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool shallow) { + if (!IsValid(tree, shallow)) { Dump(tree, "CordRepBtree validation failed:", false, std::cout); ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); } diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index 7d854731..56e1e4af 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -266,10 +266,28 @@ class CordRepBtree : public CordRep { // holding a FLAT or EXTERNAL child rep. static bool IsDataEdge(const CordRep* rep); - // Diagnostics - static bool IsValid(const CordRepBtree* tree); - static CordRepBtree* AssertValid(CordRepBtree* tree); - static const CordRepBtree* AssertValid(const CordRepBtree* tree); + // Diagnostics: returns true if `tree` is valid and internally consistent. + // If `shallow` is false, then the provided top level node and all child nodes + // below it are recursively checked. If `shallow` is true, only the provided + // node in `tree` and the cumulative length, type and height of the direct + // child nodes of `tree` are checked. The value of `shallow` is ignored if the + // internal `cord_btree_exhaustive_validation` diagnostics variable is true, + // in which case the performed validations works as if `shallow` were false. + // This function is intended for debugging and testing purposes only. + static bool IsValid(const CordRepBtree* tree, bool shallow = false); + + // Diagnostics: asserts that the provided tree is valid. + // `AssertValid()` performs a shallow validation by default. `shallow` can be + // set to false in which case an exhaustive validation is performed. This + // function is implemented in terms of calling `IsValid()` and asserting the + // return value to be true. See `IsValid()` for more information. + // This function is intended for debugging and testing purposes only. + static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true); + static const CordRepBtree* AssertValid(const CordRepBtree* tree, + bool shallow = true); + + // Diagnostics: dump the contents of this tree to `stream`. + // This function is intended for debugging and testing purposes only. static void Dump(const CordRep* rep, std::ostream& stream); static void Dump(const CordRep* rep, absl::string_view label, std::ostream& stream); @@ -834,11 +852,13 @@ inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) { #ifdef NDEBUG -inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) { +inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, + bool /* shallow */) { return tree; } -inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) { +inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool /* shallow */) { return tree; } diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc index 7a0b7811..073a7d45 100644 --- a/absl/strings/internal/cord_rep_btree_test.cc +++ b/absl/strings/internal/cord_rep_btree_test.cc @@ -24,6 +24,7 @@ #include "gtest/gtest.h" #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" +#include "absl/cleanup/cleanup.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_test_util.h" #include "absl/strings/str_cat.h" @@ -1346,6 +1347,48 @@ TEST(CordRepBtreeTest, AssertValid) { CordRep::Unref(tree); } +TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { + // Restore exhaustive validation on any exit. + const bool exhaustive_validation = cord_btree_exhaustive_validation.load(); + auto cleanup = absl::MakeCleanup([exhaustive_validation] { + cord_btree_exhaustive_validation.store(exhaustive_validation); + }); + + // Create a tree of at least 2 levels, and mess with the original flat, which + // should go undetected in shallow mode as the flat is too far away, but + // should be detected in forced non-shallow mode. + CordRep* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(flat); + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * 2; + for (size_t i = 0; i < n; ++i) { + tree = CordRepBtree::Append(tree, MakeFlat("Hello world")); + } + flat->length = 100; + + cord_btree_exhaustive_validation.store(false); + EXPECT_FALSE(CordRepBtree::IsValid(tree)); + EXPECT_TRUE(CordRepBtree::IsValid(tree, true)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); + CordRepBtree::AssertValid(tree); + CordRepBtree::AssertValid(tree, true); +#if defined(GTEST_HAS_DEATH_TEST) + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, false), ".*"); +#endif + + cord_btree_exhaustive_validation.store(true); + EXPECT_FALSE(CordRepBtree::IsValid(tree)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, true)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); +#if defined(GTEST_HAS_DEATH_TEST) + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree), ".*"); + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, true), ".*"); +#endif + + flat->length = 3; + CordRep::Unref(tree); +} + } // namespace } // namespace cord_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/cordz_info.cc b/absl/strings/internal/cordz_info.cc index a3a0b9c0..5c18bbc5 100644 --- a/absl/strings/internal/cordz_info.cc +++ b/absl/strings/internal/cordz_info.cc @@ -19,6 +19,7 @@ #include "absl/container/inlined_vector.h" #include "absl/debugging/stacktrace.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_ring.h" #include "absl/strings/internal/cordz_handle.h" #include "absl/strings/internal/cordz_statistics.h" @@ -83,19 +84,23 @@ class CordRepAnalyzer { // Process all top level linear nodes (substrings and flats). repref = CountLinearReps(repref, memory_usage_); - // We should have have either a concat or ring node node if not null. if (repref.rep != nullptr) { - assert(repref.rep->tag == RING || repref.rep->tag == CONCAT); if (repref.rep->tag == RING) { AnalyzeRing(repref); + } else if (repref.rep->tag == BTREE) { + AnalyzeBtree(repref); } else if (repref.rep->tag == CONCAT) { AnalyzeConcat(repref); + } else { + // We should have either a concat, btree, or ring node if not null. + assert(false); } } // Adds values to output statistics_.estimated_memory_usage += memory_usage_.total; - statistics_.estimated_fair_share_memory_usage += memory_usage_.fair_share; + statistics_.estimated_fair_share_memory_usage += + static_cast(memory_usage_.fair_share); } private: @@ -117,13 +122,13 @@ class CordRepAnalyzer { // Memory usage values struct MemoryUsage { size_t total = 0; - size_t fair_share = 0; + double fair_share = 0.0; // Adds 'size` memory usage to this class, with a cumulative (recursive) // reference count of `refcount` void Add(size_t size, size_t refcount) { total += size; - fair_share += size / refcount; + fair_share += static_cast(size) / refcount; } }; @@ -215,28 +220,32 @@ class CordRepAnalyzer { } } - // Counts the provided ring buffer child into `child_usage`. - void CountRingChild(const CordRep* child, MemoryUsage& child_usage) { - RepRef rep{child, static_cast(child->refcount.Get())}; - rep = CountLinearReps(rep, child_usage); - assert(rep.rep == nullptr); - } - - // Analyzes the provided ring. As ring buffers can have many child nodes, the - // effect of rounding errors can become non trivial, so we compute the totals - // first at the ring level, and then divide the fair share of the total - // including children fair share totals. + // Analyzes the provided ring. void AnalyzeRing(RepRef rep) { statistics_.node_count++; statistics_.node_counts.ring++; - MemoryUsage ring_usage; const CordRepRing* ring = rep.rep->ring(); - ring_usage.Add(CordRepRing::AllocSize(ring->capacity()), 1); + memory_usage_.Add(CordRepRing::AllocSize(ring->capacity()), rep.refcount); ring->ForEach([&](CordRepRing::index_type pos) { - CountRingChild(ring->entry_child(pos), ring_usage); + CountLinearReps(rep.Child(ring->entry_child(pos)), memory_usage_); }); - memory_usage_.total += ring_usage.total; - memory_usage_.fair_share += ring_usage.fair_share / rep.refcount; + } + + // Analyzes the provided btree. + void AnalyzeBtree(RepRef rep) { + statistics_.node_count++; + statistics_.node_counts.btree++; + memory_usage_.Add(sizeof(CordRepBtree), rep.refcount); + const CordRepBtree* tree = rep.rep->btree(); + if (tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + AnalyzeBtree(rep.Child(edge)); + } + } else { + for (CordRep* edge : tree->Edges()) { + CountLinearReps(rep.Child(edge), memory_usage_); + } + } } CordzStatistics& statistics_; diff --git a/absl/strings/internal/cordz_info_statistics_test.cc b/absl/strings/internal/cordz_info_statistics_test.cc index 9f2842d9..7430d281 100644 --- a/absl/strings/internal/cordz_info_statistics_test.cc +++ b/absl/strings/internal/cordz_info_statistics_test.cc @@ -21,6 +21,7 @@ #include "absl/base/config.h" #include "absl/strings/cord.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" #include "absl/strings/internal/cord_rep_flat.h" #include "absl/strings/internal/cord_rep_ring.h" #include "absl/strings/internal/cordz_info.h" @@ -42,6 +43,8 @@ inline void PrintTo(const CordzStatistics& stats, std::ostream* s) { namespace { +using ::testing::Ge; + // Creates a flat of the specified allocated size CordRepFlat* Flat(size_t size) { // Round up to a tag size, as we are going to poke an exact tag size back into @@ -134,8 +137,8 @@ size_t SizeOf(const CordRepRing* rep) { } // Computes fair share memory used in a naive 'we dare to recurse' way. -size_t FairShare(CordRep* rep, size_t ref = 1) { - size_t self = 0, children = 0; +double FairShareImpl(CordRep* rep, size_t ref) { + double self = 0.0, children = 0.0; ref *= rep->refcount.Get(); if (rep->tag >= FLAT) { self = SizeOf(rep->flat()); @@ -143,22 +146,32 @@ size_t FairShare(CordRep* rep, size_t ref = 1) { self = SizeOf(rep->external()); } else if (rep->tag == SUBSTRING) { self = SizeOf(rep->substring()); - children = FairShare(rep->substring()->child, ref); + children = FairShareImpl(rep->substring()->child, ref); + } else if (rep->tag == BTREE) { + self = SizeOf(rep->btree()); + for (CordRep*edge : rep->btree()->Edges()) { + children += FairShareImpl(edge, ref); + } } else if (rep->tag == RING) { self = SizeOf(rep->ring()); rep->ring()->ForEach([&](CordRepRing::index_type i) { - self += FairShare(rep->ring()->entry_child(i)); + self += FairShareImpl(rep->ring()->entry_child(i), 1); }); } else if (rep->tag == CONCAT) { self = SizeOf(rep->concat()); - children = FairShare(rep->concat()->left, ref) + - FairShare(rep->concat()->right, ref); + children = FairShareImpl(rep->concat()->left, ref) + + FairShareImpl(rep->concat()->right, ref); } else { assert(false); } return self / ref + children; } +// Returns the fair share memory size from `ShareFhareImpl()` as a size_t. +size_t FairShare(CordRep* rep, size_t ref = 1) { + return static_cast(FairShareImpl(rep, ref)); +} + // Samples the cord and returns CordzInfo::GetStatistics() CordzStatistics SampleCord(CordRep* rep) { InlineData cord(rep); @@ -191,6 +204,7 @@ MATCHER_P(EqStatistics, stats, "Statistics equal expected values") { STATS_MATCHER_EXPECT_EQ(node_counts.concat); STATS_MATCHER_EXPECT_EQ(node_counts.substring); STATS_MATCHER_EXPECT_EQ(node_counts.ring); + STATS_MATCHER_EXPECT_EQ(node_counts.btree); STATS_MATCHER_EXPECT_EQ(estimated_memory_usage); STATS_MATCHER_EXPECT_EQ(estimated_fair_share_memory_usage); @@ -424,6 +438,103 @@ TEST(CordzInfoStatisticsTest, SharedSubstringRing) { EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); } +TEST(CordzInfoStatisticsTest, BtreeLeaf) { + ASSERT_THAT(CordRepBtree::kMaxCapacity, Ge(3)); + RefHelper ref; + auto* flat1 = Flat(2000); + auto* flat2 = Flat(200); + auto* substr = Substring(flat2); + auto* external = External(3000); + + CordRepBtree* tree = CordRepBtree::Create(flat1); + tree = CordRepBtree::Append(tree, substr); + tree = CordRepBtree::Append(tree, external); + size_t flat3_count = CordRepBtree::kMaxCapacity - 3; + size_t flat3_size = 0; + for (size_t i = 0; i < flat3_count; ++i) { + auto* flat3 = Flat(70); + flat3_size += SizeOf(flat3); + tree = CordRepBtree::Append(tree, flat3); + } + ref.NeedsUnref(tree); + + CordzStatistics expected; + expected.size = tree->length; + expected.estimated_memory_usage = SizeOf(tree) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(substr) + + flat3_size + SizeOf(external); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 1 + 3 + 1 + flat3_count; + expected.node_counts.flat = 2 + flat3_count; + expected.node_counts.flat_128 = flat3_count; + expected.node_counts.flat_256 = 1; + expected.node_counts.external = 1; + expected.node_counts.substring = 1; + expected.node_counts.btree = 1; + + EXPECT_THAT(SampleCord(tree), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, BtreeNodeShared) { + RefHelper ref; + static constexpr int leaf_count = 3; + const size_t flat3_count = CordRepBtree::kMaxCapacity - 3; + ASSERT_THAT(flat3_count, Ge(0)); + + CordRepBtree* tree = nullptr; + size_t mem_size = 0; + for (int i = 0; i < leaf_count; ++i) { + auto* flat1 = ref.Ref(Flat(2000), 9); + mem_size += SizeOf(flat1); + if (i == 0) { + tree = CordRepBtree::Create(flat1); + } else { + tree = CordRepBtree::Append(tree, flat1); + } + + auto* flat2 = Flat(200); + auto* substr = Substring(flat2); + mem_size += SizeOf(flat2) + SizeOf(substr); + tree = CordRepBtree::Append(tree, substr); + + auto* external = External(30); + mem_size += SizeOf(external); + tree = CordRepBtree::Append(tree, external); + + for (size_t i = 0; i < flat3_count; ++i) { + auto* flat3 = Flat(70); + mem_size += SizeOf(flat3); + tree = CordRepBtree::Append(tree, flat3); + } + + if (i == 0) { + mem_size += SizeOf(tree); + } else { + mem_size += SizeOf(tree->Edges().back()->btree()); + } + } + ref.NeedsUnref(tree); + + // Ref count: 2 for top (add 1), 5 for leaf 0 (add 4). + ref.Ref(tree, 1); + ref.Ref(tree->Edges().front(), 4); + + CordzStatistics expected; + expected.size = tree->length; + expected.estimated_memory_usage = SizeOf(tree) + mem_size; + expected.estimated_fair_share_memory_usage = FairShare(tree); + + expected.node_count = 1 + leaf_count * (1 + 3 + 1 + flat3_count); + expected.node_counts.flat = leaf_count * (2 + flat3_count); + expected.node_counts.flat_128 = leaf_count * flat3_count; + expected.node_counts.flat_256 = leaf_count; + expected.node_counts.external = leaf_count; + expected.node_counts.substring = leaf_count; + expected.node_counts.btree = 1 + leaf_count; + + EXPECT_THAT(SampleCord(tree), EqStatistics(expected)); +} + TEST(CordzInfoStatisticsTest, ThreadSafety) { Notification stop; static constexpr int kNumThreads = 8; @@ -471,9 +582,15 @@ TEST(CordzInfoStatisticsTest, ThreadSafety) { CordRep::Unref(cord.as_tree()); cord.set_inline_size(0); } else { - // 50/50 Ring or Flat coin toss + // Coin toss to 25% ring, 25% btree, and 50% flat. CordRep* rep = Flat(256); - rep = (coin_toss(gen) != 0) ? CordRepRing::Create(rep) : rep; + if (coin_toss(gen) != 0) { + if (coin_toss(gen) != 0) { + rep = CordRepRing::Create(rep); + } else { + rep = CordRepBtree::Create(rep); + } + } cord.make_tree(rep); // 50/50 sample diff --git a/absl/strings/internal/cordz_statistics.h b/absl/strings/internal/cordz_statistics.h index e03c651e..da4c7dbb 100644 --- a/absl/strings/internal/cordz_statistics.h +++ b/absl/strings/internal/cordz_statistics.h @@ -40,6 +40,7 @@ struct CordzStatistics { size_t substring = 0; // #substring reps size_t concat = 0; // #concat reps size_t ring = 0; // #ring buffer reps + size_t btree = 0; // #btree reps }; // The size of the cord in bytes. This matches the result of Cord::size(). @@ -61,6 +62,8 @@ struct CordzStatistics { // The total number of nodes referenced by this cord. // For ring buffer Cords, this includes the 'ring buffer' node. + // For btree Cords, this includes all 'CordRepBtree' tree nodes as well as all + // the substring, flat and external nodes referenced by the tree. // A value of 0 implies the property has not been recorded. int64_t node_count = 0; diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index ec6c431f..ea760526 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -198,9 +198,9 @@ class string_view { // Implicit constructor of a `string_view` from NUL-terminated `str`. When // accepting possibly null strings, use `absl::NullSafeStringView(str)` // instead (see below). + // The length check is skipped since it is unnecessary and causes code bloat. constexpr string_view(const char* str) // NOLINT(runtime/explicit) - : ptr_(str), - length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0) {} + : ptr_(str), length_(str ? StrlenInternal(str) : 0) {} // Implicit constructor of a `string_view` from a `const char*` and length. constexpr string_view(const char* data, size_type len) -- cgit v1.2.3