diff options
Diffstat (limited to 'absl/strings/internal')
-rw-r--r-- | absl/strings/internal/cord_internal.cc | 4 | ||||
-rw-r--r-- | absl/strings/internal/cord_internal.h | 206 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_flat.h | 4 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_ring.cc | 895 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_ring.h | 576 |
5 files changed, 1648 insertions, 37 deletions
diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc index 59f2e4d9..905ffd0c 100644 --- a/absl/strings/internal/cord_internal.cc +++ b/absl/strings/internal/cord_internal.cc @@ -19,6 +19,7 @@ #include "absl/container/inlined_vector.h" #include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_ring.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -48,6 +49,9 @@ void CordRep::Destroy(CordRep* rep) { rep = left; continue; } + } else if (rep->tag == RING) { + CordRepRing::Destroy(rep->ring()); + rep = nullptr; } else if (rep->tag == EXTERNAL) { CordRepExternal::Delete(rep); rep = nullptr; diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index b586ea37..011b49d3 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -1,4 +1,4 @@ -// Copyright 2020 The Abseil Authors. +// Copyright 2021 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include <cstdint> #include <type_traits> +#include "absl/base/config.h" #include "absl/base/internal/invoke.h" #include "absl/base/optimization.h" #include "absl/container/internal/compressed_tuple.h" @@ -145,13 +146,14 @@ struct CordRepConcat; struct CordRepExternal; struct CordRepFlat; struct CordRepSubstring; +class CordRepRing; // Various representations that we allow enum CordRepKind { - CONCAT = 0, - EXTERNAL = 1, - SUBSTRING = 2, - RING = 3, + CONCAT = 0, + EXTERNAL = 1, + SUBSTRING = 2, + RING = 3, // We have different tags for different sized flat arrays, // starting with FLAT, and limited to MAX_FLAT_TAG. The 224 value is based on @@ -160,7 +162,7 @@ enum CordRepKind { // as the Tag <---> Size logic so that FLAT stil represents the minimum flat // allocation size. (32 bytes as of now). FLAT = 4, - MAX_FLAT_TAG = 224, + MAX_FLAT_TAG = 224 }; struct CordRep { @@ -177,6 +179,8 @@ struct CordRep { uint8_t tag; char storage[1]; // Starting point for flat array: MUST BE LAST FIELD + inline CordRepRing* ring(); + inline const CordRepRing* ring() const; inline CordRepConcat* concat(); inline const CordRepConcat* concat() const; inline CordRepSubstring* substring(); @@ -306,45 +310,165 @@ CordRepExternal ConstInitExternalStorage<Str>::value(Str::value); enum { kMaxInline = 15, - // Tag byte & kMaxInline means we are storing a pointer. - kTreeFlag = 1 << 4, - // Tag byte & kProfiledFlag means we are profiling the Cord. - kProfiledFlag = 1 << 5 -}; - -// If the data has length <= kMaxInline, we store it in `as_chars`, and -// store the size in `tagged_size`. -// Else we store it in a tree and store a pointer to that tree in -// `as_tree.rep` and store a tag in `tagged_size`. -struct AsTree { - absl::cord_internal::CordRep* rep; - char padding[kMaxInline + 1 - sizeof(absl::cord_internal::CordRep*) - 1]; - char tagged_size; }; constexpr char GetOrNull(absl::string_view data, size_t pos) { return pos < data.size() ? data[pos] : '\0'; } -union InlineData { - constexpr InlineData() : as_chars{} {} - explicit constexpr InlineData(AsTree tree) : as_tree(tree) {} +// We store cordz_info as 64 bit pointer value in big endian format. This +// guarantees that the least significant byte of cordz_info matches the last +// byte of the inline data representation in as_chars_, which holds the inlined +// size or the 'is_tree' bit. +using cordz_info_t = int64_t; + +// Assert that the `cordz_info` pointer value perfectly overlaps the last half +// of `as_chars_` and can hold a pointer value. +static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, ""); +static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), ""); + +// BigEndianByte() creates a big endian representation of 'value', i.e.: a big +// endian value where the last byte in the host's representation holds 'value`, +// with all other bytes being 0. +static constexpr cordz_info_t BigEndianByte(unsigned char value) { +#if defined(ABSL_IS_BIG_ENDIAN) + return value; +#else + return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8); +#endif +} + +class InlineData { + public: + // kNullCordzInfo holds the big endian representation of intptr_t(1) + // This is the 'null' / initial value of 'cordz_info'. The null value + // is specifically big endian 1 as with 64-bit pointers, the last + // byte of cordz_info overlaps with the last byte holding the tag. + static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1); + + // kFakeCordzInfo holds a 'fake', non-null cordz-info value we use to + // emulate the previous 'kProfiled' tag logic in 'set_profiled' until + // cord code is changed to store cordz_info values in InlineData. + static constexpr cordz_info_t kFakeCordzInfo = BigEndianByte(9); + + constexpr InlineData() : as_chars_{0} {} + explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {} explicit constexpr InlineData(absl::string_view chars) - : as_chars{GetOrNull(chars, 0), GetOrNull(chars, 1), - GetOrNull(chars, 2), GetOrNull(chars, 3), - GetOrNull(chars, 4), GetOrNull(chars, 5), - GetOrNull(chars, 6), GetOrNull(chars, 7), - GetOrNull(chars, 8), GetOrNull(chars, 9), - GetOrNull(chars, 10), GetOrNull(chars, 11), - GetOrNull(chars, 12), GetOrNull(chars, 13), - GetOrNull(chars, 14), static_cast<char>(chars.size())} {} - - AsTree as_tree; - char as_chars[kMaxInline + 1]; + : as_chars_{ + GetOrNull(chars, 0), GetOrNull(chars, 1), + GetOrNull(chars, 2), GetOrNull(chars, 3), + GetOrNull(chars, 4), GetOrNull(chars, 5), + GetOrNull(chars, 6), GetOrNull(chars, 7), + GetOrNull(chars, 8), GetOrNull(chars, 9), + GetOrNull(chars, 10), GetOrNull(chars, 11), + GetOrNull(chars, 12), GetOrNull(chars, 13), + GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {} + + // Returns true if the current instance is empty. + // The 'empty value' is an inlined data value of zero length. + bool is_empty() const { return tag() == 0; } + + // Returns true if the current instance holds a tree value. + bool is_tree() const { return (tag() & 1) != 0; } + + // Returns true if the current instance holds a cordz_info value. + // Requires the current instance to hold a tree value. + bool is_profiled() const { + assert(is_tree()); + return as_tree_.cordz_info != kNullCordzInfo; + } + + // Returns a read only pointer to the character data inside this instance. + // Requires the current instance to hold inline data. + const char* as_chars() const { + assert(!is_tree()); + return as_chars_; + } + + // Returns a mutable pointer to the character data inside this instance. + // Should be used for 'write only' operations setting an inlined value. + // Applications can set the value of inlined data either before or after + // setting the inlined size, i.e., both of the below are valid: + // + // // Set inlined data and inline size + // memcpy(data_.as_chars(), data, size); + // data_.set_inline_size(size); + // + // // Set inlined size and inline data + // data_.set_inline_size(size); + // memcpy(data_.as_chars(), data, size); + // + // It's an error to read from the returned pointer without a preceding write + // if the current instance does not hold inline data, i.e.: is_tree() == true. + char* as_chars() { return as_chars_; } + + // Returns the tree value of this value. + // Requires the current instance to hold a tree value. + CordRep* as_tree() const { + assert(is_tree()); + return as_tree_.rep; + } + + // Initialize this instance to holding the tree value `rep`, + // initializing the cordz_info to null, i.e.: 'not profiled'. + void make_tree(CordRep* rep) { + as_tree_.rep = rep; + as_tree_.cordz_info = kNullCordzInfo; + } + + // Set the tree value of this instance to 'rep`. + // Requires the current instance to already hold a tree value. + // Does not affect the value of cordz_info. + void set_tree(CordRep* rep) { + assert(is_tree()); + as_tree_.rep = rep; + } + + // Returns the size of the inlined character data inside this instance. + // Requires the current instance to hold inline data. + size_t inline_size() const { + assert(!is_tree()); + return tag() >> 1; + } + + // Sets the size of the inlined character data inside this instance. + // Requires `size` to be <= kMaxInline. + // See the documentation on 'as_chars()' for more information and examples. + void set_inline_size(size_t size) { + ABSL_ASSERT(size <= kMaxInline); + tag() = static_cast<char>(size << 1); + } + + // Sets or unsets the 'is_profiled' state of this instance. + // Requires the current instance to hold a tree value. + void set_profiled(bool profiled) { + assert(is_tree()); + as_tree_.cordz_info = profiled ? kFakeCordzInfo : kNullCordzInfo; + } + + private: + // See cordz_info_t for forced alignment and size of `cordz_info` details. + struct AsTree { + explicit constexpr AsTree(absl::cord_internal::CordRep* tree) + : rep(tree), cordz_info(kNullCordzInfo) {} + absl::cord_internal::CordRep* rep; + alignas(sizeof(cordz_info_t)) cordz_info_t cordz_info; + }; + + char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; } + char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; } + + // If the data has length <= kMaxInline, we store it in `as_chars_`, and + // store the size in the last char of `as_chars_` shifted left + 1. + // Else we store it in a tree and store a pointer to that tree in + // `as_tree_.rep` and store a tag in `tagged_size`. + union { + char as_chars_[kMaxInline + 1]; + AsTree as_tree_; + }; }; + static_assert(sizeof(InlineData) == kMaxInline + 1, ""); -static_assert(sizeof(AsTree) == sizeof(InlineData), ""); -static_assert(offsetof(AsTree, tagged_size) == kMaxInline, ""); inline CordRepConcat* CordRep::concat() { assert(tag == CONCAT); @@ -386,6 +510,16 @@ inline const CordRepFlat* CordRep::flat() const { return reinterpret_cast<const CordRepFlat*>(this); } +inline CordRepRing* CordRep::ring() { + assert(tag == RING); + return reinterpret_cast<CordRepRing*>(this); +} + +inline const CordRepRing* CordRep::ring() const { + assert(tag == RING); + return reinterpret_cast<const CordRepRing*>(this); +} + inline CordRep* CordRep::Ref(CordRep* rep) { assert(rep != nullptr); rep->refcount.Increment(); diff --git a/absl/strings/internal/cord_rep_flat.h b/absl/strings/internal/cord_rep_flat.h index 8c7d160e..5f7d55ce 100644 --- a/absl/strings/internal/cord_rep_flat.h +++ b/absl/strings/internal/cord_rep_flat.h @@ -104,7 +104,8 @@ struct CordRepFlat : public CordRep { // Flat CordReps are allocated and constructed with raw ::operator new and // placement new, and must be destructed and deallocated accordingly. static void Delete(CordRep*rep) { - assert(rep->tag >= FLAT); + assert(rep->tag >= FLAT && rep->tag <= MAX_FLAT_TAG); + #if defined(__cpp_sized_deallocation) size_t size = TagToAllocatedSize(rep->tag); rep->~CordRep(); @@ -115,6 +116,7 @@ struct CordRepFlat : public CordRep { #endif } + // Returns a pointer to the data inside this flat rep. char* Data() { return storage; } const char* Data() const { return storage; } diff --git a/absl/strings/internal/cord_rep_ring.cc b/absl/strings/internal/cord_rep_ring.cc new file mode 100644 index 00000000..358b0d92 --- /dev/null +++ b/absl/strings/internal/cord_rep_ring.cc @@ -0,0 +1,895 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cord_rep_ring.h" + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iostream> +#include <limits> +#include <memory> +#include <string> + +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/throw_delegate.h" +#include "absl/base/macros.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// See https://bugs.llvm.org/show_bug.cgi?id=48477 +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" +#pragma clang diagnostic ignored "-Wshadow-field" +#endif + +namespace { + +using index_type = CordRepRing::index_type; + +enum class Direction { kForward, kReversed }; + +inline bool IsFlatOrExternal(CordRep* rep) { + return rep->tag >= FLAT || rep->tag == EXTERNAL; +} + +// Verifies that n + extra <= kMaxCapacity: throws std::length_error otherwise. +inline void CheckCapacity(size_t n, size_t extra) { + if (ABSL_PREDICT_FALSE(extra > CordRepRing::kMaxCapacity - n)) { + base_internal::ThrowStdLengthError("Maximum capacity exceeded"); + } +} + +// Removes a reference from `rep` only. +// Asserts that the refcount after decrement is not zero. +inline bool UnrefNeverOne(CordRep* rep) { + bool result = rep->refcount.Decrement(); + assert(result); + return result; +} + +// Creates a flat from the provided string data, allocating up to `extra` +// capacity in the returned flat depending on kMaxFlatLength limitations. +// Requires `len` to be less or equal to `kMaxFlatLength` +CordRepFlat* CreateFlat(const char* s, size_t n, size_t extra = 0) { // NOLINT + assert(n <= kMaxFlatLength); + auto* rep = CordRepFlat::New(n + extra); + rep->length = n; + memcpy(rep->Data(), s, n); + return rep; +} + +// Unrefs the provided `substring`, and returns `substring->child` +// Adds or assumes a reference on `substring->child` +CordRep* ClipSubstring(CordRepSubstring* substring) { + CordRep* child = substring->child; + if (substring->refcount.IsOne()) { + delete substring; + } else { + CordRep::Ref(child); + if (ABSL_PREDICT_FALSE(!substring->refcount.Decrement())) { + UnrefNeverOne(child); + delete substring; + } + } + return child; +} + +// Unrefs the provided `concat`, and returns `{concat->left, concat->right}` +// Adds or assumes a reference on `concat->left` and `concat->right`. +std::pair<CordRep*, CordRep*> ClipConcat(CordRepConcat* concat) { + auto result = std::make_pair(concat->left, concat->right); + if (concat->refcount.IsOne()) { + delete concat; + } else { + CordRep::Ref(result.first); + CordRep::Ref(result.second); + if (ABSL_PREDICT_FALSE(!concat->refcount.Decrement())) { + UnrefNeverOne(result.first); + UnrefNeverOne(result.second); + delete concat; + } + } + return result; +} + +// Unrefs the entries in `[head, tail)`. +// Requires all entries to be a FLAT or EXTERNAL node. +void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) { + rep->ForEach(head, tail, [rep](index_type ix) { + CordRep* child = rep->entry_child(ix); + if (!child->refcount.Decrement()) { + if (child->tag >= FLAT) { + CordRepFlat::Delete(child->flat()); + } else { + CordRepExternal::Delete(child->external()); + } + } + }); +} + +template <typename F> +void Consume(Direction direction, CordRep* rep, F&& fn) { + size_t offset = 0; + size_t length = rep->length; + struct Entry { + CordRep* rep; + size_t offset; + size_t length; + }; + absl::InlinedVector<Entry, 40> stack; + + for (;;) { + if (rep->tag >= FLAT || rep->tag == EXTERNAL || rep->tag == RING) { + fn(rep, offset, length); + if (stack.empty()) return; + + rep = stack.back().rep; + offset = stack.back().offset; + length = stack.back().length; + stack.pop_back(); + } else if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = ClipSubstring(rep->substring()); + } else if (rep->tag == CONCAT) { + auto res = ClipConcat(rep->concat()); + CordRep* left = res.first; + CordRep* right = res.second; + + if (left->length <= offset) { + // Don't need left node + offset -= left->length; + CordRep::Unref(left); + rep = right; + continue; + } + + size_t length_left = left->length - offset; + if (length_left >= length) { + // Don't need right node + CordRep::Unref(right); + rep = left; + continue; + } + + // Need both nodes + size_t length_right = length - length_left; + if (direction == Direction::kReversed) { + stack.push_back({left, offset, length_left}); + rep = right; + offset = 0; + length = length_right; + } else { + stack.push_back({right, 0, length_right}); + rep = left; + length = length_left; + } + } else { + assert("Valid tag" == nullptr); + return; + } + } +} + +template <typename F> +void Consume(CordRep* rep, F&& fn) { + return Consume(Direction::kForward, rep, std::forward<F>(fn)); +} + +template <typename F> +void RConsume(CordRep* rep, F&& fn) { + return Consume(Direction::kReversed, rep, std::forward<F>(fn)); +} + +} // namespace + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep) { + // Note: 'pos' values are defined as size_t (for overflow reasons), but that + // prints really awkward for small prepended values such as -5. ssize_t is not + // portable (POSIX), so we use ptrdiff_t instead to cast to signed values. + s << " CordRepRing(" << &rep << ", length = " << rep.length + << ", head = " << rep.head_ << ", tail = " << rep.tail_ + << ", cap = " << rep.capacity_ << ", rc = " << rep.refcount.Get() + << ", begin_pos_ = " << static_cast<ptrdiff_t>(rep.begin_pos_) << ") {\n"; + CordRepRing::index_type head = rep.head(); + do { + CordRep* child = rep.entry_child(head); + s << " entry[" << head << "] length = " << rep.entry_length(head) + << ", child " << child << ", clen = " << child->length + << ", tag = " << static_cast<int>(child->tag) + << ", rc = " << child->refcount.Get() + << ", offset = " << rep.entry_data_offset(head) + << ", end_pos = " << static_cast<ptrdiff_t>(rep.entry_end_pos(head)) + << "\n"; + head = rep.advance(head); + } while (head != rep.tail()); + return s << "}\n"; +} + +void CordRepRing::AddDataOffset(index_type index, size_t n) { + entry_data_offset()[index] += static_cast<offset_type>(n); +} + +void CordRepRing::SubLength(index_type index, size_t n) { + entry_end_pos()[index] -= n; +} + +class CordRepRing::Filler { + public: + Filler(CordRepRing* rep, index_type pos) : rep_(rep), head_(pos), pos_(pos) {} + + index_type head() const { return head_; } + index_type pos() const { return pos_; } + + void Add(CordRep* child, size_t offset, pos_type end_pos) { + rep_->entry_end_pos()[pos_] = end_pos; + rep_->entry_child()[pos_] = child; + rep_->entry_data_offset()[pos_] = static_cast<offset_type>(offset); + pos_ = rep_->advance(pos_); + } + + private: + CordRepRing* rep_; + index_type head_; + index_type pos_; +}; + +constexpr size_t CordRepRing::kMaxCapacity; // NOLINT: needed for c++11 + +bool CordRepRing::IsValid(std::ostream& output) const { + if (capacity_ == 0) { + output << "capacity == 0"; + return false; + } + + if (head_ >= capacity_ || tail_ >= capacity_) { + output << "head " << head_ << " and/or tail " << tail_ << "exceed capacity " + << capacity_; + return false; + } + + const index_type back = retreat(tail_); + size_t pos_length = Distance(begin_pos_, entry_end_pos(back)); + if (pos_length != length) { + output << "length " << length << " does not match positional length " + << pos_length << " from begin_pos " << begin_pos_ << " and entry[" + << back << "].end_pos " << entry_end_pos(back); + return false; + } + + index_type head = head_; + pos_type begin_pos = begin_pos_; + do { + pos_type end_pos = entry_end_pos(head); + size_t entry_length = Distance(begin_pos, end_pos); + if (entry_length == 0) { + output << "entry[" << head << "] has an invalid length " << entry_length + << " from begin_pos " << begin_pos << " and end_pos " << end_pos; + return false; + } + + CordRep* child = entry_child(head); + if (child == nullptr) { + output << "entry[" << head << "].child == nullptr"; + return false; + } + if (child->tag < FLAT && child->tag != EXTERNAL) { + output << "entry[" << head << "].child has an invalid tag " + << static_cast<int>(child->tag); + return false; + } + + size_t offset = entry_data_offset(head); + if (offset >= child->length || entry_length > child->length - offset) { + output << "entry[" << head << "] has offset " << offset + << " and entry length " << entry_length + << " which are outside of the childs length of " << child->length; + return false; + } + + begin_pos = end_pos; + head = advance(head); + } while (head != tail_); + + return true; +} + +#ifdef EXTRA_CORD_RING_VALIDATION +CordRepRing* CordRepRing::Validate(CordRepRing* rep, const char* file, + int line) { + if (!rep->IsValid(std::cerr)) { + std::cerr << "\nERROR: CordRepRing corrupted"; + if (line) std::cerr << " at line " << line; + if (file) std::cerr << " in file " << file; + std::cerr << "\nContent = " << *rep; + abort(); + } + return rep; +} +#endif // EXTRA_CORD_RING_VALIDATION + +CordRepRing* CordRepRing::New(size_t capacity, size_t extra) { + CheckCapacity(capacity, extra); + + size_t size = AllocSize(capacity += extra); + void* mem = ::operator new(size); + auto* rep = new (mem) CordRepRing(static_cast<index_type>(capacity)); + rep->tag = RING; + rep->capacity_ = static_cast<index_type>(capacity); + rep->begin_pos_ = 0; + return rep; +} + +void CordRepRing::SetCapacityForTesting(size_t capacity) { + // Adjust for the changed layout + assert(capacity <= capacity_); + assert(head() == 0 || head() < tail()); + memmove(Layout::Partial(capacity).Pointer<1>(data_) + head(), + Layout::Partial(capacity_).Pointer<1>(data_) + head(), + entries() * sizeof(Layout::ElementType<1>)); + memmove(Layout::Partial(capacity, capacity).Pointer<2>(data_) + head(), + Layout::Partial(capacity_, capacity_).Pointer<2>(data_) + head(), + entries() * sizeof(Layout::ElementType<2>)); + capacity_ = static_cast<index_type>(capacity); +} + +void CordRepRing::Delete(CordRepRing* rep) { + assert(rep != nullptr && rep->tag == RING); +#if defined(__cpp_sized_deallocation) + size_t size = AllocSize(rep->capacity_); + rep->~CordRepRing(); + ::operator delete(rep, size); +#else + rep->~CordRepRing(); + ::operator delete(rep); +#endif +} + +void CordRepRing::Destroy(CordRepRing* rep) { + UnrefEntries(rep, rep->head(), rep->tail()); + Delete(rep); +} + +template <bool ref> +void CordRepRing::Fill(const CordRepRing* src, index_type head, + index_type tail) { + this->length = src->length; + head_ = 0; + tail_ = advance(0, src->entries(head, tail)); + begin_pos_ = src->begin_pos_; + + // TODO(mvels): there may be opportunities here for large buffers. + auto* dst_pos = entry_end_pos(); + auto* dst_child = entry_child(); + auto* dst_offset = entry_data_offset(); + src->ForEach(head, tail, [&](index_type index) { + *dst_pos++ = src->entry_end_pos(index); + CordRep* child = src->entry_child(index); + *dst_child++ = ref ? CordRep::Ref(child) : child; + *dst_offset++ = src->entry_data_offset(index); + }); +} + +CordRepRing* CordRepRing::Copy(CordRepRing* rep, index_type head, + index_type tail, size_t extra) { + CordRepRing* newrep = CordRepRing::New(rep->entries(head, tail), extra); + newrep->Fill<true>(rep, head, tail); + CordRep::Unref(rep); + return newrep; +} + +CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { + // Get current number of entries, and check for max capacity. + size_t entries = rep->entries(); + + size_t min_extra = (std::max)(extra, rep->capacity() * 2 - entries); + if (!rep->refcount.IsOne()) { + return Copy(rep, rep->head(), rep->tail(), min_extra); + } else if (entries + extra > rep->capacity()) { + CordRepRing* newrep = CordRepRing::New(entries, min_extra); + newrep->Fill<false>(rep, rep->head(), rep->tail()); + CordRepRing::Delete(rep); + return newrep; + } else { + return rep; + } +} + +Span<char> CordRepRing::GetAppendBuffer(size_t size) { + assert(refcount.IsOne()); + index_type back = retreat(tail_); + CordRep* child = entry_child(back); + if (child->tag >= FLAT && child->refcount.IsOne()) { + size_t capacity = child->flat()->Capacity(); + pos_type end_pos = entry_end_pos(back); + size_t data_offset = entry_data_offset(back); + size_t entry_length = Distance(entry_begin_pos(back), end_pos); + size_t used = data_offset + entry_length; + if (size_t n = (std::min)(capacity - used, size)) { + child->length = data_offset + entry_length + n; + entry_end_pos()[back] = end_pos + n; + this->length += n; + return {child->flat()->Data() + used, n}; + } + } + return {nullptr, 0}; +} + +Span<char> CordRepRing::GetPrependBuffer(size_t size) { + assert(refcount.IsOne()); + CordRep* child = entry_child(head_); + size_t data_offset = entry_data_offset(head_); + if (data_offset && child->refcount.IsOne() && child->tag >= FLAT) { + size_t n = (std::min)(data_offset, size); + this->length += n; + begin_pos_ -= n; + data_offset -= n; + entry_data_offset()[head_] = static_cast<offset_type>(data_offset); + return {child->flat()->Data() + data_offset, n}; + } + return {nullptr, 0}; +} + +CordRepRing* CordRepRing::CreateFromLeaf(CordRep* child, size_t offset, + size_t length, size_t extra) { + CordRepRing* rep = CordRepRing::New(1, extra); + rep->head_ = 0; + rep->tail_ = rep->advance(0); + rep->length = length; + rep->entry_end_pos()[0] = length; + rep->entry_child()[0] = child; + rep->entry_data_offset()[0] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::CreateSlow(CordRep* child, size_t extra) { + CordRepRing* rep = nullptr; + Consume(child, [&](CordRep* child, size_t offset, size_t length) { + if (IsFlatOrExternal(child)) { + rep = rep ? AppendLeaf(rep, child, offset, length) + : CreateFromLeaf(child, offset, length, extra); + } else if (rep) { + rep = AddRing<AddMode::kAppend>(rep, child->ring(), offset, length); + } else if (offset == 0 && child->length == length) { + rep = Mutable(child->ring(), extra); + } else { + rep = SubRing(child->ring(), offset, length, extra); + } + }); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Create(CordRep* child, size_t extra) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return CreateFromLeaf(child, 0, length, extra); + } + if (child->tag == RING) { + return Mutable(child->ring(), extra); + } + return CreateSlow(child, extra); +} + +template <CordRepRing::AddMode mode> +CordRepRing* CordRepRing::AddRing(CordRepRing* rep, CordRepRing* ring, + size_t offset, size_t length) { + assert(offset < ring->length); + constexpr bool append = mode == AddMode::kAppend; + Position head = ring->Find(offset); + Position tail = ring->FindTail(head.index, offset + length); + const index_type entries = ring->entries(head.index, tail.index); + + rep = Mutable(rep, entries); + + // The delta for making ring[head].end_pos into 'len - offset' + const pos_type delta_length = + (append ? rep->begin_pos_ + rep->length : rep->begin_pos_ - length) - + ring->entry_begin_pos(head.index) - head.offset; + + // Start filling at `tail`, or `entries` before `head` + Filler filler(rep, append ? rep->tail_ : rep->retreat(rep->head_, entries)); + + if (ring->refcount.IsOne()) { + // Copy entries from source stealing the ref and adjusting the end position. + // Commit the filler as this is no-op. + ring->ForEach(head.index, tail.index, [&](index_type ix) { + filler.Add(ring->entry_child(ix), ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + }); + + // Unref entries we did not copy over, and delete source. + if (head.index != ring->head_) UnrefEntries(ring, ring->head_, head.index); + if (tail.index != ring->tail_) UnrefEntries(ring, tail.index, ring->tail_); + CordRepRing::Delete(ring); + } else { + ring->ForEach(head.index, tail.index, [&](index_type ix) { + CordRep* child = ring->entry_child(ix); + filler.Add(child, ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + CordRep::Ref(child); + }); + CordRepRing::Unref(ring); + } + + if (head.offset) { + // Increase offset of first 'source' entry appended or prepended. + // This is always the entry in `filler.head()` + rep->AddDataOffset(filler.head(), head.offset); + } + + if (tail.offset) { + // Reduce length of last 'source' entry appended or prepended. + // This is always the entry tailed by `filler.pos()` + rep->SubLength(rep->retreat(filler.pos()), tail.offset); + } + + // Commit changes + rep->length += length; + if (append) { + rep->tail_ = filler.pos(); + } else { + rep->head_ = filler.head(); + rep->begin_pos_ -= length; + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::AppendSlow(CordRepRing* rep, CordRep* child) { + Consume(child, [&rep](CordRep* child, size_t offset, size_t length) { + if (child->tag == RING) { + rep = AddRing<AddMode::kAppend>(rep, child->ring(), offset, length); + } else { + rep = AppendLeaf(rep, child, offset, length); + } + }); + return rep; +} + +CordRepRing* CordRepRing::AppendLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length) { + rep = Mutable(rep, 1); + index_type back = rep->tail_; + const pos_type begin_pos = rep->begin_pos_ + rep->length; + rep->tail_ = rep->advance(rep->tail_); + rep->length += length; + rep->entry_end_pos()[back] = begin_pos + length; + rep->entry_child()[back] = child; + rep->entry_data_offset()[back] = static_cast<offset_type>(offset); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return AppendLeaf(rep, child, 0, length); + } + if (child->tag == RING) { + return AddRing<AddMode::kAppend>(rep, child->ring(), 0, length); + } + return AppendSlow(rep, child); +} + +CordRepRing* CordRepRing::PrependSlow(CordRepRing* rep, CordRep* child) { + RConsume(child, [&](CordRep* child, size_t offset, size_t length) { + if (IsFlatOrExternal(child)) { + rep = PrependLeaf(rep, child, offset, length); + } else { + rep = AddRing<AddMode::kPrepend>(rep, child->ring(), offset, length); + } + }); + return Validate(rep); +} + +CordRepRing* CordRepRing::PrependLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length) { + rep = Mutable(rep, 1); + index_type head = rep->retreat(rep->head_); + pos_type end_pos = rep->begin_pos_; + rep->head_ = head; + rep->length += length; + rep->begin_pos_ -= length; + rep->entry_end_pos()[head] = end_pos; + rep->entry_child()[head] = child; + rep->entry_data_offset()[head] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return PrependLeaf(rep, child, 0, length); + } + if (child->tag == RING) { + return AddRing<AddMode::kPrepend>(rep, child->ring(), 0, length); + } + return PrependSlow(rep, child); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, + size_t extra) { + if (rep->refcount.IsOne()) { + Span<char> avail = rep->GetAppendBuffer(data.length()); + if (!avail.empty()) { + memcpy(avail.data(), data.data(), avail.length()); + data.remove_prefix(avail.length()); + } + } + if (data.empty()) return Validate(rep); + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + + Filler filler(rep, rep->tail_); + pos_type pos = rep->begin_pos_ + rep->length; + + while (data.length() >= kMaxFlatLength) { + auto* flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos += kMaxFlatLength); + data.remove_prefix(kMaxFlatLength); + } + + if (data.length()) { + auto* flat = CreateFlat(data.data(), data.length(), extra); + filler.Add(flat, 0, pos += data.length()); + } + + rep->length = pos - rep->begin_pos_; + rep->tail_ = filler.pos(); + + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, absl::string_view data, + size_t extra) { + if (rep->refcount.IsOne()) { + Span<char> avail = rep->GetPrependBuffer(data.length()); + if (!avail.empty()) { + const char* tail = data.data() + data.length() - avail.length(); + memcpy(avail.data(), tail, avail.length()); + data.remove_suffix(avail.length()); + } + } + if (data.empty()) return rep; + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + pos_type pos = rep->begin_pos_; + Filler filler(rep, rep->retreat(rep->head_, static_cast<index_type>(flats))); + + size_t first_size = data.size() - (flats - 1) * kMaxFlatLength; + CordRepFlat* flat = CordRepFlat::New(first_size + extra); + flat->length = first_size + extra; + memcpy(flat->Data() + extra, data.data(), first_size); + data.remove_prefix(first_size); + filler.Add(flat, extra, pos); + pos -= first_size; + + while (!data.empty()) { + assert(data.size() >= kMaxFlatLength); + flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos); + pos -= kMaxFlatLength; + data.remove_prefix(kMaxFlatLength); + } + + rep->head_ = filler.head(); + rep->length += rep->begin_pos_ - pos; + rep->begin_pos_ = pos; + + return Validate(rep); +} + +// 32 entries is 32 * sizeof(pos_type) = 4 cache lines on x86 +static constexpr index_type kBinarySearchThreshold = 32; +static constexpr index_type kBinarySearchEndCount = 8; + +template <bool wrap> +CordRepRing::index_type CordRepRing::FindBinary(index_type head, + index_type tail, + size_t offset) const { + index_type count = tail + (wrap ? capacity_ : 0) - head; + do { + count = (count - 1) / 2; + assert(count < entries(head, tail_)); + index_type mid = wrap ? advance(head, count) : head + count; + index_type after_mid = wrap ? advance(mid) : mid + 1; + bool larger = (offset >= entry_end_offset(mid)); + head = larger ? after_mid : head; + tail = larger ? tail : mid; + assert(head != tail); + } while (ABSL_PREDICT_TRUE(count > kBinarySearchEndCount)); + return head; +} + +CordRepRing::Position CordRepRing::FindSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, offset); + } + } + + pos_type pos = entry_begin_pos(head); + pos_type end_pos = entry_end_pos(head); + while (offset >= Distance(begin_pos_, end_pos)) { + head = advance(head); + pos = end_pos; + end_pos = entry_end_pos(head); + } + + return {head, offset - Distance(begin_pos_, pos)}; +} + +CordRepRing::Position CordRepRing::FindTailSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + const size_t tail_offset = offset - 1; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, tail_offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, tail_offset); + } + } + + size_t end_offset = entry_end_offset(head); + while (tail_offset >= end_offset) { + head = advance(head); + end_offset = entry_end_offset(head); + } + + return {advance(head), end_offset - offset}; +} + +char CordRepRing::GetCharacter(size_t offset) const { + assert(offset < length); + + Position pos = Find(offset); + size_t data_offset = entry_data_offset(pos.index) + pos.offset; + return GetRepData(entry_child(pos.index))[data_offset]; +} + +CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, + size_t length, size_t extra) { + assert(offset <= rep->length); + assert(offset <= rep->length - length); + + if (length == 0) { + CordRep::Unref(rep); + return nullptr; + } + + // Find position of first byte + Position head = rep->Find(offset); + Position tail = rep->FindTail(head.index, offset + length); + const size_t new_entries = rep->entries(head.index, tail.index); + + if (rep->refcount.IsOne() && extra <= (rep->capacity() - new_entries)) { + // We adopt a privately owned rep and no extra entries needed. + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->head_ = head.index; + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, head.index, tail.index, extra); + head.index = rep->head_; + tail.index = rep->tail_; + } + + // Adjust begin_pos and length + rep->length = length; + rep->begin_pos_ += offset; + + // Adjust head and tail blocks + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position head = rep->Find(len); + if (rep->refcount.IsOne()) { + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + rep->head_ = head.index; + } else { + rep = Copy(rep, head.index, rep->tail_, extra); + head.index = rep->head_; + } + + // Adjust begin_pos and length + rep->length -= len; + rep->begin_pos_ += len; + + // Adjust head block + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position tail = rep->FindTail(rep->length - len); + if (rep->refcount.IsOne()) { + // We adopt a privately owned rep, scrub. + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, rep->head_, tail.index, extra); + tail.index = rep->tail_; + } + + // Adjust length + rep->length -= len; + + // Adjust tail block + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h new file mode 100644 index 00000000..e6f6b59c --- /dev/null +++ b/absl/strings/internal/cord_rep_ring.h @@ -0,0 +1,576 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iosfwd> +#include <limits> +#include <memory> + +#include "absl/container/internal/layout.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// See https://bugs.llvm.org/show_bug.cgi?id=48477 +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" +#pragma clang diagnostic ignored "-Wshadow-field" +#endif + +// All operations modifying a ring buffer are implemented as static methods +// requiring a CordRepRing instance with a reference adopted by the method. +// +// The methods return the modified ring buffer, which may be equal to the input +// if the input was not shared, and having large enough capacity to accommodate +// any newly added node(s). Otherwise, a copy of the input rep with the new +// node(s) added is returned. +// +// Any modification on non shared ring buffers with enough capacity will then +// require minimum atomic operations. Caller should where possible provide +// reasonable `extra` hints for both anticipated extra `flat` byte space, as +// well as anticipated extra nodes required for complex operations. +// +// Example of code creating a ring buffer, adding some data to it, +// and discarding the buffer when done: +// +// void FunWithRings() { +// // Create ring with 3 flats +// CordRep* flat = CreateFlat("Hello"); +// CordRepRing* ring = CordRepRing::Create(flat, 2); +// ring = CordRepRing::Append(ring, CreateFlat(" ")); +// ring = CordRepRing::Append(ring, CreateFlat("world")); +// DoSomethingWithRing(ring); +// CordRep::Unref(ring); +// } +// +// Example of code Copying an existing ring buffer and modifying it: +// +// void MoreFunWithRings(CordRepRing* src) { +// CordRepRing* ring = CordRep::Ref(src)->ring(); +// ring = CordRepRing::Append(ring, CreateFlat("Hello")); +// ring = CordRepRing::Append(ring, CreateFlat(" ")); +// ring = CordRepRing::Append(ring, CreateFlat("world")); +// DoSomethingWithRing(ring); +// CordRep::Unref(ring); +// } +// +class CordRepRing : public CordRep { + public: + // `pos_type` represents a 'logical position'. A CordRepRing instance has a + // `begin_pos` (default 0), and each node inside the buffer will have an + // `end_pos` which is the `end_pos` of the previous node (or `begin_pos`) plus + // this node's length. The purpose is to allow for a binary search on this + // position, while allowing O(1) prepend and append operations. + using pos_type = uint64_t; + + // `index_type` is the type for the `head`, `tail` and `capacity` indexes. + // Ring buffers are limited to having no more than four billion entries. + using index_type = uint32_t; + + // `offset_type` is the type for the data offset inside a child rep's data. + using offset_type = uint32_t; + + // Position holds the node index and relative offset into the node for + // some physical offset in the contained data as returned by the Find() + // and FindTail() methods. + struct Position { + index_type index; + size_t offset; + }; + + // The maximum # of child nodes that can be hosted inside a CordRepRing. + static constexpr size_t kMaxCapacity = (std::numeric_limits<uint32_t>::max)(); + + // CordRepring can not be default constructed, moved, copied or assigned. + CordRepRing() = delete; + CordRepRing(const CordRepRing&) = delete; + CordRepRing& operator=(const CordRepRing&) = delete; + + // Returns true if this instance is valid, false if some or all of the + // invariants are broken. Intended for debug purposes only. + // `output` receives an explanation of the broken invariants. + bool IsValid(std::ostream& output) const; + + // Returns the size in bytes for a CordRepRing with `capacity' entries. + static constexpr size_t AllocSize(size_t capacity); + + // Returns the distance in bytes from `pos` to `end_pos`. + static constexpr size_t Distance(pos_type pos, pos_type end_pos); + + // Creates a new ring buffer from the provided `rep`. Adopts a reference + // on `rep`. The returned ring buffer has a capacity of at least `extra + 1` + static CordRepRing* Create(CordRep* child, size_t extra = 0); + + // `head`, `tail` and `capacity` indexes defining the ring buffer boundaries. + index_type head() const { return head_; } + index_type tail() const { return tail_; } + index_type capacity() const { return capacity_; } + + // Returns the number of entries in this instance. + index_type entries() const { return entries(head_, tail_); } + + // Returns the logical begin position of this instance. + pos_type begin_pos() const { return begin_pos_; } + + // Returns the number of entries for a given head-tail range. + // Requires `head` and `tail` values to be less than `capacity()`. + index_type entries(index_type head, index_type tail) const { + assert(head < capacity_ && tail < capacity_); + return tail - head + ((tail > head) ? 0 : capacity_); + } + + // Returns the logical end position of entry `index`. + pos_type const& entry_end_pos(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial().Pointer<0>(data_)[index]; + } + + // Returns the child pointer of entry `index`. + CordRep* const& entry_child(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial(capacity()).Pointer<1>(data_)[index]; + } + + // Returns the data offset of entry `index` + offset_type const& entry_data_offset(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial(capacity(), capacity()).Pointer<2>(data_)[index]; + } + + // Appends the provided child node to the `rep` instance. + // Adopts a reference from `rep` and `child` which may not be null. + // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node + // containing a FLAT or EXTERNAL node, then flat or external the node is added + // 'as is', with an offset added for the SUBSTRING case. + // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING or + // CONCAT tree, then all child nodes not excluded by any start offset or + // length values are added recursively. + static CordRepRing* Append(CordRepRing* rep, CordRep* child); + + // Appends the provided string data to the `rep` instance. + // This function will attempt to utilize any remaining capacity in the last + // node of the input if that node is not shared (directly or indirectly), and + // of type FLAT. Remaining data will be added as one or more FLAT nodes. + // Any last node added to the ring buffer will be allocated with up to + // `extra` bytes of capacity for (anticipated) subsequent append actions. + static CordRepRing* Append(CordRepRing* rep, string_view data, + size_t extra = 0); + + // Prepends the provided child node to the `rep` instance. + // Adopts a reference from `rep` and `child` which may not be null. + // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node + // containing a FLAT or EXTERNAL node, then flat or external the node is + // prepended 'as is', with an optional offset added for the SUBSTRING case. + // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING + // or CONCAT tree, then all child nodes not excluded by any start offset or + // length values are added recursively. + static CordRepRing* Prepend(CordRepRing* rep, CordRep* child); + + // Prepends the provided string data to the `rep` instance. + // This function will attempt to utilize any remaining capacity in the first + // node of the input if that node is not shared (directly or indirectly), and + // of type FLAT. Remaining data will be added as one or more FLAT nodes. + // Any first node prepnded to the ring buffer will be allocated with up to + // `extra` bytes of capacity for (anticipated) subsequent prepend actions. + static CordRepRing* Prepend(CordRepRing* rep, string_view data, + size_t extra = 0); + + // Returns a span referencing potentially unused capacity in the last node. + // The returned span may be empty if no such capacity is available, or if the + // current instance is shared. Else, a span of size `n <= size` is returned. + // If non empty, the ring buffer is adjusted to the new length, with the newly + // added capacity left uninitialized. Callers should assign a value to the + // entire span before any other operations on this instance. + Span<char> GetAppendBuffer(size_t size); + + // Returns a span referencing potentially unused capacity in the first node. + // This function is identical to GetAppendBuffer except that it returns a span + // referencing up to `size` capacity directly before the existing data. + Span<char> GetPrependBuffer(size_t size); + + // Returns a cord ring buffer containing `length` bytes of data starting at + // `offset`. If the input is not shared, this function will remove all head + // and tail child nodes outside of the requested range, and adjust the new + // head and tail nodes as required. If the input is shared, this function + // returns a new instance sharing some or all of the nodes from the input. + static CordRepRing* SubRing(CordRepRing* r, size_t offset, size_t length, + size_t extra = 0); + + // Returns a cord ring buffer with the first `length` bytes removed. + // If the input is not shared, this function will remove all head child nodes + // fully inside the first `length` bytes, and adjust the new head as required. + // If the input is shared, this function returns a new instance sharing some + // or all of the nodes from the input. + static CordRepRing* RemoveSuffix(CordRepRing* r, size_t length, + size_t extra = 0); + + // Returns a cord ring buffer with the last `length` bytes removed. + // If the input is not shared, this function will remove all head child nodes + // fully inside the first `length` bytes, and adjust the new head as required. + // If the input is shared, this function returns a new instance sharing some + // or all of the nodes from the input. + static CordRepRing* RemovePrefix(CordRepRing* r, size_t len, + size_t extra = 0); + + // Returns the character at `offset`. Requires that `offset < length`. + char GetCharacter(size_t offset) const; + + // Testing only: set capacity to requested capacity. + void SetCapacityForTesting(size_t capacity); + + // Returns the CordRep data pointer for the provided CordRep. + // Requires that the provided `rep` is either a FLAT or EXTERNAL CordRep. + static const char* GetLeafData(const CordRep* rep); + + // Returns the CordRep data pointer for the provided CordRep. + // Requires that `rep` is either a FLAT, EXTERNAL, or SUBSTRING CordRep. + static const char* GetRepData(const CordRep* rep); + + // Advances the provided position, wrapping around capacity as needed. + // Requires `index` < capacity() + inline index_type advance(index_type index) const; + + // Advances the provided position by 'n`, wrapping around capacity as needed. + // Requires `index` < capacity() and `n` <= capacity. + inline index_type advance(index_type index, index_type n) const; + + // Retreats the provided position, wrapping around 0 as needed. + // Requires `index` < capacity() + inline index_type retreat(index_type index) const; + + // Retreats the provided position by 'n', wrapping around 0 as needed. + // Requires `index` < capacity() + inline index_type retreat(index_type index, index_type n) const; + + // Returns the logical begin position of entry `index` + pos_type const& entry_begin_pos(index_type index) const { + return (index == head_) ? begin_pos_ : entry_end_pos(retreat(index)); + } + + // Returns the physical start offset of entry `index` + size_t entry_start_offset(index_type index) const { + return Distance(begin_pos_, entry_begin_pos(index)); + } + + // Returns the physical end offset of entry `index` + size_t entry_end_offset(index_type index) const { + return Distance(begin_pos_, entry_end_pos(index)); + } + + // Returns the data length for entry `index` + size_t entry_length(index_type index) const { + return Distance(entry_begin_pos(index), entry_end_pos(index)); + } + + // Returns the data for entry `index` + absl::string_view entry_data(index_type index) const; + + // Returns the position for `offset` as {index, prefix}. `index` holds the + // index of the entry at the specified offset and `prefix` holds the relative + // offset inside that entry. + // Requires `offset` < length. + // + // For example we can implement GetCharacter(offset) as: + // char GetCharacter(size_t offset) { + // Position pos = this->Find(offset); + // return this->entry_data(pos.pos)[pos.offset]; + // } + inline Position Find(size_t offset) const; + + // Find starting at `head` + inline Position Find(index_type head, size_t offset) const; + + // Returns the tail position for `offset` as {tail index, suffix}. + // `tail index` holds holds the index of the entry holding the offset directly + // before 'offset` advanced by one. 'suffix` holds the relative offset from + // that relative offset in the entry to the end of the entry. + // For example, FindTail(length) will return {tail(), 0}, FindTail(length - 5) + // will return {retreat(tail), 5)} provided the preceding entry contains at + // least 5 bytes of data. + // Requires offset >= 1 && offset <= length. + // + // This function is very useful in functions that need to clip the end of some + // ring buffer such as 'RemovePrefix'. + // For example, we could implement RemovePrefix for non shared instances as: + // void RemoveSuffix(size_t n) { + // Position pos = FindTail(length - n); + // UnrefEntries(pos.pos, this->tail_); + // this->tail_ = pos.pos; + // entry(retreat(pos.pos)).end_pos -= pos.offset; + // } + inline Position FindTail(size_t offset) const; + + // Find tail starting at `head` + inline Position FindTail(index_type head, size_t offset) const; + + // Invokes f(index_type index) for each entry inside the range [head, tail> + template <typename F> + void ForEach(index_type head, index_type tail, F&& f) const { + index_type n1 = (tail > head) ? tail : capacity_; + for (index_type i = head; i < n1; ++i) f(i); + if (tail <= head) { + for (index_type i = 0; i < tail; ++i) f(i); + } + } + + // Invokes f(index_type index) for each entry inside this instance. + template <typename F> + void ForEach(F&& f) const { + ForEach(head_, tail_, std::forward<F>(f)); + } + + // Dump this instance's data tp stream `s` in human readable format, excluding + // the actual data content itself. Intended for debug purposes only. + friend std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); + + private: + enum class AddMode { kAppend, kPrepend }; + + using Layout = container_internal::Layout<pos_type, CordRep*, offset_type>; + + class Filler; + class Transaction; + class CreateTransaction; + + static constexpr size_t kLayoutAlignment = Layout::Partial().Alignment(); + + // Creates a new CordRepRing. + explicit CordRepRing(index_type capacity) : capacity_(capacity) {} + + // Returns true if `index` is a valid index into this instance. + bool IsValidIndex(index_type index) const; + + // Debug use only: validates the provided CordRepRing invariants. + // Verification of all CordRepRing methods can be enabled by defining + // EXTRA_CORD_RING_VALIDATION, i.e.: `--copts=-DEXTRA_CORD_RING_VALIDATION` + // Verification is VERY expensive, so only do it for debugging purposes. + static CordRepRing* Validate(CordRepRing* rep, const char* file = nullptr, + int line = 0); + + // Allocates a CordRepRing large enough to hold `capacity + extra' entries. + // The returned capacity may be larger if the allocated memory allows for it. + // The maximum capacity of a CordRepRing is capped at kMaxCapacity. + // Throws `std::length_error` if `capacity + extra' exceeds kMaxCapacity. + static CordRepRing* New(size_t capacity, size_t extra); + + // Deallocates (but does not destroy) the provided ring buffer. + static void Delete(CordRepRing* rep); + + // Destroys the provided ring buffer, decrementing the reference count of all + // contained child CordReps. The provided 1\`rep` should have a ref count of + // one (pre decrement destroy call observing `refcount.IsOne()`) or zero (post + // decrement destroy call observing `!refcount.Decrement()`). + static void Destroy(CordRepRing* rep); + + // Returns a mutable reference to the logical end position array. + pos_type* entry_end_pos() { + return Layout::Partial().Pointer<0>(data_); + } + + // Returns a mutable reference to the child pointer array. + CordRep** entry_child() { + return Layout::Partial(capacity()).Pointer<1>(data_); + } + + // Returns a mutable reference to the data offset array. + offset_type* entry_data_offset() { + return Layout::Partial(capacity(), capacity()).Pointer<2>(data_); + } + + // Find implementations for the non fast path 0 / length cases. + Position FindSlow(index_type head, size_t offset) const; + Position FindTailSlow(index_type head, size_t offset) const; + + // Finds the index of the first node that is inside a reasonable distance + // of the node at `offset` from which we can continue with a linear search. + template <bool wrap> + index_type FindBinary(index_type head, index_type tail, size_t offset) const; + + // Fills the current (initialized) instance from the provided source, copying + // entries [head, tail). Adds a reference to copied entries if `ref` is true. + template <bool ref> + void Fill(const CordRepRing* src, index_type head, index_type tail); + + // Create a copy of 'rep', copying all entries [head, tail), allocating room + // for `extra` entries. Adds a reference on all copied entries. + static CordRepRing* Copy(CordRepRing* rep, index_type head, index_type tail, + size_t extra = 0); + + // Returns a Mutable CordRepRing reference from `rep` with room for at least + // `extra` additional nodes. Adopts a reference count from `rep`. + // This function will return `rep` if, and only if: + // - rep.entries + extra <= rep.capacity + // - rep.refcount == 1 + // Otherwise, this function will create a new copy of `rep` with additional + // capacity to satisfy `extra` extra nodes, and unref the old `rep` instance. + // + // If a new CordRepRing can not be allocated, or the new capacity would exceed + // the maxmimum capacity, then the input is consumed only, and an exception is + // thrown. + static CordRepRing* Mutable(CordRepRing* rep, size_t extra); + + // Slow path for Append(CordRepRing* rep, CordRep* child). This function is + // exercised if the provided `child` in Append() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* AppendSlow(CordRepRing* rep, CordRep* child); + + // Appends the provided leaf node. Requires `child` to be FLAT or EXTERNAL. + static CordRepRing* AppendLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length); + + // Prepends the provided leaf node. Requires `child` to be FLAT or EXTERNAL. + static CordRepRing* PrependLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length); + + // Slow path for Prepend(CordRepRing* rep, CordRep* child). This function is + // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* PrependSlow(CordRepRing* rep, CordRep* child); + + // Slow path for Create(CordRep* child, size_t extra). This function is + // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* CreateSlow(CordRep* child, size_t extra); + + // Creates a new ring buffer from the provided `child` leaf node. Requires + // `child` to be FLAT or EXTERNAL. on `rep`. + // The returned ring buffer has a capacity of at least `1 + extra` + static CordRepRing* CreateFromLeaf(CordRep* child, size_t offset, + size_t length, size_t extra); + + // Appends or prepends (depending on AddMode) the ring buffer in `ring' to + // `rep` starting at `offset` with length `length`. + template <AddMode mode> + static CordRepRing* AddRing(CordRepRing* rep, CordRepRing* ring, + size_t offset, size_t length); + + // Increases the data offset for entry `index` by `n`. + void AddDataOffset(index_type index, size_t n); + + // Descreases the length for entry `index` by `n`. + void SubLength(index_type index, size_t n); + + index_type head_; + index_type tail_; + index_type capacity_; + pos_type begin_pos_; + + alignas(kLayoutAlignment) char data_[kLayoutAlignment]; + + friend struct CordRep; +}; + +constexpr size_t CordRepRing::AllocSize(size_t capacity) { + return sizeof(CordRepRing) - sizeof(data_) + + Layout(capacity, capacity, capacity).AllocSize(); +} + +inline constexpr size_t CordRepRing::Distance(pos_type pos, pos_type end_pos) { + return (end_pos - pos); +} + +inline const char* CordRepRing::GetLeafData(const CordRep* rep) { + return rep->tag != EXTERNAL ? rep->flat()->Data() : rep->external()->base; +} + +inline const char* CordRepRing::GetRepData(const CordRep* rep) { + if (rep->tag >= FLAT) return rep->flat()->Data(); + if (rep->tag == EXTERNAL) return rep->external()->base; + return GetLeafData(rep->substring()->child) + rep->substring()->start; +} + +inline CordRepRing::index_type CordRepRing::advance(index_type index) const { + assert(index < capacity_); + return ++index == capacity_ ? 0 : index; +} + +inline CordRepRing::index_type CordRepRing::advance(index_type index, + index_type n) const { + assert(index < capacity_ && n <= capacity_); + return (index += n) >= capacity_ ? index - capacity_ : index; +} + +inline CordRepRing::index_type CordRepRing::retreat(index_type index) const { + assert(index < capacity_); + return (index > 0 ? index : capacity_) - 1; +} + +inline CordRepRing::index_type CordRepRing::retreat(index_type index, + index_type n) const { + assert(index < capacity_ && n <= capacity_); + return index >= n ? index - n : capacity_ - n + index; +} + +inline absl::string_view CordRepRing::entry_data(index_type index) const { + size_t data_offset = entry_data_offset(index); + return {GetRepData(entry_child(index)) + data_offset, entry_length(index)}; +} + +inline bool CordRepRing::IsValidIndex(index_type index) const { + if (index >= capacity_) return false; + return (tail_ > head_) ? (index >= head_ && index < tail_) + : (index >= head_ || index < tail_); +} + +#ifndef EXTRA_CORD_RING_VALIDATION +inline CordRepRing* CordRepRing::Validate(CordRepRing* rep, + const char* /*file*/, int /*line*/) { + return rep; +} +#endif + +inline CordRepRing::Position CordRepRing::Find(size_t offset) const { + assert(offset < length); + return (offset == 0) ? Position{head_, 0} : FindSlow(head_, offset); +} + +inline CordRepRing::Position CordRepRing::Find(index_type head, + size_t offset) const { + assert(offset < length); + assert(IsValidIndex(head) && offset >= entry_start_offset(head)); + return (offset == 0) ? Position{head_, 0} : FindSlow(head, offset); +} + +inline CordRepRing::Position CordRepRing::FindTail(size_t offset) const { + assert(offset > 0 && offset <= length); + return (offset == length) ? Position{tail_, 0} : FindTailSlow(head_, offset); +} + +inline CordRepRing::Position CordRepRing::FindTail(index_type head, + size_t offset) const { + assert(offset > 0 && offset <= length); + assert(IsValidIndex(head) && offset >= entry_start_offset(head) + 1); + return (offset == length) ? Position{tail_, 0} : FindTailSlow(head, offset); +} + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ |