summaryrefslogtreecommitdiff
path: root/absl/strings/internal
diff options
context:
space:
mode:
author Abseil Team <absl-team@google.com> 2021-01-20 12:39:22 -0800
committer vslashg <gfalcon@google.com> 2021-01-20 16:43:33 -0500
commit 22771d471930ce88e1e75d0ca9dd8c65a7b0f895 (patch)
tree 7dcfb410421ef18643a08fc552691b07ace50395 /absl/strings/internal
parent b2dcbba18341d75f3fef486b717585cefda0195d (diff)
Export of internal Abseil changes
-- 642ab296a2c9629c44f3f2ce6911cd2488bcf416 by Derek Mauro <dmauro@google.com>: Remove an obsolete check in CMakeLists.txt PiperOrigin-RevId: 352852564 -- ce78cb96bcfd162737dbcf35005da3d1d6a3486b by Abseil Team <absl-team@google.com>: Clarify that the calling *thread* must have locked the mutex in order to unlock it. PiperOrigin-RevId: 352801804 -- 24e1f5f72756046f5265abf618e951c341f09b8d by Derek Mauro <dmauro@google.com>: Fixes failing CMake string comparisons https://cmake.org/cmake/help/latest/policy/CMP0054.html Fixes #791 PiperOrigin-RevId: 352791054 -- 0ac10bc3f4dca2c4c4b51d7b8196a2eaee9537a1 by Abseil Team <absl-team@google.com>: Introduce CordRepRing class This change introduces the CordRepRing class that implements all the lower level / internal implementation for upcoming CordRepRing ring buffer support in cord. PiperOrigin-RevId: 352771994 -- 4bd36dda61760785844f0f29f26d90cc18046f75 by Abseil Team <absl-team@google.com>: Optimize InlineData representation for cord sampling (cordz) This CL changes InlineData to allow us to store a (future) Cordz Info pointer directly into the inline representation: - make InlineData a class that provides a public API to set the active union members (tree or chars) and safely access that data. - change 'tree' and 'profiled' bits to be the 2 least significant bits, allowing us 62 contiguous bits for storing a Cordz Info pointer. PiperOrigin-RevId: 352642411 -- dc55ba71bbce0e6a83e05a453990c51ac3d68426 by Mark Barolak <mbar@google.com>: Add unit test coverage for the mutating overload of absl::AsciiStrToLower. PiperOrigin-RevId: 352626006 GitOrigin-RevId: 642ab296a2c9629c44f3f2ce6911cd2488bcf416 Change-Id: I6c5929dd830d3c630e14e7fd5387fc3e25a69100
Diffstat (limited to 'absl/strings/internal')
-rw-r--r-- absl/strings/internal/cord_internal.cc 4
-rw-r--r-- absl/strings/internal/cord_internal.h 206
-rw-r--r-- absl/strings/internal/cord_rep_flat.h 4
-rw-r--r-- absl/strings/internal/cord_rep_ring.cc 895
-rw-r--r-- absl/strings/internal/cord_rep_ring.h 576
5 files changed, 1648 insertions, 37 deletions
diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc
index 59f2e4d9..905ffd0c 100644
--- a/absl/strings/internal/cord_internal.cc
+++ b/absl/strings/internal/cord_internal.cc
@@ -19,6 +19,7 @@
#include "absl/container/inlined_vector.h"
#include "absl/strings/internal/cord_rep_flat.h"
+#include "absl/strings/internal/cord_rep_ring.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -48,6 +49,9 @@ void CordRep::Destroy(CordRep* rep) {
rep = left;
continue;
}
+ } else if (rep->tag == RING) {
+ CordRepRing::Destroy(rep->ring());
+ rep = nullptr;
} else if (rep->tag == EXTERNAL) {
CordRepExternal::Delete(rep);
rep = nullptr;
diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h
index b586ea37..011b49d3 100644
--- a/absl/strings/internal/cord_internal.h
+++ b/absl/strings/internal/cord_internal.h
@@ -1,4 +1,4 @@
-// Copyright 2020 The Abseil Authors.
+// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
#include <cstdint>
#include <type_traits>
+#include "absl/base/config.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
@@ -145,13 +146,14 @@ struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
+class CordRepRing;
// Various representations that we allow
enum CordRepKind {
- CONCAT = 0,
- EXTERNAL = 1,
- SUBSTRING = 2,
- RING = 3,
+ CONCAT = 0,
+ EXTERNAL = 1,
+ SUBSTRING = 2,
+ RING = 3,
// We have different tags for different sized flat arrays,
// starting with FLAT, and limited to MAX_FLAT_TAG. The 224 value is based on
@@ -160,7 +162,7 @@ enum CordRepKind {
// as the Tag <---> Size logic so that FLAT still represents the minimum flat
// allocation size. (32 bytes as of now).
FLAT = 4,
- MAX_FLAT_TAG = 224,
+ MAX_FLAT_TAG = 224
};
struct CordRep {
@@ -177,6 +179,8 @@ struct CordRep {
uint8_t tag;
char storage[1]; // Starting point for flat array: MUST BE LAST FIELD
+ inline CordRepRing* ring();
+ inline const CordRepRing* ring() const;
inline CordRepConcat* concat();
inline const CordRepConcat* concat() const;
inline CordRepSubstring* substring();
@@ -306,45 +310,165 @@ CordRepExternal ConstInitExternalStorage<Str>::value(Str::value);
enum {
kMaxInline = 15,
- // Tag byte & kMaxInline means we are storing a pointer.
- kTreeFlag = 1 << 4,
- // Tag byte & kProfiledFlag means we are profiling the Cord.
- kProfiledFlag = 1 << 5
-};
-
-// If the data has length <= kMaxInline, we store it in `as_chars`, and
-// store the size in `tagged_size`.
-// Else we store it in a tree and store a pointer to that tree in
-// `as_tree.rep` and store a tag in `tagged_size`.
-struct AsTree {
- absl::cord_internal::CordRep* rep;
- char padding[kMaxInline + 1 - sizeof(absl::cord_internal::CordRep*) - 1];
- char tagged_size;
};
constexpr char GetOrNull(absl::string_view data, size_t pos) {
return pos < data.size() ? data[pos] : '\0';
}
-union InlineData {
- constexpr InlineData() : as_chars{} {}
- explicit constexpr InlineData(AsTree tree) : as_tree(tree) {}
+// We store cordz_info as 64 bit pointer value in big endian format. This
+// guarantees that the least significant byte of cordz_info matches the last
+// byte of the inline data representation in as_chars_, which holds the inlined
+// size or the 'is_tree' bit.
+using cordz_info_t = int64_t;
+
+// Assert that the `cordz_info` pointer value perfectly overlaps the last half
+// of `as_chars_` and can hold a pointer value.
+static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
+static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");
+
+// BigEndianByte() creates a big endian representation of 'value', i.e.: a big
+// endian value where the last byte in the host's representation holds 'value',
+// with all other bytes being 0.
+static constexpr cordz_info_t BigEndianByte(unsigned char value) {
+#if defined(ABSL_IS_BIG_ENDIAN)
+ return value;
+#else
+ return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
+#endif
+}
+
+class InlineData {
+ public:
+ // kNullCordzInfo holds the big endian representation of intptr_t(1).
+ // This is the 'null' / initial value of 'cordz_info'. The null value
+ // is specifically big endian 1 as with 64-bit pointers, the last
+ // byte of cordz_info overlaps with the last byte holding the tag.
+ static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1);
+
+ // kFakeCordzInfo holds a 'fake', non-null cordz-info value we use to
+ // emulate the previous 'kProfiled' tag logic in 'set_profiled' until
+ // cord code is changed to store cordz_info values in InlineData.
+ static constexpr cordz_info_t kFakeCordzInfo = BigEndianByte(9);
+
+ constexpr InlineData() : as_chars_{0} {}
+ explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {}
explicit constexpr InlineData(absl::string_view chars)
- : as_chars{GetOrNull(chars, 0), GetOrNull(chars, 1),
- GetOrNull(chars, 2), GetOrNull(chars, 3),
- GetOrNull(chars, 4), GetOrNull(chars, 5),
- GetOrNull(chars, 6), GetOrNull(chars, 7),
- GetOrNull(chars, 8), GetOrNull(chars, 9),
- GetOrNull(chars, 10), GetOrNull(chars, 11),
- GetOrNull(chars, 12), GetOrNull(chars, 13),
- GetOrNull(chars, 14), static_cast<char>(chars.size())} {}
-
- AsTree as_tree;
- char as_chars[kMaxInline + 1];
+ : as_chars_{
+ GetOrNull(chars, 0), GetOrNull(chars, 1),
+ GetOrNull(chars, 2), GetOrNull(chars, 3),
+ GetOrNull(chars, 4), GetOrNull(chars, 5),
+ GetOrNull(chars, 6), GetOrNull(chars, 7),
+ GetOrNull(chars, 8), GetOrNull(chars, 9),
+ GetOrNull(chars, 10), GetOrNull(chars, 11),
+ GetOrNull(chars, 12), GetOrNull(chars, 13),
+ GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {}
+
+ // Returns true if the current instance is empty.
+ // The 'empty value' is an inlined data value of zero length.
+ bool is_empty() const { return tag() == 0; }
+
+ // Returns true if the current instance holds a tree value.
+ bool is_tree() const { return (tag() & 1) != 0; }
+
+ // Returns true if the current instance holds a cordz_info value.
+ // Requires the current instance to hold a tree value.
+ bool is_profiled() const {
+ assert(is_tree());
+ return as_tree_.cordz_info != kNullCordzInfo;
+ }
+
+ // Returns a read only pointer to the character data inside this instance.
+ // Requires the current instance to hold inline data.
+ const char* as_chars() const {
+ assert(!is_tree());
+ return as_chars_;
+ }
+
+ // Returns a mutable pointer to the character data inside this instance.
+ // Should be used for 'write only' operations setting an inlined value.
+ // Applications can set the value of inlined data either before or after
+ // setting the inlined size, i.e., both of the below are valid:
+ //
+ // // Set inlined data and inline size
+ // memcpy(data_.as_chars(), data, size);
+ // data_.set_inline_size(size);
+ //
+ // // Set inlined size and inline data
+ // data_.set_inline_size(size);
+ // memcpy(data_.as_chars(), data, size);
+ //
+ // It's an error to read from the returned pointer without a preceding write
+ // if the current instance does not hold inline data, i.e.: is_tree() == true.
+ char* as_chars() { return as_chars_; }
+
+ // Returns the tree value of this value.
+ // Requires the current instance to hold a tree value.
+ CordRep* as_tree() const {
+ assert(is_tree());
+ return as_tree_.rep;
+ }
+
+ // Initialize this instance to holding the tree value `rep`,
+ // initializing the cordz_info to null, i.e.: 'not profiled'.
+ void make_tree(CordRep* rep) {
+ as_tree_.rep = rep;
+ as_tree_.cordz_info = kNullCordzInfo;
+ }
+
+ // Set the tree value of this instance to `rep`.
+ // Requires the current instance to already hold a tree value.
+ // Does not affect the value of cordz_info.
+ void set_tree(CordRep* rep) {
+ assert(is_tree());
+ as_tree_.rep = rep;
+ }
+
+ // Returns the size of the inlined character data inside this instance.
+ // Requires the current instance to hold inline data.
+ size_t inline_size() const {
+ assert(!is_tree());
+ return tag() >> 1;
+ }
+
+ // Sets the size of the inlined character data inside this instance.
+ // Requires `size` to be <= kMaxInline.
+ // See the documentation on 'as_chars()' for more information and examples.
+ void set_inline_size(size_t size) {
+ ABSL_ASSERT(size <= kMaxInline);
+ tag() = static_cast<char>(size << 1);
+ }
+
+ // Sets or unsets the 'is_profiled' state of this instance.
+ // Requires the current instance to hold a tree value.
+ void set_profiled(bool profiled) {
+ assert(is_tree());
+ as_tree_.cordz_info = profiled ? kFakeCordzInfo : kNullCordzInfo;
+ }
+
+ private:
+ // See cordz_info_t for forced alignment and size of `cordz_info` details.
+ struct AsTree {
+ explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
+ : rep(tree), cordz_info(kNullCordzInfo) {}
+ absl::cord_internal::CordRep* rep;
+ alignas(sizeof(cordz_info_t)) cordz_info_t cordz_info;
+ };
+
+ char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; }
+ char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; }
+
+ // If the data has length <= kMaxInline, we store it in `as_chars_`, and
+ // store the size, shifted left by 1, in the last char of `as_chars_`.
+ // Else we store it in a tree and store a pointer to that tree in
+ // `as_tree_.rep`; the last byte of `as_tree_.cordz_info` then holds the tag.
+ union {
+ char as_chars_[kMaxInline + 1];
+ AsTree as_tree_;
+ };
};
+
static_assert(sizeof(InlineData) == kMaxInline + 1, "");
-static_assert(sizeof(AsTree) == sizeof(InlineData), "");
-static_assert(offsetof(AsTree, tagged_size) == kMaxInline, "");
inline CordRepConcat* CordRep::concat() {
assert(tag == CONCAT);
@@ -386,6 +510,16 @@ inline const CordRepFlat* CordRep::flat() const {
return reinterpret_cast<const CordRepFlat*>(this);
}
+inline CordRepRing* CordRep::ring() {
+ assert(tag == RING);
+ return reinterpret_cast<CordRepRing*>(this);
+}
+
+inline const CordRepRing* CordRep::ring() const {
+ assert(tag == RING);
+ return reinterpret_cast<const CordRepRing*>(this);
+}
+
inline CordRep* CordRep::Ref(CordRep* rep) {
assert(rep != nullptr);
rep->refcount.Increment();
diff --git a/absl/strings/internal/cord_rep_flat.h b/absl/strings/internal/cord_rep_flat.h
index 8c7d160e..5f7d55ce 100644
--- a/absl/strings/internal/cord_rep_flat.h
+++ b/absl/strings/internal/cord_rep_flat.h
@@ -104,7 +104,8 @@ struct CordRepFlat : public CordRep {
// Flat CordReps are allocated and constructed with raw ::operator new and
// placement new, and must be destructed and deallocated accordingly.
static void Delete(CordRep*rep) {
- assert(rep->tag >= FLAT);
+ assert(rep->tag >= FLAT && rep->tag <= MAX_FLAT_TAG);
+
#if defined(__cpp_sized_deallocation)
size_t size = TagToAllocatedSize(rep->tag);
rep->~CordRep();
@@ -115,6 +116,7 @@ struct CordRepFlat : public CordRep {
#endif
}
+ // Returns a pointer to the data inside this flat rep.
char* Data() { return storage; }
const char* Data() const { return storage; }
diff --git a/absl/strings/internal/cord_rep_ring.cc b/absl/strings/internal/cord_rep_ring.cc
new file mode 100644
index 00000000..358b0d92
--- /dev/null
+++ b/absl/strings/internal/cord_rep_ring.cc
@@ -0,0 +1,895 @@
+// Copyright 2020 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "absl/strings/internal/cord_rep_ring.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "absl/base/internal/raw_logging.h"
+#include "absl/base/internal/throw_delegate.h"
+#include "absl/base/macros.h"
+#include "absl/container/inlined_vector.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+// See https://bugs.llvm.org/show_bug.cgi?id=48477
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wshadow"
+#pragma clang diagnostic ignored "-Wshadow-field"
+#endif
+
+namespace {
+
+using index_type = CordRepRing::index_type;
+
+enum class Direction { kForward, kReversed };
+
+inline bool IsFlatOrExternal(CordRep* rep) {
+ return rep->tag >= FLAT || rep->tag == EXTERNAL;
+}
+
+// Verifies that n + extra <= kMaxCapacity: throws std::length_error otherwise.
+inline void CheckCapacity(size_t n, size_t extra) {
+ if (ABSL_PREDICT_FALSE(extra > CordRepRing::kMaxCapacity - n)) {
+ base_internal::ThrowStdLengthError("Maximum capacity exceeded");
+ }
+}
+
+// Removes a reference from `rep` only.
+// Asserts that the refcount after decrement is not zero.
+inline bool UnrefNeverOne(CordRep* rep) {
+ bool result = rep->refcount.Decrement();
+ assert(result);
+ return result;
+}
+
+// Creates a flat from the provided string data, allocating up to `extra`
+// capacity in the returned flat depending on kMaxFlatLength limitations.
+// Requires `n` to be less than or equal to `kMaxFlatLength`.
+CordRepFlat* CreateFlat(const char* s, size_t n, size_t extra = 0) { // NOLINT
+ assert(n <= kMaxFlatLength);
+ auto* rep = CordRepFlat::New(n + extra);
+ rep->length = n;
+ memcpy(rep->Data(), s, n);
+ return rep;
+}
+
+// Unrefs the provided `substring`, and returns `substring->child`
+// Adds or assumes a reference on `substring->child`
+CordRep* ClipSubstring(CordRepSubstring* substring) {
+ CordRep* child = substring->child;
+ if (substring->refcount.IsOne()) {
+ delete substring;
+ } else {
+ CordRep::Ref(child);
+ if (ABSL_PREDICT_FALSE(!substring->refcount.Decrement())) {
+ UnrefNeverOne(child);
+ delete substring;
+ }
+ }
+ return child;
+}
+
+// Unrefs the provided `concat`, and returns `{concat->left, concat->right}`
+// Adds or assumes a reference on `concat->left` and `concat->right`.
+std::pair<CordRep*, CordRep*> ClipConcat(CordRepConcat* concat) {
+ auto result = std::make_pair(concat->left, concat->right);
+ if (concat->refcount.IsOne()) {
+ delete concat;
+ } else {
+ CordRep::Ref(result.first);
+ CordRep::Ref(result.second);
+ if (ABSL_PREDICT_FALSE(!concat->refcount.Decrement())) {
+ UnrefNeverOne(result.first);
+ UnrefNeverOne(result.second);
+ delete concat;
+ }
+ }
+ return result;
+}
+
+// Unrefs the entries in `[head, tail)`.
+// Requires all entries to be a FLAT or EXTERNAL node.
+void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) {
+ rep->ForEach(head, tail, [rep](index_type ix) {
+ CordRep* child = rep->entry_child(ix);
+ if (!child->refcount.Decrement()) {
+ if (child->tag >= FLAT) {
+ CordRepFlat::Delete(child->flat());
+ } else {
+ CordRepExternal::Delete(child->external());
+ }
+ }
+ });
+}
+
+template <typename F>
+void Consume(Direction direction, CordRep* rep, F&& fn) {
+ size_t offset = 0;
+ size_t length = rep->length;
+ struct Entry {
+ CordRep* rep;
+ size_t offset;
+ size_t length;
+ };
+ absl::InlinedVector<Entry, 40> stack;
+
+ for (;;) {
+ if (rep->tag >= FLAT || rep->tag == EXTERNAL || rep->tag == RING) {
+ fn(rep, offset, length);
+ if (stack.empty()) return;
+
+ rep = stack.back().rep;
+ offset = stack.back().offset;
+ length = stack.back().length;
+ stack.pop_back();
+ } else if (rep->tag == SUBSTRING) {
+ offset += rep->substring()->start;
+ rep = ClipSubstring(rep->substring());
+ } else if (rep->tag == CONCAT) {
+ auto res = ClipConcat(rep->concat());
+ CordRep* left = res.first;
+ CordRep* right = res.second;
+
+ if (left->length <= offset) {
+ // Don't need left node
+ offset -= left->length;
+ CordRep::Unref(left);
+ rep = right;
+ continue;
+ }
+
+ size_t length_left = left->length - offset;
+ if (length_left >= length) {
+ // Don't need right node
+ CordRep::Unref(right);
+ rep = left;
+ continue;
+ }
+
+ // Need both nodes
+ size_t length_right = length - length_left;
+ if (direction == Direction::kReversed) {
+ stack.push_back({left, offset, length_left});
+ rep = right;
+ offset = 0;
+ length = length_right;
+ } else {
+ stack.push_back({right, 0, length_right});
+ rep = left;
+ length = length_left;
+ }
+ } else {
+ assert("Valid tag" == nullptr);
+ return;
+ }
+ }
+}
+
+template <typename F>
+void Consume(CordRep* rep, F&& fn) {
+ return Consume(Direction::kForward, rep, std::forward<F>(fn));
+}
+
+template <typename F>
+void RConsume(CordRep* rep, F&& fn) {
+ return Consume(Direction::kReversed, rep, std::forward<F>(fn));
+}
+
+} // namespace
+
+std::ostream& operator<<(std::ostream& s, const CordRepRing& rep) {
+ // Note: 'pos' values are defined as size_t (for overflow reasons), but that
+ // prints really awkwardly for small prepended values such as -5. ssize_t is not
+ // portable (POSIX), so we use ptrdiff_t instead to cast to signed values.
+ s << " CordRepRing(" << &rep << ", length = " << rep.length
+ << ", head = " << rep.head_ << ", tail = " << rep.tail_
+ << ", cap = " << rep.capacity_ << ", rc = " << rep.refcount.Get()
+ << ", begin_pos_ = " << static_cast<ptrdiff_t>(rep.begin_pos_) << ") {\n";
+ CordRepRing::index_type head = rep.head();
+ do {
+ CordRep* child = rep.entry_child(head);
+ s << " entry[" << head << "] length = " << rep.entry_length(head)
+ << ", child " << child << ", clen = " << child->length
+ << ", tag = " << static_cast<int>(child->tag)
+ << ", rc = " << child->refcount.Get()
+ << ", offset = " << rep.entry_data_offset(head)
+ << ", end_pos = " << static_cast<ptrdiff_t>(rep.entry_end_pos(head))
+ << "\n";
+ head = rep.advance(head);
+ } while (head != rep.tail());
+ return s << "}\n";
+}
+
+void CordRepRing::AddDataOffset(index_type index, size_t n) {
+ entry_data_offset()[index] += static_cast<offset_type>(n);
+}
+
+void CordRepRing::SubLength(index_type index, size_t n) {
+ entry_end_pos()[index] -= n;
+}
+
+class CordRepRing::Filler {
+ public:
+ Filler(CordRepRing* rep, index_type pos) : rep_(rep), head_(pos), pos_(pos) {}
+
+ index_type head() const { return head_; }
+ index_type pos() const { return pos_; }
+
+ void Add(CordRep* child, size_t offset, pos_type end_pos) {
+ rep_->entry_end_pos()[pos_] = end_pos;
+ rep_->entry_child()[pos_] = child;
+ rep_->entry_data_offset()[pos_] = static_cast<offset_type>(offset);
+ pos_ = rep_->advance(pos_);
+ }
+
+ private:
+ CordRepRing* rep_;
+ index_type head_;
+ index_type pos_;
+};
+
+constexpr size_t CordRepRing::kMaxCapacity; // NOLINT: needed for c++11
+
+bool CordRepRing::IsValid(std::ostream& output) const {
+ if (capacity_ == 0) {
+ output << "capacity == 0";
+ return false;
+ }
+
+ if (head_ >= capacity_ || tail_ >= capacity_) {
+ output << "head " << head_ << " and/or tail " << tail_ << "exceed capacity "
+ << capacity_;
+ return false;
+ }
+
+ const index_type back = retreat(tail_);
+ size_t pos_length = Distance(begin_pos_, entry_end_pos(back));
+ if (pos_length != length) {
+ output << "length " << length << " does not match positional length "
+ << pos_length << " from begin_pos " << begin_pos_ << " and entry["
+ << back << "].end_pos " << entry_end_pos(back);
+ return false;
+ }
+
+ index_type head = head_;
+ pos_type begin_pos = begin_pos_;
+ do {
+ pos_type end_pos = entry_end_pos(head);
+ size_t entry_length = Distance(begin_pos, end_pos);
+ if (entry_length == 0) {
+ output << "entry[" << head << "] has an invalid length " << entry_length
+ << " from begin_pos " << begin_pos << " and end_pos " << end_pos;
+ return false;
+ }
+
+ CordRep* child = entry_child(head);
+ if (child == nullptr) {
+ output << "entry[" << head << "].child == nullptr";
+ return false;
+ }
+ if (child->tag < FLAT && child->tag != EXTERNAL) {
+ output << "entry[" << head << "].child has an invalid tag "
+ << static_cast<int>(child->tag);
+ return false;
+ }
+
+ size_t offset = entry_data_offset(head);
+ if (offset >= child->length || entry_length > child->length - offset) {
+ output << "entry[" << head << "] has offset " << offset
+ << " and entry length " << entry_length
+ << " which are outside of the childs length of " << child->length;
+ return false;
+ }
+
+ begin_pos = end_pos;
+ head = advance(head);
+ } while (head != tail_);
+
+ return true;
+}
+
+#ifdef EXTRA_CORD_RING_VALIDATION
+CordRepRing* CordRepRing::Validate(CordRepRing* rep, const char* file,
+ int line) {
+ if (!rep->IsValid(std::cerr)) {
+ std::cerr << "\nERROR: CordRepRing corrupted";
+ if (line) std::cerr << " at line " << line;
+ if (file) std::cerr << " in file " << file;
+ std::cerr << "\nContent = " << *rep;
+ abort();
+ }
+ return rep;
+}
+#endif // EXTRA_CORD_RING_VALIDATION
+
+CordRepRing* CordRepRing::New(size_t capacity, size_t extra) {
+ CheckCapacity(capacity, extra);
+
+ size_t size = AllocSize(capacity += extra);
+ void* mem = ::operator new(size);
+ auto* rep = new (mem) CordRepRing(static_cast<index_type>(capacity));
+ rep->tag = RING;
+ rep->capacity_ = static_cast<index_type>(capacity);
+ rep->begin_pos_ = 0;
+ return rep;
+}
+
+void CordRepRing::SetCapacityForTesting(size_t capacity) {
+ // Adjust for the changed layout
+ assert(capacity <= capacity_);
+ assert(head() == 0 || head() < tail());
+ memmove(Layout::Partial(capacity).Pointer<1>(data_) + head(),
+ Layout::Partial(capacity_).Pointer<1>(data_) + head(),
+ entries() * sizeof(Layout::ElementType<1>));
+ memmove(Layout::Partial(capacity, capacity).Pointer<2>(data_) + head(),
+ Layout::Partial(capacity_, capacity_).Pointer<2>(data_) + head(),
+ entries() * sizeof(Layout::ElementType<2>));
+ capacity_ = static_cast<index_type>(capacity);
+}
+
+void CordRepRing::Delete(CordRepRing* rep) {
+ assert(rep != nullptr && rep->tag == RING);
+#if defined(__cpp_sized_deallocation)
+ size_t size = AllocSize(rep->capacity_);
+ rep->~CordRepRing();
+ ::operator delete(rep, size);
+#else
+ rep->~CordRepRing();
+ ::operator delete(rep);
+#endif
+}
+
+void CordRepRing::Destroy(CordRepRing* rep) {
+ UnrefEntries(rep, rep->head(), rep->tail());
+ Delete(rep);
+}
+
+template <bool ref>
+void CordRepRing::Fill(const CordRepRing* src, index_type head,
+ index_type tail) {
+ this->length = src->length;
+ head_ = 0;
+ tail_ = advance(0, src->entries(head, tail));
+ begin_pos_ = src->begin_pos_;
+
+ // TODO(mvels): there may be opportunities here for large buffers.
+ auto* dst_pos = entry_end_pos();
+ auto* dst_child = entry_child();
+ auto* dst_offset = entry_data_offset();
+ src->ForEach(head, tail, [&](index_type index) {
+ *dst_pos++ = src->entry_end_pos(index);
+ CordRep* child = src->entry_child(index);
+ *dst_child++ = ref ? CordRep::Ref(child) : child;
+ *dst_offset++ = src->entry_data_offset(index);
+ });
+}
+
+CordRepRing* CordRepRing::Copy(CordRepRing* rep, index_type head,
+ index_type tail, size_t extra) {
+ CordRepRing* newrep = CordRepRing::New(rep->entries(head, tail), extra);
+ newrep->Fill<true>(rep, head, tail);
+ CordRep::Unref(rep);
+ return newrep;
+}
+
+CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) {
+ // Get current number of entries, and check for max capacity.
+ size_t entries = rep->entries();
+
+ size_t min_extra = (std::max)(extra, rep->capacity() * 2 - entries);
+ if (!rep->refcount.IsOne()) {
+ return Copy(rep, rep->head(), rep->tail(), min_extra);
+ } else if (entries + extra > rep->capacity()) {
+ CordRepRing* newrep = CordRepRing::New(entries, min_extra);
+ newrep->Fill<false>(rep, rep->head(), rep->tail());
+ CordRepRing::Delete(rep);
+ return newrep;
+ } else {
+ return rep;
+ }
+}
+
+Span<char> CordRepRing::GetAppendBuffer(size_t size) {
+ assert(refcount.IsOne());
+ index_type back = retreat(tail_);
+ CordRep* child = entry_child(back);
+ if (child->tag >= FLAT && child->refcount.IsOne()) {
+ size_t capacity = child->flat()->Capacity();
+ pos_type end_pos = entry_end_pos(back);
+ size_t data_offset = entry_data_offset(back);
+ size_t entry_length = Distance(entry_begin_pos(back), end_pos);
+ size_t used = data_offset + entry_length;
+ if (size_t n = (std::min)(capacity - used, size)) {
+ child->length = data_offset + entry_length + n;
+ entry_end_pos()[back] = end_pos + n;
+ this->length += n;
+ return {child->flat()->Data() + used, n};
+ }
+ }
+ return {nullptr, 0};
+}
+
+Span<char> CordRepRing::GetPrependBuffer(size_t size) {
+ assert(refcount.IsOne());
+ CordRep* child = entry_child(head_);
+ size_t data_offset = entry_data_offset(head_);
+ if (data_offset && child->refcount.IsOne() && child->tag >= FLAT) {
+ size_t n = (std::min)(data_offset, size);
+ this->length += n;
+ begin_pos_ -= n;
+ data_offset -= n;
+ entry_data_offset()[head_] = static_cast<offset_type>(data_offset);
+ return {child->flat()->Data() + data_offset, n};
+ }
+ return {nullptr, 0};
+}
+
+CordRepRing* CordRepRing::CreateFromLeaf(CordRep* child, size_t offset,
+ size_t length, size_t extra) {
+ CordRepRing* rep = CordRepRing::New(1, extra);
+ rep->head_ = 0;
+ rep->tail_ = rep->advance(0);
+ rep->length = length;
+ rep->entry_end_pos()[0] = length;
+ rep->entry_child()[0] = child;
+ rep->entry_data_offset()[0] = static_cast<offset_type>(offset);
+ return Validate(rep);
+}
+
+CordRepRing* CordRepRing::CreateSlow(CordRep* child, size_t extra) {
+ CordRepRing* rep = nullptr;
+ Consume(child, [&](CordRep* child, size_t offset, size_t length) {
+ if (IsFlatOrExternal(child)) {
+ rep = rep ? AppendLeaf(rep, child, offset, length)
+ : CreateFromLeaf(child, offset, length, extra);
+ } else if (rep) {
+ rep = AddRing<AddMode::kAppend>(rep, child->ring(), offset, length);
+ } else if (offset == 0 && child->length == length) {
+ rep = Mutable(child->ring(), extra);
+ } else {
+ rep = SubRing(child->ring(), offset, length, extra);
+ }
+ });
+ return Validate(rep, nullptr, __LINE__);
+}
+
+CordRepRing* CordRepRing::Create(CordRep* child, size_t extra) {
+ size_t length = child->length;
+ if (IsFlatOrExternal(child)) {
+ return CreateFromLeaf(child, 0, length, extra);
+ }
+ if (child->tag == RING) {
+ return Mutable(child->ring(), extra);
+ }
+ return CreateSlow(child, extra);
+}
+
+template <CordRepRing::AddMode mode>
+CordRepRing* CordRepRing::AddRing(CordRepRing* rep, CordRepRing* ring,
+ size_t offset, size_t length) {
+ assert(offset < ring->length);
+ constexpr bool append = mode == AddMode::kAppend;
+ Position head = ring->Find(offset);
+ Position tail = ring->FindTail(head.index, offset + length);
+ const index_type entries = ring->entries(head.index, tail.index);
+
+ rep = Mutable(rep, entries);
+
+ // The delta for making ring[head].end_pos into 'len - offset'
+ const pos_type delta_length =
+ (append ? rep->begin_pos_ + rep->length : rep->begin_pos_ - length) -
+ ring->entry_begin_pos(head.index) - head.offset;
+
+ // Start filling at `tail`, or `entries` before `head`
+ Filler filler(rep, append ? rep->tail_ : rep->retreat(rep->head_, entries));
+
+ if (ring->refcount.IsOne()) {
+ // Copy entries from source stealing the ref and adjusting the end position.
+ // Commit the filler as this is no-op.
+ ring->ForEach(head.index, tail.index, [&](index_type ix) {
+ filler.Add(ring->entry_child(ix), ring->entry_data_offset(ix),
+ ring->entry_end_pos(ix) + delta_length);
+ });
+
+ // Unref entries we did not copy over, and delete source.
+ if (head.index != ring->head_) UnrefEntries(ring, ring->head_, head.index);
+ if (tail.index != ring->tail_) UnrefEntries(ring, tail.index, ring->tail_);
+ CordRepRing::Delete(ring);
+ } else {
+ ring->ForEach(head.index, tail.index, [&](index_type ix) {
+ CordRep* child = ring->entry_child(ix);
+ filler.Add(child, ring->entry_data_offset(ix),
+ ring->entry_end_pos(ix) + delta_length);
+ CordRep::Ref(child);
+ });
+ CordRepRing::Unref(ring);
+ }
+
+ if (head.offset) {
+ // Increase offset of first 'source' entry appended or prepended.
+ // This is always the entry in `filler.head()`
+ rep->AddDataOffset(filler.head(), head.offset);
+ }
+
+ if (tail.offset) {
+ // Reduce length of last 'source' entry appended or prepended.
+ // This is always the entry tailed by `filler.pos()`
+ rep->SubLength(rep->retreat(filler.pos()), tail.offset);
+ }
+
+ // Commit changes
+ rep->length += length;
+ if (append) {
+ rep->tail_ = filler.pos();
+ } else {
+ rep->head_ = filler.head();
+ rep->begin_pos_ -= length;
+ }
+
+ return Validate(rep);
+}
+
+CordRepRing* CordRepRing::AppendSlow(CordRepRing* rep, CordRep* child) {
+ Consume(child, [&rep](CordRep* child, size_t offset, size_t length) {
+ if (child->tag == RING) {
+ rep = AddRing<AddMode::kAppend>(rep, child->ring(), offset, length);
+ } else {
+ rep = AppendLeaf(rep, child, offset, length);
+ }
+ });
+ return rep;
+}
+
+CordRepRing* CordRepRing::AppendLeaf(CordRepRing* rep, CordRep* child,
+ size_t offset, size_t length) {
+ rep = Mutable(rep, 1);
+ index_type back = rep->tail_;
+ const pos_type begin_pos = rep->begin_pos_ + rep->length;
+ rep->tail_ = rep->advance(rep->tail_);
+ rep->length += length;
+ rep->entry_end_pos()[back] = begin_pos + length;
+ rep->entry_child()[back] = child;
+ rep->entry_data_offset()[back] = static_cast<offset_type>(offset);
+ return Validate(rep, nullptr, __LINE__);
+}
+
+// Appends `child` to `rep`, dispatching on the kind of node: flat / external
+// nodes are added as a single leaf, rings are merged entry by entry, and
+// everything else takes the slow path.
+CordRepRing* CordRepRing::Append(CordRepRing* rep, CordRep* child) {
+  const size_t child_length = child->length;
+  if (IsFlatOrExternal(child)) {
+    return AppendLeaf(rep, child, 0, child_length);
+  }
+  if (child->tag != RING) {
+    return AppendSlow(rep, child);
+  }
+  return AddRing<AddMode::kAppend>(rep, child->ring(), 0, child_length);
+}
+
+// Slow path of Prepend(rep, child): every piece that RConsume() reports for
+// `child` is added to the front of `rep`, flat / external nodes through
+// PrependLeaf and anything else through AddRing.
+CordRepRing* CordRepRing::PrependSlow(CordRepRing* rep, CordRep* child) {
+  RConsume(child, [&](CordRep* piece, size_t piece_offset, size_t piece_len) {
+    if (!IsFlatOrExternal(piece)) {
+      rep = AddRing<AddMode::kPrepend>(rep, piece->ring(), piece_offset,
+                                       piece_len);
+      return;
+    }
+    rep = PrependLeaf(rep, piece, piece_offset, piece_len);
+  });
+  return Validate(rep);
+}
+
+// Adds `child` as a new first entry of `rep`, exposing `length` bytes of it
+// starting at data offset `offset`. Makes `rep` mutable with room for one
+// more entry first.
+CordRepRing* CordRepRing::PrependLeaf(CordRepRing* rep, CordRep* child,
+                                      size_t offset, size_t length) {
+  rep = Mutable(rep, 1);
+
+  // The new entry goes into the slot directly before the current head and
+  // ends where the ring currently begins.
+  const index_type slot = rep->retreat(rep->head_);
+  rep->entry_end_pos()[slot] = rep->begin_pos_;
+  rep->entry_child()[slot] = child;
+  rep->entry_data_offset()[slot] = static_cast<offset_type>(offset);
+
+  rep->head_ = slot;
+  rep->begin_pos_ -= length;
+  rep->length += length;
+  return Validate(rep);
+}
+
+// Prepends `child` to `rep`, dispatching on the kind of node: flat / external
+// nodes are added as a single leaf, rings are merged entry by entry, and
+// everything else takes the slow path.
+CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) {
+  const size_t child_length = child->length;
+  if (IsFlatOrExternal(child)) {
+    return PrependLeaf(rep, child, 0, child_length);
+  }
+  if (child->tag != RING) {
+    return PrependSlow(rep, child);
+  }
+  return AddRing<AddMode::kPrepend>(rep, child->ring(), 0, child_length);
+}
+
+// Appends the bytes in `data` to `rep`. A privately owned ring is first asked
+// for an append buffer; whatever does not fit there is copied into new flats
+// of at most kMaxFlatLength bytes, with `extra` passed to CreateFlat for the
+// trailing partial flat.
+CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data,
+                                 size_t extra) {
+  if (rep->refcount.IsOne()) {
+    Span<char> spare = rep->GetAppendBuffer(data.length());
+    if (!spare.empty()) {
+      memcpy(spare.data(), data.data(), spare.length());
+      data.remove_prefix(spare.length());
+    }
+  }
+  if (data.empty()) return Validate(rep);
+
+  // One new entry per started kMaxFlatLength chunk of the remaining data.
+  const size_t flats = (data.length() - 1) / kMaxFlatLength + 1;
+  rep = Mutable(rep, flats);
+
+  Filler filler(rep, rep->tail_);
+  pos_type end_pos = rep->begin_pos_ + rep->length;
+
+  // Full sized flats.
+  while (data.length() >= kMaxFlatLength) {
+    auto* full_flat = CreateFlat(data.data(), kMaxFlatLength);
+    end_pos += kMaxFlatLength;
+    filler.Add(full_flat, 0, end_pos);
+    data.remove_prefix(kMaxFlatLength);
+  }
+
+  // Trailing partial flat, if any.
+  if (!data.empty()) {
+    auto* last_flat = CreateFlat(data.data(), data.length(), extra);
+    end_pos += data.length();
+    filler.Add(last_flat, 0, end_pos);
+  }
+
+  rep->tail_ = filler.pos();
+  rep->length = end_pos - rep->begin_pos_;
+
+  return Validate(rep);
+}
+
+// Prepends the bytes in `data` to `rep`. A privately owned ring is first
+// asked for a prepend buffer, which receives the tail end of `data`; whatever
+// does not fit there is copied into new flats of at most kMaxFlatLength bytes.
+// The first new flat holds the part of `data` that does not fill a whole flat
+// and is allocated with `extra` unused bytes in front of its data.
+CordRepRing* CordRepRing::Prepend(CordRepRing* rep, absl::string_view data,
+                                  size_t extra) {
+  if (rep->refcount.IsOne()) {
+    Span<char> avail = rep->GetPrependBuffer(data.length());
+    if (!avail.empty()) {
+      const char* tail = data.data() + data.length() - avail.length();
+      memcpy(avail.data(), tail, avail.length());
+      data.remove_suffix(avail.length());
+    }
+  }
+  if (data.empty()) return Validate(rep);
+
+  const size_t total = data.length();
+  const size_t flats = (total - 1) / kMaxFlatLength + 1;
+  rep = Mutable(rep, flats);
+
+  // The filler writes the new entries front to back into the `flats` slots
+  // directly before the current head. Entry end positions must therefore grow
+  // from the new begin position up to the old begin position; handing out
+  // positions counting down from the old begin position would store them in
+  // descending order whenever more than one flat is needed.
+  const pos_type new_begin_pos = rep->begin_pos_ - total;
+  pos_type pos = new_begin_pos;
+  Filler filler(rep, rep->retreat(rep->head_, static_cast<index_type>(flats)));
+
+  // First flat: the remainder that does not fill a whole flat, stored after
+  // `extra` bytes of leading space.
+  const size_t first_size = total - (flats - 1) * kMaxFlatLength;
+  CordRepFlat* flat = CordRepFlat::New(first_size + extra);
+  flat->length = first_size + extra;
+  memcpy(flat->Data() + extra, data.data(), first_size);
+  data.remove_prefix(first_size);
+  pos += first_size;
+  filler.Add(flat, extra, pos);
+
+  // All remaining flats are full sized.
+  while (!data.empty()) {
+    assert(data.size() >= kMaxFlatLength);
+    flat = CreateFlat(data.data(), kMaxFlatLength);
+    pos += kMaxFlatLength;
+    filler.Add(flat, 0, pos);
+    data.remove_prefix(kMaxFlatLength);
+  }
+  // The last new entry must end exactly where the existing data begins.
+  assert(pos == rep->begin_pos_);
+
+  rep->head_ = filler.head();
+  rep->length += total;
+  rep->begin_pos_ = new_begin_pos;
+
+  return Validate(rep);
+}
+
+// Ranges above this size are narrowed by FindBinary first. 32 entries is 32 * sizeof(pos_type) = 4 cache lines on x86
+static constexpr index_type kBinarySearchThreshold = 32;
+static constexpr index_type kBinarySearchEndCount = 8;  // FindBinary stops once its count is at or below this
+
+// Narrows the range [head, tail) by repeated halving until the step size is
+// at most kBinarySearchEndCount, and returns the resulting head index from
+// which the caller continues with a linear search. `wrap` selects whether
+// index arithmetic has to wrap around `capacity_`.
+template <bool wrap>
+CordRepRing::index_type CordRepRing::FindBinary(index_type head,
+                                                index_type tail,
+                                                size_t offset) const {
+  // Number of entries in [head, tail); a wrapping range needs capacity_ added.
+  index_type step = tail + (wrap ? capacity_ : 0) - head;
+  do {
+    step = (step - 1) / 2;
+    assert(step < entries(head, tail_));
+    const index_type probe = wrap ? advance(head, step) : head + step;
+    const index_type past_probe = wrap ? advance(probe) : probe + 1;
+    // Selects rather than if / else, presumably to keep the update branch free.
+    const bool beyond_probe = (offset >= entry_end_offset(probe));
+    head = beyond_probe ? past_probe : head;
+    tail = beyond_probe ? tail : probe;
+    assert(head != tail);
+  } while (ABSL_PREDICT_TRUE(step > kBinarySearchEndCount));
+  return head;
+}
+
+// Returns the entry holding the byte at `offset`, searching from `head`,
+// together with the offset of that byte relative to the start of the entry.
+CordRepRing::Position CordRepRing::FindSlow(index_type head,
+                                            size_t offset) const {
+  const index_type tail = tail_;
+
+  // Long ranges are narrowed with a binary search before the linear scan.
+  // The non wrapping case uses its own instantiation (branchless / non
+  // wrapping ops).
+  const bool wraps = !(tail > head);
+  if (wraps) {
+    const index_type span = capacity_ + tail - head;
+    if (span > kBinarySearchThreshold) {
+      head = FindBinary<true>(head, tail, offset);
+    }
+  } else {
+    const index_type span = tail - head;
+    if (span > kBinarySearchThreshold) {
+      head = FindBinary<false>(head, tail, offset);
+    }
+  }
+
+  // Linear scan to the first entry that ends beyond `offset`.
+  pos_type entry_begin = entry_begin_pos(head);
+  pos_type entry_end = entry_end_pos(head);
+  while (offset >= Distance(begin_pos_, entry_end)) {
+    head = advance(head);
+    entry_begin = entry_end;
+    entry_end = entry_end_pos(head);
+  }
+
+  return {head, offset - Distance(begin_pos_, entry_begin)};
+}
+
+// Returns the tail position for `offset`, searching from `head`: the index
+// one past the entry holding the byte at `offset - 1`, and the number of
+// bytes between `offset` and the end of that entry.
+CordRepRing::Position CordRepRing::FindTailSlow(index_type head,
+                                                size_t offset) const {
+  const index_type tail = tail_;
+  const size_t last_byte = offset - 1;
+
+  // Long ranges are narrowed with a binary search before the linear scan.
+  // The non wrapping case uses its own instantiation (branchless / non
+  // wrapping ops).
+  const bool wraps = !(tail > head);
+  if (wraps) {
+    const index_type span = capacity_ + tail - head;
+    if (span > kBinarySearchThreshold) {
+      head = FindBinary<true>(head, tail, last_byte);
+    }
+  } else {
+    const index_type span = tail - head;
+    if (span > kBinarySearchThreshold) {
+      head = FindBinary<false>(head, tail, last_byte);
+    }
+  }
+
+  // Linear scan to the first entry that ends beyond `last_byte`.
+  size_t entry_end = entry_end_offset(head);
+  while (last_byte >= entry_end) {
+    head = advance(head);
+    entry_end = entry_end_offset(head);
+  }
+
+  return {advance(head), entry_end - offset};
+}
+
+// Returns the character at `offset`, which must be less than `length`.
+char CordRepRing::GetCharacter(size_t offset) const {
+  assert(offset < length);
+
+  const Position found = Find(offset);
+  // Position inside the child's data: the entry's own data offset plus the
+  // offset of the requested byte within the entry.
+  const size_t child_offset = entry_data_offset(found.index) + found.offset;
+  const char* child_data = GetRepData(entry_child(found.index));
+  return child_data[child_offset];
+}
+
+// Returns a ring holding the `length` bytes of `rep` starting at `offset`,
+// or nullptr (after releasing `rep`) if `length` is zero.
+// Requires `offset + length <= rep->length`.
+// A privately owned `rep` with enough spare capacity for `extra` entries is
+// trimmed in place; otherwise the requested entries are copied into a new
+// ring with room for `extra` more.
+CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset,
+                                  size_t length, size_t extra) {
+  assert(offset <= rep->length);
+  // Checked as a subtraction from the already validated side: the form
+  // `rep->length - length` would wrap around for `length > rep->length` and
+  // let an out of range request pass.
+  assert(length <= rep->length - offset);
+
+  if (length == 0) {
+    CordRep::Unref(rep);
+    return nullptr;
+  }
+
+  // Find the entries holding the first and the last requested byte.
+  Position head = rep->Find(offset);
+  Position tail = rep->FindTail(head.index, offset + length);
+  const size_t new_entries = rep->entries(head.index, tail.index);
+
+  if (rep->refcount.IsOne() && extra <= (rep->capacity() - new_entries)) {
+    // We adopt a privately owned rep and no extra entries needed.
+    if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index);
+    if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_);
+    rep->head_ = head.index;
+    rep->tail_ = tail.index;
+  } else {
+    // Copy subset to new rep; the indexes now refer to the copy.
+    rep = Copy(rep, head.index, tail.index, extra);
+    head.index = rep->head_;
+    tail.index = rep->tail_;
+  }
+
+  // Adjust begin_pos and length
+  rep->length = length;
+  rep->begin_pos_ += offset;
+
+  // Clip the partially covered first and last entries.
+  if (head.offset) {
+    rep->AddDataOffset(head.index, head.offset);
+  }
+  if (tail.offset) {
+    rep->SubLength(rep->retreat(tail.index), tail.offset);
+  }
+
+  return Validate(rep);
+}
+
+// Returns `rep` with its first `len` bytes removed, or nullptr (after
+// releasing `rep`) if that removes everything. A privately owned `rep` is
+// trimmed in place; a shared one is copied with room for `extra` entries.
+CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len,
+                                       size_t extra) {
+  assert(len <= rep->length);
+  if (len == rep->length) {
+    CordRep::Unref(rep);
+    return nullptr;
+  }
+
+  // Entry holding the first byte that remains.
+  Position first = rep->Find(len);
+  if (!rep->refcount.IsOne()) {
+    rep = Copy(rep, first.index, rep->tail_, extra);
+    first.index = rep->head_;
+  } else {
+    if (first.index != rep->head_) {
+      UnrefEntries(rep, rep->head_, first.index);
+    }
+    rep->head_ = first.index;
+  }
+
+  // The ring now starts `len` bytes later and is `len` bytes shorter.
+  rep->begin_pos_ += len;
+  rep->length -= len;
+
+  // Skip the removed part of the new first entry.
+  if (first.offset != 0) {
+    rep->AddDataOffset(first.index, first.offset);
+  }
+
+  return Validate(rep);
+}
+
+// Returns `rep` with its last `len` bytes removed, or nullptr (after
+// releasing `rep`) if that removes everything. A privately owned `rep` is
+// trimmed in place; a shared one is copied with room for `extra` entries.
+CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len,
+                                       size_t extra) {
+  assert(len <= rep->length);
+
+  if (len == rep->length) {
+    CordRep::Unref(rep);
+    return nullptr;
+  }
+
+  // Tail position directly after the last byte that remains.
+  Position last = rep->FindTail(rep->length - len);
+  if (!rep->refcount.IsOne()) {
+    // Shared: copy the entries we keep into a new rep.
+    rep = Copy(rep, rep->head_, last.index, extra);
+    last.index = rep->tail_;
+  } else {
+    // Privately owned: release the entries past the new tail.
+    if (last.index != rep->tail_) {
+      UnrefEntries(rep, last.index, rep->tail_);
+    }
+    rep->tail_ = last.index;
+  }
+
+  rep->length -= len;
+
+  // Drop the removed part of the new last entry.
+  if (last.offset != 0) {
+    rep->SubLength(rep->retreat(last.index), last.offset);
+  }
+
+  return Validate(rep);
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h
new file mode 100644
index 00000000..e6f6b59c
--- /dev/null
+++ b/absl/strings/internal/cord_rep_ring.h
@@ -0,0 +1,576 @@
+// Copyright 2020 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_
+#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iosfwd>
+#include <limits>
+#include <memory>
+
+#include "absl/container/internal/layout.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+// See https://bugs.llvm.org/show_bug.cgi?id=48477
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wshadow"
+#pragma clang diagnostic ignored "-Wshadow-field"
+#endif
+
+// All operations modifying a ring buffer are implemented as static methods
+// requiring a CordRepRing instance with a reference adopted by the method.
+//
+// The methods return the modified ring buffer, which may be equal to the input
+// if the input was not shared, and having large enough capacity to accommodate
+// any newly added node(s). Otherwise, a copy of the input rep with the new
+// node(s) added is returned.
+//
+// Any modification on non shared ring buffers with enough capacity will then
+// require minimum atomic operations. Caller should where possible provide
+// reasonable `extra` hints for both anticipated extra `flat` byte space, as
+// well as anticipated extra nodes required for complex operations.
+//
+// Example of code creating a ring buffer, adding some data to it,
+// and discarding the buffer when done:
+//
+// void FunWithRings() {
+// // Create ring with 3 flats
+// CordRep* flat = CreateFlat("Hello");
+// CordRepRing* ring = CordRepRing::Create(flat, 2);
+// ring = CordRepRing::Append(ring, CreateFlat(" "));
+// ring = CordRepRing::Append(ring, CreateFlat("world"));
+// DoSomethingWithRing(ring);
+// CordRep::Unref(ring);
+// }
+//
+// Example of code Copying an existing ring buffer and modifying it:
+//
+// void MoreFunWithRings(CordRepRing* src) {
+// CordRepRing* ring = CordRep::Ref(src)->ring();
+// ring = CordRepRing::Append(ring, CreateFlat("Hello"));
+// ring = CordRepRing::Append(ring, CreateFlat(" "));
+// ring = CordRepRing::Append(ring, CreateFlat("world"));
+// DoSomethingWithRing(ring);
+// CordRep::Unref(ring);
+// }
+//
+class CordRepRing : public CordRep {
+ public:
+ // `pos_type` represents a 'logical position'. A CordRepRing instance has a
+ // `begin_pos` (default 0), and each node inside the buffer will have an
+ // `end_pos` which is the `end_pos` of the previous node (or `begin_pos`) plus
+ // this node's length. The purpose is to allow for a binary search on this
+ // position, while allowing O(1) prepend and append operations.
+ using pos_type = uint64_t;
+
+ // `index_type` is the type for the `head`, `tail` and `capacity` indexes.
+ // Ring buffers are limited to having no more than four billion entries.
+ using index_type = uint32_t;
+
+ // `offset_type` is the type for the data offset inside a child rep's data.
+ using offset_type = uint32_t;
+
+ // Position holds the node index and relative offset into the node for
+ // some physical offset in the contained data as returned by the Find()
+ // and FindTail() methods.
+ struct Position {
+ index_type index;
+ size_t offset;
+ };
+
+ // The maximum # of child nodes that can be hosted inside a CordRepRing.
+ static constexpr size_t kMaxCapacity = (std::numeric_limits<uint32_t>::max)();
+
+ // CordRepRing can not be default constructed, moved, copied or assigned.
+ CordRepRing() = delete;
+ CordRepRing(const CordRepRing&) = delete;
+ CordRepRing& operator=(const CordRepRing&) = delete;
+
+ // Returns true if this instance is valid, false if some or all of the
+ // invariants are broken. Intended for debug purposes only.
+ // `output` receives an explanation of the broken invariants.
+ bool IsValid(std::ostream& output) const;
+
+ // Returns the size in bytes for a CordRepRing with `capacity` entries.
+ static constexpr size_t AllocSize(size_t capacity);
+
+ // Returns the distance in bytes from `pos` to `end_pos`.
+ static constexpr size_t Distance(pos_type pos, pos_type end_pos);
+
+ // Creates a new ring buffer from the provided `child`. Adopts a reference
+ // on `child`. The returned ring buffer has a capacity of at least `extra + 1`
+ static CordRepRing* Create(CordRep* child, size_t extra = 0);
+
+ // `head`, `tail` and `capacity` indexes defining the ring buffer boundaries.
+ index_type head() const { return head_; }
+ index_type tail() const { return tail_; }
+ index_type capacity() const { return capacity_; }
+
+ // Returns the number of entries in this instance.
+ index_type entries() const { return entries(head_, tail_); }
+
+ // Returns the logical begin position of this instance.
+ pos_type begin_pos() const { return begin_pos_; }
+
+ // Returns the number of entries for a given head-tail range.
+ // Requires `head` and `tail` values to be less than `capacity()`.
+ index_type entries(index_type head, index_type tail) const {
+ assert(head < capacity_ && tail < capacity_);
+ return tail - head + ((tail > head) ? 0 : capacity_);
+ }
+
+ // Returns the logical end position of entry `index`.
+ pos_type const& entry_end_pos(index_type index) const {
+ assert(IsValidIndex(index));
+ return Layout::Partial().Pointer<0>(data_)[index];
+ }
+
+ // Returns the child pointer of entry `index`.
+ CordRep* const& entry_child(index_type index) const {
+ assert(IsValidIndex(index));
+ return Layout::Partial(capacity()).Pointer<1>(data_)[index];
+ }
+
+ // Returns the data offset of entry `index`
+ offset_type const& entry_data_offset(index_type index) const {
+ assert(IsValidIndex(index));
+ return Layout::Partial(capacity(), capacity()).Pointer<2>(data_)[index];
+ }
+
+ // Appends the provided child node to the `rep` instance.
+ // Adopts a reference from `rep` and `child` which may not be null.
+ // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node
+ // containing a FLAT or EXTERNAL node, then the flat or external node is added
+ // 'as is', with an offset added for the SUBSTRING case.
+ // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING or
+ // CONCAT tree, then all child nodes not excluded by any start offset or
+ // length values are added recursively.
+ static CordRepRing* Append(CordRepRing* rep, CordRep* child);
+
+ // Appends the provided string data to the `rep` instance.
+ // This function will attempt to utilize any remaining capacity in the last
+ // node of the input if that node is not shared (directly or indirectly), and
+ // of type FLAT. Remaining data will be added as one or more FLAT nodes.
+ // Any last node added to the ring buffer will be allocated with up to
+ // `extra` bytes of capacity for (anticipated) subsequent append actions.
+ static CordRepRing* Append(CordRepRing* rep, string_view data,
+ size_t extra = 0);
+
+ // Prepends the provided child node to the `rep` instance.
+ // Adopts a reference from `rep` and `child` which may not be null.
+ // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node
+ // containing a FLAT or EXTERNAL node, then the flat or external node is
+ // prepended 'as is', with an optional offset added for the SUBSTRING case.
+ // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING
+ // or CONCAT tree, then all child nodes not excluded by any start offset or
+ // length values are added recursively.
+ static CordRepRing* Prepend(CordRepRing* rep, CordRep* child);
+
+ // Prepends the provided string data to the `rep` instance.
+ // This function will attempt to utilize any remaining capacity in the first
+ // node of the input if that node is not shared (directly or indirectly), and
+ // of type FLAT. Remaining data will be added as one or more FLAT nodes.
+ // Any first node prepended to the ring buffer will be allocated with up to
+ // `extra` bytes of capacity for (anticipated) subsequent prepend actions.
+ static CordRepRing* Prepend(CordRepRing* rep, string_view data,
+ size_t extra = 0);
+
+ // Returns a span referencing potentially unused capacity in the last node.
+ // The returned span may be empty if no such capacity is available, or if the
+ // current instance is shared. Else, a span of size `n <= size` is returned.
+ // If non empty, the ring buffer is adjusted to the new length, with the newly
+ // added capacity left uninitialized. Callers should assign a value to the
+ // entire span before any other operations on this instance.
+ Span<char> GetAppendBuffer(size_t size);
+
+ // Returns a span referencing potentially unused capacity in the first node.
+ // This function is identical to GetAppendBuffer except that it returns a span
+ // referencing up to `size` capacity directly before the existing data.
+ Span<char> GetPrependBuffer(size_t size);
+
+ // Returns a cord ring buffer containing `length` bytes of data starting at
+ // `offset`. If the input is not shared, this function will remove all head
+ // and tail child nodes outside of the requested range, and adjust the new
+ // head and tail nodes as required. If the input is shared, this function
+ // returns a new instance sharing some or all of the nodes from the input.
+ static CordRepRing* SubRing(CordRepRing* r, size_t offset, size_t length,
+ size_t extra = 0);
+
+ // Returns a cord ring buffer with the last `length` bytes removed.
+ // If the input is not shared, this function will remove all tail child nodes
+ // fully inside the last `length` bytes, and adjust the new tail as required.
+ // If the input is shared, this function returns a new instance sharing some
+ // or all of the nodes from the input.
+ static CordRepRing* RemoveSuffix(CordRepRing* r, size_t length,
+ size_t extra = 0);
+
+ // Returns a cord ring buffer with the first `len` bytes removed.
+ // If the input is not shared, this function will remove all head child nodes
+ // fully inside the first `len` bytes, and adjust the new head as required.
+ // If the input is shared, this function returns a new instance sharing some
+ // or all of the nodes from the input.
+ static CordRepRing* RemovePrefix(CordRepRing* r, size_t len,
+ size_t extra = 0);
+
+ // Returns the character at `offset`. Requires that `offset < length`.
+ char GetCharacter(size_t offset) const;
+
+ // Testing only: set capacity to requested capacity.
+ void SetCapacityForTesting(size_t capacity);
+
+ // Returns the CordRep data pointer for the provided CordRep.
+ // Requires that the provided `rep` is either a FLAT or EXTERNAL CordRep.
+ static const char* GetLeafData(const CordRep* rep);
+
+ // Returns the CordRep data pointer for the provided CordRep.
+ // Requires that `rep` is either a FLAT, EXTERNAL, or SUBSTRING CordRep.
+ static const char* GetRepData(const CordRep* rep);
+
+ // Advances the provided position, wrapping around capacity as needed.
+ // Requires `index` < capacity()
+ inline index_type advance(index_type index) const;
+
+ // Advances the provided position by `n`, wrapping around capacity as needed.
+ // Requires `index` < capacity() and `n` <= capacity.
+ inline index_type advance(index_type index, index_type n) const;
+
+ // Retreats the provided position, wrapping around 0 as needed.
+ // Requires `index` < capacity()
+ inline index_type retreat(index_type index) const;
+
+ // Retreats the provided position by `n`, wrapping around 0 as needed.
+ // Requires `index` < capacity() and `n` <= capacity.
+ inline index_type retreat(index_type index, index_type n) const;
+
+ // Returns the logical begin position of entry `index`
+ pos_type const& entry_begin_pos(index_type index) const {
+ return (index == head_) ? begin_pos_ : entry_end_pos(retreat(index));
+ }
+
+ // Returns the physical start offset of entry `index`
+ size_t entry_start_offset(index_type index) const {
+ return Distance(begin_pos_, entry_begin_pos(index));
+ }
+
+ // Returns the physical end offset of entry `index`
+ size_t entry_end_offset(index_type index) const {
+ return Distance(begin_pos_, entry_end_pos(index));
+ }
+
+ // Returns the data length for entry `index`
+ size_t entry_length(index_type index) const {
+ return Distance(entry_begin_pos(index), entry_end_pos(index));
+ }
+
+ // Returns the data for entry `index`
+ absl::string_view entry_data(index_type index) const;
+
+ // Returns the position for `offset` as {index, prefix}. `index` holds the
+ // index of the entry at the specified offset and `prefix` holds the relative
+ // offset inside that entry.
+ // Requires `offset` < length.
+ //
+ // For example we can implement GetCharacter(offset) as:
+ //   char GetCharacter(size_t offset) {
+ //     Position pos = this->Find(offset);
+ //     return this->entry_data(pos.index)[pos.offset];
+ //   }
+ inline Position Find(size_t offset) const;
+
+ // Find starting at `head`
+ inline Position Find(index_type head, size_t offset) const;
+
+ // Returns the tail position for `offset` as {tail index, suffix}.
+ // `tail index` holds the index of the entry holding the byte directly
+ // before `offset`, advanced by one. `suffix` holds the distance from
+ // `offset` to the end of that entry.
+ // For example, FindTail(length) will return {tail(), 0}, FindTail(length - 5)
+ // will return {tail(), 5} provided the last entry contains more than 5 bytes
+ // of data.
+ // Requires offset >= 1 && offset <= length.
+ //
+ // This function is very useful in functions that need to clip the end of some
+ // ring buffer such as 'RemoveSuffix'.
+ // For example, we could implement RemoveSuffix for non shared instances as:
+ //   void RemoveSuffix(size_t n) {
+ //     Position pos = FindTail(length - n);
+ //     UnrefEntries(pos.index, this->tail_);
+ //     this->tail_ = pos.index;
+ //     entry(retreat(pos.index)).end_pos -= pos.offset;
+ //   }
+ inline Position FindTail(size_t offset) const;
+
+ // Find tail starting at `head`
+ inline Position FindTail(index_type head, size_t offset) const;
+
+ // Invokes f(index_type index) for each entry inside the range [head, tail)
+ template <typename F>
+ void ForEach(index_type head, index_type tail, F&& f) const {
+ index_type n1 = (tail > head) ? tail : capacity_;
+ for (index_type i = head; i < n1; ++i) f(i);
+ if (tail <= head) {
+ for (index_type i = 0; i < tail; ++i) f(i);
+ }
+ }
+
+ // Invokes f(index_type index) for each entry inside this instance.
+ template <typename F>
+ void ForEach(F&& f) const {
+ ForEach(head_, tail_, std::forward<F>(f));
+ }
+
+ // Dump this instance's data to stream `s` in human readable format, excluding
+ // the actual data content itself. Intended for debug purposes only.
+ friend std::ostream& operator<<(std::ostream& s, const CordRepRing& rep);
+
+ private:
+ enum class AddMode { kAppend, kPrepend };
+
+ using Layout = container_internal::Layout<pos_type, CordRep*, offset_type>;
+
+ class Filler;
+ class Transaction;
+ class CreateTransaction;
+
+ static constexpr size_t kLayoutAlignment = Layout::Partial().Alignment();
+
+ // Creates a new CordRepRing.
+ explicit CordRepRing(index_type capacity) : capacity_(capacity) {}
+
+ // Returns true if `index` is a valid index into this instance.
+ bool IsValidIndex(index_type index) const;
+
+ // Debug use only: validates the provided CordRepRing invariants.
+ // Verification of all CordRepRing methods can be enabled by defining
+ // EXTRA_CORD_RING_VALIDATION, i.e.: `--copts=-DEXTRA_CORD_RING_VALIDATION`
+ // Verification is VERY expensive, so only do it for debugging purposes.
+ static CordRepRing* Validate(CordRepRing* rep, const char* file = nullptr,
+ int line = 0);
+
+ // Allocates a CordRepRing large enough to hold `capacity + extra` entries.
+ // The returned capacity may be larger if the allocated memory allows for it.
+ // The maximum capacity of a CordRepRing is capped at kMaxCapacity.
+ // Throws `std::length_error` if `capacity + extra` exceeds kMaxCapacity.
+ static CordRepRing* New(size_t capacity, size_t extra);
+
+ // Deallocates (but does not destroy) the provided ring buffer.
+ static void Delete(CordRepRing* rep);
+
+ // Destroys the provided ring buffer, decrementing the reference count of all
+ // contained child CordReps. The provided `rep` should have a ref count of
+ // one (pre decrement destroy call observing `refcount.IsOne()`) or zero (post
+ // decrement destroy call observing `!refcount.Decrement()`).
+ static void Destroy(CordRepRing* rep);
+
+ // Returns a mutable pointer to the logical end position array.
+ pos_type* entry_end_pos() {
+ return Layout::Partial().Pointer<0>(data_);
+ }
+
+ // Returns a mutable pointer to the child pointer array.
+ CordRep** entry_child() {
+ return Layout::Partial(capacity()).Pointer<1>(data_);
+ }
+
+ // Returns a mutable pointer to the data offset array.
+ offset_type* entry_data_offset() {
+ return Layout::Partial(capacity(), capacity()).Pointer<2>(data_);
+ }
+
+ // Find implementations for the non fast path 0 / length cases.
+ Position FindSlow(index_type head, size_t offset) const;
+ Position FindTailSlow(index_type head, size_t offset) const;
+
+ // Finds the index of the first node that is inside a reasonable distance
+ // of the node at `offset` from which we can continue with a linear search.
+ template <bool wrap>
+ index_type FindBinary(index_type head, index_type tail, size_t offset) const;
+
+ // Fills the current (initialized) instance from the provided source, copying
+ // entries [head, tail). Adds a reference to copied entries if `ref` is true.
+ template <bool ref>
+ void Fill(const CordRepRing* src, index_type head, index_type tail);
+
+ // Create a copy of `rep`, copying all entries [head, tail), allocating room
+ // for `extra` entries. Adds a reference on all copied entries.
+ static CordRepRing* Copy(CordRepRing* rep, index_type head, index_type tail,
+ size_t extra = 0);
+
+ // Returns a Mutable CordRepRing reference from `rep` with room for at least
+ // `extra` additional nodes. Adopts a reference count from `rep`.
+ // This function will return `rep` if, and only if:
+ // - rep.entries + extra <= rep.capacity
+ // - rep.refcount == 1
+ // Otherwise, this function will create a new copy of `rep` with additional
+ // capacity to satisfy `extra` extra nodes, and unref the old `rep` instance.
+ //
+ // If a new CordRepRing can not be allocated, or the new capacity would exceed
+ // the maximum capacity, then the input is consumed only, and an exception is
+ // thrown.
+ static CordRepRing* Mutable(CordRepRing* rep, size_t extra);
+
+ // Slow path for Append(CordRepRing* rep, CordRep* child). This function is
+ // exercised if the provided `child` in Append() is not a leaf node, i.e., a
+ // ring buffer or old (concat) cord tree.
+ static CordRepRing* AppendSlow(CordRepRing* rep, CordRep* child);
+
+ // Appends the provided leaf node. Requires `child` to be FLAT or EXTERNAL.
+ static CordRepRing* AppendLeaf(CordRepRing* rep, CordRep* child,
+ size_t offset, size_t length);
+
+ // Prepends the provided leaf node. Requires `child` to be FLAT or EXTERNAL.
+ static CordRepRing* PrependLeaf(CordRepRing* rep, CordRep* child,
+ size_t offset, size_t length);
+
+ // Slow path for Prepend(CordRepRing* rep, CordRep* child). This function is
+ // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a
+ // ring buffer or old (concat) cord tree.
+ static CordRepRing* PrependSlow(CordRepRing* rep, CordRep* child);
+
+ // Slow path for Create(CordRep* child, size_t extra). This function is
+ // exercised if the provided `child` in Create() is not a leaf node, i.e., a
+ // ring buffer or old (concat) cord tree.
+ static CordRepRing* CreateSlow(CordRep* child, size_t extra);
+
+ // Creates a new ring buffer from the provided `child` leaf node. Requires
+ // `child` to be FLAT or EXTERNAL.
+ // The returned ring buffer has a capacity of at least `1 + extra`
+ static CordRepRing* CreateFromLeaf(CordRep* child, size_t offset,
+ size_t length, size_t extra);
+
+ // Appends or prepends (depending on AddMode) the ring buffer in `ring` to
+ // `rep` starting at `offset` with length `length`.
+ template <AddMode mode>
+ static CordRepRing* AddRing(CordRepRing* rep, CordRepRing* ring,
+ size_t offset, size_t length);
+
+ // Increases the data offset for entry `index` by `n`.
+ void AddDataOffset(index_type index, size_t n);
+
+ // Decreases the length for entry `index` by `n`.
+ void SubLength(index_type index, size_t n);
+
+ index_type head_;
+ index_type tail_;
+ index_type capacity_;
+ pos_type begin_pos_;
+
+ alignas(kLayoutAlignment) char data_[kLayoutAlignment];
+
+ friend struct CordRep;
+};
+
+// Size of the object itself, minus the `data_` placeholder member, plus the
+// size of the three arrays laid out for `capacity` entries each.
+// Kept as a single return statement so it stays a valid C++11 constexpr.
+constexpr size_t CordRepRing::AllocSize(size_t capacity) {
+  return Layout(capacity, capacity, capacity).AllocSize() +
+         sizeof(CordRepRing) - sizeof(data_);
+}
+
+// Number of bytes between logical position `pos` and the later `end_pos`.
+inline constexpr size_t CordRepRing::Distance(pos_type pos, pos_type end_pos) {
+  return static_cast<size_t>(end_pos - pos);
+}
+
+// Data pointer of a leaf node: the `base` of an EXTERNAL node, the flat's
+// own data for anything else.
+inline const char* CordRepRing::GetLeafData(const CordRep* rep) {
+  if (rep->tag == EXTERNAL) {
+    return rep->external()->base;
+  }
+  return rep->flat()->Data();
+}
+
+// Data pointer of a flat, external or substring node. For a substring this
+// is the data of its leaf child advanced by the substring's start offset.
+inline const char* CordRepRing::GetRepData(const CordRep* rep) {
+  if (rep->tag >= FLAT) {
+    return rep->flat()->Data();
+  }
+  if (rep->tag == EXTERNAL) {
+    return rep->external()->base;
+  }
+  const auto* sub = rep->substring();
+  return GetLeafData(sub->child) + sub->start;
+}
+
+// Returns the index following `index`, wrapping to 0 at `capacity_`.
+inline CordRepRing::index_type CordRepRing::advance(index_type index) const {
+  assert(index < capacity_);
+  const index_type next = index + 1;
+  return (next == capacity_) ? 0 : next;
+}
+
+// Returns `index` advanced by `n` entries, wrapping around `capacity_`.
+// Requires `index` < capacity_ and `n` <= capacity_.
+inline CordRepRing::index_type CordRepRing::advance(index_type index,
+                                                    index_type n) const {
+  assert(index < capacity_ && n <= capacity_);
+  // Compare `n` against the room left before the wrap instead of forming
+  // `index + n`: that sum can exceed the range of index_type once capacity_
+  // is larger than half that range, which would yield a wrong index.
+  const index_type room = capacity_ - index;
+  return (n >= room) ? n - room : index + n;
+}
+
+// Returns the index preceding `index`, wrapping from 0 to `capacity_ - 1`.
+inline CordRepRing::index_type CordRepRing::retreat(index_type index) const {
+  assert(index < capacity_);
+  return (index == 0) ? capacity_ - 1 : index - 1;
+}
+
+// Returns `index` moved back by `n` entries, wrapping around 0.
+// Requires `index` < capacity_ and `n` <= capacity_.
+inline CordRepRing::index_type CordRepRing::retreat(index_type index,
+                                                    index_type n) const {
+  assert(index < capacity_ && n <= capacity_);
+  if (index >= n) {
+    return index - n;
+  }
+  // Wrapping case: step back from the end of the buffer instead.
+  return capacity_ - n + index;
+}
+
+// Returns the bytes of entry `index`: the child's data starting at the
+// entry's data offset, for the entry's length.
+inline absl::string_view CordRepRing::entry_data(index_type index) const {
+  const size_t start = entry_data_offset(index);
+  const char* first_byte = GetRepData(entry_child(index)) + start;
+  return absl::string_view(first_byte, entry_length(index));
+}
+
+// Returns true if `index` lies below `capacity_` and inside the occupied
+// range [head_, tail_), which may wrap around the end of the buffer.
+inline bool CordRepRing::IsValidIndex(index_type index) const {
+  if (index >= capacity_) return false;
+  if (tail_ > head_) {
+    // Contiguous range.
+    return index >= head_ && index < tail_;
+  }
+  // Wrapping range: [head_, capacity_) followed by [0, tail_).
+  return index >= head_ || index < tail_;
+}
+
+#ifndef EXTRA_CORD_RING_VALIDATION
+// Without EXTRA_CORD_RING_VALIDATION this is a pass through: `rep` is
+// returned unchanged and the file / line arguments are ignored.
+inline CordRepRing* CordRepRing::Validate(CordRepRing* rep,
+                                          const char* /* file */,
+                                          int /* line */) {
+  return rep;
+}
+#endif
+
+// Fast path for offset 0, which is always the start of the head entry;
+// everything else is searched from the head.
+inline CordRepRing::Position CordRepRing::Find(size_t offset) const {
+  assert(offset < length);
+  if (offset == 0) {
+    return Position{head_, 0};
+  }
+  return FindSlow(head_, offset);
+}
+
+// Same as Find(offset), but the search starts at entry `head`, which must be
+// a valid entry that does not start after `offset`.
+inline CordRepRing::Position CordRepRing::Find(index_type head,
+                                               size_t offset) const {
+  assert(offset < length);
+  assert(IsValidIndex(head) && offset >= entry_start_offset(head));
+  if (offset == 0) {
+    return Position{head_, 0};
+  }
+  return FindSlow(head, offset);
+}
+
+// Fast path for the full length, whose tail position is the ring's own tail;
+// everything else is searched from the head.
+inline CordRepRing::Position CordRepRing::FindTail(size_t offset) const {
+  assert(offset > 0 && offset <= length);
+  if (offset == length) {
+    return Position{tail_, 0};
+  }
+  return FindTailSlow(head_, offset);
+}
+
+// Same as FindTail(offset), but the search starts at entry `head`, which
+// must be a valid entry starting before `offset`.
+inline CordRepRing::Position CordRepRing::FindTail(index_type head,
+                                                   size_t offset) const {
+  assert(offset > 0 && offset <= length);
+  assert(IsValidIndex(head) && offset >= entry_start_offset(head) + 1);
+  if (offset == length) {
+    return Position{tail_, 0};
+  }
+  return FindTailSlow(head, offset);
+}
+
+std::ostream& operator<<(std::ostream& s, const CordRepRing& rep);
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_