summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMake/AbseilDll.cmake2
-rw-r--r--WORKSPACE25
-rw-r--r--absl/base/internal/thread_identity.h8
-rw-r--r--absl/strings/BUILD.bazel32
-rw-r--r--absl/strings/CMakeLists.txt35
-rw-r--r--absl/strings/internal/cord_internal.cc4
-rw-r--r--absl/strings/internal/cord_rep_btree.cc947
-rw-r--r--absl/strings/internal/cord_rep_btree.h851
-rw-r--r--absl/strings/internal/cord_rep_btree_test.cc1352
-rw-r--r--absl/strings/internal/cord_rep_test_util.h185
-rw-r--r--ci/cmake_common.sh2
11 files changed, 3426 insertions, 17 deletions
diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake
index 78ace82b..65014bfb 100644
--- a/CMake/AbseilDll.cmake
+++ b/CMake/AbseilDll.cmake
@@ -205,6 +205,8 @@ set(ABSL_INTERNAL_DLL_FILES
"strings/internal/cord_internal.h"
"strings/internal/cord_rep_consume.h"
"strings/internal/cord_rep_consume.cc"
+ "strings/internal/cord_rep_btree.cc"
+ "strings/internal/cord_rep_btree.h"
"strings/internal/cord_rep_flat.h"
"strings/internal/cord_rep_ring.cc"
"strings/internal/cord_rep_ring.h"
diff --git a/WORKSPACE b/WORKSPACE
index bc00ea3f..f4a5c476 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -15,29 +15,30 @@
#
workspace(name = "com_google_absl")
+
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
# GoogleTest/GoogleMock framework. Used by most unit-tests.
http_archive(
- name = "com_google_googletest",
+ name = "com_google_googletest", # 2021-07-09T13:28:13Z
+ sha256 = "12ef65654dc01ab40f6f33f9d02c04f2097d2cd9fbe48dc6001b29543583b0ad",
+ strip_prefix = "googletest-8d51ffdfab10b3fba636ae69bc03da4b54f8c235",
# Keep this URL in sync with ABSL_GOOGLETEST_COMMIT in ci/cmake_common.sh.
- urls = ["https://github.com/google/googletest/archive/5bcd8e3bb929714e031a542d303f818e5a5af45d.zip"], # 2021-06-08T22:36:38Z
- strip_prefix = "googletest-5bcd8e3bb929714e031a542d303f818e5a5af45d",
- sha256 = "3adecb6686ac7367452561dca518fad5a990fb09c5a961bfa1836f15eb774348",
+ urls = ["https://github.com/google/googletest/archive/8d51ffdfab10b3fba636ae69bc03da4b54f8c235.zip"],
)
# Google benchmark.
http_archive(
- name = "com_github_google_benchmark",
- urls = ["https://github.com/google/benchmark/archive/7d0d9061d83b663ce05d9de5da3d5865a3845b79.zip"], # 2021-05-11T11:56:00Z
- strip_prefix = "benchmark-7d0d9061d83b663ce05d9de5da3d5865a3845b79",
- sha256 = "a07789754963e3ea3a1e13fed3a4d48fac0c5f7f749c5065f6c30cd2c1529661",
+ name = "com_github_google_benchmark", # 2021-07-01T09:02:54Z
+ sha256 = "1cb4b97a90aa1fd9c8e412a6bc29fc13fc140162a4a0db3811af40befd8c9ea5",
+ strip_prefix = "benchmark-e451e50e9b8af453f076dec10bd6890847f1624e",
+ urls = ["https://github.com/google/benchmark/archive/e451e50e9b8af453f076dec10bd6890847f1624e.zip"],
)
# C++ rules for Bazel.
http_archive(
- name = "rules_cc",
- urls = ["https://github.com/bazelbuild/rules_cc/archive/ab5395627c80e025e824bd005d41f96b20618b9d.zip"], # 2021-05-10T15:35:04Z
- strip_prefix = "rules_cc-ab5395627c80e025e824bd005d41f96b20618b9d",
- sha256 = "f3908cb40a6577ab0d1ef9e00052739bf69b4313fa7d20b4d61d4521ea67abf3",
+ name = "rules_cc", # 2021-06-07T16:41:49Z
+ sha256 = "b295cad8c5899e371dde175079c0a2cdc0151f5127acc92366a8c986beb95c76",
+ strip_prefix = "rules_cc-daf6ace7cfeacd6a83e9ff2ed659f416537b6c74",
+ urls = ["https://github.com/bazelbuild/rules_cc/archive/daf6ace7cfeacd6a83e9ff2ed659f416537b6c74.zip"],
)
diff --git a/absl/base/internal/thread_identity.h b/absl/base/internal/thread_identity.h
index 6e25b92f..659694b3 100644
--- a/absl/base/internal/thread_identity.h
+++ b/absl/base/internal/thread_identity.h
@@ -188,25 +188,25 @@ void ClearCurrentThreadIdentity();
// May be chosen at compile time via: -DABSL_FORCE_THREAD_IDENTITY_MODE=<mode
// index>
#ifdef ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC
-#error ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC cannot be direcly set
+#error ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC cannot be directly set
#else
#define ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC 0
#endif
#ifdef ABSL_THREAD_IDENTITY_MODE_USE_TLS
-#error ABSL_THREAD_IDENTITY_MODE_USE_TLS cannot be direcly set
+#error ABSL_THREAD_IDENTITY_MODE_USE_TLS cannot be directly set
#else
#define ABSL_THREAD_IDENTITY_MODE_USE_TLS 1
#endif
#ifdef ABSL_THREAD_IDENTITY_MODE_USE_CPP11
-#error ABSL_THREAD_IDENTITY_MODE_USE_CPP11 cannot be direcly set
+#error ABSL_THREAD_IDENTITY_MODE_USE_CPP11 cannot be directly set
#else
#define ABSL_THREAD_IDENTITY_MODE_USE_CPP11 2
#endif
#ifdef ABSL_THREAD_IDENTITY_MODE
-#error ABSL_THREAD_IDENTITY_MODE cannot be direcly set
+#error ABSL_THREAD_IDENTITY_MODE cannot be directly set
#elif defined(ABSL_FORCE_THREAD_IDENTITY_MODE)
#define ABSL_THREAD_IDENTITY_MODE ABSL_FORCE_THREAD_IDENTITY_MODE
#elif defined(_WIN32) && !defined(__MINGW32__)
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index b70aac3e..c686e05a 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -269,11 +269,13 @@ cc_library(
name = "cord_internal",
srcs = [
"internal/cord_internal.cc",
+ "internal/cord_rep_btree.cc",
"internal/cord_rep_consume.cc",
"internal/cord_rep_ring.cc",
],
hdrs = [
"internal/cord_internal.h",
+ "internal/cord_rep_btree.h",
"internal/cord_rep_consume.h",
"internal/cord_rep_flat.h",
"internal/cord_rep_ring.h",
@@ -296,6 +298,23 @@ cc_library(
"//absl/container:layout",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
+ "//absl/types:span",
+ ],
+)
+
+cc_test(
+ name = "cord_rep_btree_test",
+ size = "medium",
+ srcs = ["internal/cord_rep_btree_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ visibility = ["//visibility:private"],
+ deps = [
+ ":cord_internal",
+ ":cord_rep_test_util",
+ ":strings",
+ "//absl/base:config",
+ "//absl/base:raw_logging_internal",
+ "@com_google_googletest//:gtest_main",
],
)
@@ -578,6 +597,19 @@ cc_library(
)
cc_library(
+ name = "cord_rep_test_util",
+ testonly = 1,
+ hdrs = ["internal/cord_rep_test_util.h"],
+ copts = ABSL_DEFAULT_COPTS,
+ deps = [
+ ":cord_internal",
+ ":strings",
+ "//absl/base:config",
+ "//absl/base:raw_logging_internal",
+ ],
+)
+
+cc_library(
name = "cordz_test_helpers",
testonly = 1,
hdrs = ["cordz_test_helpers.h"],
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index d79ef712..2ddd3c48 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -555,12 +555,14 @@ absl_cc_library(
cord_internal
HDRS
"internal/cord_internal.h"
+ "internal/cord_rep_btree.h"
"internal/cord_rep_consume.h"
"internal/cord_rep_flat.h"
"internal/cord_rep_ring.h"
"internal/cord_rep_ring_reader.h"
SRCS
"internal/cord_internal.cc"
+ "internal/cord_rep_btree.cc"
"internal/cord_rep_consume.cc"
"internal/cord_rep_ring.cc"
COPTS
@@ -849,6 +851,21 @@ absl_cc_library(
absl_cc_library(
NAME
+ cord_rep_test_util
+ HDRS
+ "internal/cord_rep_test_util.h"
+ COPTS
+ ${ABSL_TEST_COPTS}
+ DEPS
+ absl::config
+ absl::cord_internal
+ absl::raw_logging_internal
+ absl::strings
+ TESTONLY
+)
+
+absl_cc_library(
+ NAME
cord_test_helpers
HDRS
"cord_test_helpers.h"
@@ -924,6 +941,24 @@ absl_cc_test(
absl_cc_test(
NAME
+ cord_rep_btree_test
+ SRCS
+ "internal/cord_rep_btree_test.cc"
+ COPTS
+ ${ABSL_TEST_COPTS}
+ DEPS
+ absl::base
+ absl::config
+ absl::cord_internal
+ absl::cord_rep_test_util
+ absl::core_headers
+ absl::raw_logging_internal
+ absl::strings
+ GTest::gmock_main
+)
+
+absl_cc_test(
+ NAME
cord_ring_test
SRCS
"cord_ring_test.cc"
diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc
index 6a0b051c..f79cb628 100644
--- a/absl/strings/internal/cord_internal.cc
+++ b/absl/strings/internal/cord_internal.cc
@@ -18,6 +18,7 @@
#include <memory>
#include "absl/container/inlined_vector.h"
+#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cord_rep_ring.h"
@@ -50,6 +51,9 @@ void CordRep::Destroy(CordRep* rep) {
rep = left;
continue;
}
+ } else if (rep->tag == BTREE) {
+ CordRepBtree::Destroy(rep->btree());
+ rep = nullptr;
} else if (rep->tag == RING) {
CordRepRing::Destroy(rep->ring());
rep = nullptr;
diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc
new file mode 100644
index 00000000..6978cfd2
--- /dev/null
+++ b/absl/strings/internal/cord_rep_btree.cc
@@ -0,0 +1,947 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/cord_rep_btree.h"
+
+#include <cassert>
+#include <cstdint>
+#include <iostream>
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
+#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_consume.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+namespace {
+
+using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
+using EdgeType = CordRepBtree::EdgeType;
+using OpResult = CordRepBtree::OpResult;
+using CopyResult = CordRepBtree::CopyResult;
+
+constexpr auto kFront = CordRepBtree::kFront;
+constexpr auto kBack = CordRepBtree::kBack;
+
+// Implementation of the various 'Dump' functions.
+// Prints the entire tree structure or 'rep'. External callers should
+// not specify 'depth' and leave it to its default (0) value.
+// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
+void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
+ int depth = 0) {
+ // Allow for full height trees + substring -> flat / external nodes.
+ assert(depth <= CordRepBtree::kMaxDepth + 2);
+ std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
+ ? std::string("Private")
+ : absl::StrCat("Shared(", rep->refcount.Get(), ")");
+ std::string sptr = absl::StrCat("0x", absl::Hex(rep));
+
+ // Dumps the data contents of `rep` if `include_contents` is true.
+ // Always emits a new line character.
+ auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
+ if (include_contents) {
+ // Allow for up to 60 wide display of content data, which with some
+ // indentation and prefix / labels keeps us within roughly 80-100 wide.
+ constexpr size_t kMaxDataLength = 60;
+ stream << ", data = \""
+ << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
+ << (r->length > kMaxDataLength ? "\"..." : "\"");
+ }
+ stream << '\n';
+ };
+
+ // For each level, we print the 'shared/private' state and the rep pointer,
+ // indented by two spaces per recursive depth.
+ stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";
+
+ if (rep->tag == BTREE) {
+ const CordRepBtree* node = rep->btree();
+ std::string label =
+ node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
+ stream << label << ", len = " << node->length
+ << ", begin = " << node->begin() << ", end = " << node->end()
+ << "\n";
+ for (CordRep* edge : node->Edges()) {
+ DumpAll(edge, include_contents, stream, depth + 1);
+ }
+ } else if (rep->tag == SUBSTRING) {
+ const CordRepSubstring* substring = rep->substring();
+ stream << "Substring, len = " << rep->length
+ << ", start = " << substring->start;
+ maybe_dump_data(rep);
+ DumpAll(substring->child, include_contents, stream, depth + 1);
+ } else if (rep->tag >= FLAT) {
+ stream << "Flat, len = " << rep->length;
+ maybe_dump_data(rep);
+ } else if (rep->tag == EXTERNAL) {
+ stream << "Extn, len = " << rep->length;
+ maybe_dump_data(rep);
+ }
+}
+
+// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
+// small data out of large reps, and general efficiency of 'always copy small
+// data'. Consider making this a cord rep internal library function.
+CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
+ assert(n != 0);
+ assert(offset + n <= rep->length);
+ assert(offset != 0 || n != rep->length);
+
+ if (rep->tag == SUBSTRING) {
+ CordRepSubstring* substring = rep->substring();
+ offset += substring->start;
+ rep = CordRep::Ref(substring->child);
+ CordRep::Unref(substring);
+ }
+ CordRepSubstring* substring = new CordRepSubstring();
+ substring->length = n;
+ substring->tag = SUBSTRING;
+ substring->start = offset;
+ substring->child = rep;
+ return substring;
+}
+
+// TODO(b/192061034): consider making this a cord rep library function.
+inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
+ if (n == rep->length) return rep;
+ if (n == 0) return CordRep::Unref(rep), nullptr;
+ return CreateSubstring(rep, offset, n);
+}
+
+// TODO(b/192061034): consider making this a cord rep library function.
+inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
+ if (offset == 0) return rep;
+ return CreateSubstring(rep, offset, rep->length - offset);
+}
+
+template <EdgeType edge_type>
+inline absl::string_view Consume(absl::string_view s, size_t n) {
+ return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n);
+}
+
+template <EdgeType edge_type>
+inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
+ if (edge_type == kBack) {
+ memcpy(dst, s.data(), n);
+ return s.substr(n);
+ } else {
+ const size_t offset = s.size() - n;
+ memcpy(dst, s.data() + offset, n);
+ return s.substr(0, offset);
+ }
+}
+
+// Known issue / optimization weirdness: the store associated with the
+// decrement introduces traffic between cpus (even if the result of that
+// traffic does nothing), making this faster than a single call to
+// refcount.Decrement() checking the zero refcount condition.
+template <typename R, typename Fn>
+inline void FastUnref(R* r, Fn&& fn) {
+ if (r->refcount.IsOne()) {
+ fn(r);
+ } else if (!r->refcount.DecrementExpectHighRefcount()) {
+ fn(r);
+ }
+}
+
+// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT
+// node, or a SUBSTRING of an EXTERNAL or FLAT node.
+void DeleteLeafEdge(CordRep* rep) {
+ for (;;) {
+ if (rep->tag >= FLAT) {
+ CordRepFlat::Delete(rep->flat());
+ return;
+ }
+ if (rep->tag == EXTERNAL) {
+ CordRepExternal::Delete(rep->external());
+ return;
+ }
+ assert(rep->tag == SUBSTRING);
+ CordRepSubstring* substring = rep->substring();
+ rep = substring->child;
+ assert(rep->tag == EXTERNAL || rep->tag >= FLAT);
+ delete substring;
+ if (rep->refcount.Decrement()) return;
+ }
+}
+
+// StackOperations contains the logic to build a left-most or right-most stack
+// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
+// propagate node changes up the stack.
+template <EdgeType edge_type>
+struct StackOperations {
+ // Returns true if the node at 'depth' is not shared, i.e. has a refcount
+ // of one and all of its parent nodes have a refcount of one.
+ inline bool owned(int depth) const { return depth < share_depth; }
+
+ // Returns the node at 'depth'.
+ inline CordRepBtree* node(int depth) const { return stack[depth]; }
+
+ // Builds a `depth` levels deep stack starting at `tree` recording which nodes
+ // are private in the form of the 'share depth' where nodes are shared.
+ inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
+ assert(depth <= tree->height());
+ int current_depth = 0;
+ while (current_depth < depth && tree->refcount.IsOne()) {
+ stack[current_depth++] = tree;
+ tree = tree->Edge(edge_type)->btree();
+ }
+ share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0);
+ while (current_depth < depth) {
+ stack[current_depth++] = tree;
+ tree = tree->Edge(edge_type)->btree();
+ }
+ return tree;
+ }
+
+ // Builds a stack with the invariant that all nodes are private owned / not
+ // shared. This is used in iterative updates where a previous propagation
+ // guaranteed all nodes are owned / private.
+ inline void BuildOwnedStack(CordRepBtree* tree, int height) {
+ assert(height <= CordRepBtree::kMaxHeight);
+ int depth = 0;
+ while (depth < height) {
+ assert(tree->refcount.IsOne());
+ stack[depth++] = tree;
+ tree = tree->Edge(edge_type)->btree();
+ }
+ assert(tree->refcount.IsOne());
+ share_depth = depth + 1;
+ }
+
+ // Processes the final 'top level' result action for the tree.
+ // See the 'Action' enum for the various action implications.
+ static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
+ switch (result.action) {
+ case CordRepBtree::kPopped:
+ if (ABSL_PREDICT_FALSE(tree->height() >= CordRepBtree::kMaxHeight)) {
+ ABSL_RAW_LOG(FATAL, "Max height exceeded");
+ }
+ return edge_type == kBack ? CordRepBtree::New(tree, result.tree)
+ : CordRepBtree::New(result.tree, tree);
+ case CordRepBtree::kCopied:
+ CordRep::Unref(tree);
+ ABSL_FALLTHROUGH_INTENDED;
+ case CordRepBtree::kSelf:
+ return result.tree;
+ }
+ ABSL_INTERNAL_UNREACHABLE;
+ return result.tree;
+ }
+
+ // Propagate the action result in 'result' up into all nodes of the stack
+ // starting at depth 'depth'. 'length' contains the extra length of data that
+ // was added at the lowest level, and is updated into all nodes of the stack.
+ // See the 'Action' enum for the various action implications.
+ // If 'propagate' is true, then any copied node values are updated into the
+ // stack, which is used for iterative processing on the same stack.
+ template <bool propagate = false>
+ inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
+ OpResult result) {
+ // TODO(mvels): revisit the below code to check if 3 loops with 3
+ // (incremental) conditions is faster than 1 loop with a switch.
+ // Benchmarking and perf recordings indicate the loop with switch is
+ // fastest, likely because of indirect jumps on the tight case values and
+ // dense branches. But it's worth considering 3 loops, as the `action`
+ // transitions are mono directional. E.g.:
+ // while (action == kPopped) {
+ // ...
+ // }
+ // while (action == kCopied) {
+ // ...
+ // }
+ // ...
+ // We also found that an "if () do {}" loop here seems faster, possibly
+ // because it allows the branch predictor more granular heuristics on
+ // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
+ // which appear to be the most common use cases.
+ if (depth != 0) {
+ do {
+ CordRepBtree* node = stack[--depth];
+ const bool owned = depth < share_depth;
+ switch (result.action) {
+ case CordRepBtree::kPopped:
+ assert(!propagate);
+ result = node->AddEdge<edge_type>(owned, result.tree, length);
+ break;
+ case CordRepBtree::kCopied:
+ result = node->SetEdge<edge_type>(owned, result.tree, length);
+ if (propagate) stack[depth] = result.tree;
+ break;
+ case CordRepBtree::kSelf:
+ node->length += length;
+ while (depth > 0) {
+ node = stack[--depth];
+ node->length += length;
+ }
+ return node;
+ }
+ } while (depth > 0);
+ }
+ return Finalize(tree, result);
+ }
+
+ // Invokes `Unwind` with `propagate=true` to update the stack node values.
+ inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
+ OpResult result) {
+ return Unwind</*propagate=*/true>(tree, depth, length, result);
+ }
+
+ // `share_depth` contains the depth at which the nodes in the stack become
+ // shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`),
+ // then `share_depth` is 0. If the 2nd node is shared (and implicitly all
+ // nodes below that) then `share_depth` is 1, etc. A `share_depth` greater
+ // than the depth of the stack indicates that none of the nodes in the stack
+ // are shared.
+ int share_depth;
+
+ NodeStack stack;
+};
+
+} // namespace
+
+void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
+ bool include_contents, std::ostream& stream) {
+ stream << "===================================\n";
+ if (!label.empty()) {
+ stream << label << '\n';
+ stream << "-----------------------------------\n";
+ }
+ if (rep) {
+ DumpAll(rep, include_contents, stream);
+ } else {
+ stream << "NULL\n";
+ }
+}
+
+void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
+ std::ostream& stream) {
+ Dump(rep, label, false, stream);
+}
+
+void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
+ Dump(rep, absl::string_view(), false, stream);
+}
+
+void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) {
+ for (CordRep* edge : tree->Edges(begin, end)) {
+ FastUnref(edge, DeleteLeafEdge);
+ }
+ Delete(tree);
+}
+
+void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
+ size_t end) {
+ for (CordRep* edge : tree->Edges(begin, end)) {
+ FastUnref(edge->btree(), Destroy);
+ }
+ Delete(tree);
+}
+
+bool CordRepBtree::IsValid(const CordRepBtree* tree) {
+#define NODE_CHECK_VALID(x) \
+ if (!(x)) { \
+ ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
+ return false; \
+ }
+#define NODE_CHECK_EQ(x, y) \
+ if ((x) != (y)) { \
+ ABSL_RAW_LOG(ERROR, \
+ "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
+ #y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \
+ return false; \
+ }
+
+ NODE_CHECK_VALID(tree != nullptr);
+ NODE_CHECK_EQ(tree->tag, BTREE);
+ NODE_CHECK_VALID(tree->height() <= kMaxHeight);
+ NODE_CHECK_VALID(tree->begin() < tree->capacity());
+ NODE_CHECK_VALID(tree->end() <= tree->capacity());
+ NODE_CHECK_VALID(tree->begin() <= tree->end());
+ size_t child_length = 0;
+ for (CordRep* edge : tree->Edges()) {
+ NODE_CHECK_VALID(edge != nullptr);
+ if (tree->height() > 0) {
+ NODE_CHECK_VALID(edge->tag == BTREE);
+ NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
+ } else {
+ NODE_CHECK_VALID(IsDataEdge(edge));
+ }
+ child_length += edge->length;
+ }
+ NODE_CHECK_EQ(child_length, tree->length);
+ if (tree->height() > 0) {
+ for (CordRep* edge : tree->Edges()) {
+ if (!IsValid(edge->btree())) return false;
+ }
+ }
+ return true;
+
+#undef NODE_CHECK_VALID
+#undef NODE_CHECK_EQ
+}
+
+#ifndef NDEBUG
+
+CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
+ if (!IsValid(tree)) {
+ Dump(tree, "CordRepBtree validation failed:", false, std::cout);
+ ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
+ }
+ return tree;
+}
+
+const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
+ if (!IsValid(tree)) {
+ Dump(tree, "CordRepBtree validation failed:", false, std::cout);
+ ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
+ }
+ return tree;
+}
+
+#endif // NDEBUG
+
+template <EdgeType edge_type>
+inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
+ if (size() >= kMaxCapacity) return {New(edge), kPopped};
+ OpResult result = ToOpResult(owned);
+ result.tree->Add<edge_type>(edge);
+ result.tree->length += delta;
+ return result;
+}
+
+template <EdgeType edge_type>
+OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
+ OpResult result;
+ const size_t idx = index(edge_type);
+ if (owned) {
+ result = {this, kSelf};
+ CordRep::Unref(edges_[idx]);
+ } else {
+ // Create a copy containing all unchanged edges. Unchanged edges are the
+ // open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
+ // We conveniently cover both case using a constexpr `shift` being 0 or 1
+ // as `end :== back + 1`.
+ result = {CopyRaw(), kCopied};
+ constexpr int shift = edge_type == kFront ? 1 : 0;
+ for (CordRep* r : Edges(begin() + shift, back() + shift)) {
+ CordRep::Ref(r);
+ }
+ }
+ result.tree->edges_[idx] = edge;
+ result.tree->length += delta;
+ return result;
+}
+
+template <EdgeType edge_type>
+CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
+ const int depth = tree->height();
+ const size_t length = rep->length;
+ StackOperations<edge_type> ops;
+ CordRepBtree* leaf = ops.BuildStack(tree, depth);
+ const OpResult result =
+ leaf->AddEdge<edge_type>(ops.owned(depth), rep, length);
+ return ops.Unwind(tree, depth, length, result);
+}
+
+template <>
+CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
+ size_t extra) {
+ CordRepBtree* leaf = CordRepBtree::New(0);
+ size_t length = 0;
+ size_t end = 0;
+ const size_t cap = leaf->capacity();
+ while (!data.empty() && end != cap) {
+ auto* flat = CordRepFlat::New(data.length() + extra);
+ flat->length = (std::min)(data.length(), flat->Capacity());
+ length += flat->length;
+ leaf->edges_[end++] = flat;
+ data = Consume<kBack>(flat->Data(), data, flat->length);
+ }
+ leaf->length = length;
+ leaf->set_end(end);
+ return leaf;
+}
+
+template <>
+CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
+ size_t extra) {
+ CordRepBtree* leaf = CordRepBtree::New(0);
+ size_t length = 0;
+ size_t begin = leaf->capacity();
+ leaf->set_end(leaf->capacity());
+ while (!data.empty() && begin != 0) {
+ auto* flat = CordRepFlat::New(data.length() + extra);
+ flat->length = (std::min)(data.length(), flat->Capacity());
+ length += flat->length;
+ leaf->edges_[--begin] = flat;
+ data = Consume<kFront>(flat->Data(), data, flat->length);
+ }
+ leaf->length = length;
+ leaf->set_begin(begin);
+ return leaf;
+}
+
+template <>
+absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
+ size_t extra) {
+ assert(!data.empty());
+ assert(size() < capacity());
+ AlignBegin();
+ const size_t cap = capacity();
+ do {
+ CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
+ const size_t n = (std::min)(data.length(), flat->Capacity());
+ flat->length = n;
+ edges_[fetch_add_end(1)] = flat;
+ data = Consume<kBack>(flat->Data(), data, n);
+ } while (!data.empty() && end() != cap);
+ return data;
+}
+
+template <>
+absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
+ size_t extra) {
+ assert(!data.empty());
+ assert(size() < capacity());
+ AlignEnd();
+ do {
+ CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
+ const size_t n = (std::min)(data.length(), flat->Capacity());
+ flat->length = n;
+ edges_[sub_fetch_begin(1)] = flat;
+ data = Consume<kFront>(flat->Data(), data, n);
+ } while (!data.empty() && begin() != 0);
+ return data;
+}
+
+template <EdgeType edge_type>
+CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
+ size_t extra) {
+ if (ABSL_PREDICT_FALSE(data.empty())) return tree;
+
+ const size_t original_data_size = data.size();
+ int depth = tree->height();
+ StackOperations<edge_type> ops;
+ CordRepBtree* leaf = ops.BuildStack(tree, depth);
+
+ // If there is capacity in the last edge, append as much data
+ // as possible into this last edge.
+ if (leaf->size() < leaf->capacity()) {
+ OpResult result = leaf->ToOpResult(ops.owned(depth));
+ data = result.tree->AddData<edge_type>(data, extra);
+ if (data.empty()) {
+ result.tree->length += original_data_size;
+ return ops.Unwind(tree, depth, original_data_size, result);
+ }
+
+ // We added some data into this leaf, but not all. Propagate the added
+ // length to the top most node, and rebuild the stack with any newly copied
+ // or updated nodes. From this point on, the path (leg) from the top most
+ // node to the right-most node towards the leaf node is privately owned.
+ size_t delta = original_data_size - data.size();
+ assert(delta > 0);
+ result.tree->length += delta;
+ tree = ops.Propagate(tree, depth, delta, result);
+ ops.share_depth = depth + 1;
+ }
+
+ // We were unable to append all data into the existing right-most leaf node.
+ // This means all remaining data must be put into (a) new leaf node(s) which
+ // we append to the tree. To make this efficient, we iteratively build full
+ // leaf nodes from `data` until the created leaf contains all remaining data.
+ // We utilize the `Unwind` method to merge the created leaf into the first
+ // level towards root that has capacity. On each iteration with remaining
+ // data, we rebuild the stack in the knowledge that right-most nodes are
+ // privately owned after the first `Unwind` completes.
+ for (;;) {
+ OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
+ if (result.tree->length == data.size()) {
+ return ops.Unwind(tree, depth, result.tree->length, result);
+ }
+ data = Consume<edge_type>(data, result.tree->length);
+ tree = ops.Unwind(tree, depth, result.tree->length, result);
+ depth = tree->height();
+ ops.BuildOwnedStack(tree, depth);
+ }
+}
+
+template <EdgeType edge_type>
+CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
+ assert(dst->height() >= src->height());
+
+ // Capture source length as we may consume / destroy `src`.
+ const size_t length = src->length;
+
+ // We attempt to merge `src` at its corresponding height in `dst`.
+ const int depth = dst->height() - src->height();
+ StackOperations<edge_type> ops;
+ CordRepBtree* merge_node = ops.BuildStack(dst, depth);
+
+ // If there is enough space in `merge_node` for all edges from `src`, add all
+ // edges to this node, making a fresh copy as needed if not privately owned.
+ // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
+ // `Finalize` to merge `src` into the first level towards `root` where there
+ // is capacity for another edge, or create a new top level node.
+ OpResult result;
+ if (merge_node->size() + src->size() <= kMaxCapacity) {
+ result = merge_node->ToOpResult(ops.owned(depth));
+ result.tree->Add<edge_type>(src->Edges());
+ result.tree->length += src->length;
+ if (src->refcount.IsOne()) {
+ Delete(src);
+ } else {
+ for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
+ CordRepBtree::Unref(src);
+ }
+ } else {
+ result = {src, kPopped};
+ }
+
+ // Unless we merged at the top level (i.e.: src and dst are equal height),
+ // unwind the result towards the top level, and finalize the result.
+ if (depth) {
+ return ops.Unwind(dst, depth, length, result);
+ }
+ return ops.Finalize(dst, result);
+}
+
+CopyResult CordRepBtree::CopySuffix(size_t offset) {
+ assert(offset < this->length);
+
+ // As long as `offset` starts inside the last edge, we can 'drop' the current
+ // depth. For the most extreme example: if offset references the last data
+ // edge in the tree, there is only a single edge / path from the top of the
+ // tree to that last edge, so we can drop all the nodes except that edge.
+ // The fast path check for this is `back->length >= length - offset`.
+ int height = this->height();
+ CordRepBtree* node = this;
+ size_t len = node->length - offset;
+ CordRep* back = node->Edge(kBack);
+ while (back->length >= len) {
+ offset = back->length - len;
+ if (--height < 0) {
+ return {MakeSubstring(CordRep::Ref(back), offset), height};
+ }
+ node = back->btree();
+ back = node->Edge(kBack);
+ }
+ if (offset == 0) return {CordRep::Ref(node), height};
+
+ // Offset does not point into the last edge, so we span at least two edges.
+ // Find the index of offset with `IndexBeyond` which provides us the edge
+ // 'beyond' the offset if offset is not a clean starting point of an edge.
+ Position pos = node->IndexBeyond(offset);
+ CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
+ const CopyResult result = {sub, height};
+
+ // `pos.n` contains a non zero value if the offset is not an exact starting
+ // point of an edge. In this case, `pos.n` contains the 'trailing' amount of
+ // bytes of the edge preceding that in `pos.index`. We need to iteratively
+ // adjust the preceding edge with the 'broken' offset until we have a perfect
+ // start of the edge.
+ while (pos.n != 0) {
+ assert(pos.index >= 1);
+ const size_t begin = pos.index - 1;
+ sub->set_begin(begin);
+ CordRep* const edge = node->Edge(begin);
+
+ len = pos.n;
+ offset = edge->length - len;
+
+ if (--height < 0) {
+ sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
+ return result;
+ }
+
+ node = edge->btree();
+ pos = node->IndexBeyond(offset);
+
+ CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
+ sub->edges_[begin] = nsub;
+ sub = nsub;
+ }
+ sub->set_begin(pos.index);
+ return result;
+}
+
+CopyResult CordRepBtree::CopyPrefix(size_t n) {
+ assert(n > 0);
+ assert(n <= this->length);
+
+ // As long as `n` does not exceed the length of the first edge, we can 'drop'
+ // the current depth. For the most extreme example: if we'd copy a 1 byte
+ // prefix from a tree, there is only a single edge / path from the top of the
+ // tree to the single data edge containing this byte, so we can drop all the
+ // nodes except the data node.
+ int height = this->height();
+ CordRepBtree* node = this;
+ CordRep* front = node->Edge(kFront);
+ while (front->length >= n) {
+ if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
+ node = front->btree();
+ front = node->Edge(kFront);
+ }
+ if (node->length == n) return {CordRep::Ref(node), height};
+
+ // `n` spans at least two nodes, find the end point of the span.
+ Position pos = node->IndexOf(n);
+
+ // Create a partial copy of the node up to `pos.index`, with a defined length
+ // of `n`. Any 'partial last edge' is added further below as needed.
+ CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
+ const CopyResult result = {sub, height};
+
+ // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
+ // this is not zero, we don't have a 'clean cut', so we need to make a
+ // (partial) copy of that last edge, and repeat this until pos.n is zero.
+ while (pos.n != 0) {
+ size_t end = pos.index;
+ n = pos.n;
+
+ CordRep* edge = node->Edge(pos.index);
+ if (--height < 0) {
+ sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
+ sub->set_end(end);
+ AssertValid(result.edge->btree());
+ return result;
+ }
+
+ node = edge->btree();
+ pos = node->IndexOf(n);
+ CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
+ sub->edges_[end++] = nsub;
+ sub->set_end(end);
+ sub = nsub;
+ }
+ sub->set_end(pos.index);
+ AssertValid(result.edge->btree());
+ return result;
+}
+
+CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
+ assert(n <= this->length);
+ assert(offset <= this->length - n);
+ if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;
+
+ CordRepBtree* node = this;
+ int height = node->height();
+ Position front = node->IndexOf(offset);
+ CordRep* left = node->edges_[front.index];
+ while (front.n + n <= left->length) {
+ if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
+ node = left->btree();
+ front = node->IndexOf(front.n);
+ left = node->edges_[front.index];
+ }
+
+ const Position back = node->IndexBefore(front, n);
+ CordRep* const right = node->edges_[back.index];
+ assert(back.index > front.index);
+
+ // Get partial suffix and prefix entries.
+ CopyResult prefix;
+ CopyResult suffix;
+ if (height > 0) {
+ // Copy prefix and suffix of the boundary nodes.
+ prefix = left->btree()->CopySuffix(front.n);
+ suffix = right->btree()->CopyPrefix(back.n);
+
+ // If there is an edge between the prefix and suffix edges, then the tree
+ // must remain at its previous (full) height. If we have no edges between
+ // prefix and suffix edges, then the tree must be as high as either the
+ // suffix or prefix edges (which are collapsed to their minimum heights).
+ if (front.index + 1 == back.index) {
+ height = (std::max)(prefix.height, suffix.height) + 1;
+ }
+
+ // Raise prefix and suffixes to the new tree height.
+ for (int h = prefix.height + 1; h < height; ++h) {
+ prefix.edge = CordRepBtree::New(prefix.edge);
+ }
+ for (int h = suffix.height + 1; h < height; ++h) {
+ suffix.edge = CordRepBtree::New(suffix.edge);
+ }
+ } else {
+ // Leaf node, simply take substrings for prefix and suffix.
+ prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
+ suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
+ }
+
+ // Compose resulting tree.
+ CordRepBtree* sub = CordRepBtree::New(height);
+ size_t end = 0;
+ sub->edges_[end++] = prefix.edge;
+ for (CordRep* r : node->Edges(front.index + 1, back.index)) {
+ sub->edges_[end++] = CordRep::Ref(r);
+ }
+ sub->edges_[end++] = suffix.edge;
+ sub->set_end(end);
+ sub->length = n;
+ return AssertValid(sub);
+}
+
+CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
+ CordRepBtree* right) {
+ return left->height() >= right->height() ? Merge<kBack>(left, right)
+ : Merge<kFront>(right, left);
+}
+
+bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
+ if (height() == 0 && size() == 1) {
+ if (fragment) *fragment = Data(begin());
+ return true;
+ }
+ return false;
+}
+
+bool CordRepBtree::IsFlat(size_t offset, const size_t n,
+ absl::string_view* fragment) const {
+ assert(n <= this->length);
+ assert(offset <= this->length - n);
+ if (ABSL_PREDICT_FALSE(n == 0)) return false;
+ int height = this->height();
+ const CordRepBtree* node = this;
+ for (;;) {
+ const Position front = node->IndexOf(offset);
+ const CordRep* edge = node->Edge(front.index);
+ if (edge->length < front.n + n) return false;
+ if (--height < 0) {
+ if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
+ return true;
+ }
+ offset = front.n;
+ node = node->Edge(front.index)->btree();
+ }
+}
+
+char CordRepBtree::GetCharacter(size_t offset) const {
+ assert(offset < length);
+ const CordRepBtree* node = this;
+ int height = node->height();
+ for (;;) {
+ Position front = node->IndexOf(offset);
+ if (--height < 0) return node->Data(front.index)[front.n];
+ offset = front.n;
+ node = node->Edge(front.index)->btree();
+ }
+}
+
+Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
+ // The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
+ assert(height() >= 4);
+ assert(refcount.IsOne());
+
+ // Build a stack of nodes we may potentially need to update if we find a
+ // non-shared FLAT with capacity at the leaf level.
+ const int depth = height();
+ CordRepBtree* node = this;
+ CordRepBtree* stack[kMaxDepth];
+ for (int i = 0; i < depth; ++i) {
+ node = node->Edge(kBack)->btree();
+ if (!node->refcount.IsOne()) return {};
+ stack[i] = node;
+ }
+
+ // Must be a privately owned flat.
+ CordRep* const edge = node->Edge(kBack);
+ if (!edge->refcount.IsOne() || edge->tag < FLAT) return {};
+
+ // Must have capacity.
+ const size_t avail = edge->flat()->Capacity() - edge->length;
+ if (avail == 0) return {};
+
+ // Build span on remaining capacity.
+ size_t delta = (std::min)(size, avail);
+ Span<char> span = {edge->flat()->Data() + edge->length, delta};
+ edge->length += delta;
+ this->length += delta;
+ for (int i = 0; i < depth; ++i) {
+ stack[i]->length += delta;
+ }
+ return span;
+}
+
+CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
+ if (rep->tag == BTREE) return rep->btree();
+
+ CordRepBtree* node = nullptr;
+ auto consume = [&node](CordRep* r, size_t offset, size_t length) {
+ r = MakeSubstring(r, offset, length);
+ if (node == nullptr) {
+ node = New(r);
+ } else {
+ node = CordRepBtree::AddCordRep<kBack>(node, r);
+ }
+ };
+ Consume(rep, consume);
+ return node;
+}
+
+CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
+ if (ABSL_PREDICT_TRUE(rep->tag == BTREE)) {
+ return MergeTrees(tree, rep->btree());
+ }
+ auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
+ r = MakeSubstring(r, offset, length);
+ tree = CordRepBtree::AddCordRep<kBack>(tree, r);
+ };
+ Consume(rep, consume);
+ return tree;
+}
+
+CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
+ if (ABSL_PREDICT_TRUE(rep->tag == BTREE)) {
+ return MergeTrees(rep->btree(), tree);
+ }
+ auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
+ r = MakeSubstring(r, offset, length);
+ tree = CordRepBtree::AddCordRep<kFront>(tree, r);
+ };
+ ReverseConsume(rep, consume);
+ return tree;
+}
+
+CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
+ size_t extra) {
+ return CordRepBtree::AddData<kBack>(tree, data, extra);
+}
+
+CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
+ size_t extra) {
+ return CordRepBtree::AddData<kFront>(tree, data, extra);
+}
+
+template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
+ CordRep* rep);
+template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
+ CordRep* rep);
+template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
+ absl::string_view data,
+ size_t extra);
+template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
+ absl::string_view data,
+ size_t extra);
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h
new file mode 100644
index 00000000..f4118fc0
--- /dev/null
+++ b/absl/strings/internal/cord_rep_btree.h
@@ -0,0 +1,851 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
+#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
+
+#include <cassert>
+#include <cstdint>
+#include <iosfwd>
+
+#include "absl/base/config.h"
+#include "absl/base/internal/raw_logging.h"
+#include "absl/base/optimization.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_btree.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+class CordRepBtreeNavigator;
+
+// CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
+// Data is stored at the leaf level only, non leaf nodes contain down pointers
+// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
+// or EXTERNAL nodes. The implementation allows for data to be added to either
+// end of the tree only, it does not provide any 'insert' logic. This has the
+// benefit that we can expect good fill ratios: all nodes except the outer
+// 'legs' will have 100% fill ratios for trees built using Append/Prepend
+// methods. Merged trees will typically have a fill ratio well above 50% as in a
+// similar fashion, one side of the merged tree will typically have a 100% fill
+// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
+// better, and the tree never needs balancing.
+//
+// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
+// input unless explicitly stated otherwise. All functions returning a CordRep*
+// or CordRepBtree* instance transfer a reference back to the caller.
+// Simplified, callers both 'donate' and 'consume' a reference count on each
+// call, simplifying the API. An example of building a tree:
+//
+// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
+// tree = CordRepBtree::Append(tree, MakeFlat("world"));
+//
+// In the above example, all inputs are consumed, making each call affecting
+// `tree` reference count neutral. The returned `tree` value can be different
+// from the input if the input is shared with other threads, or if the tree
+// grows in height, but callers typically never have to concern themselves with
+// that and trust that all methods DTRT at all times.
+class CordRepBtree : public CordRep {
+ public:
+ // EdgeType identifies `front` and `back` enum values.
+ // Various implementations in CordRepBtree such as `Add` and `Edge` are
+ // generic and templated on operating on either of the boundary edges.
+ // For more information on the possible edges contained in a CordRepBtree
+ // instance see the documentation for `edges_`.
+ enum class EdgeType { kFront, kBack };
+
+ // Convenience constants into `EdgeType`
+ static constexpr EdgeType kFront = EdgeType::kFront;
+ static constexpr EdgeType kBack = EdgeType::kBack;
+
+ // Maximum number of edges: based on experiments and performance data, we can
+ // pick suitable values resulting in optimum cacheline aligned values. The
+ // preferred values are based on 64-bit systems where we aim to align this
+ // class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
+ // TODO(b/192061034): experiment with alternative sizes.
+ static constexpr size_t kMaxCapacity = 6;
+
+ // Reasonable maximum height of the btree. We can expect a fill ratio of at
+ // least 50%: trees are always expanded at the front or back. Concatenating
+ // trees will then typically fold at the top most node, where the lower nodes
+ // are at least at capacity on one side of joined inputs. At a lower fill
+ // rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
+ // We will fail / abort if an application ever exceeds this height, which
+ // should be extremely rare (near impossible) and be an indication of an
+ // application error: we do not assume it reasonable for any application to
+ // operate correctly with such monster trees.
+ // Another compelling reason for the number `12` is that any contextual stack
+ // required for navigation or insertion requires 12 words and 12 bytes, which
+ // fits inside 2 cache lines with some room to spare, and is reasonable as a
+ // local stack variable compared to Cord's current near 400 bytes stack use.
+ // The maximum `height` value of a node is then `kMaxDepth - 1` as node height
+ // values start with a value of 0 for leaf nodes.
+ static constexpr int kMaxDepth = 12;
+ static constexpr int kMaxHeight = kMaxDepth - 1;
+
+ // `Action` defines the action for unwinding changes done at the btree's leaf
+ // level that need to be propagated up to the parent node(s). Each operation
+ // on a node has an effect / action defined as follows:
+ // - kSelf
+ // The operation (add / update, etc) was performed directly on the node as
+ // the node is private to the current thread (i.e.: not shared directly or
+ // indirectly through a refcount > 1). Changes can be propagated directly to
+ // all parent nodes as all parent nodes are also then private to the current
+ // thread.
+ // - kCopied
+ // The operation (add / update, etc) was performed on a copy of the original
+ // node, as the node is (potentially) directly or indirectly shared with
+ // other threads. Changes need to be propagated into the parent nodes where
+ // the old down pointer must be unreffed and replaced with this new copy.
+ // Such changes to parent nodes may themselves require a copy if the parent
+ // node is also shared. A kCopied action can propagate all the way to the
+ // top node where we then must unref the `tree` input provided by the
+ // caller, and return the new copy.
+ // - kPopped
+ // The operation (typically add) could not be satisfied due to insufficient
+ // capacity in the targeted node, and a new 'leg' was created that needs to
+ // be added into the parent node. For example, adding a FLAT inside a leaf
+ // node that is at capacity will create a new leaf node containing that
+ // FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
+ // cascade up the tree if parent nodes are also at capacity. A 'Popped'
+ // action propagating all the way to the top of the tree will result in
+ // the tree becoming one level higher than the current tree through a final
+ // `CordRepBtree::New(tree, popped)` call, resulting in a new top node
+ // referencing the old tree and the new (fully popped upwards) 'leg'.
+ enum Action { kSelf, kCopied, kPopped };
+
+ // Result of an operation on a node. See the `Action` enum for details.
+ struct OpResult {
+ CordRepBtree* tree;
+ Action action;
+ };
+
+ // Return value of the CopyPrefix and CopySuffix methods which can
+ // return a node or data edge at any height inside the tree.
+ // A height of 0 defines the lowest (leaf) node, a height of -1 identifies
+ // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
+ struct CopyResult {
+ CordRep* edge;
+ int height;
+ };
+
+ // Logical position inside a node:
+ // - index: index of the edge.
+ // - n: size or offset value depending on context.
+ struct Position {
+ size_t index;
+ size_t n;
+ };
+
+ // Creates a btree from the given input. Adopts a ref of `rep`.
+ // If the input `rep` is itself a btree, i.e., `tag == BTREE`, then this
+ // function immediately returns `rep->btree()`. If the input is a valid data
+ // edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
+ // as the sole data edge. Else, the input is assumed to be a (legacy) concat
+ // tree, and the input is consumed and transformed into a btree().
+ static CordRepBtree* Create(CordRep* rep);
+
+ // Destroys the provided tree. Should only be called by cord internal API's,
+ // typically after a ref_count.Decrement() on the last reference count.
+ static void Destroy(CordRepBtree* tree);
+
+ // Appends / Prepends an existing CordRep instance to this tree.
+ // The below methods accept three types of input:
+ // 1) `rep` is a data node (See `IsDataNode` for valid data edges).
+ // `rep` is appended or prepended to this tree 'as is'.
+ // 2) `rep` is a BTREE.
+ // `rep` is merged into `tree` respecting the Append/Prepend order.
+ // 3) `rep` is some other (legacy) type.
+ // `rep` is converted in place and added to `tree`
+ // Requires `tree` and `rep` to be not null.
+ static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
+ static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
+
+ // Append/Prepend the data in `data` to this tree.
+ // The `extra` parameter defines how much extra capacity should be allocated
+ // for any additional FLAT being allocated. This is an optimization hint from
+ // the caller. For example, a caller may need to add 2 string_views of data
+ // "abc" and "defghi" which are not consecutive. The caller can in this case
+ // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
+ // where possible with at least 6 bytes of extra capacity beyond `length`.
+ // This helps avoiding data getting fragmented over multiple flats.
+ // There is no limit on the size of `data`. If `data` can not be stored inside
+ // a single flat, then the function will iteratively add flats until all data
+ // has been consumed and appended or prepended to the tree.
+ static CordRepBtree* Append(CordRepBtree* tree, string_view data,
+ size_t extra = 0);
+ static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
+ size_t extra = 0);
+
+ // Returns a new tree, containing `n` bytes of data from this instance
+ // starting at offset `offset`. Where possible, the returned tree shares
+ // (re-uses) data edges and nodes with this instance to minimize the
+ // combined memory footprint of both trees.
+ // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
+ CordRep* SubTree(size_t offset, size_t n);
+
+ // Returns the character at the given offset.
+ char GetCharacter(size_t offset) const;
+
+ // Returns true if this node holds a single data edge, and if so, sets
+ // `fragment` to reference the contained data. `fragment` is an optional
+ // output parameter and allowed to be null.
+ bool IsFlat(absl::string_view* fragment) const;
+
+ // Returns true if the data of `n` bytes starting at offset `offset`
+ // is contained in a single data edge, and if so, sets fragment to reference
+ // the contained data. `fragment` is an optional output parameter and allowed
+ // to be null.
+ bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
+
+ // Returns a span (mutable range of bytes) of up to `size` bytes into the
+ // last FLAT data edge inside this tree under the following conditions:
+ // - none of the nodes down into the FLAT node are shared.
+ // - the last data edge in this tree is a non-shared FLAT.
+ // - the referenced FLAT has additional capacity available.
+ // If all these conditions are met, a non-empty span is returned, and the
+ // length of the flat node and involved tree nodes have been increased by
+ // `span.length()`. The caller is responsible for immediately assigning values
+ // to all uninitialized data reference by the returned span.
+ // Requires `this->refcount.IsOne()`: this function forces the caller to do
+ // this fast path check on the top level node, as this is the most commonly
+ // shared node of a cord tree.
+ Span<char> GetAppendBuffer(size_t size);
+
+ // Returns the `height` of the tree. The height of a tree is limited to
+ // kMaxHeight. `height` is implemented as an `int` as in some places we
+ // use negative (-1) values for 'data edges'.
+ int height() const { return static_cast<int>(storage[0]); }
+
+ // Properties: begin, back, end, front/back boundary indexes.
+ size_t begin() const { return static_cast<size_t>(storage[1]); }
+ size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
+ size_t end() const { return static_cast<size_t>(storage[2]); }
+ size_t index(EdgeType edge) const {
+ return edge == kFront ? begin() : back();
+ }
+
+ // Properties: size and capacity.
+ // `capacity` contains the current capacity of this instance, where
+ // `kMaxCapacity` contains the maximum capacity of a btree node.
+ // For now, `capacity` and `kMaxCapacity` return the same value, but this may
+ // change in the future if we see benefit in dynamically sizing 'small' nodes
+ // to 'large' nodes for large data trees.
+ size_t size() const { return end() - begin(); }
+ size_t capacity() const { return kMaxCapacity; }
+
+ // Edge access
+ inline CordRep* Edge(size_t index) const;
+ inline CordRep* Edge(EdgeType edge_type) const;
+ inline absl::Span<CordRep* const> Edges() const;
+ inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
+
+ // Returns reference to the data edge at `index`.
+ // Requires this instance to be a leaf node, and `index` to be valid index.
+ inline absl::string_view Data(size_t index) const;
+
+ static const char* EdgeDataPtr(const CordRep* r);
+ static absl::string_view EdgeData(const CordRep* r);
+
+ // Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
+ // holding a FLAT or EXTERNAL child rep.
+ static bool IsDataEdge(const CordRep* rep);
+
+ // Diagnostics
+ static bool IsValid(const CordRepBtree* tree);
+ static CordRepBtree* AssertValid(CordRepBtree* tree);
+ static const CordRepBtree* AssertValid(const CordRepBtree* tree);
+ static void Dump(const CordRep* rep, std::ostream& stream);
+ static void Dump(const CordRep* rep, absl::string_view label,
+ std::ostream& stream);
+ static void Dump(const CordRep* rep, absl::string_view label,
+ bool include_contents, std::ostream& stream);
+
+ // Adds the edge `edge` to this node if possible. `owned` indicates if the
+ // current node is potentially shared or not with other threads. Returns:
+ // - {kSelf, <this>}
+ // The edge was directly added to this node.
+ // - {kCopied, <node>}
+ // The edge was added to a copy of this node.
+ // - {kPopped, New(edge, height())}
+ // A new leg with the edge was created as this node has no extra capacity.
+ template <EdgeType edge_type>
+ inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
+
+ // Replaces the front or back edge with the provided new edge. Returns:
+ // - {kSelf, <this>}
+ // The edge was directly set in this node. The old edge is unreffed.
+ // - {kCopied, <node>}
+ // A copy of this node was created with the new edge value.
+ // In both cases, the function adopts a reference on `edge`.
+ template <EdgeType edge_type>
+ OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
+
+ // Creates a new empty node at the specified height.
+ static CordRepBtree* New(int height = 0);
+
+ // Creates a new node containing `rep`, with the height being computed
+ // automatically based on the type of `rep`.
+ static CordRepBtree* New(CordRep* rep);
+
+ // Creates a new node containing both `front` and `back` at height
+ // `front.height() + 1`. Requires `back.height() == front.height()`.
+ static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
+
+ private:
+ CordRepBtree() = default;
+ ~CordRepBtree() = default;
+
+ // Initializes the main properties `tag`, `begin`, `end`, `height`.
+ inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
+
+ // Direct property access begin / end
+ void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); }
+ void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); }
+
+ // Decreases the value of `begin` by `n`, and returns the new value. Notice
+ // how this returns the new value unlike atomic::fetch_add which returns the
+ // old value. This is because this is used to prepend edges at 'begin - 1'.
+ size_t sub_fetch_begin(size_t n) {
+ storage[1] -= static_cast<uint8_t>(n);
+ return storage[1];
+ }
+
+ // Increases the value of `end` by `n`, and returns the previous value. This
+ // function is typically used to append edges at 'end'.
+ size_t fetch_add_end(size_t n) {
+ const uint8_t current = storage[2];
+ storage[2] = static_cast<uint8_t>(current + n);
+ return current;
+ }
+
+ // Returns the index of the last edge starting on, or before `offset`, with
+ // `n` containing the relative offset of `offset` inside that edge.
+ // Requires `offset` < length.
+ Position IndexOf(size_t offset) const;
+
+ // Returns the index of the last edge starting before `offset`, with `n`
+ // containing the relative offset of `offset` inside that edge.
+ // This function is useful to find the edges for some span of bytes ending at
+ // `offset` (i.e., `n` bytes). For example:
+ //
+ // Position pos = IndexBefore(n)
+ // edges = Edges(begin(), pos.index) // All full edges (may be empty)
+ // last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
+ //
+ // Requires 0 < `offset` <= length.
+ Position IndexBefore(size_t offset) const;
+
+ // Identical to the above function except starting from the position `front`.
+ // This function is equivalent to `IndexBefore(front.n + offset)`, with
+ // the difference that this function is optimized to start at `front.index`.
+ Position IndexBefore(Position front, size_t offset) const;
+
+ // Returns the index of the edge directly beyond the edge containing offset
+ // `offset`, with `n` containing the distance of that edge from `offset`.
+ // This function is useful for iteratively finding suffix nodes and remaining
+ // partial bytes in left-most suffix nodes as for example in CopySuffix.
+ // Requires `offset` < length.
+ Position IndexBeyond(size_t offset) const;
+
+ // Destruction
+ static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end);
+ static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end);
+ static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end);
+ static void Delete(CordRepBtree* tree) { delete tree; }
+
+ // Creates a new leaf node containing as much data as possible from `data`.
+ // The data is added either forwards or reversed depending on `edge_type`.
+ // Callers must check the length of the returned node to determine if all data
+ // was copied or not.
+ // See the `Append/Prepend` function for the meaning and purpose of `extra`.
+ template <EdgeType edge_type>
+ static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
+
+ // Creates a raw copy of this Btree node, copying all properties, but
+ // without adding any references to existing edges.
+ CordRepBtree* CopyRaw() const;
+
+ // Creates a full copy of this Btree node, adding a reference on all edges.
+ CordRepBtree* Copy() const;
+
+ // Creates a partial copy of this Btree node, copying all edges up to `end`,
+ // adding a reference on each copied edge, and sets the length of the newly
+ // created copy to `new_length`.
+ CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
+
+ // Creates a partial copy of this Btree node, copying all edges starting at
+ // `begin`, adding a reference on each copied edge, and sets the length of
+ // the newly created copy to `new_length`.
+ CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
+
+ // Returns a tree containing the result of appending `right` to `left`.
+ static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
+
+ // Fallback functions for `Create()`, `Append()` and `Prepend()` which
+ // deal with legacy / non conforming input, i.e.: CONCAT trees.
+ static CordRepBtree* CreateSlow(CordRep* rep);
+ static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
+ static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
+
+ // Aligns existing edges to start at index 0, to allow for a new edge to be
+ // added to the back of the current edges.
+ inline void AlignBegin();
+
+ // Aligns existing edges to end at `capacity`, to allow for a new edge to be
+ // added in front of the current edges.
+ inline void AlignEnd();
+
+ // Adds the provided edge to this node.
+ // Requires this node to have capacity for the edge. Realigns / moves
+ // existing edges as needed to prepend or append the new edge.
+ template <EdgeType edge_type>
+ inline void Add(CordRep* rep);
+
+ // Adds the provided edges to this node.
+ // Requires this node to have capacity for the edges. Realigns / moves
+ // existing edges as needed to prepend or append the new edges.
+ template <EdgeType edge_type>
+ inline void Add(absl::Span<CordRep* const>);
+
+ // Adds data from `data` to this node until either all data has been consumed,
+ // or there is no more capacity for additional flat nodes inside this node.
+ // Requires the current node to be a leaf node, data to be non empty, and the
+ // current node to have capacity for at least one more data edge.
+ // Returns any remaining data from `data` that was not added, which is
+ // depending on the edge type (front / back) either the remaining prefix of
+ // suffix of the input.
+ // See the `Append/Prepend` function for the meaning and purpose of `extra`.
+ template <EdgeType edge_type>
+ absl::string_view AddData(absl::string_view data, size_t extra);
+
+ // Replace the front or back edge with the provided value.
+ // Adopts a reference on `edge` and unrefs the old edge.
+ template <EdgeType edge_type>
+ inline void SetEdge(CordRep* edge);
+
+ // Returns a partial copy of the current tree containing the first `n` bytes
+ // of data. `CopyResult` contains both the resulting edge and its height. The
+ // resulting tree may be less high than the current tree, or even be a single
+ // matching data edge. For example, if `n == 1`, then the result will be the
+ // single data edge, and height will be set to -1 (one below the owning leaf
+ // node). If n == 0, this function returns null.
+ // Requires `n <= length`
+ CopyResult CopyPrefix(size_t n);
+
+ // Returns a partial copy of the current tree containing all data starting
+ // after `offset`. `CopyResult` contains both the resulting edge and its
+ // height. The resulting tree may be less high than the current tree, or even
+ // be a single matching data edge. For example, if `n == length - 1`, then the
+ // result will be a single data edge, and height will be set to -1 (one below
+ // the owning leaf node).
+ // Requires `offset < length`
+ CopyResult CopySuffix(size_t offset);
+
+ // Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
+ // depending on the value of `owned`.
+ inline OpResult ToOpResult(bool owned);
+
+ // Adds `rep` to the specified tree, returning the modified tree.
+ template <EdgeType edge_type>
+ static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
+
+ // Adds `data` to the specified tree, returning the modified tree.
+ // See the `Append/Prepend` function for the meaning and purpose of `extra`.
+ template <EdgeType edge_type>
+ static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
+ size_t extra = 0);
+
+ // Merges `src` into `dst` with `src` being added either before (kFront) or
+ // after (kBack) `dst`. Requires the height of `dst` to be greater than or
+ // equal to the height of `src`.
+ template <EdgeType edge_type>
+ static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
+
+ // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
+ // implements an inlined version for trees of limited height (3 levels),
+ // GetAppendBufferSlow implements the logic for large trees.
+ Span<char> GetAppendBufferSlow(size_t size);
+
+ // `edges_` contains all edges starting from this instance.
+ // These are explicitly `child` edges only, a cord btree (or any cord tree in
+ // that respect) does not store `parent` pointers anywhere: multiple trees /
+ // parents can reference the same shared child edge. The type of these edges
+ // depends on the height of the node. `Leaf nodes` (height == 0) contain `data
+ // edges` (external or flat nodes, or sub-strings thereof). All other nodes
+ // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
+ CordRep* edges_[kMaxCapacity];
+
+ friend class CordRepBtreeTestPeer;
+ friend class CordRepBtreeNavigator;
+};
+
+inline CordRepBtree* CordRep::btree() {
+ assert(tag == BTREE);
+ return static_cast<CordRepBtree*>(this);
+}
+
+inline const CordRepBtree* CordRep::btree() const {
+ assert(tag == BTREE);
+ return static_cast<const CordRepBtree*>(this);
+}
+
+inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
+ tag = BTREE;
+ storage[0] = height;
+ storage[1] = begin;
+ storage[2] = end;
+}
+
+inline CordRep* CordRepBtree::Edge(size_t index) const {
+ assert(index >= begin());
+ assert(index < end());
+ return edges_[index];
+}
+
+inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
+ return edges_[edge_type == kFront ? begin() : back()];
+}
+
+inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
+ return {edges_ + begin(), size()};
+}
+
+inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
+ size_t end) const {
+ assert(begin <= end);
+ assert(begin >= this->begin());
+ assert(end <= this->end());
+ return {edges_ + begin, static_cast<size_t>(end - begin)};
+}
+
+inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) {
+ assert(IsDataEdge(r));
+ size_t offset = 0;
+ if (r->tag == SUBSTRING) {
+ offset = r->substring()->start;
+ r = r->substring()->child;
+ }
+ return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset;
+}
+
+inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) {
+ return absl::string_view(EdgeDataPtr(r), r->length);
+}
+
+inline absl::string_view CordRepBtree::Data(size_t index) const {
+ assert(height() == 0);
+ return EdgeData(Edge(index));
+}
+
+inline bool CordRepBtree::IsDataEdge(const CordRep* rep) {
+ // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
+ // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
+ // check in the slow path the SUBSTRING check to optimize for the hot path.
+ if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
+ if (rep->tag == SUBSTRING) rep = rep->substring()->child;
+ return rep->tag == EXTERNAL || rep->tag >= FLAT;
+}
+
+inline CordRepBtree* CordRepBtree::New(int height) {
+ CordRepBtree* tree = new CordRepBtree;
+ tree->length = 0;
+ tree->InitInstance(height);
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
+ CordRepBtree* tree = new CordRepBtree;
+ int height = rep->tag == BTREE ? rep->btree()->height() + 1 : 0;
+ tree->length = rep->length;
+ tree->InitInstance(height, /*begin=*/0, /*end=*/1);
+ tree->edges_[0] = rep;
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
+ CordRepBtree* back) {
+ assert(front->height() == back->height());
+ CordRepBtree* tree = new CordRepBtree;
+ tree->length = front->length + back->length;
+ tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
+ tree->edges_[0] = front;
+ tree->edges_[1] = back;
+ return tree;
+}
+
+inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin,
+ size_t end) {
+ if (tree->height() == 0) {
+ DestroyLeaf(tree, begin, end);
+ } else {
+ DestroyNonLeaf(tree, begin, end);
+ }
+}
+
+inline void CordRepBtree::Destroy(CordRepBtree* tree) {
+ DestroyTree(tree, tree->begin(), tree->end());
+}
+
+inline CordRepBtree* CordRepBtree::CopyRaw() const {
+ auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree)));
+ memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree));
+ new (&tree->refcount) Refcount;
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::Copy() const {
+ CordRepBtree* tree = CopyRaw();
+ for (CordRep* rep : Edges()) CordRep::Ref(rep);
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
+ size_t new_length) const {
+ assert(begin >= this->begin());
+ assert(begin <= this->end());
+ CordRepBtree* tree = CopyRaw();
+ tree->length = new_length;
+ tree->set_begin(begin);
+ for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
+ return tree;
+}
+
+inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
+ size_t new_length) const {
+ assert(end <= capacity());
+ assert(end >= this->begin());
+ CordRepBtree* tree = CopyRaw();
+ tree->length = new_length;
+ tree->set_end(end);
+ for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
+ return tree;
+}
+
+inline void CordRepBtree::AlignBegin() {
+ // The below code itself does not need to be fast as typically we have
+ // mono-directional append/prepend calls, and `begin` / `end` are typically
+ // adjusted no more than once. But we want to avoid potential register clobber
+ // effects, making the compiler emit register save/store/spills, and minimize
+ // the size of code.
+ const size_t delta = begin();
+ if (ABSL_PREDICT_FALSE(delta != 0)) {
+ const size_t new_end = end() - delta;
+ set_begin(0);
+ set_end(new_end);
+ // TODO(mvels): we can write this using 2 loads / 2 stores depending on
+ // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
+ // size, and then do overlapping load/store of up to 4 pointers (inlined as
+ // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
+ // compact and b) not clobbering any registers.
+ ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
+#ifdef __clang__
+#pragma unroll 1
+#endif
+ for (size_t i = 0; i < new_end; ++i) {
+ edges_[i] = edges_[i + delta];
+ }
+ }
+}
+
+inline void CordRepBtree::AlignEnd() {
+ // See comments in `AlignBegin` for motivation on the hand-rolled for loops.
+ const size_t delta = capacity() - end();
+ if (delta != 0) {
+ const size_t new_begin = begin() + delta;
+ const size_t new_end = end() + delta;
+ set_begin(new_begin);
+ set_end(new_end);
+ ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
+#ifdef __clang__
+#pragma unroll 1
+#endif
+ for (size_t i = new_end - 1; i >= new_begin; --i) {
+ edges_[i] = edges_[i - delta];
+ }
+ }
+}
+
+template <>
+inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
+ AlignBegin();
+ edges_[fetch_add_end(1)] = rep;
+}
+
+template <>
+inline void CordRepBtree::Add<CordRepBtree::kBack>(
+ absl::Span<CordRep* const> edges) {
+ AlignBegin();
+ size_t new_end = end();
+ for (CordRep* edge : edges) edges_[new_end++] = edge;
+ set_end(new_end);
+}
+
+template <>
+inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
+ AlignEnd();
+ edges_[sub_fetch_begin(1)] = rep;
+}
+
+template <>
+inline void CordRepBtree::Add<CordRepBtree::kFront>(
+ absl::Span<CordRep* const> edges) {
+ AlignEnd();
+ size_t new_begin = begin() - edges.size();
+ set_begin(new_begin);
+ for (CordRep* edge : edges) edges_[new_begin++] = edge;
+}
+
+template <CordRepBtree::EdgeType edge_type>
+inline void CordRepBtree::SetEdge(CordRep* edge) {
+ const int idx = edge_type == kFront ? begin() : back();
+ CordRep::Unref(edges_[idx]);
+ edges_[idx] = edge;
+}
+
+inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
+ return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied};
+}
+
+inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
+ assert(offset < length);
+ size_t index = begin();
+ while (offset >= edges_[index]->length) offset -= edges_[index++]->length;
+ return {index, offset};
+}
+
+inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
+ assert(offset > 0);
+ assert(offset <= length);
+ size_t index = begin();
+ while (offset > edges_[index]->length) offset -= edges_[index++]->length;
+ return {index, offset};
+}
+
+inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
+ size_t offset) const {
+ size_t index = front.index;
+ offset = offset + front.n;
+ while (offset > edges_[index]->length) offset -= edges_[index++]->length;
+ return {index, offset};
+}
+
+inline CordRepBtree::Position CordRepBtree::IndexBeyond(
+ const size_t offset) const {
+ // We need to find the edge which `starting offset` is beyond (>=)`offset`.
+ // For this we can't use the `offset -= length` logic of IndexOf. Instead, we
+ // track the offset of the `current edge` in `off`, which we increase as we
+ // iterate over the edges until we find the matching edge.
+ size_t off = 0;
+ size_t index = begin();
+ while (offset > off) off += edges_[index++]->length;
+ return {index, off - offset};
+}
+
+inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
+ if (IsDataEdge(rep)) return New(rep);
+ return CreateSlow(rep);
+}
+
+inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
+ assert(refcount.IsOne());
+ CordRepBtree* tree = this;
+ const int height = this->height();
+ CordRepBtree* n1 = tree;
+ CordRepBtree* n2 = tree;
+ CordRepBtree* n3 = tree;
+ switch (height) {
+ case 3:
+ tree = tree->Edge(kBack)->btree();
+ if (!tree->refcount.IsOne()) return {};
+ n2 = tree;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 2:
+ tree = tree->Edge(kBack)->btree();
+ if (!tree->refcount.IsOne()) return {};
+ n1 = tree;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 1:
+ tree = tree->Edge(kBack)->btree();
+ if (!tree->refcount.IsOne()) return {};
+ ABSL_FALLTHROUGH_INTENDED;
+ case 0:
+ CordRep* edge = tree->Edge(kBack);
+ if (!edge->refcount.IsOne()) return {};
+ if (edge->tag < FLAT) return {};
+ size_t avail = edge->flat()->Capacity() - edge->length;
+ if (avail == 0) return {};
+ size_t delta = (std::min)(size, avail);
+ Span<char> span = {edge->flat()->Data() + edge->length, delta};
+ edge->length += delta;
+ switch (height) {
+ case 3:
+ n3->length += delta;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 2:
+ n2->length += delta;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 1:
+ n1->length += delta;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 0:
+ tree->length += delta;
+ return span;
+ }
+ break;
+ }
+ return GetAppendBufferSlow(size);
+}
+
+extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
+ CordRepBtree* tree, CordRep* rep);
+
+extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
+ CordRepBtree* tree, CordRep* rep);
+
+inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
+ if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
+ return CordRepBtree::AddCordRep<kBack>(tree, rep);
+ }
+ return AppendSlow(tree, rep);
+}
+
+inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
+ if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
+ return CordRepBtree::AddCordRep<kFront>(tree, rep);
+ }
+ return PrependSlow(tree, rep);
+}
+
+#ifdef NDEBUG
+
+inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
+ return tree;
+}
+
+inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
+ return tree;
+}
+
+#endif
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc
new file mode 100644
index 00000000..7a0b7811
--- /dev/null
+++ b/absl/strings/internal/cord_rep_btree_test.cc
@@ -0,0 +1,1352 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/cord_rep_btree.h"
+
+#include <cmath>
+#include <deque>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/config.h"
+#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_test_util.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+class CordRepBtreeTestPeer {
+ public:
+ static void SetEdge(CordRepBtree* node, size_t idx, CordRep* edge) {
+ node->edges_[idx] = edge;
+ }
+ static void AddEdge(CordRepBtree* node, CordRep* edge) {
+ node->edges_[node->fetch_add_end(1)] = edge;
+ }
+};
+
+namespace {
+
+using ::absl::cordrep_testing::AutoUnref;
+using ::absl::cordrep_testing::CordToString;
+using ::absl::cordrep_testing::CreateFlatsFromString;
+using ::absl::cordrep_testing::CreateRandomString;
+using ::absl::cordrep_testing::MakeConcat;
+using ::absl::cordrep_testing::MakeExternal;
+using ::absl::cordrep_testing::MakeFlat;
+using ::absl::cordrep_testing::MakeSubstring;
+using ::testing::_;
+using ::testing::AllOf;
+using ::testing::AnyOf;
+using ::testing::Conditional;
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::Ne;
+using ::testing::Not;
+using ::testing::SizeIs;
+using ::testing::TypedEq;
+
+MATCHER_P(EqFlatHolding, data, "Equals flat holding data") {
+ if (arg->tag < FLAT) {
+ *result_listener << "Expected FLAT, got tag " << static_cast<int>(arg->tag);
+ return false;
+ }
+ std::string actual = CordToString(arg);
+ if (actual != data) {
+ *result_listener << "Expected flat holding \"" << data
+ << "\", got flat holding \"" << actual << "\"";
+ return false;
+ }
+ return true;
+}
+
+MATCHER_P(IsNode, height, absl::StrCat("Is a valid node of height ", height)) {
+ if (arg == nullptr) {
+ *result_listener << "Expected NODE, got nullptr";
+ return false;
+ }
+ if (arg->tag != BTREE) {
+ *result_listener << "Expected NODE, got " << static_cast<int>(arg->tag);
+ return false;
+ }
+ if (!CordRepBtree::IsValid(arg->btree())) {
+ CordRepBtree::Dump(arg->btree(), "Expected valid NODE, got:", false,
+ *result_listener->stream());
+ return false;
+ }
+ if (arg->btree()->height() != height) {
+ *result_listener << "Expected NODE of height " << height << ", got "
+ << arg->btree()->height();
+ return false;
+ }
+ return true;
+}
+
+MATCHER_P2(IsSubstring, start, length,
+ absl::StrCat("Is a substring(start = ", start, ", length = ", length,
+ ")")) {
+ if (arg == nullptr) {
+ *result_listener << "Expected substring, got nullptr";
+ return false;
+ }
+ if (arg->tag != SUBSTRING) {
+ *result_listener << "Expected SUBSTRING, got "
+ << static_cast<int>(arg->tag);
+ return false;
+ }
+ const CordRepSubstring* const substr = arg->substring();
+ if (substr->start != start || substr->length != length) {
+ *result_listener << "Expected substring(" << start << ", " << length
+ << "), got substring(" << substr->start << ", "
+ << substr->length << ")";
+ return false;
+ }
+ return true;
+}
+
+// DataConsumer is a simple helper class used by tests to 'consume' string
+// fragments from the provided input in forward or backward direction.
+class DataConsumer {
+ public:
+ // Starts consumption of `data`. Caller must make sure `data` outlives this
+ // instance. Consumes data starting at the front if `forward` is true, else
+ // consumes data from the back.
+ DataConsumer(absl::string_view data, bool forward)
+ : data_(data), forward_(forward) {}
+
+ // Return the next `n` bytes from referenced data.
+ absl::string_view Next(size_t n) {
+ assert(n <= data_.size() - consumed_);
+ consumed_ += n;
+ return data_.substr(forward_ ? consumed_ - n : data_.size() - consumed_, n);
+ }
+
+ // Returns all data consumed so far.
+ absl::string_view Consumed() const {
+ return forward_ ? data_.substr(0, consumed_)
+ : data_.substr(data_.size() - consumed_);
+ }
+
+ private:
+ absl::string_view data_;
+ size_t consumed_ = 0;
+ bool forward_;
+};
+
+// BtreeAdd returns either CordRepBtree::Append or CordRepBtree::Prepend.
+CordRepBtree* BtreeAdd(CordRepBtree* node, bool append,
+ absl::string_view data) {
+ return append ? CordRepBtree::Append(node, data)
+ : CordRepBtree::Prepend(node, data);
+}
+
+// Recursively collects all leaf edges from `tree` and appends them to `edges`.
+void GetLeafEdges(const CordRepBtree* tree, std::vector<CordRep*>& edges) {
+ if (tree->height() == 0) {
+ for (CordRep* edge : tree->Edges()) {
+ edges.push_back(edge);
+ }
+ } else {
+ for (CordRep* edge : tree->Edges()) {
+ GetLeafEdges(edge->btree(), edges);
+ }
+ }
+}
+
+// Recursively collects and returns all leaf edges from `tree`.
+std::vector<CordRep*> GetLeafEdges(const CordRepBtree* tree) {
+ std::vector<CordRep*> edges;
+ GetLeafEdges(tree, edges);
+ return edges;
+}
+
+// Creates a flat containing the hexadecimal value of `i` zero padded
+// to at least 4 digits prefixed with "0x", e.g.: "0x04AC".
+CordRepFlat* MakeHexFlat(size_t i) {
+ return MakeFlat(absl::StrCat("0x", absl::Hex(i, absl::kZeroPad4)));
+}
+
+CordRepBtree* MakeLeaf(size_t size = CordRepBtree::kMaxCapacity) {
+ assert(size <= CordRepBtree::kMaxCapacity);
+ CordRepBtree* leaf = CordRepBtree::Create(MakeHexFlat(0));
+ for (size_t i = 1; i < size; ++i) {
+ leaf = CordRepBtree::Append(leaf, MakeHexFlat(i));
+ }
+ return leaf;
+}
+
+CordRepBtree* MakeTree(size_t size, bool append = true) {
+ CordRepBtree* tree = CordRepBtree::Create(MakeHexFlat(0));
+ for (size_t i = 1; i < size; ++i) {
+ tree = append ? CordRepBtree::Append(tree, MakeHexFlat(i))
+ : CordRepBtree::Prepend(tree, MakeHexFlat(i));
+ }
+ return tree;
+}
+
+CordRepBtree* CreateTree(absl::string_view data, size_t chunk_size) {
+ std::vector<CordRep*> flats = CreateFlatsFromString(data, chunk_size);
+ auto it = flats.begin();
+ CordRepBtree* tree = CordRepBtree::Create(*it);
+ while (++it != flats.end()) tree = CordRepBtree::Append(tree, *it);
+ return tree;
+}
+
+CordRepBtree* CreateTreeReverse(absl::string_view data, size_t chunk_size) {
+ std::vector<CordRep*> flats = CreateFlatsFromString(data, chunk_size);
+ auto rit = flats.rbegin();
+ CordRepBtree* tree = CordRepBtree::Create(*rit);
+ while (++rit != flats.rend()) tree = CordRepBtree::Prepend(tree, *rit);
+ return tree;
+}
+
+class CordRepBtreeTest : public testing::TestWithParam<bool> {
+ public:
+ bool shared() const { return GetParam(); }
+
+ static std::string ToString(testing::TestParamInfo<bool> param) {
+ return param.param ? "Shared" : "Private";
+ }
+};
+
+INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeTest, testing::Bool(),
+ CordRepBtreeTest::ToString);
+
+class CordRepBtreeHeightTest : public testing::TestWithParam<int> {
+ public:
+ int height() const { return GetParam(); }
+
+ static std::string ToString(testing::TestParamInfo<int> param) {
+ return absl::StrCat(param.param);
+ }
+};
+
+INSTANTIATE_TEST_SUITE_P(WithHeights, CordRepBtreeHeightTest,
+ testing::Range(0, CordRepBtree::kMaxHeight),
+ CordRepBtreeHeightTest::ToString);
+
+using TwoBools = testing::tuple<bool, bool>;
+
+class CordRepBtreeDualTest : public testing::TestWithParam<TwoBools> {
+ public:
+ bool first_shared() const { return std::get<0>(GetParam()); }
+ bool second_shared() const { return std::get<1>(GetParam()); }
+
+ static std::string ToString(testing::TestParamInfo<TwoBools> param) {
+ if (std::get<0>(param.param)) {
+ return std::get<1>(param.param) ? "BothShared" : "FirstShared";
+ }
+ return std::get<1>(param.param) ? "SecondShared" : "Private";
+ }
+};
+
+INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeDualTest,
+ testing::Combine(testing::Bool(), testing::Bool()),
+ CordRepBtreeDualTest::ToString);
+
+TEST(CordRepBtreeTest, SizeIsMultipleOf64) {
+ // Only enforce for fully 64-bit platforms.
+ if (sizeof(size_t) == 8 && sizeof(void*) == 8) {
+ EXPECT_THAT(sizeof(CordRepBtree) % 64, Eq(0)) << "Should be multiple of 64";
+ }
+}
+
+TEST(CordRepBtreeTest, NewDestroyEmptyTree) {
+ auto* tree = CordRepBtree::New();
+ EXPECT_THAT(tree->size(), Eq(0));
+ EXPECT_THAT(tree->height(), Eq(0));
+ EXPECT_THAT(tree->Edges(), ElementsAre());
+ CordRepBtree::Destroy(tree);
+}
+
+TEST(CordRepBtreeTest, NewDestroyEmptyTreeAtHeight) {
+ auto* tree = CordRepBtree::New(3);
+ EXPECT_THAT(tree->size(), Eq(0));
+ EXPECT_THAT(tree->height(), Eq(3));
+ EXPECT_THAT(tree->Edges(), ElementsAre());
+ CordRepBtree::Destroy(tree);
+}
+
+TEST(CordRepBtreeTest, Btree) {
+ CordRep* rep = CordRepBtree::New();
+ EXPECT_THAT(rep->btree(), Eq(rep));
+ EXPECT_THAT(static_cast<const CordRep*>(rep)->btree(), Eq(rep));
+ CordRep::Unref(rep);
+#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+ rep = MakeFlat("Hello world");
+ EXPECT_DEATH(rep->btree(), ".*");
+ EXPECT_DEATH(static_cast<const CordRep*>(rep)->btree(), ".*");
+ CordRep::Unref(rep);
+#endif
+}
+
+TEST(CordRepBtreeTest, EdgeData) {
+ CordRepFlat* flat = MakeFlat("Hello world");
+ CordRepExternal* external = MakeExternal("Hello external");
+ CordRep* substr1 = MakeSubstring(1, 6, CordRep::Ref(flat));
+ CordRep* substr2 = MakeSubstring(1, 6, CordRep::Ref(external));
+ CordRep* concat = MakeConcat(CordRep::Ref(flat), CordRep::Ref(external));
+ CordRep* bad_substr = MakeSubstring(1, 2, CordRep::Ref(substr1));
+
+ EXPECT_TRUE(CordRepBtree::IsDataEdge(flat));
+ EXPECT_THAT(CordRepBtree::EdgeDataPtr(flat),
+ TypedEq<const void*>(flat->Data()));
+ EXPECT_THAT(CordRepBtree::EdgeData(flat), Eq("Hello world"));
+
+ EXPECT_TRUE(CordRepBtree::IsDataEdge(external));
+ EXPECT_THAT(CordRepBtree::EdgeDataPtr(external),
+ TypedEq<const void*>(external->base));
+ EXPECT_THAT(CordRepBtree::EdgeData(external), Eq("Hello external"));
+
+ EXPECT_TRUE(CordRepBtree::IsDataEdge(substr1));
+ EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr1),
+ TypedEq<const void*>(flat->Data() + 1));
+ EXPECT_THAT(CordRepBtree::EdgeData(substr1), Eq("ello w"));
+
+ EXPECT_TRUE(CordRepBtree::IsDataEdge(substr2));
+ EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr2),
+ TypedEq<const void*>(external->base + 1));
+ EXPECT_THAT(CordRepBtree::EdgeData(substr2), Eq("ello e"));
+
+ EXPECT_FALSE(CordRepBtree::IsDataEdge(concat));
+ EXPECT_FALSE(CordRepBtree::IsDataEdge(bad_substr));
+#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+ EXPECT_DEATH(CordRepBtree::EdgeData(concat), ".*");
+ EXPECT_DEATH(CordRepBtree::EdgeDataPtr(concat), ".*");
+ EXPECT_DEATH(CordRepBtree::EdgeData(bad_substr), ".*");
+ EXPECT_DEATH(CordRepBtree::EdgeDataPtr(bad_substr), ".*");
+#endif
+
+ CordRep::Unref(bad_substr);
+ CordRep::Unref(concat);
+ CordRep::Unref(substr2);
+ CordRep::Unref(substr1);
+ CordRep::Unref(external);
+ CordRep::Unref(flat);
+}
+
+TEST(CordRepBtreeTest, CreateUnrefLeaf) {
+ auto* flat = MakeFlat("a");
+ auto* leaf = CordRepBtree::Create(flat);
+ EXPECT_THAT(leaf->size(), Eq(1));
+ EXPECT_THAT(leaf->height(), Eq(0));
+ EXPECT_THAT(leaf->Edges(), ElementsAre(flat));
+ CordRepBtree::Unref(leaf);
+}
+
+TEST(CordRepBtreeTest, NewUnrefNode) {
+ auto* leaf = CordRepBtree::Create(MakeFlat("a"));
+ CordRepBtree* tree = CordRepBtree::New(leaf);
+ EXPECT_THAT(tree->size(), Eq(1));
+ EXPECT_THAT(tree->height(), Eq(1));
+ EXPECT_THAT(tree->Edges(), ElementsAre(leaf));
+ CordRepBtree::Unref(tree);
+}
+
+TEST_P(CordRepBtreeTest, AppendToLeafToCapacity) {
+ AutoUnref refs;
+ std::vector<CordRep*> flats;
+ flats.push_back(MakeHexFlat(0));
+ auto* leaf = CordRepBtree::Create(flats.back());
+
+ for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) {
+ refs.RefIf(shared(), leaf);
+ flats.push_back(MakeHexFlat(i));
+ auto* result = CordRepBtree::Append(leaf, flats.back());
+ EXPECT_THAT(result->height(), Eq(0));
+ EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf)));
+ EXPECT_THAT(result->Edges(), ElementsAreArray(flats));
+ leaf = result;
+ }
+ CordRep::Unref(leaf);
+}
+
+TEST_P(CordRepBtreeTest, PrependToLeafToCapacity) {
+ AutoUnref refs;
+ std::deque<CordRep*> flats;
+ flats.push_front(MakeHexFlat(0));
+ auto* leaf = CordRepBtree::Create(flats.front());
+
+ for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) {
+ refs.RefIf(shared(), leaf);
+ flats.push_front(MakeHexFlat(i));
+ auto* result = CordRepBtree::Prepend(leaf, flats.front());
+ EXPECT_THAT(result->height(), Eq(0));
+ EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf)));
+ EXPECT_THAT(result->Edges(), ElementsAreArray(flats));
+ leaf = result;
+ }
+ CordRep::Unref(leaf);
+}
+
+// This test specifically aims at code aligning data at either the front or the
+// back of the contained `edges[]` array, alternating Append and Prepend will
+// move `begin()` and `end()` values as needed for each added value.
+TEST_P(CordRepBtreeTest, AppendPrependToLeafToCapacity) {
+ AutoUnref refs;
+ std::deque<CordRep*> flats;
+ flats.push_front(MakeHexFlat(0));
+ auto* leaf = CordRepBtree::Create(flats.front());
+
+ for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) {
+ refs.RefIf(shared(), leaf);
+ CordRepBtree* result;
+ if (i % 2 != 0) {
+ flats.push_front(MakeHexFlat(i));
+ result = CordRepBtree::Prepend(leaf, flats.front());
+ } else {
+ flats.push_back(MakeHexFlat(i));
+ result = CordRepBtree::Append(leaf, flats.back());
+ }
+ EXPECT_THAT(result->height(), Eq(0));
+ EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf)));
+ EXPECT_THAT(result->Edges(), ElementsAreArray(flats));
+ leaf = result;
+ }
+ CordRep::Unref(leaf);
+}
+
+TEST_P(CordRepBtreeTest, AppendToLeafBeyondCapacity) {
+ AutoUnref refs;
+ auto* leaf = MakeLeaf();
+ refs.RefIf(shared(), leaf);
+ CordRep* flat = MakeFlat("abc");
+ auto* result = CordRepBtree::Append(leaf, flat);
+ ASSERT_THAT(result, IsNode(1));
+ EXPECT_THAT(result, Ne(leaf));
+ absl::Span<CordRep* const> edges = result->Edges();
+ ASSERT_THAT(edges, ElementsAre(leaf, IsNode(0)));
+ EXPECT_THAT(edges[1]->btree()->Edges(), ElementsAre(flat));
+ CordRep::Unref(result);
+}
+
+TEST_P(CordRepBtreeTest, PrependToLeafBeyondCapacity) {
+ AutoUnref refs;
+ auto* leaf = MakeLeaf();
+ refs.RefIf(shared(), leaf);
+ CordRep* flat = MakeFlat("abc");
+ auto* result = CordRepBtree::Prepend(leaf, flat);
+ ASSERT_THAT(result, IsNode(1));
+ EXPECT_THAT(result, Ne(leaf));
+ absl::Span<CordRep* const> edges = result->Edges();
+ ASSERT_THAT(edges, ElementsAre(IsNode(0), leaf));
+ EXPECT_THAT(edges[0]->btree()->Edges(), ElementsAre(flat));
+ CordRep::Unref(result);
+}
+
+TEST_P(CordRepBtreeTest, AppendToTreeOneDeep) {
+ constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
+ AutoUnref refs;
+ std::vector<CordRep*> flats;
+ flats.push_back(MakeHexFlat(0));
+ CordRepBtree* tree = CordRepBtree::Create(flats.back());
+ for (size_t i = 1; i <= max_cap; ++i) {
+ flats.push_back(MakeHexFlat(i));
+ tree = CordRepBtree::Append(tree, flats.back());
+ }
+ ASSERT_THAT(tree, IsNode(1));
+
+ for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) {
+ // Ref top level tree based on param.
+ // Ref leaf node once every 4 iterations, which should not have an
+ // observable effect other than that the leaf itself is copied.
+ refs.RefIf(shared(), tree);
+ refs.RefIf(i % 4 == 0, tree->Edges().back());
+
+ flats.push_back(MakeHexFlat(i));
+ CordRepBtree* result = CordRepBtree::Append(tree, flats.back());
+ ASSERT_THAT(result, IsNode(1));
+ ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+ std::vector<CordRep*> edges = GetLeafEdges(result);
+ ASSERT_THAT(edges, ElementsAreArray(flats));
+ tree = result;
+ }
+ CordRep::Unref(tree);
+}
+
+TEST_P(CordRepBtreeTest, AppendToTreeTwoDeep) {
+ constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
+ AutoUnref refs;
+ std::vector<CordRep*> flats;
+ flats.push_back(MakeHexFlat(0));
+ CordRepBtree* tree = CordRepBtree::Create(flats.back());
+ for (size_t i = 1; i <= max_cap * max_cap; ++i) {
+ flats.push_back(MakeHexFlat(i));
+ tree = CordRepBtree::Append(tree, flats.back());
+ }
+ ASSERT_THAT(tree, IsNode(2));
+ for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) {
+ // Ref top level tree based on param.
+ // Ref child node once every 16 iterations, and leaf node every 4
+ // iterrations which which should not have an observable effect other than
+ // the node and/or the leaf below it being copied.
+ refs.RefIf(shared(), tree);
+ refs.RefIf(i % 16 == 0, tree->Edges().back());
+ refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back());
+
+ flats.push_back(MakeHexFlat(i));
+ CordRepBtree* result = CordRepBtree::Append(tree, flats.back());
+ ASSERT_THAT(result, IsNode(2));
+ ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+ std::vector<CordRep*> edges = GetLeafEdges(result);
+ ASSERT_THAT(edges, ElementsAreArray(flats));
+ tree = result;
+ }
+ CordRep::Unref(tree);
+}
+
+TEST_P(CordRepBtreeTest, PrependToTreeOneDeep) {
+ constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
+ AutoUnref refs;
+ std::deque<CordRep*> flats;
+ flats.push_back(MakeHexFlat(0));
+ CordRepBtree* tree = CordRepBtree::Create(flats.back());
+ for (size_t i = 1; i <= max_cap; ++i) {
+ flats.push_front(MakeHexFlat(i));
+ tree = CordRepBtree::Prepend(tree, flats.front());
+ }
+ ASSERT_THAT(tree, IsNode(1));
+
+ for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) {
+ // Ref top level tree based on param.
+ // Ref leaf node once every 4 iterations which should not have an observable
+ // effect other than than the leaf itself is copied.
+ refs.RefIf(shared(), tree);
+ refs.RefIf(i % 4 == 0, tree->Edges().back());
+
+ flats.push_front(MakeHexFlat(i));
+ CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front());
+ ASSERT_THAT(result, IsNode(1));
+ ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+ std::vector<CordRep*> edges = GetLeafEdges(result);
+ ASSERT_THAT(edges, ElementsAreArray(flats));
+ tree = result;
+ }
+ CordRep::Unref(tree);
+}
+
+TEST_P(CordRepBtreeTest, PrependToTreeTwoDeep) {
+ constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
+ AutoUnref refs;
+ std::deque<CordRep*> flats;
+ flats.push_back(MakeHexFlat(0));
+ CordRepBtree* tree = CordRepBtree::Create(flats.back());
+ for (size_t i = 1; i <= max_cap * max_cap; ++i) {
+ flats.push_front(MakeHexFlat(i));
+ tree = CordRepBtree::Prepend(tree, flats.front());
+ }
+ ASSERT_THAT(tree, IsNode(2));
+ for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) {
+ // Ref top level tree based on param.
+ // Ref child node once every 16 iterations, and leaf node every 4
+ // iterrations which which should not have an observable effect other than
+ // the node and/or the leaf below it being copied.
+ refs.RefIf(shared(), tree);
+ refs.RefIf(i % 16 == 0, tree->Edges().back());
+ refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back());
+
+ flats.push_front(MakeHexFlat(i));
+ CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front());
+ ASSERT_THAT(result, IsNode(2));
+ ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+ std::vector<CordRep*> edges = GetLeafEdges(result);
+ ASSERT_THAT(edges, ElementsAreArray(flats));
+ tree = result;
+ }
+ CordRep::Unref(tree);
+}
+
+TEST_P(CordRepBtreeDualTest, MergeLeafsNotExceedingCapacity) {
+ for (bool use_append : {false, true}) {
+ SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
+
+ AutoUnref refs;
+ std::vector<CordRep*> flats;
+
+ // Build `left` side leaf appending all contained flats to `flats`
+ CordRepBtree* left = MakeLeaf(3);
+ GetLeafEdges(left, flats);
+ refs.RefIf(first_shared(), left);
+
+ // Build `right` side leaf appending all contained flats to `flats`
+ CordRepBtree* right = MakeLeaf(2);
+ GetLeafEdges(right, flats);
+ refs.RefIf(second_shared(), right);
+
+ CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
+ : CordRepBtree::Prepend(right, left);
+ EXPECT_THAT(tree, IsNode(0));
+
+ // `tree` contains all flats originally belonging to `left` and `right`.
+ EXPECT_THAT(tree->Edges(), ElementsAreArray(flats));
+ CordRepBtree::Unref(tree);
+ }
+}
+
+TEST_P(CordRepBtreeDualTest, MergeLeafsExceedingCapacity) {
+ for (bool use_append : {false, true}) {
+ SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
+
+ AutoUnref refs;
+
+ // Build `left` side tree appending all contained flats to `flats`
+ CordRepBtree* left = MakeLeaf(CordRepBtree::kMaxCapacity - 2);
+ refs.RefIf(first_shared(), left);
+
+ // Build `right` side tree appending all contained flats to `flats`
+ CordRepBtree* right = MakeLeaf(CordRepBtree::kMaxCapacity - 1);
+ refs.RefIf(second_shared(), right);
+
+ CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
+ : CordRepBtree::Prepend(right, left);
+ EXPECT_THAT(tree, IsNode(1));
+ EXPECT_THAT(tree->Edges(), ElementsAre(left, right));
+ CordRepBtree::Unref(tree);
+ }
+}
+
+TEST_P(CordRepBtreeDualTest, MergeEqualHeightTrees) {
+ for (bool use_append : {false, true}) {
+ SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
+
+ AutoUnref refs;
+ std::vector<CordRep*> flats;
+
+ // Build `left` side tree appending all contained flats to `flats`
+ CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3);
+ GetLeafEdges(left, flats);
+ refs.RefIf(first_shared(), left);
+
+ // Build `right` side tree appending all contained flats to `flats`
+ CordRepBtree* right = MakeTree(CordRepBtree::kMaxCapacity * 2);
+ GetLeafEdges(right, flats);
+ refs.RefIf(second_shared(), right);
+
+ CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
+ : CordRepBtree::Prepend(right, left);
+ EXPECT_THAT(tree, IsNode(1));
+ EXPECT_THAT(tree->Edges(), SizeIs(5));
+
+ // `tree` contains all flats originally belonging to `left` and `right`.
+ EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats));
+ CordRepBtree::Unref(tree);
+ }
+}
+
+TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeNotExceedingLeafCapacity) {
+ for (bool use_append : {false, true}) {
+ SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
+
+ AutoUnref refs;
+ std::vector<CordRep*> flats;
+
+ // Build `left` side tree appending all added flats to `flats`
+ CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 2 + 2);
+ GetLeafEdges(left, flats);
+ refs.RefIf(first_shared(), left);
+
+ // Build `right` side tree appending all added flats to `flats`
+ CordRepBtree* right = MakeTree(3);
+ GetLeafEdges(right, flats);
+ refs.RefIf(second_shared(), right);
+
+ CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
+ : CordRepBtree::Prepend(right, left);
+ EXPECT_THAT(tree, IsNode(1));
+ EXPECT_THAT(tree->Edges(), SizeIs(3));
+
+ // `tree` contains all flats originally belonging to `left` and `right`.
+ EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats));
+ CordRepBtree::Unref(tree);
+ }
+}
+
+TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeExceedingLeafCapacity) {
+ for (bool use_append : {false, true}) {
+ SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend");
+
+ AutoUnref refs;
+ std::vector<CordRep*> flats;
+
+ // Build `left` side tree appending all added flats to `flats`
+ CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3 - 2);
+ GetLeafEdges(left, flats);
+ refs.RefIf(first_shared(), left);
+
+ // Build `right` side tree appending all added flats to `flats`
+ CordRepBtree* right = MakeTree(3);
+ GetLeafEdges(right, flats);
+ refs.RefIf(second_shared(), right);
+
+ CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right)
+ : CordRepBtree::Prepend(right, left);
+ EXPECT_THAT(tree, IsNode(1));
+ EXPECT_THAT(tree->Edges(), SizeIs(4));
+
+ // `tree` contains all flats originally belonging to `left` and `right`.
+ EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats));
+ CordRepBtree::Unref(tree);
+ }
+}
+
+void RefEdgesAt(size_t depth, AutoUnref& refs, CordRepBtree* tree) {
+ absl::Span<CordRep* const> edges = tree->Edges();
+ if (depth == 0) {
+ refs.Ref(edges.front());
+ refs.Ref(edges.back());
+ } else {
+ assert(tree->height() > 0);
+ RefEdgesAt(depth - 1, refs, edges.front()->btree());
+ RefEdgesAt(depth - 1, refs, edges.back()->btree());
+ }
+}
+
+TEST(CordRepBtreeTest, MergeFuzzTest) {
+ constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
+ std::minstd_rand rnd;
+ std::uniform_int_distribution<int> coin_flip(0, 1);
+ std::uniform_int_distribution<int> dice_throw(1, 6);
+
+ auto random_leaf_count = [&]() {
+ std::uniform_int_distribution<int> dist_height(0, 3);
+ std::uniform_int_distribution<int> dist_leaf(0, max_cap - 1);
+ const size_t height = dist_height(rnd);
+ return (height ? pow(max_cap, height) : 0) + dist_leaf(rnd);
+ };
+
+ for (int i = 0; i < 10000; ++i) {
+ AutoUnref refs;
+ std::vector<CordRep*> flats;
+
+ CordRepBtree* left = MakeTree(random_leaf_count(), coin_flip(rnd));
+ GetLeafEdges(left, flats);
+ if (dice_throw(rnd) == 1) {
+ std::uniform_int_distribution<int> dist(0, left->height());
+ RefEdgesAt(dist(rnd), refs, left);
+ }
+
+ CordRepBtree* right = MakeTree(random_leaf_count(), coin_flip(rnd));
+ GetLeafEdges(right, flats);
+ if (dice_throw(rnd) == 1) {
+ std::uniform_int_distribution<int> dist(0, right->height());
+ RefEdgesAt(dist(rnd), refs, right);
+ }
+
+ CordRepBtree* tree = CordRepBtree::Append(left, right);
+ EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats));
+ CordRepBtree::Unref(tree);
+ }
+}
+
+TEST(CordRepBtreeTest, SubTree) {
+ // Create tree of at least 2 levels high
+ constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
+ const size_t n = max_cap * max_cap * 2;
+ const std::string data = CreateRandomString(n * 3);
+ std::vector<CordRep*> flats;
+ for (absl::string_view s = data; !s.empty(); s.remove_prefix(3)) {
+ flats.push_back(MakeFlat(s.substr(0, 3)));
+ }
+ CordRepBtree* node = CordRepBtree::Create(CordRep::Ref(flats[0]));
+ for (size_t i = 1; i < flats.size(); ++i) {
+ node = CordRepBtree::Append(node, CordRep::Ref(flats[i]));
+ }
+
+ for (int offset = 0; offset < data.length(); ++offset) {
+ for (int length = 1; length <= data.length() - offset; ++length) {
+ CordRep* rep = node->SubTree(offset, length);
+ EXPECT_THAT(CordToString(rep), Eq(data.substr(offset, length)));
+ CordRep::Unref(rep);
+ }
+ }
+ CordRepBtree::Unref(node);
+ for (CordRep* rep : flats) {
+ CordRep::Unref(rep);
+ }
+}
+
+TEST(CordRepBtreeTest, SubTreeOnExistingSubstring) {
+ // This test verifies that a SubTree call on a pre-existing (large) substring
+ // adjusts the existing substring if not shared, and else rewrites the
+ // existing substring.
+ AutoUnref refs;
+ std::string data = CreateRandomString(1000);
+ CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc"));
+ CordRep* flat = MakeFlat(data);
+ leaf = CordRepBtree::Append(leaf, flat);
+
+ // Setup tree containing substring.
+ CordRep* result = leaf->SubTree(0, 3 + 990);
+ ASSERT_THAT(result->tag, Eq(BTREE));
+ CordRep::Unref(leaf);
+ leaf = result->btree();
+ ASSERT_THAT(leaf->Edges(), ElementsAre(_, IsSubstring(0, 990)));
+ EXPECT_THAT(leaf->Edges()[1]->substring()->child, Eq(flat));
+
+ // Verify substring of substring.
+ result = leaf->SubTree(3 + 5, 970);
+ ASSERT_THAT(result, IsSubstring(5, 970));
+ EXPECT_THAT(result->substring()->child, Eq(flat));
+ CordRep::Unref(result);
+
+ CordRep::Unref(leaf);
+}
+
+TEST_P(CordRepBtreeTest, AddDataToLeaf) {
+ const size_t n = CordRepBtree::kMaxCapacity;
+ const std::string data = CreateRandomString(n * 3);
+
+ for (bool append : {true, false}) {
+ AutoUnref refs;
+ DataConsumer consumer(data, append);
+ SCOPED_TRACE(append ? "Append" : "Prepend");
+
+ CordRepBtree* leaf = CordRepBtree::Create(MakeFlat(consumer.Next(3)));
+ for (size_t i = 1; i < n; ++i) {
+ refs.RefIf(shared(), leaf);
+ CordRepBtree* result = BtreeAdd(leaf, append, consumer.Next(3));
+ EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf)));
+ EXPECT_THAT(CordToString(result), Eq(consumer.Consumed()));
+ leaf = result;
+ }
+ CordRep::Unref(leaf);
+ }
+}
+
+TEST_P(CordRepBtreeTest, AppendDataToTree) {
+ AutoUnref refs;
+ size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2;
+ std::string data = CreateRandomString(n * 3);
+ CordRepBtree* tree = refs.RefIf(shared(), CreateTree(data, 3));
+ CordRepBtree* leaf0 = tree->Edges()[0]->btree();
+ CordRepBtree* leaf1 = tree->Edges()[1]->btree();
+ CordRepBtree* result = CordRepBtree::Append(tree, "123456789");
+ EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+ EXPECT_THAT(result->Edges(),
+ ElementsAre(leaf0, Conditional(shared(), Ne(leaf1), Eq(leaf1))));
+ EXPECT_THAT(CordToString(result), Eq(data + "123456789"));
+ CordRep::Unref(result);
+}
+
+TEST_P(CordRepBtreeTest, PrependDataToTree) {
+ AutoUnref refs;
+ size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2;
+ std::string data = CreateRandomString(n * 3);
+ CordRepBtree* tree = refs.RefIf(shared(), CreateTreeReverse(data, 3));
+ CordRepBtree* leaf0 = tree->Edges()[0]->btree();
+ CordRepBtree* leaf1 = tree->Edges()[1]->btree();
+ CordRepBtree* result = CordRepBtree::Prepend(tree, "123456789");
+ EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+ EXPECT_THAT(result->Edges(),
+ ElementsAre(Conditional(shared(), Ne(leaf0), Eq(leaf0)), leaf1));
+ EXPECT_THAT(CordToString(result), Eq("123456789" + data));
+ CordRep::Unref(result);
+}
+
+TEST_P(CordRepBtreeTest, AddDataToTreeThreeLevelsDeep) {
+ constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
+ const size_t n = max_cap * max_cap * max_cap;
+ const std::string data = CreateRandomString(n * 3);
+
+ for (bool append : {true, false}) {
+ AutoUnref refs;
+ DataConsumer consumer(data, append);
+ SCOPED_TRACE(append ? "Append" : "Prepend");
+
+ // Fill leaf
+ CordRepBtree* tree = CordRepBtree::Create(MakeFlat(consumer.Next(3)));
+ for (size_t i = 1; i < max_cap; ++i) {
+ tree = BtreeAdd(tree, append, consumer.Next(3));
+ }
+ ASSERT_THAT(CordToString(tree), Eq(consumer.Consumed()));
+
+ // Fill to maximum at one deep
+ refs.RefIf(shared(), tree);
+ CordRepBtree* result = BtreeAdd(tree, append, consumer.Next(3));
+ ASSERT_THAT(result, IsNode(1));
+ ASSERT_THAT(result, Ne(tree));
+ ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
+ tree = result;
+ for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) {
+ refs.RefIf(shared(), tree);
+ result = BtreeAdd(tree, append, consumer.Next(3));
+ ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+ ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
+ tree = result;
+ }
+
+ // Fill to maximum at two deep
+ refs.RefIf(shared(), tree);
+ result = BtreeAdd(tree, append, consumer.Next(3));
+ ASSERT_THAT(result, IsNode(2));
+ ASSERT_THAT(result, Ne(tree));
+ ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
+ tree = result;
+ for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap;
+ ++i) {
+ refs.RefIf(shared(), tree);
+ result = BtreeAdd(tree, append, consumer.Next(3));
+ ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree)));
+ ASSERT_THAT(CordToString(result), Eq(consumer.Consumed()));
+ tree = result;
+ }
+
+ CordRep::Unref(tree);
+ }
+}
+
+TEST_P(CordRepBtreeTest, AddLargeDataToLeaf) {
+ const size_t max_cap = CordRepBtree::kMaxCapacity;
+ const size_t n = max_cap * max_cap * max_cap * 3 + 2;
+ const std::string data = CreateRandomString(n * kMaxFlatLength);
+
+ for (bool append : {true, false}) {
+ AutoUnref refs;
+ SCOPED_TRACE(append ? "Append" : "Prepend");
+
+ CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc"));
+ refs.RefIf(shared(), leaf);
+ CordRepBtree* result = BtreeAdd(leaf, append, data);
+ EXPECT_THAT(CordToString(result), Eq(append ? "abc" + data : data + "abc"));
+ CordRep::Unref(result);
+ }
+}
+
+TEST_P(CordRepBtreeDualTest, CreateFromConcat) {
+ AutoUnref refs;
+ CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"),
+ MakeFlat("nopqrstuv"), MakeFlat("wxyz")};
+ auto* left = MakeConcat(flats[0], flats[1]);
+ auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3]));
+ auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right));
+ CordRepBtree* result = CordRepBtree::Create(concat);
+ ASSERT_TRUE(CordRepBtree::IsValid(result));
+ EXPECT_THAT(result->length, Eq(26));
+ EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz"));
+ CordRep::Unref(result);
+}
+
+TEST_P(CordRepBtreeDualTest, AppendConcat) {
+ AutoUnref refs;
+ CordRep* flats[] = {MakeFlat("defgh"), MakeFlat("ijklm"),
+ MakeFlat("nopqrstuv"), MakeFlat("wxyz")};
+ auto* left = MakeConcat(flats[0], flats[1]);
+ auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3]));
+ auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right));
+ CordRepBtree* result = CordRepBtree::Create(MakeFlat("abc"));
+ result = CordRepBtree::Append(result, concat);
+ ASSERT_TRUE(CordRepBtree::IsValid(result));
+ EXPECT_THAT(result->length, Eq(26));
+ EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz"));
+ CordRep::Unref(result);
+}
+
+TEST_P(CordRepBtreeDualTest, PrependConcat) {
+ AutoUnref refs;
+ CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"),
+ MakeFlat("nopqrstuv"), MakeFlat("wx")};
+ auto* left = MakeConcat(flats[0], flats[1]);
+ auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3]));
+ auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right));
+ CordRepBtree* result = CordRepBtree::Create(MakeFlat("yz"));
+ result = CordRepBtree::Prepend(result, concat);
+ ASSERT_TRUE(CordRepBtree::IsValid(result));
+ EXPECT_THAT(result->length, Eq(26));
+ EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz"));
+ CordRep::Unref(result);
+}
+
+TEST_P(CordRepBtreeTest, CreateFromTreeReturnsTree) {
+ AutoUnref refs;
+ CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world"));
+ refs.RefIf(shared(), leaf);
+ CordRepBtree* result = CordRepBtree::Create(leaf);
+ EXPECT_THAT(result, Eq(leaf));
+ CordRep::Unref(result);
+}
+
+TEST_P(CordRepBtreeTest, ExceedMaxHeight) {
+ AutoUnref refs;
+ CordRepBtree* tree = MakeLeaf();
+ for (int h = 1; h <= CordRepBtree::kMaxHeight; ++h) {
+ CordRepBtree* newtree = CordRepBtree::New(tree);
+ for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) {
+ newtree = CordRepBtree::Append(newtree, CordRep::Ref(tree));
+ }
+ tree = newtree;
+ }
+ refs.RefIf(shared(), tree);
+#if defined(GTEST_HAS_DEATH_TEST)
+ EXPECT_DEATH(tree = CordRepBtree::Append(tree, MakeFlat("Boom")), ".*");
+#endif
+ CordRep::Unref(tree);
+}
+
+TEST(CordRepBtreeTest, GetCharacter) {
+ size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2;
+ std::string data = CreateRandomString(n * 3);
+ CordRepBtree* tree = CreateTree(data, 3);
+ // Add a substring node for good measure.
+ tree = tree->Append(tree, MakeSubstring(4, 5, MakeFlat("abcdefghijklm")));
+ data += "efghi";
+ for (size_t i = 0; i < data.length(); ++i) {
+ ASSERT_THAT(tree->GetCharacter(i), Eq(data[i]));
+ }
+ CordRep::Unref(tree);
+}
+
+TEST_P(CordRepBtreeTest, IsFlatSingleFlat) {
+ CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world"));
+
+ absl::string_view fragment;
+ EXPECT_TRUE(leaf->IsFlat(nullptr));
+ EXPECT_TRUE(leaf->IsFlat(&fragment));
+ EXPECT_THAT(fragment, Eq("Hello world"));
+ fragment = "";
+ EXPECT_TRUE(leaf->IsFlat(0, 11, nullptr));
+ EXPECT_TRUE(leaf->IsFlat(0, 11, &fragment));
+ EXPECT_THAT(fragment, Eq("Hello world"));
+
+ // Arbitrary ranges must check true as well.
+ EXPECT_TRUE(leaf->IsFlat(1, 4, &fragment));
+ EXPECT_THAT(fragment, Eq("ello"));
+ EXPECT_TRUE(leaf->IsFlat(6, 5, &fragment));
+ EXPECT_THAT(fragment, Eq("world"));
+
+ CordRep::Unref(leaf);
+}
+
+TEST(CordRepBtreeTest, IsFlatMultiFlat) {
+ size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2;
+ std::string data = CreateRandomString(n * 3);
+ CordRepBtree* tree = CreateTree(data, 3);
+ // Add substring nodes for good measure.
+ tree = tree->Append(tree, MakeSubstring(4, 3, MakeFlat("abcdefghijklm")));
+ tree = tree->Append(tree, MakeSubstring(8, 3, MakeFlat("abcdefghijklm")));
+ data += "efgijk";
+
+ EXPECT_FALSE(tree->IsFlat(nullptr));
+ absl::string_view fragment = "Can't touch this";
+ EXPECT_FALSE(tree->IsFlat(&fragment));
+ EXPECT_THAT(fragment, Eq("Can't touch this"));
+
+ for (size_t offset = 0; offset < data.size(); offset += 3) {
+ EXPECT_TRUE(tree->IsFlat(offset, 3, nullptr));
+ EXPECT_TRUE(tree->IsFlat(offset, 3, &fragment));
+ EXPECT_THAT(fragment, Eq(data.substr(offset, 3)));
+
+ fragment = "Can't touch this";
+ if (offset > 0) {
+ EXPECT_FALSE(tree->IsFlat(offset - 1, 4, nullptr));
+ EXPECT_FALSE(tree->IsFlat(offset - 1, 4, &fragment));
+ EXPECT_THAT(fragment, Eq("Can't touch this"));
+ }
+ if (offset < data.size() - 4) {
+ EXPECT_FALSE(tree->IsFlat(offset, 4, nullptr));
+ EXPECT_FALSE(tree->IsFlat(offset, 4, &fragment));
+ EXPECT_THAT(fragment, Eq("Can't touch this"));
+ }
+ }
+
+ CordRep::Unref(tree);
+}
+
+#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+
+TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotPrivate) {
+ CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo"));
+ CordRepBtree::Ref(tree);
+ EXPECT_DEATH(tree->GetAppendBuffer(1), ".*");
+ CordRepBtree::Unref(tree);
+ CordRepBtree::Unref(tree);
+}
+
+#endif // defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+
+TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotFlat) {
+ CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo"));
+ for (int i = 1; i <= height(); ++i) {
+ tree = CordRepBtree::New(tree);
+ }
+ EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
+ CordRepBtree::Unref(tree);
+}
+
+TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNotPrivate) {
+ CordRepFlat* flat = MakeFlat("abc");
+ CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat));
+ for (int i = 1; i <= height(); ++i) {
+ tree = CordRepBtree::New(tree);
+ }
+ EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
+ CordRepBtree::Unref(tree);
+ CordRep::Unref(flat);
+}
+
+TEST_P(CordRepBtreeHeightTest, GetAppendBufferTreeNotPrivate) {
+ if (height() == 0) return;
+ AutoUnref refs;
+ CordRepFlat* flat = MakeFlat("abc");
+ CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat));
+ for (int i = 1; i <= height(); ++i) {
+ if (i == (height() + 1) / 2) refs.Ref(tree);
+ tree = CordRepBtree::New(tree);
+ }
+ EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
+ CordRepBtree::Unref(tree);
+ CordRep::Unref(flat);
+}
+
+TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNoCapacity) {
+ CordRepFlat* flat = MakeFlat("abc");
+ flat->length = flat->Capacity();
+ CordRepBtree* tree = CordRepBtree::Create(flat);
+ for (int i = 1; i <= height(); ++i) {
+ tree = CordRepBtree::New(tree);
+ }
+ EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0));
+ CordRepBtree::Unref(tree);
+}
+
+TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatWithCapacity) {
+ CordRepFlat* flat = MakeFlat("abc");
+ CordRepBtree* tree = CordRepBtree::Create(flat);
+ for (int i = 1; i <= height(); ++i) {
+ tree = CordRepBtree::New(tree);
+ }
+ absl::Span<char> span = tree->GetAppendBuffer(2);
+ EXPECT_THAT(span, SizeIs(2));
+ EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 3));
+ EXPECT_THAT(tree->length, Eq(5));
+
+ size_t avail = flat->Capacity() - 5;
+ span = tree->GetAppendBuffer(avail + 100);
+ EXPECT_THAT(span, SizeIs(avail));
+ EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 5));
+ EXPECT_THAT(tree->length, Eq(5 + avail));
+
+ CordRepBtree::Unref(tree);
+}
+
+TEST(CordRepBtreeTest, Dump) {
+ // Handles nullptr
+ std::stringstream ss;
+ CordRepBtree::Dump(nullptr, ss);
+ CordRepBtree::Dump(nullptr, "Once upon a label", ss);
+ CordRepBtree::Dump(nullptr, "Once upon a label", false, ss);
+ CordRepBtree::Dump(nullptr, "Once upon a label", true, ss);
+
+ // Cover legal edges
+ CordRepFlat* flat = MakeFlat("Hello world");
+ CordRepExternal* external = MakeExternal("Hello external");
+ CordRep* substr_flat = MakeSubstring(1, 6, CordRep::Ref(flat));
+ CordRep* substr_external = MakeSubstring(2, 7, CordRep::Ref(external));
+
+ // Build tree
+ CordRepBtree* tree = CordRepBtree::Create(flat);
+ tree = CordRepBtree::Append(tree, external);
+ tree = CordRepBtree::Append(tree, substr_flat);
+ tree = CordRepBtree::Append(tree, substr_external);
+
+ // Repeat until we have a tree
+ while (tree->height() == 0) {
+ tree = CordRepBtree::Append(tree, CordRep::Ref(flat));
+ tree = CordRepBtree::Append(tree, CordRep::Ref(external));
+ tree = CordRepBtree::Append(tree, CordRep::Ref(substr_flat));
+ tree = CordRepBtree::Append(tree, CordRep::Ref(substr_external));
+ }
+
+ for (int api = 0; api <= 3; ++api) {
+ absl::string_view api_scope;
+ std::stringstream ss;
+ switch (api) {
+ case 0:
+ api_scope = "Bare";
+ CordRepBtree::Dump(tree, ss);
+ break;
+ case 1:
+ api_scope = "Label only";
+ CordRepBtree::Dump(tree, "Once upon a label", ss);
+ break;
+ case 2:
+ api_scope = "Label no content";
+ CordRepBtree::Dump(tree, "Once upon a label", false, ss);
+ break;
+ default:
+ api_scope = "Label and content";
+ CordRepBtree::Dump(tree, "Once upon a label", true, ss);
+ break;
+ }
+ SCOPED_TRACE(api_scope);
+ std::string str = ss.str();
+
+ // Contains Node(depth) / Leaf and private / shared indicators
+ EXPECT_THAT(str, AllOf(HasSubstr("Node(1)"), HasSubstr("Leaf"),
+ HasSubstr("Private"), HasSubstr("Shared")));
+
+ // Contains length and start offset of all data edges
+ EXPECT_THAT(str, AllOf(HasSubstr("len = 11"), HasSubstr("len = 14"),
+ HasSubstr("len = 6"), HasSubstr("len = 7"),
+ HasSubstr("start = 1"), HasSubstr("start = 2")));
+
+ // Contains address of all data edges
+ EXPECT_THAT(
+ str, AllOf(HasSubstr(absl::StrCat("0x", absl::Hex(flat))),
+ HasSubstr(absl::StrCat("0x", absl::Hex(external))),
+ HasSubstr(absl::StrCat("0x", absl::Hex(substr_flat))),
+ HasSubstr(absl::StrCat("0x", absl::Hex(substr_external)))));
+
+ if (api != 0) {
+ // Contains label
+ EXPECT_THAT(str, HasSubstr("Once upon a label"));
+ }
+
+ if (api != 3) {
+ // Does not contain contents
+ EXPECT_THAT(str, Not(AnyOf((HasSubstr("data = \"Hello world\""),
+ HasSubstr("data = \"Hello external\""),
+ HasSubstr("data = \"ello w\""),
+ HasSubstr("data = \"llo ext\"")))));
+ } else {
+ // Contains contents
+ EXPECT_THAT(str, AllOf((HasSubstr("data = \"Hello world\""),
+ HasSubstr("data = \"Hello external\""),
+ HasSubstr("data = \"ello w\""),
+ HasSubstr("data = \"llo ext\""))));
+ }
+ }
+
+ CordRep::Unref(tree);
+}
+
+TEST(CordRepBtreeTest, IsValid) {
+ EXPECT_FALSE(CordRepBtree::IsValid(nullptr));
+
+ CordRepBtree* empty = CordRepBtree::New(0);
+ EXPECT_TRUE(CordRepBtree::IsValid(empty));
+ CordRep::Unref(empty);
+
+ for (bool as_tree : {false, true}) {
+ CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc"));
+ CordRepBtree* tree = as_tree ? CordRepBtree::New(leaf) : nullptr;
+ CordRepBtree* check = as_tree ? tree : leaf;
+
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ leaf->length--;
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ leaf->length++;
+
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ leaf->tag--;
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ leaf->tag++;
+
+ // Height
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ leaf->storage[0] = static_cast<uint8_t>(CordRepBtree::kMaxHeight + 1);
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ leaf->storage[0] = 1;
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ leaf->storage[0] = 0;
+
+ // Begin
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ const uint8_t begin = leaf->storage[1];
+ leaf->storage[1] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity);
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ leaf->storage[1] = 2;
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ leaf->storage[1] = begin;
+
+ // End
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ const uint8_t end = leaf->storage[2];
+ leaf->storage[2] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity + 1);
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ leaf->storage[2] = end;
+
+ // DataEdge tag and value
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ CordRep* const edge = leaf->Edges()[0];
+ const uint8_t tag = edge->tag;
+ CordRepBtreeTestPeer::SetEdge(leaf, begin, nullptr);
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ CordRepBtreeTestPeer::SetEdge(leaf, begin, edge);
+ edge->tag = BTREE;
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ edge->tag = tag;
+
+ if (as_tree) {
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ leaf->length--;
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ leaf->length++;
+
+ // Height
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ tree->storage[0] = static_cast<uint8_t>(2);
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ tree->storage[0] = 1;
+
+ // Btree edge
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ CordRep* const edge = tree->Edges()[0];
+ const uint8_t tag = edge->tag;
+ edge->tag = FLAT;
+ EXPECT_FALSE(CordRepBtree::IsValid(check));
+ edge->tag = tag;
+ }
+
+ ASSERT_TRUE(CordRepBtree::IsValid(check));
+ CordRep::Unref(check);
+ }
+}
+
+TEST(CordRepBtreeTest, AssertValid) {
+ CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
+ const CordRepBtree* ctree = tree;
+ EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree));
+ EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree));
+
+#if defined(GTEST_HAS_DEATH_TEST)
+ CordRepBtree* nulltree = nullptr;
+ const CordRepBtree* cnulltree = nullptr;
+ EXPECT_DEBUG_DEATH(
+ EXPECT_THAT(CordRepBtree::AssertValid(nulltree), Eq(nulltree)), ".*");
+ EXPECT_DEBUG_DEATH(
+ EXPECT_THAT(CordRepBtree::AssertValid(cnulltree), Eq(cnulltree)), ".*");
+
+ tree->length--;
+ EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree)),
+ ".*");
+ EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree)),
+ ".*");
+ tree->length++;
+#endif
+ CordRep::Unref(tree);
+}
+
+} // namespace
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/strings/internal/cord_rep_test_util.h b/absl/strings/internal/cord_rep_test_util.h
new file mode 100644
index 00000000..bc500064
--- /dev/null
+++ b/absl/strings/internal/cord_rep_test_util.h
@@ -0,0 +1,185 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
+#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
+
+#include <cassert>
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "absl/base/config.h"
+#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_btree.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cordrep_testing {
+
+inline cord_internal::CordRepSubstring* MakeSubstring(
+ size_t start, size_t len, cord_internal::CordRep* rep) {
+ auto* sub = new cord_internal::CordRepSubstring;
+ sub->tag = cord_internal::SUBSTRING;
+ sub->start = start;
+ sub->length = len <= 0 ? rep->length - start + len : len;
+ sub->child = rep;
+ return sub;
+}
+
+inline cord_internal::CordRepConcat* MakeConcat(cord_internal::CordRep* left,
+ cord_internal::CordRep* right,
+ int depth = 0) {
+ auto* concat = new cord_internal::CordRepConcat;
+ concat->tag = cord_internal::CONCAT;
+ concat->length = left->length + right->length;
+ concat->left = left;
+ concat->right = right;
+ concat->set_depth(depth);
+ return concat;
+}
+
+inline cord_internal::CordRepFlat* MakeFlat(absl::string_view value) {
+ assert(value.length() <= cord_internal::kMaxFlatLength);
+ auto* flat = cord_internal::CordRepFlat::New(value.length());
+ flat->length = value.length();
+ memcpy(flat->Data(), value.data(), value.length());
+ return flat;
+}
+
+// Creates an external node for testing
+inline cord_internal::CordRepExternal* MakeExternal(absl::string_view s) {
+ struct Rep : public cord_internal::CordRepExternal {
+ std::string s;
+ explicit Rep(absl::string_view sv) : s(sv) {
+ this->tag = cord_internal::EXTERNAL;
+ this->base = s.data();
+ this->length = s.length();
+ this->releaser_invoker = [](cord_internal::CordRepExternal* self) {
+ delete static_cast<Rep*>(self);
+ };
+ }
+ };
+ return new Rep(s);
+}
+
+inline std::string CreateRandomString(size_t n) {
+ absl::string_view data =
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "0123456789~!@#$%^&*()_+=-<>?:\"{}[]|";
+ std::minstd_rand rnd;
+ std::uniform_int_distribution<size_t> dist(0, data.size() - 1);
+ std::string s(n, ' ');
+ for (size_t i = 0; i < n; ++i) {
+ s[i] = data[dist(rnd)];
+ }
+ return s;
+}
+
+// Creates an array of flats from the provided string, chopping
+// the provided string up into flats of size `chunk_size` characters
+// resulting in roughly `data.size() / chunk_size` total flats.
+inline std::vector<cord_internal::CordRep*> CreateFlatsFromString(
+ absl::string_view data, size_t chunk_size) {
+ assert(chunk_size > 0);
+ std::vector<cord_internal::CordRep*> flats;
+ for (absl::string_view s = data; !s.empty(); s.remove_prefix(chunk_size)) {
+ flats.push_back(MakeFlat(s.substr(0, chunk_size)));
+ }
+ return flats;
+}
+
+inline cord_internal::CordRepBtree* CordRepBtreeFromFlats(
+ absl::Span<cord_internal::CordRep* const> flats) {
+ assert(!flats.empty());
+ auto* node = cord_internal::CordRepBtree::Create(flats[0]);
+ for (size_t i = 1; i < flats.size(); ++i) {
+ node = cord_internal::CordRepBtree::Append(node, flats[i]);
+ }
+ return node;
+}
+
+inline void CordToString(cord_internal::CordRep* rep, std::string& s) {
+ size_t offset = 0;
+ size_t length = rep->length;
+ while (rep->tag == cord_internal::SUBSTRING) {
+ offset += rep->substring()->start;
+ rep = rep->substring()->child;
+ }
+ if (rep->tag == cord_internal::BTREE) {
+ for (cord_internal::CordRep* edge : rep->btree()->Edges()) {
+ CordToString(edge, s);
+ }
+ } else if (rep->tag >= cord_internal::FLAT) {
+ s.append(rep->flat()->Data() + offset, length);
+ } else if (rep->tag == cord_internal::EXTERNAL) {
+ s.append(rep->external()->base + offset, length);
+ } else {
+ ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag);
+ }
+}
+
+inline std::string CordToString(cord_internal::CordRep* rep) {
+ std::string s;
+ s.reserve(rep->length);
+ CordToString(rep, s);
+ return s;
+}
+
+// RAII Helper class to automatically unref reps on destruction.
+class AutoUnref {
+ public:
+ ~AutoUnref() {
+ for (CordRep* rep : unrefs_) CordRep::Unref(rep);
+ }
+
+ // Adds `rep` to the list of reps to be unreffed at destruction.
+ template <typename CordRepType>
+ CordRepType* Add(CordRepType* rep) {
+ unrefs_.push_back(rep);
+ return rep;
+ }
+
+ // Increments the reference count of `rep` by one, and adds it to
+ // the list of reps to be unreffed at destruction.
+ template <typename CordRepType>
+ CordRepType* Ref(CordRepType* rep) {
+ unrefs_.push_back(CordRep::Ref(rep));
+ return rep;
+ }
+
+ // Increments the reference count of `rep` by one if `condition` is true,
+ // and adds it to the list of reps to be unreffed at destruction.
+ template <typename CordRepType>
+ CordRepType* RefIf(bool condition, CordRepType* rep) {
+ if (condition) unrefs_.push_back(CordRep::Ref(rep));
+ return rep;
+ }
+
+ private:
+ using CordRep = absl::cord_internal::CordRep;
+
+ std::vector<CordRep*> unrefs_;
+};
+
+} // namespace cordrep_testing
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
diff --git a/ci/cmake_common.sh b/ci/cmake_common.sh
index 626fed0d..51f31069 100644
--- a/ci/cmake_common.sh
+++ b/ci/cmake_common.sh
@@ -14,7 +14,7 @@
# The commit of GoogleTest to be used in the CMake tests in this directory.
# Keep this in sync with the commit in the WORKSPACE file.
-readonly ABSL_GOOGLETEST_COMMIT="5bcd8e3bb929714e031a542d303f818e5a5af45d"
+readonly ABSL_GOOGLETEST_COMMIT="8d51ffdfab10b3fba636ae69bc03da4b54f8c235"
# Avoid depending on GitHub by looking for a cached copy of the commit first.
if [[ -r "${KOKORO_GFILE_DIR:-}/distdir/${ABSL_GOOGLETEST_COMMIT}.zip" ]]; then