summaryrefslogtreecommitdiff
path: root/absl/strings
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2021-07-29 08:04:56 -0700
committerGravatar rogeeff <rogeeff@google.com>2021-07-29 14:17:36 -0400
commit89c531c1e0d7372e2e7029f072a35495c5447d61 (patch)
tree82d07491bf41094d1383e1ef1e1f8d725adabd85 /absl/strings
parent4bb739310c0257bedc41bfe2824c3f2860398a65 (diff)
Export of internal Abseil changes
-- e1a0989213908927f05002ab7697955ad7dc5632 by Martijn Vels <mvels@google.com>: Introduce CordRepBtreeReader CordRepBtreeReader provides forward navigation on cord btrees with absolute positional (offset) context, iterating over btree data in absl::string_view chunks. PiperOrigin-RevId: 387585161 -- 206d298e2bccb998731995cb05717b31fa9d90ec by Abseil Team <absl-team@google.com>: Internal change PiperOrigin-RevId: 387577465 -- f07fafe8a400a4f5dfef186d1a3b61fb7f709fe5 by Abseil Team <absl-team@google.com>: This change adds debug-build enforcement that the inputs to absl::c_set_intersection are sorted, which is a prerequisite of std::set_intersection and required for correct operation of the algorithm. PiperOrigin-RevId: 387446657 -- 2ca15c6361bb758be7fb88cae82bf8489b4d3364 by Abseil Team <absl-team@google.com>: Change BadStatusOrAccess::what() to contain status_.ToString() This ensures that on uncaught exception propagation that would cause program termination, the message contains information on the error which caused the failure. Lazy initialization of what_ is a value judgement: if most callers are expected to call status() not what(), lazy initialization is correct. If most callers are expected to call what(), it should be initialized on construction to avoid atomic operation overhead. PiperOrigin-RevId: 387402243 -- 3e855084e104dc972a0c4385395e6d8e8465127f by Gennadiy Rozental <rogeeff@google.com>: LSC: Standardize access to GoogleTest flags on GTEST_FLAG_GET/GTEST_FLAG_SET This change is necessary to move Googletest flags out of the testing:: namespace without breaking code. These new macros will continue to be required for code that needs to work both inside Google's monorepo and outside in OSS, but can be used anywhere inside the monorepo. PiperOrigin-RevId: 387396025 -- 1ccf5895a15059ef689af5c4817d7b84f73190be by Gennadiy Rozental <rogeeff@google.com>: Import of CCTZ from GitHub. PiperOrigin-RevId: 387388496 GitOrigin-RevId: e1a0989213908927f05002ab7697955ad7dc5632 Change-Id: I3606d9ce29d909a3555e662e9df564202cf5068d
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/BUILD.bazel19
-rw-r--r--absl/strings/CMakeLists.txt20
-rw-r--r--absl/strings/cord_ring_test.cc2
-rw-r--r--absl/strings/cord_test.cc18
-rw-r--r--absl/strings/internal/cord_rep_btree_reader.cc68
-rw-r--r--absl/strings/internal/cord_rep_btree_reader.h219
-rw-r--r--absl/strings/internal/cord_rep_btree_reader_test.cc285
7 files changed, 621 insertions, 10 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 7e0245a3..9720b683 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -271,6 +271,7 @@ cc_library(
"internal/cord_internal.cc",
"internal/cord_rep_btree.cc",
"internal/cord_rep_btree_navigator.cc",
+ "internal/cord_rep_btree_reader.cc",
"internal/cord_rep_consume.cc",
"internal/cord_rep_ring.cc",
],
@@ -278,6 +279,7 @@ cc_library(
"internal/cord_internal.h",
"internal/cord_rep_btree.h",
"internal/cord_rep_btree_navigator.h",
+ "internal/cord_rep_btree_reader.h",
"internal/cord_rep_consume.h",
"internal/cord_rep_flat.h",
"internal/cord_rep_ring.h",
@@ -336,6 +338,23 @@ cc_test(
],
)
+cc_test(
+ name = "cord_rep_btree_reader_test",
+ size = "medium",
+ srcs = ["internal/cord_rep_btree_reader_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ visibility = ["//visibility:private"],
+ deps = [
+ ":cord",
+ ":cord_internal",
+ ":cord_rep_test_util",
+ ":strings",
+ "//absl/base:config",
+ "//absl/base:raw_logging_internal",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
cc_library(
name = "cordz_update_tracker",
hdrs = ["internal/cordz_update_tracker.h"],
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index e55c035d..88f076a8 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -557,6 +557,7 @@ absl_cc_library(
"internal/cord_internal.h"
"internal/cord_rep_btree.h"
"internal/cord_rep_btree_navigator.h"
+ "internal/cord_rep_btree_reader.h"
"internal/cord_rep_consume.h"
"internal/cord_rep_flat.h"
"internal/cord_rep_ring.h"
@@ -565,6 +566,7 @@ absl_cc_library(
"internal/cord_internal.cc"
"internal/cord_rep_btree.cc"
"internal/cord_rep_btree_navigator.cc"
+ "internal/cord_rep_btree_reader.cc"
"internal/cord_rep_consume.cc"
"internal/cord_rep_ring.cc"
COPTS
@@ -979,6 +981,24 @@ absl_cc_test(
absl_cc_test(
NAME
+ cord_rep_btree_reader_test
+ SRCS
+ "internal/cord_rep_btree_reader_test.cc"
+ COPTS
+ ${ABSL_TEST_COPTS}
+ DEPS
+ absl::base
+ absl::config
+ absl::cord_internal
+ absl::cord_rep_test_util
+ absl::core_headers
+ absl::raw_logging_internal
+ absl::strings
+ GTest::gmock_main
+)
+
+absl_cc_test(
+ NAME
cord_ring_test
SRCS
"cord_ring_test.cc"
diff --git a/absl/strings/cord_ring_test.cc b/absl/strings/cord_ring_test.cc
index b1787edf..f1318595 100644
--- a/absl/strings/cord_ring_test.cc
+++ b/absl/strings/cord_ring_test.cc
@@ -275,7 +275,7 @@ CordRepConcat* MakeConcat(CordRep* left, CordRep* right, int depth = 0) {
enum Composition { kMix, kAppend, kPrepend };
Composition RandomComposition() {
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
return (rng() & 1) ? kMix : ((rng() & 1) ? kAppend : kPrepend);
}
diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc
index 14eca155..597378cc 100644
--- a/absl/strings/cord_test.cc
+++ b/absl/strings/cord_test.cc
@@ -361,7 +361,7 @@ TEST(Cord, StartsEndsWith) {
}
TEST(Cord, Subcord) {
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
const std::string s = RandomLowercaseString(&rng, 1024);
absl::Cord a;
@@ -570,7 +570,7 @@ TEST(Cord, Flatten) {
VerifyFlatten(absl::MakeFragmentedCord({"small ", "fragmented ", "cord"}));
// Test with a cord that is longer than the largest flat buffer
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
VerifyFlatten(absl::Cord(RandomLowercaseString(&rng, 8192)));
}
@@ -937,7 +937,7 @@ static void TestCompare(const absl::Cord& c, const absl::Cord& d,
}
TEST(Compare, ComparisonIsUnsigned) {
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
std::uniform_int_distribution<uint32_t> uniform_uint8(0, 255);
char x = static_cast<char>(uniform_uint8(rng));
TestCompare(
@@ -947,7 +947,7 @@ TEST(Compare, ComparisonIsUnsigned) {
TEST(Compare, RandomComparisons) {
const int kIters = 5000;
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
int n = GetUniformRandomUpTo(&rng, 5000);
absl::Cord a[] = {MakeExternalCord(n),
@@ -1082,7 +1082,7 @@ TEST(ConstructFromExternal, ReleaserInvoked) {
}
TEST(ConstructFromExternal, CompareContents) {
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
for (int length = 1; length <= 2048; length *= 2) {
std::string data = RandomLowercaseString(&rng, length);
@@ -1098,7 +1098,7 @@ TEST(ConstructFromExternal, CompareContents) {
}
TEST(ConstructFromExternal, LargeReleaser) {
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
constexpr size_t kLength = 256;
std::string data = RandomLowercaseString(&rng, kLength);
std::array<char, kLength> data_array;
@@ -1323,7 +1323,7 @@ TEST(Cord, DiabolicalGrowth) {
// resulting cord.
// TODO(b/183983616): Apply some minimum compaction when copying a shared
// source cord into a mutable copy for updates in CordRepRing.
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
const std::string expected = RandomLowercaseString(&rng, 5000);
absl::Cord cord;
for (char c : expected) {
@@ -1483,7 +1483,7 @@ TEST(CordChunkIterator, Operations) {
VerifyChunkIterator(reused_nodes_cord, expected_chunks);
}
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
absl::Cord flat_cord(RandomLowercaseString(&rng, 256));
absl::Cord subcords;
for (int i = 0; i < 128; ++i) subcords.Prepend(flat_cord.Subcord(i, 128));
@@ -1621,7 +1621,7 @@ TEST(CordCharIterator, Operations) {
VerifyCharIterator(reused_nodes_cord);
}
- RandomEngine rng(testing::GTEST_FLAG(random_seed));
+ RandomEngine rng(GTEST_FLAG_GET(random_seed));
absl::Cord flat_cord(RandomLowercaseString(&rng, 256));
absl::Cord subcords;
for (int i = 0; i < 4; ++i) subcords.Prepend(flat_cord.Subcord(16 * i, 128));
diff --git a/absl/strings/internal/cord_rep_btree_reader.cc b/absl/strings/internal/cord_rep_btree_reader.cc
new file mode 100644
index 00000000..3ba43144
--- /dev/null
+++ b/absl/strings/internal/cord_rep_btree_reader.cc
@@ -0,0 +1,68 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/cord_rep_btree_reader.h"
+
+#include <cassert>
+
+#include "absl/base/config.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_btree.h"
+#include "absl/strings/internal/cord_rep_btree_navigator.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size,
+ CordRep*& tree) {
+ assert(chunk_size <= navigator_.Current()->length);
+
+ // If chunk_size is non-zero, we need to start inside last returned edge.
+ // Else we start reading at the next data edge of the tree.
+ CordRep* edge = chunk_size ? navigator_.Current() : navigator_.Next();
+ const size_t offset = chunk_size ? edge->length - chunk_size : 0;
+
+ // Read the sub tree and verify we got what we wanted.
+ ReadResult result = navigator_.Read(offset, n);
+ tree = result.tree;
+
+ // If the data returned in `tree` was covered entirely by `chunk_size`, i.e.,
+ // read from the 'previous' edge, we did not consume any additional data, and
+ // can directly return the substring into the current data edge as the next
+ // chunk. We can easily establish from the above code that `navigator_.Next()`
+ // has not been called as that requires `chunk_size` to be zero.
+ if (n < chunk_size) return CordRepBtree::EdgeData(edge).substr(result.n);
+
+ // The amount of data taken from the last edge is `chunk_size` and `result.n`
+ // contains the offset into the current edge trailing the read data (which can
+ // be 0). As the call to `navigator_.Read()` could have consumed all remaining
+ // data, calling `navigator_.Current()` is not safe before checking if we
+ // already consumed all remaining data.
+ const size_t consumed_by_read = n - chunk_size - result.n;
+ if (consumed_ + consumed_by_read >= length()) {
+ consumed_ = length();
+ return {};
+ }
+
+ // We did not read all data, return remaining data from current edge.
+ edge = navigator_.Current();
+ consumed_ += consumed_by_read + edge->length;
+ return CordRepBtree::EdgeData(edge).substr(result.n);
+}
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/strings/internal/cord_rep_btree_reader.h b/absl/strings/internal/cord_rep_btree_reader.h
new file mode 100644
index 00000000..c19fa43d
--- /dev/null
+++ b/absl/strings/internal/cord_rep_btree_reader.h
@@ -0,0 +1,219 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
+#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
+
+#include <cassert>
+
+#include "absl/base/config.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_btree.h"
+#include "absl/strings/internal/cord_rep_btree_navigator.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+
+// CordRepBtreeReader implements logic to iterate over cord btrees.
+// References to the underlying data are returned as absl::string_view values.
+// The most typical use case is a forward only iteration over tree data.
+// The class also provides `Skip()`, `Seek()` and `Read()` methods similar to
+// CordRepBtreeNavigator that allow more advanced navigation. The class provides
+// a `consumed` property which contains the end offset of the chunk last
+// returned to the user which is useful in cord iteration logic.
+//
+// Example: iterate over all data inside a cord btree:
+//
+// CordRepBtreeReader reader;
+// for (string_view sv = reader.Init(tree); !sv.Empty(); sv = sv.Next()) {
+// DoSomethingWithDataIn(sv);
+// }
+//
+// All navigation methods always return the next 'chunk' of data. The class
+// assumes that all data is directly 'consumed' by the caller. For example:
+// invoking `Skip()` will skip the desired number of bytes, and directly
+// read and return the next chunk of data directly after the skipped bytes.
+//
+// Example: iterate over all data inside a btree skipping the first 100 bytes:
+//
+// CordRepBtreeReader reader;
+// absl::string_view sv = reader.Init(tree);
+// if (sv.length() > 100) {
+// sv.RemovePrefix(100);
+// } else {
+// sv = reader.Skip(100 - sv.length());
+// }
+// while (!sv.empty()) {
+// DoSomethingWithDataIn(sv);
+// absl::string_view sv = reader.Next();
+// }
+//
+// It is important to notice that `consumed` represents the end position of the
+// last data edge returned to the caller, not the cumulative data returned to
+// the caller which can be less in cases of skipping or seeking over data.
+//
+// For example, consider a cord btree with five data edges: "abc", "def", "ghi",
+// "jkl" and "mno":
+//
+// absl::string_view sv;
+// CordRepBtreeReader reader;
+//
+// sv = reader.Init(tree); // sv = "abc", reader.consumed() = 3
+// sv = reader.Skip(4); // sv = "hi", reader.consumed() = 9
+// sv = reader.Skip(2); // sv = "l", reader.consumed() = 12
+// sv = reader.Next(); // sv = "mno", reader.consumed() = 15
+//
+// In the above example, `reader.consumed()` reflects the data edges iterated
+// over or skipped by the reader, not the amount of data 'consumed' by the
+// caller.
+class CordRepBtreeReader {
+ public:
+ using ReadResult = CordRepBtreeNavigator::ReadResult;
+ using Position = CordRepBtreeNavigator::Position;
+
+ // Returns true if this instance is not empty.
+ explicit operator bool() const { return navigator_.btree() != nullptr; }
+
+ // Returns the tree referenced by this instance or nullptr if empty.
+ CordRepBtree* btree() const { return navigator_.btree(); }
+
+ // Returns the current data edge inside the referenced btree.
+ // Requires that the current instance is not empty.
+ CordRep* node() const { return navigator_.Current(); }
+
+ // Returns the length of the referenced tree.
+ // Requires that the current instance is not empty.
+ size_t length() const;
+
+ // Returns the end offset of the last navigated to chunk, which represents the
+ // total bytes 'consumed' relative to the start of the tree. The returned
+ // value is never zero. For example, initializing a reader with a tree with a
+ // first data edge of 19 bytes will return `consumed() = 19`. See also the
+ // class comments on the meaning of `consumed`.
+ // Requires that the current instance is not empty.
+ size_t consumed() const;
+
+ // Resets this instance to an empty value.
+ void Reset() { navigator_.Reset(); }
+
+ // Initializes this instance with `tree`. `tree` must not be null.
+ // Returns a reference to the first data edge of the provided tree.
+ absl::string_view Init(CordRepBtree* tree);
+
+ // Navigates to and returns the next data edge of the referenced tree.
+ // Returns an empty string_view if an attempt is made to read beyond the end
+ // of the tree, i.e.: if `remaining()` is zero indicating an EOF condition.
+ // Requires that the current instance is not empty.
+ absl::string_view Next();
+
+ // Skips the provided amount of bytes and returns a reference to the data
+ // directly following the skipped bytes.
+ absl::string_view Skip(size_t skip);
+
+ // Reads `n` bytes into `tree`.
+ // If `chunk_size` is zero, starts reading at the next data edge. If
+ // `chunk_size` is non zero, the read starts at the last `chunk_size` bytes of
+ // the last returned data edge. Effectively, this means that the read starts
+ // at offset `consumed() - chunk_size`.
+ // Requires that `chunk_size` is less than or equal to the length of the
+ // last returned data edge. The purpose of `chunk_size` is to simplify code
+ // partially consuming a returned chunk and wanting to include the remaining
+ // bytes in the Read call. For example, the below code will read 1000 bytes of
+ // data into a cord tree if the first chunk starts with "big:":
+ //
+ // CordRepBtreeReader reader;
+ // absl::string_view sv = reader.Init(tree);
+ // if (absl::StartsWith(sv, "big:")) {
+ // CordRepBtree tree;
+ // sv = reader.Read(1000, sv.size() - 4 /* "big:" */, &tree);
+ // }
+ //
+ // This method will return an empty string view if all remaining data was
+ // read. If `n` exceeded the amount of remaining data this function will
+ // return an empty string view and `tree` will be set to nullptr.
+ // In both cases, `consumed` will be set to `length`.
+ absl::string_view Read(size_t n, size_t chunk_size, CordRep*& tree);
+
+ // Navigates to the chunk at offset `offset`.
+ // Returns a reference into the navigated to chunk, adjusted for the relative
+ // position of `offset` into that chunk. For example, calling `Seek(13)` on a
+ // cord tree containing 2 chunks of 10 and 20 bytes respectively will return
+ // a string view into the second chunk starting at offset 3 with a size of 17.
+ // Returns an empty string view if `offset` is equal to or greater than the
+ // length of the referenced tree.
+ absl::string_view Seek(size_t offset);
+
+ private:
+ size_t consumed_;
+ CordRepBtreeNavigator navigator_;
+};
+
+inline size_t CordRepBtreeReader::length() const {
+ assert(btree() != nullptr);
+ return btree()->length;
+}
+
+inline size_t CordRepBtreeReader::consumed() const {
+ assert(btree() != nullptr);
+ return consumed_;
+}
+
+inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) {
+ assert(tree != nullptr);
+ const CordRep* edge = navigator_.InitFirst(tree);
+ consumed_ = edge->length;
+ return CordRepBtree::EdgeData(edge);
+}
+
+inline absl::string_view CordRepBtreeReader::Next() {
+ assert(consumed() < length());
+ const CordRep* edge = navigator_.Next();
+ assert(edge != nullptr);
+ consumed_ += edge->length;
+ return CordRepBtree::EdgeData(edge);
+}
+
+inline absl::string_view CordRepBtreeReader::Skip(size_t skip) {
+ // As we are always positioned on the last 'consumed' edge, we
+ // need to skip the current edge as well as `skip`.
+ const size_t edge_length = navigator_.Current()->length;
+ CordRepBtreeNavigator::Position pos = navigator_.Skip(skip + edge_length);
+ if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) {
+ consumed_ = length();
+ return {};
+ }
+ // The combined length of all edges skipped before `pos.edge` is `skip -
+ // pos.offset`, all of which are 'consumed', as well as the current edge.
+ consumed_ += skip - pos.offset + pos.edge->length;
+ return CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
+}
+
+inline absl::string_view CordRepBtreeReader::Seek(size_t offset) {
+ const CordRepBtreeNavigator::Position pos = navigator_.Seek(offset);
+ if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) {
+ consumed_ = length();
+ return {};
+ }
+ absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
+ consumed_ = offset + chunk.length();
+ return chunk;
+}
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
diff --git a/absl/strings/internal/cord_rep_btree_reader_test.cc b/absl/strings/internal/cord_rep_btree_reader_test.cc
new file mode 100644
index 00000000..44d3365f
--- /dev/null
+++ b/absl/strings/internal/cord_rep_btree_reader_test.cc
@@ -0,0 +1,285 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/cord_rep_btree_reader.h"
+
+#include <iostream>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/config.h"
+#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/cord.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_btree.h"
+#include "absl/strings/internal/cord_rep_test_util.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Ne;
+using ::testing::Not;
+
+using ::absl::cordrep_testing::CordRepBtreeFromFlats;
+using ::absl::cordrep_testing::MakeFlat;
+using ::absl::cordrep_testing::CordToString;
+using ::absl::cordrep_testing::CreateFlatsFromString;
+using ::absl::cordrep_testing::CreateRandomString;
+
+using ReadResult = CordRepBtreeReader::ReadResult;
+
+TEST(CordRepBtreeReaderTest, Next) {
+ constexpr size_t kChars = 3;
+ const size_t cap = CordRepBtree::kMaxCapacity;
+ int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17};
+
+ for (int count : counts) {
+ std::string data = CreateRandomString(count * kChars);
+ std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars);
+ CordRepBtree* node = CordRepBtreeFromFlats(flats);
+
+ CordRepBtreeReader reader;
+ absl::string_view chunk = reader.Init(node);
+ EXPECT_THAT(chunk, Eq(data.substr(0, chunk.length())));
+
+ size_t consumed = chunk.length();
+ EXPECT_THAT(reader.consumed(), Eq(consumed));
+
+ while (consumed < data.length()) {
+ chunk = reader.Next();
+ EXPECT_THAT(chunk, Eq(data.substr(consumed, chunk.length())));
+
+ consumed += chunk.length();
+ EXPECT_THAT(reader.consumed(), Eq(consumed));
+ }
+
+ EXPECT_THAT(consumed, Eq(data.length()));
+ EXPECT_THAT(reader.consumed(), Eq(data.length()));
+
+ CordRep::Unref(node);
+ }
+}
+
+TEST(CordRepBtreeReaderTest, Skip) {
+ constexpr size_t kChars = 3;
+ const size_t cap = CordRepBtree::kMaxCapacity;
+ int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17};
+
+ for (int count : counts) {
+ std::string data = CreateRandomString(count * kChars);
+ std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars);
+ CordRepBtree* node = CordRepBtreeFromFlats(flats);
+
+ for (size_t skip1 = 0; skip1 < data.length() - kChars; ++skip1) {
+ for (size_t skip2 = 0; skip2 < data.length() - kChars; ++skip2) {
+ CordRepBtreeReader reader;
+ absl::string_view chunk = reader.Init(node);
+ size_t consumed = chunk.length();
+
+ chunk = reader.Skip(skip1);
+ ASSERT_THAT(chunk, Eq(data.substr(consumed + skip1, chunk.length())));
+ consumed += chunk.length() + skip1;
+ ASSERT_THAT(reader.consumed(), Eq(consumed));
+
+ if (consumed >= data.length()) continue;
+
+ size_t skip = std::min(data.length() - consumed - 1, skip2);
+ chunk = reader.Skip(skip);
+ ASSERT_THAT(chunk, Eq(data.substr(consumed + skip, chunk.length())));
+ }
+ }
+
+ CordRep::Unref(node);
+ }
+}
+
+TEST(CordRepBtreeReaderTest, SkipBeyondLength) {
+ CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
+ tree = CordRepBtree::Append(tree, MakeFlat("def"));
+ CordRepBtreeReader reader;
+ reader.Init(tree);
+ EXPECT_THAT(reader.Skip(100), IsEmpty());
+ EXPECT_THAT(reader.consumed(), Eq(6));
+ CordRep::Unref(tree);
+}
+
+TEST(CordRepBtreeReaderTest, Seek) {
+ constexpr size_t kChars = 3;
+ const size_t cap = CordRepBtree::kMaxCapacity;
+ int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17};
+
+ for (int count : counts) {
+ std::string data = CreateRandomString(count * kChars);
+ std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars);
+ CordRepBtree* node = CordRepBtreeFromFlats(flats);
+
+ for (size_t seek = 0; seek < data.length() - 1; ++seek) {
+ CordRepBtreeReader reader;
+ reader.Init(node);
+ absl::string_view chunk = reader.Seek(seek);
+ ASSERT_THAT(chunk, Not(IsEmpty()));
+ ASSERT_THAT(chunk, Eq(data.substr(seek, chunk.length())));
+ ASSERT_THAT(reader.consumed(), Eq(seek + chunk.length()));
+ }
+
+ CordRep::Unref(node);
+ }
+}
+
+TEST(CordRepBtreeReaderTest, SeekBeyondLength) {
+ CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
+ tree = CordRepBtree::Append(tree, MakeFlat("def"));
+ CordRepBtreeReader reader;
+ reader.Init(tree);
+ EXPECT_THAT(reader.Seek(6), IsEmpty());
+ EXPECT_THAT(reader.consumed(), Eq(6));
+ EXPECT_THAT(reader.Seek(100), IsEmpty());
+ EXPECT_THAT(reader.consumed(), Eq(6));
+ CordRep::Unref(tree);
+}
+
+TEST(CordRepBtreeReaderTest, Read) {
+ std::string data = "abcdefghijklmno";
+ std::vector<CordRep*> flats = CreateFlatsFromString(data, 5);
+ CordRepBtree* node = CordRepBtreeFromFlats(flats);
+
+ CordRep* tree;
+ CordRepBtreeReader reader;
+ absl::string_view chunk;
+
+ // Read zero bytes
+ chunk = reader.Init(node);
+ chunk = reader.Read(0, chunk.length(), tree);
+ EXPECT_THAT(tree, Eq(nullptr));
+ EXPECT_THAT(chunk, Eq("abcde"));
+ EXPECT_THAT(reader.consumed(), Eq(5));
+ EXPECT_THAT(reader.Next(), Eq("fghij"));
+
+ // Read in full
+ chunk = reader.Init(node);
+ chunk = reader.Read(15, chunk.length(), tree);
+ EXPECT_THAT(tree, Ne(nullptr));
+ EXPECT_THAT(CordToString(tree), Eq("abcdefghijklmno"));
+ EXPECT_THAT(chunk, Eq(""));
+ EXPECT_THAT(reader.consumed(), Eq(15));
+ CordRep::Unref(tree);
+
+ // Read < chunk bytes
+ chunk = reader.Init(node);
+ chunk = reader.Read(3, chunk.length(), tree);
+ ASSERT_THAT(tree, Ne(nullptr));
+ EXPECT_THAT(CordToString(tree), Eq("abc"));
+ EXPECT_THAT(chunk, Eq("de"));
+ EXPECT_THAT(reader.consumed(), Eq(5));
+ EXPECT_THAT(reader.Next(), Eq("fghij"));
+ CordRep::Unref(tree);
+
+ // Read < chunk bytes at offset
+ chunk = reader.Init(node);
+ chunk = reader.Read(2, chunk.length() - 2, tree);
+ ASSERT_THAT(tree, Ne(nullptr));
+ EXPECT_THAT(CordToString(tree), Eq("cd"));
+ EXPECT_THAT(chunk, Eq("e"));
+ EXPECT_THAT(reader.consumed(), Eq(5));
+ EXPECT_THAT(reader.Next(), Eq("fghij"));
+ CordRep::Unref(tree);
+
+ // Read from consumed chunk
+ chunk = reader.Init(node);
+ chunk = reader.Read(3, 0, tree);
+ ASSERT_THAT(tree, Ne(nullptr));
+ EXPECT_THAT(CordToString(tree), Eq("fgh"));
+ EXPECT_THAT(chunk, Eq("ij"));
+ EXPECT_THAT(reader.consumed(), Eq(10));
+ EXPECT_THAT(reader.Next(), Eq("klmno"));
+ CordRep::Unref(tree);
+
+ // Read across chunks
+ chunk = reader.Init(node);
+ chunk = reader.Read(12, chunk.length() - 2, tree);
+ ASSERT_THAT(tree, Ne(nullptr));
+ EXPECT_THAT(CordToString(tree), Eq("cdefghijklmn"));
+ EXPECT_THAT(chunk, Eq("o"));
+ EXPECT_THAT(reader.consumed(), Eq(15));
+ CordRep::Unref(tree);
+
+ // Read across chunks landing on exact edge boundary
+ chunk = reader.Init(node);
+ chunk = reader.Read(10 - 2, chunk.length() - 2, tree);
+ ASSERT_THAT(tree, Ne(nullptr));
+ EXPECT_THAT(CordToString(tree), Eq("cdefghij"));
+ EXPECT_THAT(chunk, Eq("klmno"));
+ EXPECT_THAT(reader.consumed(), Eq(15));
+ CordRep::Unref(tree);
+
+ CordRep::Unref(node);
+}
+
+TEST(CordRepBtreeReaderTest, ReadExhaustive) {
+ constexpr size_t kChars = 3;
+ const size_t cap = CordRepBtree::kMaxCapacity;
+ int counts[] = {1, 2, cap, cap * cap + 1, cap * cap * cap * 2 + 17};
+
+ for (int count : counts) {
+ std::string data = CreateRandomString(count * kChars);
+ std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars);
+ CordRepBtree* node = CordRepBtreeFromFlats(flats);
+
+ for (size_t read_size : {kChars - 1, kChars, kChars + 7, cap * cap}) {
+ CordRepBtreeReader reader;
+ absl::string_view chunk = reader.Init(node);
+
+ // `consumed` tracks the end of last consumed chunk which is the start of
+ // the next chunk: we always read with `chunk_size = chunk.length()`.
+ size_t consumed = 0;
+ size_t remaining = data.length();
+ while (remaining > 0) {
+ CordRep* tree;
+ size_t n = (std::min)(remaining, read_size);
+ chunk = reader.Read(n, chunk.length(), tree);
+ EXPECT_THAT(tree, Ne(nullptr));
+ if (tree) {
+ EXPECT_THAT(CordToString(tree), Eq(data.substr(consumed, n)));
+ CordRep::Unref(tree);
+ }
+
+ consumed += n;
+ remaining -= n;
+ EXPECT_THAT(reader.consumed(), Eq(consumed + chunk.length()));
+
+ if (remaining > 0) {
+ ASSERT_FALSE(chunk.empty());
+ ASSERT_THAT(chunk, Eq(data.substr(consumed, chunk.length())));
+ } else {
+ ASSERT_TRUE(chunk.empty()) << chunk;
+ }
+ }
+ }
+
+ CordRep::Unref(node);
+ }
+}
+
+} // namespace
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl