diff options
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/BUILD.bazel | 19 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 20 | ||||
-rw-r--r-- | absl/strings/cord_ring_test.cc | 2 | ||||
-rw-r--r-- | absl/strings/cord_test.cc | 18 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree_reader.cc | 68 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree_reader.h | 219 | ||||
-rw-r--r-- | absl/strings/internal/cord_rep_btree_reader_test.cc | 285 |
7 files changed, 621 insertions, 10 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 7e0245a3..9720b683 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -271,6 +271,7 @@ cc_library( "internal/cord_internal.cc", "internal/cord_rep_btree.cc", "internal/cord_rep_btree_navigator.cc", + "internal/cord_rep_btree_reader.cc", "internal/cord_rep_consume.cc", "internal/cord_rep_ring.cc", ], @@ -278,6 +279,7 @@ cc_library( "internal/cord_internal.h", "internal/cord_rep_btree.h", "internal/cord_rep_btree_navigator.h", + "internal/cord_rep_btree_reader.h", "internal/cord_rep_consume.h", "internal/cord_rep_flat.h", "internal/cord_rep_ring.h", @@ -336,6 +338,23 @@ cc_test( ], ) +cc_test( + name = "cord_rep_btree_reader_test", + size = "medium", + srcs = ["internal/cord_rep_btree_reader_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "cordz_update_tracker", hdrs = ["internal/cordz_update_tracker.h"], diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index e55c035d..88f076a8 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -557,6 +557,7 @@ absl_cc_library( "internal/cord_internal.h" "internal/cord_rep_btree.h" "internal/cord_rep_btree_navigator.h" + "internal/cord_rep_btree_reader.h" "internal/cord_rep_consume.h" "internal/cord_rep_flat.h" "internal/cord_rep_ring.h" @@ -565,6 +566,7 @@ absl_cc_library( "internal/cord_internal.cc" "internal/cord_rep_btree.cc" "internal/cord_rep_btree_navigator.cc" + "internal/cord_rep_btree_reader.cc" "internal/cord_rep_consume.cc" "internal/cord_rep_ring.cc" COPTS @@ -979,6 +981,24 @@ absl_cc_test( absl_cc_test( NAME + cord_rep_btree_reader_test + SRCS + "internal/cord_rep_btree_reader_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::cord_rep_test_util + absl::core_headers + absl::raw_logging_internal + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME cord_ring_test SRCS "cord_ring_test.cc" diff --git a/absl/strings/cord_ring_test.cc b/absl/strings/cord_ring_test.cc index b1787edf..f1318595 100644 --- a/absl/strings/cord_ring_test.cc +++ b/absl/strings/cord_ring_test.cc @@ -275,7 +275,7 @@ CordRepConcat* MakeConcat(CordRep* left, CordRep* right, int depth = 0) { enum Composition { kMix, kAppend, kPrepend }; Composition RandomComposition() { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); return (rng() & 1) ? kMix : ((rng() & 1) ? kAppend : kPrepend); } diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 14eca155..597378cc 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -361,7 +361,7 @@ TEST(Cord, StartsEndsWith) { } TEST(Cord, Subcord) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); const std::string s = RandomLowercaseString(&rng, 1024); absl::Cord a; @@ -570,7 +570,7 @@ TEST(Cord, Flatten) { VerifyFlatten(absl::MakeFragmentedCord({"small ", "fragmented ", "cord"})); // Test with a cord that is longer than the largest flat buffer - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); VerifyFlatten(absl::Cord(RandomLowercaseString(&rng, 8192))); } @@ -937,7 +937,7 @@ static void TestCompare(const absl::Cord& c, const absl::Cord& d, } TEST(Compare, ComparisonIsUnsigned) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); std::uniform_int_distribution<uint32_t> uniform_uint8(0, 255); char x = static_cast<char>(uniform_uint8(rng)); TestCompare( @@ -947,7 +947,7 @@ TEST(Compare, ComparisonIsUnsigned) { TEST(Compare, RandomComparisons) { const int kIters = 5000; - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); int n = GetUniformRandomUpTo(&rng, 5000); absl::Cord a[] = {MakeExternalCord(n), @@ -1082,7 +1082,7 @@ TEST(ConstructFromExternal, ReleaserInvoked) { } TEST(ConstructFromExternal, CompareContents) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); for (int length = 1; length <= 2048; length *= 2) { std::string data = RandomLowercaseString(&rng, length); @@ -1098,7 +1098,7 @@ TEST(ConstructFromExternal, CompareContents) { } TEST(ConstructFromExternal, LargeReleaser) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); constexpr size_t kLength = 256; std::string data = RandomLowercaseString(&rng, kLength); std::array<char, kLength> data_array; @@ -1323,7 +1323,7 @@ TEST(Cord, DiabolicalGrowth) { // resulting cord. // TODO(b/183983616): Apply some minimum compaction when copying a shared // source cord into a mutable copy for updates in CordRepRing. - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); const std::string expected = RandomLowercaseString(&rng, 5000); absl::Cord cord; for (char c : expected) { @@ -1483,7 +1483,7 @@ TEST(CordChunkIterator, Operations) { VerifyChunkIterator(reused_nodes_cord, expected_chunks); } - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); absl::Cord subcords; for (int i = 0; i < 128; ++i) subcords.Prepend(flat_cord.Subcord(i, 128)); @@ -1621,7 +1621,7 @@ TEST(CordCharIterator, Operations) { VerifyCharIterator(reused_nodes_cord); } - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); absl::Cord subcords; for (int i = 0; i < 4; ++i) subcords.Prepend(flat_cord.Subcord(16 * i, 128)); diff --git a/absl/strings/internal/cord_rep_btree_reader.cc b/absl/strings/internal/cord_rep_btree_reader.cc new file mode 100644 index 00000000..3ba43144 --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_reader.cc @@ -0,0 +1,68 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_reader.h" + +#include <cassert> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_navigator.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size, + CordRep*& tree) { + assert(chunk_size <= navigator_.Current()->length); + + // If chunk_size is non-zero, we need to start inside last returned edge. + // Else we start reading at the next data edge of the tree. + CordRep* edge = chunk_size ? navigator_.Current() : navigator_.Next(); + const size_t offset = chunk_size ? edge->length - chunk_size : 0; + + // Read the sub tree and verify we got what we wanted. + ReadResult result = navigator_.Read(offset, n); + tree = result.tree; + + // If the data returned in `tree` was covered entirely by `chunk_size`, i.e., + // read from the 'previous' edge, we did not consume any additional data, and + // can directly return the substring into the current data edge as the next + // chunk. We can easily establish from the above code that `navigator_.Next()` + // has not been called as that requires `chunk_size` to be zero. + if (n < chunk_size) return CordRepBtree::EdgeData(edge).substr(result.n); + + // The amount of data taken from the last edge is `chunk_size` and `result.n` + // contains the offset into the current edge trailing the read data (which can + // be 0). As the call to `navigator_.Read()` could have consumed all remaining + // data, calling `navigator_.Current()` is not safe before checking if we + // already consumed all remaining data. + const size_t consumed_by_read = n - chunk_size - result.n; + if (consumed_ + consumed_by_read >= length()) { + consumed_ = length(); + return {}; + } + + // We did not read all data, return remaining data from current edge. + edge = navigator_.Current(); + consumed_ += consumed_by_read + edge->length; + return CordRepBtree::EdgeData(edge).substr(result.n); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree_reader.h b/absl/strings/internal/cord_rep_btree_reader.h new file mode 100644 index 00000000..c19fa43d --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_reader.h @@ -0,0 +1,219 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ + +#include <cassert> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_navigator.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeReader implements logic to iterate over cord btrees. +// References to the underlying data are returned as absl::string_view values. +// The most typical use case is a forward only iteration over tree data. +// The class also provides `Skip()`, `Seek()` and `Read()` methods similar to +// CordRepBtreeNavigator that allow more advanced navigation. The class provides +// a `consumed` property which contains the end offset of the chunk last +// returned to the user which is useful in cord iteration logic. +// +// Example: iterate over all data inside a cord btree: +// +// CordRepBtreeReader reader; +// for (string_view sv = reader.Init(tree); !sv.Empty(); sv = sv.Next()) { +// DoSomethingWithDataIn(sv); +// } +// +// All navigation methods always return the next 'chunk' of data. The class +// assumes that all data is directly 'consumed' by the caller. For example: +// invoking `Skip()` will skip the desired number of bytes, and directly +// read and return the next chunk of data directly after the skipped bytes. +// +// Example: iterate over all data inside a btree skipping the first 100 bytes: +// +// CordRepBtreeReader reader; +// absl::string_view sv = reader.Init(tree); +// if (sv.length() > 100) { +// sv.RemovePrefix(100); +// } else { +// sv = reader.Skip(100 - sv.length()); +// } +// while (!sv.empty()) { +// DoSomethingWithDataIn(sv); +// absl::string_view sv = reader.Next(); +// } +// +// It is important to notice that `consumed` represents the end position of the +// last data edge returned to the caller, not the cumulative data returned to +// the caller which can be less in cases of skipping or seeking over data. +// +// For example, consider a cord btree with five data edges: "abc", "def", "ghi", +// "jkl" and "mno": +// +// absl::string_view sv; +// CordRepBtreeReader reader; +// +// sv = reader.Init(tree); // sv = "abc", reader.consumed() = 3 +// sv = reader.Skip(4); // sv = "hi", reader.consumed() = 9 +// sv = reader.Skip(2); // sv = "l", reader.consumed() = 12 +// sv = reader.Next(); // sv = "mno", reader.consumed() = 15 +// +// In the above example, `reader.consumed()` reflects the data edges iterated +// over or skipped by the reader, not the amount of data 'consumed' by the +// caller. +class CordRepBtreeReader { + public: + using ReadResult = CordRepBtreeNavigator::ReadResult; + using Position = CordRepBtreeNavigator::Position; + + // Returns true if this instance is not empty. + explicit operator bool() const { return navigator_.btree() != nullptr; } + + // Returns the tree referenced by this instance or nullptr if empty. + CordRepBtree* btree() const { return navigator_.btree(); } + + // Returns the current data edge inside the referenced btree. + // Requires that the current instance is not empty. + CordRep* node() const { return navigator_.Current(); } + + // Returns the length of the referenced tree. + // Requires that the current instance is not empty. + size_t length() const; + + // Returns the end offset of the last navigated to chunk, which represents the + // total bytes 'consumed' relative to the start of the tree. The returned + // value is never zero. For example, initializing a reader with a tree with a + // first data edge of 19 bytes will return `consumed() = 19`. See also the + // class comments on the meaning of `consumed`. + // Requires that the current instance is not empty. + size_t consumed() const; + + // Resets this instance to an empty value. + void Reset() { navigator_.Reset(); } + + // Initializes this instance with `tree`. `tree` must not be null. + // Returns a reference to the first data edge of the provided tree. + absl::string_view Init(CordRepBtree* tree); + + // Navigates to and returns the next data edge of the referenced tree. + // Returns an empty string_view if an attempt is made to read beyond the end + // of the tree, i.e.: if `remaining()` is zero indicating an EOF condition. + // Requires that the current instance is not empty. + absl::string_view Next(); + + // Skips the provided amount of bytes and returns a reference to the data + // directly following the skipped bytes. + absl::string_view Skip(size_t skip); + + // Reads `n` bytes into `tree`. + // If `chunk_size` is zero, starts reading at the next data edge. If + // `chunk_size` is non zero, the read starts at the last `chunk_size` bytes of + // the last returned data edge. Effectively, this means that the read starts + // at offset `consumed() - chunk_size`. + // Requires that `chunk_size` is less than or equal to the length of the + // last returned data edge. The purpose of `chunk_size` is to simplify code + // partially consuming a returned chunk and wanting to include the remaining + // bytes in the Read call. For example, the below code will read 1000 bytes of + // data into a cord tree if the first chunk starts with "big:": + // + // CordRepBtreeReader reader; + // absl::string_view sv = reader.Init(tree); + // if (absl::StartsWith(sv, "big:")) { + // CordRepBtree tree; + // sv = reader.Read(1000, sv.size() - 4 /* "big:" */, &tree); + // } + // + // This method will return an empty string view if all remaining data was + // read. If `n` exceeded the amount of remaining data this function will + // return an empty string view and `tree` will be set to nullptr. + // In both cases, `consumed` will be set to `length`. + absl::string_view Read(size_t n, size_t chunk_size, CordRep*& tree); + + // Navigates to the chunk at offset `offset`. + // Returns a reference into the navigated to chunk, adjusted for the relative + // position of `offset` into that chunk. For example, calling `Seek(13)` on a + // cord tree containing 2 chunks of 10 and 20 bytes respectively will return + // a string view into the second chunk starting at offset 3 with a size of 17. + // Returns an empty string view if `offset` is equal to or greater than the + // length of the referenced tree. + absl::string_view Seek(size_t offset); + + private: + size_t consumed_; + CordRepBtreeNavigator navigator_; +}; + +inline size_t CordRepBtreeReader::length() const { + assert(btree() != nullptr); + return btree()->length; +} + +inline size_t CordRepBtreeReader::consumed() const { + assert(btree() != nullptr); + return consumed_; +} + +inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) { + assert(tree != nullptr); + const CordRep* edge = navigator_.InitFirst(tree); + consumed_ = edge->length; + return CordRepBtree::EdgeData(edge); +} + +inline absl::string_view CordRepBtreeReader::Next() { + assert(consumed() < length()); + const CordRep* edge = navigator_.Next(); + assert(edge != nullptr); + consumed_ += edge->length; + return CordRepBtree::EdgeData(edge); +} + +inline absl::string_view CordRepBtreeReader::Skip(size_t skip) { + // As we are always positioned on the last 'consumed' edge, we + // need to skip the current edge as well as `skip`. + const size_t edge_length = navigator_.Current()->length; + CordRepBtreeNavigator::Position pos = navigator_.Skip(skip + edge_length); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + consumed_ = length(); + return {}; + } + // The combined length of all edges skipped before `pos.edge` is `skip - + // pos.offset`, all of which are 'consumed', as well as the current edge. + consumed_ += skip - pos.offset + pos.edge->length; + return CordRepBtree::EdgeData(pos.edge).substr(pos.offset); +} + +inline absl::string_view CordRepBtreeReader::Seek(size_t offset) { + const CordRepBtreeNavigator::Position pos = navigator_.Seek(offset); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + consumed_ = length(); + return {}; + } + absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset); + consumed_ = offset + chunk.length(); + return chunk; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ diff --git a/absl/strings/internal/cord_rep_btree_reader_test.cc b/absl/strings/internal/cord_rep_btree_reader_test.cc new file mode 100644 index 00000000..44d3365f --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_reader_test.cc @@ -0,0 +1,285 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_reader.h" + +#include <iostream> +#include <random> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::Ne; +using ::testing::Not; + +using ::absl::cordrep_testing::CordRepBtreeFromFlats; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; + +using ReadResult = CordRepBtreeReader::ReadResult; + +TEST(CordRepBtreeReaderTest, Next) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + CordRepBtreeReader reader; + absl::string_view chunk = reader.Init(node); + EXPECT_THAT(chunk, Eq(data.substr(0, chunk.length()))); + + size_t consumed = chunk.length(); + EXPECT_THAT(reader.consumed(), Eq(consumed)); + + while (consumed < data.length()) { + chunk = reader.Next(); + EXPECT_THAT(chunk, Eq(data.substr(consumed, chunk.length()))); + + consumed += chunk.length(); + EXPECT_THAT(reader.consumed(), Eq(consumed)); + } + + EXPECT_THAT(consumed, Eq(data.length())); + EXPECT_THAT(reader.consumed(), Eq(data.length())); + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, Skip) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t skip1 = 0; skip1 < data.length() - kChars; ++skip1) { + for (size_t skip2 = 0; skip2 < data.length() - kChars; ++skip2) { + CordRepBtreeReader reader; + absl::string_view chunk = reader.Init(node); + size_t consumed = chunk.length(); + + chunk = reader.Skip(skip1); + ASSERT_THAT(chunk, Eq(data.substr(consumed + skip1, chunk.length()))); + consumed += chunk.length() + skip1; + ASSERT_THAT(reader.consumed(), Eq(consumed)); + + if (consumed >= data.length()) continue; + + size_t skip = std::min(data.length() - consumed - 1, skip2); + chunk = reader.Skip(skip); + ASSERT_THAT(chunk, Eq(data.substr(consumed + skip, chunk.length()))); + } + } + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, SkipBeyondLength) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeReader reader; + reader.Init(tree); + EXPECT_THAT(reader.Skip(100), IsEmpty()); + EXPECT_THAT(reader.consumed(), Eq(6)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeReaderTest, Seek) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t seek = 0; seek < data.length() - 1; ++seek) { + CordRepBtreeReader reader; + reader.Init(node); + absl::string_view chunk = reader.Seek(seek); + ASSERT_THAT(chunk, Not(IsEmpty())); + ASSERT_THAT(chunk, Eq(data.substr(seek, chunk.length()))); + ASSERT_THAT(reader.consumed(), Eq(seek + chunk.length())); + } + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, SeekBeyondLength) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeReader reader; + reader.Init(tree); + EXPECT_THAT(reader.Seek(6), IsEmpty()); + EXPECT_THAT(reader.consumed(), Eq(6)); + EXPECT_THAT(reader.Seek(100), IsEmpty()); + EXPECT_THAT(reader.consumed(), Eq(6)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeReaderTest, Read) { + std::string data = "abcdefghijklmno"; + std::vector<CordRep*> flats = CreateFlatsFromString(data, 5); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + CordRep* tree; + CordRepBtreeReader reader; + absl::string_view chunk; + + // Read zero bytes + chunk = reader.Init(node); + chunk = reader.Read(0, chunk.length(), tree); + EXPECT_THAT(tree, Eq(nullptr)); + EXPECT_THAT(chunk, Eq("abcde")); + EXPECT_THAT(reader.consumed(), Eq(5)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + + // Read in full + chunk = reader.Init(node); + chunk = reader.Read(15, chunk.length(), tree); + EXPECT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("abcdefghijklmno")); + EXPECT_THAT(chunk, Eq("")); + EXPECT_THAT(reader.consumed(), Eq(15)); + CordRep::Unref(tree); + + // Read < chunk bytes + chunk = reader.Init(node); + chunk = reader.Read(3, chunk.length(), tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("abc")); + EXPECT_THAT(chunk, Eq("de")); + EXPECT_THAT(reader.consumed(), Eq(5)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + CordRep::Unref(tree); + + // Read < chunk bytes at offset + chunk = reader.Init(node); + chunk = reader.Read(2, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cd")); + EXPECT_THAT(chunk, Eq("e")); + EXPECT_THAT(reader.consumed(), Eq(5)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + CordRep::Unref(tree); + + // Read from consumed chunk + chunk = reader.Init(node); + chunk = reader.Read(3, 0, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("fgh")); + EXPECT_THAT(chunk, Eq("ij")); + EXPECT_THAT(reader.consumed(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("klmno")); + CordRep::Unref(tree); + + // Read across chunks + chunk = reader.Init(node); + chunk = reader.Read(12, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cdefghijklmn")); + EXPECT_THAT(chunk, Eq("o")); + EXPECT_THAT(reader.consumed(), Eq(15)); + CordRep::Unref(tree); + + // Read across chunks landing on exact edge boundary + chunk = reader.Init(node); + chunk = reader.Read(10 - 2, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cdefghij")); + EXPECT_THAT(chunk, Eq("klmno")); + EXPECT_THAT(reader.consumed(), Eq(15)); + CordRep::Unref(tree); + + CordRep::Unref(node); +} + +TEST(CordRepBtreeReaderTest, ReadExhaustive) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap + 1, cap * cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t read_size : {kChars - 1, kChars, kChars + 7, cap * cap}) { + CordRepBtreeReader reader; + absl::string_view chunk = reader.Init(node); + + // `consumed` tracks the end of last consumed chunk which is the start of + // the next chunk: we always read with `chunk_size = chunk.length()`. + size_t consumed = 0; + size_t remaining = data.length(); + while (remaining > 0) { + CordRep* tree; + size_t n = (std::min)(remaining, read_size); + chunk = reader.Read(n, chunk.length(), tree); + EXPECT_THAT(tree, Ne(nullptr)); + if (tree) { + EXPECT_THAT(CordToString(tree), Eq(data.substr(consumed, n))); + CordRep::Unref(tree); + } + + consumed += n; + remaining -= n; + EXPECT_THAT(reader.consumed(), Eq(consumed + chunk.length())); + + if (remaining > 0) { + ASSERT_FALSE(chunk.empty()); + ASSERT_THAT(chunk, Eq(data.substr(consumed, chunk.length()))); + } else { + ASSERT_TRUE(chunk.empty()) << chunk; + } + } + } + + CordRep::Unref(node); + } +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl |