23 files changed, 1215 insertions, 136 deletions
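The largest change below converts raw_hash_set's control byte type from a plain `signed char` plus unscoped enum into `enum class ctrl_t : int8_t`. The following standalone sketch is not part of the patch; it only illustrates the pattern the hunks apply: the fixed `int8_t` underlying type preserves the old byte layout, while the scoped enum removes implicit integer conversions, which is why the diff adds explicit static_casts at every use site.

#include <cstdint>
#include <cstring>

// Sketch only: mirrors the ctrl_t definition this commit adds to
// raw_hash_set.h.
enum class ctrl_t : int8_t {
  kEmpty = -128,   // 0b10000000
  kDeleted = -2,   // 0b11111110
  kSentinel = -1,  // 0b11111111
};

inline bool IsFull(ctrl_t c) { return c >= static_cast<ctrl_t>(0); }

int main() {
  ctrl_t group[16];
  // With a scoped enum the value no longer converts implicitly: both the
  // memset fill byte and ordered comparisons need explicit casts.
  std::memset(group, static_cast<int8_t>(ctrl_t::kEmpty), sizeof(group));
  group[15] = ctrl_t::kSentinel;
  return IsFull(group[0]) ? 1 : 0;  // kEmpty is not full, so returns 0.
}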
diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index 65014bfb..66c9e395 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake @@ -207,6 +207,8 @@ set(ABSL_INTERNAL_DLL_FILES "strings/internal/cord_rep_consume.cc" "strings/internal/cord_rep_btree.cc" "strings/internal/cord_rep_btree.h" + "strings/internal/cord_rep_btree_navigator.cc" + "strings/internal/cord_rep_btree_navigator.h" "strings/internal/cord_rep_flat.h" "strings/internal/cord_rep_ring.cc" "strings/internal/cord_rep_ring.h" diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index f5a9ce68..a5dee6aa 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -37,25 +37,23 @@ inline size_t RandomSeed() { return value ^ static_cast<size_t>(reinterpret_cast<uintptr_t>(&counter)); } -bool ShouldInsertBackwards(size_t hash, ctrl_t* ctrl) { +bool ShouldInsertBackwards(size_t hash, const ctrl_t* ctrl) { // To avoid problems with weak hashes and single bit tests, we use % 13. // TODO(kfm,sbenza): revisit after we do unconditional mixing return (H1(hash, ctrl) ^ RandomSeed()) % 13 > 6; } -void ConvertDeletedToEmptyAndFullToDeleted( - ctrl_t* ctrl, size_t capacity) { - assert(ctrl[capacity] == kSentinel); +void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) { + assert(ctrl[capacity] == ctrl_t::kSentinel); assert(IsValidCapacity(capacity)); for (ctrl_t* pos = ctrl; pos < ctrl + capacity; pos += Group::kWidth) { Group{pos}.ConvertSpecialToEmptyAndFullToDeleted(pos); } // Copy the cloned ctrl bytes. std::memcpy(ctrl + capacity + 1, ctrl, NumClonedBytes()); - ctrl[capacity] = kSentinel; + ctrl[capacity] = ctrl_t::kSentinel; } - } // namespace container_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 675c9148..d7783263 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -252,48 +252,57 @@ class BitMask { T mask_; }; -using ctrl_t = signed char; using h2_t = uint8_t; // The values here are selected for maximum performance. See the static asserts -// below for details. -enum Ctrl : ctrl_t { +// below for details. We use an enum class so that when strict aliasing is +// enabled, the compiler knows ctrl_t doesn't alias other types. 
+enum class ctrl_t : int8_t { kEmpty = -128, // 0b10000000 kDeleted = -2, // 0b11111110 kSentinel = -1, // 0b11111111 }; static_assert( - kEmpty & kDeleted & kSentinel & 0x80, + (static_cast<int8_t>(ctrl_t::kEmpty) & + static_cast<int8_t>(ctrl_t::kDeleted) & + static_cast<int8_t>(ctrl_t::kSentinel) & 0x80) != 0, "Special markers need to have the MSB to make checking for them efficient"); -static_assert(kEmpty < kSentinel && kDeleted < kSentinel, - "kEmpty and kDeleted must be smaller than kSentinel to make the " - "SIMD test of IsEmptyOrDeleted() efficient"); -static_assert(kSentinel == -1, - "kSentinel must be -1 to elide loading it from memory into SIMD " - "registers (pcmpeqd xmm, xmm)"); -static_assert(kEmpty == -128, - "kEmpty must be -128 to make the SIMD check for its " +static_assert( + ctrl_t::kEmpty < ctrl_t::kSentinel && ctrl_t::kDeleted < ctrl_t::kSentinel, + "ctrl_t::kEmpty and ctrl_t::kDeleted must be smaller than " + "ctrl_t::kSentinel to make the SIMD test of IsEmptyOrDeleted() efficient"); +static_assert( + ctrl_t::kSentinel == static_cast<ctrl_t>(-1), + "ctrl_t::kSentinel must be -1 to elide loading it from memory into SIMD " + "registers (pcmpeqd xmm, xmm)"); +static_assert(ctrl_t::kEmpty == static_cast<ctrl_t>(-128), + "ctrl_t::kEmpty must be -128 to make the SIMD check for its " "existence efficient (psignb xmm, xmm)"); -static_assert(~kEmpty & ~kDeleted & kSentinel & 0x7F, - "kEmpty and kDeleted must share an unset bit that is not shared " - "by kSentinel to make the scalar test for MatchEmptyOrDeleted() " - "efficient"); -static_assert(kDeleted == -2, - "kDeleted must be -2 to make the implementation of " +static_assert( + (~static_cast<int8_t>(ctrl_t::kEmpty) & + ~static_cast<int8_t>(ctrl_t::kDeleted) & + static_cast<int8_t>(ctrl_t::kSentinel) & 0x7F) != 0, + "ctrl_t::kEmpty and ctrl_t::kDeleted must share an unset bit that is not " + "shared by ctrl_t::kSentinel to make the scalar test for " + "MatchEmptyOrDeleted() efficient"); +static_assert(ctrl_t::kDeleted == static_cast<ctrl_t>(-2), + "ctrl_t::kDeleted must be -2 to make the implementation of " "ConvertSpecialToEmptyAndFullToDeleted efficient"); // A single block of empty control bytes for tables without any slots allocated. // This enables removing a branch in the hot path of find(). inline ctrl_t* EmptyGroup() { alignas(16) static constexpr ctrl_t empty_group[] = { - kSentinel, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, - kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty}; + ctrl_t::kSentinel, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, + ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, + ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, + ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty}; return const_cast<ctrl_t*>(empty_group); } // Mixes a randomly generated per-process seed with `hash` and `ctrl` to // randomize insertion order within groups. -bool ShouldInsertBackwards(size_t hash, ctrl_t* ctrl); +bool ShouldInsertBackwards(size_t hash, const ctrl_t* ctrl); // Returns a hash seed. 
// @@ -309,12 +318,12 @@ inline size_t HashSeed(const ctrl_t* ctrl) { inline size_t H1(size_t hash, const ctrl_t* ctrl) { return (hash >> 7) ^ HashSeed(ctrl); } -inline ctrl_t H2(size_t hash) { return hash & 0x7F; } +inline h2_t H2(size_t hash) { return hash & 0x7F; } -inline bool IsEmpty(ctrl_t c) { return c == kEmpty; } -inline bool IsFull(ctrl_t c) { return c >= 0; } -inline bool IsDeleted(ctrl_t c) { return c == kDeleted; } -inline bool IsEmptyOrDeleted(ctrl_t c) { return c < kSentinel; } +inline bool IsEmpty(ctrl_t c) { return c == ctrl_t::kEmpty; } +inline bool IsFull(ctrl_t c) { return c >= static_cast<ctrl_t>(0); } +inline bool IsDeleted(ctrl_t c) { return c == ctrl_t::kDeleted; } +inline bool IsEmptyOrDeleted(ctrl_t c) { return c < ctrl_t::kSentinel; } #if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 @@ -351,24 +360,24 @@ struct GroupSse2Impl { // Returns a bitmask representing the positions of empty slots. BitMask<uint32_t, kWidth> MatchEmpty() const { #if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSSE3 - // This only works because kEmpty is -128. + // This only works because ctrl_t::kEmpty is -128. return BitMask<uint32_t, kWidth>( _mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl))); #else - return Match(static_cast<h2_t>(kEmpty)); + return Match(static_cast<h2_t>(ctrl_t::kEmpty)); #endif } // Returns a bitmask representing the positions of empty or deleted slots. BitMask<uint32_t, kWidth> MatchEmptyOrDeleted() const { - auto special = _mm_set1_epi8(kSentinel); + auto special = _mm_set1_epi8(static_cast<int8_t>(ctrl_t::kSentinel)); return BitMask<uint32_t, kWidth>( _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl))); } // Returns the number of trailing empty or deleted elements in the group. uint32_t CountLeadingEmptyOrDeleted() const { - auto special = _mm_set1_epi8(kSentinel); + auto special = _mm_set1_epi8(static_cast<int8_t>(ctrl_t::kSentinel)); return TrailingZeros(static_cast<uint32_t>( _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)) + 1)); } @@ -403,7 +412,7 @@ struct GroupPortableImpl { // // Caveat: there are false positives but: // - they only occur if there is a real match - // - they never occur on kEmpty, kDeleted, kSentinel + // - they never occur on ctrl_t::kEmpty, ctrl_t::kDeleted, ctrl_t::kSentinel // - they will be handled gracefully by subsequent checks in code // // Example: @@ -459,8 +468,8 @@ inline bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; } // PRECONDITION: // IsValidCapacity(capacity) -// ctrl[capacity] == kSentinel -// ctrl[i] != kSentinel for all i < capacity +// ctrl[capacity] == ctrl_t::kSentinel +// ctrl[i] != ctrl_t::kSentinel for all i < capacity // Applies mapping for every byte in ctrl: // DELETED -> EMPTY // EMPTY -> EMPTY @@ -545,27 +554,28 @@ struct FindInfo { // This is important to make 1 a valid capacity. // // - In small mode only the first `capacity()` control bytes after the -// sentinel are valid. The rest contain dummy kEmpty values that do not +// sentinel are valid. The rest contain dummy ctrl_t::kEmpty values that do not // represent a real slot. This is important to take into account on // find_first_non_full(), where we never try ShouldInsertBackwards() for // small tables. 
inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; } -inline probe_seq<Group::kWidth> probe(ctrl_t* ctrl, size_t hash, +inline probe_seq<Group::kWidth> probe(const ctrl_t* ctrl, size_t hash, size_t capacity) { return probe_seq<Group::kWidth>(H1(hash, ctrl), capacity); } // Probes the raw_hash_set with the probe sequence for hash and returns the // pointer to the first empty or deleted slot. -// NOTE: this function must work with tables having both kEmpty and kDelete -// in one group. Such tables appears during drop_deletes_without_resize. +// NOTE: this function must work with tables having both ctrl_t::kEmpty and +// ctrl_t::kDeleted in one group. Such tables appear during +// drop_deletes_without_resize. // // This function is very useful when insertions happen and: // - the input is already a set // - there are enough slots // - the element with the hash is not in the table -inline FindInfo find_first_non_full(ctrl_t* ctrl, size_t hash, +inline FindInfo find_first_non_full(const ctrl_t* ctrl, size_t hash, size_t capacity) { auto seq = probe(ctrl, hash, capacity); while (true) { @@ -588,11 +598,12 @@ inline FindInfo find_first_non_full(ctrl_t* ctrl, size_t hash, } } -// Reset all ctrl bytes back to kEmpty, except the sentinel. +// Reset all ctrl bytes back to ctrl_t::kEmpty, except the sentinel. inline void ResetCtrl(size_t capacity, ctrl_t* ctrl, const void* slot, size_t slot_size) { - std::memset(ctrl, kEmpty, capacity + 1 + NumClonedBytes()); - ctrl[capacity] = kSentinel; + std::memset(ctrl, static_cast<int8_t>(ctrl_t::kEmpty), + capacity + 1 + NumClonedBytes()); + ctrl[capacity] = ctrl_t::kSentinel; SanitizerPoisonMemoryRegion(slot, slot_size * capacity); } @@ -613,6 +624,11 @@ inline void SetCtrl(size_t i, ctrl_t h, size_t capacity, ctrl_t* ctrl, ctrl[((i - NumClonedBytes()) & capacity) + (NumClonedBytes() & capacity)] = h; } +inline void SetCtrl(size_t i, h2_t h, size_t capacity, ctrl_t* ctrl, + const void* slot, size_t slot_size) { + SetCtrl(i, static_cast<ctrl_t>(h), capacity, ctrl, slot, slot_size); +} + // Policy: a policy defines how to perform different operations on // the slots of the hashtable (see hash_policy_traits.h for the full interface // of policy). @@ -780,7 +796,7 @@ class raw_hash_set { ctrl_ += shift; slot_ += shift; } - if (ABSL_PREDICT_FALSE(*ctrl_ == kSentinel)) ctrl_ = nullptr; + if (ABSL_PREDICT_FALSE(*ctrl_ == ctrl_t::kSentinel)) ctrl_ = nullptr; } ctrl_t* ctrl_ = nullptr; @@ -1575,8 +1591,8 @@ class raw_hash_set { static_cast<size_t>(empty_after.TrailingZeros() + empty_before.LeadingZeros()) < Group::kWidth; - SetCtrl(index, was_never_full ? kEmpty : kDeleted, capacity_, ctrl_, slots_, - sizeof(slot_type)); + SetCtrl(index, was_never_full ? ctrl_t::kEmpty : ctrl_t::kDeleted, + capacity_, ctrl_, slots_, sizeof(slot_type)); growth_left() += was_never_full; infoz().RecordErase(); } @@ -1706,7 +1722,7 @@ class raw_hash_set { // right time. 
SetCtrl(new_i, H2(hash), capacity_, ctrl_, slots_, sizeof(slot_type)); PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slots_ + i); - SetCtrl(i, kEmpty, capacity_, ctrl_, slots_, sizeof(slot_type)); + SetCtrl(i, ctrl_t::kEmpty, capacity_, ctrl_, slots_, sizeof(slot_type)); } else { assert(IsDeleted(ctrl_[new_i])); SetCtrl(new_i, H2(hash), capacity_, ctrl_, slots_, sizeof(slot_type)); diff --git a/absl/container/internal/raw_hash_set_benchmark.cc b/absl/container/internal/raw_hash_set_benchmark.cc index 977ff4c0..c886d3ad 100644 --- a/absl/container/internal/raw_hash_set_benchmark.cc +++ b/absl/container/internal/raw_hash_set_benchmark.cc @@ -315,9 +315,17 @@ void BM_ReserveStringTable(benchmark::State& state) { } BENCHMARK(BM_ReserveStringTable)->Range(128, 4096); +// Like std::iota, except that ctrl_t doesn't support operator++. +template <typename CtrlIter> +void Iota(CtrlIter begin, CtrlIter end, int value) { + for (; begin != end; ++begin, ++value) { + *begin = static_cast<ctrl_t>(value); + } +} + void BM_Group_Match(benchmark::State& state) { std::array<ctrl_t, Group::kWidth> group; - std::iota(group.begin(), group.end(), -4); + Iota(group.begin(), group.end(), -4); Group g{group.data()}; h2_t h = 1; for (auto _ : state) { @@ -329,7 +337,7 @@ BENCHMARK(BM_Group_Match); void BM_Group_MatchEmpty(benchmark::State& state) { std::array<ctrl_t, Group::kWidth> group; - std::iota(group.begin(), group.end(), -4); + Iota(group.begin(), group.end(), -4); Group g{group.data()}; for (auto _ : state) ::benchmark::DoNotOptimize(g.MatchEmpty()); } @@ -337,7 +345,7 @@ BENCHMARK(BM_Group_MatchEmpty); void BM_Group_MatchEmptyOrDeleted(benchmark::State& state) { std::array<ctrl_t, Group::kWidth> group; - std::iota(group.begin(), group.end(), -4); + Iota(group.begin(), group.end(), -4); Group g{group.data()}; for (auto _ : state) ::benchmark::DoNotOptimize(g.MatchEmptyOrDeleted()); } @@ -345,7 +353,7 @@ BENCHMARK(BM_Group_MatchEmptyOrDeleted); void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) { std::array<ctrl_t, Group::kWidth> group; - std::iota(group.begin(), group.end(), -2); + Iota(group.begin(), group.end(), -2); Group g{group.data()}; for (auto _ : state) ::benchmark::DoNotOptimize(g.CountLeadingEmptyOrDeleted()); @@ -354,7 +362,7 @@ BENCHMARK(BM_Group_CountLeadingEmptyOrDeleted); void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) { std::array<ctrl_t, Group::kWidth> group; - std::iota(group.begin(), group.end(), -2); + Iota(group.begin(), group.end(), -2); Group g{group.data()}; for (auto _ : state) ::benchmark::DoNotOptimize(*g.MatchEmptyOrDeleted()); } @@ -363,8 +371,11 @@ BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted); void BM_DropDeletes(benchmark::State& state) { constexpr size_t capacity = (1 << 20) - 1; std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth); - ctrl[capacity] = kSentinel; - std::vector<ctrl_t> pattern = {kEmpty, 2, kDeleted, 2, kEmpty, 1, kDeleted}; + ctrl[capacity] = ctrl_t::kSentinel; + std::vector<ctrl_t> pattern = {ctrl_t::kEmpty, static_cast<ctrl_t>(2), + ctrl_t::kDeleted, static_cast<ctrl_t>(2), + ctrl_t::kEmpty, static_cast<ctrl_t>(1), + ctrl_t::kDeleted}; for (size_t i = 0; i != capacity; ++i) { ctrl[i] = pattern[i % pattern.size()]; } diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index a191bff9..4fb31fad 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -58,6 +58,9 @@ using ::testing::Lt; using ::testing::Pair; using 
::testing::UnorderedElementsAre; +// Convenience function to static cast to ctrl_t. +ctrl_t CtrlT(int i) { return static_cast<ctrl_t>(i); } + TEST(Util, NormalizeCapacity) { EXPECT_EQ(1, NormalizeCapacity(0)); EXPECT_EQ(1, NormalizeCapacity(1)); @@ -170,15 +173,19 @@ TEST(Group, EmptyGroup) { TEST(Group, Match) { if (Group::kWidth == 16) { - ctrl_t group[] = {kEmpty, 1, kDeleted, 3, kEmpty, 5, kSentinel, 7, - 7, 5, 3, 1, 1, 1, 1, 1}; + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), + ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), + CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), + CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; EXPECT_THAT(Group{group}.Match(0), ElementsAre()); EXPECT_THAT(Group{group}.Match(1), ElementsAre(1, 11, 12, 13, 14, 15)); EXPECT_THAT(Group{group}.Match(3), ElementsAre(3, 10)); EXPECT_THAT(Group{group}.Match(5), ElementsAre(5, 9)); EXPECT_THAT(Group{group}.Match(7), ElementsAre(7, 8)); } else if (Group::kWidth == 8) { - ctrl_t group[] = {kEmpty, 1, 2, kDeleted, 2, 1, kSentinel, 1}; + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), + ctrl_t::kDeleted, CtrlT(2), CtrlT(1), + ctrl_t::kSentinel, CtrlT(1)}; EXPECT_THAT(Group{group}.Match(0), ElementsAre()); EXPECT_THAT(Group{group}.Match(1), ElementsAre(1, 5, 7)); EXPECT_THAT(Group{group}.Match(2), ElementsAre(2, 4)); @@ -189,11 +196,15 @@ TEST(Group, Match) { TEST(Group, MatchEmpty) { if (Group::kWidth == 16) { - ctrl_t group[] = {kEmpty, 1, kDeleted, 3, kEmpty, 5, kSentinel, 7, - 7, 5, 3, 1, 1, 1, 1, 1}; + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), + ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), + CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), + CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; EXPECT_THAT(Group{group}.MatchEmpty(), ElementsAre(0, 4)); } else if (Group::kWidth == 8) { - ctrl_t group[] = {kEmpty, 1, 2, kDeleted, 2, 1, kSentinel, 1}; + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), + ctrl_t::kDeleted, CtrlT(2), CtrlT(1), + ctrl_t::kSentinel, CtrlT(1)}; EXPECT_THAT(Group{group}.MatchEmpty(), ElementsAre(0)); } else { FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; @@ -202,11 +213,15 @@ TEST(Group, MatchEmpty) { TEST(Group, MatchEmptyOrDeleted) { if (Group::kWidth == 16) { - ctrl_t group[] = {kEmpty, 1, kDeleted, 3, kEmpty, 5, kSentinel, 7, - 7, 5, 3, 1, 1, 1, 1, 1}; + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), + ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), + CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), + CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; EXPECT_THAT(Group{group}.MatchEmptyOrDeleted(), ElementsAre(0, 2, 4)); } else if (Group::kWidth == 8) { - ctrl_t group[] = {kEmpty, 1, 2, kDeleted, 2, 1, kSentinel, 1}; + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), + ctrl_t::kDeleted, CtrlT(2), CtrlT(1), + ctrl_t::kSentinel, CtrlT(1)}; EXPECT_THAT(Group{group}.MatchEmptyOrDeleted(), ElementsAre(0, 3)); } else { FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; @@ -217,28 +232,32 @@ TEST(Batch, DropDeletes) { constexpr size_t kCapacity = 63; constexpr size_t kGroupWidth = container_internal::Group::kWidth; std::vector<ctrl_t> ctrl(kCapacity + 1 + kGroupWidth); - ctrl[kCapacity] = kSentinel; - std::vector<ctrl_t> pattern = {kEmpty, 2, kDeleted, 2, kEmpty, 1, kDeleted}; + ctrl[kCapacity] = ctrl_t::kSentinel; + std::vector<ctrl_t> pattern = { + ctrl_t::kEmpty, CtrlT(2), ctrl_t::kDeleted, CtrlT(2), + ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted}; for (size_t i = 0; i != kCapacity; ++i) { 
ctrl[i] = pattern[i % pattern.size()]; if (i < kGroupWidth - 1) ctrl[i + kCapacity + 1] = pattern[i % pattern.size()]; } ConvertDeletedToEmptyAndFullToDeleted(ctrl.data(), kCapacity); - ASSERT_EQ(ctrl[kCapacity], kSentinel); + ASSERT_EQ(ctrl[kCapacity], ctrl_t::kSentinel); for (size_t i = 0; i < kCapacity + kGroupWidth; ++i) { ctrl_t expected = pattern[i % (kCapacity + 1) % pattern.size()]; - if (i == kCapacity) expected = kSentinel; - if (expected == kDeleted) expected = kEmpty; - if (IsFull(expected)) expected = kDeleted; + if (i == kCapacity) expected = ctrl_t::kSentinel; + if (expected == ctrl_t::kDeleted) expected = ctrl_t::kEmpty; + if (IsFull(expected)) expected = ctrl_t::kDeleted; EXPECT_EQ(ctrl[i], expected) - << i << " " << int{pattern[i % pattern.size()]}; + << i << " " << static_cast<int>(pattern[i % pattern.size()]); } } TEST(Group, CountLeadingEmptyOrDeleted) { - const std::vector<ctrl_t> empty_examples = {kEmpty, kDeleted}; - const std::vector<ctrl_t> full_examples = {0, 1, 2, 3, 5, 9, 127, kSentinel}; + const std::vector<ctrl_t> empty_examples = {ctrl_t::kEmpty, ctrl_t::kDeleted}; + const std::vector<ctrl_t> full_examples = { + CtrlT(0), CtrlT(1), CtrlT(2), CtrlT(3), + CtrlT(5), CtrlT(9), CtrlT(127), ctrl_t::kSentinel}; for (ctrl_t empty : empty_examples) { std::vector<ctrl_t> e(Group::kWidth, empty); @@ -871,7 +890,7 @@ TEST(Table, RehashWithNoResize) { const size_t capacity = t.capacity(); // Remove elements from all groups except the first and the last one. - // All elements removed from full groups will be marked as kDeleted. + // All elements removed from full groups will be marked as ctrl_t::kDeleted. const size_t erase_begin = Group::kWidth / 2; const size_t erase_end = (t.size() / Group::kWidth - 1) * Group::kWidth; for (size_t i = erase_begin; i < erase_end; ++i) { diff --git a/absl/debugging/BUILD.bazel b/absl/debugging/BUILD.bazel index 2aac0f66..a8c51a5c 100644 --- a/absl/debugging/BUILD.bazel +++ b/absl/debugging/BUILD.bazel @@ -34,6 +34,7 @@ cc_library( "internal/stacktrace_aarch64-inl.inc", "internal/stacktrace_arm-inl.inc", "internal/stacktrace_config.h", + "internal/stacktrace_emscripten-inl.inc", "internal/stacktrace_generic-inl.inc", "internal/stacktrace_powerpc-inl.inc", "internal/stacktrace_unimplemented-inl.inc", @@ -57,6 +58,7 @@ cc_library( "symbolize.cc", "symbolize_darwin.inc", "symbolize_elf.inc", + "symbolize_emscripten.inc", "symbolize_unimplemented.inc", "symbolize_win32.inc", ], diff --git a/absl/debugging/CMakeLists.txt b/absl/debugging/CMakeLists.txt index bb4d4c92..760772f3 100644 --- a/absl/debugging/CMakeLists.txt +++ b/absl/debugging/CMakeLists.txt @@ -22,6 +22,7 @@ absl_cc_library( "internal/stacktrace_aarch64-inl.inc" "internal/stacktrace_arm-inl.inc" "internal/stacktrace_config.h" + "internal/stacktrace_emscripten-inl.inc" "internal/stacktrace_generic-inl.inc" "internal/stacktrace_powerpc-inl.inc" "internal/stacktrace_unimplemented-inl.inc" @@ -48,6 +49,7 @@ absl_cc_library( "symbolize.cc" "symbolize_darwin.inc" "symbolize_elf.inc" + "symbolize_emscripten.inc" "symbolize_unimplemented.inc" "symbolize_win32.inc" COPTS diff --git a/absl/debugging/internal/stacktrace_config.h b/absl/debugging/internal/stacktrace_config.h index cca410d4..6cceef26 100644 --- a/absl/debugging/internal/stacktrace_config.h +++ b/absl/debugging/internal/stacktrace_config.h @@ -37,6 +37,10 @@ "absl/debugging/internal/stacktrace_generic-inl.inc" #endif +#elif defined(__EMSCRIPTEN__) +#define ABSL_STACKTRACE_INL_HEADER \ + 
"absl/debugging/internal/stacktrace_emscripten-inl.inc" + #elif defined(__linux__) && !defined(__ANDROID__) #if defined(NO_FRAME_POINTER) && \ @@ -68,7 +72,6 @@ "absl/debugging/internal/stacktrace_generic-inl.inc" #endif #endif - #endif // Fallback to the empty implementation. diff --git a/absl/debugging/internal/stacktrace_emscripten-inl.inc b/absl/debugging/internal/stacktrace_emscripten-inl.inc new file mode 100644 index 00000000..0f444514 --- /dev/null +++ b/absl/debugging/internal/stacktrace_emscripten-inl.inc @@ -0,0 +1,110 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Portable implementation - just use glibc +// +// Note: The glibc implementation may cause a call to malloc. +// This can cause a deadlock in HeapProfiler. + +#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_EMSCRIPTEN_INL_H_ +#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_EMSCRIPTEN_INL_H_ + +#include <emscripten.h> + +#include <atomic> +#include <cstring> + +#include "absl/base/attributes.h" +#include "absl/debugging/stacktrace.h" + +extern "C" { +uintptr_t emscripten_stack_snapshot(); +uint32_t emscripten_stack_unwind_buffer(uintptr_t pc, void *buffer, + uint32_t depth); +} + +// Sometimes, we can try to get a stack trace from within a stack +// trace, which can cause a self-deadlock. +// Protect against such reentrant call by failing to get a stack trace. +// +// We use __thread here because the code here is extremely low level -- it is +// called while collecting stack traces from within malloc and mmap, and thus +// can not call anything which might call malloc or mmap itself. +static __thread int recursive = 0; + +// The stack trace function might be invoked very early in the program's +// execution (e.g. from the very first malloc). +// As such, we suppress usage of backtrace during this early stage of execution. +static std::atomic<bool> disable_stacktraces(true); // Disabled until healthy. +// Waiting until static initializers run seems to be late enough. +// This file is included into stacktrace.cc so this will only run once. +ABSL_ATTRIBUTE_UNUSED static int stacktraces_enabler = []() { + // Check if we can even create stacktraces. If not, bail early and leave + // disable_stacktraces set as-is. + // clang-format off + if (!EM_ASM_INT({ return (typeof wasmOffsetConverter !== 'undefined'); })) { + return 0; + } + // clang-format on + disable_stacktraces.store(false, std::memory_order_relaxed); + return 0; +}(); + +template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT> +static int UnwindImpl(void **result, int *sizes, int max_depth, int skip_count, + const void *ucp, int *min_dropped_frames) { + if (recursive || disable_stacktraces.load(std::memory_order_relaxed)) { + return 0; + } + ++recursive; + + static_cast<void>(ucp); // Unused. 
+ constexpr int kStackLength = 64; + void *stack[kStackLength]; + + int size; + uintptr_t pc = emscripten_stack_snapshot(); + size = emscripten_stack_unwind_buffer(pc, stack, kStackLength); + + int result_count = size - skip_count; + if (result_count < 0) result_count = 0; + if (result_count > max_depth) result_count = max_depth; + for (int i = 0; i < result_count; i++) result[i] = stack[i + skip_count]; + + if (IS_STACK_FRAMES) { + // No implementation for finding out the stack frame sizes yet. + memset(sizes, 0, sizeof(*sizes) * result_count); + } + if (min_dropped_frames != nullptr) { + if (size - skip_count - max_depth > 0) { + *min_dropped_frames = size - skip_count - max_depth; + } else { + *min_dropped_frames = 0; + } + } + + --recursive; + + return result_count; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +bool StackTraceWorksForTest() { return true; } +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_EMSCRIPTEN_INL_H_ diff --git a/absl/debugging/internal/symbolize.h b/absl/debugging/internal/symbolize.h index 4f26130f..8c296f89 100644 --- a/absl/debugging/internal/symbolize.h +++ b/absl/debugging/internal/symbolize.h @@ -68,6 +68,12 @@ ABSL_NAMESPACE_END #define ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE 1 #endif +#ifdef ABSL_INTERNAL_HAVE_EMSCRIPTEN_SYMBOLIZE +#error ABSL_INTERNAL_HAVE_EMSCRIPTEN_SYMBOLIZE cannot be directly set +#elif defined(__EMSCRIPTEN__) +#define ABSL_INTERNAL_HAVE_EMSCRIPTEN_SYMBOLIZE 1 +#endif + namespace absl { ABSL_NAMESPACE_BEGIN namespace debugging_internal { diff --git a/absl/debugging/stacktrace.cc b/absl/debugging/stacktrace.cc index 1f7c7d82..5358b942 100644 --- a/absl/debugging/stacktrace.cc +++ b/absl/debugging/stacktrace.cc @@ -49,6 +49,7 @@ # include "absl/debugging/internal/stacktrace_aarch64-inl.inc" # include "absl/debugging/internal/stacktrace_arm-inl.inc" +# include "absl/debugging/internal/stacktrace_emscripten-inl.inc" # include "absl/debugging/internal/stacktrace_generic-inl.inc" # include "absl/debugging/internal/stacktrace_powerpc-inl.inc" # include "absl/debugging/internal/stacktrace_unimplemented-inl.inc" diff --git a/absl/debugging/symbolize.cc b/absl/debugging/symbolize.cc index 5e4a25d6..f1abdfda 100644 --- a/absl/debugging/symbolize.cc +++ b/absl/debugging/symbolize.cc @@ -31,6 +31,8 @@ #include "absl/debugging/symbolize_win32.inc" #elif defined(__APPLE__) #include "absl/debugging/symbolize_darwin.inc" +#elif defined(__EMSCRIPTEN__) +#include "absl/debugging/symbolize_emscripten.inc" #else #include "absl/debugging/symbolize_unimplemented.inc" #endif diff --git a/absl/debugging/symbolize_emscripten.inc b/absl/debugging/symbolize_emscripten.inc new file mode 100644 index 00000000..c226c456 --- /dev/null +++ b/absl/debugging/symbolize_emscripten.inc @@ -0,0 +1,72 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <cxxabi.h> +#include <emscripten.h> + +#include <algorithm> +#include <cstring> + +#include "absl/base/internal/raw_logging.h" +#include "absl/debugging/internal/demangle.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +extern "C" { +const char* emscripten_pc_get_function(const void* pc); +} + +// clang-format off +EM_JS(bool, HaveOffsetConverter, (), + { return typeof wasmOffsetConverter !== 'undefined'; }); +// clang-format on + +namespace absl { +ABSL_NAMESPACE_BEGIN + +void InitializeSymbolizer(const char*) { + if (!HaveOffsetConverter()) { + ABSL_RAW_LOG(INFO, + "Symbolization unavailable. Rebuild with -sWASM=1 " + "and -sUSE_OFFSET_CONVERTER=1."); + } +} + +bool Symbolize(const void* pc, char* out, int out_size) { + // Check if we have the offset converter necessary for pc_get_function. + // Without it, the program will abort(). + if (!HaveOffsetConverter()) { + return false; + } + const char* func_name = emscripten_pc_get_function(pc); + if (func_name == nullptr) { + return false; + } + + strncpy(out, func_name, out_size); + + if (out[out_size - 1] != '\0') { + // strncpy() does not '\0' terminate when it truncates. + static constexpr char kEllipsis[] = "..."; + int ellipsis_size = std::min<int>(sizeof(kEllipsis) - 1, out_size - 1); + memcpy(out + out_size - ellipsis_size - 1, kEllipsis, ellipsis_size); + out[out_size - 1] = '\0'; + } + + return true; +} + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/debugging/symbolize_test.cc b/absl/debugging/symbolize_test.cc index 35de02e2..c710a3da 100644 --- a/absl/debugging/symbolize_test.cc +++ b/absl/debugging/symbolize_test.cc @@ -146,8 +146,22 @@ static const char *TrySymbolize(void *pc) { return TrySymbolizeWithLimit(pc, sizeof(try_symbolize_buffer)); } -#if defined(ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE) || \ - defined(ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE) +#if defined(ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE) || \ + defined(ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE) || \ + defined(ABSL_INTERNAL_HAVE_EMSCRIPTEN_SYMBOLIZE) + +// Test with a return address. +void ABSL_ATTRIBUTE_NOINLINE TestWithReturnAddress() { +#if defined(ABSL_HAVE_ATTRIBUTE_NOINLINE) + void *return_address = __builtin_return_address(0); + const char *symbol = TrySymbolize(return_address); + ABSL_RAW_CHECK(symbol != nullptr, "TestWithReturnAddress failed"); + ABSL_RAW_CHECK(strcmp(symbol, "main") == 0, "TestWithReturnAddress failed"); + std::cout << "TestWithReturnAddress passed" << std::endl; +#endif +} + +#ifndef ABSL_INTERNAL_HAVE_EMSCRIPTEN_SYMBOLIZE TEST(Symbolize, Cached) { // Compilers should give us pointers to them. @@ -418,6 +432,7 @@ TEST(Symbolize, ForEachSection) { close(fd); } #endif // !ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE +#endif // !ABSL_INTERNAL_HAVE_EMSCRIPTEN_SYMBOLIZE // x86 specific tests. Uses some inline assembler. extern "C" { @@ -466,17 +481,6 @@ void ABSL_ATTRIBUTE_NOINLINE TestWithPCInsideInlineFunction() { } } -// Test with a return address. 
-void ABSL_ATTRIBUTE_NOINLINE TestWithReturnAddress() { -#if defined(ABSL_HAVE_ATTRIBUTE_NOINLINE) - void *return_address = __builtin_return_address(0); - const char *symbol = TrySymbolize(return_address); - ABSL_RAW_CHECK(symbol != nullptr, "TestWithReturnAddress failed"); - ABSL_RAW_CHECK(strcmp(symbol, "main") == 0, "TestWithReturnAddress failed"); - std::cout << "TestWithReturnAddress passed" << std::endl; -#endif -} - #if defined(__arm__) && ABSL_HAVE_ATTRIBUTE(target) // Test that we correctly identify bounds of Thumb functions on ARM. // @@ -559,7 +563,6 @@ TEST(Symbolize, SymbolizeWithDemangling) { #endif // !defined(ABSL_CONSUME_DLL) #else // Symbolizer unimplemented - TEST(Symbolize, Unimplemented) { char buf[64]; EXPECT_FALSE(absl::Symbolize((void *)(&nonstatic_func), buf, sizeof(buf))); diff --git a/absl/status/status.h b/absl/status/status.h index 2e05f46e..c5fe0a70 100644 --- a/absl/status/status.h +++ b/absl/status/status.h @@ -80,7 +80,7 @@ ABSL_NAMESPACE_BEGIN // `kFailedPrecondition` if both codes apply. Similarly prefer `kNotFound` or // `kAlreadyExists` over `kFailedPrecondition`. // -// Because these errors may travel RPC boundaries, these codes are tied to the +// Because these errors may cross RPC boundaries, these codes are tied to the // `google.rpc.Code` definitions within // https://github.com/googleapis/googleapis/blob/master/google/rpc/code.proto // The string value of these RPC codes is denoted within each enum below. @@ -114,10 +114,10 @@ enum class StatusCode : int { // StatusCode::kInvalidArgument // // kInvalidArgument (gRPC code "INVALID_ARGUMENT") indicates the caller - // specified an invalid argument, such a malformed filename. Note that such - // errors should be narrowly limited to indicate to the invalid nature of the - // arguments themselves. Errors with validly formed arguments that may cause - // errors with the state of the receiving system should be denoted with + // specified an invalid argument, such as a malformed filename. Note that use + // of such errors should be narrowly limited to indicate the invalid nature of + // the arguments themselves. Errors with validly formed arguments that may + // cause errors with the state of the receiving system should be denoted with // `kFailedPrecondition` instead. kInvalidArgument = 3, @@ -137,14 +137,15 @@ enum class StatusCode : int { // // `kNotFound` is useful if a request should be denied for an entire class of // users, such as during a gradual feature rollout or undocumented allow list. - // If, instead, a request should be denied for specific sets of users, such as - // through user-based access control, use `kPermissionDenied` instead. + // If a request should be denied for specific sets of users, such as through + // user-based access control, use `kPermissionDenied` instead. kNotFound = 5, // StatusCode::kAlreadyExists // - // kAlreadyExists (gRPC code "ALREADY_EXISTS") indicates the entity that a - // caller attempted to create (such as file or directory) is already present. + // kAlreadyExists (gRPC code "ALREADY_EXISTS") indicates that the entity a + // caller attempted to create (such as a file or directory) is already + // present. kAlreadyExists = 6, // StatusCode::kPermissionDenied @@ -183,7 +184,7 @@ enum class StatusCode : int { // level (such as when a client-specified test-and-set fails, indicating // the client should restart a read-modify-write sequence). // (c) Use `kFailedPrecondition` if the client should not retry until - // the system state has been explicitly fixed. 
For example, if an "rmdir" + // the system state has been explicitly fixed. For example, if a "rmdir" // fails because the directory is non-empty, `kFailedPrecondition` // should be returned since the client should not retry unless // the files are deleted from the directory. @@ -283,7 +284,7 @@ std::ostream& operator<<(std::ostream& os, StatusCode code); // absl::StatusToStringMode // // An `absl::StatusToStringMode` is an enumerated type indicating how -// `absl::Status::ToString()` should construct the output string for an non-ok +// `absl::Status::ToString()` should construct the output string for a non-ok // status. enum class StatusToStringMode : int { // ToString will not contain any extra data (such as payloads). It will only diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index c686e05a..7e0245a3 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -270,12 +270,14 @@ cc_library( srcs = [ "internal/cord_internal.cc", "internal/cord_rep_btree.cc", + "internal/cord_rep_btree_navigator.cc", "internal/cord_rep_consume.cc", "internal/cord_rep_ring.cc", ], hdrs = [ "internal/cord_internal.h", "internal/cord_rep_btree.h", + "internal/cord_rep_btree_navigator.h", "internal/cord_rep_consume.h", "internal/cord_rep_flat.h", "internal/cord_rep_ring.h", @@ -318,6 +320,22 @@ cc_test( ], ) +cc_test( + name = "cord_rep_btree_navigator_test", + size = "medium", + srcs = ["internal/cord_rep_btree_navigator_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "cordz_update_tracker", hdrs = ["internal/cordz_update_tracker.h"], diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 2ddd3c48..e55c035d 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -556,6 +556,7 @@ absl_cc_library( HDRS "internal/cord_internal.h" "internal/cord_rep_btree.h" + "internal/cord_rep_btree_navigator.h" "internal/cord_rep_consume.h" "internal/cord_rep_flat.h" "internal/cord_rep_ring.h" @@ -563,6 +564,7 @@ absl_cc_library( SRCS "internal/cord_internal.cc" "internal/cord_rep_btree.cc" + "internal/cord_rep_btree_navigator.cc" "internal/cord_rep_consume.cc" "internal/cord_rep_ring.cc" COPTS @@ -959,6 +961,24 @@ absl_cc_test( absl_cc_test( NAME + cord_rep_btree_navigator_test + SRCS + "internal/cord_rep_btree_navigator_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::cord_rep_test_util + absl::core_headers + absl::raw_logging_internal + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME cord_ring_test SRCS "cord_ring_test.cc" diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index f4118fc0..7d854731 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -507,9 +507,9 @@ inline const CordRepBtree* CordRep::btree() const { inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) { tag = BTREE; - storage[0] = height; - storage[1] = begin; - storage[2] = end; + storage[0] = static_cast<uint8_t>(height); + storage[1] = static_cast<uint8_t>(begin); + storage[2] = static_cast<uint8_t>(end); } inline CordRep* CordRepBtree::Edge(size_t index) const { diff --git a/absl/strings/internal/cord_rep_btree_navigator.cc 
b/absl/strings/internal/cord_rep_btree_navigator.cc new file mode 100644 index 00000000..d1f9995d --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_navigator.cc @@ -0,0 +1,185 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_navigator.h" + +#include <cassert> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ReadResult = CordRepBtreeNavigator::ReadResult; + +namespace { + +// Returns a `CordRepSubstring` from `rep` starting at `offset` of size `n`. +// If `rep` is already a `CordRepSubstring` instance, an adjusted instance is +// created based on the old offset and new offset. +// Adopts a reference on `rep`. Rep must be a valid data edge. Returns +// nullptr if `n == 0`, `rep` if `n == rep->length`. +// Requires `offset < rep->length` and `offset + n <= rep->length`. +// TODO(192061034): move to utility library in internal and optimize for small +// substrings of larger reps. +inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) { + assert(n <= rep->length); + assert(offset < rep->length); + assert(offset <= rep->length - n); + assert(CordRepBtree::IsDataEdge(rep)); + + if (n == 0) return nullptr; + if (n == rep->length) return CordRep::Ref(rep); + + if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = rep->substring()->child; + } + + CordRepSubstring* substring = new CordRepSubstring(); + substring->length = n; + substring->tag = SUBSTRING; + substring->start = offset; + substring->child = CordRep::Ref(rep); + return substring; +} + +inline CordRep* Substring(CordRep* rep, size_t offset) { + return Substring(rep, offset, rep->length - offset); +} + +} // namespace + +CordRepBtreeNavigator::Position CordRepBtreeNavigator::Skip(size_t n) { + int height = 0; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + + // Overall logic: Find an edge of at least the length we need to skip. + // We consume all edges which are smaller (i.e., must be 100% skipped). + // If we exhausted all edges on the current level, we move one level + // up the tree, and repeat until we either find the edge, or until we hit + // the top of the tree meaning the skip exceeds tree->length. + while (n >= edge->length) { + n -= edge->length; + while (++index == node->end()) { + if (++height > height_) return {nullptr, n}; + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + } + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be skipped. 
+ while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + while (n >= edge->length) { + n -= edge->length; + ++index; + assert(index != node->end()); + edge = node->Edge(index); + } + } + index_[0] = index; + return {edge, n}; +} + +ReadResult CordRepBtreeNavigator::Read(size_t edge_offset, size_t n) { + int height = 0; + size_t length = edge_offset + n; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + assert(edge_offset < edge->length); + + if (length < edge->length) { + return {Substring(edge, edge_offset, n), length}; + } + + // Similar to 'Skip', we consume all edges that are inside the 'length' of + // data that needs to be read. If we exhaust the current level, we move one + // level up the tree and repeat until we hit the final edge that must be + // (partially) read. We consume all edges into `subtree`. + CordRepBtree* subtree = CordRepBtree::New(Substring(edge, edge_offset)); + size_t subtree_end = 1; + do { + length -= edge->length; + while (++index == node->end()) { + index_[height] = index; + if (++height > height_) { + subtree->set_end(subtree_end); + if (length == 0) return {subtree, 0}; + CordRep::Unref(subtree); + return {nullptr, length}; + } + if (length != 0) { + subtree->set_end(subtree_end); + subtree = CordRepBtree::New(subtree); + subtree_end = 1; + } + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + if (length >= edge->length) { + subtree->length += edge->length; + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + } + } while (length >= edge->length); + CordRepBtree* tree = subtree; + subtree->length += length; + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be read, adding 'down' nodes to `subtree`. + while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + + if (length != 0) { + CordRepBtree* right = CordRepBtree::New(height); + right->length = length; + subtree->edges_[subtree_end++] = right; + subtree->set_end(subtree_end); + subtree = right; + subtree_end = 0; + while (length >= edge->length) { + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + length -= edge->length; + edge = node->Edge(++index); + } + } + } + // Add any (partial) edge still remaining at the leaf level. + if (length != 0) { + subtree->edges_[subtree_end++] = Substring(edge, 0, length); + } + subtree->set_end(subtree_end); + index_[0] = index; + return {tree, length}; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree_navigator.h b/absl/strings/internal/cord_rep_btree_navigator.h new file mode 100644 index 00000000..971b92ed --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_navigator.h @@ -0,0 +1,265 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ + +#include <cassert> +#include <iostream> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeNavigator is a bi-directional navigator allowing callers to +// navigate all the (leaf) data edges in a CordRepBtree instance. +// +// A CordRepBtreeNavigator instance is by default empty. Callers initialize a +// navigator instance by calling one of `InitFirst()`, `InitLast()` or +// `InitOffset()`, which establishes a current position. Callers can then +// navigate using the `Next`, `Previous`, `Skip` and `Seek` methods. +// +// The navigator instance does not take or adopt a reference on the provided +// `tree` on any of the initialization calls. Callers are responsible for +// guaranteeing the lifecycle of the provided tree. A navigator instance can +// be reset to the empty state by calling `Reset`. +// +// A navigator only keeps positional state on the 'current data edge'; it +// explicitly does not keep any 'offset' state. The class does accept and return +// offsets in the `Read()`, `Skip()` and `Seek()` methods as these would +// otherwise put a big burden on callers. Callers are expected to maintain +// (returned) offset info if they require such granular state. +class CordRepBtreeNavigator { + public: + // The logical position as returned by the Seek() and Skip() functions. + // Returns the current leaf edge for the desired seek or skip position and + // the offset of that position inside that edge. + struct Position { + CordRep* edge; + size_t offset; + }; + + // The read result as returned by the Read() function. + // `tree` contains the resulting tree which is identical to the result + // of calling CordRepBtree::SubTree(...) on the tree being navigated. + // `n` contains the number of bytes used from the last navigated to + // edge of the tree. + struct ReadResult { + CordRep* tree; + size_t n; + }; + + // Returns true if this instance is not empty. + explicit operator bool() const; + + // Returns the tree for this instance or nullptr if empty. + CordRepBtree* btree() const; + + // Returns the data edge of the current position. + // Requires this instance to not be empty. + CordRep* Current() const; + + // Resets this navigator to `tree`, returning the first data edge in the tree. + CordRep* InitFirst(CordRepBtree* tree); + + // Resets this navigator to `tree`, returning the last data edge in the tree. + CordRep* InitLast(CordRepBtree* tree); + + // Resets this navigator to `tree` returning the data edge at position + // `offset` and the relative offset of `offset` into that data edge. + // Returns `Position.edge = nullptr` if the provided offset is greater + // than or equal to the length of the tree, in which case the state of + // the navigator instance remains unchanged. + Position InitOffset(CordRepBtree* tree, size_t offset); + + // Navigates to the next data edge. + // Returns the next data edge or nullptr if there is no next data edge, in + // which case the current position remains unchanged. + CordRep* Next(); + + // Navigates to the previous data edge. + // Returns the previous data edge or nullptr if there is no previous data + // edge, in which case the current position remains unchanged. 
+ CordRep* Previous(); + + // Navigates to the data edge at position `offset`. Returns the navigated to + // data edge in `Position.edge` and the relative offset of `offset` into that + // data edge in `Position.offset`. Returns `Position.edge = nullptr` if the + // provided offset is greater than or equal to the tree's length. + Position Seek(size_t offset); + + // Reads `n` bytes of data starting at offset `edge_offset` of the current + // data edge, and returns the result in `ReadResult.tree`. `ReadResult.n` + // contains the `bytes used` from the last / current data edge in the tree. + // This allows users that mix regular navigation (using string views) and + // 'read into cord' navigation to keep track of the current state, and which + // bytes have been consumed from a navigator. + // This function returns `ReadResult.tree = nullptr` if the requested length + // exceeds the length of the tree starting at the current data edge. + ReadResult Read(size_t edge_offset, size_t n); + + // Skips `n` bytes forward from the current data edge, returning the navigated + // to data edge in `Position.edge` and `Position.offset` containing the offset + // inside that data edge. Note that the state of the navigator is left + // unchanged if `n` is smaller than the length of the current data edge. + Position Skip(size_t n); + + // Resets this instance to the default / empty state. + void Reset(); + + private: + // Slow path for Next() if Next() reached the end of a leaf node. Backtracks + // up the stack until it finds a node that has a 'next' position available, + // and then does a 'front dive' towards the next leaf node. + CordRep* NextUp(); + + // Slow path for Previous() if Previous() reached the beginning of a leaf + // node. Backtracks up the stack until it finds a node that has a 'previous' + // position available, and then does a 'back dive' towards the previous leaf + // node. + CordRep* PreviousUp(); + + // Generic implementation of InitFirst() and InitLast(). + template <CordRepBtree::EdgeType edge_type> + CordRep* Init(CordRepBtree* tree); + + // `height_` contains the height of the current tree, or -1 if empty. + int height_ = -1; + + // `index_` and `node_` contain the navigation state as the 'path' to the + // current data edge which is at `node_[0]->Edge(index_[0])`. The contents + // of these are undefined until the instance is initialized (`height_ >= 0`). + uint8_t index_[CordRepBtree::kMaxHeight]; + CordRepBtree* node_[CordRepBtree::kMaxHeight]; +}; + +// Returns true if this instance is not empty. +inline CordRepBtreeNavigator::operator bool() const { return height_ >= 0; } + +inline CordRepBtree* CordRepBtreeNavigator::btree() const { + return height_ >= 0 ? 
node_[height_] : nullptr; +} + +inline CordRep* CordRepBtreeNavigator::Current() const { + assert(height_ >= 0); + return node_[0]->Edge(index_[0]); +} + +inline void CordRepBtreeNavigator::Reset() { height_ = -1; } + +inline CordRep* CordRepBtreeNavigator::InitFirst(CordRepBtree* tree) { + return Init<CordRepBtree::kFront>(tree); +} + +inline CordRep* CordRepBtreeNavigator::InitLast(CordRepBtree* tree) { + return Init<CordRepBtree::kBack>(tree); +} + +template <CordRepBtree::EdgeType edge_type> +inline CordRep* CordRepBtreeNavigator::Init(CordRepBtree* tree) { + assert(tree != nullptr); + assert(tree->size() > 0); + int height = height_ = tree->height(); + size_t index = tree->index(edge_type); + node_[height] = tree; + index_[height] = static_cast<uint8_t>(index); + while (--height >= 0) { + tree = tree->Edge(index)->btree(); + node_[height] = tree; + index = tree->index(edge_type); + index_[height] = static_cast<uint8_t>(index); + } + return node_[0]->Edge(index); +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::Seek( + size_t offset) { + assert(btree() != nullptr); + int height = height_; + CordRepBtree* edge = node_[height]; + if (ABSL_PREDICT_FALSE(offset >= edge->length)) return {nullptr, 0}; + CordRepBtree::Position index = edge->IndexOf(offset); + index_[height] = static_cast<uint8_t>(index.index); + while (--height >= 0) { + edge = edge->Edge(index.index)->btree(); + node_[height] = edge; + index = edge->IndexOf(index.n); + index_[height] = static_cast<uint8_t>(index.index); + } + return {edge->Edge(index.index), index.n}; +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::InitOffset( + CordRepBtree* tree, size_t offset) { + assert(tree != nullptr); + if (ABSL_PREDICT_FALSE(offset >= tree->length)) return {nullptr, 0}; + height_ = tree->height(); + node_[height_] = tree; + return Seek(offset); +} + +inline CordRep* CordRepBtreeNavigator::Next() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->back() ? NextUp() : edge->Edge(++index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::Previous() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->begin() ? 
PreviousUp() : edge->Edge(--index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::NextUp() { + assert(index_[0] == node_[0]->back()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height] + 1; + } while (index == edge->end()); + index_[height] = static_cast<uint8_t>(index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->begin()); + } while (height > 0); + return edge->Edge(index); +} + +inline CordRep* CordRepBtreeNavigator::PreviousUp() { + assert(index_[0] == node_[0]->begin()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height]; + } while (index == edge->begin()); + index_[height] = static_cast<uint8_t>(--index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->back()); + } while (height > 0); + return edge->Edge(index); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ diff --git a/absl/strings/internal/cord_rep_btree_navigator_test.cc b/absl/strings/internal/cord_rep_btree_navigator_test.cc new file mode 100644 index 00000000..ce09b199 --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_navigator_test.cc @@ -0,0 +1,325 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_navigator.h" + +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::Ne; + +using ::absl::cordrep_testing::CordRepBtreeFromFlats; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; + +using ReadResult = CordRepBtreeNavigator::ReadResult; +using Position = CordRepBtreeNavigator::Position; + +// CordRepBtreeNavigatorTest is a test fixture which automatically creates a +// tree to test navigation logic on. The parameter `count' defines the number of +// data edges in the test tree. 
+class CordRepBtreeNavigatorTest : public testing::TestWithParam<int> {
+ public:
+  using Flats = std::vector<CordRep*>;
+  static constexpr size_t kCharsPerFlat = 3;
+
+  CordRepBtreeNavigatorTest() {
+    data_ = CreateRandomString(count() * kCharsPerFlat);
+    flats_ = CreateFlatsFromString(data_, kCharsPerFlat);
+
+    // Turn flat 0 or 1 into a substring to cover partial reads on substrings.
+    if (count() > 1) {
+      CordRep::Unref(flats_[1]);
+      flats_[1] = MakeSubstring(kCharsPerFlat, kCharsPerFlat, MakeFlat(data_));
+    } else {
+      CordRep::Unref(flats_[0]);
+      flats_[0] = MakeSubstring(0, kCharsPerFlat, MakeFlat(data_));
+    }
+
+    tree_ = CordRepBtreeFromFlats(flats_);
+  }
+
+  ~CordRepBtreeNavigatorTest() override { CordRep::Unref(tree_); }
+
+  int count() const { return GetParam(); }
+  CordRepBtree* tree() { return tree_; }
+  const std::string& data() const { return data_; }
+  const std::vector<CordRep*>& flats() const { return flats_; }
+
+  static std::string ToString(testing::TestParamInfo<int> param) {
+    return absl::StrCat(param.param, "_Flats");
+  }
+
+ private:
+  std::string data_;
+  Flats flats_;
+  CordRepBtree* tree_;
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    WithParam, CordRepBtreeNavigatorTest,
+    testing::Values(1, CordRepBtree::kMaxCapacity - 1,
+                    CordRepBtree::kMaxCapacity,
+                    CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity - 1,
+                    CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity,
+                    CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity + 1,
+                    CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity * 2 +
+                        17),
+    CordRepBtreeNavigatorTest::ToString);
+
+TEST(CordRepBtreeNavigatorTest, Uninitialized) {
+  CordRepBtreeNavigator nav;
+  EXPECT_FALSE(nav);
+  EXPECT_THAT(nav.btree(), Eq(nullptr));
+#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+  EXPECT_DEATH(nav.Current(), ".*");
+#endif
+}
+
+TEST_P(CordRepBtreeNavigatorTest, InitFirst) {
+  CordRepBtreeNavigator nav;
+  CordRep* edge = nav.InitFirst(tree());
+  EXPECT_TRUE(nav);
+  EXPECT_THAT(nav.btree(), Eq(tree()));
+  EXPECT_THAT(nav.Current(), Eq(flats().front()));
+  EXPECT_THAT(edge, Eq(flats().front()));
+}
+
+TEST_P(CordRepBtreeNavigatorTest, InitLast) {
+  CordRepBtreeNavigator nav;
+  CordRep* edge = nav.InitLast(tree());
+  EXPECT_TRUE(nav);
+  EXPECT_THAT(nav.btree(), Eq(tree()));
+  EXPECT_THAT(nav.Current(), Eq(flats().back()));
+  EXPECT_THAT(edge, Eq(flats().back()));
+}
+
+TEST_P(CordRepBtreeNavigatorTest, NextPrev) {
+  CordRepBtreeNavigator nav;
+  nav.InitFirst(tree());
+  const Flats& flats = this->flats();
+
+  EXPECT_THAT(nav.Previous(), Eq(nullptr));
+  EXPECT_THAT(nav.Current(), Eq(flats.front()));
+  for (int i = 1; i < flats.size(); ++i) {
+    ASSERT_THAT(nav.Next(), Eq(flats[i]));
+    EXPECT_THAT(nav.Current(), Eq(flats[i]));
+  }
+  EXPECT_THAT(nav.Next(), Eq(nullptr));
+  EXPECT_THAT(nav.Current(), Eq(flats.back()));
+  for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) {
+    ASSERT_THAT(nav.Previous(), Eq(flats[i]));
+    EXPECT_THAT(nav.Current(), Eq(flats[i]));
+  }
+  EXPECT_THAT(nav.Previous(), Eq(nullptr));
+  EXPECT_THAT(nav.Current(), Eq(flats.front()));
+}
+
+TEST_P(CordRepBtreeNavigatorTest, PrevNext) {
+  CordRepBtreeNavigator nav;
+  nav.InitLast(tree());
+  const Flats& flats = this->flats();
+
+  EXPECT_THAT(nav.Next(), Eq(nullptr));
+  EXPECT_THAT(nav.Current(), Eq(flats.back()));
+  for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) {
+    ASSERT_THAT(nav.Previous(), Eq(flats[i]));
+    EXPECT_THAT(nav.Current(), Eq(flats[i]));
+  }
+  EXPECT_THAT(nav.Previous(), Eq(nullptr));
+  EXPECT_THAT(nav.Current(), Eq(flats.front()));
+  for (int i = 1; i < flats.size(); ++i) {
+    ASSERT_THAT(nav.Next(), Eq(flats[i]));
+    EXPECT_THAT(nav.Current(), Eq(flats[i]));
+  }
+  EXPECT_THAT(nav.Next(), Eq(nullptr));
+  EXPECT_THAT(nav.Current(), Eq(flats.back()));
+}
+
+TEST(CordRepBtreeNavigatorTest, Reset) {
+  CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
+  CordRepBtreeNavigator nav;
+  nav.InitFirst(tree);
+  nav.Reset();
+  EXPECT_FALSE(nav);
+  EXPECT_THAT(nav.btree(), Eq(nullptr));
+#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+  EXPECT_DEATH(nav.Current(), ".*");
+#endif
+  CordRep::Unref(tree);
+}
+
+TEST_P(CordRepBtreeNavigatorTest, Skip) {
+  int count = this->count();
+  const Flats& flats = this->flats();
+  CordRepBtreeNavigator nav;
+  nav.InitFirst(tree());
+
+  for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) {
+    Position pos = nav.Skip(char_offset);
+    EXPECT_THAT(pos.edge, Eq(nav.Current()));
+    EXPECT_THAT(pos.edge, Eq(flats[0]));
+    EXPECT_THAT(pos.offset, Eq(char_offset));
+  }
+
+  for (int index1 = 0; index1 < count; ++index1) {
+    for (int index2 = index1; index2 < count; ++index2) {
+      for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) {
+        CordRepBtreeNavigator nav;
+        nav.InitFirst(tree());
+
+        size_t length1 = index1 * kCharsPerFlat;
+        Position pos1 = nav.Skip(length1 + char_offset);
+        ASSERT_THAT(pos1.edge, Eq(flats[index1]));
+        ASSERT_THAT(pos1.edge, Eq(nav.Current()));
+        ASSERT_THAT(pos1.offset, Eq(char_offset));
+
+        size_t length2 = index2 * kCharsPerFlat;
+        Position pos2 = nav.Skip(length2 - length1 + char_offset);
+        ASSERT_THAT(pos2.edge, Eq(flats[index2]));
+        ASSERT_THAT(pos2.edge, Eq(nav.Current()));
+        ASSERT_THAT(pos2.offset, Eq(char_offset));
+      }
+    }
+  }
+}
+
+TEST_P(CordRepBtreeNavigatorTest, Seek) {
+  int count = this->count();
+  const Flats& flats = this->flats();
+  CordRepBtreeNavigator nav;
+  nav.InitFirst(tree());
+
+  for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) {
+    Position pos = nav.Seek(char_offset);
+    EXPECT_THAT(pos.edge, Eq(nav.Current()));
+    EXPECT_THAT(pos.edge, Eq(flats[0]));
+    EXPECT_THAT(pos.offset, Eq(char_offset));
+  }
+
+  for (int index = 0; index < count; ++index) {
+    for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) {
+      size_t offset = index * kCharsPerFlat + char_offset;
+      Position pos1 = nav.Seek(offset);
+      ASSERT_THAT(pos1.edge, Eq(flats[index]));
+      ASSERT_THAT(pos1.edge, Eq(nav.Current()));
+      ASSERT_THAT(pos1.offset, Eq(char_offset));
+    }
+  }
+}
+
+TEST(CordRepBtreeNavigatorTest, InitOffset) {
+  // Whitebox: InitOffset() is implemented in terms of Seek(), which is
+  // exhaustively tested above. Only test that it initializes and forwards
+  // properly.
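+  // For example, for a tree with data edges ["abc", "def"], offset 5 falls
+  // inside the second edge ("def" covers bytes [3, 6)), so InitOffset(tree, 5)
+  // should return that edge with a local offset of 5 - 3 = 2, as asserted
+  // below.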
+  CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
+  tree = CordRepBtree::Append(tree, MakeFlat("def"));
+  CordRepBtreeNavigator nav;
+  Position pos = nav.InitOffset(tree, 5);
+  EXPECT_TRUE(nav);
+  EXPECT_THAT(nav.btree(), Eq(tree));
+  EXPECT_THAT(pos.edge, Eq(tree->Edges()[1]));
+  EXPECT_THAT(pos.edge, Eq(nav.Current()));
+  EXPECT_THAT(pos.offset, Eq(2));
+  CordRep::Unref(tree);
+}
+
+TEST(CordRepBtreeNavigatorTest, InitOffsetAndSeekBeyondLength) {
+  CordRepBtree* tree1 = CordRepBtree::Create(MakeFlat("abc"));
+  CordRepBtree* tree2 = CordRepBtree::Create(MakeFlat("def"));
+
+  CordRepBtreeNavigator nav;
+  nav.InitFirst(tree1);
+  EXPECT_THAT(nav.Seek(3).edge, Eq(nullptr));
+  EXPECT_THAT(nav.Seek(100).edge, Eq(nullptr));
+  EXPECT_THAT(nav.btree(), Eq(tree1));
+  EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front()));
+
+  EXPECT_THAT(nav.InitOffset(tree2, 3).edge, Eq(nullptr));
+  EXPECT_THAT(nav.InitOffset(tree2, 100).edge, Eq(nullptr));
+  EXPECT_THAT(nav.btree(), Eq(tree1));
+  EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front()));
+
+  CordRep::Unref(tree1);
+  CordRep::Unref(tree2);
+}
+
+TEST_P(CordRepBtreeNavigatorTest, Read) {
+  const Flats& flats = this->flats();
+  const std::string& data = this->data();
+
+  for (size_t offset = 0; offset < data.size(); ++offset) {
+    for (size_t length = 1; length <= data.size() - offset; ++length) {
+      CordRepBtreeNavigator nav;
+      nav.InitFirst(tree());
+
+      // Skip towards the edge holding `offset`.
+      size_t edge_offset = nav.Skip(offset).offset;
+
+      // Read `length` bytes starting at `edge_offset` within that edge.
+      ReadResult result = nav.Read(edge_offset, length);
+      ASSERT_THAT(result.tree, Ne(nullptr));
+      EXPECT_THAT(result.tree->length, Eq(length));
+      if (result.tree->tag == BTREE) {
+        ASSERT_TRUE(CordRepBtree::IsValid(result.tree->btree()));
+      }
+
+      // Verify contents.
+      std::string value = CordToString(result.tree);
+      EXPECT_THAT(value, Eq(data.substr(offset, length)));
+
+      // Verify 'partial last edge' reads.
+      size_t partial = (offset + length) % kCharsPerFlat;
+      ASSERT_THAT(result.n, Eq(partial));
+
+      // Verify the ending position if we did not read to the end of the tree.
+      if (offset + length < data.size()) {
+        size_t index = (offset + length) / kCharsPerFlat;
+        EXPECT_THAT(nav.Current(), Eq(flats[index]));
+      }
+
+      CordRep::Unref(result.tree);
+    }
+  }
+}
+
+TEST_P(CordRepBtreeNavigatorTest, ReadBeyondLengthOfTree) {
+  CordRepBtreeNavigator nav;
+  nav.InitFirst(tree());
+  ReadResult result = nav.Read(2, tree()->length);
+  ASSERT_THAT(result.tree, Eq(nullptr));
+}
+
+}  // namespace
+}  // namespace cord_internal
+ABSL_NAMESPACE_END
+}  // namespace absl
diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h
index 968549be..ec6c431f 100644
--- a/absl/strings/string_view.h
+++ b/absl/strings/string_view.h
@@ -191,7 +191,9 @@ class string_view {
       ABSL_ATTRIBUTE_LIFETIME_BOUND) noexcept
       // This is implemented in terms of `string_view(p, n)` so `str.size()`
       // doesn't need to be reevaluated after `ptr_` is set.
-      : string_view(str.data(), str.size()) {}
+      // The length check is also skipped since it is unnecessary and causes
+      // code bloat.
+      : string_view(str.data(), str.size(), SkipCheckLengthTag{}) {}
 
   // Implicit constructor of a `string_view` from NUL-terminated `str`. When
   // accepting possibly null strings, use `absl::NullSafeStringView(str)`
@@ -594,6 +596,12 @@ class string_view {
   }
 
  private:
+  // The constructor from std::string delegates to this constructor.
+  // See the comment on that constructor for the rationale.
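+  // A tag type is used (rather than, say, a defaulted extra parameter) so
+  // that this unchecked overload can never be selected accidentally: callers
+  // must spell out SkipCheckLengthTag{} explicitly.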
+  struct SkipCheckLengthTag {};
+  string_view(const char* data, size_type len, SkipCheckLengthTag) noexcept
+      : ptr_(data), length_(len) {}
+
   static constexpr size_type kMaxSize =
       (std::numeric_limits<difference_type>::max)();
diff --git a/absl/time/time.h b/absl/time/time.h
index 7fa0f5c6..e9cbce84 100644
--- a/absl/time/time.h
+++ b/absl/time/time.h
@@ -182,18 +182,29 @@ class Duration {
   // Overloads that forward to either the int64_t or double overloads above.
   // Integer operands must be representable as int64_t.
-  template <typename T>
+  template <typename T, time_internal::EnableIfIntegral<T> = 0>
   Duration& operator*=(T r) {
     int64_t x = r;
     return *this *= x;
   }
-  template <typename T>
+
+  template <typename T, time_internal::EnableIfIntegral<T> = 0>
   Duration& operator/=(T r) {
     int64_t x = r;
     return *this /= x;
   }
-  Duration& operator*=(float r) { return *this *= static_cast<double>(r); }
-  Duration& operator/=(float r) { return *this /= static_cast<double>(r); }
+
+  template <typename T, time_internal::EnableIfFloat<T> = 0>
+  Duration& operator*=(T r) {
+    double x = r;
+    return *this *= x;
+  }
+
+  template <typename T, time_internal::EnableIfFloat<T> = 0>
+  Duration& operator/=(T r) {
+    double x = r;
+    return *this /= x;
+  }
 
   template <typename H>
   friend H AbslHashValue(H h, Duration d) {
@@ -392,12 +403,30 @@ constexpr Duration InfiniteDuration();
 //
 //   absl::Duration a = absl::Seconds(60);
 //   absl::Duration b = absl::Minutes(1);  // b == a
-constexpr Duration Nanoseconds(int64_t n);
-constexpr Duration Microseconds(int64_t n);
-constexpr Duration Milliseconds(int64_t n);
-constexpr Duration Seconds(int64_t n);
-constexpr Duration Minutes(int64_t n);
-constexpr Duration Hours(int64_t n);
+template <typename T, time_internal::EnableIfIntegral<T> = 0>
+constexpr Duration Nanoseconds(T n) {
+  return time_internal::FromInt64(n, std::nano{});
+}
+template <typename T, time_internal::EnableIfIntegral<T> = 0>
+constexpr Duration Microseconds(T n) {
+  return time_internal::FromInt64(n, std::micro{});
+}
+template <typename T, time_internal::EnableIfIntegral<T> = 0>
+constexpr Duration Milliseconds(T n) {
+  return time_internal::FromInt64(n, std::milli{});
+}
+template <typename T, time_internal::EnableIfIntegral<T> = 0>
+constexpr Duration Seconds(T n) {
+  return time_internal::FromInt64(n, std::ratio<1>{});
+}
+template <typename T, time_internal::EnableIfIntegral<T> = 0>
+constexpr Duration Minutes(T n) {
+  return time_internal::FromInt64(n, std::ratio<60>{});
+}
+template <typename T, time_internal::EnableIfIntegral<T> = 0>
+constexpr Duration Hours(T n) {
+  return time_internal::FromInt64(n, std::ratio<3600>{});
+}
 
 // Factory overloads for constructing `Duration` values from a floating-point
 // number of the unit indicated by the factory function's name. These functions
@@ -1496,25 +1525,6 @@ T ToChronoDuration(Duration d) {
 }
 
 }  // namespace time_internal
 
-constexpr Duration Nanoseconds(int64_t n) {
-  return time_internal::FromInt64(n, std::nano{});
-}
-constexpr Duration Microseconds(int64_t n) {
-  return time_internal::FromInt64(n, std::micro{});
-}
-constexpr Duration Milliseconds(int64_t n) {
-  return time_internal::FromInt64(n, std::milli{});
-}
-constexpr Duration Seconds(int64_t n) {
-  return time_internal::FromInt64(n, std::ratio<1>{});
-}
-constexpr Duration Minutes(int64_t n) {
-  return time_internal::FromInt64(n, std::ratio<60>{});
-}
-constexpr Duration Hours(int64_t n) {
-  return time_internal::FromInt64(n, std::ratio<3600>{});
-}
-
 constexpr bool operator<(Duration lhs, Duration rhs) {
   return time_internal::GetRepHi(lhs) != time_internal::GetRepHi(rhs)
              ? time_internal::GetRepHi(lhs) < time_internal::GetRepHi(rhs)