From 58e042da9210710dc4ac3b320e48b54e2449521e Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Fri, 2 Jul 2021 20:42:20 -0700 Subject: Export of internal Abseil changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -- 1620e8ffaa93ef24510ca60c7fff2a07248ac9f6 by Abseil Team : Update comment. PiperOrigin-RevId: 382858259 -- 20db116f28469149d10e0f7f8b976cb903dd4879 by Gennadiy Rozental : Add benchmark running on multiple flags. Update size_tester to include cost of absl::GetFlag call. Add size_tester invocation for bool flag. New benchmark better represent GetFlag usage. PiperOrigin-RevId: 382820341 -- 2e097ad3811c4e329f75b98877a5e74c1d3d84fd by Abseil Team : Avoid 64x64->128 multiplication in absl::Hash's mix on AArch64 On AArch64, calculating a 128-bit product is inefficient, because it requires a sequence of two instructions to calculate the upper and lower halves of the result. So calculate a 64-bit product instead. Making MultType 64-bits means the upper 32 bits of the result do not participate in shift/xor, but the add/multiply gives us sufficient mixing. PiperOrigin-RevId: 382625931 -- f3ae3f32cb53168c8dc91b766f2932dc87cec503 by Abseil Team : Remove homegrown Round implementation absl/time/duration.cc defined a Round implementation to accommodate old versions of MSVC that lacked std::round(long double). Abseil no longer supports those MSVCs, so we don’t need the homegrown implementation anymore. Remove it, and replace calls to it with std::rint. PiperOrigin-RevId: 382605191 -- a13631c91bf5478289e1a512ce215c85501a26f7 by Martijn Vels : Move the Consume() conversion functions out of cord_rep_ring into cord_rep_consume. This makes these functions generic, so we can repurpose these for the new Btree conversion functions. PiperOrigin-RevId: 382594902 -- 7394c737500c2d8371fcf913b21ad1b321ba499d by Benjamin Barenblat : Remove homegrown Round implementation absl/time/duration.cc defined a Round implementation to accommodate old versions of MSVC that lacked std::round(long double). Abseil no longer supports those MSVCs, so we don’t need the homegrown implementation anymore. Remove it, and replace calls to it with std::rint. PiperOrigin-RevId: 382569900 -- d72a761f43dc5c9b9510c3a1363177ed26646b5d by Abseil Team : Prefer `getentropy` for Emscripten. It needs a different header, so I've separated it out from the GLIBC check above. PiperOrigin-RevId: 382332475 -- 74e261dbb467741b2ddd8b490e04c531fdd2f559 by Martijn Vels : Add BTREE tag for CordRepNode implementing a Btree cord. This change only forward declared the CordRepBtree class (not implemented yet) and defines the enum value BTREE. While RING and BTREE should never co-exist, we define a new value for BTREE so as not to make transitioning between RING and BTREE harder than it needs to be. This changes shifts the FLAT value / computation from FLAT = 4 to FLAT =5 PiperOrigin-RevId: 382326710 GitOrigin-RevId: 1620e8ffaa93ef24510ca60c7fff2a07248ac9f6 Change-Id: Ia8f99dde3874808f56062bd37ab3e63764099734 --- CMake/AbseilDll.cmake | 2 + absl/flags/flag_benchmark.cc | 138 ++++++++++++++++---- absl/hash/internal/hash.h | 7 + absl/random/internal/seed_material.cc | 6 + absl/strings/BUILD.bazel | 19 +++ absl/strings/CMakeLists.txt | 20 +++ absl/strings/internal/cord_internal.h | 20 ++- absl/strings/internal/cord_rep_consume.cc | 129 ++++++++++++++++++ absl/strings/internal/cord_rep_consume.h | 50 +++++++ absl/strings/internal/cord_rep_consume_test.cc | 173 +++++++++++++++++++++++++ absl/strings/internal/cord_rep_flat.h | 10 +- absl/strings/internal/cord_rep_ring.cc | 118 +---------------- absl/strings/internal/str_split_internal.h | 2 +- 13 files changed, 537 insertions(+), 157 deletions(-) create mode 100644 absl/strings/internal/cord_rep_consume.cc create mode 100644 absl/strings/internal/cord_rep_consume.h create mode 100644 absl/strings/internal/cord_rep_consume_test.cc diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index 8ee4120f..78ace82b 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake @@ -203,6 +203,8 @@ set(ABSL_INTERNAL_DLL_FILES "strings/internal/charconv_parse.h" "strings/internal/cord_internal.cc" "strings/internal/cord_internal.h" + "strings/internal/cord_rep_consume.h" + "strings/internal/cord_rep_consume.cc" "strings/internal/cord_rep_flat.h" "strings/internal/cord_rep_ring.cc" "strings/internal/cord_rep_ring.h" diff --git a/absl/flags/flag_benchmark.cc b/absl/flags/flag_benchmark.cc index 57584f85..fc572d9c 100644 --- a/absl/flags/flag_benchmark.cc +++ b/absl/flags/flag_benchmark.cc @@ -101,7 +101,39 @@ std::string AbslUnparseFlag(const UDT&) { return ""; } A(AbslDuration) \ A(UDT) -#define FLAG_DEF(T) ABSL_FLAG(T, T##_flag, {}, ""); +#define REPLICATE_0(A, T, name, index) A(T, name, index) +#define REPLICATE_1(A, T, name, index) \ + REPLICATE_0(A, T, name, index##0) REPLICATE_0(A, T, name, index##1) +#define REPLICATE_2(A, T, name, index) \ + REPLICATE_1(A, T, name, index##0) REPLICATE_1(A, T, name, index##1) +#define REPLICATE_3(A, T, name, index) \ + REPLICATE_2(A, T, name, index##0) REPLICATE_2(A, T, name, index##1) +#define REPLICATE_4(A, T, name, index) \ + REPLICATE_3(A, T, name, index##0) REPLICATE_3(A, T, name, index##1) +#define REPLICATE_5(A, T, name, index) \ + REPLICATE_4(A, T, name, index##0) REPLICATE_4(A, T, name, index##1) +#define REPLICATE_6(A, T, name, index) \ + REPLICATE_5(A, T, name, index##0) REPLICATE_5(A, T, name, index##1) +#define REPLICATE_7(A, T, name, index) \ + REPLICATE_6(A, T, name, index##0) REPLICATE_6(A, T, name, index##1) +#define REPLICATE_8(A, T, name, index) \ + REPLICATE_7(A, T, name, index##0) REPLICATE_7(A, T, name, index##1) +#define REPLICATE_9(A, T, name, index) \ + REPLICATE_8(A, T, name, index##0) REPLICATE_8(A, T, name, index##1) +#if defined(_MSC_VER) +#define REPLICATE(A, T, name) \ + REPLICATE_7(A, T, name, 0) REPLICATE_7(A, T, name, 1) +#define SINGLE_FLAG(T) FLAGS_##T##_flag_00000000 +#else +#define REPLICATE(A, T, name) \ + REPLICATE_9(A, T, name, 0) REPLICATE_9(A, T, name, 1) +#define SINGLE_FLAG(T) FLAGS_##T##_flag_0000000000 +#endif +#define REPLICATE_ALL(A, T, name) \ + REPLICATE_9(A, T, name, 0) REPLICATE_9(A, T, name, 1) + +#define COUNT(T, name, index) +1 +constexpr size_t kNumFlags = 0 REPLICATE(COUNT, _, _); #if defined(__clang__) && defined(__linux__) // Force the flags used for benchmarks into a separate ELF section. @@ -110,38 +142,87 @@ std::string AbslUnparseFlag(const UDT&) { return ""; } // benchmark results more reproducible across unrelated code changes. #pragma clang section data = ".benchmark_flags" #endif +#define DEFINE_FLAG(T, name, index) ABSL_FLAG(T, name##_##index, {}, ""); +#define FLAG_DEF(T) REPLICATE(DEFINE_FLAG, T, T##_flag); BENCHMARKED_TYPES(FLAG_DEF) #if defined(__clang__) && defined(__linux__) #pragma clang section data = "" #endif // Register thousands of flags to bloat up the size of the registry. // This mimics real life production binaries. -#define DEFINE_FLAG_0(name) ABSL_FLAG(int, name, 0, ""); -#define DEFINE_FLAG_1(name) DEFINE_FLAG_0(name##0) DEFINE_FLAG_0(name##1) -#define DEFINE_FLAG_2(name) DEFINE_FLAG_1(name##0) DEFINE_FLAG_1(name##1) -#define DEFINE_FLAG_3(name) DEFINE_FLAG_2(name##0) DEFINE_FLAG_2(name##1) -#define DEFINE_FLAG_4(name) DEFINE_FLAG_3(name##0) DEFINE_FLAG_3(name##1) -#define DEFINE_FLAG_5(name) DEFINE_FLAG_4(name##0) DEFINE_FLAG_4(name##1) -#define DEFINE_FLAG_6(name) DEFINE_FLAG_5(name##0) DEFINE_FLAG_5(name##1) -#define DEFINE_FLAG_7(name) DEFINE_FLAG_6(name##0) DEFINE_FLAG_6(name##1) -#define DEFINE_FLAG_8(name) DEFINE_FLAG_7(name##0) DEFINE_FLAG_7(name##1) -#define DEFINE_FLAG_9(name) DEFINE_FLAG_8(name##0) DEFINE_FLAG_8(name##1) -#define DEFINE_FLAG_10(name) DEFINE_FLAG_9(name##0) DEFINE_FLAG_9(name##1) -#define DEFINE_FLAG_11(name) DEFINE_FLAG_10(name##0) DEFINE_FLAG_10(name##1) -#define DEFINE_FLAG_12(name) DEFINE_FLAG_11(name##0) DEFINE_FLAG_11(name##1) -DEFINE_FLAG_12(bloat_flag_); +#define BLOAT_FLAG(_unused1, _unused2, index) \ + ABSL_FLAG(int, bloat_flag_##index, 0, ""); +REPLICATE_ALL(BLOAT_FLAG, _, _) namespace { -#define BM_GetFlag(T) \ - void BM_GetFlag_##T(benchmark::State& state) { \ - for (auto _ : state) { \ - benchmark::DoNotOptimize(absl::GetFlag(FLAGS_##T##_flag)); \ - } \ - } \ - BENCHMARK(BM_GetFlag_##T)->ThreadRange(1, 16); +#define FLAG_PTR(T, name, index) &FLAGS_##name##_##index, +#define FLAG_PTR_ARR(T) \ + static constexpr absl::Flag* FlagPtrs_##T[] = { \ + REPLICATE(FLAG_PTR, T, T##_flag)}; +BENCHMARKED_TYPES(FLAG_PTR_ARR) + +#define BM_SingleGetFlag(T) \ + void BM_SingleGetFlag_##T(benchmark::State& state) { \ + for (auto _ : state) { \ + benchmark::DoNotOptimize(absl::GetFlag(SINGLE_FLAG(T))); \ + } \ + } \ + BENCHMARK(BM_SingleGetFlag_##T)->ThreadRange(1, 16); + +BENCHMARKED_TYPES(BM_SingleGetFlag) + +template +struct Accumulator { + using type = T; +}; +template <> +struct Accumulator { + using type = size_t; +}; +template <> +struct Accumulator { + using type = size_t; +}; +template <> +struct Accumulator { + using type = bool; +}; +template <> +struct Accumulator { + using type = bool; +}; +template <> +struct Accumulator { + using type = bool; +}; + +template +void Accumulate(typename Accumulator::type& a, const T& f) { + a += f; +} +void Accumulate(bool& a, bool f) { a = a || f; } +void Accumulate(size_t& a, const std::string& f) { a += f.size(); } +void Accumulate(size_t& a, const std::vector& f) { a += f.size(); } +void Accumulate(bool& a, const OptionalInt& f) { a |= f.has_value(); } +void Accumulate(bool& a, const OptionalString& f) { a |= f.has_value(); } +void Accumulate(bool& a, const UDT& f) { + a |= reinterpret_cast(&f) & 0x1; +} + +#define BM_ManyGetFlag(T) \ + void BM_ManyGetFlag_##T(benchmark::State& state) { \ + Accumulator::type res = {}; \ + while (state.KeepRunningBatch(kNumFlags)) { \ + for (auto* flag_ptr : FlagPtrs_##T) { \ + Accumulate(res, absl::GetFlag(*flag_ptr)); \ + } \ + } \ + benchmark::DoNotOptimize(res); \ + } \ + BENCHMARK(BM_ManyGetFlag_##T)->ThreadRange(1, 8); -BENCHMARKED_TYPES(BM_GetFlag) +BENCHMARKED_TYPES(BM_ManyGetFlag) void BM_ThreadedFindCommandLineFlag(benchmark::State& state) { char dummy[] = "dummy"; @@ -150,17 +231,18 @@ void BM_ThreadedFindCommandLineFlag(benchmark::State& state) { // is finalized. absl::ParseCommandLine(1, argv); - for (auto s : state) { - benchmark::DoNotOptimize( - absl::FindCommandLineFlag("bloat_flag_010101010101")); + while (state.KeepRunningBatch(kNumFlags)) { + for (auto* flag_ptr : FlagPtrs_bool) { + benchmark::DoNotOptimize(absl::FindCommandLineFlag(flag_ptr->Name())); + } } } BENCHMARK(BM_ThreadedFindCommandLineFlag)->ThreadRange(1, 16); } // namespace -#define InvokeGetFlag(T) \ - T AbslInvokeGetFlag##T() { return absl::GetFlag(FLAGS_##T##_flag); } \ +#define InvokeGetFlag(T) \ + T AbslInvokeGetFlag##T() { return absl::GetFlag(SINGLE_FLAG(T)); } \ int odr##T = (benchmark::DoNotOptimize(AbslInvokeGetFlag##T), 1); BENCHMARKED_TYPES(InvokeGetFlag) diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 69dbbc6b..90627e00 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -856,8 +856,15 @@ class ABSL_DLL MixingHashState : public HashStateBase { } ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t state, uint64_t v) { +#if defined(__aarch64__) + // On AArch64, calculating a 128-bit product is inefficient, because it + // requires a sequence of two instructions to calculate the upper and lower + // halves of the result. + using MultType = uint64_t; +#else using MultType = absl::conditional_t; +#endif // We do the addition in 64-bit space to make sure the 128-bit // multiplication is fast. If we were to do it as MultType the compiler has // to assume that the high word is non-zero and needs to perform 2 diff --git a/absl/random/internal/seed_material.cc b/absl/random/internal/seed_material.cc index 7c1d9efa..c03cad85 100644 --- a/absl/random/internal/seed_material.cc +++ b/absl/random/internal/seed_material.cc @@ -57,6 +57,12 @@ #define ABSL_RANDOM_USE_GET_ENTROPY 1 #endif +#if defined(__EMSCRIPTEN__) +#include +// Emscripten has getentropy, but it resides in a different header. +#define ABSL_RANDOM_USE_GET_ENTROPY 1 +#endif + #if defined(ABSL_RANDOM_USE_BCRYPT) #include diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 1cb5b3e5..b70aac3e 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -269,10 +269,12 @@ cc_library( name = "cord_internal", srcs = [ "internal/cord_internal.cc", + "internal/cord_rep_consume.cc", "internal/cord_rep_ring.cc", ], hdrs = [ "internal/cord_internal.h", + "internal/cord_rep_consume.h", "internal/cord_rep_flat.h", "internal/cord_rep_ring.h", "internal/cord_rep_ring_reader.h", @@ -292,6 +294,7 @@ cc_library( "//absl/container:compressed_tuple", "//absl/container:inlined_vector", "//absl/container:layout", + "//absl/functional:function_ref", "//absl/meta:type_traits", ], ) @@ -649,6 +652,22 @@ cc_test( ], ) +cc_test( + name = "cord_rep_consume_test", + size = "medium", + srcs = ["internal/cord_rep_consume_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":strings", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/debugging:leak_check", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "cord_ring_test", size = "medium", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 0246dc38..d79ef712 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -555,11 +555,13 @@ absl_cc_library( cord_internal HDRS "internal/cord_internal.h" + "internal/cord_rep_consume.h" "internal/cord_rep_flat.h" "internal/cord_rep_ring.h" "internal/cord_rep_ring_reader.h" SRCS "internal/cord_internal.cc" + "internal/cord_rep_consume.cc" "internal/cord_rep_ring.cc" COPTS ${ABSL_DEFAULT_COPTS} @@ -902,6 +904,24 @@ absl_cc_test( GTest::gmock_main ) +absl_cc_test( + NAME + cord_rep_consume_test + SRCS + "internal/cord_rep_consume_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::core_headers + absl::function_ref + absl::raw_logging_internal + absl::strings + GTest::gmock_main +) + absl_cc_test( NAME cord_ring_test diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index 1e2436ba..bddc9815 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -150,22 +150,24 @@ struct CordRepExternal; struct CordRepFlat; struct CordRepSubstring; class CordRepRing; +class CordRepBtree; // Various representations that we allow enum CordRepKind { CONCAT = 0, SUBSTRING = 1, - RING = 2, - EXTERNAL = 3, + BTREE = 2, + RING = 3, + EXTERNAL = 4, // We have different tags for different sized flat arrays, - // starting with FLAT, and limited to MAX_FLAT_TAG. The 224 value is based on + // starting with FLAT, and limited to MAX_FLAT_TAG. The 225 value is based on // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well // as the Tag <---> Size logic so that FLAT stil represents the minimum flat // allocation size. (32 bytes as of now). - FLAT = 4, - MAX_FLAT_TAG = 224 + FLAT = 5, + MAX_FLAT_TAG = 225 }; // There are various locations where we want to check if some rep is a 'plain' @@ -175,8 +177,9 @@ enum CordRepKind { // so likewise align RING to EXTERNAL. // Note that we can leave this optimization to the compiler. The compiler will // DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`. -static_assert(EXTERNAL == RING + 1, "RING and EXTERNAL values not consecutive"); -static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT values not consecutive"); +static_assert(RING == BTREE + 1, "BTREE and RING not consecutive"); +static_assert(EXTERNAL == RING + 1, "BTREE and EXTERNAL not consecutive"); +static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive"); struct CordRep { CordRep() = default; @@ -203,6 +206,9 @@ struct CordRep { inline CordRepFlat* flat(); inline const CordRepFlat* flat() const; + inline CordRepBtree* btree(); + inline const CordRepBtree* btree() const; + // -------------------------------------------------------------------- // Memory management diff --git a/absl/strings/internal/cord_rep_consume.cc b/absl/strings/internal/cord_rep_consume.cc new file mode 100644 index 00000000..81514543 --- /dev/null +++ b/absl/strings/internal/cord_rep_consume.cc @@ -0,0 +1,129 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_consume.h" + +#include +#include + +#include "absl/container/inlined_vector.h" +#include "absl/functional/function_ref.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +// Unrefs the provided `substring`, and returns `substring->child` +// Adds or assumes a reference on `substring->child` +CordRep* ClipSubstring(CordRepSubstring* substring) { + CordRep* child = substring->child; + if (substring->refcount.IsOne()) { + delete substring; + } else { + CordRep::Ref(child); + CordRep::Unref(substring); + } + return child; +} + +// Unrefs the provided `concat`, and returns `{concat->left, concat->right}` +// Adds or assumes a reference on `concat->left` and `concat->right`. +// Returns an array of 2 elements containing the left and right nodes. +std::array ClipConcat(CordRepConcat* concat) { + std::array result{concat->left, concat->right}; + if (concat->refcount.IsOne()) { + delete concat; + } else { + CordRep::Ref(result[0]); + CordRep::Ref(result[1]); + CordRep::Unref(concat); + } + return result; +} + +void Consume(bool forward, CordRep* rep, ConsumeFn consume_fn) { + size_t offset = 0; + size_t length = rep->length; + struct Entry { + CordRep* rep; + size_t offset; + size_t length; + }; + absl::InlinedVector stack; + + for (;;) { + if (rep->tag == CONCAT) { + std::array res = ClipConcat(rep->concat()); + CordRep* left = res[0]; + CordRep* right = res[1]; + + if (left->length <= offset) { + // Don't need left node + offset -= left->length; + CordRep::Unref(left); + rep = right; + continue; + } + + size_t length_left = left->length - offset; + if (length_left >= length) { + // Don't need right node + CordRep::Unref(right); + rep = left; + continue; + } + + // Need both nodes + size_t length_right = length - length_left; + if (forward) { + stack.push_back({right, 0, length_right}); + rep = left; + length = length_left; + } else { + stack.push_back({left, offset, length_left}); + rep = right; + offset = 0; + length = length_right; + } + } else if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = ClipSubstring(rep->substring()); + } else { + consume_fn(rep, offset, length); + if (stack.empty()) return; + + rep = stack.back().rep; + offset = stack.back().offset; + length = stack.back().length; + stack.pop_back(); + } + } +} + +} // namespace + +void Consume(CordRep* rep, ConsumeFn consume_fn) { + return Consume(true, rep, std::move(consume_fn)); +} + +void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) { + return Consume(false, rep, std::move(consume_fn)); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_consume.h b/absl/strings/internal/cord_rep_consume.h new file mode 100644 index 00000000..d46fca2b --- /dev/null +++ b/absl/strings/internal/cord_rep_consume.h @@ -0,0 +1,50 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ + +#include + +#include "absl/functional/function_ref.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Functor for the Consume() and ReverseConsume() functions: +// void ConsumeFunc(CordRep* rep, size_t offset, size_t length); +// See the Consume() and ReverseConsume() function comments for documentation. +using ConsumeFn = FunctionRef; + +// Consume() and ReverseConsume() consume CONCAT based trees and invoke the +// provided functor with the contained nodes in the proper forward or reverse +// order, which is used to convert CONCAT trees into other tree or cord data. +// All CONCAT and SUBSTRING nodes are processed internally. The 'offset` +// parameter of the functor is non-zero for any nodes below SUBSTRING nodes. +// It's up to the caller to form these back into SUBSTRING nodes or otherwise +// store offset / prefix information. These functions are intended to be used +// only for migration / transitional code where due to factors such as ODR +// violations, we can not 100% guarantee that all code respects 'new format' +// settings and flags, so we need to be able to parse old data on the fly until +// all old code is deprecated / no longer the default format. +void Consume(CordRep* rep, ConsumeFn consume_fn); +void ReverseConsume(CordRep* rep, ConsumeFn consume_fn); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ diff --git a/absl/strings/internal/cord_rep_consume_test.cc b/absl/strings/internal/cord_rep_consume_test.cc new file mode 100644 index 00000000..e507824b --- /dev/null +++ b/absl/strings/internal/cord_rep_consume_test.cc @@ -0,0 +1,173 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_consume.h" + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using testing::InSequence; +using testing::MockFunction; + +// Returns the depth of a node +int Depth(const CordRep* rep) { + return (rep->tag == CONCAT) ? rep->concat()->depth() : 0; +} + +// Creates a concatenation of the specified nodes. +CordRepConcat* CreateConcat(CordRep* left, CordRep* right) { + auto* concat = new CordRepConcat(); + concat->tag = CONCAT; + concat->left = left; + concat->right = right; + concat->length = left->length + right->length; + concat->set_depth(1 + (std::max)(Depth(left), Depth(right))); + return concat; +} + +// Creates a flat with the length set to `length` +CordRepFlat* CreateFlatWithLength(size_t length) { + auto* flat = CordRepFlat::New(length); + flat->length = length; + return flat; +} + +// Creates a substring node on the specified child. +CordRepSubstring* CreateSubstring(CordRep* child, size_t start, size_t length) { + auto* rep = new CordRepSubstring(); + rep->length = length; + rep->tag = SUBSTRING; + rep->start = start; + rep->child = child; + return rep; +} + +// Flats we use in the tests +CordRep* flat[6]; + +// Creates a test tree +CordRep* CreateTestTree() { + flat[0] = CreateFlatWithLength(1); + flat[1] = CreateFlatWithLength(7); + CordRepConcat* left = CreateConcat(flat[0], CreateSubstring(flat[1], 2, 4)); + + flat[2] = CreateFlatWithLength(9); + flat[3] = CreateFlatWithLength(13); + CordRepConcat* right1 = CreateConcat(flat[2], flat[3]); + + flat[4] = CreateFlatWithLength(15); + flat[5] = CreateFlatWithLength(19); + CordRepConcat* right2 = CreateConcat(flat[4], flat[5]); + + CordRepConcat* right = CreateConcat(right1, CreateSubstring(right2, 5, 17)); + return CreateConcat(left, right); +} + +TEST(CordRepConsumeTest, Consume) { + InSequence in_sequence; + CordRep* tree = CreateTestTree(); + MockFunction consume; + EXPECT_CALL(consume, Call(flat[0], 0, 1)); + EXPECT_CALL(consume, Call(flat[1], 2, 4)); + EXPECT_CALL(consume, Call(flat[2], 0, 9)); + EXPECT_CALL(consume, Call(flat[3], 0, 13)); + EXPECT_CALL(consume, Call(flat[4], 5, 10)); + EXPECT_CALL(consume, Call(flat[5], 0, 7)); + Consume(tree, consume.AsStdFunction()); + for (CordRep* rep : flat) { + EXPECT_TRUE(rep->refcount.IsOne()); + CordRep::Unref(rep); + } +} + +TEST(CordRepConsumeTest, ConsumeShared) { + InSequence in_sequence; + CordRep* tree = CreateTestTree(); + MockFunction consume; + EXPECT_CALL(consume, Call(flat[0], 0, 1)); + EXPECT_CALL(consume, Call(flat[1], 2, 4)); + EXPECT_CALL(consume, Call(flat[2], 0, 9)); + EXPECT_CALL(consume, Call(flat[3], 0, 13)); + EXPECT_CALL(consume, Call(flat[4], 5, 10)); + EXPECT_CALL(consume, Call(flat[5], 0, 7)); + Consume(CordRep::Ref(tree), consume.AsStdFunction()); + for (CordRep* rep : flat) { + EXPECT_FALSE(rep->refcount.IsOne()); + CordRep::Unref(rep); + } + CordRep::Unref(tree); +} + +TEST(CordRepConsumeTest, Reverse) { + InSequence in_sequence; + CordRep* tree = CreateTestTree(); + MockFunction consume; + EXPECT_CALL(consume, Call(flat[5], 0, 7)); + EXPECT_CALL(consume, Call(flat[4], 5, 10)); + EXPECT_CALL(consume, Call(flat[3], 0, 13)); + EXPECT_CALL(consume, Call(flat[2], 0, 9)); + EXPECT_CALL(consume, Call(flat[1], 2, 4)); + EXPECT_CALL(consume, Call(flat[0], 0, 1)); + ReverseConsume(tree, consume.AsStdFunction()); + for (CordRep* rep : flat) { + EXPECT_TRUE(rep->refcount.IsOne()); + CordRep::Unref(rep); + } +} + +TEST(CordRepConsumeTest, ReverseShared) { + InSequence in_sequence; + CordRep* tree = CreateTestTree(); + MockFunction consume; + EXPECT_CALL(consume, Call(flat[5], 0, 7)); + EXPECT_CALL(consume, Call(flat[4], 5, 10)); + EXPECT_CALL(consume, Call(flat[3], 0, 13)); + EXPECT_CALL(consume, Call(flat[2], 0, 9)); + EXPECT_CALL(consume, Call(flat[1], 2, 4)); + EXPECT_CALL(consume, Call(flat[0], 0, 1)); + ReverseConsume(CordRep::Ref(tree), consume.AsStdFunction()); + for (CordRep* rep : flat) { + EXPECT_FALSE(rep->refcount.IsOne()); + CordRep::Unref(rep); + } + CordRep::Unref(tree); +} + +TEST(CordRepConsumeTest, UnreachableFlat) { + InSequence in_sequence; + CordRepFlat* flat1 = CreateFlatWithLength(10); + CordRepFlat* flat2 = CreateFlatWithLength(20); + CordRepConcat* concat = CreateConcat(flat1, flat2); + CordRepSubstring* tree = CreateSubstring(concat, 15, 10); + MockFunction consume; + EXPECT_CALL(consume, Call(flat2, 5, 10)); + Consume(tree, consume.AsStdFunction()); + EXPECT_TRUE(flat2->refcount.IsOne()); + CordRep::Unref(flat2); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_flat.h b/absl/strings/internal/cord_rep_flat.h index a98aa9df..cddb282f 100644 --- a/absl/strings/internal/cord_rep_flat.h +++ b/absl/strings/internal/cord_rep_flat.h @@ -44,11 +44,11 @@ static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead; constexpr uint8_t AllocatedSizeToTagUnchecked(size_t size) { - return static_cast((size <= 1024) ? size / 8 - : 128 + size / 32 - 1024 / 32); + return static_cast((size <= 1024) ? size / 8 + 1 + : 129 + size / 32 - 1024 / 32); } -static_assert(kMinFlatSize / 8 >= FLAT, ""); +static_assert(kMinFlatSize / 8 + 1 >= FLAT, ""); static_assert(AllocatedSizeToTagUnchecked(kMaxFlatSize) <= MAX_FLAT_TAG, ""); // Helper functions for rounded div, and rounding to exact sizes. @@ -73,7 +73,7 @@ inline uint8_t AllocatedSizeToTag(size_t size) { // Converts the provided tag to the corresponding allocated size constexpr size_t TagToAllocatedSize(uint8_t tag) { - return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32); + return (tag <= 129) ? ((tag - 1) * 8) : (1024 + (tag - 129) * 32); } // Converts the provided tag to the corresponding available data length @@ -82,7 +82,7 @@ constexpr size_t TagToLength(uint8_t tag) { } // Enforce that kMaxFlatSize maps to a well-known exact tag value. -static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); +static_assert(TagToAllocatedSize(225) == kMaxFlatSize, "Bad tag logic"); struct CordRepFlat : public CordRep { // Creates a new flat node. diff --git a/absl/strings/internal/cord_rep_ring.cc b/absl/strings/internal/cord_rep_ring.cc index f78c94e1..20a6fc2b 100644 --- a/absl/strings/internal/cord_rep_ring.cc +++ b/absl/strings/internal/cord_rep_ring.cc @@ -26,6 +26,7 @@ #include "absl/base/macros.h" #include "absl/container/inlined_vector.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_consume.h" #include "absl/strings/internal/cord_rep_flat.h" namespace absl { @@ -49,14 +50,6 @@ inline void CheckCapacity(size_t n, size_t extra) { } } -// Removes a reference from `rep` only. -// Asserts that the refcount after decrement is not zero. -inline bool UnrefNeverOne(CordRep* rep) { - bool result = rep->refcount.Decrement(); - assert(result); - return result; -} - // Creates a flat from the provided string data, allocating up to `extra` // capacity in the returned flat depending on kMaxFlatLength limitations. // Requires `len` to be less or equal to `kMaxFlatLength` @@ -68,40 +61,6 @@ CordRepFlat* CreateFlat(const char* s, size_t n, size_t extra = 0) { // NOLINT return rep; } -// Unrefs the provided `substring`, and returns `substring->child` -// Adds or assumes a reference on `substring->child` -CordRep* ClipSubstring(CordRepSubstring* substring) { - CordRep* child = substring->child; - if (substring->refcount.IsOne()) { - delete substring; - } else { - CordRep::Ref(child); - if (ABSL_PREDICT_FALSE(!substring->refcount.Decrement())) { - UnrefNeverOne(child); - delete substring; - } - } - return child; -} - -// Unrefs the provided `concat`, and returns `{concat->left, concat->right}` -// Adds or assumes a reference on `concat->left` and `concat->right`. -std::pair ClipConcat(CordRepConcat* concat) { - auto result = std::make_pair(concat->left, concat->right); - if (concat->refcount.IsOne()) { - delete concat; - } else { - CordRep::Ref(result.first); - CordRep::Ref(result.second); - if (ABSL_PREDICT_FALSE(!concat->refcount.Decrement())) { - UnrefNeverOne(result.first); - UnrefNeverOne(result.second); - delete concat; - } - } - return result; -} - // Unrefs the entries in `[head, tail)`. // Requires all entries to be a FLAT or EXTERNAL node. void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) { @@ -117,79 +76,6 @@ void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) { }); } -template -void Consume(Direction direction, CordRep* rep, F&& fn) { - size_t offset = 0; - size_t length = rep->length; - struct Entry { - CordRep* rep; - size_t offset; - size_t length; - }; - absl::InlinedVector stack; - - for (;;) { - if (rep->tag >= FLAT || rep->tag == EXTERNAL || rep->tag == RING) { - fn(rep, offset, length); - if (stack.empty()) return; - - rep = stack.back().rep; - offset = stack.back().offset; - length = stack.back().length; - stack.pop_back(); - } else if (rep->tag == SUBSTRING) { - offset += rep->substring()->start; - rep = ClipSubstring(rep->substring()); - } else if (rep->tag == CONCAT) { - auto res = ClipConcat(rep->concat()); - CordRep* left = res.first; - CordRep* right = res.second; - - if (left->length <= offset) { - // Don't need left node - offset -= left->length; - CordRep::Unref(left); - rep = right; - continue; - } - - size_t length_left = left->length - offset; - if (length_left >= length) { - // Don't need right node - CordRep::Unref(right); - rep = left; - continue; - } - - // Need both nodes - size_t length_right = length - length_left; - if (direction == Direction::kReversed) { - stack.push_back({left, offset, length_left}); - rep = right; - offset = 0; - length = length_right; - } else { - stack.push_back({right, 0, length_right}); - rep = left; - length = length_left; - } - } else { - assert("Valid tag" == nullptr); - return; - } - } -} - -template -void Consume(CordRep* rep, F&& fn) { - return Consume(Direction::kForward, rep, std::forward(fn)); -} - -template -void RConsume(CordRep* rep, F&& fn) { - return Consume(Direction::kReversed, rep, std::forward(fn)); -} - } // namespace std::ostream& operator<<(std::ostream& s, const CordRepRing& rep) { @@ -581,7 +467,7 @@ CordRepRing* CordRepRing::Append(CordRepRing* rep, CordRep* child) { } CordRepRing* CordRepRing::PrependSlow(CordRepRing* rep, CordRep* child) { - RConsume(child, [&](CordRep* child_arg, size_t offset, size_t len) { + ReverseConsume(child, [&](CordRep* child_arg, size_t offset, size_t len) { if (IsFlatOrExternal(child_arg)) { rep = PrependLeaf(rep, child_arg, offset, len); } else { diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h index 17c1bfe8..e7664216 100644 --- a/absl/strings/internal/str_split_internal.h +++ b/absl/strings/internal/str_split_internal.h @@ -64,7 +64,7 @@ class ConvertibleToStringView { ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) : value_(s) {} - // Matches rvalue strings and moves their data to a member. + // Disable conversion from rvalue strings. ConvertibleToStringView(std::string&& s) = delete; ConvertibleToStringView(const std::string&& s) = delete; -- cgit v1.2.3