diff options
Diffstat (limited to 'absl/strings')
47 files changed, 5504 insertions, 618 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 64a13cef..123b5efb 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -54,6 +54,7 @@ cc_library( "ascii.h", "charconv.h", "escaping.h", + "internal/string_constant.h", "match.h", "numbers.h", "str_cat.h", @@ -68,7 +69,6 @@ cc_library( deps = [ ":internal", "//absl/base", - "//absl/base:bits", "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian", @@ -76,6 +76,7 @@ cc_library( "//absl/base:throw_delegate", "//absl/memory", "//absl/meta:type_traits", + "//absl/numeric:bits", "//absl/numeric:int128", ], ) @@ -223,6 +224,19 @@ cc_test( ) cc_test( + name = "string_constant_test", + size = "small", + srcs = ["internal/string_constant_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/meta:type_traits", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( name = "string_view_benchmark", srcs = ["string_view_benchmark.cc"], copts = ABSL_TEST_COPTS, @@ -253,13 +267,31 @@ cc_test( cc_library( name = "cord_internal", - hdrs = ["internal/cord_internal.h"], + srcs = [ + "internal/cord_internal.cc", + "internal/cord_rep_ring.cc", + ], + hdrs = [ + "internal/cord_internal.h", + "internal/cord_rep_flat.h", + "internal/cord_rep_ring.h", + "internal/cord_rep_ring_reader.h", + ], copts = ABSL_DEFAULT_COPTS, - visibility = ["//visibility:private"], + visibility = [ + "//visibility:private", + ], deps = [ ":strings", "//absl/base:base_internal", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/base:throw_delegate", "//absl/container:compressed_tuple", + "//absl/container:inlined_vector", + "//absl/container:layout", "//absl/meta:type_traits", ], ) @@ -324,6 +356,38 @@ cc_test( ) cc_test( + name = "cord_ring_test", + size = "medium", + srcs = ["cord_ring_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":strings", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", + "//absl/debugging:leak_check", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_ring_reader_test", + size = "medium", + srcs = ["cord_ring_reader_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":strings", + "//absl/base:core_headers", + "//absl/debugging:leak_check", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( name = "substitute_test", size = "small", srcs = ["substitute_test.cc"], @@ -639,12 +703,13 @@ cc_library( visibility = ["//visibility:private"], deps = [ ":strings", - "//absl/base:bits", "//absl/base:config", "//absl/base:core_headers", "//absl/functional:function_ref", "//absl/meta:type_traits", + "//absl/numeric:bits", "//absl/numeric:int128", + "//absl/numeric:representation", "//absl/types:optional", "//absl/types:span", ], diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index d6c2126d..3b7ae639 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -21,6 +21,7 @@ absl_cc_library( "ascii.h" "charconv.h" "escaping.h" + "internal/string_constant.h" "match.h" "numbers.h" "str_cat.h" @@ -160,6 +161,19 @@ absl_cc_test( absl_cc_test( NAME + string_constant_test + SRCS + "internal/string_constant_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::type_traits + gmock_main +) + +absl_cc_test( + NAME string_view_test SRCS "string_view_test.cc" @@ -396,6 +410,7 @@ absl_cc_library( absl::strings absl::config absl::core_headers + absl::numeric_representation absl::type_traits absl::int128 absl::span @@ -542,13 +557,19 @@ absl_cc_library( "cord.h" SRCS "cord.cc" + "internal/cord_internal.cc" "internal/cord_internal.h" + "internal/cord_rep_ring.h" + "internal/cord_rep_ring.cc" + "internal/cord_rep_ring_reader.h" + "internal/cord_rep_flat.h" COPTS ${ABSL_DEFAULT_COPTS} DEPS absl::base absl::base_internal absl::compressed_tuple + absl::config absl::core_headers absl::endian absl::fixed_array @@ -558,6 +579,7 @@ absl_cc_library( absl::raw_logging_internal absl::strings absl::strings_internal + absl::throw_delegate absl::type_traits PUBLIC ) @@ -593,3 +615,35 @@ absl_cc_test( absl::fixed_array gmock_main ) + +absl_cc_test( + NAME + cord_ring_test + SRCS + "cord_ring_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord + absl::strings + absl::base + absl::core_headers + absl::raw_logging_internal + gmock_main +) + +absl_cc_test( + NAME + cord_ring_reader_test + SRCS + "cord_ring_reader_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + absl::strings + absl::base + absl::core_headers + gmock_main +) diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc index 5ecd23f8..83af7825 100644 --- a/absl/strings/ascii_test.cc +++ b/absl/strings/ascii_test.cc @@ -197,11 +197,15 @@ TEST(AsciiStrTo, Lower) { const std::string str("GHIJKL"); const std::string str2("MNOPQR"); const absl::string_view sp(str2); + std::string mutable_str("STUVWX"); EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); + absl::AsciiStrToLower(&mutable_str); + EXPECT_EQ("stuvwx", mutable_str); + char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), mutable_buf, absl::ascii_tolower); diff --git a/absl/strings/charconv.cc b/absl/strings/charconv.cc index 3613a652..b8674c28 100644 --- a/absl/strings/charconv.cc +++ b/absl/strings/charconv.cc @@ -20,7 +20,7 @@ #include <cstring> #include "absl/base/casts.h" -#include "absl/base/internal/bits.h" +#include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/strings/internal/charconv_bigint.h" #include "absl/strings/internal/charconv_parse.h" @@ -242,11 +242,11 @@ struct CalculatedFloat { // Returns the bit width of the given uint128. (Equivalently, returns 128 // minus the number of leading zero bits.) -int BitWidth(uint128 value) { +unsigned BitWidth(uint128 value) { if (Uint128High64(value) == 0) { - return 64 - base_internal::CountLeadingZeros64(Uint128Low64(value)); + return static_cast<unsigned>(bit_width(Uint128Low64(value))); } - return 128 - base_internal::CountLeadingZeros64(Uint128High64(value)); + return 128 - countl_zero(Uint128High64(value)); } // Calculates how far to the right a mantissa needs to be shifted to create a @@ -519,7 +519,7 @@ CalculatedFloat CalculateFromParsedHexadecimal( const strings_internal::ParsedFloat& parsed_hex) { uint64_t mantissa = parsed_hex.mantissa; int exponent = parsed_hex.exponent; - int mantissa_width = 64 - base_internal::CountLeadingZeros64(mantissa); + auto mantissa_width = static_cast<unsigned>(bit_width(mantissa)); const int shift = NormalizedShiftSize<FloatType>(mantissa_width, exponent); bool result_exact; exponent += shift; diff --git a/absl/strings/charconv_test.cc b/absl/strings/charconv_test.cc index 9090e9c8..b83de5a0 100644 --- a/absl/strings/charconv_test.cc +++ b/absl/strings/charconv_test.cc @@ -653,7 +653,9 @@ TEST(FromChars, NaNFloats) { negative_from_chars_float); EXPECT_TRUE(std::signbit(negative_from_chars_float)); EXPECT_FALSE(Identical(negative_from_chars_float, from_chars_float)); - from_chars_float = std::copysign(from_chars_float, -1.0); + // Use the (float, float) overload of std::copysign to prevent narrowing; + // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98251. + from_chars_float = std::copysign(from_chars_float, -1.0f); EXPECT_TRUE(Identical(negative_from_chars_float, from_chars_float)); } } diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 763dcc45..93533757 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -36,6 +36,8 @@ #include "absl/container/inlined_vector.h" #include "absl/strings/escaping.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_ring.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -48,98 +50,20 @@ ABSL_NAMESPACE_BEGIN using ::absl::cord_internal::CordRep; using ::absl::cord_internal::CordRepConcat; using ::absl::cord_internal::CordRepExternal; +using ::absl::cord_internal::CordRepFlat; +using ::absl::cord_internal::CordRepRing; using ::absl::cord_internal::CordRepSubstring; +using ::absl::cord_internal::kMinFlatLength; +using ::absl::cord_internal::kMaxFlatLength; -// Various representations that we allow -enum CordRepKind { - CONCAT = 0, - EXTERNAL = 1, - SUBSTRING = 2, +using ::absl::cord_internal::CONCAT; +using ::absl::cord_internal::EXTERNAL; +using ::absl::cord_internal::FLAT; +using ::absl::cord_internal::RING; +using ::absl::cord_internal::SUBSTRING; - // We have different tags for different sized flat arrays, - // starting with FLAT - FLAT = 3, -}; - -namespace cord_internal { - -inline CordRepConcat* CordRep::concat() { - assert(tag == CONCAT); - return static_cast<CordRepConcat*>(this); -} - -inline const CordRepConcat* CordRep::concat() const { - assert(tag == CONCAT); - return static_cast<const CordRepConcat*>(this); -} - -inline CordRepSubstring* CordRep::substring() { - assert(tag == SUBSTRING); - return static_cast<CordRepSubstring*>(this); -} - -inline const CordRepSubstring* CordRep::substring() const { - assert(tag == SUBSTRING); - return static_cast<const CordRepSubstring*>(this); -} - -inline CordRepExternal* CordRep::external() { - assert(tag == EXTERNAL); - return static_cast<CordRepExternal*>(this); -} - -inline const CordRepExternal* CordRep::external() const { - assert(tag == EXTERNAL); - return static_cast<const CordRepExternal*>(this); -} - -} // namespace cord_internal - -static const size_t kFlatOverhead = offsetof(CordRep, data); - -// Largest and smallest flat node lengths we are willing to allocate -// Flat allocation size is stored in tag, which currently can encode sizes up -// to 4K, encoded as multiple of either 8 or 32 bytes. -// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc. -static constexpr size_t kMaxFlatSize = 4096; -static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; -static constexpr size_t kMinFlatLength = 32 - kFlatOverhead; - -// Prefer copying blocks of at most this size, otherwise reference count. -static const size_t kMaxBytesToCopy = 511; - -// Helper functions for rounded div, and rounding to exact sizes. -static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } -static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } - -// Returns the size to the nearest equal or larger value that can be -// expressed exactly as a tag value. -static size_t RoundUpForTag(size_t size) { - return RoundUp(size, (size <= 1024) ? 8 : 32); -} - -// Converts the allocated size to a tag, rounding down if the size -// does not exactly match a 'tag expressible' size value. The result is -// undefined if the size exceeds the maximum size that can be encoded in -// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). -static uint8_t AllocatedSizeToTag(size_t size) { - const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32; - assert(tag <= std::numeric_limits<uint8_t>::max()); - return tag; -} - -// Converts the provided tag to the corresponding allocated size -static constexpr size_t TagToAllocatedSize(uint8_t tag) { - return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32); -} - -// Converts the provided tag to the corresponding available data length -static constexpr size_t TagToLength(uint8_t tag) { - return TagToAllocatedSize(tag) - kFlatOverhead; -} - -// Enforce that kMaxFlatSize maps to a well-known exact tag value. -static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); +using ::absl::cord_internal::kInlinedVectorSize; +using ::absl::cord_internal::kMaxBytesToCopy; constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) { return n == 0 ? a : Fibonacci(n - 1, b, a + b); @@ -171,16 +95,10 @@ static constexpr uint64_t min_length[] = { static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length); -// The inlined size to use with absl::InlinedVector. -// -// Note: The InlinedVectors in this file (and in cord.h) do not need to use -// the same value for their inlined size. The fact that they do is historical. -// It may be desirable for each to use a different inlined size optimized for -// that InlinedVector's usage. -// -// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for -// the inlined vector size (47 exists for backward compatibility). -static const int kInlinedVectorSize = 47; +static inline bool cord_ring_enabled() { + return cord_internal::cord_ring_buffer_enabled.load( + std::memory_order_relaxed); +} static inline bool IsRootBalanced(CordRep* node) { if (node->tag != CONCAT) { @@ -197,7 +115,8 @@ static inline bool IsRootBalanced(CordRep* node) { } static CordRep* Rebalance(CordRep* node); -static void DumpNode(CordRep* rep, bool include_data, std::ostream* os); +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, + int indent = 0); static bool VerifyNode(CordRep* root, CordRep* start_node, bool full_validation); @@ -217,96 +136,6 @@ static inline CordRep* VerifyTree(CordRep* node) { return node; } -// -------------------------------------------------------------------- -// Memory management - -inline CordRep* Ref(CordRep* rep) { - if (rep != nullptr) { - rep->refcount.Increment(); - } - return rep; -} - -// This internal routine is called from the cold path of Unref below. Keeping it -// in a separate routine allows good inlining of Unref into many profitable call -// sites. However, the call to this function can be highly disruptive to the -// register pressure in those callers. To minimize the cost to callers, we use -// a special LLVM calling convention that preserves most registers. This allows -// the call to this routine in cold paths to not disrupt the caller's register -// pressure. This calling convention is not available on all platforms; we -// intentionally allow LLVM to ignore the attribute rather than attempting to -// hardcode the list of supported platforms. -#if defined(__clang__) && !defined(__i386__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wattributes" -__attribute__((preserve_most)) -#pragma clang diagnostic pop -#endif -static void UnrefInternal(CordRep* rep) { - assert(rep != nullptr); - - absl::InlinedVector<CordRep*, kInlinedVectorSize> pending; - while (true) { - if (rep->tag == CONCAT) { - CordRepConcat* rep_concat = rep->concat(); - CordRep* right = rep_concat->right; - if (!right->refcount.Decrement()) { - pending.push_back(right); - } - CordRep* left = rep_concat->left; - delete rep_concat; - rep = nullptr; - if (!left->refcount.Decrement()) { - rep = left; - continue; - } - } else if (rep->tag == EXTERNAL) { - CordRepExternal* rep_external = rep->external(); - rep_external->releaser_invoker(rep_external); - rep = nullptr; - } else if (rep->tag == SUBSTRING) { - CordRepSubstring* rep_substring = rep->substring(); - CordRep* child = rep_substring->child; - delete rep_substring; - rep = nullptr; - if (!child->refcount.Decrement()) { - rep = child; - continue; - } - } else { - // Flat CordReps are allocated and constructed with raw ::operator new - // and placement new, and must be destructed and deallocated - // accordingly. -#if defined(__cpp_sized_deallocation) - size_t size = TagToAllocatedSize(rep->tag); - rep->~CordRep(); - ::operator delete(rep, size); -#else - rep->~CordRep(); - ::operator delete(rep); -#endif - rep = nullptr; - } - - if (!pending.empty()) { - rep = pending.back(); - pending.pop_back(); - } else { - break; - } - } -} - -inline void Unref(CordRep* rep) { - // Fast-path for two common, hot cases: a null rep and a shared root. - if (ABSL_PREDICT_TRUE(rep == nullptr || - rep->refcount.DecrementExpectHighRefcount())) { - return; - } - - UnrefInternal(rep); -} - // Return the depth of a node static int Depth(const CordRep* rep) { if (rep->tag == CONCAT) { @@ -330,12 +159,14 @@ static void SetConcatChildren(CordRepConcat* concat, CordRep* left, // The returned node has a refcount of 1. static CordRep* RawConcat(CordRep* left, CordRep* right) { // Avoid making degenerate concat nodes (one child is empty) - if (left == nullptr || left->length == 0) { - Unref(left); + if (left == nullptr) return right; + if (right == nullptr) return left; + if (left->length == 0) { + CordRep::Unref(left); return right; } - if (right == nullptr || right->length == 0) { - Unref(right); + if (right->length == 0) { + CordRep::Unref(right); return left; } @@ -374,20 +205,27 @@ static CordRep* MakeBalancedTree(CordRep** reps, size_t n) { return reps[0]; } -// Create a new flat node. -static CordRep* NewFlat(size_t length_hint) { - if (length_hint <= kMinFlatLength) { - length_hint = kMinFlatLength; - } else if (length_hint > kMaxFlatLength) { - length_hint = kMaxFlatLength; - } +static CordRepFlat* CreateFlat(const char* data, size_t length, + size_t alloc_hint) { + CordRepFlat* flat = CordRepFlat::New(length + alloc_hint); + flat->length = length; + memcpy(flat->Data(), data, length); + return flat; +} - // Round size up so it matches a size we can exactly express in a tag. - const size_t size = RoundUpForTag(length_hint + kFlatOverhead); - void* const raw_rep = ::operator new(size); - CordRep* rep = new (raw_rep) CordRep(); - rep->tag = AllocatedSizeToTag(size); - return VerifyTree(rep); +// Creates a new flat or ringbuffer out of the specified array. +// The returned node has a refcount of 1. +static CordRep* RingNewTree(const char* data, size_t length, + size_t alloc_hint) { + if (length <= kMaxFlatLength) { + return CreateFlat(data, length, alloc_hint); + } + CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0); + data += kMaxFlatLength; + length -= kMaxFlatLength; + size_t extra = (length - 1) / kMaxFlatLength + 1; + auto* root = CordRepRing::Create(flat, extra); + return CordRepRing::Append(root, {data, length}, alloc_hint); } // Create a new tree out of the specified array. @@ -396,13 +234,16 @@ static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) { if (length == 0) return nullptr; + if (cord_ring_enabled()) { + return RingNewTree(data, length, alloc_hint); + } absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1); size_t n = 0; do { const size_t len = std::min(length, kMaxFlatLength); - CordRep* rep = NewFlat(len + alloc_hint); + CordRepFlat* rep = CordRepFlat::New(len + alloc_hint); rep->length = len; - memcpy(rep->data, data, len); + memcpy(rep->Data(), data, len); reps[n++] = VerifyTree(rep); data += len; length -= len; @@ -425,7 +266,7 @@ void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) { static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) { // Never create empty substring nodes if (length == 0) { - Unref(child); + CordRep::Unref(child); return nullptr; } else { CordRepSubstring* rep = new CordRepSubstring(); @@ -447,50 +288,58 @@ inline void Cord::InlineRep::set_data(const char* data, size_t n, bool nullify_tail) { static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); - cord_internal::SmallMemmove(data_, data, n, nullify_tail); - data_[kMaxInline] = static_cast<char>(n); + cord_internal::SmallMemmove(data_.as_chars(), data, n, nullify_tail); + set_inline_size(n); } inline char* Cord::InlineRep::set_data(size_t n) { assert(n <= kMaxInline); - memset(data_, 0, sizeof(data_)); - data_[kMaxInline] = static_cast<char>(n); - return data_; + ResetToEmpty(); + set_inline_size(n); + return data_.as_chars(); } inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) { - size_t len = data_[kMaxInline]; - CordRep* result; - if (len > kMaxInline) { - memcpy(&result, data_, sizeof(result)); - } else { - result = NewFlat(len + extra_hint); - result->length = len; - memcpy(result->data, data_, len); - set_tree(result); + if (data_.is_tree()) { + return data_.as_tree(); } + + size_t len = inline_size(); + CordRepFlat* result = CordRepFlat::New(len + extra_hint); + result->length = len; + static_assert(kMinFlatLength >= sizeof(data_), ""); + memcpy(result->Data(), data_.as_chars(), sizeof(data_)); + set_tree(result); return result; } inline void Cord::InlineRep::reduce_size(size_t n) { - size_t tag = data_[kMaxInline]; + size_t tag = inline_size(); assert(tag <= kMaxInline); assert(tag >= n); tag -= n; - memset(data_ + tag, 0, n); - data_[kMaxInline] = static_cast<char>(tag); + memset(data_.as_chars() + tag, 0, n); + set_inline_size(static_cast<char>(tag)); } inline void Cord::InlineRep::remove_prefix(size_t n) { - cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n); + cord_internal::SmallMemmove(data_.as_chars(), data_.as_chars() + n, + inline_size() - n); reduce_size(n); } +// Returns `rep` converted into a CordRepRing. +// Directly returns `rep` if `rep` is already a CordRepRing. +static CordRepRing* ForceRing(CordRep* rep, size_t extra) { + return (rep->tag == RING) ? rep->ring() : CordRepRing::Create(rep, extra); +} + void Cord::InlineRep::AppendTree(CordRep* tree) { if (tree == nullptr) return; - size_t len = data_[kMaxInline]; - if (len == 0) { + if (data_.is_empty()) { set_tree(tree); + } else if (cord_ring_enabled()) { + set_tree(CordRepRing::Append(ForceRing(force_tree(0), 1), tree)); } else { set_tree(Concat(force_tree(0), tree)); } @@ -498,9 +347,10 @@ void Cord::InlineRep::AppendTree(CordRep* tree) { void Cord::InlineRep::PrependTree(CordRep* tree) { assert(tree != nullptr); - size_t len = data_[kMaxInline]; - if (len == 0) { + if (data_.is_empty()) { set_tree(tree); + } else if (cord_ring_enabled()) { + set_tree(CordRepRing::Prepend(ForceRing(force_tree(0), 1), tree)); } else { set_tree(Concat(tree, force_tree(0))); } @@ -512,6 +362,15 @@ void Cord::InlineRep::PrependTree(CordRep* tree) { // written to region and the actual size increase will be written to size. static inline bool PrepareAppendRegion(CordRep* root, char** region, size_t* size, size_t max_length) { + if (root->tag == RING && root->refcount.IsOne()) { + Span<char> span = root->ring()->GetAppendBuffer(max_length); + if (!span.empty()) { + *region = span.data(); + *size = span.size(); + return true; + } + } + // Search down the right-hand path for a non-full FLAT node. CordRep* dst = root; while (dst->tag == CONCAT && dst->refcount.IsOne()) { @@ -525,7 +384,7 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, } const size_t in_use = dst->length; - const size_t capacity = TagToLength(dst->tag); + const size_t capacity = dst->flat()->Capacity(); if (in_use == capacity) { *region = nullptr; *size = 0; @@ -540,7 +399,7 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, } dst->length += size_increase; - *region = dst->data + in_use; + *region = dst->flat()->Data() + in_use; *size = size_increase; return true; } @@ -554,12 +413,14 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, } // Try to fit in the inline buffer if possible. - size_t inline_length = data_[kMaxInline]; - if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) { - *region = data_ + inline_length; - *size = max_length; - data_[kMaxInline] = static_cast<char>(inline_length + max_length); - return; + if (!is_tree()) { + size_t inline_length = inline_size(); + if (max_length <= kMaxInline - inline_length) { + *region = data_.as_chars() + inline_length; + *size = max_length; + set_inline_size(inline_length + max_length); + return; + } } CordRep* root = force_tree(max_length); @@ -569,12 +430,16 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, } // Allocate new node. - CordRep* new_node = - NewFlat(std::max(static_cast<size_t>(root->length), max_length)); - new_node->length = - std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length); - *region = new_node->data; + CordRepFlat* new_node = + CordRepFlat::New(std::max(static_cast<size_t>(root->length), max_length)); + new_node->length = std::min(new_node->Capacity(), max_length); + *region = new_node->Data(); *size = new_node->length; + + if (cord_ring_enabled()) { + replace_tree(CordRepRing::Append(ForceRing(root, 1), new_node)); + return; + } replace_tree(Concat(root, new_node)); } @@ -582,12 +447,14 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { const size_t max_length = std::numeric_limits<size_t>::max(); // Try to fit in the inline buffer if possible. - size_t inline_length = data_[kMaxInline]; - if (inline_length < kMaxInline) { - *region = data_ + inline_length; - *size = kMaxInline - inline_length; - data_[kMaxInline] = kMaxInline; - return; + if (!data_.is_tree()) { + size_t inline_length = inline_size(); + if (inline_length < kMaxInline) { + *region = data_.as_chars() + inline_length; + *size = kMaxInline - inline_length; + set_inline_size(kMaxInline); + return; + } } CordRep* root = force_tree(max_length); @@ -597,10 +464,15 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { } // Allocate new node. - CordRep* new_node = NewFlat(root->length); - new_node->length = TagToLength(new_node->tag); - *region = new_node->data; + CordRepFlat* new_node = CordRepFlat::New(root->length); + new_node->length = new_node->Capacity(); + *region = new_node->Data(); *size = new_node->length; + + if (cord_ring_enabled()) { + replace_tree(CordRepRing::Append(ForceRing(root, 1), new_node)); + return; + } replace_tree(Concat(root, new_node)); } @@ -608,7 +480,7 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { // will return true. static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { if (rep->tag >= FLAT) { - *total_mem_usage += TagToAllocatedSize(rep->tag); + *total_mem_usage += rep->flat()->AllocatedSize(); return true; } if (rep->tag == EXTERNAL) { @@ -621,26 +493,24 @@ static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { ClearSlow(); - memcpy(data_, src.data_, sizeof(data_)); + data_ = src.data_; if (is_tree()) { - Ref(tree()); + data_.set_profiled(false); + CordRep::Ref(tree()); + clear_cordz_info(); } } void Cord::InlineRep::ClearSlow() { if (is_tree()) { - Unref(tree()); + CordRep::Unref(tree()); } - memset(data_, 0, sizeof(data_)); + ResetToEmpty(); } // -------------------------------------------------------------------- // Constructors and destructors -Cord::Cord(const Cord& src) : contents_(src.contents_) { - Ref(contents_.tree()); // Does nothing if contents_ has embedded data -} - Cord::Cord(absl::string_view src) { const size_t n = src.size(); if (n <= InlineRep::kMaxInline) { @@ -684,14 +554,18 @@ template Cord::Cord(std::string&& src); // The destruction code is separate so that the compiler can determine // that it does not need to call the destructor on a moved-from Cord. void Cord::DestroyCordSlow() { - Unref(VerifyTree(contents_.tree())); + if (CordRep* tree = contents_.tree()) { + CordRep::Unref(VerifyTree(tree)); + } } // -------------------------------------------------------------------- // Mutators void Cord::Clear() { - Unref(contents_.clear()); + if (CordRep* tree = contents_.clear()) { + CordRep::Unref(tree); + } } Cord& Cord::operator=(absl::string_view src) { @@ -702,19 +576,20 @@ Cord& Cord::operator=(absl::string_view src) { if (length <= InlineRep::kMaxInline) { // Embed into this->contents_ contents_.set_data(data, length, true); - Unref(tree); + if (tree) CordRep::Unref(tree); return *this; } if (tree != nullptr && tree->tag >= FLAT && - TagToLength(tree->tag) >= length && tree->refcount.IsOne()) { + tree->flat()->Capacity() >= length && + tree->refcount.IsOne()) { // Copy in place if the existing FLAT node is reusable. - memmove(tree->data, data, length); + memmove(tree->flat()->Data(), data, length); tree->length = length; VerifyTree(tree); return *this; } contents_.set_tree(NewTree(data, length, 0)); - Unref(tree); + if (tree) CordRep::Unref(tree); return *this; } @@ -734,24 +609,25 @@ template Cord& Cord::operator=(std::string&& src); // we keep it here to make diffs easier. void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { if (src_size == 0) return; // memcpy(_, nullptr, 0) is undefined. - // Try to fit in the inline buffer if possible. - size_t inline_length = data_[kMaxInline]; - if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) { - // Append new data to embedded array - data_[kMaxInline] = static_cast<char>(inline_length + src_size); - memcpy(data_ + inline_length, src_data, src_size); - return; - } - - CordRep* root = tree(); size_t appended = 0; - if (root) { + CordRep* root = nullptr; + if (is_tree()) { + root = data_.as_tree(); char* region; if (PrepareAppendRegion(root, ®ion, &appended, src_size)) { memcpy(region, src_data, appended); } } else { + // Try to fit in the inline buffer if possible. + size_t inline_length = inline_size(); + if (src_size <= kMaxInline - inline_length) { + // Append new data to embedded array + memcpy(data_.as_chars() + inline_length, src_data, src_size); + set_inline_size(inline_length + src_size); + return; + } + // It is possible that src_data == data_, but when we transition from an // InlineRep to a tree we need to assign data_ = root via set_tree. To // avoid corrupting the source data before we copy it, delay calling @@ -760,10 +636,11 @@ void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { // either double the inlined size, or the added size + 10%. const size_t size1 = inline_length * 2 + src_size; const size_t size2 = inline_length + src_size / 10; - root = NewFlat(std::max<size_t>(size1, size2)); - appended = std::min(src_size, TagToLength(root->tag) - inline_length); - memcpy(root->data, data_, inline_length); - memcpy(root->data + inline_length, src_data, appended); + root = CordRepFlat::New(std::max<size_t>(size1, size2)); + appended = std::min( + src_size, root->flat()->Capacity() - inline_length); + memcpy(root->flat()->Data(), data_.as_chars(), inline_length); + memcpy(root->flat()->Data() + inline_length, src_data, appended); root->length = inline_length + appended; set_tree(root); } @@ -774,6 +651,13 @@ void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { return; } + if (cord_ring_enabled()) { + absl::string_view data(src_data, src_size); + root = ForceRing(root, (data.size() - 1) / kMaxFlatLength + 1); + replace_tree(CordRepRing::Append(root->ring(), data)); + return; + } + // Use new block(s) for any remaining bytes that were not handled above. // Alloc extra memory only if the right child of the root of the new tree is // going to be a FLAT node, which will permit further inplace appends. @@ -790,7 +674,7 @@ void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { } inline CordRep* Cord::TakeRep() const& { - return Ref(contents_.tree()); + return CordRep::Ref(contents_.tree()); } inline CordRep* Cord::TakeRep() && { @@ -819,7 +703,7 @@ inline void Cord::AppendImpl(C&& src) { } if (src_tree->tag >= FLAT) { // src tree just has one flat node. - contents_.AppendArray(src_tree->data, src_size); + contents_.AppendArray(src_tree->flat()->Data(), src_size); return; } if (&src == this) { @@ -834,6 +718,7 @@ inline void Cord::AppendImpl(C&& src) { return; } + // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize) contents_.AppendTree(std::forward<C>(src).TakeRep()); } @@ -855,7 +740,7 @@ template void Cord::Append(std::string&& src); void Cord::Prepend(const Cord& src) { CordRep* src_tree = src.contents_.tree(); if (src_tree != nullptr) { - Ref(src_tree); + CordRep::Ref(src_tree); contents_.PrependTree(src_tree); return; } @@ -867,18 +752,19 @@ void Cord::Prepend(const Cord& src) { void Cord::Prepend(absl::string_view src) { if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. - size_t cur_size = contents_.size(); - if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) { - // Use embedded storage. - char data[InlineRep::kMaxInline + 1] = {0}; - data[InlineRep::kMaxInline] = cur_size + src.size(); // set size - memcpy(data, src.data(), src.size()); - memcpy(data + src.size(), contents_.data(), cur_size); - memcpy(reinterpret_cast<void*>(&contents_), data, - InlineRep::kMaxInline + 1); - } else { - contents_.PrependTree(NewTree(src.data(), src.size(), 0)); + if (!contents_.is_tree()) { + size_t cur_size = contents_.inline_size(); + if (cur_size + src.size() <= InlineRep::kMaxInline) { + // Use embedded storage. + char data[InlineRep::kMaxInline + 1] = {0}; + memcpy(data, src.data(), src.size()); + memcpy(data + src.size(), contents_.data(), cur_size); + memcpy(contents_.data_.as_chars(), data, InlineRep::kMaxInline + 1); + contents_.set_inline_size(cur_size + src.size()); + return; + } } + contents_.PrependTree(NewTree(src.data(), src.size(), 0)); } template <typename T, Cord::EnableIfString<T>> @@ -894,7 +780,7 @@ template void Cord::Prepend(std::string&& src); static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { if (n >= node->length) return nullptr; - if (n == 0) return Ref(node); + if (n == 0) return CordRep::Ref(node); absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack; while (node->tag == CONCAT) { @@ -912,7 +798,7 @@ static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { assert(n <= node->length); if (n == 0) { - Ref(node); + CordRep::Ref(node); } else { size_t start = n; size_t len = node->length - n; @@ -921,10 +807,10 @@ static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { start += node->substring()->start; node = node->substring()->child; } - node = NewSubstring(Ref(node), start, len); + node = NewSubstring(CordRep::Ref(node), start, len); } while (!rhs_stack.empty()) { - node = Concat(node, Ref(rhs_stack.back())); + node = Concat(node, CordRep::Ref(rhs_stack.back())); rhs_stack.pop_back(); } return node; @@ -935,7 +821,7 @@ static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { // edited in place iff that node and all its ancestors have a refcount of 1. static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { if (n >= node->length) return nullptr; - if (n == 0) return Ref(node); + if (n == 0) return CordRep::Ref(node); absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; bool inplace_ok = node->refcount.IsOne(); @@ -955,11 +841,11 @@ static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { assert(n <= node->length); if (n == 0) { - Ref(node); + CordRep::Ref(node); } else if (inplace_ok && node->tag != EXTERNAL) { // Consider making a new buffer if the current node capacity is much // larger than the new length. - Ref(node); + CordRep::Ref(node); node->length -= n; } else { size_t start = 0; @@ -968,10 +854,10 @@ static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { start = node->substring()->start; node = node->substring()->child; } - node = NewSubstring(Ref(node), start, len); + node = NewSubstring(CordRep::Ref(node), start, len); } while (!lhs_stack.empty()) { - node = Concat(Ref(lhs_stack.back()), node); + node = Concat(CordRep::Ref(lhs_stack.back()), node); lhs_stack.pop_back(); } return node; @@ -984,9 +870,11 @@ void Cord::RemovePrefix(size_t n) { CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.remove_prefix(n); + } else if (tree->tag == RING) { + contents_.replace_tree(CordRepRing::RemovePrefix(tree->ring(), n)); } else { CordRep* newrep = RemovePrefixFrom(tree, n); - Unref(tree); + CordRep::Unref(tree); contents_.replace_tree(VerifyTree(newrep)); } } @@ -998,9 +886,11 @@ void Cord::RemoveSuffix(size_t n) { CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.reduce_size(n); + } else if (tree->tag == RING) { + contents_.replace_tree(CordRepRing::RemoveSuffix(tree->ring(), n)); } else { CordRep* newrep = RemoveSuffixFrom(tree, n); - Unref(tree); + CordRep::Unref(tree); contents_.replace_tree(VerifyTree(newrep)); } } @@ -1033,13 +923,13 @@ static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) { results.pop_back(); results.push_back(Concat(left, right)); } else if (pos == 0 && n == node->length) { - results.push_back(Ref(node)); + results.push_back(CordRep::Ref(node)); } else if (node->tag != CONCAT) { if (node->tag == SUBSTRING) { pos += node->substring()->start; node = node->substring()->child; } - results.push_back(NewSubstring(Ref(node), pos, n)); + results.push_back(NewSubstring(CordRep::Ref(node), pos, n)); } else if (pos + n <= node->concat()->left->length) { todo.push_back(SubRange(node->concat()->left, pos, n)); } else if (pos >= node->concat()->left->length) { @@ -1071,7 +961,7 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { } else if (new_size <= InlineRep::kMaxInline) { Cord::ChunkIterator it = chunk_begin(); it.AdvanceBytes(pos); - char* dest = sub_cord.contents_.data_; + char* dest = sub_cord.contents_.data_.as_chars(); size_t remaining_size = new_size; while (remaining_size > it->size()) { cord_internal::SmallMemmove(dest, it->data(), it->size()); @@ -1080,7 +970,10 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { ++it; } cord_internal::SmallMemmove(dest, it->data(), remaining_size); - sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size; + sub_cord.contents_.set_inline_size(new_size); + } else if (tree->tag == RING) { + tree = CordRepRing::SubRing(CordRep::Ref(tree)->ring(), pos, new_size); + sub_cord.contents_.set_tree(tree); } else { sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size)); } @@ -1117,9 +1010,9 @@ class CordForest { concat_node->left = concat_freelist_; concat_freelist_ = concat_node; } else { - Ref(concat_node->right); - Ref(concat_node->left); - Unref(concat_node); + CordRep::Ref(concat_node->right); + CordRep::Ref(concat_node->left); + CordRep::Unref(concat_node); } } else { AddNode(node); @@ -1269,20 +1162,23 @@ bool ComputeCompareResult<bool>(int memcmp_res) { // Helper routine. Locates the first flat chunk of the Cord without // initializing the iterator. inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { - size_t n = data_[kMaxInline]; - if (n <= kMaxInline) { - return absl::string_view(data_, n); + if (!is_tree()) { + return absl::string_view(data_.as_chars(), data_.inline_size()); } CordRep* node = tree(); if (node->tag >= FLAT) { - return absl::string_view(node->data, node->length); + return absl::string_view(node->flat()->Data(), node->length); } if (node->tag == EXTERNAL) { return absl::string_view(node->external()->base, node->length); } + if (node->tag == RING) { + return node->ring()->entry_data(node->ring()->head()); + } + // Walk down the left branches until we hit a non-CONCAT node. while (node->tag == CONCAT) { node = node->concat()->left; @@ -1299,7 +1195,7 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { } if (node->tag >= FLAT) { - return absl::string_view(node->data + offset, length); + return absl::string_view(node->flat()->Data() + offset, length); } assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here"); @@ -1482,26 +1378,22 @@ void Cord::CopyToArraySlowPath(char* dst) const { } } -Cord::ChunkIterator& Cord::ChunkIterator::operator++() { - ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 && - "Attempted to iterate past `end()`"); - assert(bytes_remaining_ >= current_chunk_.size()); - bytes_remaining_ -= current_chunk_.size(); - - if (stack_of_right_children_.empty()) { +Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() { + auto& stack_of_right_children = stack_of_right_children_; + if (stack_of_right_children.empty()) { assert(!current_chunk_.empty()); // Called on invalid iterator. // We have reached the end of the Cord. return *this; } // Process the next node on the stack. - CordRep* node = stack_of_right_children_.back(); - stack_of_right_children_.pop_back(); + CordRep* node = stack_of_right_children.back(); + stack_of_right_children.pop_back(); // Walk down the left branches until we hit a non-CONCAT node. Save the // right children to the stack for subsequent traversal. while (node->tag == CONCAT) { - stack_of_right_children_.push_back(node->concat()->right); + stack_of_right_children.push_back(node->concat()->right); node = node->concat()->left; } @@ -1516,7 +1408,7 @@ Cord::ChunkIterator& Cord::ChunkIterator::operator++() { assert(node->tag == EXTERNAL || node->tag >= FLAT); assert(length != 0); const char* data = - node->tag == EXTERNAL ? node->external()->base : node->data; + node->tag == EXTERNAL ? node->external()->base : node->flat()->Data(); current_chunk_ = absl::string_view(data + offset, length); current_leaf_ = node; return *this; @@ -1544,12 +1436,32 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { } return subcord; } + + if (ring_reader_) { + size_t chunk_size = current_chunk_.size(); + if (n <= chunk_size && n <= kMaxBytesToCopy) { + subcord = Cord(current_chunk_.substr(0, n)); + } else { + auto* ring = CordRep::Ref(ring_reader_.ring())->ring(); + size_t offset = ring_reader_.length() - bytes_remaining_; + subcord.contents_.set_tree(CordRepRing::SubRing(ring, offset, n)); + } + if (n < chunk_size) { + bytes_remaining_ -= n; + current_chunk_.remove_prefix(n); + } else { + AdvanceBytesRing(n); + } + return subcord; + } + + auto& stack_of_right_children = stack_of_right_children_; if (n < current_chunk_.size()) { // Range to read is a proper subrange of the current chunk. assert(current_leaf_ != nullptr); - CordRep* subnode = Ref(current_leaf_); - const char* data = - subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + CordRep* subnode = CordRep::Ref(current_leaf_); + const char* data = subnode->tag == EXTERNAL ? subnode->external()->base + : subnode->flat()->Data(); subnode = NewSubstring(subnode, current_chunk_.data() - data, n); subcord.contents_.set_tree(VerifyTree(subnode)); RemoveChunkPrefix(n); @@ -1559,10 +1471,10 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { // Range to read begins with a proper subrange of the current chunk. assert(!current_chunk_.empty()); assert(current_leaf_ != nullptr); - CordRep* subnode = Ref(current_leaf_); + CordRep* subnode = CordRep::Ref(current_leaf_); if (current_chunk_.size() < subnode->length) { - const char* data = - subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + const char* data = subnode->tag == EXTERNAL ? subnode->external()->base + : subnode->flat()->Data(); subnode = NewSubstring(subnode, current_chunk_.data() - data, current_chunk_.size()); } @@ -1572,20 +1484,20 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { // Process the next node(s) on the stack, reading whole subtrees depending on // their length and how many bytes we are advancing. CordRep* node = nullptr; - while (!stack_of_right_children_.empty()) { - node = stack_of_right_children_.back(); - stack_of_right_children_.pop_back(); + while (!stack_of_right_children.empty()) { + node = stack_of_right_children.back(); + stack_of_right_children.pop_back(); if (node->length > n) break; // TODO(qrczak): This might unnecessarily recreate existing concat nodes. // Avoiding that would need pretty complicated logic (instead of - // current_leaf_, keep current_subtree_ which points to the highest node + // current_leaf, keep current_subtree_ which points to the highest node // such that the current leaf can be found on the path of left children // starting from current_subtree_; delay creating subnode while node is // below current_subtree_; find the proper node along the path of left // children starting from current_subtree_ if this loop exits while staying // below current_subtree_; etc.; alternatively, push parents instead of // right children on the stack). - subnode = Concat(subnode, Ref(node)); + subnode = Concat(subnode, CordRep::Ref(node)); n -= node->length; bytes_remaining_ -= node->length; node = nullptr; @@ -1603,11 +1515,11 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { while (node->tag == CONCAT) { if (node->concat()->left->length > n) { // Push right, descend left. - stack_of_right_children_.push_back(node->concat()->right); + stack_of_right_children.push_back(node->concat()->right); node = node->concat()->left; } else { // Read left, descend right. - subnode = Concat(subnode, Ref(node->concat()->left)); + subnode = Concat(subnode, CordRep::Ref(node->concat()->left)); n -= node->concat()->left->length; bytes_remaining_ -= node->concat()->left->length; node = node->concat()->right; @@ -1626,9 +1538,11 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { // chunk. assert(node->tag == EXTERNAL || node->tag >= FLAT); assert(length > n); - if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n)); + if (n > 0) { + subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n)); + } const char* data = - node->tag == EXTERNAL ? node->external()->base : node->data; + node->tag == EXTERNAL ? node->external()->base : node->flat()->Data(); current_chunk_ = absl::string_view(data + offset + n, length - n); current_leaf_ = node; bytes_remaining_ -= n; @@ -1644,12 +1558,19 @@ void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { n -= current_chunk_.size(); bytes_remaining_ -= current_chunk_.size(); + if (stack_of_right_children_.empty()) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + return; + } + // Process the next node(s) on the stack, skipping whole subtrees depending on // their length and how many bytes we are advancing. CordRep* node = nullptr; - while (!stack_of_right_children_.empty()) { - node = stack_of_right_children_.back(); - stack_of_right_children_.pop_back(); + auto& stack_of_right_children = stack_of_right_children_; + while (!stack_of_right_children.empty()) { + node = stack_of_right_children.back(); + stack_of_right_children.pop_back(); if (node->length > n) break; n -= node->length; bytes_remaining_ -= node->length; @@ -1667,7 +1588,7 @@ void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { while (node->tag == CONCAT) { if (node->concat()->left->length > n) { // Push right, descend left. - stack_of_right_children_.push_back(node->concat()->right); + stack_of_right_children.push_back(node->concat()->right); node = node->concat()->left; } else { // Skip left, descend right. @@ -1688,7 +1609,7 @@ void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { assert(node->tag == EXTERNAL || node->tag >= FLAT); assert(length > n); const char* data = - node->tag == EXTERNAL ? node->external()->base : node->data; + node->tag == EXTERNAL ? node->external()->base : node->flat()->Data(); current_chunk_ = absl::string_view(data + offset + n, length - n); current_leaf_ = node; bytes_remaining_ -= n; @@ -1706,7 +1627,9 @@ char Cord::operator[](size_t i) const { assert(offset < rep->length); if (rep->tag >= FLAT) { // Get the "i"th character directly from the flat array. - return rep->data[offset]; + return rep->flat()->Data()[offset]; + } else if (rep->tag == RING) { + return rep->ring()->GetCharacter(offset); } else if (rep->tag == EXTERNAL) { // Get the "i"th character from the external array. return rep->external()->base[offset]; @@ -1737,9 +1660,9 @@ absl::string_view Cord::FlattenSlowPath() { // Try to put the contents into a new flat rep. If they won't fit in the // biggest possible flat node, use an external rep instead. if (total_size <= kMaxFlatLength) { - new_rep = NewFlat(total_size); + new_rep = CordRepFlat::New(total_size); new_rep->length = total_size; - new_buffer = new_rep->data; + new_buffer = new_rep->flat()->Data(); CopyToArraySlowPath(new_buffer); } else { new_buffer = std::allocator<char>().allocate(total_size); @@ -1750,7 +1673,9 @@ absl::string_view Cord::FlattenSlowPath() { s.size()); }); } - Unref(contents_.tree()); + if (CordRep* tree = contents_.tree()) { + CordRep::Unref(tree); + } contents_.set_tree(new_rep); return absl::string_view(new_buffer, total_size); } @@ -1758,7 +1683,7 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { assert(rep != nullptr); if (rep->tag >= FLAT) { - *fragment = absl::string_view(rep->data, rep->length); + *fragment = absl::string_view(rep->flat()->Data(), rep->length); return true; } else if (rep->tag == EXTERNAL) { *fragment = absl::string_view(rep->external()->base, rep->length); @@ -1766,8 +1691,8 @@ absl::string_view Cord::FlattenSlowPath() { } else if (rep->tag == SUBSTRING) { CordRep* child = rep->substring()->child; if (child->tag >= FLAT) { - *fragment = - absl::string_view(child->data + rep->substring()->start, rep->length); + *fragment = absl::string_view( + child->flat()->Data() + rep->substring()->start, rep->length); return true; } else if (child->tag == EXTERNAL) { *fragment = absl::string_view( @@ -1781,6 +1706,15 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ void Cord::ForEachChunkAux( absl::cord_internal::CordRep* rep, absl::FunctionRef<void(absl::string_view)> callback) { + if (rep->tag == RING) { + ChunkIterator it(rep), end; + while (it != end) { + callback(*it); + ++it; + } + return; + } + assert(rep != nullptr); int stack_pos = 0; constexpr int stack_max = 128; @@ -1822,9 +1756,9 @@ absl::string_view Cord::FlattenSlowPath() { } } -static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) { +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, + int indent) { const int kIndentStep = 1; - int indent = 0; absl::InlinedVector<CordRep*, kInlinedVectorSize> stack; absl::InlinedVector<int, kInlinedVectorSize> indents; for (;;) { @@ -1845,17 +1779,28 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) { *os << "SUBSTRING @ " << rep->substring()->start << "\n"; indent += kIndentStep; rep = rep->substring()->child; - } else { // Leaf + } else { // Leaf or ring if (rep->tag == EXTERNAL) { *os << "EXTERNAL ["; if (include_data) *os << absl::CEscape(std::string(rep->external()->base, rep->length)); *os << "]\n"; - } else { - *os << "FLAT cap=" << TagToLength(rep->tag) << " ["; + } else if (rep->tag >= FLAT) { + *os << "FLAT cap=" << rep->flat()->Capacity() + << " ["; if (include_data) - *os << absl::CEscape(std::string(rep->data, rep->length)); + *os << absl::CEscape(std::string(rep->flat()->Data(), rep->length)); *os << "]\n"; + } else { + assert(rep->tag == RING); + auto* ring = rep->ring(); + *os << "RING, entries = " << ring->entries() << "\n"; + CordRepRing::index_type head = ring->head(); + do { + DumpNode(ring->entry_child(head), include_data, os, + indent + kIndentStep); + head = ring->advance(head);; + } while (head != ring->tail()); } if (stack.empty()) break; rep = stack.back(); @@ -1900,8 +1845,9 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, worklist.push_back(node->concat()->left); } } else if (node->tag >= FLAT) { - ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag), - ReportError(root, node)); + ABSL_INTERNAL_CHECK( + node->length <= node->flat()->Capacity(), + ReportError(root, node)); } else if (node->tag == EXTERNAL) { ABSL_INTERNAL_CHECK(node->external()->base != nullptr, ReportError(root, node)); @@ -1948,6 +1894,15 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, } next_node = right; } + } else if (cur_node->tag == RING) { + total_mem_usage += CordRepRing::AllocSize(cur_node->ring()->capacity()); + const CordRepRing* ring = cur_node->ring(); + CordRepRing::index_type pos = ring->head(), tail = ring->tail(); + do { + CordRep* node = ring->entry_child(pos); + assert(node->tag >= FLAT || node->tag == EXTERNAL); + RepMemoryUsageLeaf(node, &total_mem_usage); + } while ((pos = ring->advance(pos)) != tail); } else { // Since cur_node is not a leaf or a concat node it must be a substring. assert(cur_node->tag == SUBSTRING); @@ -1977,14 +1932,14 @@ std::ostream& operator<<(std::ostream& out, const Cord& cord) { } namespace strings_internal { -size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; } -size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; } +size_t CordTestAccess::FlatOverhead() { return cord_internal::kFlatOverhead; } +size_t CordTestAccess::MaxFlatLength() { return cord_internal::kMaxFlatLength; } size_t CordTestAccess::FlatTagToLength(uint8_t tag) { - return TagToLength(tag); + return cord_internal::TagToLength(tag); } uint8_t CordTestAccess::LengthToTag(size_t s) { ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s)); - return AllocatedSizeToTag(s + kFlatOverhead); + return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead); } size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); } size_t CordTestAccess::SizeofCordRepExternal() { diff --git a/absl/strings/cord.h b/absl/strings/cord.h index b8b251b0..fa9cb913 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -25,7 +25,7 @@ // // Because a Cord consists of these chunks, data can be added to or removed from // a Cord during its lifetime. Chunks may also be shared between Cords. Unlike a -// `std::string`, a Cord can therefore accomodate data that changes over its +// `std::string`, a Cord can therefore accommodate data that changes over its // lifetime, though it's not quite "mutable"; it can change only in the // attachment, detachment, or rearrangement of chunks of its constituent data. // @@ -78,7 +78,10 @@ #include "absl/functional/function_ref.h" #include "absl/meta/type_traits.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cord_rep_ring_reader.h" #include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/internal/string_constant.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" @@ -286,7 +289,7 @@ class Cord { bool StartsWith(const Cord& rhs) const; bool StartsWith(absl::string_view rhs) const; - // Cord::EndsWidth() + // Cord::EndsWith() // // Determines whether the Cord ends with the passed string data `rhs`. bool EndsWith(absl::string_view rhs) const; @@ -360,14 +363,38 @@ class Cord { friend class CharIterator; private: + using CordRep = absl::cord_internal::CordRep; + using CordRepRing = absl::cord_internal::CordRepRing; + using CordRepRingReader = absl::cord_internal::CordRepRingReader; + + // Stack of right children of concat nodes that we have to visit. + // Keep this at the end of the structure to avoid cache-thrashing. + // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for + // the inlined vector size (47 exists for backward compatibility). + using Stack = absl::InlinedVector<absl::cord_internal::CordRep*, 47>; + + // Constructs a `begin()` iterator from `tree`. `tree` must not be null. + explicit ChunkIterator(cord_internal::CordRep* tree); + // Constructs a `begin()` iterator from `cord`. explicit ChunkIterator(const Cord* cord); + // Initializes this instance from a tree. Invoked by constructors. + void InitTree(cord_internal::CordRep* tree); + // Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than // `current_chunk_.size()`. void RemoveChunkPrefix(size_t n); Cord AdvanceAndReadBytes(size_t n); void AdvanceBytes(size_t n); + + // Stack specific operator++ + ChunkIterator& AdvanceStack(); + + // Ring buffer specific operator++ + ChunkIterator& AdvanceRing(); + void AdvanceBytesRing(size_t n); + // Iterates `n` bytes, where `n` is expected to be greater than or equal to // `current_chunk_.size()`. void AdvanceBytesSlowPath(size_t n); @@ -381,8 +408,12 @@ class Cord { absl::cord_internal::CordRep* current_leaf_ = nullptr; // The number of bytes left in the `Cord` over which we are iterating. size_t bytes_remaining_ = 0; - absl::InlinedVector<absl::cord_internal::CordRep*, 4> - stack_of_right_children_; + + // Cord reader for ring buffers. Empty if not traversing a ring buffer. + CordRepRingReader ring_reader_; + + // See 'Stack' alias definition. + Stack stack_of_right_children_; }; // Cord::ChunkIterator::chunk_begin() @@ -624,6 +655,14 @@ class Cord { return c.HashFragmented(std::move(hash_state)); } + // Create a Cord with the contents of StringConstant<T>::value. + // No allocations will be done and no data will be copied. + // This is an INTERNAL API and subject to change or removal. This API can only + // be used by spelling absl::strings_internal::MakeStringConstant, which is + // also an internal API. + template <typename T> + explicit constexpr Cord(strings_internal::StringConstant<T>); + private: friend class CordTestPeer; friend bool operator==(const Cord& lhs, const Cord& rhs); @@ -644,19 +683,17 @@ class Cord { // InlineRep holds either a tree pointer, or an array of kMaxInline bytes. class InlineRep { public: - static constexpr unsigned char kMaxInline = 15; + static constexpr unsigned char kMaxInline = cord_internal::kMaxInline; static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), ""); - // Tag byte & kMaxInline means we are storing a pointer. - static constexpr unsigned char kTreeFlag = 1 << 4; - // Tag byte & kProfiledFlag means we are profiling the Cord. - static constexpr unsigned char kProfiledFlag = 1 << 5; - constexpr InlineRep() : data_{} {} + constexpr InlineRep() : data_() {} InlineRep(const InlineRep& src); InlineRep(InlineRep&& src); InlineRep& operator=(const InlineRep& src); InlineRep& operator=(InlineRep&& src) noexcept; + explicit constexpr InlineRep(cord_internal::InlineData data); + void Swap(InlineRep* rhs); bool empty() const; size_t size() const; @@ -666,6 +703,7 @@ class Cord { char* set_data(size_t n); // Write data to the result // Returns nullptr if holding bytes absl::cord_internal::CordRep* tree() const; + absl::cord_internal::CordRep* as_tree() const; // Discards old pointer, if any void set_tree(absl::cord_internal::CordRep* rep); // Replaces a tree with a new root. This is faster than set_tree, but it @@ -684,16 +722,16 @@ class Cord { void GetAppendRegion(char** region, size_t* size, size_t max_length); void GetAppendRegion(char** region, size_t* size); bool IsSame(const InlineRep& other) const { - return memcmp(data_, other.data_, sizeof(data_)) == 0; + return memcmp(&data_, &other.data_, sizeof(data_)) == 0; } int BitwiseCompare(const InlineRep& other) const { uint64_t x, y; - // Use memcpy to avoid anti-aliasing issues. - memcpy(&x, data_, sizeof(x)); - memcpy(&y, other.data_, sizeof(y)); + // Use memcpy to avoid aliasing issues. + memcpy(&x, &data_, sizeof(x)); + memcpy(&y, &other.data_, sizeof(y)); if (x == y) { - memcpy(&x, data_ + 8, sizeof(x)); - memcpy(&y, other.data_ + 8, sizeof(y)); + memcpy(&x, reinterpret_cast<const char*>(&data_) + 8, sizeof(x)); + memcpy(&y, reinterpret_cast<const char*>(&other.data_) + 8, sizeof(y)); if (x == y) return 0; } return absl::big_endian::FromHost64(x) < absl::big_endian::FromHost64(y) @@ -706,16 +744,33 @@ class Cord { // to 15 bytes does not cause a memory allocation. absl::strings_internal::STLStringResizeUninitialized(dst, sizeof(data_) - 1); - memcpy(&(*dst)[0], data_, sizeof(data_) - 1); + memcpy(&(*dst)[0], &data_, sizeof(data_) - 1); // erase is faster than resize because the logic for memory allocation is // not needed. - dst->erase(data_[kMaxInline]); + dst->erase(inline_size()); } // Copies the inline contents into `dst`. Assumes the cord is not empty. void CopyToArray(char* dst) const; - bool is_tree() const { return data_[kMaxInline] > kMaxInline; } + bool is_tree() const { return data_.is_tree(); } + + // Returns true if the Cord is being profiled by cordz. + bool is_profiled() const { return data_.is_tree() && data_.is_profiled(); } + + // Returns the profiled CordzInfo, or nullptr if not sampled. + absl::cord_internal::CordzInfo* cordz_info() const { + return data_.cordz_info(); + } + + // Sets the profiled CordzInfo. `cordz_info` must not be null. + void set_cordz_info(cord_internal::CordzInfo* cordz_info) { + assert(cordz_info != nullptr); + data_.set_cordz_info(cordz_info); + } + + // Resets the current cordz_info to null / empty. + void clear_cordz_info() { data_.clear_cordz_info(); } private: friend class Cord; @@ -724,10 +779,12 @@ class Cord { // Unrefs the tree, stops profiling, and zeroes the contents void ClearSlow(); - // If the data has length <= kMaxInline, we store it in data_[0..len-1], - // and store the length in data_[kMaxInline]. Else we store it in a tree - // and store a pointer to that tree in data_[0..sizeof(CordRep*)-1]. - alignas(absl::cord_internal::CordRep*) char data_[kMaxInline + 1]; + void ResetToEmpty() { data_ = {}; } + + void set_inline_size(size_t size) { data_.set_inline_size(size); } + size_t inline_size() const { return data_.inline_size(); } + + cord_internal::InlineData data_; }; InlineRep contents_; @@ -878,13 +935,20 @@ Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { return cord; } -inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) { - cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); +constexpr Cord::InlineRep::InlineRep(cord_internal::InlineData data) + : data_(data) {} + +inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) + : data_(src.data_) { + if (is_tree()) { + data_.clear_cordz_info(); + absl::cord_internal::CordRep::Ref(as_tree()); + } } inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) { - memcpy(data_, src.data_, sizeof(data_)); - memset(src.data_, 0, sizeof(data_)); + data_ = src.data_; + src.ResetToEmpty(); } inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { @@ -892,7 +956,7 @@ inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { return *this; } if (!is_tree() && !src.is_tree()) { - cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); + data_ = src.data_; return *this; } AssignSlow(src); @@ -904,8 +968,8 @@ inline Cord::InlineRep& Cord::InlineRep::operator=( if (is_tree()) { ClearSlow(); } - memcpy(data_, src.data_, sizeof(data_)); - memset(src.data_, 0, sizeof(data_)); + data_ = src.data_; + src.ResetToEmpty(); return *this; } @@ -913,44 +977,43 @@ inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) { if (rhs == this) { return; } - - Cord::InlineRep tmp; - cord_internal::SmallMemmove(tmp.data_, data_, sizeof(data_)); - cord_internal::SmallMemmove(data_, rhs->data_, sizeof(data_)); - cord_internal::SmallMemmove(rhs->data_, tmp.data_, sizeof(data_)); + std::swap(data_, rhs->data_); } inline const char* Cord::InlineRep::data() const { - return is_tree() ? nullptr : data_; + return is_tree() ? nullptr : data_.as_chars(); +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::as_tree() const { + assert(data_.is_tree()); + return data_.as_tree(); } inline absl::cord_internal::CordRep* Cord::InlineRep::tree() const { if (is_tree()) { - absl::cord_internal::CordRep* rep; - memcpy(&rep, data_, sizeof(rep)); - return rep; + return as_tree(); } else { return nullptr; } } -inline bool Cord::InlineRep::empty() const { return data_[kMaxInline] == 0; } +inline bool Cord::InlineRep::empty() const { return data_.is_empty(); } inline size_t Cord::InlineRep::size() const { - const char tag = data_[kMaxInline]; - if (tag <= kMaxInline) return tag; - return static_cast<size_t>(tree()->length); + return is_tree() ? as_tree()->length : inline_size(); } inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) { if (rep == nullptr) { - memset(data_, 0, sizeof(data_)); + ResetToEmpty(); } else { - bool was_tree = is_tree(); - memcpy(data_, &rep, sizeof(rep)); - memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); - if (!was_tree) { - data_[kMaxInline] = kTreeFlag; + if (data_.is_tree()) { + // `data_` already holds a 'tree' value and an optional cordz_info value. + // Replace the tree value only, leaving the cordz_info value unchanged. + data_.set_tree(rep); + } else { + // `data_` contains inlined data: initialize data_ to tree value `rep`. + data_.make_tree(rep); } } } @@ -961,34 +1024,41 @@ inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) { set_tree(rep); return; } - memcpy(data_, &rep, sizeof(rep)); - memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); + data_.set_tree(rep); } inline absl::cord_internal::CordRep* Cord::InlineRep::clear() { - const char tag = data_[kMaxInline]; - absl::cord_internal::CordRep* result = nullptr; - if (tag > kMaxInline) { - memcpy(&result, data_, sizeof(result)); - } - memset(data_, 0, sizeof(data_)); // Clear the cord + absl::cord_internal::CordRep* result = tree(); + ResetToEmpty(); return result; } inline void Cord::InlineRep::CopyToArray(char* dst) const { assert(!is_tree()); - size_t n = data_[kMaxInline]; + size_t n = inline_size(); assert(n != 0); - cord_internal::SmallMemmove(dst, data_, n); + cord_internal::SmallMemmove(dst, data_.as_chars(), n); } constexpr inline Cord::Cord() noexcept {} +template <typename T> +constexpr Cord::Cord(strings_internal::StringConstant<T>) + : contents_(strings_internal::StringConstant<T>::value.size() <= + cord_internal::kMaxInline + ? cord_internal::InlineData( + strings_internal::StringConstant<T>::value) + : cord_internal::InlineData( + &cord_internal::ConstInitExternalStorage< + strings_internal::StringConstant<T>>::value)) {} + inline Cord& Cord::operator=(const Cord& x) { contents_ = x.contents_; return *this; } +inline Cord::Cord(const Cord& src) : contents_(src.contents_) {} + inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} inline void Cord::swap(Cord& other) noexcept { @@ -1072,17 +1142,64 @@ inline bool Cord::StartsWith(absl::string_view rhs) const { return EqualsImpl(rhs, rhs_size); } +inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) { + if (tree->tag == cord_internal::RING) { + current_chunk_ = ring_reader_.Reset(tree->ring()); + return; + } + + stack_of_right_children_.push_back(tree); + operator++(); +} + +inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) + : bytes_remaining_(tree->length) { + InitTree(tree); +} + inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) : bytes_remaining_(cord->size()) { - if (cord->empty()) return; if (cord->contents_.is_tree()) { - stack_of_right_children_.push_back(cord->contents_.tree()); - operator++(); + InitTree(cord->contents_.as_tree()); } else { - current_chunk_ = absl::string_view(cord->contents_.data(), cord->size()); + current_chunk_ = + absl::string_view(cord->contents_.data(), bytes_remaining_); } } +inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceRing() { + current_chunk_ = ring_reader_.Next(); + return *this; +} + +inline void Cord::ChunkIterator::AdvanceBytesRing(size_t n) { + assert(n >= current_chunk_.size()); + bytes_remaining_ -= n; + if (bytes_remaining_) { + if (n == current_chunk_.size()) { + current_chunk_ = ring_reader_.Next(); + } else { + size_t offset = ring_reader_.length() - bytes_remaining_; + current_chunk_ = ring_reader_.Seek(offset); + } + } else { + current_chunk_ = {}; + } +} + +inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() { + ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 && + "Attempted to iterate past `end()`"); + assert(bytes_remaining_ >= current_chunk_.size()); + bytes_remaining_ -= current_chunk_.size(); + if (bytes_remaining_ > 0) { + return ring_reader_ ? AdvanceRing() : AdvanceStack(); + } else { + current_chunk_ = {}; + } + return *this; +} + inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) { ChunkIterator tmp(*this); operator++(); @@ -1114,10 +1231,11 @@ inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) { } inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { + assert(bytes_remaining_ >= n); if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { RemoveChunkPrefix(n); } else if (n != 0) { - AdvanceBytesSlowPath(n); + ring_reader_ ? AdvanceBytesRing(n) : AdvanceBytesSlowPath(n); } } diff --git a/absl/strings/cord_ring_reader_test.cc b/absl/strings/cord_ring_reader_test.cc new file mode 100644 index 00000000..585616f3 --- /dev/null +++ b/absl/strings/cord_ring_reader_test.cc @@ -0,0 +1,173 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdlib> +#include <ctime> +#include <memory> +#include <random> +#include <sstream> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/debugging/leak_check.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cord_rep_ring_reader.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using testing::Eq; + +// Creates a flat for testing +CordRep* MakeFlat(absl::string_view s) { + CordRepFlat* flat = CordRepFlat::New(s.length()); + memcpy(flat->Data(), s.data(), s.length()); + flat->length = s.length(); + return flat; +} + +CordRepRing* FromFlats(Span<absl::string_view const> flats) { + CordRepRing* ring = CordRepRing::Create(MakeFlat(flats[0]), flats.size() - 1); + for (int i = 1; i < flats.size(); ++i) { + ring = CordRepRing::Append(ring, MakeFlat(flats[i])); + } + return ring; +} + +std::array<absl::string_view, 12> TestFlats() { + return {"abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; +} + +TEST(CordRingReaderTest, DefaultInstance) { + CordRepRingReader reader; + EXPECT_FALSE(static_cast<bool>(reader)); + EXPECT_THAT(reader.ring(), Eq(nullptr)); +#ifndef NDEBUG + EXPECT_DEATH_IF_SUPPORTED(reader.length(), ".*"); + EXPECT_DEATH_IF_SUPPORTED(reader.consumed(), ".*"); + EXPECT_DEATH_IF_SUPPORTED(reader.remaining(), ".*"); + EXPECT_DEATH_IF_SUPPORTED(reader.Next(), ".*"); + EXPECT_DEATH_IF_SUPPORTED(reader.Seek(0), ".*"); +#endif +} + +TEST(CordRingReaderTest, Reset) { + CordRepRingReader reader; + auto flats = TestFlats(); + CordRepRing* ring = FromFlats(flats); + + absl::string_view first = reader.Reset(ring); + EXPECT_THAT(first, Eq(flats[0])); + EXPECT_TRUE(static_cast<bool>(reader)); + EXPECT_THAT(reader.ring(), Eq(ring)); + EXPECT_THAT(reader.index(), Eq(ring->head())); + EXPECT_THAT(reader.length(), Eq(ring->length)); + EXPECT_THAT(reader.consumed(), Eq(flats[0].length())); + EXPECT_THAT(reader.remaining(), Eq(ring->length - reader.consumed())); + + reader.Reset(); + EXPECT_FALSE(static_cast<bool>(reader)); + EXPECT_THAT(reader.ring(), Eq(nullptr)); + + CordRep::Unref(ring); +} + +TEST(CordRingReaderTest, Next) { + CordRepRingReader reader; + auto flats = TestFlats(); + CordRepRing* ring = FromFlats(flats); + CordRepRing::index_type head = ring->head(); + + reader.Reset(ring); + size_t consumed = reader.consumed(); + size_t remaining = reader.remaining(); + for (int i = 1; i < flats.size(); ++i) { + consumed += flats[i].length(); + remaining -= flats[i].length(); + absl::string_view next = reader.Next(); + ASSERT_THAT(next, Eq(flats[i])); + ASSERT_THAT(reader.index(), Eq(ring->advance(head, i))); + ASSERT_THAT(reader.consumed(), Eq(consumed)); + ASSERT_THAT(reader.remaining(), Eq(remaining)); + } + +#ifndef NDEBUG + EXPECT_DEATH_IF_SUPPORTED(reader.Next(), ".*"); +#endif + + CordRep::Unref(ring); +} + +TEST(CordRingReaderTest, SeekForward) { + CordRepRingReader reader; + auto flats = TestFlats(); + CordRepRing* ring = FromFlats(flats); + CordRepRing::index_type head = ring->head(); + + reader.Reset(ring); + size_t consumed = 0; + size_t remaining = ring->length;; + for (int i = 0; i < flats.size(); ++i) { + size_t offset = consumed; + consumed += flats[i].length(); + remaining -= flats[i].length(); + for (int off = 0; off < flats[i].length(); ++off) { + absl::string_view chunk = reader.Seek(offset + off); + ASSERT_THAT(chunk, Eq(flats[i].substr(off))); + ASSERT_THAT(reader.index(), Eq(ring->advance(head, i))); + ASSERT_THAT(reader.consumed(), Eq(consumed)); + ASSERT_THAT(reader.remaining(), Eq(remaining)); + } + } + + CordRep::Unref(ring); +} + +TEST(CordRingReaderTest, SeekBackward) { + CordRepRingReader reader; + auto flats = TestFlats(); + CordRepRing* ring = FromFlats(flats); + CordRepRing::index_type head = ring->head(); + + reader.Reset(ring); + size_t consumed = ring->length; + size_t remaining = 0; + for (int i = flats.size() - 1; i >= 0; --i) { + size_t offset = consumed - flats[i].length(); + for (int off = 0; off < flats[i].length(); ++off) { + absl::string_view chunk = reader.Seek(offset + off); + ASSERT_THAT(chunk, Eq(flats[i].substr(off))); + ASSERT_THAT(reader.index(), Eq(ring->advance(head, i))); + ASSERT_THAT(reader.consumed(), Eq(consumed)); + ASSERT_THAT(reader.remaining(), Eq(remaining)); + } + consumed -= flats[i].length(); + remaining += flats[i].length(); + } +#ifndef NDEBUG + EXPECT_DEATH_IF_SUPPORTED(reader.Seek(ring->length), ".*"); +#endif + CordRep::Unref(ring); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/cord_ring_test.cc b/absl/strings/cord_ring_test.cc new file mode 100644 index 00000000..7d75e106 --- /dev/null +++ b/absl/strings/cord_ring_test.cc @@ -0,0 +1,1572 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdlib> +#include <ctime> +#include <memory> +#include <random> +#include <sstream> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/debugging/leak_check.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +extern thread_local bool cord_ring; + +// TOOD(b/177688959): weird things happened with the original test +#define ASAN_BUG_177688959_FIXED false + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace { + +using RandomEngine = std::mt19937_64; + +using ::absl::cord_internal::CordRep; +using ::absl::cord_internal::CordRepConcat; +using ::absl::cord_internal::CordRepExternal; +using ::absl::cord_internal::CordRepFlat; +using ::absl::cord_internal::CordRepRing; +using ::absl::cord_internal::CordRepSubstring; + +using ::absl::cord_internal::CONCAT; +using ::absl::cord_internal::EXTERNAL; +using ::absl::cord_internal::SUBSTRING; + +using testing::ElementsAre; +using testing::ElementsAreArray; +using testing::Eq; +using testing::Ge; +using testing::Le; +using testing::Lt; +using testing::Ne; +using testing::SizeIs; + +using index_type = CordRepRing::index_type; + +enum InputShareMode { kPrivate, kShared, kSharedIndirect }; + +// TestParam class used by all test fixtures. +// Not all fixtures use all possible input combinations +struct TestParam { + TestParam() = default; + explicit TestParam(InputShareMode input_share_mode) + : input_share_mode(input_share_mode) {} + + // Run the test with the 'rep under test' to be privately owned. + // Otherwise, the rep has a shared ref count of 2 or higher. + bool refcount_is_one = true; + + // Run the test with the 'rep under test' being allocated with enough capacity + // to accommodate any modifications made to it. Otherwise, the rep has zero + // extra (reserve) capacity. + bool with_capacity = true; + + // For test providing possibly shared input such as Append(.., CordpRep*), + // this field defines if that input is adopted with a refcount of one + // (privately owned / donated), or shared. For composite inputs such as + // 'substring of flat', we also have the 'shared indirect' value which means + // the top level node is not shared, but the contained child node is shared. + InputShareMode input_share_mode = kPrivate; + + std::string ToString() const { + return absl::StrCat(refcount_is_one ? "Private" : "Shared", + with_capacity ? "" : "_NoCapacity", + (input_share_mode == kPrivate) ? "" + : (input_share_mode == kShared) + ? "_SharedInput" + : "_IndirectSharedInput"); + } +}; +using TestParams = std::vector<TestParam>; + +// Matcher validating when mutable copies are required / performed. +MATCHER_P2(EqIfPrivate, param, rep, + absl::StrCat("Equal 0x", absl::Hex(rep), " if private")) { + return param.refcount_is_one ? arg == rep : arg != rep; +} + +// Matcher validating when mutable copies are required / performed. +MATCHER_P2(EqIfPrivateAndCapacity, param, rep, + absl::StrCat("Equal 0x", absl::Hex(rep), + " if private and capacity")) { + return (param.refcount_is_one && param.with_capacity) ? arg == rep + : arg != rep; +} + +MATCHER_P2(EqIfInputPrivate, param, rep, "Equal if input is private") { + return param.input_share_mode == kPrivate ? arg == rep : arg != rep; +} + +// Matcher validating the core in-variants of the CordRepRing instance. +MATCHER(IsValidRingBuffer, "RingBuffer is valid") { + std::stringstream ss; + if (!arg->IsValid(ss)) { + *result_listener << "\nERROR: " << ss.str() << "\nRING = " << *arg; + return false; + } + return true; +} + +// Returns the flats contained in the provided CordRepRing +std::vector<string_view> ToFlats(const CordRepRing* r) { + std::vector<string_view> flats; + flats.reserve(r->entries()); + index_type pos = r->head(); + do { + flats.push_back(r->entry_data(pos)); + } while ((pos = r->advance(pos)) != r->tail()); + return flats; +} + +class not_a_string_view { + public: + explicit not_a_string_view(absl::string_view s) + : data_(s.data()), size_(s.size()) {} + explicit not_a_string_view(const void* data, size_t size) + : data_(data), size_(size) {} + + not_a_string_view remove_prefix(size_t n) const { + return not_a_string_view(static_cast<const char*>(data_) + n, size_ - n); + } + + not_a_string_view remove_suffix(size_t n) const { + return not_a_string_view(data_, size_ - n); + } + + const void* data() const { return data_; } + size_t size() const { return size_; } + + private: + const void* data_; + size_t size_; +}; + +bool operator==(not_a_string_view lhs, not_a_string_view rhs) { + return lhs.data() == rhs.data() && lhs.size() == rhs.size(); +} + +std::ostream& operator<<(std::ostream& s, not_a_string_view rhs) { + return s << "{ data: " << rhs.data() << " size: " << rhs.size() << "}"; +} + +std::vector<not_a_string_view> ToRawFlats(const CordRepRing* r) { + std::vector<not_a_string_view> flats; + flats.reserve(r->entries()); + index_type pos = r->head(); + do { + flats.emplace_back(r->entry_data(pos)); + } while ((pos = r->advance(pos)) != r->tail()); + return flats; +} + +// Returns the value contained in the provided CordRepRing +std::string ToString(const CordRepRing* r) { + std::string value; + value.reserve(r->length); + index_type pos = r->head(); + do { + absl::string_view sv = r->entry_data(pos); + value.append(sv.data(), sv.size()); + } while ((pos = r->advance(pos)) != r->tail()); + return value; +} + +// Creates a flat for testing +CordRep* MakeFlat(absl::string_view s, size_t extra = 0) { + CordRepFlat* flat = CordRepFlat::New(s.length() + extra); + memcpy(flat->Data(), s.data(), s.length()); + flat->length = s.length(); + return flat; +} + +// Creates an external node for testing +CordRepExternal* MakeExternal(absl::string_view s) { + struct Rep : public CordRepExternal { + std::string s; + explicit Rep(absl::string_view s) : s(s) { + this->tag = EXTERNAL; + this->base = s.data(); + this->length = s.length(); + this->releaser_invoker = [](CordRepExternal* self) { + delete static_cast<Rep*>(self); + }; + } + }; + return new Rep(s); +} + +CordRepExternal* MakeFakeExternal(size_t length) { + struct Rep : public CordRepExternal { + std::string s; + explicit Rep(size_t len) { + this->tag = EXTERNAL; + this->base = this->storage; + this->length = len; + this->releaser_invoker = [](CordRepExternal* self) { + delete static_cast<Rep*>(self); + }; + } + }; + return new Rep(length); +} + +// Creates a flat or an external node for testing depending on the size. +CordRep* MakeLeaf(absl::string_view s, size_t extra = 0) { + if (s.size() <= absl::cord_internal::kMaxFlatLength) { + return MakeFlat(s, extra); + } else { + return MakeExternal(s); + } +} + +// Creates a substring node +CordRepSubstring* MakeSubstring(size_t start, size_t len, CordRep* rep) { + auto* sub = new CordRepSubstring; + sub->tag = SUBSTRING; + sub->start = start; + sub->length = (len <= 0) ? rep->length - start + len : len; + sub->child = rep; + return sub; +} + +// Creates a substring node removing the specified prefix +CordRepSubstring* RemovePrefix(size_t start, CordRep* rep) { + return MakeSubstring(start, rep->length - start, rep); +} + +// Creates a substring node removing the specified suffix +CordRepSubstring* RemoveSuffix(size_t length, CordRep* rep) { + return MakeSubstring(0, rep->length - length, rep); +} + +CordRepConcat* MakeConcat(CordRep* left, CordRep* right, int depth = 0) { + auto* concat = new CordRepConcat; + concat->tag = CONCAT; + concat->length = left->length + right->length; + concat->left = left; + concat->right = right; + concat->set_depth(depth); + return concat; +} + +enum Composition { kMix, kAppend, kPrepend }; + +Composition RandomComposition() { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + return (rng() & 1) ? kMix : ((rng() & 1) ? kAppend : kPrepend); +} + +absl::string_view ToString(Composition composition) { + switch (composition) { + case kAppend: + return "Append"; + case kPrepend: + return "Prepend"; + case kMix: + return "Mix"; + } + assert(false); + return "???"; +} + +constexpr const char* kFox = "The quick brown fox jumps over the lazy dog"; +constexpr const char* kFoxFlats[] = {"The ", "quick ", "brown ", + "fox ", "jumps ", "over ", + "the ", "lazy ", "dog"}; +constexpr const char* kAlphabet = "abcdefghijklmnopqrstuvwxyz"; + +CordRepRing* FromFlats(Span<const char* const> flats, + Composition composition = kAppend) { + if (flats.empty()) return nullptr; + CordRepRing* ring = nullptr; + switch (composition) { + case kAppend: + ring = CordRepRing::Create(MakeLeaf(flats.front()), flats.size() - 1); + for (int i = 1; i < flats.size(); ++i) { + ring = CordRepRing::Append(ring, MakeLeaf(flats[i])); + } + break; + case kPrepend: + ring = CordRepRing::Create(MakeLeaf(flats.back()), flats.size() - 1); + for (int i = static_cast<int>(flats.size() - 2); i >= 0; --i) { + ring = CordRepRing::Prepend(ring, MakeLeaf(flats[i])); + } + break; + case kMix: + size_t middle1 = flats.size() / 2, middle2 = middle1; + ring = CordRepRing::Create(MakeLeaf(flats[middle1]), flats.size() - 1); + if (!flats.empty()) { + if ((flats.size() & 1) == 0) { + ring = CordRepRing::Prepend(ring, MakeLeaf(flats[--middle1])); + } + for (int i = 1; i <= middle1; ++i) { + ring = CordRepRing::Prepend(ring, MakeLeaf(flats[middle1 - i])); + ring = CordRepRing::Append(ring, MakeLeaf(flats[middle2 + i])); + } + } + break; + } + EXPECT_THAT(ToFlats(ring), ElementsAreArray(flats)); + return ring; +} + +std::ostream& operator<<(std::ostream& s, const TestParam& param) { + return s << param.ToString(); +} + +std::string TestParamToString(const testing::TestParamInfo<TestParam>& info) { + return info.param.ToString(); +} + +class CordRingTest : public testing::Test { + public: + ~CordRingTest() override { +#if ASAN_BUG_177688959_FIXED + for (CordRep* rep : unrefs_) { + CordRep::Unref(rep); + } +#endif + } + + template <typename CordRepType> + CordRepType* NeedsUnref(CordRepType* rep) { + assert(rep); +#if ASAN_BUG_177688959_FIXED + unrefs_.push_back(rep); +#endif + return rep; + } + + template <typename CordRepType> + CordRepType* Ref(CordRepType* rep) { + CordRep::Ref(rep); + return NeedsUnref(rep); + } + + void Unref(CordRep* rep) { +#if !ASAN_BUG_177688959_FIXED + CordRep::Unref(rep); +#endif + } + + private: +#if ASAN_BUG_177688959_FIXED + std::vector<CordRep*> unrefs_; +#endif +}; + +class CordRingTestWithParam : public testing::TestWithParam<TestParam> { + public: + ~CordRingTestWithParam() override { +#if ASAN_BUG_177688959_FIXED + for (CordRep* rep : unrefs_) { + CordRep::Unref(rep); + } +#endif + } + + CordRepRing* CreateWithCapacity(CordRep* child, size_t extra_capacity) { + if (!GetParam().with_capacity) extra_capacity = 0; + CordRepRing* ring = CordRepRing::Create(child, extra_capacity); + ring->SetCapacityForTesting(1 + extra_capacity); + return RefIfShared(ring); + } + + bool Shared() const { return !GetParam().refcount_is_one; } + bool InputShared() const { return GetParam().input_share_mode == kShared; } + bool InputSharedIndirect() const { + return GetParam().input_share_mode == kSharedIndirect; + } + + template <typename CordRepType> + CordRepType* NeedsUnref(CordRepType* rep) { + assert(rep); +#if ASAN_BUG_177688959_FIXED + unrefs_.push_back(rep); +#endif + return rep; + } + + template <typename CordRepType> + CordRepType* Ref(CordRepType* rep) { + CordRep::Ref(rep); + return NeedsUnref(rep); + } + + void Unref(CordRep* rep) { +#if !ASAN_BUG_177688959_FIXED + CordRep::Unref(rep); +#endif + } + + template <typename CordRepType> + CordRepType* RefIfShared(CordRepType* rep) { + return Shared() ? Ref(rep) : rep; + } + + void UnrefIfShared(CordRep* rep) { + if (Shared()) Unref(rep); + } + + template <typename CordRepType> + CordRepType* RefIfInputShared(CordRepType* rep) { + return InputShared() ? Ref(rep) : rep; + } + + void UnrefIfInputShared(CordRep* rep) { + if (InputShared()) Unref(rep); + } + + template <typename CordRepType> + CordRepType* RefIfInputSharedIndirect(CordRepType* rep) { + return InputSharedIndirect() ? Ref(rep) : rep; + } + + void UnrefIfInputSharedIndirect(CordRep* rep) { + if (InputSharedIndirect()) Unref(rep); + } + + private: +#if ASAN_BUG_177688959_FIXED + std::vector<CordRep*> unrefs_; +#endif +}; + +class CordRingCreateTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + params.emplace_back(InputShareMode::kPrivate); + params.emplace_back(InputShareMode::kShared); + return params; + } +}; + +class CordRingSubTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + for (bool refcount_is_one : {true, false}) { + TestParam param; + param.refcount_is_one = refcount_is_one; + params.push_back(param); + } + return params; + } +}; + +class CordRingBuildTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + for (bool refcount_is_one : {true, false}) { + for (bool with_capacity : {true, false}) { + TestParam param; + param.refcount_is_one = refcount_is_one; + param.with_capacity = with_capacity; + params.push_back(param); + } + } + return params; + } +}; + +class CordRingCreateFromTreeTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + params.emplace_back(InputShareMode::kPrivate); + params.emplace_back(InputShareMode::kShared); + params.emplace_back(InputShareMode::kSharedIndirect); + return params; + } +}; + +class CordRingBuildInputTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + for (bool refcount_is_one : {true, false}) { + for (bool with_capacity : {true, false}) { + for (InputShareMode share_mode : {kPrivate, kShared, kSharedIndirect}) { + TestParam param; + param.refcount_is_one = refcount_is_one; + param.with_capacity = with_capacity; + param.input_share_mode = share_mode; + params.push_back(param); + } + } + } + return params; + } +}; + +INSTANTIATE_TEST_CASE_P(WithParam, CordRingSubTest, + testing::ValuesIn(CordRingSubTest::CreateTestParams()), + TestParamToString); + +INSTANTIATE_TEST_CASE_P( + WithParam, CordRingCreateTest, + testing::ValuesIn(CordRingCreateTest::CreateTestParams()), + TestParamToString); + +INSTANTIATE_TEST_CASE_P( + WithParam, CordRingCreateFromTreeTest, + testing::ValuesIn(CordRingCreateFromTreeTest::CreateTestParams()), + TestParamToString); + +INSTANTIATE_TEST_CASE_P( + WithParam, CordRingBuildTest, + testing::ValuesIn(CordRingBuildTest::CreateTestParams()), + TestParamToString); + +INSTANTIATE_TEST_CASE_P( + WithParam, CordRingBuildInputTest, + testing::ValuesIn(CordRingBuildInputTest::CreateTestParams()), + TestParamToString); + +TEST_P(CordRingCreateTest, CreateFromFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + CordRepRing* result = NeedsUnref(CordRepRing::Create(MakeFlat(str1))); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str1)); + Unref(result); +} + +TEST_P(CordRingCreateTest, CreateFromRing) { + CordRepRing* ring = RefIfShared(FromFlats(kFoxFlats)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(ring)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAreArray(kFoxFlats)); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringRing) { + CordRepRing* ring = RefIfInputSharedIndirect(FromFlats(kFoxFlats)); + CordRep* sub = RefIfInputShared(MakeSubstring(2, 11, ring)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(sub)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfInputPrivate(GetParam(), ring)); + EXPECT_THAT(ToString(result), string_view(kFox).substr(2, 11)); + UnrefIfInputSharedIndirect(ring); + UnrefIfInputShared(sub); + Unref(result); +} + +TEST_F(CordRingTest, CreateWithIllegalExtraCapacity) { + CordRep* flat = NeedsUnref(MakeFlat("Hello world")); +#if defined(ABSL_HAVE_EXCEPTIONS) + try { + CordRepRing::Create(flat, CordRepRing::kMaxCapacity); + GTEST_FAIL() << "expected std::length_error exception"; + } catch (const std::length_error&) { + } +#elif defined(GTEST_HAS_DEATH_TEST) + EXPECT_DEATH(CordRepRing::Create(flat, CordRepRing::kMaxCapacity), ".*"); +#endif + Unref(flat); +} + +TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + auto* flat = RefIfInputShared(MakeFlat(str1)); + auto* child = RefIfInputSharedIndirect(MakeSubstring(4, 20, flat)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(20)); + EXPECT_THAT(ToFlats(result), ElementsAre(str1.substr(4, 20))); + Unref(result); + UnrefIfInputShared(flat); + UnrefIfInputSharedIndirect(child); +} + +TEST_P(CordRingCreateTest, CreateFromExternal) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + auto* child = RefIfInputShared(MakeExternal(str1)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str1)); + Unref(result); + UnrefIfInputShared(child); +} + +TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfExternal) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + auto* external = RefIfInputShared(MakeExternal(str1)); + auto* child = RefIfInputSharedIndirect(MakeSubstring(1, 24, external)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(24)); + EXPECT_THAT(ToFlats(result), ElementsAre(str1.substr(1, 24))); + Unref(result); + UnrefIfInputShared(external); + UnrefIfInputSharedIndirect(child); +} + +TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfLargeExternal) { + auto* external = RefIfInputShared(MakeFakeExternal(1 << 20)); + auto str = not_a_string_view(external->base, 1 << 20) + .remove_prefix(1 << 19) + .remove_suffix(6); + auto* child = + RefIfInputSharedIndirect(MakeSubstring(1 << 19, (1 << 19) - 6, external)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str.size())); + EXPECT_THAT(ToRawFlats(result), ElementsAre(str)); + Unref(result); + UnrefIfInputShared(external); + UnrefIfInputSharedIndirect(child); +} + +TEST_P(CordRingBuildInputTest, CreateFromConcat) { + CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"), + MakeFlat("nopqrstuv"), MakeFlat("wxyz")}; + auto* left = MakeConcat(RefIfInputSharedIndirect(flats[0]), flats[1]); + auto* right = MakeConcat(flats[2], RefIfInputSharedIndirect(flats[3])); + auto* concat = RefIfInputShared(MakeConcat(left, right)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(concat)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(26)); + EXPECT_THAT(ToString(result), Eq(kAlphabet)); + UnrefIfInputSharedIndirect(flats[0]); + UnrefIfInputSharedIndirect(flats[3]); + UnrefIfInputShared(concat); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, CreateFromSubstringConcat) { + for (size_t off = 0; off < 26; ++off) { + for (size_t len = 1; len < 26 - off; ++len) { + CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"), + MakeFlat("nopqrstuv"), MakeFlat("wxyz")}; + auto* left = MakeConcat(RefIfInputSharedIndirect(flats[0]), flats[1]); + auto* right = MakeConcat(flats[2], RefIfInputSharedIndirect(flats[3])); + auto* concat = MakeConcat(left, right); + auto* child = RefIfInputShared(MakeSubstring(off, len, concat)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + ASSERT_THAT(result->length, Eq(len)); + ASSERT_THAT(ToString(result), string_view(kAlphabet).substr(off, len)); + UnrefIfInputSharedIndirect(flats[0]); + UnrefIfInputSharedIndirect(flats[3]); + UnrefIfInputShared(child); + Unref(result); + } + } +} + +TEST_P(CordRingCreateTest, Properties) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + CordRepRing* result = NeedsUnref(CordRepRing::Create(MakeFlat(str1), 120)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->head(), Eq(0)); + EXPECT_THAT(result->tail(), Eq(1)); + EXPECT_THAT(result->capacity(), Ge(120 + 1)); + EXPECT_THAT(result->capacity(), Le(2 * 120 + 1)); + EXPECT_THAT(result->entries(), Eq(1)); + EXPECT_THAT(result->begin_pos(), Eq(0)); + Unref(result); +} + +TEST_P(CordRingCreateTest, EntryForNewFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + CordRep* child = MakeFlat(str1); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child, 120)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->entry_child(0), Eq(child)); + EXPECT_THAT(result->entry_end_pos(0), Eq(str1.length())); + EXPECT_THAT(result->entry_data_offset(0), Eq(0)); + Unref(result); +} + +TEST_P(CordRingCreateTest, EntryForNewFlatSubstring) { + absl::string_view str1 = "1234567890abcdefghijklmnopqrstuvwxyz"; + CordRep* child = MakeFlat(str1); + CordRep* substring = MakeSubstring(10, 26, child); + CordRepRing* result = NeedsUnref(CordRepRing::Create(substring, 1)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->entry_child(0), Eq(child)); + EXPECT_THAT(result->entry_end_pos(0), Eq(26)); + EXPECT_THAT(result->entry_data_offset(0), Eq(10)); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, MakeFlat(str2))); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str1, str2)); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, PrependFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, MakeFlat(str2))); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str2, str1)); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendString) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str1, str2)); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendStringHavingExtra) { + absl::string_view str1 = "1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeFlat(str1, 26), 0); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendStringHavingPartialExtra) { + absl::string_view str1 = "1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + // Create flat with at least one extra byte. We don't expect to have sized + // alloc and capacity rounding to grant us enough to not make it partial. + auto* flat = MakeFlat(str1, 1); + size_t avail = flat->flat()->Capacity() - flat->length; + ASSERT_THAT(avail, Lt(str2.size())) << " adjust test for larger flats!"; + + // Construct the flats we do expect using all of `avail`. + absl::string_view str1a = str2.substr(0, avail); + absl::string_view str2a = str2.substr(avail); + + CordRepRing* ring = CreateWithCapacity(flat, 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), ElementsAre(StrCat(str1, str1a), str2a)); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre(str1, str2)); + } + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendStringHavingExtraInSubstring) { + absl::string_view str1 = "123456789_1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRep* flat = RemovePrefix(10, MakeFlat(str1, 26)); + CordRepRing* ring = CreateWithCapacity(flat, 0); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(4 + str2.size())); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), ElementsAre(StrCat("1234", str2))); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre("1234", str2)); + } + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendStringHavingSharedExtra) { + absl::string_view str1 = "123456789_1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + for (int shared_type = 0; shared_type < 2; ++shared_type) { + SCOPED_TRACE(absl::StrCat("Shared extra type ", shared_type)); + + // Create a flat that is shared in some way. + CordRep* flat = nullptr; + CordRep* flat1 = nullptr; + if (shared_type == 0) { + // Shared flat + flat = CordRep::Ref(MakeFlat(str1.substr(10), 100)); + } else if (shared_type == 1) { + // Shared flat inside private substring + flat1 = CordRep::Ref(MakeFlat(str1)); + flat = RemovePrefix(10, flat1); + } else { + // Private flat inside shared substring + flat = CordRep::Ref(RemovePrefix(10, MakeFlat(str1, 100))); + } + + CordRepRing* ring = CreateWithCapacity(flat, 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(4 + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre("1234", str2)); + UnrefIfShared(ring); + Unref(result); + + CordRep::Unref(shared_type == 1 ? flat1 : flat); + } +} + +TEST_P(CordRingBuildTest, AppendStringWithExtra) { + absl::string_view str1 = "1234"; + absl::string_view str2 = "1234567890"; + absl::string_view str3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2, 26)); + result = CordRepRing::Append(result, str3); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size() + str3.size())); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre(str1, StrCat(str2, str3))); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, PrependString) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + // Use external rep to avoid appending to first flat + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + if (GetParam().with_capacity && GetParam().refcount_is_one) { + EXPECT_THAT(result, Eq(ring)); + } else { + EXPECT_THAT(result, Ne(ring)); + } + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str2, str1)); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, PrependStringHavingExtra) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRep* flat = RemovePrefix(26, MakeFlat(str1)); + CordRepRing* ring = CreateWithCapacity(flat, 0); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(4 + str2.size())); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), ElementsAre(StrCat(str2, "1234"))); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre(str2, "1234")); + } + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, PrependStringHavingSharedExtra) { + absl::string_view str1 = "123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + absl::string_view str2 = "abcdefghij"; + absl::string_view str1a = str1.substr(10); + for (int shared_type = 1; shared_type < 2; ++shared_type) { + SCOPED_TRACE(absl::StrCat("Shared extra type ", shared_type)); + + // Create a flat that is shared in some way. + CordRep* flat = nullptr; + CordRep* flat1 = nullptr; + if (shared_type == 1) { + // Shared flat inside private substring + flat = RemovePrefix(10, flat1 = CordRep::Ref(MakeFlat(str1))); + } else { + // Private flat inside shared substring + flat = CordRep::Ref(RemovePrefix(10, MakeFlat(str1, 100))); + } + + CordRepRing* ring = CreateWithCapacity(flat, 1); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1a.size() + str2.size())); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre(str2, str1a)); + UnrefIfShared(ring); + Unref(result); + CordRep::Unref(shared_type == 1 ? flat1 : flat); + } +} + +TEST_P(CordRingBuildTest, PrependStringWithExtra) { + absl::string_view str1 = "1234"; + absl::string_view str2 = "1234567890"; + absl::string_view str3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, str2, 26)); + ASSERT_THAT(result, IsValidRingBuffer()); + result = CordRepRing::Prepend(result, str3); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size() + str3.size())); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre(StrCat(str3, str2), str1)); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendPrependStringMix) { + const auto& flats = kFoxFlats; + CordRepRing* ring = CreateWithCapacity(MakeFlat(flats[4]), 8); + CordRepRing* result = ring; + for (int i = 1; i <= 4; ++i) { + result = CordRepRing::Prepend(result, flats[4 - i]); + result = CordRepRing::Append(result, flats[4 + i]); + } + UnrefIfShared(ring); + NeedsUnref(result); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToString(result), kFox); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendPrependStringMixWithExtra) { + const auto& flats = kFoxFlats; + CordRepRing* ring = CreateWithCapacity(MakeFlat(flats[4], 100), 8); + CordRepRing* result = ring; + for (int i = 1; i <= 4; ++i) { + result = CordRepRing::Prepend(result, flats[4 - i], 100); + result = CordRepRing::Append(result, flats[4 + i], 100); + } + NeedsUnref(result); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), + ElementsAre("The quick brown fox ", "jumps over the lazy dog")); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre("The quick brown fox ", "jumps ", + "over the lazy dog")); + } + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendPrependStringMixWithPrependedExtra) { + const auto& flats = kFoxFlats; + CordRep* flat = MakeFlat(StrCat(std::string(50, '.'), flats[4]), 50); + CordRepRing* ring = CreateWithCapacity(RemovePrefix(50, flat), 0); + CordRepRing* result = ring; + for (int i = 1; i <= 4; ++i) { + result = CordRepRing::Prepend(result, flats[4 - i], 100); + result = CordRepRing::Append(result, flats[4 + i], 100); + } + result = NeedsUnref(result); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), ElementsAre(kFox)); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre("The quick brown fox ", "jumps ", + "over the lazy dog")); + } + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingSubTest, SubRing) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + string_view all = kFox; + for (size_t offset = 0; offset < all.size() - 1; ++offset) { + CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); + CordRepRing* result = CordRepRing::SubRing(ring, offset, 0); + EXPECT_THAT(result, nullptr); + UnrefIfShared(ring); + + for (size_t len = 1; len < all.size() - offset; ++len) { + ring = RefIfShared(FromFlats(flats, composition)); + result = NeedsUnref(CordRepRing::SubRing(ring, offset, len)); + ASSERT_THAT(result, IsValidRingBuffer()); + ASSERT_THAT(result, EqIfPrivate(GetParam(), ring)); + ASSERT_THAT(ToString(result), Eq(all.substr(offset, len))); + UnrefIfShared(ring); + Unref(result); + } + } +} + +TEST_P(CordRingSubTest, SubRingFromLargeExternal) { + auto composition = RandomComposition(); + std::string large_string(1 << 20, '.'); + const char* flats[] = { + "abcdefghijklmnopqrstuvwxyz", + large_string.c_str(), + "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + }; + std::string buffer = absl::StrCat(flats[0], flats[1], flats[2]); + absl::string_view all = buffer; + for (size_t offset = 0; offset < 30; ++offset) { + CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); + CordRepRing* result = CordRepRing::SubRing(ring, offset, 0); + EXPECT_THAT(result, nullptr); + UnrefIfShared(ring); + + for (size_t len = all.size() - 30; len < all.size() - offset; ++len) { + ring = RefIfShared(FromFlats(flats, composition)); + result = NeedsUnref(CordRepRing::SubRing(ring, offset, len)); + ASSERT_THAT(result, IsValidRingBuffer()); + ASSERT_THAT(result, EqIfPrivate(GetParam(), ring)); + auto str = ToString(result); + ASSERT_THAT(str, SizeIs(len)); + ASSERT_THAT(str, Eq(all.substr(offset, len))); + UnrefIfShared(ring); + Unref(result); + } + } +} + +TEST_P(CordRingSubTest, RemovePrefix) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + string_view all = kFox; + CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); + CordRepRing* result = CordRepRing::RemovePrefix(ring, all.size()); + EXPECT_THAT(result, nullptr); + UnrefIfShared(ring); + + for (size_t len = 1; len < all.size(); ++len) { + ring = RefIfShared(FromFlats(flats, composition)); + result = NeedsUnref(CordRepRing::RemovePrefix(ring, len)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(ToString(result), Eq(all.substr(len))); + UnrefIfShared(ring); + Unref(result); + } +} + +TEST_P(CordRingSubTest, RemovePrefixFromLargeExternal) { + CordRepExternal* external1 = MakeFakeExternal(1 << 20); + CordRepExternal* external2 = MakeFakeExternal(1 << 20); + CordRepRing* ring = CordRepRing::Create(external1, 1); + ring = CordRepRing::Append(ring, external2); + CordRepRing* result = NeedsUnref(CordRepRing::RemovePrefix(ring, 1 << 16)); + EXPECT_THAT( + ToRawFlats(result), + ElementsAre( + not_a_string_view(external1->base, 1 << 20).remove_prefix(1 << 16), + not_a_string_view(external2->base, 1 << 20))); + Unref(result); +} + +TEST_P(CordRingSubTest, RemoveSuffix) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + string_view all = kFox; + CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); + CordRepRing* result = CordRepRing::RemoveSuffix(ring, all.size()); + EXPECT_THAT(result, nullptr); + UnrefIfShared(ring); + + for (size_t len = 1; len < all.size(); ++len) { + ring = RefIfShared(FromFlats(flats, composition)); + result = NeedsUnref(CordRepRing::RemoveSuffix(ring, len)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(ToString(result), Eq(all.substr(0, all.size() - len))); + UnrefIfShared(ring); + Unref(result); + } +} + +TEST_P(CordRingSubTest, AppendRing) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats).subspan(1); + CordRepRing* ring = CreateWithCapacity(MakeFlat(kFoxFlats[0]), flats.size()); + CordRepRing* child = FromFlats(flats, composition); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAreArray(kFoxFlats)); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, AppendRingWithFlatOffset) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RemovePrefix(10, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Head", "brown ", "fox ", "jumps ", + "over ", "the ", "lazy ", "dog")); + UnrefIfInputSharedIndirect(child); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, AppendRingWithBrokenOffset) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RemovePrefix(21, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), + ElementsAre("Head", "umps ", "over ", "the ", "lazy ", "dog")); + UnrefIfInputSharedIndirect(child); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, AppendRingWithFlatLength) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RemoveSuffix(8, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Head", "The ", "quick ", "brown ", + "fox ", "jumps ", "over ", "the ")); + UnrefIfInputSharedIndirect(child); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendRingWithBrokenFlatLength) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RemoveSuffix(15, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Head", "The ", "quick ", "brown ", + "fox ", "jumps ", "ov")); + UnrefIfInputSharedIndirect(child); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendRingMiddlePiece) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = MakeSubstring(7, child->length - 27, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), + ElementsAre("Head", "ck ", "brown ", "fox ", "jum")); + UnrefIfInputSharedIndirect(child); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildTest, AppendRingSinglePiece) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputShared(MakeSubstring(11, 3, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Head", "row")); + UnrefIfInputSharedIndirect(child); + UnrefIfInputShared(stripped); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, AppendRingSinglePieceWithPrefix) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + size_t extra_capacity = 1 + (GetParam().with_capacity ? flats.size() : 0); + CordRepRing* ring = CordRepRing::Create(MakeFlat("Head"), extra_capacity); + ring->SetCapacityForTesting(1 + extra_capacity); + ring = RefIfShared(CordRepRing::Prepend(ring, MakeFlat("Prepend"))); + assert(ring->IsValid(std::cout)); + CordRepRing* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputShared(MakeSubstring(11, 3, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Prepend", "Head", "row")); + UnrefIfInputSharedIndirect(child); + UnrefIfInputShared(stripped); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, PrependRing) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto fox = MakeSpan(kFoxFlats); + auto flats = MakeSpan(fox).subspan(0, fox.size() - 1); + CordRepRing* ring = CreateWithCapacity(MakeFlat(fox.back()), flats.size()); + CordRepRing* child = RefIfInputShared(FromFlats(flats, composition)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAreArray(kFoxFlats)); + UnrefIfInputShared(child); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, PrependRingWithFlatOffset) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(RemovePrefix(10, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("brown ", "fox ", "jumps ", "over ", + "the ", "lazy ", "dog", "Tail")); + UnrefIfInputShared(child); + UnrefIfInputSharedIndirect(stripped); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, PrependRingWithBrokenOffset) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(RemovePrefix(21, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), + ElementsAre("umps ", "over ", "the ", "lazy ", "dog", "Tail")); + UnrefIfInputShared(child); + UnrefIfInputSharedIndirect(stripped); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, PrependRingWithFlatLength) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(RemoveSuffix(8, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("The ", "quick ", "brown ", "fox ", + "jumps ", "over ", "the ", "Tail")); + UnrefIfShared(ring); + UnrefIfInputShared(child); + UnrefIfInputSharedIndirect(stripped); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, PrependRingWithBrokenFlatLength) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(RemoveSuffix(15, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("The ", "quick ", "brown ", "fox ", + "jumps ", "ov", "Tail")); + UnrefIfInputShared(child); + UnrefIfInputSharedIndirect(stripped); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, PrependRingMiddlePiece) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = + RefIfInputSharedIndirect(MakeSubstring(7, child->length - 27, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), + ElementsAre("ck ", "brown ", "fox ", "jum", "Tail")); + UnrefIfInputShared(child); + UnrefIfInputSharedIndirect(stripped); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, PrependRingSinglePiece) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(MakeSubstring(11, 3, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("row", "Tail")); + UnrefIfInputShared(child); + UnrefIfInputSharedIndirect(stripped); + UnrefIfShared(ring); + Unref(result); +} + +TEST_P(CordRingBuildInputTest, PrependRingSinglePieceWithPrefix) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + size_t extra_capacity = 1 + (GetParam().with_capacity ? flats.size() : 0); + CordRepRing* ring = CordRepRing::Create(MakeFlat("Tail"), extra_capacity); + ring->SetCapacityForTesting(1 + extra_capacity); + ring = RefIfShared(CordRepRing::Prepend(ring, MakeFlat("Prepend"))); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(MakeSubstring(11, 3, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("row", "Prepend", "Tail")); + UnrefIfInputShared(child); + UnrefIfInputSharedIndirect(stripped); + UnrefIfShared(ring); + Unref(result); +} + +TEST_F(CordRingTest, Find) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = NeedsUnref(FromFlats(flats, composition)); + std::string value = ToString(ring); + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position found = ring->Find(i); + auto data = ring->entry_data(found.index); + ASSERT_THAT(found.offset, Lt(data.length())); + ASSERT_THAT(data[found.offset], Eq(value[i])); + } + Unref(ring); +} + +TEST_F(CordRingTest, FindWithHint) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = NeedsUnref(FromFlats(flats, composition)); + std::string value = ToString(ring); + +#if defined(GTEST_HAS_DEATH_TEST) + // Test hint beyond valid position + index_type head = ring->head(); + EXPECT_DEBUG_DEATH(ring->Find(ring->advance(head), 0), ".*"); + EXPECT_DEBUG_DEATH(ring->Find(ring->advance(head), 9), ".*"); + EXPECT_DEBUG_DEATH(ring->Find(ring->advance(head, 3), 24), ".*"); +#endif + + int flat_pos = 0; + size_t flat_offset = 0; + for (auto sflat : flats) { + string_view flat(sflat); + for (int offset = 0; offset < flat.length(); ++offset) { + for (int start = 0; start <= flat_pos; ++start) { + index_type hint = ring->advance(ring->head(), start); + CordRepRing::Position found = ring->Find(hint, flat_offset + offset); + ASSERT_THAT(found.index, Eq(ring->advance(ring->head(), flat_pos))); + ASSERT_THAT(found.offset, Eq(offset)); + } + } + ++flat_pos; + flat_offset += flat.length(); + } + Unref(ring); +} + +TEST_F(CordRingTest, FindInLargeRing) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = FromFlats(flats, composition); + for (int i = 0; i < 13; ++i) { + ring = CordRepRing::Append(ring, FromFlats(flats, composition)); + } + NeedsUnref(ring); + std::string value = ToString(ring); + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position pos = ring->Find(i); + auto data = ring->entry_data(pos.index); + ASSERT_THAT(pos.offset, Lt(data.length())); + ASSERT_THAT(data[pos.offset], Eq(value[i])); + } + Unref(ring); +} + +TEST_F(CordRingTest, FindTail) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = NeedsUnref(FromFlats(flats, composition)); + std::string value = ToString(ring); + + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position pos = ring->FindTail(i + 1); + auto data = ring->entry_data(ring->retreat(pos.index)); + ASSERT_THAT(pos.offset, Lt(data.length())); + ASSERT_THAT(data[data.length() - pos.offset - 1], Eq(value[i])); + } + Unref(ring); +} + +TEST_F(CordRingTest, FindTailWithHint) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = NeedsUnref(FromFlats(flats, composition)); + std::string value = ToString(ring); + + // Test hint beyond valid position +#if defined(GTEST_HAS_DEATH_TEST) + index_type head = ring->head(); + EXPECT_DEBUG_DEATH(ring->FindTail(ring->advance(head), 1), ".*"); + EXPECT_DEBUG_DEATH(ring->FindTail(ring->advance(head), 10), ".*"); + EXPECT_DEBUG_DEATH(ring->FindTail(ring->advance(head, 3), 26), ".*"); +#endif + + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position pos = ring->FindTail(i + 1); + auto data = ring->entry_data(ring->retreat(pos.index)); + ASSERT_THAT(pos.offset, Lt(data.length())); + ASSERT_THAT(data[data.length() - pos.offset - 1], Eq(value[i])); + } + Unref(ring); +} + +TEST_F(CordRingTest, FindTailInLargeRing) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = FromFlats(flats, composition); + for (int i = 0; i < 13; ++i) { + ring = CordRepRing::Append(ring, FromFlats(flats, composition)); + } + NeedsUnref(ring); + std::string value = ToString(ring); + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position pos = ring->FindTail(i + 1); + auto data = ring->entry_data(ring->retreat(pos.index)); + ASSERT_THAT(pos.offset, Lt(data.length())); + ASSERT_THAT(data[data.length() - pos.offset - 1], Eq(value[i])); + } + Unref(ring); +} + +TEST_F(CordRingTest, GetCharacter) { + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CordRepRing::Create(MakeFlat("Tail"), flats.size()); + CordRep* child = FromFlats(flats, kAppend); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, child)); + std::string value = ToString(result); + for (int i = 0; i < value.length(); ++i) { + ASSERT_THAT(result->GetCharacter(i), Eq(value[i])); + } + Unref(result); +} + +TEST_F(CordRingTest, GetCharacterWithSubstring) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + auto* child = MakeSubstring(4, 20, MakeFlat(str1)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + std::string value = ToString(result); + for (int i = 0; i < value.length(); ++i) { + ASSERT_THAT(result->GetCharacter(i), Eq(value[i])); + } + Unref(result); +} + +TEST_F(CordRingTest, Dump) { + std::stringstream ss; + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = NeedsUnref(FromFlats(flats, kPrepend)); + ss << *ring; + Unref(ring); +} + +} // namespace +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 4443c828..f9982428 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/cord.h" #include <algorithm> @@ -167,6 +181,12 @@ class CordTestPeer { const Cord& c, absl::FunctionRef<void(absl::string_view)> callback) { c.ForEachChunk(callback); } + + static bool IsTree(const Cord& c) { return c.contents_.is_tree(); } + + static cord_internal::CordzInfo* GetCordzInfo(const Cord& c) { + return c.contents_.cordz_info(); + } }; ABSL_NAMESPACE_END @@ -351,7 +371,7 @@ TEST(Cord, Subcord) { for (size_t end_pos : positions) { if (end_pos < pos || end_pos > a.size()) continue; absl::Cord sa = a.Subcord(pos, end_pos - pos); - EXPECT_EQ(absl::string_view(s).substr(pos, end_pos - pos), + ASSERT_EQ(absl::string_view(s).substr(pos, end_pos - pos), std::string(sa)) << a; } @@ -363,7 +383,7 @@ TEST(Cord, Subcord) { for (size_t pos = 0; pos <= sh.size(); ++pos) { for (size_t n = 0; n <= sh.size() - pos; ++n) { absl::Cord sc = c.Subcord(pos, n); - EXPECT_EQ(sh.substr(pos, n), std::string(sc)) << c; + ASSERT_EQ(sh.substr(pos, n), std::string(sc)) << c; } } @@ -373,7 +393,7 @@ TEST(Cord, Subcord) { while (sa.size() > 1) { sa = sa.Subcord(1, sa.size() - 2); ss = ss.substr(1, ss.size() - 2); - EXPECT_EQ(ss, std::string(sa)) << a; + ASSERT_EQ(ss, std::string(sa)) << a; if (HasFailure()) break; // halt cascade } @@ -1613,3 +1633,83 @@ TEST(CordDeathTest, Hardening) { EXPECT_DEATH_IF_SUPPORTED(static_cast<void>(cord.chunk_end()->empty()), ""); EXPECT_DEATH_IF_SUPPORTED(++cord.chunk_end(), ""); } + +class AfterExitCordTester { + public: + bool Set(absl::Cord* cord, absl::string_view expected) { + cord_ = cord; + expected_ = expected; + return true; + } + + ~AfterExitCordTester() { + EXPECT_EQ(*cord_, expected_); + } + private: + absl::Cord* cord_; + absl::string_view expected_; +}; + +template <typename Str> +void TestConstinitConstructor(Str) { + const auto expected = Str::value; + // Defined before `cord` to be destroyed after it. + static AfterExitCordTester exit_tester; // NOLINT + ABSL_CONST_INIT static absl::Cord cord(Str{}); // NOLINT + static bool init_exit_tester = exit_tester.Set(&cord, expected); + (void)init_exit_tester; + + EXPECT_EQ(cord, expected); + // Copy the object and test the copy, and the original. + { + absl::Cord copy = cord; + EXPECT_EQ(copy, expected); + } + // The original still works + EXPECT_EQ(cord, expected); + + // Try making adding more structure to the tree. + { + absl::Cord copy = cord; + std::string expected_copy(expected); + for (int i = 0; i < 10; ++i) { + copy.Append(cord); + absl::StrAppend(&expected_copy, expected); + EXPECT_EQ(copy, expected_copy); + } + } + + // Make sure we are using the right branch during constant evaluation. + EXPECT_EQ(absl::CordTestPeer::IsTree(cord), cord.size() >= 16); + + for (int i = 0; i < 10; ++i) { + // Make a few more Cords from the same global rep. + // This tests what happens when the refcount for it gets below 1. + EXPECT_EQ(expected, absl::Cord(Str{})); + } +} + +constexpr int SimpleStrlen(const char* p) { + return *p ? 1 + SimpleStrlen(p + 1) : 0; +} + +struct ShortView { + constexpr absl::string_view operator()() const { + return absl::string_view("SSO string", SimpleStrlen("SSO string")); + } +}; + +struct LongView { + constexpr absl::string_view operator()() const { + return absl::string_view("String that does not fit SSO.", + SimpleStrlen("String that does not fit SSO.")); + } +}; + + +TEST(Cord, ConstinitConstructor) { + TestConstinitConstructor( + absl::strings_internal::MakeStringConstant(ShortView{})); + TestConstinitConstructor( + absl::strings_internal::MakeStringConstant(LongView{})); +} diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 9fceeef0..18b20b83 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -137,7 +137,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, // Copy the escape sequence for the null character const ptrdiff_t octal_size = p + 1 - octal_start; *d++ = '\\'; - memcpy(d, octal_start, octal_size); + memmove(d, octal_start, octal_size); d += octal_size; break; } @@ -170,7 +170,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, // Copy the escape sequence for the null character const ptrdiff_t hex_size = p + 1 - hex_start; *d++ = '\\'; - memcpy(d, hex_start, hex_size); + memmove(d, hex_start, hex_size); d += hex_size; break; } @@ -203,7 +203,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, if ((rune == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character *d++ = '\\'; - memcpy(d, hex_start, 5); // u0000 + memmove(d, hex_start, 5); // u0000 d += 5; break; } @@ -251,7 +251,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, if ((rune == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character *d++ = '\\'; - memcpy(d, hex_start, 9); // U00000000 + memmove(d, hex_start, 9); // U00000000 d += 9; break; } diff --git a/absl/strings/internal/charconv_bigint_test.cc b/absl/strings/internal/charconv_bigint_test.cc index 363bcb03..a8b99458 100644 --- a/absl/strings/internal/charconv_bigint_test.cc +++ b/absl/strings/internal/charconv_bigint_test.cc @@ -69,6 +69,61 @@ TEST(BigUnsigned, ShiftLeft) { // And we should have fully rotated all bits off by now: EXPECT_EQ(a, BigUnsigned<84>(0u)); } + { + // Bit shifting large and small numbers by large and small offsets. + // Intended to exercise bounds-checking corner on ShiftLeft() (directly + // and under asan). + + // 2**(32*84)-1 + const BigUnsigned<84> all_bits_one( + "1474444211396924248063325089479706787923460402125687709454567433186613" + "6228083464060749874845919674257665016359189106695900028098437021384227" + "3285029708032466536084583113729486015826557532750465299832071590813090" + "2011853039837649252477307070509704043541368002938784757296893793903797" + "8180292336310543540677175225040919704702800559606097685920595947397024" + "8303316808753252115729411497720357971050627997031988036134171378490368" + "6008000778741115399296162550786288457245180872759047016734959330367829" + "5235612397427686310674725251378116268607113017720538636924549612987647" + "5767411074510311386444547332882472126067840027882117834454260409440463" + "9345147252664893456053258463203120637089916304618696601333953616715125" + "2115882482473279040772264257431663818610405673876655957323083702713344" + "4201105427930770976052393421467136557055"); + const BigUnsigned<84> zero(0u); + const BigUnsigned<84> one(1u); + // in bounds shifts + for (int i = 1; i < 84*32; ++i) { + // shifting all_bits_one to the left should result in a smaller number, + // since the high bits rotate off and the low bits are replaced with + // zeroes. + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(i); + EXPECT_GT(all_bits_one, big_shifted); + // Shifting 1 to the left should instead result in a larger number. + BigUnsigned<84> small_shifted = one; + small_shifted.ShiftLeft(i); + EXPECT_LT(one, small_shifted); + } + // Shifting by zero or a negative number has no effect + for (int no_op_shift : {0, -1, -84 * 32, std::numeric_limits<int>::min()}) { + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(no_op_shift); + EXPECT_EQ(all_bits_one, big_shifted); + BigUnsigned<84> small_shifted = one; + big_shifted.ShiftLeft(no_op_shift); + EXPECT_EQ(one, small_shifted); + } + // Shifting by an amount greater than the number of bits should result in + // zero. + for (int out_of_bounds_shift : + {84 * 32, 84 * 32 + 1, std::numeric_limits<int>::max()}) { + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(out_of_bounds_shift); + EXPECT_EQ(zero, big_shifted); + BigUnsigned<84> small_shifted = one; + small_shifted.ShiftLeft(out_of_bounds_shift); + EXPECT_EQ(zero, small_shifted); + } + } } TEST(BigUnsigned, MultiplyByUint32) { diff --git a/absl/strings/internal/charconv_parse.cc b/absl/strings/internal/charconv_parse.cc index fd6d9480..8b11868c 100644 --- a/absl/strings/internal/charconv_parse.cc +++ b/absl/strings/internal/charconv_parse.cc @@ -246,8 +246,8 @@ constexpr int DigitMagnitude<16>() { // ConsumeDigits does not protect against overflow on *out; max_digits must // be chosen with respect to type T to avoid the possibility of overflow. template <int base, typename T> -std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, - T* out, bool* dropped_nonzero_digit) { +int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out, + bool* dropped_nonzero_digit) { if (base == 10) { assert(max_digits <= std::numeric_limits<T>::digits10); } else if (base == 16) { @@ -282,7 +282,7 @@ std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, *dropped_nonzero_digit = true; } *out = accumulator; - return begin - original_begin; + return static_cast<int>(begin - original_begin); } // Returns true if `v` is one of the chars allowed inside parentheses following @@ -372,7 +372,7 @@ strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, int exponent_adjustment = 0; bool mantissa_is_inexact = false; - std::size_t pre_decimal_digits = ConsumeDigits<base>( + int pre_decimal_digits = ConsumeDigits<base>( begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact); begin += pre_decimal_digits; int digits_left; @@ -398,14 +398,14 @@ strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, while (begin < end && *begin == '0') { ++begin; } - std::size_t zeros_skipped = begin - begin_zeros; + int zeros_skipped = static_cast<int>(begin - begin_zeros); if (zeros_skipped >= DigitLimit<base>()) { // refuse to parse pathological inputs return result; } exponent_adjustment -= static_cast<int>(zeros_skipped); } - std::size_t post_decimal_digits = ConsumeDigits<base>( + int post_decimal_digits = ConsumeDigits<base>( begin, end, digits_left, &mantissa, &mantissa_is_inexact); begin += post_decimal_digits; diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc new file mode 100644 index 00000000..905ffd0c --- /dev/null +++ b/absl/strings/internal/cord_internal.cc @@ -0,0 +1,83 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cord_internal.h" + +#include <atomic> +#include <cassert> +#include <memory> + +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_ring.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled( + kCordEnableRingBufferDefault); +ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled( + kCordShallowSubcordsDefault); + +void CordRep::Destroy(CordRep* rep) { + assert(rep != nullptr); + + absl::InlinedVector<CordRep*, Constants::kInlinedVectorSize> pending; + while (true) { + assert(!rep->refcount.IsImmortal()); + if (rep->tag == CONCAT) { + CordRepConcat* rep_concat = rep->concat(); + CordRep* right = rep_concat->right; + if (!right->refcount.Decrement()) { + pending.push_back(right); + } + CordRep* left = rep_concat->left; + delete rep_concat; + rep = nullptr; + if (!left->refcount.Decrement()) { + rep = left; + continue; + } + } else if (rep->tag == RING) { + CordRepRing::Destroy(rep->ring()); + rep = nullptr; + } else if (rep->tag == EXTERNAL) { + CordRepExternal::Delete(rep); + rep = nullptr; + } else if (rep->tag == SUBSTRING) { + CordRepSubstring* rep_substring = rep->substring(); + CordRep* child = rep_substring->child; + delete rep_substring; + rep = nullptr; + if (!child->refcount.Decrement()) { + rep = child; + continue; + } + } else { + CordRepFlat::Delete(rep); + rep = nullptr; + } + + if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } else { + break; + } + } +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index d456eef8..a1ba67fe 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -1,4 +1,4 @@ -// Copyright 2020 The Abseil Authors. +// Copyright 2021 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,7 +21,10 @@ #include <cstdint> #include <type_traits> +#include "absl/base/config.h" +#include "absl/base/internal/endian.h" #include "absl/base/internal/invoke.h" +#include "absl/base/optimization.h" #include "absl/container/internal/compressed_tuple.h" #include "absl/meta/type_traits.h" #include "absl/strings/string_view.h" @@ -30,17 +33,55 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { +class CordzInfo; + +// Default feature enable states for cord ring buffers +enum CordFeatureDefaults { + kCordEnableRingBufferDefault = false, + kCordShallowSubcordsDefault = false +}; + +extern std::atomic<bool> cord_ring_buffer_enabled; +extern std::atomic<bool> shallow_subcords_enabled; + +inline void enable_cord_ring_buffer(bool enable) { + cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed); +} + +inline void enable_shallow_subcords(bool enable) { + shallow_subcords_enabled.store(enable, std::memory_order_relaxed); +} + +enum Constants { + // The inlined size to use with absl::InlinedVector. + // + // Note: The InlinedVectors in this file (and in cord.h) do not need to use + // the same value for their inlined size. The fact that they do is historical. + // It may be desirable for each to use a different inlined size optimized for + // that InlinedVector's usage. + // + // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for + // the inlined vector size (47 exists for backward compatibility). + kInlinedVectorSize = 47, + + // Prefer copying blocks of at most this size, otherwise reference count. + kMaxBytesToCopy = 511 +}; + // Wraps std::atomic for reference counting. class Refcount { public: - Refcount() : count_{1} {} - ~Refcount() {} + constexpr Refcount() : count_{kRefIncrement} {} + struct Immortal {}; + explicit constexpr Refcount(Immortal) : count_(kImmortalTag) {} - // Increments the reference count by 1. Imposes no memory ordering. - inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); } + // Increments the reference count. Imposes no memory ordering. + inline void Increment() { + count_.fetch_add(kRefIncrement, std::memory_order_relaxed); + } // Asserts that the current refcount is greater than 0. If the refcount is - // greater than 1, decrements the reference count by 1. + // greater than 1, decrements the reference count. // // Returns false if there are no references outstanding; true otherwise. // Inserts barriers to ensure that state written before this method returns @@ -48,19 +89,24 @@ class Refcount { // false. inline bool Decrement() { int32_t refcount = count_.load(std::memory_order_acquire); - assert(refcount > 0); - return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1; + assert(refcount > 0 || refcount & kImmortalTag); + return refcount != kRefIncrement && + count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) != + kRefIncrement; } // Same as Decrement but expect that refcount is greater than 1. inline bool DecrementExpectHighRefcount() { - int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel); - assert(refcount > 0); - return refcount != 1; + int32_t refcount = + count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel); + assert(refcount > 0 || refcount & kImmortalTag); + return refcount != kRefIncrement; } // Returns the current reference count using acquire semantics. - inline int32_t Get() const { return count_.load(std::memory_order_acquire); } + inline int32_t Get() const { + return count_.load(std::memory_order_acquire) >> kImmortalShift; + } // Returns whether the atomic integer is 1. // If the reference count is used in the conventional way, a @@ -70,9 +116,27 @@ class Refcount { // performs the memory barrier needed for the owning thread // to act on the object, knowing that it has exclusive access to the // object. - inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; } + inline bool IsOne() { + return count_.load(std::memory_order_acquire) == kRefIncrement; + } + + bool IsImmortal() const { + return (count_.load(std::memory_order_relaxed) & kImmortalTag) != 0; + } private: + // We reserve the bottom bit to tag a reference count as immortal. + // By making it `1` we ensure that we never reach `0` when adding/subtracting + // `2`, thus it never looks as if it should be destroyed. + // These are used for the StringConstant constructor where we do not increase + // the refcount at construction time (due to constinit requirements) but we + // will still decrease it at destruction time to avoid branching on Unref. + enum { + kImmortalShift = 1, + kRefIncrement = 1 << kImmortalShift, + kImmortalTag = kRefIncrement - 1 + }; + std::atomic<int32_t> count_; }; @@ -82,10 +146,33 @@ class Refcount { // functions in the base class. struct CordRepConcat; -struct CordRepSubstring; struct CordRepExternal; +struct CordRepFlat; +struct CordRepSubstring; +class CordRepRing; + +// Various representations that we allow +enum CordRepKind { + CONCAT = 0, + EXTERNAL = 1, + SUBSTRING = 2, + RING = 3, + + // We have different tags for different sized flat arrays, + // starting with FLAT, and limited to MAX_FLAT_TAG. The 224 value is based on + // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed + // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well + // as the Tag <---> Size logic so that FLAT stil represents the minimum flat + // allocation size. (32 bytes as of now). + FLAT = 4, + MAX_FLAT_TAG = 224 +}; struct CordRep { + CordRep() = default; + constexpr CordRep(Refcount::Immortal immortal, size_t l) + : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} + // The following three fields have to be less than 32 bytes since // that is the smallest supported flat node size. size_t length; @@ -93,22 +180,40 @@ struct CordRep { // If tag < FLAT, it represents CordRepKind and indicates the type of node. // Otherwise, the node type is CordRepFlat and the tag is the encoded size. uint8_t tag; - char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep + char storage[1]; // Starting point for flat array: MUST BE LAST FIELD + inline CordRepRing* ring(); + inline const CordRepRing* ring() const; inline CordRepConcat* concat(); inline const CordRepConcat* concat() const; inline CordRepSubstring* substring(); inline const CordRepSubstring* substring() const; inline CordRepExternal* external(); inline const CordRepExternal* external() const; + inline CordRepFlat* flat(); + inline const CordRepFlat* flat() const; + + // -------------------------------------------------------------------- + // Memory management + + // Destroys the provided `rep`. + static void Destroy(CordRep* rep); + + // Increments the reference count of `rep`. + // Requires `rep` to be a non-null pointer value. + static inline CordRep* Ref(CordRep* rep); + + // Decrements the reference count of `rep`. Destroys rep if count reaches + // zero. Requires `rep` to be a non-null pointer value. + static inline void Unref(CordRep* rep); }; struct CordRepConcat : public CordRep { CordRep* left; CordRep* right; - uint8_t depth() const { return static_cast<uint8_t>(data[0]); } - void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); } + uint8_t depth() const { return static_cast<uint8_t>(storage[0]); } + void set_depth(uint8_t depth) { storage[0] = static_cast<char>(depth); } }; struct CordRepSubstring : public CordRep { @@ -124,9 +229,19 @@ using ExternalReleaserInvoker = void (*)(CordRepExternal*); // External CordReps are allocated together with a type erased releaser. The // releaser is stored in the memory directly following the CordRepExternal. struct CordRepExternal : public CordRep { + CordRepExternal() = default; + explicit constexpr CordRepExternal(absl::string_view str) + : CordRep(Refcount::Immortal{}, str.size()), + base(str.data()), + releaser_invoker(nullptr) {} + const char* base; // Pointer to function that knows how to call and destroy the releaser. ExternalReleaserInvoker releaser_invoker; + + // Deletes (releases) the external rep. + // Requires rep != nullptr and rep->tag == EXTERNAL + static void Delete(CordRep* rep); }; struct Rank1 {}; @@ -167,7 +282,262 @@ struct CordRepExternalImpl } }; +inline void CordRepExternal::Delete(CordRep* rep) { + assert(rep != nullptr && rep->tag == EXTERNAL); + auto* rep_external = static_cast<CordRepExternal*>(rep); + assert(rep_external->releaser_invoker != nullptr); + rep_external->releaser_invoker(rep_external); +} + +template <typename Str> +struct ConstInitExternalStorage { + ABSL_CONST_INIT static CordRepExternal value; +}; + +template <typename Str> +CordRepExternal ConstInitExternalStorage<Str>::value(Str::value); + +enum { + kMaxInline = 15, +}; + +constexpr char GetOrNull(absl::string_view data, size_t pos) { + return pos < data.size() ? data[pos] : '\0'; +} + +// We store cordz_info as 64 bit pointer value in big endian format. This +// guarantees that the least significant byte of cordz_info matches the last +// byte of the inline data representation in as_chars_, which holds the inlined +// size or the 'is_tree' bit. +using cordz_info_t = int64_t; + +// Assert that the `cordz_info` pointer value perfectly overlaps the last half +// of `as_chars_` and can hold a pointer value. +static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, ""); +static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), ""); + +// BigEndianByte() creates a big endian representation of 'value', i.e.: a big +// endian value where the last byte in the host's representation holds 'value`, +// with all other bytes being 0. +static constexpr cordz_info_t BigEndianByte(unsigned char value) { +#if defined(ABSL_IS_BIG_ENDIAN) + return value; +#else + return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8); +#endif +} + +class InlineData { + public: + // kNullCordzInfo holds the big endian representation of intptr_t(1) + // This is the 'null' / initial value of 'cordz_info'. The null value + // is specifically big endian 1 as with 64-bit pointers, the last + // byte of cordz_info overlaps with the last byte holding the tag. + static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1); + + // kFakeCordzInfo holds a 'fake', non-null cordz-info value we use to + // emulate the previous 'kProfiled' tag logic in 'set_profiled' until + // cord code is changed to store cordz_info values in InlineData. + static constexpr cordz_info_t kFakeCordzInfo = BigEndianByte(9); + + constexpr InlineData() : as_chars_{0} {} + explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {} + explicit constexpr InlineData(absl::string_view chars) + : as_chars_{ + GetOrNull(chars, 0), GetOrNull(chars, 1), + GetOrNull(chars, 2), GetOrNull(chars, 3), + GetOrNull(chars, 4), GetOrNull(chars, 5), + GetOrNull(chars, 6), GetOrNull(chars, 7), + GetOrNull(chars, 8), GetOrNull(chars, 9), + GetOrNull(chars, 10), GetOrNull(chars, 11), + GetOrNull(chars, 12), GetOrNull(chars, 13), + GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {} + + // Returns true if the current instance is empty. + // The 'empty value' is an inlined data value of zero length. + bool is_empty() const { return tag() == 0; } + + // Returns true if the current instance holds a tree value. + bool is_tree() const { return (tag() & 1) != 0; } + + // Returns true if the current instance holds a cordz_info value. + // Requires the current instance to hold a tree value. + bool is_profiled() const { + assert(is_tree()); + return as_tree_.cordz_info != kNullCordzInfo; + } + + // Returns the cordz_info sampling instance for this instance, or nullptr + // if the current instance is not sampled and does not have CordzInfo data. + // Requires the current instance to hold a tree value. + CordzInfo* cordz_info() const { + assert(is_tree()); + intptr_t info = + static_cast<intptr_t>(absl::big_endian::ToHost64(as_tree_.cordz_info)); + assert(info & 1); + return reinterpret_cast<CordzInfo*>(info - 1); + } + + // Sets the current cordz_info sampling instance for this instance, or nullptr + // if the current instance is not sampled and does not have CordzInfo data. + // Requires the current instance to hold a tree value. + void set_cordz_info(CordzInfo* cordz_info) { + assert(is_tree()); + intptr_t info = reinterpret_cast<intptr_t>(cordz_info) | 1; + as_tree_.cordz_info = absl::big_endian::FromHost64(info); + } + + // Resets the current cordz_info to null / empty. + void clear_cordz_info() { + assert(is_tree()); + as_tree_.cordz_info = kNullCordzInfo; + } + + // Returns a read only pointer to the character data inside this instance. + // Requires the current instance to hold inline data. + const char* as_chars() const { + assert(!is_tree()); + return as_chars_; + } + + // Returns a mutable pointer to the character data inside this instance. + // Should be used for 'write only' operations setting an inlined value. + // Applications can set the value of inlined data either before or after + // setting the inlined size, i.e., both of the below are valid: + // + // // Set inlined data and inline size + // memcpy(data_.as_chars(), data, size); + // data_.set_inline_size(size); + // + // // Set inlined size and inline data + // data_.set_inline_size(size); + // memcpy(data_.as_chars(), data, size); + // + // It's an error to read from the returned pointer without a preceding write + // if the current instance does not hold inline data, i.e.: is_tree() == true. + char* as_chars() { return as_chars_; } + + // Returns the tree value of this value. + // Requires the current instance to hold a tree value. + CordRep* as_tree() const { + assert(is_tree()); + return as_tree_.rep; + } + + // Initialize this instance to holding the tree value `rep`, + // initializing the cordz_info to null, i.e.: 'not profiled'. + void make_tree(CordRep* rep) { + as_tree_.rep = rep; + as_tree_.cordz_info = kNullCordzInfo; + } + + // Set the tree value of this instance to 'rep`. + // Requires the current instance to already hold a tree value. + // Does not affect the value of cordz_info. + void set_tree(CordRep* rep) { + assert(is_tree()); + as_tree_.rep = rep; + } + + // Returns the size of the inlined character data inside this instance. + // Requires the current instance to hold inline data. + size_t inline_size() const { + assert(!is_tree()); + return tag() >> 1; + } + + // Sets the size of the inlined character data inside this instance. + // Requires `size` to be <= kMaxInline. + // See the documentation on 'as_chars()' for more information and examples. + void set_inline_size(size_t size) { + ABSL_ASSERT(size <= kMaxInline); + tag() = static_cast<char>(size << 1); + } + + // Sets or unsets the 'is_profiled' state of this instance. + // Requires the current instance to hold a tree value. + void set_profiled(bool profiled) { + assert(is_tree()); + as_tree_.cordz_info = profiled ? kFakeCordzInfo : kNullCordzInfo; + } + + private: + // See cordz_info_t for forced alignment and size of `cordz_info` details. + struct AsTree { + explicit constexpr AsTree(absl::cord_internal::CordRep* tree) + : rep(tree), cordz_info(kNullCordzInfo) {} + // This union uses up extra space so that whether rep is 32 or 64 bits, + // cordz_info will still start at the eighth byte, and the last + // byte of cordz_info will still be the last byte of InlineData. + union { + absl::cord_internal::CordRep* rep; + cordz_info_t unused_aligner; + }; + cordz_info_t cordz_info; + }; + + char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; } + char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; } + + // If the data has length <= kMaxInline, we store it in `as_chars_`, and + // store the size in the last char of `as_chars_` shifted left + 1. + // Else we store it in a tree and store a pointer to that tree in + // `as_tree_.rep` and store a tag in `tagged_size`. + union { + char as_chars_[kMaxInline + 1]; + AsTree as_tree_; + }; +}; + +static_assert(sizeof(InlineData) == kMaxInline + 1, ""); + +inline CordRepConcat* CordRep::concat() { + assert(tag == CONCAT); + return static_cast<CordRepConcat*>(this); +} + +inline const CordRepConcat* CordRep::concat() const { + assert(tag == CONCAT); + return static_cast<const CordRepConcat*>(this); +} + +inline CordRepSubstring* CordRep::substring() { + assert(tag == SUBSTRING); + return static_cast<CordRepSubstring*>(this); +} + +inline const CordRepSubstring* CordRep::substring() const { + assert(tag == SUBSTRING); + return static_cast<const CordRepSubstring*>(this); +} + +inline CordRepExternal* CordRep::external() { + assert(tag == EXTERNAL); + return static_cast<CordRepExternal*>(this); +} + +inline const CordRepExternal* CordRep::external() const { + assert(tag == EXTERNAL); + return static_cast<const CordRepExternal*>(this); +} + +inline CordRep* CordRep::Ref(CordRep* rep) { + assert(rep != nullptr); + rep->refcount.Increment(); + return rep; +} + +inline void CordRep::Unref(CordRep* rep) { + assert(rep != nullptr); + // Expect refcount to be 0. Avoiding the cost of an atomic decrement should + // typically outweigh the cost of an extra branch checking for ref == 1. + if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) { + Destroy(rep); + } +} + } // namespace cord_internal + ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ diff --git a/absl/strings/internal/cord_rep_flat.h b/absl/strings/internal/cord_rep_flat.h new file mode 100644 index 00000000..a98aa9df --- /dev/null +++ b/absl/strings/internal/cord_rep_flat.h @@ -0,0 +1,146 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <memory> + +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Note: all constants below are never ODR used and internal to cord, we define +// these as static constexpr to avoid 'in struct' definition and usage clutter. + +// Largest and smallest flat node lengths we are willing to allocate +// Flat allocation size is stored in tag, which currently can encode sizes up +// to 4K, encoded as multiple of either 8 or 32 bytes. +// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc. +// kMinFlatSize is bounded by tag needing to be at least FLAT * 8 bytes, and +// ideally a 'nice' size aligning with allocation and cacheline sizes like 32. +// kMaxFlatSize is bounded by the size resulting in a computed tag no greater +// than MAX_FLAT_TAG. MAX_FLAT_TAG provides for additional 'high' tag values. +static constexpr size_t kFlatOverhead = offsetof(CordRep, storage); +static constexpr size_t kMinFlatSize = 32; +static constexpr size_t kMaxFlatSize = 4096; +static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; +static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead; + +constexpr uint8_t AllocatedSizeToTagUnchecked(size_t size) { + return static_cast<uint8_t>((size <= 1024) ? size / 8 + : 128 + size / 32 - 1024 / 32); +} + +static_assert(kMinFlatSize / 8 >= FLAT, ""); +static_assert(AllocatedSizeToTagUnchecked(kMaxFlatSize) <= MAX_FLAT_TAG, ""); + +// Helper functions for rounded div, and rounding to exact sizes. +constexpr size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } +constexpr size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } + +// Returns the size to the nearest equal or larger value that can be +// expressed exactly as a tag value. +inline size_t RoundUpForTag(size_t size) { + return RoundUp(size, (size <= 1024) ? 8 : 32); +} + +// Converts the allocated size to a tag, rounding down if the size +// does not exactly match a 'tag expressible' size value. The result is +// undefined if the size exceeds the maximum size that can be encoded in +// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). +inline uint8_t AllocatedSizeToTag(size_t size) { + const uint8_t tag = AllocatedSizeToTagUnchecked(size); + assert(tag <= MAX_FLAT_TAG); + return tag; +} + +// Converts the provided tag to the corresponding allocated size +constexpr size_t TagToAllocatedSize(uint8_t tag) { + return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32); +} + +// Converts the provided tag to the corresponding available data length +constexpr size_t TagToLength(uint8_t tag) { + return TagToAllocatedSize(tag) - kFlatOverhead; +} + +// Enforce that kMaxFlatSize maps to a well-known exact tag value. +static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); + +struct CordRepFlat : public CordRep { + // Creates a new flat node. + static CordRepFlat* New(size_t len) { + if (len <= kMinFlatLength) { + len = kMinFlatLength; + } else if (len > kMaxFlatLength) { + len = kMaxFlatLength; + } + + // Round size up so it matches a size we can exactly express in a tag. + const size_t size = RoundUpForTag(len + kFlatOverhead); + void* const raw_rep = ::operator new(size); + CordRepFlat* rep = new (raw_rep) CordRepFlat(); + rep->tag = AllocatedSizeToTag(size); + return rep; + } + + // Deletes a CordRepFlat instance created previously through a call to New(). + // Flat CordReps are allocated and constructed with raw ::operator new and + // placement new, and must be destructed and deallocated accordingly. + static void Delete(CordRep*rep) { + assert(rep->tag >= FLAT && rep->tag <= MAX_FLAT_TAG); + +#if defined(__cpp_sized_deallocation) + size_t size = TagToAllocatedSize(rep->tag); + rep->~CordRep(); + ::operator delete(rep, size); +#else + rep->~CordRep(); + ::operator delete(rep); +#endif + } + + // Returns a pointer to the data inside this flat rep. + char* Data() { return storage; } + const char* Data() const { return storage; } + + // Returns the maximum capacity (payload size) of this instance. + size_t Capacity() const { return TagToLength(tag); } + + // Returns the allocated size (payload + overhead) of this instance. + size_t AllocatedSize() const { return TagToAllocatedSize(tag); } +}; + +// Now that CordRepFlat is defined, we can define CordRep's helper casts: +inline CordRepFlat* CordRep::flat() { + assert(tag >= FLAT && tag <= MAX_FLAT_TAG); + return reinterpret_cast<CordRepFlat*>(this); +} + +inline const CordRepFlat* CordRep::flat() const { + assert(tag >= FLAT && tag <= MAX_FLAT_TAG); + return reinterpret_cast<const CordRepFlat*>(this); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ diff --git a/absl/strings/internal/cord_rep_ring.cc b/absl/strings/internal/cord_rep_ring.cc new file mode 100644 index 00000000..4d31d1d9 --- /dev/null +++ b/absl/strings/internal/cord_rep_ring.cc @@ -0,0 +1,897 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cord_rep_ring.h" + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iostream> +#include <limits> +#include <memory> +#include <string> + +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/throw_delegate.h" +#include "absl/base/macros.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// See https://bugs.llvm.org/show_bug.cgi?id=48477 +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" +#if __has_warning("-Wshadow-field") +#pragma clang diagnostic ignored "-Wshadow-field" +#endif +#endif + +namespace { + +using index_type = CordRepRing::index_type; + +enum class Direction { kForward, kReversed }; + +inline bool IsFlatOrExternal(CordRep* rep) { + return rep->tag >= FLAT || rep->tag == EXTERNAL; +} + +// Verifies that n + extra <= kMaxCapacity: throws std::length_error otherwise. +inline void CheckCapacity(size_t n, size_t extra) { + if (ABSL_PREDICT_FALSE(extra > CordRepRing::kMaxCapacity - n)) { + base_internal::ThrowStdLengthError("Maximum capacity exceeded"); + } +} + +// Removes a reference from `rep` only. +// Asserts that the refcount after decrement is not zero. +inline bool UnrefNeverOne(CordRep* rep) { + bool result = rep->refcount.Decrement(); + assert(result); + return result; +} + +// Creates a flat from the provided string data, allocating up to `extra` +// capacity in the returned flat depending on kMaxFlatLength limitations. +// Requires `len` to be less or equal to `kMaxFlatLength` +CordRepFlat* CreateFlat(const char* s, size_t n, size_t extra = 0) { // NOLINT + assert(n <= kMaxFlatLength); + auto* rep = CordRepFlat::New(n + extra); + rep->length = n; + memcpy(rep->Data(), s, n); + return rep; +} + +// Unrefs the provided `substring`, and returns `substring->child` +// Adds or assumes a reference on `substring->child` +CordRep* ClipSubstring(CordRepSubstring* substring) { + CordRep* child = substring->child; + if (substring->refcount.IsOne()) { + delete substring; + } else { + CordRep::Ref(child); + if (ABSL_PREDICT_FALSE(!substring->refcount.Decrement())) { + UnrefNeverOne(child); + delete substring; + } + } + return child; +} + +// Unrefs the provided `concat`, and returns `{concat->left, concat->right}` +// Adds or assumes a reference on `concat->left` and `concat->right`. +std::pair<CordRep*, CordRep*> ClipConcat(CordRepConcat* concat) { + auto result = std::make_pair(concat->left, concat->right); + if (concat->refcount.IsOne()) { + delete concat; + } else { + CordRep::Ref(result.first); + CordRep::Ref(result.second); + if (ABSL_PREDICT_FALSE(!concat->refcount.Decrement())) { + UnrefNeverOne(result.first); + UnrefNeverOne(result.second); + delete concat; + } + } + return result; +} + +// Unrefs the entries in `[head, tail)`. +// Requires all entries to be a FLAT or EXTERNAL node. +void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) { + rep->ForEach(head, tail, [rep](index_type ix) { + CordRep* child = rep->entry_child(ix); + if (!child->refcount.Decrement()) { + if (child->tag >= FLAT) { + CordRepFlat::Delete(child->flat()); + } else { + CordRepExternal::Delete(child->external()); + } + } + }); +} + +template <typename F> +void Consume(Direction direction, CordRep* rep, F&& fn) { + size_t offset = 0; + size_t length = rep->length; + struct Entry { + CordRep* rep; + size_t offset; + size_t length; + }; + absl::InlinedVector<Entry, 40> stack; + + for (;;) { + if (rep->tag >= FLAT || rep->tag == EXTERNAL || rep->tag == RING) { + fn(rep, offset, length); + if (stack.empty()) return; + + rep = stack.back().rep; + offset = stack.back().offset; + length = stack.back().length; + stack.pop_back(); + } else if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = ClipSubstring(rep->substring()); + } else if (rep->tag == CONCAT) { + auto res = ClipConcat(rep->concat()); + CordRep* left = res.first; + CordRep* right = res.second; + + if (left->length <= offset) { + // Don't need left node + offset -= left->length; + CordRep::Unref(left); + rep = right; + continue; + } + + size_t length_left = left->length - offset; + if (length_left >= length) { + // Don't need right node + CordRep::Unref(right); + rep = left; + continue; + } + + // Need both nodes + size_t length_right = length - length_left; + if (direction == Direction::kReversed) { + stack.push_back({left, offset, length_left}); + rep = right; + offset = 0; + length = length_right; + } else { + stack.push_back({right, 0, length_right}); + rep = left; + length = length_left; + } + } else { + assert("Valid tag" == nullptr); + return; + } + } +} + +template <typename F> +void Consume(CordRep* rep, F&& fn) { + return Consume(Direction::kForward, rep, std::forward<F>(fn)); +} + +template <typename F> +void RConsume(CordRep* rep, F&& fn) { + return Consume(Direction::kReversed, rep, std::forward<F>(fn)); +} + +} // namespace + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep) { + // Note: 'pos' values are defined as size_t (for overflow reasons), but that + // prints really awkward for small prepended values such as -5. ssize_t is not + // portable (POSIX), so we use ptrdiff_t instead to cast to signed values. + s << " CordRepRing(" << &rep << ", length = " << rep.length + << ", head = " << rep.head_ << ", tail = " << rep.tail_ + << ", cap = " << rep.capacity_ << ", rc = " << rep.refcount.Get() + << ", begin_pos_ = " << static_cast<ptrdiff_t>(rep.begin_pos_) << ") {\n"; + CordRepRing::index_type head = rep.head(); + do { + CordRep* child = rep.entry_child(head); + s << " entry[" << head << "] length = " << rep.entry_length(head) + << ", child " << child << ", clen = " << child->length + << ", tag = " << static_cast<int>(child->tag) + << ", rc = " << child->refcount.Get() + << ", offset = " << rep.entry_data_offset(head) + << ", end_pos = " << static_cast<ptrdiff_t>(rep.entry_end_pos(head)) + << "\n"; + head = rep.advance(head); + } while (head != rep.tail()); + return s << "}\n"; +} + +void CordRepRing::AddDataOffset(index_type index, size_t n) { + entry_data_offset()[index] += static_cast<offset_type>(n); +} + +void CordRepRing::SubLength(index_type index, size_t n) { + entry_end_pos()[index] -= n; +} + +class CordRepRing::Filler { + public: + Filler(CordRepRing* rep, index_type pos) : rep_(rep), head_(pos), pos_(pos) {} + + index_type head() const { return head_; } + index_type pos() const { return pos_; } + + void Add(CordRep* child, size_t offset, pos_type end_pos) { + rep_->entry_end_pos()[pos_] = end_pos; + rep_->entry_child()[pos_] = child; + rep_->entry_data_offset()[pos_] = static_cast<offset_type>(offset); + pos_ = rep_->advance(pos_); + } + + private: + CordRepRing* rep_; + index_type head_; + index_type pos_; +}; + +constexpr size_t CordRepRing::kMaxCapacity; // NOLINT: needed for c++11 + +bool CordRepRing::IsValid(std::ostream& output) const { + if (capacity_ == 0) { + output << "capacity == 0"; + return false; + } + + if (head_ >= capacity_ || tail_ >= capacity_) { + output << "head " << head_ << " and/or tail " << tail_ << "exceed capacity " + << capacity_; + return false; + } + + const index_type back = retreat(tail_); + size_t pos_length = Distance(begin_pos_, entry_end_pos(back)); + if (pos_length != length) { + output << "length " << length << " does not match positional length " + << pos_length << " from begin_pos " << begin_pos_ << " and entry[" + << back << "].end_pos " << entry_end_pos(back); + return false; + } + + index_type head = head_; + pos_type begin_pos = begin_pos_; + do { + pos_type end_pos = entry_end_pos(head); + size_t entry_length = Distance(begin_pos, end_pos); + if (entry_length == 0) { + output << "entry[" << head << "] has an invalid length " << entry_length + << " from begin_pos " << begin_pos << " and end_pos " << end_pos; + return false; + } + + CordRep* child = entry_child(head); + if (child == nullptr) { + output << "entry[" << head << "].child == nullptr"; + return false; + } + if (child->tag < FLAT && child->tag != EXTERNAL) { + output << "entry[" << head << "].child has an invalid tag " + << static_cast<int>(child->tag); + return false; + } + + size_t offset = entry_data_offset(head); + if (offset >= child->length || entry_length > child->length - offset) { + output << "entry[" << head << "] has offset " << offset + << " and entry length " << entry_length + << " which are outside of the childs length of " << child->length; + return false; + } + + begin_pos = end_pos; + head = advance(head); + } while (head != tail_); + + return true; +} + +#ifdef EXTRA_CORD_RING_VALIDATION +CordRepRing* CordRepRing::Validate(CordRepRing* rep, const char* file, + int line) { + if (!rep->IsValid(std::cerr)) { + std::cerr << "\nERROR: CordRepRing corrupted"; + if (line) std::cerr << " at line " << line; + if (file) std::cerr << " in file " << file; + std::cerr << "\nContent = " << *rep; + abort(); + } + return rep; +} +#endif // EXTRA_CORD_RING_VALIDATION + +CordRepRing* CordRepRing::New(size_t capacity, size_t extra) { + CheckCapacity(capacity, extra); + + size_t size = AllocSize(capacity += extra); + void* mem = ::operator new(size); + auto* rep = new (mem) CordRepRing(static_cast<index_type>(capacity)); + rep->tag = RING; + rep->capacity_ = static_cast<index_type>(capacity); + rep->begin_pos_ = 0; + return rep; +} + +void CordRepRing::SetCapacityForTesting(size_t capacity) { + // Adjust for the changed layout + assert(capacity <= capacity_); + assert(head() == 0 || head() < tail()); + memmove(Layout::Partial(capacity).Pointer<1>(data_) + head(), + Layout::Partial(capacity_).Pointer<1>(data_) + head(), + entries() * sizeof(Layout::ElementType<1>)); + memmove(Layout::Partial(capacity, capacity).Pointer<2>(data_) + head(), + Layout::Partial(capacity_, capacity_).Pointer<2>(data_) + head(), + entries() * sizeof(Layout::ElementType<2>)); + capacity_ = static_cast<index_type>(capacity); +} + +void CordRepRing::Delete(CordRepRing* rep) { + assert(rep != nullptr && rep->tag == RING); +#if defined(__cpp_sized_deallocation) + size_t size = AllocSize(rep->capacity_); + rep->~CordRepRing(); + ::operator delete(rep, size); +#else + rep->~CordRepRing(); + ::operator delete(rep); +#endif +} + +void CordRepRing::Destroy(CordRepRing* rep) { + UnrefEntries(rep, rep->head(), rep->tail()); + Delete(rep); +} + +template <bool ref> +void CordRepRing::Fill(const CordRepRing* src, index_type head, + index_type tail) { + this->length = src->length; + head_ = 0; + tail_ = advance(0, src->entries(head, tail)); + begin_pos_ = src->begin_pos_; + + // TODO(mvels): there may be opportunities here for large buffers. + auto* dst_pos = entry_end_pos(); + auto* dst_child = entry_child(); + auto* dst_offset = entry_data_offset(); + src->ForEach(head, tail, [&](index_type index) { + *dst_pos++ = src->entry_end_pos(index); + CordRep* child = src->entry_child(index); + *dst_child++ = ref ? CordRep::Ref(child) : child; + *dst_offset++ = src->entry_data_offset(index); + }); +} + +CordRepRing* CordRepRing::Copy(CordRepRing* rep, index_type head, + index_type tail, size_t extra) { + CordRepRing* newrep = CordRepRing::New(rep->entries(head, tail), extra); + newrep->Fill<true>(rep, head, tail); + CordRep::Unref(rep); + return newrep; +} + +CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { + // Get current number of entries, and check for max capacity. + size_t entries = rep->entries(); + + size_t min_extra = (std::max)(extra, rep->capacity() * 2 - entries); + if (!rep->refcount.IsOne()) { + return Copy(rep, rep->head(), rep->tail(), min_extra); + } else if (entries + extra > rep->capacity()) { + CordRepRing* newrep = CordRepRing::New(entries, min_extra); + newrep->Fill<false>(rep, rep->head(), rep->tail()); + CordRepRing::Delete(rep); + return newrep; + } else { + return rep; + } +} + +Span<char> CordRepRing::GetAppendBuffer(size_t size) { + assert(refcount.IsOne()); + index_type back = retreat(tail_); + CordRep* child = entry_child(back); + if (child->tag >= FLAT && child->refcount.IsOne()) { + size_t capacity = child->flat()->Capacity(); + pos_type end_pos = entry_end_pos(back); + size_t data_offset = entry_data_offset(back); + size_t entry_length = Distance(entry_begin_pos(back), end_pos); + size_t used = data_offset + entry_length; + if (size_t n = (std::min)(capacity - used, size)) { + child->length = data_offset + entry_length + n; + entry_end_pos()[back] = end_pos + n; + this->length += n; + return {child->flat()->Data() + used, n}; + } + } + return {nullptr, 0}; +} + +Span<char> CordRepRing::GetPrependBuffer(size_t size) { + assert(refcount.IsOne()); + CordRep* child = entry_child(head_); + size_t data_offset = entry_data_offset(head_); + if (data_offset && child->refcount.IsOne() && child->tag >= FLAT) { + size_t n = (std::min)(data_offset, size); + this->length += n; + begin_pos_ -= n; + data_offset -= n; + entry_data_offset()[head_] = static_cast<offset_type>(data_offset); + return {child->flat()->Data() + data_offset, n}; + } + return {nullptr, 0}; +} + +CordRepRing* CordRepRing::CreateFromLeaf(CordRep* child, size_t offset, + size_t length, size_t extra) { + CordRepRing* rep = CordRepRing::New(1, extra); + rep->head_ = 0; + rep->tail_ = rep->advance(0); + rep->length = length; + rep->entry_end_pos()[0] = length; + rep->entry_child()[0] = child; + rep->entry_data_offset()[0] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::CreateSlow(CordRep* child, size_t extra) { + CordRepRing* rep = nullptr; + Consume(child, [&](CordRep* child, size_t offset, size_t length) { + if (IsFlatOrExternal(child)) { + rep = rep ? AppendLeaf(rep, child, offset, length) + : CreateFromLeaf(child, offset, length, extra); + } else if (rep) { + rep = AddRing<AddMode::kAppend>(rep, child->ring(), offset, length); + } else if (offset == 0 && child->length == length) { + rep = Mutable(child->ring(), extra); + } else { + rep = SubRing(child->ring(), offset, length, extra); + } + }); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Create(CordRep* child, size_t extra) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return CreateFromLeaf(child, 0, length, extra); + } + if (child->tag == RING) { + return Mutable(child->ring(), extra); + } + return CreateSlow(child, extra); +} + +template <CordRepRing::AddMode mode> +CordRepRing* CordRepRing::AddRing(CordRepRing* rep, CordRepRing* ring, + size_t offset, size_t length) { + assert(offset < ring->length); + constexpr bool append = mode == AddMode::kAppend; + Position head = ring->Find(offset); + Position tail = ring->FindTail(head.index, offset + length); + const index_type entries = ring->entries(head.index, tail.index); + + rep = Mutable(rep, entries); + + // The delta for making ring[head].end_pos into 'len - offset' + const pos_type delta_length = + (append ? rep->begin_pos_ + rep->length : rep->begin_pos_ - length) - + ring->entry_begin_pos(head.index) - head.offset; + + // Start filling at `tail`, or `entries` before `head` + Filler filler(rep, append ? rep->tail_ : rep->retreat(rep->head_, entries)); + + if (ring->refcount.IsOne()) { + // Copy entries from source stealing the ref and adjusting the end position. + // Commit the filler as this is no-op. + ring->ForEach(head.index, tail.index, [&](index_type ix) { + filler.Add(ring->entry_child(ix), ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + }); + + // Unref entries we did not copy over, and delete source. + if (head.index != ring->head_) UnrefEntries(ring, ring->head_, head.index); + if (tail.index != ring->tail_) UnrefEntries(ring, tail.index, ring->tail_); + CordRepRing::Delete(ring); + } else { + ring->ForEach(head.index, tail.index, [&](index_type ix) { + CordRep* child = ring->entry_child(ix); + filler.Add(child, ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + CordRep::Ref(child); + }); + CordRepRing::Unref(ring); + } + + if (head.offset) { + // Increase offset of first 'source' entry appended or prepended. + // This is always the entry in `filler.head()` + rep->AddDataOffset(filler.head(), head.offset); + } + + if (tail.offset) { + // Reduce length of last 'source' entry appended or prepended. + // This is always the entry tailed by `filler.pos()` + rep->SubLength(rep->retreat(filler.pos()), tail.offset); + } + + // Commit changes + rep->length += length; + if (append) { + rep->tail_ = filler.pos(); + } else { + rep->head_ = filler.head(); + rep->begin_pos_ -= length; + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::AppendSlow(CordRepRing* rep, CordRep* child) { + Consume(child, [&rep](CordRep* child, size_t offset, size_t length) { + if (child->tag == RING) { + rep = AddRing<AddMode::kAppend>(rep, child->ring(), offset, length); + } else { + rep = AppendLeaf(rep, child, offset, length); + } + }); + return rep; +} + +CordRepRing* CordRepRing::AppendLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length) { + rep = Mutable(rep, 1); + index_type back = rep->tail_; + const pos_type begin_pos = rep->begin_pos_ + rep->length; + rep->tail_ = rep->advance(rep->tail_); + rep->length += length; + rep->entry_end_pos()[back] = begin_pos + length; + rep->entry_child()[back] = child; + rep->entry_data_offset()[back] = static_cast<offset_type>(offset); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return AppendLeaf(rep, child, 0, length); + } + if (child->tag == RING) { + return AddRing<AddMode::kAppend>(rep, child->ring(), 0, length); + } + return AppendSlow(rep, child); +} + +CordRepRing* CordRepRing::PrependSlow(CordRepRing* rep, CordRep* child) { + RConsume(child, [&](CordRep* child, size_t offset, size_t length) { + if (IsFlatOrExternal(child)) { + rep = PrependLeaf(rep, child, offset, length); + } else { + rep = AddRing<AddMode::kPrepend>(rep, child->ring(), offset, length); + } + }); + return Validate(rep); +} + +CordRepRing* CordRepRing::PrependLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length) { + rep = Mutable(rep, 1); + index_type head = rep->retreat(rep->head_); + pos_type end_pos = rep->begin_pos_; + rep->head_ = head; + rep->length += length; + rep->begin_pos_ -= length; + rep->entry_end_pos()[head] = end_pos; + rep->entry_child()[head] = child; + rep->entry_data_offset()[head] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return PrependLeaf(rep, child, 0, length); + } + if (child->tag == RING) { + return AddRing<AddMode::kPrepend>(rep, child->ring(), 0, length); + } + return PrependSlow(rep, child); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, + size_t extra) { + if (rep->refcount.IsOne()) { + Span<char> avail = rep->GetAppendBuffer(data.length()); + if (!avail.empty()) { + memcpy(avail.data(), data.data(), avail.length()); + data.remove_prefix(avail.length()); + } + } + if (data.empty()) return Validate(rep); + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + + Filler filler(rep, rep->tail_); + pos_type pos = rep->begin_pos_ + rep->length; + + while (data.length() >= kMaxFlatLength) { + auto* flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos += kMaxFlatLength); + data.remove_prefix(kMaxFlatLength); + } + + if (data.length()) { + auto* flat = CreateFlat(data.data(), data.length(), extra); + filler.Add(flat, 0, pos += data.length()); + } + + rep->length = pos - rep->begin_pos_; + rep->tail_ = filler.pos(); + + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, absl::string_view data, + size_t extra) { + if (rep->refcount.IsOne()) { + Span<char> avail = rep->GetPrependBuffer(data.length()); + if (!avail.empty()) { + const char* tail = data.data() + data.length() - avail.length(); + memcpy(avail.data(), tail, avail.length()); + data.remove_suffix(avail.length()); + } + } + if (data.empty()) return rep; + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + pos_type pos = rep->begin_pos_; + Filler filler(rep, rep->retreat(rep->head_, static_cast<index_type>(flats))); + + size_t first_size = data.size() - (flats - 1) * kMaxFlatLength; + CordRepFlat* flat = CordRepFlat::New(first_size + extra); + flat->length = first_size + extra; + memcpy(flat->Data() + extra, data.data(), first_size); + data.remove_prefix(first_size); + filler.Add(flat, extra, pos); + pos -= first_size; + + while (!data.empty()) { + assert(data.size() >= kMaxFlatLength); + flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos); + pos -= kMaxFlatLength; + data.remove_prefix(kMaxFlatLength); + } + + rep->head_ = filler.head(); + rep->length += rep->begin_pos_ - pos; + rep->begin_pos_ = pos; + + return Validate(rep); +} + +// 32 entries is 32 * sizeof(pos_type) = 4 cache lines on x86 +static constexpr index_type kBinarySearchThreshold = 32; +static constexpr index_type kBinarySearchEndCount = 8; + +template <bool wrap> +CordRepRing::index_type CordRepRing::FindBinary(index_type head, + index_type tail, + size_t offset) const { + index_type count = tail + (wrap ? capacity_ : 0) - head; + do { + count = (count - 1) / 2; + assert(count < entries(head, tail_)); + index_type mid = wrap ? advance(head, count) : head + count; + index_type after_mid = wrap ? advance(mid) : mid + 1; + bool larger = (offset >= entry_end_offset(mid)); + head = larger ? after_mid : head; + tail = larger ? tail : mid; + assert(head != tail); + } while (ABSL_PREDICT_TRUE(count > kBinarySearchEndCount)); + return head; +} + +CordRepRing::Position CordRepRing::FindSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, offset); + } + } + + pos_type pos = entry_begin_pos(head); + pos_type end_pos = entry_end_pos(head); + while (offset >= Distance(begin_pos_, end_pos)) { + head = advance(head); + pos = end_pos; + end_pos = entry_end_pos(head); + } + + return {head, offset - Distance(begin_pos_, pos)}; +} + +CordRepRing::Position CordRepRing::FindTailSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + const size_t tail_offset = offset - 1; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, tail_offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, tail_offset); + } + } + + size_t end_offset = entry_end_offset(head); + while (tail_offset >= end_offset) { + head = advance(head); + end_offset = entry_end_offset(head); + } + + return {advance(head), end_offset - offset}; +} + +char CordRepRing::GetCharacter(size_t offset) const { + assert(offset < length); + + Position pos = Find(offset); + size_t data_offset = entry_data_offset(pos.index) + pos.offset; + return GetRepData(entry_child(pos.index))[data_offset]; +} + +CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, + size_t length, size_t extra) { + assert(offset <= rep->length); + assert(offset <= rep->length - length); + + if (length == 0) { + CordRep::Unref(rep); + return nullptr; + } + + // Find position of first byte + Position head = rep->Find(offset); + Position tail = rep->FindTail(head.index, offset + length); + const size_t new_entries = rep->entries(head.index, tail.index); + + if (rep->refcount.IsOne() && extra <= (rep->capacity() - new_entries)) { + // We adopt a privately owned rep and no extra entries needed. + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->head_ = head.index; + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, head.index, tail.index, extra); + head.index = rep->head_; + tail.index = rep->tail_; + } + + // Adjust begin_pos and length + rep->length = length; + rep->begin_pos_ += offset; + + // Adjust head and tail blocks + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position head = rep->Find(len); + if (rep->refcount.IsOne()) { + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + rep->head_ = head.index; + } else { + rep = Copy(rep, head.index, rep->tail_, extra); + head.index = rep->head_; + } + + // Adjust begin_pos and length + rep->length -= len; + rep->begin_pos_ += len; + + // Adjust head block + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position tail = rep->FindTail(rep->length - len); + if (rep->refcount.IsOne()) { + // We adopt a privately owned rep, scrub. + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, rep->head_, tail.index, extra); + tail.index = rep->tail_; + } + + // Adjust length + rep->length -= len; + + // Adjust tail block + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h new file mode 100644 index 00000000..c74d3353 --- /dev/null +++ b/absl/strings/internal/cord_rep_ring.h @@ -0,0 +1,589 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iosfwd> +#include <limits> +#include <memory> + +#include "absl/container/internal/layout.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// See https://bugs.llvm.org/show_bug.cgi?id=48477 +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" +#if __has_warning("-Wshadow-field") +#pragma clang diagnostic ignored "-Wshadow-field" +#endif +#endif + +// All operations modifying a ring buffer are implemented as static methods +// requiring a CordRepRing instance with a reference adopted by the method. +// +// The methods return the modified ring buffer, which may be equal to the input +// if the input was not shared, and having large enough capacity to accommodate +// any newly added node(s). Otherwise, a copy of the input rep with the new +// node(s) added is returned. +// +// Any modification on non shared ring buffers with enough capacity will then +// require minimum atomic operations. Caller should where possible provide +// reasonable `extra` hints for both anticipated extra `flat` byte space, as +// well as anticipated extra nodes required for complex operations. +// +// Example of code creating a ring buffer, adding some data to it, +// and discarding the buffer when done: +// +// void FunWithRings() { +// // Create ring with 3 flats +// CordRep* flat = CreateFlat("Hello"); +// CordRepRing* ring = CordRepRing::Create(flat, 2); +// ring = CordRepRing::Append(ring, CreateFlat(" ")); +// ring = CordRepRing::Append(ring, CreateFlat("world")); +// DoSomethingWithRing(ring); +// CordRep::Unref(ring); +// } +// +// Example of code Copying an existing ring buffer and modifying it: +// +// void MoreFunWithRings(CordRepRing* src) { +// CordRepRing* ring = CordRep::Ref(src)->ring(); +// ring = CordRepRing::Append(ring, CreateFlat("Hello")); +// ring = CordRepRing::Append(ring, CreateFlat(" ")); +// ring = CordRepRing::Append(ring, CreateFlat("world")); +// DoSomethingWithRing(ring); +// CordRep::Unref(ring); +// } +// +class CordRepRing : public CordRep { + public: + // `pos_type` represents a 'logical position'. A CordRepRing instance has a + // `begin_pos` (default 0), and each node inside the buffer will have an + // `end_pos` which is the `end_pos` of the previous node (or `begin_pos`) plus + // this node's length. The purpose is to allow for a binary search on this + // position, while allowing O(1) prepend and append operations. + using pos_type = size_t; + + // `index_type` is the type for the `head`, `tail` and `capacity` indexes. + // Ring buffers are limited to having no more than four billion entries. + using index_type = uint32_t; + + // `offset_type` is the type for the data offset inside a child rep's data. + using offset_type = uint32_t; + + // Position holds the node index and relative offset into the node for + // some physical offset in the contained data as returned by the Find() + // and FindTail() methods. + struct Position { + index_type index; + size_t offset; + }; + + // The maximum # of child nodes that can be hosted inside a CordRepRing. + static constexpr size_t kMaxCapacity = (std::numeric_limits<uint32_t>::max)(); + + // CordRepring can not be default constructed, moved, copied or assigned. + CordRepRing() = delete; + CordRepRing(const CordRepRing&) = delete; + CordRepRing& operator=(const CordRepRing&) = delete; + + // Returns true if this instance is valid, false if some or all of the + // invariants are broken. Intended for debug purposes only. + // `output` receives an explanation of the broken invariants. + bool IsValid(std::ostream& output) const; + + // Returns the size in bytes for a CordRepRing with `capacity' entries. + static constexpr size_t AllocSize(size_t capacity); + + // Returns the distance in bytes from `pos` to `end_pos`. + static constexpr size_t Distance(pos_type pos, pos_type end_pos); + + // Creates a new ring buffer from the provided `rep`. Adopts a reference + // on `rep`. The returned ring buffer has a capacity of at least `extra + 1` + static CordRepRing* Create(CordRep* child, size_t extra = 0); + + // `head`, `tail` and `capacity` indexes defining the ring buffer boundaries. + index_type head() const { return head_; } + index_type tail() const { return tail_; } + index_type capacity() const { return capacity_; } + + // Returns the number of entries in this instance. + index_type entries() const { return entries(head_, tail_); } + + // Returns the logical begin position of this instance. + pos_type begin_pos() const { return begin_pos_; } + + // Returns the number of entries for a given head-tail range. + // Requires `head` and `tail` values to be less than `capacity()`. + index_type entries(index_type head, index_type tail) const { + assert(head < capacity_ && tail < capacity_); + return tail - head + ((tail > head) ? 0 : capacity_); + } + + // Returns the logical end position of entry `index`. + pos_type const& entry_end_pos(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial().Pointer<0>(data_)[index]; + } + + // Returns the child pointer of entry `index`. + CordRep* const& entry_child(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial(capacity()).Pointer<1>(data_)[index]; + } + + // Returns the data offset of entry `index` + offset_type const& entry_data_offset(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial(capacity(), capacity()).Pointer<2>(data_)[index]; + } + + // Appends the provided child node to the `rep` instance. + // Adopts a reference from `rep` and `child` which may not be null. + // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node + // containing a FLAT or EXTERNAL node, then flat or external the node is added + // 'as is', with an offset added for the SUBSTRING case. + // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING or + // CONCAT tree, then all child nodes not excluded by any start offset or + // length values are added recursively. + static CordRepRing* Append(CordRepRing* rep, CordRep* child); + + // Appends the provided string data to the `rep` instance. + // This function will attempt to utilize any remaining capacity in the last + // node of the input if that node is not shared (directly or indirectly), and + // of type FLAT. Remaining data will be added as one or more FLAT nodes. + // Any last node added to the ring buffer will be allocated with up to + // `extra` bytes of capacity for (anticipated) subsequent append actions. + static CordRepRing* Append(CordRepRing* rep, string_view data, + size_t extra = 0); + + // Prepends the provided child node to the `rep` instance. + // Adopts a reference from `rep` and `child` which may not be null. + // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node + // containing a FLAT or EXTERNAL node, then flat or external the node is + // prepended 'as is', with an optional offset added for the SUBSTRING case. + // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING + // or CONCAT tree, then all child nodes not excluded by any start offset or + // length values are added recursively. + static CordRepRing* Prepend(CordRepRing* rep, CordRep* child); + + // Prepends the provided string data to the `rep` instance. + // This function will attempt to utilize any remaining capacity in the first + // node of the input if that node is not shared (directly or indirectly), and + // of type FLAT. Remaining data will be added as one or more FLAT nodes. + // Any first node prepnded to the ring buffer will be allocated with up to + // `extra` bytes of capacity for (anticipated) subsequent prepend actions. + static CordRepRing* Prepend(CordRepRing* rep, string_view data, + size_t extra = 0); + + // Returns a span referencing potentially unused capacity in the last node. + // The returned span may be empty if no such capacity is available, or if the + // current instance is shared. Else, a span of size `n <= size` is returned. + // If non empty, the ring buffer is adjusted to the new length, with the newly + // added capacity left uninitialized. Callers should assign a value to the + // entire span before any other operations on this instance. + Span<char> GetAppendBuffer(size_t size); + + // Returns a span referencing potentially unused capacity in the first node. + // This function is identical to GetAppendBuffer except that it returns a span + // referencing up to `size` capacity directly before the existing data. + Span<char> GetPrependBuffer(size_t size); + + // Returns a cord ring buffer containing `length` bytes of data starting at + // `offset`. If the input is not shared, this function will remove all head + // and tail child nodes outside of the requested range, and adjust the new + // head and tail nodes as required. If the input is shared, this function + // returns a new instance sharing some or all of the nodes from the input. + static CordRepRing* SubRing(CordRepRing* r, size_t offset, size_t length, + size_t extra = 0); + + // Returns a cord ring buffer with the first `length` bytes removed. + // If the input is not shared, this function will remove all head child nodes + // fully inside the first `length` bytes, and adjust the new head as required. + // If the input is shared, this function returns a new instance sharing some + // or all of the nodes from the input. + static CordRepRing* RemoveSuffix(CordRepRing* r, size_t length, + size_t extra = 0); + + // Returns a cord ring buffer with the last `length` bytes removed. + // If the input is not shared, this function will remove all head child nodes + // fully inside the first `length` bytes, and adjust the new head as required. + // If the input is shared, this function returns a new instance sharing some + // or all of the nodes from the input. + static CordRepRing* RemovePrefix(CordRepRing* r, size_t len, + size_t extra = 0); + + // Returns the character at `offset`. Requires that `offset < length`. + char GetCharacter(size_t offset) const; + + // Testing only: set capacity to requested capacity. + void SetCapacityForTesting(size_t capacity); + + // Returns the CordRep data pointer for the provided CordRep. + // Requires that the provided `rep` is either a FLAT or EXTERNAL CordRep. + static const char* GetLeafData(const CordRep* rep); + + // Returns the CordRep data pointer for the provided CordRep. + // Requires that `rep` is either a FLAT, EXTERNAL, or SUBSTRING CordRep. + static const char* GetRepData(const CordRep* rep); + + // Advances the provided position, wrapping around capacity as needed. + // Requires `index` < capacity() + inline index_type advance(index_type index) const; + + // Advances the provided position by 'n`, wrapping around capacity as needed. + // Requires `index` < capacity() and `n` <= capacity. + inline index_type advance(index_type index, index_type n) const; + + // Retreats the provided position, wrapping around 0 as needed. + // Requires `index` < capacity() + inline index_type retreat(index_type index) const; + + // Retreats the provided position by 'n', wrapping around 0 as needed. + // Requires `index` < capacity() + inline index_type retreat(index_type index, index_type n) const; + + // Returns the logical begin position of entry `index` + pos_type const& entry_begin_pos(index_type index) const { + return (index == head_) ? begin_pos_ : entry_end_pos(retreat(index)); + } + + // Returns the physical start offset of entry `index` + size_t entry_start_offset(index_type index) const { + return Distance(begin_pos_, entry_begin_pos(index)); + } + + // Returns the physical end offset of entry `index` + size_t entry_end_offset(index_type index) const { + return Distance(begin_pos_, entry_end_pos(index)); + } + + // Returns the data length for entry `index` + size_t entry_length(index_type index) const { + return Distance(entry_begin_pos(index), entry_end_pos(index)); + } + + // Returns the data for entry `index` + absl::string_view entry_data(index_type index) const; + + // Returns the position for `offset` as {index, prefix}. `index` holds the + // index of the entry at the specified offset and `prefix` holds the relative + // offset inside that entry. + // Requires `offset` < length. + // + // For example we can implement GetCharacter(offset) as: + // char GetCharacter(size_t offset) { + // Position pos = this->Find(offset); + // return this->entry_data(pos.pos)[pos.offset]; + // } + inline Position Find(size_t offset) const; + + // Find starting at `head` + inline Position Find(index_type head, size_t offset) const; + + // Returns the tail position for `offset` as {tail index, suffix}. + // `tail index` holds holds the index of the entry holding the offset directly + // before 'offset` advanced by one. 'suffix` holds the relative offset from + // that relative offset in the entry to the end of the entry. + // For example, FindTail(length) will return {tail(), 0}, FindTail(length - 5) + // will return {retreat(tail), 5)} provided the preceding entry contains at + // least 5 bytes of data. + // Requires offset >= 1 && offset <= length. + // + // This function is very useful in functions that need to clip the end of some + // ring buffer such as 'RemovePrefix'. + // For example, we could implement RemovePrefix for non shared instances as: + // void RemoveSuffix(size_t n) { + // Position pos = FindTail(length - n); + // UnrefEntries(pos.pos, this->tail_); + // this->tail_ = pos.pos; + // entry(retreat(pos.pos)).end_pos -= pos.offset; + // } + inline Position FindTail(size_t offset) const; + + // Find tail starting at `head` + inline Position FindTail(index_type head, size_t offset) const; + + // Invokes f(index_type index) for each entry inside the range [head, tail> + template <typename F> + void ForEach(index_type head, index_type tail, F&& f) const { + index_type n1 = (tail > head) ? tail : capacity_; + for (index_type i = head; i < n1; ++i) f(i); + if (tail <= head) { + for (index_type i = 0; i < tail; ++i) f(i); + } + } + + // Invokes f(index_type index) for each entry inside this instance. + template <typename F> + void ForEach(F&& f) const { + ForEach(head_, tail_, std::forward<F>(f)); + } + + // Dump this instance's data tp stream `s` in human readable format, excluding + // the actual data content itself. Intended for debug purposes only. + friend std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); + + private: + enum class AddMode { kAppend, kPrepend }; + + using Layout = container_internal::Layout<pos_type, CordRep*, offset_type>; + + class Filler; + class Transaction; + class CreateTransaction; + + static constexpr size_t kLayoutAlignment = Layout::Partial().Alignment(); + + // Creates a new CordRepRing. + explicit CordRepRing(index_type capacity) : capacity_(capacity) {} + + // Returns true if `index` is a valid index into this instance. + bool IsValidIndex(index_type index) const; + + // Debug use only: validates the provided CordRepRing invariants. + // Verification of all CordRepRing methods can be enabled by defining + // EXTRA_CORD_RING_VALIDATION, i.e.: `--copts=-DEXTRA_CORD_RING_VALIDATION` + // Verification is VERY expensive, so only do it for debugging purposes. + static CordRepRing* Validate(CordRepRing* rep, const char* file = nullptr, + int line = 0); + + // Allocates a CordRepRing large enough to hold `capacity + extra' entries. + // The returned capacity may be larger if the allocated memory allows for it. + // The maximum capacity of a CordRepRing is capped at kMaxCapacity. + // Throws `std::length_error` if `capacity + extra' exceeds kMaxCapacity. + static CordRepRing* New(size_t capacity, size_t extra); + + // Deallocates (but does not destroy) the provided ring buffer. + static void Delete(CordRepRing* rep); + + // Destroys the provided ring buffer, decrementing the reference count of all + // contained child CordReps. The provided 1\`rep` should have a ref count of + // one (pre decrement destroy call observing `refcount.IsOne()`) or zero (post + // decrement destroy call observing `!refcount.Decrement()`). + static void Destroy(CordRepRing* rep); + + // Returns a mutable reference to the logical end position array. + pos_type* entry_end_pos() { + return Layout::Partial().Pointer<0>(data_); + } + + // Returns a mutable reference to the child pointer array. + CordRep** entry_child() { + return Layout::Partial(capacity()).Pointer<1>(data_); + } + + // Returns a mutable reference to the data offset array. + offset_type* entry_data_offset() { + return Layout::Partial(capacity(), capacity()).Pointer<2>(data_); + } + + // Find implementations for the non fast path 0 / length cases. + Position FindSlow(index_type head, size_t offset) const; + Position FindTailSlow(index_type head, size_t offset) const; + + // Finds the index of the first node that is inside a reasonable distance + // of the node at `offset` from which we can continue with a linear search. + template <bool wrap> + index_type FindBinary(index_type head, index_type tail, size_t offset) const; + + // Fills the current (initialized) instance from the provided source, copying + // entries [head, tail). Adds a reference to copied entries if `ref` is true. + template <bool ref> + void Fill(const CordRepRing* src, index_type head, index_type tail); + + // Create a copy of 'rep', copying all entries [head, tail), allocating room + // for `extra` entries. Adds a reference on all copied entries. + static CordRepRing* Copy(CordRepRing* rep, index_type head, index_type tail, + size_t extra = 0); + + // Returns a Mutable CordRepRing reference from `rep` with room for at least + // `extra` additional nodes. Adopts a reference count from `rep`. + // This function will return `rep` if, and only if: + // - rep.entries + extra <= rep.capacity + // - rep.refcount == 1 + // Otherwise, this function will create a new copy of `rep` with additional + // capacity to satisfy `extra` extra nodes, and unref the old `rep` instance. + // + // If a new CordRepRing can not be allocated, or the new capacity would exceed + // the maxmimum capacity, then the input is consumed only, and an exception is + // thrown. + static CordRepRing* Mutable(CordRepRing* rep, size_t extra); + + // Slow path for Append(CordRepRing* rep, CordRep* child). This function is + // exercised if the provided `child` in Append() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* AppendSlow(CordRepRing* rep, CordRep* child); + + // Appends the provided leaf node. Requires `child` to be FLAT or EXTERNAL. + static CordRepRing* AppendLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length); + + // Prepends the provided leaf node. Requires `child` to be FLAT or EXTERNAL. + static CordRepRing* PrependLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length); + + // Slow path for Prepend(CordRepRing* rep, CordRep* child). This function is + // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* PrependSlow(CordRepRing* rep, CordRep* child); + + // Slow path for Create(CordRep* child, size_t extra). This function is + // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* CreateSlow(CordRep* child, size_t extra); + + // Creates a new ring buffer from the provided `child` leaf node. Requires + // `child` to be FLAT or EXTERNAL. on `rep`. + // The returned ring buffer has a capacity of at least `1 + extra` + static CordRepRing* CreateFromLeaf(CordRep* child, size_t offset, + size_t length, size_t extra); + + // Appends or prepends (depending on AddMode) the ring buffer in `ring' to + // `rep` starting at `offset` with length `length`. + template <AddMode mode> + static CordRepRing* AddRing(CordRepRing* rep, CordRepRing* ring, + size_t offset, size_t length); + + // Increases the data offset for entry `index` by `n`. + void AddDataOffset(index_type index, size_t n); + + // Descreases the length for entry `index` by `n`. + void SubLength(index_type index, size_t n); + + index_type head_; + index_type tail_; + index_type capacity_; + pos_type begin_pos_; + + alignas(kLayoutAlignment) char data_[kLayoutAlignment]; + + friend struct CordRep; +}; + +constexpr size_t CordRepRing::AllocSize(size_t capacity) { + return sizeof(CordRepRing) - sizeof(data_) + + Layout(capacity, capacity, capacity).AllocSize(); +} + +inline constexpr size_t CordRepRing::Distance(pos_type pos, pos_type end_pos) { + return (end_pos - pos); +} + +inline const char* CordRepRing::GetLeafData(const CordRep* rep) { + return rep->tag != EXTERNAL ? rep->flat()->Data() : rep->external()->base; +} + +inline const char* CordRepRing::GetRepData(const CordRep* rep) { + if (rep->tag >= FLAT) return rep->flat()->Data(); + if (rep->tag == EXTERNAL) return rep->external()->base; + return GetLeafData(rep->substring()->child) + rep->substring()->start; +} + +inline CordRepRing::index_type CordRepRing::advance(index_type index) const { + assert(index < capacity_); + return ++index == capacity_ ? 0 : index; +} + +inline CordRepRing::index_type CordRepRing::advance(index_type index, + index_type n) const { + assert(index < capacity_ && n <= capacity_); + return (index += n) >= capacity_ ? index - capacity_ : index; +} + +inline CordRepRing::index_type CordRepRing::retreat(index_type index) const { + assert(index < capacity_); + return (index > 0 ? index : capacity_) - 1; +} + +inline CordRepRing::index_type CordRepRing::retreat(index_type index, + index_type n) const { + assert(index < capacity_ && n <= capacity_); + return index >= n ? index - n : capacity_ - n + index; +} + +inline absl::string_view CordRepRing::entry_data(index_type index) const { + size_t data_offset = entry_data_offset(index); + return {GetRepData(entry_child(index)) + data_offset, entry_length(index)}; +} + +inline bool CordRepRing::IsValidIndex(index_type index) const { + if (index >= capacity_) return false; + return (tail_ > head_) ? (index >= head_ && index < tail_) + : (index >= head_ || index < tail_); +} + +#ifndef EXTRA_CORD_RING_VALIDATION +inline CordRepRing* CordRepRing::Validate(CordRepRing* rep, + const char* /*file*/, int /*line*/) { + return rep; +} +#endif + +inline CordRepRing::Position CordRepRing::Find(size_t offset) const { + assert(offset < length); + return (offset == 0) ? Position{head_, 0} : FindSlow(head_, offset); +} + +inline CordRepRing::Position CordRepRing::Find(index_type head, + size_t offset) const { + assert(offset < length); + assert(IsValidIndex(head) && offset >= entry_start_offset(head)); + return (offset == 0) ? Position{head_, 0} : FindSlow(head, offset); +} + +inline CordRepRing::Position CordRepRing::FindTail(size_t offset) const { + assert(offset > 0 && offset <= length); + return (offset == length) ? Position{tail_, 0} : FindTailSlow(head_, offset); +} + +inline CordRepRing::Position CordRepRing::FindTail(index_type head, + size_t offset) const { + assert(offset > 0 && offset <= length); + assert(IsValidIndex(head) && offset >= entry_start_offset(head) + 1); + return (offset == length) ? Position{tail_, 0} : FindTailSlow(head, offset); +} + +// Now that CordRepRing is defined, we can define CordRep's helper casts: +inline CordRepRing* CordRep::ring() { + assert(tag == RING); + return static_cast<CordRepRing*>(this); +} + +inline const CordRepRing* CordRep::ring() const { + assert(tag == RING); + return static_cast<const CordRepRing*>(this); +} + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ diff --git a/absl/strings/internal/cord_rep_ring_reader.h b/absl/strings/internal/cord_rep_ring_reader.h new file mode 100644 index 00000000..396c0e2c --- /dev/null +++ b/absl/strings/internal/cord_rep_ring_reader.h @@ -0,0 +1,114 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepRingReader provides basic navigation over CordRepRing data. +class CordRepRingReader { + public: + // Returns true if this instance is not empty. + explicit operator bool() const { return ring_ != nullptr; } + + // Returns the ring buffer reference for this instance, or nullptr if empty. + CordRepRing* ring() const { return ring_; } + + // Returns the current node index inside the ring buffer for this instance. + // The returned value is undefined if this instance is empty. + CordRepRing::index_type index() const { return index_; } + + // Returns the length of the referenced ring buffer. + // Requires the current instance to be non empty. + size_t length() const { + assert(ring_); + return ring_->length; + } + + // Returns the end offset of the last navigated-to chunk, which represents the + // total bytes 'consumed' relative to the start of the ring. The returned + // value is never zero. For example, initializing a reader with a ring buffer + // with a first chunk of 19 bytes will return consumed() = 19. + // Requires the current instance to be non empty. + size_t consumed() const { + assert(ring_); + return ring_->entry_end_offset(index_); + } + + // Returns the number of bytes remaining beyond the last navigated-to chunk. + // Requires the current instance to be non empty. + size_t remaining() const { + assert(ring_); + return length() - consumed(); + } + + // Resets this instance to an empty value + void Reset() { ring_ = nullptr; } + + // Resets this instance to the start of `ring`. `ring` must not be null. + // Returns a reference into the first chunk of the provided ring. + absl::string_view Reset(CordRepRing* ring) { + assert(ring); + ring_ = ring; + index_ = ring_->head(); + return ring_->entry_data(index_); + } + + // Navigates to the next chunk inside the reference ring buffer. + // Returns a reference into the navigated-to chunk. + // Requires remaining() to be non zero. + absl::string_view Next() { + assert(remaining()); + index_ = ring_->advance(index_); + return ring_->entry_data(index_); + } + + // Navigates to the chunk at offset `offset`. + // Returns a reference into the navigated-to chunk, adjusted for the relative + // position of `offset` into that chunk. For example, calling Seek(13) on a + // ring buffer containing 2 chunks of 10 and 20 bytes respectively will return + // a string view into the second chunk starting at offset 3 with a size of 17. + // Requires `offset` to be less than `length()` + absl::string_view Seek(size_t offset) { + assert(offset < length()); + size_t current = ring_->entry_end_offset(index_); + CordRepRing::index_type hint = (offset >= current) ? index_ : ring_->head(); + const CordRepRing::Position head = ring_->Find(hint, offset); + index_ = head.index; + auto data = ring_->entry_data(head.index); + data.remove_prefix(head.offset); + return data; + } + + private: + CordRepRing* ring_ = nullptr; + CordRepRing::index_type index_; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index 9feb2248..e28a29b1 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // // POSIX spec: // http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index 3dbc1526..7040c866 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ diff --git a/absl/strings/internal/str_format/arg_test.cc b/absl/strings/internal/str_format/arg_test.cc index f53fd6bd..1261937c 100644 --- a/absl/strings/internal/str_format/arg_test.cc +++ b/absl/strings/internal/str_format/arg_test.cc @@ -6,6 +6,12 @@ // // https://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/arg.h" #include <ostream> diff --git a/absl/strings/internal/str_format/bind.cc b/absl/strings/internal/str_format/bind.cc index 6980ed1d..4e68b90b 100644 --- a/absl/strings/internal/str_format/bind.cc +++ b/absl/strings/internal/str_format/bind.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/bind.h" #include <cerrno> @@ -221,7 +235,7 @@ int FprintF(std::FILE* output, const UntypedFormatSpecImpl format, errno = sink.error(); return -1; } - if (sink.count() > std::numeric_limits<int>::max()) { + if (sink.count() > static_cast<size_t>(std::numeric_limits<int>::max())) { errno = EFBIG; return -1; } diff --git a/absl/strings/internal/str_format/bind.h b/absl/strings/internal/str_format/bind.h index 585246e7..267cc0ef 100644 --- a/absl/strings/internal/str_format/bind.h +++ b/absl/strings/internal/str_format/bind.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ @@ -119,10 +133,11 @@ class FormatSpecTemplate #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER - template <FormatConversionCharSet... C, - typename = typename std::enable_if< - AllOf(sizeof...(C) == sizeof...(Args), Contains(Args, - C)...)>::type> + template < + FormatConversionCharSet... C, + typename = typename std::enable_if<sizeof...(C) == sizeof...(Args)>::type, + typename = typename std::enable_if<AllOf(Contains(Args, + C)...)>::type> FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT : Base(&pc) {} }; diff --git a/absl/strings/internal/str_format/bind_test.cc b/absl/strings/internal/str_format/bind_test.cc index 64790a85..1eef9c43 100644 --- a/absl/strings/internal/str_format/bind_test.cc +++ b/absl/strings/internal/str_format/bind_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/bind.h" #include <string.h> diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h index 424c51f7..2a2601ec 100644 --- a/absl/strings/internal/str_format/checker.h +++ b/absl/strings/internal/str_format/checker.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ diff --git a/absl/strings/internal/str_format/checker_test.cc b/absl/strings/internal/str_format/checker_test.cc index a76d70b0..7c70f47d 100644 --- a/absl/strings/internal/str_format/checker_test.cc +++ b/absl/strings/internal/str_format/checker_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include <string> #include "gmock/gmock.h" diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc index 634ee78b..926283cf 100644 --- a/absl/strings/internal/str_format/convert_test.cc +++ b/absl/strings/internal/str_format/convert_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include <errno.h> #include <stdarg.h> #include <stdio.h> @@ -540,7 +554,8 @@ TEST_F(FormatConvertTest, Uint128) { } template <typename Floating> -void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats) { +void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats, + const std::set<Floating> &skip_verify) { const NativePrintfTraits &native_traits = VerifyNativeImplementation(); // Reserve the space to ensure we don't allocate memory in the output itself. std::string str_format_result; @@ -588,7 +603,16 @@ void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats) { AppendPack(&str_format_result, format, absl::MakeSpan(args)); } - if (string_printf_result != str_format_result) { +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + continue; +#elif defined(__APPLE__) + // Apple formats NaN differently (+nan) vs. (nan) + if (std::isnan(d)) continue; +#endif + if (string_printf_result != str_format_result && + skip_verify.find(d) == skip_verify.end()) { // We use ASSERT_EQ here because failures are usually correlated and a // bug would print way too many failed expectations causing the test // to time out. @@ -602,12 +626,6 @@ void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats) { } TEST_F(FormatConvertTest, Float) { -#ifdef _MSC_VER - // MSVC has a different rounding policy than us so we can't test our - // implementation against the native one there. - return; -#endif // _MSC_VER - std::vector<float> floats = {0.0f, -0.0f, .9999999f, @@ -621,7 +639,8 @@ TEST_F(FormatConvertTest, Float) { std::numeric_limits<float>::epsilon(), std::numeric_limits<float>::epsilon() + 1.0f, std::numeric_limits<float>::infinity(), - -std::numeric_limits<float>::infinity()}; + -std::numeric_limits<float>::infinity(), + std::nanf("")}; // Some regression tests. floats.push_back(0.999999989f); @@ -650,21 +669,14 @@ TEST_F(FormatConvertTest, Float) { std::sort(floats.begin(), floats.end()); floats.erase(std::unique(floats.begin(), floats.end()), floats.end()); -#ifndef __APPLE__ - // Apple formats NaN differently (+nan) vs. (nan) - floats.push_back(std::nan("")); -#endif - - TestWithMultipleFormatsHelper(floats); + TestWithMultipleFormatsHelper(floats, {}); } TEST_F(FormatConvertTest, Double) { -#ifdef _MSC_VER - // MSVC has a different rounding policy than us so we can't test our - // implementation against the native one there. - return; -#endif // _MSC_VER - + // For values that we know won't match the standard library implementation we + // skip verification, but still run the algorithm to catch asserts/sanitizer + // bugs. + std::set<double> skip_verify; std::vector<double> doubles = {0.0, -0.0, .99999999999999, @@ -678,7 +690,8 @@ TEST_F(FormatConvertTest, Double) { std::numeric_limits<double>::epsilon(), std::numeric_limits<double>::epsilon() + 1, std::numeric_limits<double>::infinity(), - -std::numeric_limits<double>::infinity()}; + -std::numeric_limits<double>::infinity(), + std::nan("")}; // Some regression tests. doubles.push_back(0.99999999999999989); @@ -708,33 +721,29 @@ TEST_F(FormatConvertTest, Double) { "5084551339423045832369032229481658085593321233482747978262041447231" "68738177180919299881250404026184124858368.000000"; - if (!gcc_bug_22142) { - for (int exp = -300; exp <= 300; ++exp) { - const double all_ones_mantissa = 0x1fffffffffffff; - doubles.push_back(std::ldexp(all_ones_mantissa, exp)); + for (int exp = -300; exp <= 300; ++exp) { + const double all_ones_mantissa = 0x1fffffffffffff; + doubles.push_back(std::ldexp(all_ones_mantissa, exp)); + if (gcc_bug_22142) { + skip_verify.insert(doubles.back()); } } if (gcc_bug_22142) { - for (auto &d : doubles) { - using L = std::numeric_limits<double>; - double d2 = std::abs(d); - if (d2 == L::max() || d2 == L::min() || d2 == L::denorm_min()) { - d = 0; - } - } + using L = std::numeric_limits<double>; + skip_verify.insert(L::max()); + skip_verify.insert(L::min()); // NOLINT + skip_verify.insert(L::denorm_min()); + skip_verify.insert(-L::max()); + skip_verify.insert(-L::min()); // NOLINT + skip_verify.insert(-L::denorm_min()); } // Remove duplicates to speed up the logic below. std::sort(doubles.begin(), doubles.end()); doubles.erase(std::unique(doubles.begin(), doubles.end()), doubles.end()); -#ifndef __APPLE__ - // Apple formats NaN differently (+nan) vs. (nan) - doubles.push_back(std::nan("")); -#endif - - TestWithMultipleFormatsHelper(doubles); + TestWithMultipleFormatsHelper(doubles, skip_verify); } TEST_F(FormatConvertTest, DoubleRound) { @@ -1055,11 +1064,6 @@ TEST_F(FormatConvertTest, ExtremeWidthPrecision) { } TEST_F(FormatConvertTest, LongDouble) { -#ifdef _MSC_VER - // MSVC has a different rounding policy than us so we can't test our - // implementation against the native one there. - return; -#endif // _MSC_VER const NativePrintfTraits &native_traits = VerifyNativeImplementation(); const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", "%.5000", "%.60", "%+", "% ", "%-10"}; @@ -1120,10 +1124,18 @@ TEST_F(FormatConvertTest, LongDouble) { for (auto d : doubles) { FormatArgImpl arg(d); UntypedFormatSpecImpl format(fmt_str); + std::string result = FormatPack(format, {&arg, 1}); + +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + continue; +#endif // _MSC_VER + // We use ASSERT_EQ here because failures are usually correlated and a // bug would print way too many failed expectations causing the test to // time out. - ASSERT_EQ(StrPrint(fmt_str.c_str(), d), FormatPack(format, {&arg, 1})) + ASSERT_EQ(StrPrint(fmt_str.c_str(), d), result) << fmt_str << " " << StrPrint("%.18Lg", d) << " " << StrPrint("%La", d) << " " << StrPrint("%.1080Lf", d); } diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc index 20aeada5..b1c40684 100644 --- a/absl/strings/internal/str_format/float_conversion.cc +++ b/absl/strings/internal/str_format/float_conversion.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/float_conversion.h" #include <string.h> @@ -10,11 +24,12 @@ #include "absl/base/attributes.h" #include "absl/base/config.h" -#include "absl/base/internal/bits.h" #include "absl/base/optimization.h" #include "absl/functional/function_ref.h" #include "absl/meta/type_traits.h" +#include "absl/numeric/bits.h" #include "absl/numeric/int128.h" +#include "absl/numeric/internal/representation.h" #include "absl/strings/numbers.h" #include "absl/types/optional.h" #include "absl/types/span.h" @@ -25,6 +40,8 @@ namespace str_format_internal { namespace { +using ::absl::numeric_internal::IsDoubleDouble; + // The code below wants to avoid heap allocations. // To do so it needs to allocate memory on the stack. // `StackArray` will allocate memory on the stack in the form of a uint32_t @@ -98,12 +115,15 @@ inline uint64_t DivideBy10WithCarry(uint64_t *v, uint64_t carry) { return next_carry % divisor; } +using MaxFloatType = + typename std::conditional<IsDoubleDouble(), double, long double>::type; + // Generates the decimal representation for an integer of the form `v * 2^exp`, // where `v` and `exp` are both positive integers. // It generates the digits from the left (ie the most significant digit first) // to allow for direct printing into the sink. // -// Requires `0 <= exp` and `exp <= numeric_limits<long double>::max_exponent`. +// Requires `0 <= exp` and `exp <= numeric_limits<MaxFloatType>::max_exponent`. class BinaryToDecimal { static constexpr int ChunksNeeded(int exp) { // We will left shift a uint128 by `exp` bits, so we need `128+exp` total @@ -118,10 +138,10 @@ class BinaryToDecimal { static void RunConversion(uint128 v, int exp, absl::FunctionRef<void(BinaryToDecimal)> f) { assert(exp > 0); - assert(exp <= std::numeric_limits<long double>::max_exponent); + assert(exp <= std::numeric_limits<MaxFloatType>::max_exponent); static_assert( - StackArray::kMaxCapacity >= - ChunksNeeded(std::numeric_limits<long double>::max_exponent), + static_cast<int>(StackArray::kMaxCapacity) >= + ChunksNeeded(std::numeric_limits<MaxFloatType>::max_exponent), ""); StackArray::RunWithCapacity( @@ -218,14 +238,14 @@ class BinaryToDecimal { // Converts a value of the form `x * 2^-exp` into a sequence of decimal digits. // Requires `-exp < 0` and -// `-exp >= limits<long double>::min_exponent - limits<long double>::digits`. +// `-exp >= limits<MaxFloatType>::min_exponent - limits<MaxFloatType>::digits`. class FractionalDigitGenerator { public: // Run the conversion for `v * 2^exp` and call `f(generator)`. // This function will allocate enough stack space to perform the conversion. static void RunConversion( uint128 v, int exp, absl::FunctionRef<void(FractionalDigitGenerator)> f) { - using Limits = std::numeric_limits<long double>; + using Limits = std::numeric_limits<MaxFloatType>; assert(-exp < 0); assert(-exp >= Limits::min_exponent - 128); static_assert(StackArray::kMaxCapacity >= @@ -301,12 +321,11 @@ class FractionalDigitGenerator { }; // Count the number of leading zero bits. -int LeadingZeros(uint64_t v) { return base_internal::CountLeadingZeros64(v); } +int LeadingZeros(uint64_t v) { return countl_zero(v); } int LeadingZeros(uint128 v) { auto high = static_cast<uint64_t>(v >> 64); auto low = static_cast<uint64_t>(v); - return high != 0 ? base_internal::CountLeadingZeros64(high) - : 64 + base_internal::CountLeadingZeros64(low); + return high != 0 ? countl_zero(high) : 64 + countl_zero(low); } // Round up the text digits starting at `p`. @@ -858,10 +877,10 @@ void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp, // This buffer holds the "0x1.ab1de3" portion of "0x1.ab1de3pe+2". Compute the // size with long double which is the largest of the floats. constexpr size_t kBufSizeForHexFloatRepr = - 2 // 0x - + std::numeric_limits<long double>::digits / 4 // number of hex digits - + 1 // round up - + 1; // "." (dot) + 2 // 0x + + std::numeric_limits<MaxFloatType>::digits / 4 // number of hex digits + + 1 // round up + + 1; // "." (dot) char digits_buffer[kBufSizeForHexFloatRepr]; char *digits_iter = digits_buffer; const char *const digits = @@ -1380,10 +1399,9 @@ bool FloatToSink(const Float v, const FormatConversionSpecImpl &conv, bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { - if (std::numeric_limits<long double>::digits == - 2 * std::numeric_limits<double>::digits) { - // This is the `double-double` representation of `long double`. - // We do not handle it natively. Fallback to snprintf. + if (IsDoubleDouble()) { + // This is the `double-double` representation of `long double`. We do not + // handle it natively. Fallback to snprintf. return FallbackToSnprintf(v, conv, sink); } diff --git a/absl/strings/internal/str_format/float_conversion.h b/absl/strings/internal/str_format/float_conversion.h index e78bc191..71100e71 100644 --- a/absl/strings/internal/str_format/float_conversion.h +++ b/absl/strings/internal/str_format/float_conversion.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index cc55dfa9..f308d023 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/parser.h" #include <assert.h> diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index fffed04f..6504dd3d 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index 5aced987..a5fa1c79 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/parser.h" #include <string.h> diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h index 6f5bc095..a2f41c15 100644 --- a/absl/strings/internal/str_split_internal.h +++ b/absl/strings/internal/str_split_internal.h @@ -51,9 +51,9 @@ ABSL_NAMESPACE_BEGIN namespace strings_internal { // This class is implicitly constructible from everything that absl::string_view -// is implicitly constructible from. If it's constructed from a temporary -// string, the data is moved into a data member so its lifetime matches that of -// the ConvertibleToStringView instance. +// is implicitly constructible from, except for rvalue strings. This means it +// can be used as a function parameter in places where passing a temporary +// string might cause memory lifetime issues. class ConvertibleToStringView { public: ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) @@ -65,41 +65,12 @@ class ConvertibleToStringView { : value_(s) {} // Matches rvalue strings and moves their data to a member. - ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) - : copy_(std::move(s)), value_(copy_) {} - - ConvertibleToStringView(const ConvertibleToStringView& other) - : copy_(other.copy_), - value_(other.IsSelfReferential() ? copy_ : other.value_) {} - - ConvertibleToStringView(ConvertibleToStringView&& other) { - StealMembers(std::move(other)); - } - - ConvertibleToStringView& operator=(ConvertibleToStringView other) { - StealMembers(std::move(other)); - return *this; - } + ConvertibleToStringView(std::string&& s) = delete; + ConvertibleToStringView(const std::string&& s) = delete; absl::string_view value() const { return value_; } private: - // Returns true if ctsp's value refers to its internal copy_ member. - bool IsSelfReferential() const { return value_.data() == copy_.data(); } - - void StealMembers(ConvertibleToStringView&& other) { - if (other.IsSelfReferential()) { - copy_ = std::move(other.copy_); - value_ = copy_; - other.value_ = other.copy_; - } else { - value_ = other.value_; - } - } - - // Holds the data moved from temporary std::string arguments. Declared first - // so that 'value' can refer to 'copy_'. - std::string copy_; absl::string_view value_; }; @@ -273,7 +244,11 @@ struct SplitterIsConvertibleTo // the split strings: only strings for which the predicate returns true will be // kept. A Predicate object is any unary functor that takes an absl::string_view // and returns bool. -template <typename Delimiter, typename Predicate> +// +// The StringType parameter can be either string_view or string, depending on +// whether the Splitter refers to a string stored elsewhere, or if the string +// resides inside the Splitter itself. +template <typename Delimiter, typename Predicate, typename StringType> class Splitter { public: using DelimiterType = Delimiter; @@ -281,12 +256,12 @@ class Splitter { using const_iterator = strings_internal::SplitIterator<Splitter>; using value_type = typename std::iterator_traits<const_iterator>::value_type; - Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p) + Splitter(StringType input_text, Delimiter d, Predicate p) : text_(std::move(input_text)), delimiter_(std::move(d)), predicate_(std::move(p)) {} - absl::string_view text() const { return text_.value(); } + absl::string_view text() const { return text_; } const Delimiter& delimiter() const { return delimiter_; } const Predicate& predicate() const { return predicate_; } @@ -336,7 +311,7 @@ class Splitter { Container operator()(const Splitter& splitter) const { Container c; auto it = std::inserter(c, c.end()); - for (const auto sp : splitter) { + for (const auto& sp : splitter) { *it++ = ValueType(sp); } return c; @@ -401,7 +376,7 @@ class Splitter { Container m; typename Container::iterator it; bool insert = true; - for (const auto sp : splitter) { + for (const auto& sp : splitter) { if (insert) { it = Inserter<Container>::Insert(&m, First(sp), Second()); } else { @@ -443,7 +418,7 @@ class Splitter { }; }; - ConvertibleToStringView text_; + StringType text_; Delimiter delimiter_; Predicate predicate_; }; diff --git a/absl/strings/internal/string_constant.h b/absl/strings/internal/string_constant.h new file mode 100644 index 00000000..a11336b7 --- /dev/null +++ b/absl/strings/internal/string_constant.h @@ -0,0 +1,64 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ +#define ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ + +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// StringConstant<T> represents a compile time string constant. +// It can be accessed via its `absl::string_view value` static member. +// It is guaranteed that the `string_view` returned has constant `.data()`, +// constant `.size()` and constant `value[i]` for all `0 <= i < .size()` +// +// The `T` is an opaque type. It is guaranteed that different string constants +// will have different values of `T`. This allows users to associate the string +// constant with other static state at compile time. +// +// Instances should be made using the `MakeStringConstant()` factory function +// below. +template <typename T> +struct StringConstant { + static constexpr absl::string_view value = T{}(); + constexpr absl::string_view operator()() const { return value; } + + // Check to be sure `view` points to constant data. + // Otherwise, it can't be constant evaluated. + static_assert(value.empty() || 2 * value[0] != 1, + "The input string_view must point to constant data."); +}; + +template <typename T> +constexpr absl::string_view StringConstant<T>::value; // NOLINT + +// Factory function for `StringConstant` instances. +// It supports callables that have a constexpr default constructor and a +// constexpr operator(). +// It must return an `absl::string_view` or `const char*` pointing to constant +// data. This is validated at compile time. +template <typename T> +constexpr StringConstant<T> MakeStringConstant(T) { + return {}; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ diff --git a/absl/strings/internal/string_constant_test.cc b/absl/strings/internal/string_constant_test.cc new file mode 100644 index 00000000..392833cf --- /dev/null +++ b/absl/strings/internal/string_constant_test.cc @@ -0,0 +1,60 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/string_constant.h" + +#include "absl/meta/type_traits.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +using absl::strings_internal::MakeStringConstant; + +struct Callable { + constexpr absl::string_view operator()() const { + return absl::string_view("Callable", 8); + } +}; + +TEST(StringConstant, Traits) { + constexpr auto str = MakeStringConstant(Callable{}); + using T = decltype(str); + + EXPECT_TRUE(std::is_empty<T>::value); + EXPECT_TRUE(std::is_trivial<T>::value); + EXPECT_TRUE(absl::is_trivially_default_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_copy_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_move_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_destructible<T>::value); +} + +TEST(StringConstant, MakeFromCallable) { + constexpr auto str = MakeStringConstant(Callable{}); + using T = decltype(str); + EXPECT_EQ(Callable{}(), T::value); + EXPECT_EQ(Callable{}(), str()); +} + +TEST(StringConstant, MakeFromStringConstant) { + // We want to make sure the StringConstant itself is a valid input to the + // factory function. + constexpr auto str = MakeStringConstant(Callable{}); + constexpr auto str2 = MakeStringConstant(str); + using T = decltype(str2); + EXPECT_EQ(Callable{}(), T::value); + EXPECT_EQ(Callable{}(), str2()); +} + +} // namespace diff --git a/absl/strings/match.cc b/absl/strings/match.cc index 8127cb0c..2d672509 100644 --- a/absl/strings/match.cc +++ b/absl/strings/match.cc @@ -19,19 +19,22 @@ namespace absl { ABSL_NAMESPACE_BEGIN -bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2) { +bool EqualsIgnoreCase(absl::string_view piece1, + absl::string_view piece2) noexcept { return (piece1.size() == piece2.size() && 0 == absl::strings_internal::memcasecmp(piece1.data(), piece2.data(), piece1.size())); // memcasecmp uses absl::ascii_tolower(). } -bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix) { +bool StartsWithIgnoreCase(absl::string_view text, + absl::string_view prefix) noexcept { return (text.size() >= prefix.size()) && EqualsIgnoreCase(text.substr(0, prefix.size()), prefix); } -bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix) { +bool EndsWithIgnoreCase(absl::string_view text, + absl::string_view suffix) noexcept { return (text.size() >= suffix.size()) && EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix); } diff --git a/absl/strings/match.h b/absl/strings/match.h index 90fca98a..038cbb3f 100644 --- a/absl/strings/match.h +++ b/absl/strings/match.h @@ -43,14 +43,20 @@ ABSL_NAMESPACE_BEGIN // StrContains() // // Returns whether a given string `haystack` contains the substring `needle`. -inline bool StrContains(absl::string_view haystack, absl::string_view needle) { +inline bool StrContains(absl::string_view haystack, + absl::string_view needle) noexcept { return haystack.find(needle, 0) != haystack.npos; } +inline bool StrContains(absl::string_view haystack, char needle) noexcept { + return haystack.find(needle) != haystack.npos; +} + // StartsWith() // // Returns whether a given string `text` begins with `prefix`. -inline bool StartsWith(absl::string_view text, absl::string_view prefix) { +inline bool StartsWith(absl::string_view text, + absl::string_view prefix) noexcept { return prefix.empty() || (text.size() >= prefix.size() && memcmp(text.data(), prefix.data(), prefix.size()) == 0); @@ -59,7 +65,8 @@ inline bool StartsWith(absl::string_view text, absl::string_view prefix) { // EndsWith() // // Returns whether a given string `text` ends with `suffix`. -inline bool EndsWith(absl::string_view text, absl::string_view suffix) { +inline bool EndsWith(absl::string_view text, + absl::string_view suffix) noexcept { return suffix.empty() || (text.size() >= suffix.size() && memcmp(text.data() + (text.size() - suffix.size()), suffix.data(), @@ -70,19 +77,22 @@ inline bool EndsWith(absl::string_view text, absl::string_view suffix) { // // Returns whether given ASCII strings `piece1` and `piece2` are equal, ignoring // case in the comparison. -bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2); +bool EqualsIgnoreCase(absl::string_view piece1, + absl::string_view piece2) noexcept; // StartsWithIgnoreCase() // // Returns whether a given ASCII string `text` starts with `prefix`, // ignoring case in the comparison. -bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix); +bool StartsWithIgnoreCase(absl::string_view text, + absl::string_view prefix) noexcept; // EndsWithIgnoreCase() // // Returns whether a given ASCII string `text` ends with `suffix`, ignoring // case in the comparison. -bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix); +bool EndsWithIgnoreCase(absl::string_view text, + absl::string_view suffix) noexcept; ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc index 4c313dda..5841bc1b 100644 --- a/absl/strings/match_test.cc +++ b/absl/strings/match_test.cc @@ -66,6 +66,23 @@ TEST(MatchTest, Contains) { EXPECT_FALSE(absl::StrContains("", "a")); } +TEST(MatchTest, ContainsChar) { + absl::string_view a("abcdefg"); + absl::string_view b("abcd"); + EXPECT_TRUE(absl::StrContains(a, 'a')); + EXPECT_TRUE(absl::StrContains(a, 'b')); + EXPECT_TRUE(absl::StrContains(a, 'e')); + EXPECT_FALSE(absl::StrContains(a, 'h')); + + EXPECT_TRUE(absl::StrContains(b, 'a')); + EXPECT_TRUE(absl::StrContains(b, 'b')); + EXPECT_FALSE(absl::StrContains(b, 'e')); + EXPECT_FALSE(absl::StrContains(b, 'h')); + + EXPECT_FALSE(absl::StrContains("", 'a')); + EXPECT_FALSE(absl::StrContains("", 'a')); +} + TEST(MatchTest, ContainsNull) { const std::string s = "foo"; const char* cs = "foo"; diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 68c26dd6..966d94bd 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -31,8 +31,8 @@ #include <utility> #include "absl/base/attributes.h" -#include "absl/base/internal/bits.h" #include "absl/base/internal/raw_logging.h" +#include "absl/numeric/bits.h" #include "absl/strings/ascii.h" #include "absl/strings/charconv.h" #include "absl/strings/escaping.h" @@ -46,8 +46,13 @@ ABSL_NAMESPACE_BEGIN bool SimpleAtof(absl::string_view str, float* out) { *out = 0.0; str = StripAsciiWhitespace(str); + // std::from_chars doesn't accept an initial +, but SimpleAtof does, so if one + // is present, skip it, while avoiding accepting "+-0" as valid. if (!str.empty() && str[0] == '+') { str.remove_prefix(1); + if (!str.empty() && str[0] == '-') { + return false; + } } auto result = absl::from_chars(str.data(), str.data() + str.size(), *out); if (result.ec == std::errc::invalid_argument) { @@ -72,8 +77,13 @@ bool SimpleAtof(absl::string_view str, float* out) { bool SimpleAtod(absl::string_view str, double* out) { *out = 0.0; str = StripAsciiWhitespace(str); + // std::from_chars doesn't accept an initial +, but SimpleAtod does, so if one + // is present, skip it, while avoiding accepting "+-0" as valid. if (!str.empty() && str[0] == '+') { str.remove_prefix(1); + if (!str.empty() && str[0] == '-') { + return false; + } } auto result = absl::from_chars(str.data(), str.data() + str.size(), *out); if (result.ec == std::errc::invalid_argument) { @@ -303,7 +313,7 @@ static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num, uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95); if (bits128_up == 0) return {bits64_127, bits0_63}; - int shift = 64 - base_internal::CountLeadingZeros64(bits128_up); + auto shift = static_cast<unsigned>(bit_width(bits128_up)); uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift)); uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift)); return {hi, lo}; @@ -334,7 +344,7 @@ static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) { 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5}; result = Mul32(result, powers_of_five[expfive & 15]); - int shift = base_internal::CountLeadingZeros64(result.first); + int shift = countl_zero(result.first); if (shift != 0) { result.first = (result.first << shift) + (result.second >> (64 - shift)); result.second = (result.second << shift); @@ -736,9 +746,18 @@ struct LookupTables { X / 35, X / 36, \ } +// This kVmaxOverBase is generated with +// for (int base = 2; base < 37; ++base) { +// absl::uint128 max = std::numeric_limits<absl::uint128>::max(); +// auto result = max / base; +// std::cout << " MakeUint128(" << absl::Uint128High64(result) << "u, " +// << absl::Uint128Low64(result) << "u),\n"; +// } +// See https://godbolt.org/z/aneYsb +// // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting // array to avoid a static initializer. -template <> +template<> const uint128 LookupTables<uint128>::kVmaxOverBase[] = { 0, 0, @@ -779,6 +798,111 @@ const uint128 LookupTables<uint128>::kVmaxOverBase[] = { MakeUint128(512409557603043100u, 8198552921648689607u), }; +// This kVmaxOverBase generated with +// for (int base = 2; base < 37; ++base) { +// absl::int128 max = std::numeric_limits<absl::int128>::max(); +// auto result = max / base; +// std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", " +// << absl::Int128Low64(result) << "u),\n"; +// } +// See https://godbolt.org/z/7djYWz +// +// int128& operator/=(int128) is not constexpr, so hardcode the resulting array +// to avoid a static initializer. +template<> +const int128 LookupTables<int128>::kVmaxOverBase[] = { + 0, + 0, + MakeInt128(4611686018427387903, 18446744073709551615u), + MakeInt128(3074457345618258602, 12297829382473034410u), + MakeInt128(2305843009213693951, 18446744073709551615u), + MakeInt128(1844674407370955161, 11068046444225730969u), + MakeInt128(1537228672809129301, 6148914691236517205u), + MakeInt128(1317624576693539401, 2635249153387078802u), + MakeInt128(1152921504606846975, 18446744073709551615u), + MakeInt128(1024819115206086200, 16397105843297379214u), + MakeInt128(922337203685477580, 14757395258967641292u), + MakeInt128(838488366986797800, 13415813871788764811u), + MakeInt128(768614336404564650, 12297829382473034410u), + MakeInt128(709490156681136600, 11351842506898185609u), + MakeInt128(658812288346769700, 10540996613548315209u), + MakeInt128(614891469123651720, 9838263505978427528u), + MakeInt128(576460752303423487, 18446744073709551615u), + MakeInt128(542551296285575047, 9765923333140350855u), + MakeInt128(512409557603043100, 8198552921648689607u), + MakeInt128(485440633518672410, 17475862806672206794u), + MakeInt128(461168601842738790, 7378697629483820646u), + MakeInt128(439208192231179800, 7027331075698876806u), + MakeInt128(419244183493398900, 6707906935894382405u), + MakeInt128(401016175515425035, 2406097053092550210u), + MakeInt128(384307168202282325, 6148914691236517205u), + MakeInt128(368934881474191032, 5902958103587056517u), + MakeInt128(354745078340568300, 5675921253449092804u), + MakeInt128(341606371735362066, 17763531330238827482u), + MakeInt128(329406144173384850, 5270498306774157604u), + MakeInt128(318047311615681924, 7633135478776366185u), + MakeInt128(307445734561825860, 4919131752989213764u), + MakeInt128(297528130221121800, 4760450083537948804u), + MakeInt128(288230376151711743, 18446744073709551615u), + MakeInt128(279496122328932600, 4471937957262921603u), + MakeInt128(271275648142787523, 14106333703424951235u), + MakeInt128(263524915338707880, 4216398645419326083u), + MakeInt128(256204778801521550, 4099276460824344803u), +}; + +// This kVminOverBase generated with +// for (int base = 2; base < 37; ++base) { +// absl::int128 min = std::numeric_limits<absl::int128>::min(); +// auto result = min / base; +// std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", " +// << absl::Int128Low64(result) << "u),\n"; +// } +// +// See https://godbolt.org/z/7djYWz +// +// int128& operator/=(int128) is not constexpr, so hardcode the resulting array +// to avoid a static initializer. +template<> +const int128 LookupTables<int128>::kVminOverBase[] = { + 0, + 0, + MakeInt128(-4611686018427387904, 0u), + MakeInt128(-3074457345618258603, 6148914691236517206u), + MakeInt128(-2305843009213693952, 0u), + MakeInt128(-1844674407370955162, 7378697629483820647u), + MakeInt128(-1537228672809129302, 12297829382473034411u), + MakeInt128(-1317624576693539402, 15811494920322472814u), + MakeInt128(-1152921504606846976, 0u), + MakeInt128(-1024819115206086201, 2049638230412172402u), + MakeInt128(-922337203685477581, 3689348814741910324u), + MakeInt128(-838488366986797801, 5030930201920786805u), + MakeInt128(-768614336404564651, 6148914691236517206u), + MakeInt128(-709490156681136601, 7094901566811366007u), + MakeInt128(-658812288346769701, 7905747460161236407u), + MakeInt128(-614891469123651721, 8608480567731124088u), + MakeInt128(-576460752303423488, 0u), + MakeInt128(-542551296285575048, 8680820740569200761u), + MakeInt128(-512409557603043101, 10248191152060862009u), + MakeInt128(-485440633518672411, 970881267037344822u), + MakeInt128(-461168601842738791, 11068046444225730970u), + MakeInt128(-439208192231179801, 11419412998010674810u), + MakeInt128(-419244183493398901, 11738837137815169211u), + MakeInt128(-401016175515425036, 16040647020617001406u), + MakeInt128(-384307168202282326, 12297829382473034411u), + MakeInt128(-368934881474191033, 12543785970122495099u), + MakeInt128(-354745078340568301, 12770822820260458812u), + MakeInt128(-341606371735362067, 683212743470724134u), + MakeInt128(-329406144173384851, 13176245766935394012u), + MakeInt128(-318047311615681925, 10813608594933185431u), + MakeInt128(-307445734561825861, 13527612320720337852u), + MakeInt128(-297528130221121801, 13686293990171602812u), + MakeInt128(-288230376151711744, 0u), + MakeInt128(-279496122328932601, 13974806116446630013u), + MakeInt128(-271275648142787524, 4340410370284600381u), + MakeInt128(-263524915338707881, 14230345428290225533u), + MakeInt128(-256204778801521551, 14347467612885206813u), +}; + template <typename IntType> const IntType LookupTables<IntType>::kVmaxOverBase[] = X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max()); @@ -948,6 +1072,10 @@ bool safe_strto64_base(absl::string_view text, int64_t* value, int base) { return safe_int_internal<int64_t>(text, value, base); } +bool safe_strto128_base(absl::string_view text, int128* value, int base) { + return safe_int_internal<absl::int128>(text, value, base); +} + bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) { return safe_uint_internal<uint32_t>(text, value, base); } diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h index d872cca5..1780bb44 100644 --- a/absl/strings/numbers.h +++ b/absl/strings/numbers.h @@ -1,4 +1,3 @@ -// // Copyright 2017 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,7 +36,6 @@ #include <type_traits> #include "absl/base/config.h" -#include "absl/base/internal/bits.h" #ifdef __SSE4_2__ // TODO(jorg): Remove this when we figure out the right way // to swap bytes on SSE 4.2 that works with the compilers @@ -48,6 +46,7 @@ #endif #include "absl/base/macros.h" #include "absl/base/port.h" +#include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/strings/string_view.h" @@ -125,8 +124,11 @@ inline void PutTwoDigits(size_t i, char* buf) { } // safe_strto?() functions for implementing SimpleAtoi() + bool safe_strto32_base(absl::string_view text, int32_t* value, int base); bool safe_strto64_base(absl::string_view text, int64_t* value, int base); +bool safe_strto128_base(absl::string_view text, absl::int128* value, + int base); bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base); bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base); bool safe_strtou128_base(absl::string_view text, absl::uint128* value, @@ -238,24 +240,22 @@ inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) { } #endif // | 0x1 so that even 0 has 1 digit. - return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4; + return 16 - countl_zero(val | 0x1) / 4; } } // namespace numbers_internal -// SimpleAtoi() -// -// Converts a string to an integer, using `safe_strto?()` functions for actual -// parsing, returning `true` if successful. The `safe_strto?()` functions apply -// strict checking; the string must be a base-10 integer, optionally followed or -// preceded by ASCII whitespace, with a value in the range of the corresponding -// integer type. template <typename int_type> ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out) { return numbers_internal::safe_strtoi_base(str, out, 10); } ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str, + absl::int128* out) { + return numbers_internal::safe_strto128_base(str, out, 10); +} + +ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str, absl::uint128* out) { return numbers_internal::safe_strtou128_base(str, out, 10); } diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index c2f03b63..f3103106 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -46,6 +46,7 @@ namespace { +using absl::SimpleAtoi; using absl::numbers_internal::kSixDigitsToBufferSize; using absl::numbers_internal::safe_strto32_base; using absl::numbers_internal::safe_strto64_base; @@ -55,7 +56,6 @@ using absl::numbers_internal::SixDigitsToBuffer; using absl::strings_internal::Itoa; using absl::strings_internal::strtouint32_test_cases; using absl::strings_internal::strtouint64_test_cases; -using absl::SimpleAtoi; using testing::Eq; using testing::MatchesRegex; @@ -251,7 +251,7 @@ TEST(Numbers, TestFastPrints) { template <typename int_type, typename in_val_type> void VerifySimpleAtoiGood(in_val_type in_value, int_type exp_value) { std::string s; - // uint128 can be streamed but not StrCat'd + // (u)int128 can be streamed but not StrCat'd. absl::strings_internal::OStringStream(&s) << in_value; int_type x = static_cast<int_type>(~exp_value); EXPECT_TRUE(SimpleAtoi(s, &x)) @@ -264,7 +264,9 @@ void VerifySimpleAtoiGood(in_val_type in_value, int_type exp_value) { template <typename int_type, typename in_val_type> void VerifySimpleAtoiBad(in_val_type in_value) { - std::string s = absl::StrCat(in_value); + std::string s; + // (u)int128 can be streamed but not StrCat'd. + absl::strings_internal::OStringStream(&s) << in_value; int_type x; EXPECT_FALSE(SimpleAtoi(s, &x)); EXPECT_FALSE(SimpleAtoi(s.c_str(), &x)); @@ -347,13 +349,38 @@ TEST(NumbersTest, Atoi) { std::numeric_limits<absl::uint128>::max(), std::numeric_limits<absl::uint128>::max()); + // SimpleAtoi(absl::string_view, absl::int128) + VerifySimpleAtoiGood<absl::int128>(0, 0); + VerifySimpleAtoiGood<absl::int128>(42, 42); + VerifySimpleAtoiGood<absl::int128>(-42, -42); + + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + VerifySimpleAtoiGood<absl::int128>( + std::numeric_limits<absl::int128>::min(), + std::numeric_limits<absl::int128>::min()); + VerifySimpleAtoiGood<absl::int128>( + std::numeric_limits<absl::int128>::max(), + std::numeric_limits<absl::int128>::max()); + VerifySimpleAtoiBad<absl::int128>(std::numeric_limits<absl::uint128>::max()); + // Some other types VerifySimpleAtoiGood<int>(-42, -42); VerifySimpleAtoiGood<int32_t>(-42, -42); VerifySimpleAtoiGood<uint32_t>(42, 42); VerifySimpleAtoiGood<unsigned int>(42, 42); VerifySimpleAtoiGood<int64_t>(-42, -42); - VerifySimpleAtoiGood<long>(-42, -42); // NOLINT(runtime/int) + VerifySimpleAtoiGood<long>(-42, -42); // NOLINT: runtime-int VerifySimpleAtoiGood<uint64_t>(42, 42); VerifySimpleAtoiGood<size_t>(42, 42); VerifySimpleAtoiGood<std::string::size_type>(42, 42); @@ -365,6 +392,28 @@ TEST(NumbersTest, Atod) { EXPECT_TRUE(std::isnan(d)); } +TEST(NumbersTest, Prefixes) { + double d; + EXPECT_FALSE(absl::SimpleAtod("++1", &d)); + EXPECT_FALSE(absl::SimpleAtod("+-1", &d)); + EXPECT_FALSE(absl::SimpleAtod("-+1", &d)); + EXPECT_FALSE(absl::SimpleAtod("--1", &d)); + EXPECT_TRUE(absl::SimpleAtod("-1", &d)); + EXPECT_EQ(d, -1.); + EXPECT_TRUE(absl::SimpleAtod("+1", &d)); + EXPECT_EQ(d, +1.); + + float f; + EXPECT_FALSE(absl::SimpleAtof("++1", &f)); + EXPECT_FALSE(absl::SimpleAtof("+-1", &f)); + EXPECT_FALSE(absl::SimpleAtof("-+1", &f)); + EXPECT_FALSE(absl::SimpleAtof("--1", &f)); + EXPECT_TRUE(absl::SimpleAtof("-1", &f)); + EXPECT_EQ(f, -1.f); + EXPECT_TRUE(absl::SimpleAtof("+1", &f)); + EXPECT_EQ(f, +1.f); +} + TEST(NumbersTest, Atoenum) { enum E01 { E01_zero = 0, @@ -725,6 +774,51 @@ TEST(stringtest, safe_strtou128_random) { EXPECT_FALSE(parse_func(s, &parsed_value, base)); } } +TEST(stringtest, safe_strto128_random) { + // random number generators don't work for int128, and + // int128 can be streamed but not StrCat'd, so this code must be custom + // implemented for int128, but is generally the same as what's above. + // test_random_integer_parse_base<absl::int128>( + // &absl::numbers_internal::safe_strto128_base); + using RandomEngine = std::minstd_rand0; + using IntType = absl::int128; + constexpr auto parse_func = &absl::numbers_internal::safe_strto128_base; + + std::random_device rd; + RandomEngine rng(rd()); + std::uniform_int_distribution<int64_t> random_int64( + std::numeric_limits<int64_t>::min()); + std::uniform_int_distribution<uint64_t> random_uint64( + std::numeric_limits<uint64_t>::min()); + std::uniform_int_distribution<int> random_base(2, 35); + + for (size_t i = 0; i < kNumRandomTests; ++i) { + int64_t high = random_int64(rng); + uint64_t low = random_uint64(rng); + IntType value = absl::MakeInt128(high, low); + + int base = random_base(rng); + std::string str_value; + EXPECT_TRUE(Itoa<IntType>(value, base, &str_value)); + IntType parsed_value; + + // Test successful parse + EXPECT_TRUE(parse_func(str_value, &parsed_value, base)); + EXPECT_EQ(parsed_value, value); + + // Test overflow + std::string s; + absl::strings_internal::OStringStream(&s) + << std::numeric_limits<IntType>::max() << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + + // Test underflow + s.clear(); + absl::strings_internal::OStringStream(&s) + << std::numeric_limits<IntType>::min() << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + } +} TEST(stringtest, safe_strtou32_base) { for (int i = 0; strtouint32_test_cases()[i].str != nullptr; ++i) { diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index d9fb25af..c60027ad 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -1,3 +1,16 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include "absl/strings/str_format.h" diff --git a/absl/strings/str_join.h b/absl/strings/str_join.h index ae5731a4..33534536 100644 --- a/absl/strings/str_join.h +++ b/absl/strings/str_join.h @@ -144,7 +144,7 @@ strings_internal::DereferenceFormatterImpl<Formatter> DereferenceFormatter( std::forward<Formatter>(f)); } -// Function overload of `DererefenceFormatter()` for using a default +// Function overload of `DereferenceFormatter()` for using a default // `AlphaNumFormatter()`. inline strings_internal::DereferenceFormatterImpl< strings_internal::AlphaNumFormatterImpl> diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h index 1ce17f38..bfbca422 100644 --- a/absl/strings/str_split.h +++ b/absl/strings/str_split.h @@ -369,6 +369,12 @@ struct SkipWhitespace { } }; +template <typename T> +using EnableSplitIfString = + typename std::enable_if<std::is_same<T, std::string>::value || + std::is_same<T, const std::string>::value, + int>::type; + //------------------------------------------------------------------------------ // StrSplit() //------------------------------------------------------------------------------ @@ -489,22 +495,50 @@ struct SkipWhitespace { // Try not to depend on this distinction because the bug may one day be fixed. template <typename Delimiter> strings_internal::Splitter< - typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty> + typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, + absl::string_view> StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) { using DelimiterType = typename strings_internal::SelectDelimiter<Delimiter>::type; - return strings_internal::Splitter<DelimiterType, AllowEmpty>( + return strings_internal::Splitter<DelimiterType, AllowEmpty, + absl::string_view>( + text.value(), DelimiterType(d), AllowEmpty()); +} + +template <typename Delimiter, typename StringType, + EnableSplitIfString<StringType> = 0> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, + std::string> +StrSplit(StringType&& text, Delimiter d) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>( std::move(text), DelimiterType(d), AllowEmpty()); } template <typename Delimiter, typename Predicate> strings_internal::Splitter< - typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate> + typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, + absl::string_view> StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, Predicate p) { using DelimiterType = typename strings_internal::SelectDelimiter<Delimiter>::type; - return strings_internal::Splitter<DelimiterType, Predicate>( + return strings_internal::Splitter<DelimiterType, Predicate, + absl::string_view>( + text.value(), DelimiterType(d), std::move(p)); +} + +template <typename Delimiter, typename Predicate, typename StringType, + EnableSplitIfString<StringType> = 0> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, + std::string> +StrSplit(StringType&& text, Delimiter d, Predicate p) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, Predicate, std::string>( std::move(text), DelimiterType(d), std::move(p)); } diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc index b5ce68de..7f7c097f 100644 --- a/absl/strings/str_split_test.cc +++ b/absl/strings/str_split_test.cc @@ -367,7 +367,7 @@ TEST(SplitIterator, EqualityAsEndCondition) { TEST(Splitter, RangeIterators) { auto splitter = absl::StrSplit("a,b,c", ','); std::vector<absl::string_view> output; - for (const absl::string_view p : splitter) { + for (const absl::string_view& p : splitter) { output.push_back(p); } EXPECT_THAT(output, ElementsAre("a", "b", "c")); diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc index dcebb150..643af8f8 100644 --- a/absl/strings/string_view_test.cc +++ b/absl/strings/string_view_test.cc @@ -915,9 +915,9 @@ TEST(StringViewTest, At) { EXPECT_EQ(abc.at(1), 'b'); EXPECT_EQ(abc.at(2), 'c'); #ifdef ABSL_HAVE_EXCEPTIONS - EXPECT_THROW(abc.at(3), std::out_of_range); + EXPECT_THROW((void)abc.at(3), std::out_of_range); #else - ABSL_EXPECT_DEATH_IF_SUPPORTED(abc.at(3), "absl::string_view::at"); + ABSL_EXPECT_DEATH_IF_SUPPORTED((void)abc.at(3), "absl::string_view::at"); #endif } |