diff options
author | Abseil Team <absl-team@google.com> | 2020-02-14 09:41:25 -0800 |
---|---|---|
committer | Mark Barolak <mbar@google.com> | 2020-02-14 12:54:19 -0500 |
commit | 3c814105108680997d0821077694f663693b5382 (patch) | |
tree | f414158f000b24926b4110dd1c3a640106fe630c /absl/strings | |
parent | c44657f55692eddf5504156645d1f4ec7b3acabd (diff) |
Export of internal Abseil changes
--
97faa5fdfa4cd5d7a74cd9332cddd8a7c1e67b89 by Abseil Team <absl-team@google.com>:
Internal changes
PiperOrigin-RevId: 295164378
--
74990f100b3f4172c770ef8c76c05c8e99febdde by Xiaoyi Zhang <zhangxy@google.com>:
Release `absl::Cord`.
PiperOrigin-RevId: 295161959
--
6018c57f43c45c31dc1a61c0cd75fa2aa9be8dab by Gennadiy Rozental <rogeeff@google.com>:
Introduce independent notion of FlagStaticTypeID.
This change separates static flag value type identification from the type specific "vtable" with all the operations specific to value type. This change allows us to do the following:
* We can move most of "vtable" implementation from handle header, which will become public soon, into implementation details of Abseil Flag.
* We can combine back marshalling ops and general ops into a single vtable routine. They were split previously to facilitate type identification without requiring marshalling routines to be exposed in header.
* We do not need to store two vtable pointers. We can now store only one. The static type id can be deduced on request.
Overall we are saving 24 bytes per flag according to size_tester run.
PiperOrigin-RevId: 295149687
--
986b78e9ba571aa85154e70bda4580edd45bb7bf by Abseil Team <absl-team@google.com>:
Update internal comments.
PiperOrigin-RevId: 295030681
--
825412b29fd6015027bbc3e5f802706eee0d2837 by Matthew Brown <matthewbr@google.com>:
Change str_format_internal::ConversionChar to an enum (from a struct-wrapped enum).
PiperOrigin-RevId: 294987462
--
f9f88d91809d2cc33fc129df70fa93e7a2c35c69 by Derek Mauro <dmauro@google.com>:
Use more precise wording in the question on live-at-head
PiperOrigin-RevId: 294957679
GitOrigin-RevId: 97faa5fdfa4cd5d7a74cd9332cddd8a7c1e67b89
Change-Id: I081e70d148ffac7296d65e2a2f775f643eaf70bf
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/BUILD.bazel | 69 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 54 | ||||
-rw-r--r-- | absl/strings/cord.cc | 2019 | ||||
-rw-r--r-- | absl/strings/cord.h | 1121 | ||||
-rw-r--r-- | absl/strings/cord_test.cc | 1526 | ||||
-rw-r--r-- | absl/strings/cord_test_helpers.h | 60 | ||||
-rw-r--r-- | absl/strings/internal/cord_internal.h | 151 | ||||
-rw-r--r-- | absl/strings/internal/str_format/arg.cc | 37 | ||||
-rw-r--r-- | absl/strings/internal/str_format/arg.h | 7 | ||||
-rw-r--r-- | absl/strings/internal/str_format/arg_test.cc | 2 | ||||
-rw-r--r-- | absl/strings/internal/str_format/extension.cc | 11 | ||||
-rw-r--r-- | absl/strings/internal/str_format/extension.h | 200 | ||||
-rw-r--r-- | absl/strings/internal/str_format/float_conversion.cc | 18 | ||||
-rw-r--r-- | absl/strings/internal/str_format/parser.cc | 6 | ||||
-rw-r--r-- | absl/strings/internal/str_format/parser.h | 10 | ||||
-rw-r--r-- | absl/strings/internal/str_format/parser_test.cc | 14 | ||||
-rw-r--r-- | absl/strings/str_format_test.cc | 2 |
17 files changed, 5155 insertions, 152 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index d5a362d0..b950ec76 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -126,6 +126,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":cord", ":strings", "//absl/base:core_headers", "//absl/container:fixed_array", @@ -250,6 +251,74 @@ cc_test( ], ) +cc_library( + name = "cord_internal", + hdrs = ["internal/cord_internal.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/meta:type_traits", + ], +) + +cc_library( + name = "cord", + srcs = [ + "cord.cc", + ], + hdrs = [ + "cord.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord_internal", + ":internal", + ":str_format", + ":strings", + "//absl/base", + "//absl/base:base_internal", + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/container:fixed_array", + "//absl/container:inlined_vector", + "//absl/functional:function_ref", + "//absl/meta:type_traits", + ], +) + +cc_library( + name = "cord_test_helpers", + testonly = 1, + hdrs = [ + "cord_test_helpers.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord", + ], +) + +cc_test( + name = "cord_test", + size = "medium", + srcs = ["cord_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":cord_test_helpers", + ":strings", + "//absl/base", + "//absl/base:config", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/container:fixed_array", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "substitute_test", size = "small", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 3feb5e94..cebc5928 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -526,3 +526,57 @@ absl_cc_test( absl::str_format gmock_main ) + +absl_cc_library( + NAME + cord + HDRS + "cord.h" + SRCS + "cord.cc" + "internal/cord_internal.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::strings_internal + absl::base + absl::base_internal + absl::core_headers + absl::endian + absl::fixed_array + absl::function_ref + absl::inlined_vector + absl::raw_logging_internal + absl::type_traits + PUBLIC +) + +absl_cc_library( + NAME + cord_test_helpers + HDRS + "cord_test_helpers.h" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + TESTONLY +) + +absl_cc_test( + NAME + cord_test + SRCS + "cord_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + absl::strings + absl::base + absl::config + absl::endian + absl::raw_logging_internal + absl::fixed_array + gmock_main +) diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc new file mode 100644 index 00000000..cc0cc9d7 --- /dev/null +++ b/absl/strings/cord.cc @@ -0,0 +1,2019 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/cord.h" + +#include <algorithm> +#include <cstddef> +#include <cstdio> +#include <cstdlib> +#include <iomanip> +#include <limits> +#include <ostream> +#include <sstream> +#include <type_traits> +#include <unordered_set> +#include <vector> + +#include "absl/base/casts.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/port.h" +#include "absl/container/fixed_array.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/escaping.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +using ::absl::cord_internal::CordRep; +using ::absl::cord_internal::CordRepConcat; +using ::absl::cord_internal::CordRepExternal; +using ::absl::cord_internal::CordRepSubstring; + +// Various representations that we allow +enum CordRepKind { + CONCAT = 0, + EXTERNAL = 1, + SUBSTRING = 2, + + // We have different tags for different sized flat arrays, + // starting with FLAT + FLAT = 3, +}; + +namespace { + +// Type used with std::allocator for allocating and deallocating +// `CordRepExternal`. std::allocator is used because it opaquely handles the +// different new / delete overloads available on a given platform. +using ExternalAllocType = + absl::aligned_storage_t<absl::cord_internal::ExternalRepAlignment(), + absl::cord_internal::ExternalRepAlignment()>; + +// Returns the number of objects to pass in to std::allocator<ExternalAllocType> +// allocate() and deallocate() to create enough room for `CordRepExternal` with +// `releaser_size` bytes on the end. +constexpr size_t GetExternalAllocNumObjects(size_t releaser_size) { + // Be sure to round up since `releaser_size` could be smaller than + // sizeof(ExternalAllocType)`. + return (sizeof(CordRepExternal) + releaser_size + sizeof(ExternalAllocType) - + 1) / + sizeof(ExternalAllocType); +} + +// Allocates enough memory for `CordRepExternal` and a releaser with size +// `releaser_size` bytes. +void* AllocateExternal(size_t releaser_size) { + return std::allocator<ExternalAllocType>().allocate( + GetExternalAllocNumObjects(releaser_size)); +} + +// Deallocates the memory for a `CordRepExternal` assuming it was allocated with +// a releaser of given size and alignment. +void DeallocateExternal(CordRepExternal* p, size_t releaser_size) { + std::allocator<ExternalAllocType>().deallocate( + reinterpret_cast<ExternalAllocType*>(p), + GetExternalAllocNumObjects(releaser_size)); +} + +// Returns a pointer to the type erased releaser for the given CordRepExternal. +void* GetExternalReleaser(CordRepExternal* rep) { + return rep + 1; +} + +} // namespace + +namespace cord_internal { + +inline CordRepConcat* CordRep::concat() { + assert(tag == CONCAT); + return static_cast<CordRepConcat*>(this); +} + +inline const CordRepConcat* CordRep::concat() const { + assert(tag == CONCAT); + return static_cast<const CordRepConcat*>(this); +} + +inline CordRepSubstring* CordRep::substring() { + assert(tag == SUBSTRING); + return static_cast<CordRepSubstring*>(this); +} + +inline const CordRepSubstring* CordRep::substring() const { + assert(tag == SUBSTRING); + return static_cast<const CordRepSubstring*>(this); +} + +inline CordRepExternal* CordRep::external() { + assert(tag == EXTERNAL); + return static_cast<CordRepExternal*>(this); +} + +inline const CordRepExternal* CordRep::external() const { + assert(tag == EXTERNAL); + return static_cast<const CordRepExternal*>(this); +} + +} // namespace cord_internal + +static const size_t kFlatOverhead = offsetof(CordRep, data); + +static_assert(kFlatOverhead == 13, "Unittests assume kFlatOverhead == 13"); + +// Largest and smallest flat node lengths we are willing to allocate +// Flat allocation size is stored in tag, which currently can encode sizes up +// to 4K, encoded as multiple of either 8 or 32 bytes. +// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc. +static constexpr size_t kMaxFlatSize = 4096; +static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; +static constexpr size_t kMinFlatLength = 32 - kFlatOverhead; + +// Prefer copying blocks of at most this size, otherwise reference count. +static const size_t kMaxBytesToCopy = 511; + +// Helper functions for rounded div, and rounding to exact sizes. +static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } +static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } + +// Returns the size to the nearest equal or larger value that can be +// expressed exactly as a tag value. +static size_t RoundUpForTag(size_t size) { + return RoundUp(size, (size <= 1024) ? 8 : 32); +} + +// Converts the allocated size to a tag, rounding down if the size +// does not exactly match a 'tag expressible' size value. The result is +// undefined if the size exceeds the maximum size that can be encoded in +// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). +static uint8_t AllocatedSizeToTag(size_t size) { + const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32; + assert(tag <= std::numeric_limits<uint8_t>::max()); + return tag; +} + +// Converts the provided tag to the corresponding allocated size +static constexpr size_t TagToAllocatedSize(uint8_t tag) { + return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32); +} + +// Converts the provided tag to the corresponding available data length +static constexpr size_t TagToLength(uint8_t tag) { + return TagToAllocatedSize(tag) - kFlatOverhead; +} + +// Enforce that kMaxFlatSize maps to a well-known exact tag value. +static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); + +constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) { + return n == 0 ? a : Fibonacci(n - 1, b, a + b); +} + +static_assert(Fibonacci(63) == 6557470319842, + "Fibonacci values computed incorrectly"); + +// Minimum length required for a given depth tree -- a tree is considered +// balanced if +// length(t) >= min_length[depth(t)] +// The root node depth is allowed to become twice as large to reduce rebalancing +// for larger strings (see IsRootBalanced). +static constexpr uint64_t min_length[] = { + Fibonacci(2), + Fibonacci(3), + Fibonacci(4), + Fibonacci(5), + Fibonacci(6), + Fibonacci(7), + Fibonacci(8), + Fibonacci(9), + Fibonacci(10), + Fibonacci(11), + Fibonacci(12), + Fibonacci(13), + Fibonacci(14), + Fibonacci(15), + Fibonacci(16), + Fibonacci(17), + Fibonacci(18), + Fibonacci(19), + Fibonacci(20), + Fibonacci(21), + Fibonacci(22), + Fibonacci(23), + Fibonacci(24), + Fibonacci(25), + Fibonacci(26), + Fibonacci(27), + Fibonacci(28), + Fibonacci(29), + Fibonacci(30), + Fibonacci(31), + Fibonacci(32), + Fibonacci(33), + Fibonacci(34), + Fibonacci(35), + Fibonacci(36), + Fibonacci(37), + Fibonacci(38), + Fibonacci(39), + Fibonacci(40), + Fibonacci(41), + Fibonacci(42), + Fibonacci(43), + Fibonacci(44), + Fibonacci(45), + Fibonacci(46), + Fibonacci(47), + 0xffffffffffffffffull, // Avoid overflow +}; + +static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length); + +// The inlined size to use with absl::InlinedVector. +// +// Note: The InlinedVectors in this file (and in cord.h) do not need to use +// the same value for their inlined size. The fact that they do is historical. +// It may be desirable for each to use a different inlined size optimized for +// that InlinedVector's usage. +// +// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for +// the inlined vector size (47 exists for backward compatibility). +static const int kInlinedVectorSize = 47; + +static inline bool IsRootBalanced(CordRep* node) { + if (node->tag != CONCAT) { + return true; + } else if (node->concat()->depth() <= 15) { + return true; + } else if (node->concat()->depth() > kMinLengthSize) { + return false; + } else { + // Allow depth to become twice as large as implied by fibonacci rule to + // reduce rebalancing for larger strings. + return (node->length >= min_length[node->concat()->depth() / 2]); + } +} + +static CordRep* Rebalance(CordRep* node); +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os); +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation); + +static inline CordRep* VerifyTree(CordRep* node) { + // Verification is expensive, so only do it in debug mode. + // Even in debug mode we normally do only light validation. + // If you are debugging Cord itself, you should define the + // macro EXTRA_CORD_VALIDATION, e.g. by adding + // --copt=-DEXTRA_CORD_VALIDATION to the blaze line. +#ifdef EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true)); +#else // EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false)); +#endif // EXTRA_CORD_VALIDATION + static_cast<void>(&VerifyNode); + + return node; +} + +// -------------------------------------------------------------------- +// Memory management + +inline CordRep* Ref(CordRep* rep) { + if (rep != nullptr) { + rep->refcount.Increment(); + } + return rep; +} + +// This internal routine is called from the cold path of Unref below. Keeping it +// in a separate routine allows good inlining of Unref into many profitable call +// sites. However, the call to this function can be highly disruptive to the +// register pressure in those callers. To minimize the cost to callers, we use +// a special LLVM calling convention that preserves most registers. This allows +// the call to this routine in cold paths to not disrupt the caller's register +// pressure. This calling convention is not available on all platforms; we +// intentionally allow LLVM to ignore the attribute rather than attempting to +// hardcode the list of supported platforms. +#if defined(__clang__) && !defined(__i386__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wattributes" +__attribute__((preserve_most)) +#pragma clang diagnostic pop +#endif +static void UnrefInternal(CordRep* rep) { + assert(rep != nullptr); + + absl::InlinedVector<CordRep*, kInlinedVectorSize> pending; + while (true) { + if (rep->tag == CONCAT) { + CordRepConcat* rep_concat = rep->concat(); + CordRep* right = rep_concat->right; + if (!right->refcount.Decrement()) { + pending.push_back(right); + } + CordRep* left = rep_concat->left; + delete rep_concat; + rep = nullptr; + if (!left->refcount.Decrement()) { + rep = left; + continue; + } + } else if (rep->tag == EXTERNAL) { + CordRepExternal* rep_external = rep->external(); + absl::string_view data(rep_external->base, rep->length); + void* releaser = GetExternalReleaser(rep_external); + size_t releaser_size = rep_external->releaser_invoker(releaser, data); + rep_external->~CordRepExternal(); + DeallocateExternal(rep_external, releaser_size); + rep = nullptr; + } else if (rep->tag == SUBSTRING) { + CordRepSubstring* rep_substring = rep->substring(); + CordRep* child = rep_substring->child; + delete rep_substring; + rep = nullptr; + if (!child->refcount.Decrement()) { + rep = child; + continue; + } + } else { + // Flat CordReps are allocated and constructed with raw ::operator new + // and placement new, and must be destructed and deallocated + // accordingly. +#if defined(__cpp_sized_deallocation) + size_t size = TagToAllocatedSize(rep->tag); + rep->~CordRep(); + ::operator delete(rep, size); +#else + rep->~CordRep(); + ::operator delete(rep); +#endif + rep = nullptr; + } + + if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } else { + break; + } + } +} + +inline void Unref(CordRep* rep) { + // Fast-path for two common, hot cases: a null rep and a shared root. + if (ABSL_PREDICT_TRUE(rep == nullptr || + rep->refcount.DecrementExpectHighRefcount())) { + return; + } + + UnrefInternal(rep); +} + +// Return the depth of a node +static int Depth(const CordRep* rep) { + if (rep->tag == CONCAT) { + return rep->concat()->depth(); + } else { + return 0; + } +} + +static void SetConcatChildren(CordRepConcat* concat, CordRep* left, + CordRep* right) { + concat->left = left; + concat->right = right; + + concat->length = left->length + right->length; + concat->set_depth(1 + std::max(Depth(left), Depth(right))); +} + +// Create a concatenation of the specified nodes. +// Does not change the refcounts of "left" and "right". +// The returned node has a refcount of 1. +static CordRep* RawConcat(CordRep* left, CordRep* right) { + // Avoid making degenerate concat nodes (one child is empty) + if (left == nullptr || left->length == 0) { + Unref(left); + return right; + } + if (right == nullptr || right->length == 0) { + Unref(right); + return left; + } + + CordRepConcat* rep = new CordRepConcat(); + rep->tag = CONCAT; + SetConcatChildren(rep, left, right); + + return rep; +} + +static CordRep* Concat(CordRep* left, CordRep* right) { + CordRep* rep = RawConcat(left, right); + if (rep != nullptr && !IsRootBalanced(rep)) { + rep = Rebalance(rep); + } + return VerifyTree(rep); +} + +// Make a balanced tree out of an array of leaf nodes. +static CordRep* MakeBalancedTree(CordRep** reps, size_t n) { + // Make repeated passes over the array, merging adjacent pairs + // until we are left with just a single node. + while (n > 1) { + size_t dst = 0; + for (size_t src = 0; src < n; src += 2) { + if (src + 1 < n) { + reps[dst] = Concat(reps[src], reps[src + 1]); + } else { + reps[dst] = reps[src]; + } + dst++; + } + n = dst; + } + + return reps[0]; +} + +// Create a new flat node. +static CordRep* NewFlat(size_t length_hint) { + if (length_hint <= kMinFlatLength) { + length_hint = kMinFlatLength; + } else if (length_hint > kMaxFlatLength) { + length_hint = kMaxFlatLength; + } + + // Round size up so it matches a size we can exactly express in a tag. + const size_t size = RoundUpForTag(length_hint + kFlatOverhead); + void* const raw_rep = ::operator new(size); + CordRep* rep = new (raw_rep) CordRep(); + rep->tag = AllocatedSizeToTag(size); + return VerifyTree(rep); +} + +// Create a new tree out of the specified array. +// The returned node has a refcount of 1. +static CordRep* NewTree(const char* data, + size_t length, + size_t alloc_hint) { + if (length == 0) return nullptr; + absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1); + size_t n = 0; + do { + const size_t len = std::min(length, kMaxFlatLength); + CordRep* rep = NewFlat(len + alloc_hint); + rep->length = len; + memcpy(rep->data, data, len); + reps[n++] = VerifyTree(rep); + data += len; + length -= len; + } while (length != 0); + return MakeBalancedTree(reps.data(), n); +} + +namespace cord_internal { + +ExternalRepReleaserPair NewExternalWithUninitializedReleaser( + absl::string_view data, ExternalReleaserInvoker invoker, + size_t releaser_size) { + assert(!data.empty()); + + void* raw_rep = AllocateExternal(releaser_size); + auto* rep = new (raw_rep) CordRepExternal(); + rep->length = data.size(); + rep->tag = EXTERNAL; + rep->base = data.data(); + rep->releaser_invoker = invoker; + return {VerifyTree(rep), GetExternalReleaser(rep)}; +} + +} // namespace cord_internal + +static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) { + // Never create empty substring nodes + if (length == 0) { + Unref(child); + return nullptr; + } else { + CordRepSubstring* rep = new CordRepSubstring(); + assert((offset + length) <= child->length); + rep->length = length; + rep->tag = SUBSTRING; + rep->start = offset; + rep->child = child; + return VerifyTree(rep); + } +} + +// -------------------------------------------------------------------- +// Cord::InlineRep functions + +// This will trigger LNK2005 in MSVC. +#ifndef COMPILER_MSVC +const unsigned char Cord::InlineRep::kMaxInline; +#endif // COMPILER_MSVC + +inline void Cord::InlineRep::set_data(const char* data, size_t n, + bool nullify_tail) { + static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); + + cord_internal::SmallMemmove(data_, data, n, nullify_tail); + data_[kMaxInline] = static_cast<char>(n); +} + +inline char* Cord::InlineRep::set_data(size_t n) { + assert(n <= kMaxInline); + memset(data_, 0, sizeof(data_)); + data_[kMaxInline] = static_cast<char>(n); + return data_; +} + +inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) { + size_t len = data_[kMaxInline]; + CordRep* result; + if (len > kMaxInline) { + memcpy(&result, data_, sizeof(result)); + } else { + result = NewFlat(len + extra_hint); + result->length = len; + memcpy(result->data, data_, len); + set_tree(result); + } + return result; +} + +inline void Cord::InlineRep::reduce_size(size_t n) { + size_t tag = data_[kMaxInline]; + assert(tag <= kMaxInline); + assert(tag >= n); + tag -= n; + memset(data_ + tag, 0, n); + data_[kMaxInline] = static_cast<char>(tag); +} + +inline void Cord::InlineRep::remove_prefix(size_t n) { + cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n); + reduce_size(n); +} + +void Cord::InlineRep::AppendTree(CordRep* tree) { + if (tree == nullptr) return; + size_t len = data_[kMaxInline]; + if (len == 0) { + set_tree(tree); + } else { + set_tree(Concat(force_tree(0), tree)); + } +} + +void Cord::InlineRep::PrependTree(CordRep* tree) { + if (tree == nullptr) return; + size_t len = data_[kMaxInline]; + if (len == 0) { + set_tree(tree); + } else { + set_tree(Concat(tree, force_tree(0))); + } +} + +// Searches for a non-full flat node at the rightmost leaf of the tree. If a +// suitable leaf is found, the function will update the length field for all +// nodes to account for the size increase. The append region address will be +// written to region and the actual size increase will be written to size. +static inline bool PrepareAppendRegion(CordRep* root, char** region, + size_t* size, size_t max_length) { + // Search down the right-hand path for a non-full FLAT node. + CordRep* dst = root; + while (dst->tag == CONCAT && dst->refcount.IsOne()) { + dst = dst->concat()->right; + } + + if (dst->tag < FLAT || !dst->refcount.IsOne()) { + *region = nullptr; + *size = 0; + return false; + } + + const size_t in_use = dst->length; + const size_t capacity = TagToLength(dst->tag); + if (in_use == capacity) { + *region = nullptr; + *size = 0; + return false; + } + + size_t size_increase = std::min(capacity - in_use, max_length); + + // We need to update the length fields for all nodes, including the leaf node. + for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) { + rep->length += size_increase; + } + dst->length += size_increase; + + *region = dst->data + in_use; + *size = size_increase; + return true; +} + +void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, + size_t max_length) { + if (max_length == 0) { + *region = nullptr; + *size = 0; + return; + } + + // Try to fit in the inline buffer if possible. + size_t inline_length = data_[kMaxInline]; + if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) { + *region = data_ + inline_length; + *size = max_length; + data_[kMaxInline] = static_cast<char>(inline_length + max_length); + return; + } + + CordRep* root = force_tree(max_length); + + if (PrepareAppendRegion(root, region, size, max_length)) { + return; + } + + // Allocate new node. + CordRep* new_node = + NewFlat(std::max(static_cast<size_t>(root->length), max_length)); + new_node->length = + std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length); + *region = new_node->data; + *size = new_node->length; + replace_tree(Concat(root, new_node)); +} + +void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { + const size_t max_length = std::numeric_limits<size_t>::max(); + + // Try to fit in the inline buffer if possible. + size_t inline_length = data_[kMaxInline]; + if (inline_length < kMaxInline) { + *region = data_ + inline_length; + *size = kMaxInline - inline_length; + data_[kMaxInline] = kMaxInline; + return; + } + + CordRep* root = force_tree(max_length); + + if (PrepareAppendRegion(root, region, size, max_length)) { + return; + } + + // Allocate new node. + CordRep* new_node = NewFlat(root->length); + new_node->length = TagToLength(new_node->tag); + *region = new_node->data; + *size = new_node->length; + replace_tree(Concat(root, new_node)); +} + +// If the rep is a leaf, this will increment the value at total_mem_usage and +// will return true. +static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { + if (rep->tag >= FLAT) { + *total_mem_usage += TagToAllocatedSize(rep->tag); + return true; + } + if (rep->tag == EXTERNAL) { + *total_mem_usage += sizeof(CordRepConcat) + rep->length; + return true; + } + return false; +} + +void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { + ClearSlow(); + + memcpy(data_, src.data_, sizeof(data_)); + if (is_tree()) { + Ref(tree()); + } +} + +void Cord::InlineRep::ClearSlow() { + if (is_tree()) { + Unref(tree()); + } + memset(data_, 0, sizeof(data_)); +} + +// -------------------------------------------------------------------- +// Constructors and destructors + +Cord::Cord(const Cord& src) : contents_(src.contents_) { + Ref(contents_.tree()); // Does nothing if contents_ has embedded data +} + +Cord::Cord(absl::string_view src) { + const size_t n = src.size(); + if (n <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), n, false); + } else { + contents_.set_tree(NewTree(src.data(), n, 0)); + } +} + +// The destruction code is separate so that the compiler can determine +// that it does not need to call the destructor on a moved-from Cord. +void Cord::DestroyCordSlow() { + Unref(VerifyTree(contents_.tree())); +} + +// -------------------------------------------------------------------- +// Mutators + +void Cord::Clear() { + Unref(contents_.clear()); +} + +Cord& Cord::operator=(absl::string_view src) { + + const char* data = src.data(); + size_t length = src.size(); + CordRep* tree = contents_.tree(); + if (length <= InlineRep::kMaxInline) { + // Embed into this->contents_ + contents_.set_data(data, length, true); + Unref(tree); + return *this; + } + if (tree != nullptr && tree->tag >= FLAT && + TagToLength(tree->tag) >= length && tree->refcount.IsOne()) { + // Copy in place if the existing FLAT node is reusable. + memmove(tree->data, data, length); + tree->length = length; + VerifyTree(tree); + return *this; + } + contents_.set_tree(NewTree(data, length, 0)); + Unref(tree); + return *this; +} + +// TODO(sanjay): Move to Cord::InlineRep section of file. For now, +// we keep it here to make diffs easier. +void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { + if (src_size == 0) return; // memcpy(_, nullptr, 0) is undefined. + // Try to fit in the inline buffer if possible. + size_t inline_length = data_[kMaxInline]; + if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) { + // Append new data to embedded array + data_[kMaxInline] = static_cast<char>(inline_length + src_size); + memcpy(data_ + inline_length, src_data, src_size); + return; + } + + CordRep* root = tree(); + + size_t appended = 0; + if (root) { + char* region; + if (PrepareAppendRegion(root, ®ion, &appended, src_size)) { + memcpy(region, src_data, appended); + } + } else { + // It is possible that src_data == data_, but when we transition from an + // InlineRep to a tree we need to assign data_ = root via set_tree. To + // avoid corrupting the source data before we copy it, delay calling + // set_tree until after we've copied data. + // We are going from an inline size to beyond inline size. Make the new size + // either double the inlined size, or the added size + 10%. + const size_t size1 = inline_length * 2 + src_size; + const size_t size2 = inline_length + src_size / 10; + root = NewFlat(std::max<size_t>(size1, size2)); + appended = std::min(src_size, TagToLength(root->tag) - inline_length); + memcpy(root->data, data_, inline_length); + memcpy(root->data + inline_length, src_data, appended); + root->length = inline_length + appended; + set_tree(root); + } + + src_data += appended; + src_size -= appended; + if (src_size == 0) { + return; + } + + // Use new block(s) for any remaining bytes that were not handled above. + // Alloc extra memory only if the right child of the root of the new tree is + // going to be a FLAT node, which will permit further inplace appends. + size_t length = src_size; + if (src_size < kMaxFlatLength) { + // The new length is either + // - old size + 10% + // - old_size + src_size + // This will cause a reasonable conservative step-up in size that is still + // large enough to avoid excessive amounts of small fragments being added. + length = std::max<size_t>(root->length / 10, src_size); + } + set_tree(Concat(root, NewTree(src_data, src_size, length - src_size))); +} + +inline CordRep* Cord::TakeRep() const& { + return Ref(contents_.tree()); +} + +inline CordRep* Cord::TakeRep() && { + CordRep* rep = contents_.tree(); + contents_.clear(); + return rep; +} + +template <typename C> +inline void Cord::AppendImpl(C&& src) { + if (empty()) { + // In case of an empty destination avoid allocating a new node, do not copy + // data. + *this = std::forward<C>(src); + return; + } + + // For short cords, it is faster to copy data if there is room in dst. + const size_t src_size = src.contents_.size(); + if (src_size <= kMaxBytesToCopy) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree == nullptr) { + // src has embedded data. + contents_.AppendArray(src.contents_.data(), src_size); + return; + } + if (src_tree->tag >= FLAT) { + // src tree just has one flat node. + contents_.AppendArray(src_tree->data, src_size); + return; + } + if (&src == this) { + // ChunkIterator below assumes that src is not modified during traversal. + Append(Cord(src)); + return; + } + // TODO(mec): Should we only do this if "dst" has space? + for (absl::string_view chunk : src.Chunks()) { + Append(chunk); + } + return; + } + + contents_.AppendTree(std::forward<C>(src).TakeRep()); +} + +void Cord::Append(const Cord& src) { AppendImpl(src); } + +void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); } + +void Cord::Prepend(const Cord& src) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree != nullptr) { + Ref(src_tree); + contents_.PrependTree(src_tree); + return; + } + + // `src` cord is inlined. + absl::string_view src_contents(src.contents_.data(), src.contents_.size()); + return Prepend(src_contents); +} + +void Cord::Prepend(absl::string_view src) { + if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + size_t cur_size = contents_.size(); + if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) { + // Use embedded storage. + char data[InlineRep::kMaxInline + 1] = {0}; + data[InlineRep::kMaxInline] = cur_size + src.size(); // set size + memcpy(data, src.data(), src.size()); + memcpy(data + src.size(), contents_.data(), cur_size); + memcpy(reinterpret_cast<void*>(&contents_), data, + InlineRep::kMaxInline + 1); + } else { + contents_.PrependTree(NewTree(src.data(), src.size(), 0)); + } +} + +static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return Ref(node); + absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack; + + while (node->tag == CONCAT) { + assert(n <= node->length); + if (n < node->concat()->left->length) { + // Push right to stack, descend left. + rhs_stack.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Drop left, descend right. + n -= node->concat()->left->length; + node = node->concat()->right; + } + } + assert(n <= node->length); + + if (n == 0) { + Ref(node); + } else { + size_t start = n; + size_t len = node->length - n; + if (node->tag == SUBSTRING) { + // Consider in-place update of node, similar to in RemoveSuffixFrom(). + start += node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(Ref(node), start, len); + } + while (!rhs_stack.empty()) { + node = Concat(node, Ref(rhs_stack.back())); + rhs_stack.pop_back(); + } + return node; +} + +// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the +// exception that removing a suffix has an optimization where a node may be +// edited in place iff that node and all its ancestors have a refcount of 1. +static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return Ref(node); + absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; + bool inplace_ok = node->refcount.IsOne(); + + while (node->tag == CONCAT) { + assert(n <= node->length); + if (n < node->concat()->right->length) { + // Push left to stack, descend right. + lhs_stack.push_back(node->concat()->left); + node = node->concat()->right; + } else { + // Drop right, descend left. + n -= node->concat()->right->length; + node = node->concat()->left; + } + inplace_ok = inplace_ok && node->refcount.IsOne(); + } + assert(n <= node->length); + + if (n == 0) { + Ref(node); + } else if (inplace_ok && node->tag != EXTERNAL) { + // Consider making a new buffer if the current node capacity is much + // larger than the new length. + Ref(node); + node->length -= n; + } else { + size_t start = 0; + size_t len = node->length - n; + if (node->tag == SUBSTRING) { + start = node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(Ref(node), start, len); + } + while (!lhs_stack.empty()) { + node = Concat(Ref(lhs_stack.back()), node); + lhs_stack.pop_back(); + } + return node; +} + +void Cord::RemovePrefix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + absl::StrCat("Requested prefix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.remove_prefix(n); + } else { + CordRep* newrep = RemovePrefixFrom(tree, n); + Unref(tree); + contents_.replace_tree(VerifyTree(newrep)); + } +} + +void Cord::RemoveSuffix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + absl::StrCat("Requested suffix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.reduce_size(n); + } else { + CordRep* newrep = RemoveSuffixFrom(tree, n); + Unref(tree); + contents_.replace_tree(VerifyTree(newrep)); + } +} + +// Work item for NewSubRange(). +struct SubRange { + SubRange(CordRep* a_node, size_t a_pos, size_t a_n) + : node(a_node), pos(a_pos), n(a_n) {} + CordRep* node; // nullptr means concat last 2 results. + size_t pos; + size_t n; +}; + +static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) { + absl::InlinedVector<CordRep*, kInlinedVectorSize> results; + absl::InlinedVector<SubRange, kInlinedVectorSize> todo; + todo.push_back(SubRange(node, pos, n)); + do { + const SubRange& sr = todo.back(); + node = sr.node; + pos = sr.pos; + n = sr.n; + todo.pop_back(); + + if (node == nullptr) { + assert(results.size() >= 2); + CordRep* right = results.back(); + results.pop_back(); + CordRep* left = results.back(); + results.pop_back(); + results.push_back(Concat(left, right)); + } else if (pos == 0 && n == node->length) { + results.push_back(Ref(node)); + } else if (node->tag != CONCAT) { + if (node->tag == SUBSTRING) { + pos += node->substring()->start; + node = node->substring()->child; + } + results.push_back(NewSubstring(Ref(node), pos, n)); + } else if (pos + n <= node->concat()->left->length) { + todo.push_back(SubRange(node->concat()->left, pos, n)); + } else if (pos >= node->concat()->left->length) { + pos -= node->concat()->left->length; + todo.push_back(SubRange(node->concat()->right, pos, n)); + } else { + size_t left_n = node->concat()->left->length - pos; + todo.push_back(SubRange(nullptr, 0, 0)); // Concat() + todo.push_back(SubRange(node->concat()->right, 0, n - left_n)); + todo.push_back(SubRange(node->concat()->left, pos, left_n)); + } + } while (!todo.empty()); + assert(results.size() == 1); + return results[0]; +} + +Cord Cord::Subcord(size_t pos, size_t new_size) const { + Cord sub_cord; + size_t length = size(); + if (pos > length) pos = length; + if (new_size > length - pos) new_size = length - pos; + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + // sub_cord is newly constructed, no need to re-zero-out the tail of + // contents_ memory. + sub_cord.contents_.set_data(contents_.data() + pos, new_size, false); + } else if (new_size == 0) { + // We want to return empty subcord, so nothing to do. + } else if (new_size <= InlineRep::kMaxInline) { + Cord::ChunkIterator it = chunk_begin(); + it.AdvanceBytes(pos); + char* dest = sub_cord.contents_.data_; + size_t remaining_size = new_size; + while (remaining_size > it->size()) { + cord_internal::SmallMemmove(dest, it->data(), it->size()); + remaining_size -= it->size(); + dest += it->size(); + ++it; + } + cord_internal::SmallMemmove(dest, it->data(), remaining_size); + sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size; + } else { + sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size)); + } + return sub_cord; +} + +// -------------------------------------------------------------------- +// Balancing + +class CordForest { + public: + explicit CordForest(size_t length) + : root_length_(length), trees_(kMinLengthSize, nullptr) {} + + void Build(CordRep* cord_root) { + std::vector<CordRep*> pending = {cord_root}; + + while (!pending.empty()) { + CordRep* node = pending.back(); + pending.pop_back(); + CheckNode(node); + if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) { + AddNode(node); + continue; + } + + CordRepConcat* concat_node = node->concat(); + if (concat_node->depth() >= kMinLengthSize || + concat_node->length < min_length[concat_node->depth()]) { + pending.push_back(concat_node->right); + pending.push_back(concat_node->left); + + if (concat_node->refcount.IsOne()) { + concat_node->left = concat_freelist_; + concat_freelist_ = concat_node; + } else { + Ref(concat_node->right); + Ref(concat_node->left); + Unref(concat_node); + } + } else { + AddNode(node); + } + } + } + + CordRep* ConcatNodes() { + CordRep* sum = nullptr; + for (auto* node : trees_) { + if (node == nullptr) continue; + + sum = PrependNode(node, sum); + root_length_ -= node->length; + if (root_length_ == 0) break; + } + ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node"); + return VerifyTree(sum); + } + + private: + CordRep* AppendNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? node : MakeConcat(sum, node); + } + + CordRep* PrependNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? node : MakeConcat(node, sum); + } + + void AddNode(CordRep* node) { + CordRep* sum = nullptr; + + // Collect together everything with which we will merge node + int i = 0; + for (; node->length > min_length[i + 1]; ++i) { + auto& tree_at_i = trees_[i]; + + if (tree_at_i == nullptr) continue; + sum = PrependNode(tree_at_i, sum); + tree_at_i = nullptr; + } + + sum = AppendNode(node, sum); + + // Insert sum into appropriate place in the forest + for (; sum->length >= min_length[i]; ++i) { + auto& tree_at_i = trees_[i]; + if (tree_at_i == nullptr) continue; + + sum = MakeConcat(tree_at_i, sum); + tree_at_i = nullptr; + } + + // min_length[0] == 1, which means sum->length >= min_length[0] + assert(i > 0); + trees_[i - 1] = sum; + } + + // Make concat node trying to resue existing CordRepConcat nodes we + // already collected in the concat_freelist_. + CordRep* MakeConcat(CordRep* left, CordRep* right) { + if (concat_freelist_ == nullptr) return RawConcat(left, right); + + CordRepConcat* rep = concat_freelist_; + if (concat_freelist_->left == nullptr) { + concat_freelist_ = nullptr; + } else { + concat_freelist_ = concat_freelist_->left->concat(); + } + SetConcatChildren(rep, left, right); + + return rep; + } + + static void CheckNode(CordRep* node) { + ABSL_INTERNAL_CHECK(node->length != 0u, ""); + if (node->tag == CONCAT) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, ""); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, ""); + ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length + + node->concat()->right->length), + ""); + } + } + + size_t root_length_; + + // use an inlined vector instead of a flat array to get bounds checking + absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_; + + // List of concat nodes we can re-use for Cord balancing. + CordRepConcat* concat_freelist_ = nullptr; +}; + +static CordRep* Rebalance(CordRep* node) { + VerifyTree(node); + assert(node->tag == CONCAT); + + if (node->length == 0) { + return nullptr; + } + + CordForest forest(node->length); + forest.Build(node); + return forest.ConcatNodes(); +} + +// -------------------------------------------------------------------- +// Comparators + +namespace { + +int ClampResult(int memcmp_res) { + return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0); +} + +int CompareChunks(absl::string_view* lhs, absl::string_view* rhs, + size_t* size_to_compare) { + size_t compared_size = std::min(lhs->size(), rhs->size()); + assert(*size_to_compare >= compared_size); + *size_to_compare -= compared_size; + + int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size); + if (memcmp_res != 0) return memcmp_res; + + lhs->remove_prefix(compared_size); + rhs->remove_prefix(compared_size); + + return 0; +} + +// This overload set computes comparison results from memcmp result. This +// interface is used inside GenericCompare below. Differet implementations +// are specialized for int and bool. For int we clamp result to {-1, 0, 1} +// set. For bool we just interested in "value == 0". +template <typename ResultType> +ResultType ComputeCompareResult(int memcmp_res) { + return ClampResult(memcmp_res); +} +template <> +bool ComputeCompareResult<bool>(int memcmp_res) { + return memcmp_res == 0; +} + +} // namespace + +// Helper routine. Locates the first flat chunk of the Cord without +// initializing the iterator. +inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { + size_t n = data_[kMaxInline]; + if (n <= kMaxInline) { + return absl::string_view(data_, n); + } + + CordRep* node = tree(); + if (node->tag >= FLAT) { + return absl::string_view(node->data, node->length); + } + + if (node->tag == EXTERNAL) { + return absl::string_view(node->external()->base, node->length); + } + + // Walk down the left branches until we hit a non-CONCAT node. + while (node->tag == CONCAT) { + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + assert(length != 0); + + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + if (node->tag >= FLAT) { + return absl::string_view(node->data + offset, length); + } + + assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here"); + + return absl::string_view(node->external()->base + offset, length); +} + +inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + + // compared_size is inside first chunk. + absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs.size()); + lhs_chunk.remove_prefix(compared_size); + rhs.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. + + while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) { + int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare); + if (comparison_result != 0) return comparison_result; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty()); +} + +inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + Cord::ChunkIterator rhs_it = rhs.chunk_begin(); + + // compared_size is inside both first chunks. + absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); + absl::string_view rhs_chunk = + (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs_chunk.size()); + lhs_chunk.remove_prefix(compared_size); + rhs_chunk.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. + + while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) { + int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare); + if (memcmp_res != 0) return memcmp_res; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs_chunk.empty()) - + static_cast<int>(lhs_chunk.empty()); +} + +inline absl::string_view Cord::GetFirstChunk(const Cord& c) { + return c.contents_.FindFlatStartPiece(); +} +inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) { + return sv; +} + +// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed +// that 'size_to_compare' is greater that size of smallest of first chunks. +template <typename ResultType, typename RHS> +ResultType GenericCompare(const Cord& lhs, const RHS& rhs, + size_t size_to_compare) { + absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs); + absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs); + + size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size()); + assert(size_to_compare >= compared_size); + int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size); + if (compared_size == size_to_compare || memcmp_res != 0) { + return ComputeCompareResult<ResultType>(memcmp_res); + } + + return ComputeCompareResult<ResultType>( + lhs.CompareSlowPath(rhs, compared_size, size_to_compare)); +} + +bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +template <typename RHS> +inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size == rhs_size) { + return GenericCompare<int>(lhs, rhs, lhs_size); + } + if (lhs_size < rhs_size) { + auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size); + return data_comp_res == 0 ? -1 : data_comp_res; + } + + auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size); + return data_comp_res == 0 ? +1 : data_comp_res; +} + +int Cord::Compare(absl::string_view rhs) const { + return SharedCompareImpl(*this, rhs); +} + +int Cord::CompareImpl(const Cord& rhs) const { + return SharedCompareImpl(*this, rhs); +} + +bool Cord::EndsWith(absl::string_view rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +bool Cord::EndsWith(const Cord& rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +// -------------------------------------------------------------------- +// Misc. + +Cord::operator std::string() const { + std::string s; + absl::CopyCordToString(*this, &s); + return s; +} + +void CopyCordToString(const Cord& src, std::string* dst) { + if (!src.contents_.is_tree()) { + src.contents_.CopyTo(dst); + } else { + absl::strings_internal::STLStringResizeUninitialized(dst, src.size()); + src.CopyToArraySlowPath(&(*dst)[0]); + } +} + +void Cord::CopyToArraySlowPath(char* dst) const { + assert(contents_.is_tree()); + absl::string_view fragment; + if (GetFlatAux(contents_.tree(), &fragment)) { + memcpy(dst, fragment.data(), fragment.size()); + return; + } + for (absl::string_view chunk : Chunks()) { + memcpy(dst, chunk.data(), chunk.size()); + dst += chunk.size(); + } +} + +Cord::ChunkIterator& Cord::ChunkIterator::operator++() { + assert(bytes_remaining_ > 0 && "Attempted to iterate past `end()`"); + assert(bytes_remaining_ >= current_chunk_.size()); + bytes_remaining_ -= current_chunk_.size(); + + if (stack_of_right_children_.empty()) { + assert(!current_chunk_.empty()); // Called on invalid iterator. + // We have reached the end of the Cord. + return *this; + } + + // Process the next node on the stack. + CordRep* node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + + // Walk down the left branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length != 0); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset, length); + current_leaf_ = node; + return *this; +} + +Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { + assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + Cord subcord; + + if (n <= InlineRep::kMaxInline) { + // Range to read fits in inline data. Flatten it. + char* data = subcord.contents_.set_data(n); + while (n > current_chunk_.size()) { + memcpy(data, current_chunk_.data(), current_chunk_.size()); + data += current_chunk_.size(); + n -= current_chunk_.size(); + ++*this; + } + memcpy(data, current_chunk_.data(), n); + if (n < current_chunk_.size()) { + RemoveChunkPrefix(n); + } else if (n > 0) { + ++*this; + } + return subcord; + } + if (n < current_chunk_.size()) { + // Range to read is a proper subrange of the current chunk. + assert(current_leaf_ != nullptr); + CordRep* subnode = Ref(current_leaf_); + const char* data = + subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + subnode = NewSubstring(subnode, current_chunk_.data() - data, n); + subcord.contents_.set_tree(VerifyTree(subnode)); + RemoveChunkPrefix(n); + return subcord; + } + + // Range to read begins with a proper subrange of the current chunk. + assert(!current_chunk_.empty()); + assert(current_leaf_ != nullptr); + CordRep* subnode = Ref(current_leaf_); + if (current_chunk_.size() < subnode->length) { + const char* data = + subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + subnode = NewSubstring(subnode, current_chunk_.data() - data, + current_chunk_.size()); + } + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + // Process the next node(s) on the stack, reading whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + while (!stack_of_right_children_.empty()) { + node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + if (node->length > n) break; + // TODO(qrczak): This might unnecessarily recreate existing concat nodes. + // Avoiding that would need pretty complicated logic (instead of + // current_leaf_, keep current_subtree_ which points to the highest node + // such that the current leaf can be found on the path of left children + // starting from current_subtree_; delay creating subnode while node is + // below current_subtree_; find the proper node along the path of left + // children starting from current_subtree_ if this loop exits while staying + // below current_subtree_; etc.; alternatively, push parents instead of + // right children on the stack). + subnode = Concat(subnode, Ref(node)); + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + subcord.contents_.set_tree(VerifyTree(subnode)); + return subcord; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + if (node->concat()->left->length > n) { + // Push right, descend left. + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Read left, descend right. + subnode = Concat(subnode, Ref(node->concat()->left)); + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + // Range to read ends with a proper (possibly empty) subrange of the current + // chunk. + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length > n); + if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n)); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; + subcord.contents_.set_tree(VerifyTree(subnode)); + return subcord; +} + +void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { + assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + assert(n >= current_chunk_.size()); // This should only be called when + // iterating to a new node. + + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + // Process the next node(s) on the stack, skipping whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + while (!stack_of_right_children_.empty()) { + node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + if (node->length > n) break; + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + return; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + if (node->concat()->left->length > n) { + // Push right, descend left. + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Skip left, descend right. + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length > n); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; +} + +char Cord::operator[](size_t i) const { + assert(i < size()); + size_t offset = i; + const CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return contents_.data()[i]; + } + while (true) { + assert(rep != nullptr); + assert(offset < rep->length); + if (rep->tag >= FLAT) { + // Get the "i"th character directly from the flat array. + return rep->data[offset]; + } else if (rep->tag == EXTERNAL) { + // Get the "i"th character from the external array. + return rep->external()->base[offset]; + } else if (rep->tag == CONCAT) { + // Recursively branch to the side of the concatenation that the "i"th + // character is on. + size_t left_length = rep->concat()->left->length; + if (offset < left_length) { + rep = rep->concat()->left; + } else { + offset -= left_length; + rep = rep->concat()->right; + } + } else { + // This must be a substring a node, so bypass it to get to the child. + assert(rep->tag == SUBSTRING); + offset += rep->substring()->start; + rep = rep->substring()->child; + } + } +} + +absl::string_view Cord::FlattenSlowPath() { + size_t total_size = size(); + CordRep* new_rep; + char* new_buffer; + + // Try to put the contents into a new flat rep. If they won't fit in the + // biggest possible flat node, use an external rep instead. + if (total_size <= kMaxFlatLength) { + new_rep = NewFlat(total_size); + new_rep->length = total_size; + new_buffer = new_rep->data; + CopyToArraySlowPath(new_buffer); + } else { + new_buffer = std::allocator<char>().allocate(total_size); + CopyToArraySlowPath(new_buffer); + new_rep = absl::cord_internal::NewExternalRep( + absl::string_view(new_buffer, total_size), [](absl::string_view s) { + std::allocator<char>().deallocate(const_cast<char*>(s.data()), + s.size()); + }); + } + Unref(contents_.tree()); + contents_.set_tree(new_rep); + return absl::string_view(new_buffer, total_size); +} + +/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { + assert(rep != nullptr); + if (rep->tag >= FLAT) { + *fragment = absl::string_view(rep->data, rep->length); + return true; + } else if (rep->tag == EXTERNAL) { + *fragment = absl::string_view(rep->external()->base, rep->length); + return true; + } else if (rep->tag == SUBSTRING) { + CordRep* child = rep->substring()->child; + if (child->tag >= FLAT) { + *fragment = + absl::string_view(child->data + rep->substring()->start, rep->length); + return true; + } else if (child->tag == EXTERNAL) { + *fragment = absl::string_view( + child->external()->base + rep->substring()->start, rep->length); + return true; + } + } + return false; +} + +/* static */ void Cord::ForEachChunkAux( + absl::cord_internal::CordRep* rep, + absl::FunctionRef<void(absl::string_view)> callback) { + assert(rep != nullptr); + int stack_pos = 0; + constexpr int stack_max = 128; + // Stack of right branches for tree traversal + absl::cord_internal::CordRep* stack[stack_max]; + absl::cord_internal::CordRep* current_node = rep; + while (true) { + if (current_node->tag == CONCAT) { + if (stack_pos == stack_max) { + // There's no more room on our stack array to add another right branch, + // and the idea is to avoid allocations, so call this function + // recursively to navigate this subtree further. (This is not something + // we expect to happen in practice). + ForEachChunkAux(current_node, callback); + + // Pop the next right branch and iterate. + current_node = stack[--stack_pos]; + continue; + } else { + // Save the right branch for later traversal and continue down the left + // branch. + stack[stack_pos++] = current_node->concat()->right; + current_node = current_node->concat()->left; + continue; + } + } + // This is a leaf node, so invoke our callback. + absl::string_view chunk; + bool success = GetFlatAux(current_node, &chunk); + assert(success); + if (success) { + callback(chunk); + } + if (stack_pos == 0) { + // end of traversal + return; + } + current_node = stack[--stack_pos]; + } +} + +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) { + const int kIndentStep = 1; + int indent = 0; + absl::InlinedVector<CordRep*, kInlinedVectorSize> stack; + absl::InlinedVector<int, kInlinedVectorSize> indents; + for (;;) { + *os << std::setw(3) << rep->refcount.Get(); + *os << " " << std::setw(7) << rep->length; + *os << " ["; + if (include_data) *os << static_cast<void*>(rep); + *os << "]"; + *os << " " << (IsRootBalanced(rep) ? 'b' : 'u'); + *os << " " << std::setw(indent) << ""; + if (rep->tag == CONCAT) { + *os << "CONCAT depth=" << Depth(rep) << "\n"; + indent += kIndentStep; + indents.push_back(indent); + stack.push_back(rep->concat()->right); + rep = rep->concat()->left; + } else if (rep->tag == SUBSTRING) { + *os << "SUBSTRING @ " << rep->substring()->start << "\n"; + indent += kIndentStep; + rep = rep->substring()->child; + } else { // Leaf + if (rep->tag == EXTERNAL) { + *os << "EXTERNAL ["; + if (include_data) + *os << absl::CEscape(std::string(rep->external()->base, rep->length)); + *os << "]\n"; + } else { + *os << "FLAT cap=" << TagToLength(rep->tag) << " ["; + if (include_data) + *os << absl::CEscape(std::string(rep->data, rep->length)); + *os << "]\n"; + } + if (stack.empty()) break; + rep = stack.back(); + stack.pop_back(); + indent = indents.back(); + indents.pop_back(); + } + } + ABSL_INTERNAL_CHECK(indents.empty(), ""); +} + +static std::string ReportError(CordRep* root, CordRep* node) { + std::ostringstream buf; + buf << "Error at node " << node << " in:"; + DumpNode(root, true, &buf); + return buf.str(); +} + +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation) { + absl::InlinedVector<CordRep*, 2> worklist; + worklist.push_back(start_node); + do { + CordRep* node = worklist.back(); + worklist.pop_back(); + + ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node)); + if (node != root) { + ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node)); + } + + if (node->tag == CONCAT) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length + + node->concat()->right->length), + ReportError(root, node)); + if (full_validation) { + worklist.push_back(node->concat()->right); + worklist.push_back(node->concat()->left); + } + } else if (node->tag >= FLAT) { + ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag), + ReportError(root, node)); + } else if (node->tag == EXTERNAL) { + ABSL_INTERNAL_CHECK(node->external()->base != nullptr, + ReportError(root, node)); + } else if (node->tag == SUBSTRING) { + ABSL_INTERNAL_CHECK( + node->substring()->start < node->substring()->child->length, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->substring()->start + node->length <= + node->substring()->child->length, + ReportError(root, node)); + } + } while (!worklist.empty()); + return true; +} + +// Traverses the tree and computes the total memory allocated. +/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) { + size_t total_mem_usage = 0; + + // Allow a quick exit for the common case that the root is a leaf. + if (RepMemoryUsageLeaf(rep, &total_mem_usage)) { + return total_mem_usage; + } + + // Iterate over the tree. cur_node is never a leaf node and leaf nodes will + // never be appended to tree_stack. This reduces overhead from manipulating + // tree_stack. + absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack; + const CordRep* cur_node = rep; + while (true) { + const CordRep* next_node = nullptr; + + if (cur_node->tag == CONCAT) { + total_mem_usage += sizeof(CordRepConcat); + const CordRep* left = cur_node->concat()->left; + if (!RepMemoryUsageLeaf(left, &total_mem_usage)) { + next_node = left; + } + + const CordRep* right = cur_node->concat()->right; + if (!RepMemoryUsageLeaf(right, &total_mem_usage)) { + if (next_node) { + tree_stack.push_back(next_node); + } + next_node = right; + } + } else { + // Since cur_node is not a leaf or a concat node it must be a substring. + assert(cur_node->tag == SUBSTRING); + total_mem_usage += sizeof(CordRepSubstring); + next_node = cur_node->substring()->child; + if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) { + next_node = nullptr; + } + } + + if (!next_node) { + if (tree_stack.empty()) { + return total_mem_usage; + } + next_node = tree_stack.back(); + tree_stack.pop_back(); + } + cur_node = next_node; + } +} + +std::ostream& operator<<(std::ostream& out, const Cord& cord) { + for (absl::string_view chunk : cord.Chunks()) { + out.write(chunk.data(), chunk.size()); + } + return out; +} + +namespace strings_internal { +size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; } +size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; } +size_t CordTestAccess::FlatTagToLength(uint8_t tag) { + return TagToLength(tag); +} +uint8_t CordTestAccess::LengthToTag(size_t s) { + ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s)); + return AllocatedSizeToTag(s + kFlatOverhead); +} +size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); } +size_t CordTestAccess::SizeofCordRepExternal() { + return sizeof(CordRepExternal); +} +size_t CordTestAccess::SizeofCordRepSubstring() { + return sizeof(CordRepSubstring); +} +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/cord.h b/absl/strings/cord.h new file mode 100644 index 00000000..40566cba --- /dev/null +++ b/absl/strings/cord.h @@ -0,0 +1,1121 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A Cord is a sequence of characters with some unusual access propreties. +// A Cord supports efficient insertions and deletions at the start and end of +// the byte sequence, but random access reads are slower, and random access +// modifications are not supported by the API. Cord also provides cheap copies +// (using a copy-on-write strategy) and cheap substring operations. +// +// Thread safety +// ------------- +// Cord has the same thread-safety properties as many other types like +// std::string, std::vector<>, int, etc -- it is thread-compatible. In +// particular, if no thread may call a non-const method, then it is safe to +// concurrently call const methods. Copying a Cord produces a new instance that +// can be used concurrently with the original in arbitrary ways. +// +// Implementation is similar to the "Ropes" described in: +// Ropes: An alternative to strings +// Hans J. Boehm, Russ Atkinson, Michael Plass +// Software Practice and Experience, December 1995 + +#ifndef ABSL_STRINGS_CORD_H_ +#define ABSL_STRINGS_CORD_H_ + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iostream> +#include <iterator> +#include <string> + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/invoke.h" +#include "absl/base/internal/per_thread_tls.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/container/inlined_vector.h" +#include "absl/functional/function_ref.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +class Cord; +class CordTestPeer; +template <typename Releaser> +Cord MakeCordFromExternal(absl::string_view, Releaser&&); +void CopyCordToString(const Cord& src, std::string* dst); +namespace hash_internal { +template <typename H> +H HashFragmentedCord(H, const Cord&); +} + +// A Cord is a sequence of characters. +class Cord { + private: + template <typename T> + using EnableIfString = + absl::enable_if_t<std::is_same<T, std::string>::value, int>; + + public: + // -------------------------------------------------------------------- + // Constructors, destructors and helper factories + + // Create an empty cord + constexpr Cord() noexcept; + + // Cord is copyable and efficiently movable. + // The moved-from state is valid but unspecified. + Cord(const Cord& src); + Cord(Cord&& src) noexcept; + Cord& operator=(const Cord& x); + Cord& operator=(Cord&& x) noexcept; + + // Create a cord out of "src". This constructor is explicit on + // purpose so that people do not get automatic type conversions. + explicit Cord(absl::string_view src); + Cord& operator=(absl::string_view src); + + // These are templated to avoid ambiguities for types that are convertible to + // both `absl::string_view` and `std::string`, such as `const char*`. + // + // Note that these functions reserve the right to reuse the `string&&`'s + // memory and that they will do so in the future. + template <typename T, EnableIfString<T> = 0> + explicit Cord(T&& src) : Cord(absl::string_view(src)) {} + template <typename T, EnableIfString<T> = 0> + Cord& operator=(T&& src); + + // Destroy the cord + ~Cord() { + if (contents_.is_tree()) DestroyCordSlow(); + } + + // Creates a Cord that takes ownership of external memory. The contents of + // `data` are not copied. + // + // This function takes a callable that is invoked when all Cords are + // finished with `data`. The data must remain live and unchanging until the + // releaser is called. The requirements for the releaser are that it: + // * is move constructible, + // * supports `void operator()(absl::string_view) const`, + // * does not have alignment requirement greater than what is guaranteed by + // ::operator new. This is dictated by alignof(std::max_align_t) before + // C++17 and __STDCPP_DEFAULT_NEW_ALIGNMENT__ if compiling with C++17 or + // it is supported by the implementation. + // + // Example: + // + // Cord MakeCord(BlockPool* pool) { + // Block* block = pool->NewBlock(); + // FillBlock(block); + // return absl::MakeCordFromExternal( + // block->ToStringView(), + // [pool, block](absl::string_view /*ignored*/) { + // pool->FreeBlock(block); + // }); + // } + // + // WARNING: It's likely a bug if your releaser doesn't do anything. + // For example, consider the following: + // + // void Foo(const char* buffer, int len) { + // auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len), + // [](absl::string_view) {}); + // + // // BUG: If Bar() copies its cord for any reason, including keeping a + // // substring of it, the lifetime of buffer might be extended beyond + // // when Foo() returns. + // Bar(c); + // } + template <typename Releaser> + friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser); + + // -------------------------------------------------------------------- + // Mutations + + void Clear(); + + void Append(const Cord& src); + void Append(Cord&& src); + void Append(absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Append(T&& src); + + void Prepend(const Cord& src); + void Prepend(absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Prepend(T&& src); + + void RemovePrefix(size_t n); + void RemoveSuffix(size_t n); + + // Returns a new cord representing the subrange [pos, pos + new_size) of + // *this. If pos >= size(), the result is empty(). If + // (pos + new_size) >= size(), the result is the subrange [pos, size()). + Cord Subcord(size_t pos, size_t new_size) const; + + friend void swap(Cord& x, Cord& y) noexcept; + + // -------------------------------------------------------------------- + // Accessors + + size_t size() const; + bool empty() const; + + // Returns the approximate number of bytes pinned by this Cord. Note that + // Cords that share memory could each be "charged" independently for the same + // shared memory. + size_t EstimatedMemoryUsage() const; + + // -------------------------------------------------------------------- + // Comparators + + // Compares 'this' Cord with rhs. This function and its relatives + // treat Cords as sequences of unsigned bytes. The comparison is a + // straightforward lexicographic comparison. Return value: + // -1 'this' Cord is smaller + // 0 two Cords are equal + // 1 'this' Cord is larger + int Compare(absl::string_view rhs) const; + int Compare(const Cord& rhs) const; + + // Does 'this' cord start/end with rhs + bool StartsWith(const Cord& rhs) const; + bool StartsWith(absl::string_view rhs) const; + bool EndsWith(absl::string_view rhs) const; + bool EndsWith(const Cord& rhs) const; + + // -------------------------------------------------------------------- + // Conversion to other types + + explicit operator std::string() const; + + // Copies the contents from `src` to `*dst`. + // + // This function optimizes the case of reusing the destination std::string since it + // can reuse previously allocated capacity. However, this function does not + // guarantee that pointers previously returned by `dst->data()` remain valid + // even if `*dst` had enough capacity to hold `src`. If `*dst` is a new + // object, prefer to simply use the conversion operator to `std::string`. + friend void CopyCordToString(const Cord& src, std::string* dst); + + // -------------------------------------------------------------------- + // Iteration + + class CharIterator; + + // Type for iterating over the chunks of a `Cord`. See comments for + // `Cord::chunk_begin()`, `Cord::chunk_end()` and `Cord::Chunks()` below for + // preferred usage. + // + // Additional notes: + // * The `string_view` returned by dereferencing a valid, non-`end()` + // iterator is guaranteed to be non-empty. + // * A `ChunkIterator` object is invalidated after any non-const + // operation on the `Cord` object over which it iterates. + // * Two `ChunkIterator` objects can be equality compared if and only if + // they remain valid and iterate over the same `Cord`. + // * This is a proxy iterator. This means the `string_view` returned by the + // iterator does not live inside the Cord, and its lifetime is limited to + // the lifetime of the iterator itself. To help prevent issues, + // `ChunkIterator::reference` is not a true reference type and is + // equivalent to `value_type`. + // * The iterator keeps state that can grow for `Cord`s that contain many + // nodes and are imbalanced due to sharing. Prefer to pass this type by + // const reference instead of by value. + class ChunkIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = value_type; + + ChunkIterator() = default; + + ChunkIterator& operator++(); + ChunkIterator operator++(int); + bool operator==(const ChunkIterator& other) const; + bool operator!=(const ChunkIterator& other) const; + reference operator*() const; + pointer operator->() const; + + friend class Cord; + friend class CharIterator; + + private: + // Constructs a `begin()` iterator from `cord`. + explicit ChunkIterator(const Cord* cord); + + // Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than + // `current_chunk_.size()`. + void RemoveChunkPrefix(size_t n); + Cord AdvanceAndReadBytes(size_t n); + void AdvanceBytes(size_t n); + // Iterates `n` bytes, where `n` is expected to be greater than or equal to + // `current_chunk_.size()`. + void AdvanceBytesSlowPath(size_t n); + + // A view into bytes of the current `CordRep`. It may only be a view to a + // suffix of bytes if this is being used by `CharIterator`. + absl::string_view current_chunk_; + // The current leaf, or `nullptr` if the iterator points to short data. + // If the current chunk is a substring node, current_leaf_ points to the + // underlying flat or external node. + absl::cord_internal::CordRep* current_leaf_ = nullptr; + // The number of bytes left in the `Cord` over which we are iterating. + size_t bytes_remaining_ = 0; + absl::InlinedVector<absl::cord_internal::CordRep*, 4> + stack_of_right_children_; + }; + + // Returns an iterator to the first chunk of the `Cord`. + // + // This is useful for getting a `ChunkIterator` outside the context of a + // range-based for-loop (in which case see `Cord::Chunks()` below). + // + // Example: + // + // absl::Cord::ChunkIterator FindAsChunk(const absl::Cord& c, + // absl::string_view s) { + // return std::find(c.chunk_begin(), c.chunk_end(), s); + // } + ChunkIterator chunk_begin() const; + // Returns an iterator one increment past the last chunk of the `Cord`. + ChunkIterator chunk_end() const; + + // Convenience wrapper over `Cord::chunk_begin()` and `Cord::chunk_end()` to + // enable range-based for-loop iteration over `Cord` chunks. + // + // Prefer to use `Cord::Chunks()` below instead of constructing this directly. + class ChunkRange { + public: + explicit ChunkRange(const Cord* cord) : cord_(cord) {} + + ChunkIterator begin() const; + ChunkIterator end() const; + + private: + const Cord* cord_; + }; + + // Returns a range for iterating over the chunks of a `Cord` with a + // range-based for-loop. + // + // Example: + // + // void ProcessChunks(const Cord& cord) { + // for (absl::string_view chunk : cord.Chunks()) { ... } + // } + // + // Note that the ordinary caveats of temporary lifetime extension apply: + // + // void Process() { + // for (absl::string_view chunk : CordFactory().Chunks()) { + // // The temporary Cord returned by CordFactory has been destroyed! + // } + // } + ChunkRange Chunks() const; + + // Type for iterating over the characters of a `Cord`. See comments for + // `Cord::char_begin()`, `Cord::char_end()` and `Cord::Chars()` below for + // preferred usage. + // + // Additional notes: + // * A `CharIterator` object is invalidated after any non-const + // operation on the `Cord` object over which it iterates. + // * Two `CharIterator` objects can be equality compared if and only if + // they remain valid and iterate over the same `Cord`. + // * The iterator keeps state that can grow for `Cord`s that contain many + // nodes and are imbalanced due to sharing. Prefer to pass this type by + // const reference instead of by value. + // * This type cannot be a forward iterator because a `Cord` can reuse + // sections of memory. This violates the requirement that if dereferencing + // two iterators returns the same object, the iterators must compare + // equal. + class CharIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = char; + using difference_type = ptrdiff_t; + using pointer = const char*; + using reference = const char&; + + CharIterator() = default; + + CharIterator& operator++(); + CharIterator operator++(int); + bool operator==(const CharIterator& other) const; + bool operator!=(const CharIterator& other) const; + reference operator*() const; + pointer operator->() const; + + friend Cord; + + private: + explicit CharIterator(const Cord* cord) : chunk_iterator_(cord) {} + + ChunkIterator chunk_iterator_; + }; + + // Advances `*it` by `n_bytes` and returns the bytes passed as a `Cord`. + // + // `n_bytes` must be less than or equal to the number of bytes remaining for + // iteration. Otherwise the behavior is undefined. It is valid to pass + // `char_end()` and 0. + static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes); + + // Advances `*it` by `n_bytes`. + // + // `n_bytes` must be less than or equal to the number of bytes remaining for + // iteration. Otherwise the behavior is undefined. It is valid to pass + // `char_end()` and 0. + static void Advance(CharIterator* it, size_t n_bytes); + + // Returns the longest contiguous view starting at the iterator's position. + // + // `it` must be dereferenceable. + static absl::string_view ChunkRemaining(const CharIterator& it); + + // Returns an iterator to the first character of the `Cord`. + CharIterator char_begin() const; + // Returns an iterator to one past the last character of the `Cord`. + CharIterator char_end() const; + + // Convenience wrapper over `Cord::char_begin()` and `Cord::char_end()` to + // enable range-based for-loop iterator over the characters of a `Cord`. + // + // Prefer to use `Cord::Chars()` below instead of constructing this directly. + class CharRange { + public: + explicit CharRange(const Cord* cord) : cord_(cord) {} + + CharIterator begin() const; + CharIterator end() const; + + private: + const Cord* cord_; + }; + + // Returns a range for iterating over the characters of a `Cord` with a + // range-based for-loop. + // + // Example: + // + // void ProcessCord(const Cord& cord) { + // for (char c : cord.Chars()) { ... } + // } + // + // Note that the ordinary caveats of temporary lifetime extension apply: + // + // void Process() { + // for (char c : CordFactory().Chars()) { + // // The temporary Cord returned by CordFactory has been destroyed! + // } + // } + CharRange Chars() const; + + // -------------------------------------------------------------------- + // Miscellaneous + + // Get the "i"th character of 'this' and return it. + // NOTE: This routine is reasonably efficient. It is roughly + // logarithmic in the number of nodes that make up the cord. Still, + // if you need to iterate over the contents of a cord, you should + // use a CharIterator/CordIterator rather than call operator[] or Get() + // repeatedly in a loop. + // + // REQUIRES: 0 <= i < size() + char operator[](size_t i) const; + + // Flattens the cord into a single array and returns a view of the data. + // + // If the cord was already flat, the contents are not modified. + absl::string_view Flatten(); + + private: + friend class CordTestPeer; + template <typename H> + friend H absl::hash_internal::HashFragmentedCord(H, const Cord&); + friend bool operator==(const Cord& lhs, const Cord& rhs); + friend bool operator==(const Cord& lhs, absl::string_view rhs); + + // Call the provided function once for each cord chunk, in order. Unlike + // Chunks(), this API will not allocate memory. + void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const; + + // Allocates new contiguous storage for the contents of the cord. This is + // called by Flatten() when the cord was not already flat. + absl::string_view FlattenSlowPath(); + + // Actual cord contents are hidden inside the following simple + // class so that we can isolate the bulk of cord.cc from changes + // to the representation. + // + // InlineRep holds either either a tree pointer, or an array of kMaxInline + // bytes. + class InlineRep { + public: + static const unsigned char kMaxInline = 15; + static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), ""); + // Tag byte & kMaxInline means we are storing a pointer. + static const unsigned char kTreeFlag = 1 << 4; + // Tag byte & kProfiledFlag means we are profiling the Cord. + static const unsigned char kProfiledFlag = 1 << 5; + + constexpr InlineRep() : data_{} {} + InlineRep(const InlineRep& src); + InlineRep(InlineRep&& src); + InlineRep& operator=(const InlineRep& src); + InlineRep& operator=(InlineRep&& src) noexcept; + + void Swap(InlineRep* rhs); + bool empty() const; + size_t size() const; + const char* data() const; // Returns nullptr if holding pointer + void set_data(const char* data, size_t n, + bool nullify_tail); // Discards pointer, if any + char* set_data(size_t n); // Write data to the result + // Returns nullptr if holding bytes + absl::cord_internal::CordRep* tree() const; + // Discards old pointer, if any + void set_tree(absl::cord_internal::CordRep* rep); + // Replaces a tree with a new root. This is faster than set_tree, but it + // should only be used when it's clear that the old rep was a tree. + void replace_tree(absl::cord_internal::CordRep* rep); + // Returns non-null iff was holding a pointer + absl::cord_internal::CordRep* clear(); + // Convert to pointer if necessary + absl::cord_internal::CordRep* force_tree(size_t extra_hint); + void reduce_size(size_t n); // REQUIRES: holding data + void remove_prefix(size_t n); // REQUIRES: holding data + void AppendArray(const char* src_data, size_t src_size); + absl::string_view FindFlatStartPiece() const; + void AppendTree(absl::cord_internal::CordRep* tree); + void PrependTree(absl::cord_internal::CordRep* tree); + void GetAppendRegion(char** region, size_t* size, size_t max_length); + void GetAppendRegion(char** region, size_t* size); + bool IsSame(const InlineRep& other) const { + return memcmp(data_, other.data_, sizeof(data_)) == 0; + } + int BitwiseCompare(const InlineRep& other) const { + uint64_t x, y; + // Use memcpy to avoid anti-aliasing issues. + memcpy(&x, data_, sizeof(x)); + memcpy(&y, other.data_, sizeof(y)); + if (x == y) { + memcpy(&x, data_ + 8, sizeof(x)); + memcpy(&y, other.data_ + 8, sizeof(y)); + if (x == y) return 0; + } + return absl::big_endian::FromHost64(x) < absl::big_endian::FromHost64(y) + ? -1 + : 1; + } + void CopyTo(std::string* dst) const { + // memcpy is much faster when operating on a known size. On most supported + // platforms, the small std::string optimization is large enough that resizing + // to 15 bytes does not cause a memory allocation. + absl::strings_internal::STLStringResizeUninitialized(dst, + sizeof(data_) - 1); + memcpy(&(*dst)[0], data_, sizeof(data_) - 1); + // erase is faster than resize because the logic for memory allocation is + // not needed. + dst->erase(data_[kMaxInline]); + } + + // Copies the inline contents into `dst`. Assumes the cord is not empty. + void CopyToArray(char* dst) const; + + bool is_tree() const { return data_[kMaxInline] > kMaxInline; } + + private: + friend class Cord; + + void AssignSlow(const InlineRep& src); + // Unrefs the tree, stops profiling, and zeroes the contents + void ClearSlow(); + + // If the data has length <= kMaxInline, we store it in data_[0..len-1], + // and store the length in data_[kMaxInline]. Else we store it in a tree + // and store a pointer to that tree in data_[0..sizeof(CordRep*)-1]. + alignas(absl::cord_internal::CordRep*) char data_[kMaxInline + 1]; + }; + InlineRep contents_; + + // Helper for MemoryUsage() + static size_t MemoryUsageAux(const absl::cord_internal::CordRep* rep); + + // Helper for GetFlat() + static bool GetFlatAux(absl::cord_internal::CordRep* rep, + absl::string_view* fragment); + + // Helper for ForEachChunk() + static void ForEachChunkAux( + absl::cord_internal::CordRep* rep, + absl::FunctionRef<void(absl::string_view)> callback); + + // The destructor for non-empty Cords. + void DestroyCordSlow(); + + // Out-of-line implementation of slower parts of logic. + void CopyToArraySlowPath(char* dst) const; + int CompareSlowPath(absl::string_view rhs, size_t compared_size, + size_t size_to_compare) const; + int CompareSlowPath(const Cord& rhs, size_t compared_size, + size_t size_to_compare) const; + bool EqualsImpl(absl::string_view rhs, size_t size_to_compare) const; + bool EqualsImpl(const Cord& rhs, size_t size_to_compare) const; + int CompareImpl(const Cord& rhs) const; + + template <typename ResultType, typename RHS> + friend ResultType GenericCompare(const Cord& lhs, const RHS& rhs, + size_t size_to_compare); + static absl::string_view GetFirstChunk(const Cord& c); + static absl::string_view GetFirstChunk(absl::string_view sv); + + // Returns a new reference to contents_.tree(), or steals an existing + // reference if called on an rvalue. + absl::cord_internal::CordRep* TakeRep() const&; + absl::cord_internal::CordRep* TakeRep() &&; + + // Helper for Append() + template <typename C> + void AppendImpl(C&& src); +}; + +ABSL_NAMESPACE_END +} // namespace absl + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// allow a Cord to be logged +extern std::ostream& operator<<(std::ostream& out, const Cord& cord); + +// ------------------------------------------------------------------ +// Internal details follow. Clients should ignore. + +namespace cord_internal { + +// Fast implementation of memmove for up to 15 bytes. This implementation is +// safe for overlapping regions. If nullify_tail is true, the destination is +// padded with '\0' up to 16 bytes. +inline void SmallMemmove(char* dst, const char* src, size_t n, + bool nullify_tail = false) { + if (n >= 8) { + assert(n <= 16); + uint64_t buf1; + uint64_t buf2; + memcpy(&buf1, src, 8); + memcpy(&buf2, src + n - 8, 8); + if (nullify_tail) { + memset(dst + 8, 0, 8); + } + memcpy(dst, &buf1, 8); + memcpy(dst + n - 8, &buf2, 8); + } else if (n >= 4) { + uint32_t buf1; + uint32_t buf2; + memcpy(&buf1, src, 4); + memcpy(&buf2, src + n - 4, 4); + if (nullify_tail) { + memset(dst + 4, 0, 4); + memset(dst + 8, 0, 8); + } + memcpy(dst, &buf1, 4); + memcpy(dst + n - 4, &buf2, 4); + } else { + if (n != 0) { + dst[0] = src[0]; + dst[n / 2] = src[n / 2]; + dst[n - 1] = src[n - 1]; + } + if (nullify_tail) { + memset(dst + 8, 0, 8); + memset(dst + n, 0, 8); + } + } +} + +struct ExternalRepReleaserPair { + CordRep* rep; + void* releaser_address; +}; + +// Allocates a new external `CordRep` and returns a pointer to it and a pointer +// to `releaser_size` bytes where the desired releaser can be constructed. +// Expects `data` to be non-empty. +ExternalRepReleaserPair NewExternalWithUninitializedReleaser( + absl::string_view data, ExternalReleaserInvoker invoker, + size_t releaser_size); + +// Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer +// to it, or `nullptr` if `data` was empty. +template <typename Releaser> +// NOLINTNEXTLINE - suppress clang-tidy raw pointer return. +CordRep* NewExternalRep(absl::string_view data, Releaser&& releaser) { + static_assert( +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + alignof(Releaser) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__, +#else + alignof(Releaser) <= alignof(max_align_t), +#endif + "Releasers with alignment requirement greater than what is returned by " + "default `::operator new()` are not supported."); + + using ReleaserType = absl::decay_t<Releaser>; + if (data.empty()) { + // Never create empty external nodes. + ::absl::base_internal::Invoke( + ReleaserType(std::forward<Releaser>(releaser)), data); + return nullptr; + } + + auto releaser_invoker = [](void* type_erased_releaser, absl::string_view d) { + auto* my_releaser = static_cast<ReleaserType*>(type_erased_releaser); + ::absl::base_internal::Invoke(std::move(*my_releaser), d); + my_releaser->~ReleaserType(); + return sizeof(Releaser); + }; + + ExternalRepReleaserPair external = NewExternalWithUninitializedReleaser( + data, releaser_invoker, sizeof(releaser)); + ::new (external.releaser_address) + ReleaserType(std::forward<Releaser>(releaser)); + return external.rep; +} + +// Overload for function reference types that dispatches using a function +// pointer because there are no `alignof()` or `sizeof()` a function reference. +// NOLINTNEXTLINE - suppress clang-tidy raw pointer return. +inline CordRep* NewExternalRep(absl::string_view data, + void (&releaser)(absl::string_view)) { + return NewExternalRep(data, &releaser); +} + +} // namespace cord_internal + +template <typename Releaser> +Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { + Cord cord; + cord.contents_.set_tree(::absl::cord_internal::NewExternalRep( + data, std::forward<Releaser>(releaser))); + return cord; +} + +inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) { + cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); +} + +inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) { + memcpy(data_, src.data_, sizeof(data_)); + memset(src.data_, 0, sizeof(data_)); +} + +inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { + if (this == &src) { + return *this; + } + if (!is_tree() && !src.is_tree()) { + cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); + return *this; + } + AssignSlow(src); + return *this; +} + +inline Cord::InlineRep& Cord::InlineRep::operator=( + Cord::InlineRep&& src) noexcept { + if (is_tree()) { + ClearSlow(); + } + memcpy(data_, src.data_, sizeof(data_)); + memset(src.data_, 0, sizeof(data_)); + return *this; +} + +inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) { + if (rhs == this) { + return; + } + + Cord::InlineRep tmp; + cord_internal::SmallMemmove(tmp.data_, data_, sizeof(data_)); + cord_internal::SmallMemmove(data_, rhs->data_, sizeof(data_)); + cord_internal::SmallMemmove(rhs->data_, tmp.data_, sizeof(data_)); +} + +inline const char* Cord::InlineRep::data() const { + return is_tree() ? nullptr : data_; +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::tree() const { + if (is_tree()) { + absl::cord_internal::CordRep* rep; + memcpy(&rep, data_, sizeof(rep)); + return rep; + } else { + return nullptr; + } +} + +inline bool Cord::InlineRep::empty() const { return data_[kMaxInline] == 0; } + +inline size_t Cord::InlineRep::size() const { + const char tag = data_[kMaxInline]; + if (tag <= kMaxInline) return tag; + return static_cast<size_t>(tree()->length); +} + +inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) { + if (rep == nullptr) { + memset(data_, 0, sizeof(data_)); + } else { + bool was_tree = is_tree(); + memcpy(data_, &rep, sizeof(rep)); + memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); + if (!was_tree) { + data_[kMaxInline] = kTreeFlag; + } + } +} + +inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) { + ABSL_ASSERT(is_tree()); + if (ABSL_PREDICT_FALSE(rep == nullptr)) { + set_tree(rep); + return; + } + memcpy(data_, &rep, sizeof(rep)); + memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::clear() { + const char tag = data_[kMaxInline]; + absl::cord_internal::CordRep* result = nullptr; + if (tag > kMaxInline) { + memcpy(&result, data_, sizeof(result)); + } + memset(data_, 0, sizeof(data_)); // Clear the cord + return result; +} + +inline void Cord::InlineRep::CopyToArray(char* dst) const { + assert(!is_tree()); + size_t n = data_[kMaxInline]; + assert(n != 0); + cord_internal::SmallMemmove(dst, data_, n); +} + +constexpr inline Cord::Cord() noexcept {} + +inline Cord& Cord::operator=(const Cord& x) { + contents_ = x.contents_; + return *this; +} + +inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} + +inline Cord& Cord::operator=(Cord&& x) noexcept { + contents_ = std::move(x.contents_); + return *this; +} + +template <typename T, Cord::EnableIfString<T>> +inline Cord& Cord::operator=(T&& src) { + *this = absl::string_view(src); + return *this; +} + +inline size_t Cord::size() const { + // Length is 1st field in str.rep_ + return contents_.size(); +} + +inline bool Cord::empty() const { return contents_.empty(); } + +inline size_t Cord::EstimatedMemoryUsage() const { + size_t result = sizeof(Cord); + if (const absl::cord_internal::CordRep* rep = contents_.tree()) { + result += MemoryUsageAux(rep); + } + return result; +} + +inline absl::string_view Cord::Flatten() { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return absl::string_view(contents_.data(), contents_.size()); + } else { + absl::string_view already_flat_contents; + if (GetFlatAux(rep, &already_flat_contents)) { + return already_flat_contents; + } + } + return FlattenSlowPath(); +} + +inline void Cord::Append(absl::string_view src) { + contents_.AppendArray(src.data(), src.size()); +} + +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Append(T&& src) { + // Note that this function reserves the right to reuse the `string&&`'s + // memory and that it will do so in the future. + Append(absl::string_view(src)); +} + +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Prepend(T&& src) { + // Note that this function reserves the right to reuse the `string&&`'s + // memory and that it will do so in the future. + Prepend(absl::string_view(src)); +} + +inline int Cord::Compare(const Cord& rhs) const { + if (!contents_.is_tree() && !rhs.contents_.is_tree()) { + return contents_.BitwiseCompare(rhs.contents_); + } + + return CompareImpl(rhs); +} + +// Does 'this' cord start/end with rhs +inline bool Cord::StartsWith(const Cord& rhs) const { + if (contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline bool Cord::StartsWith(absl::string_view rhs) const { + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) + : bytes_remaining_(cord->size()) { + if (cord->empty()) return; + if (cord->contents_.is_tree()) { + stack_of_right_children_.push_back(cord->contents_.tree()); + operator++(); + } else { + current_chunk_ = absl::string_view(cord->contents_.data(), cord->size()); + } +} + +inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) { + ChunkIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::ChunkIterator::operator==(const ChunkIterator& other) const { + return bytes_remaining_ == other.bytes_remaining_; +} + +inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const { + return !(*this == other); +} + +inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const { + assert(bytes_remaining_ != 0); + return current_chunk_; +} + +inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const { + assert(bytes_remaining_ != 0); + return ¤t_chunk_; +} + +inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) { + assert(n < current_chunk_.size()); + current_chunk_.remove_prefix(n); + bytes_remaining_ -= n; +} + +inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { + if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { + RemoveChunkPrefix(n); + } else if (n != 0) { + AdvanceBytesSlowPath(n); + } +} + +inline Cord::ChunkIterator Cord::chunk_begin() const { + return ChunkIterator(this); +} + +inline Cord::ChunkIterator Cord::chunk_end() const { return ChunkIterator(); } + +inline Cord::ChunkIterator Cord::ChunkRange::begin() const { + return cord_->chunk_begin(); +} + +inline Cord::ChunkIterator Cord::ChunkRange::end() const { + return cord_->chunk_end(); +} + +inline Cord::ChunkRange Cord::Chunks() const { return ChunkRange(this); } + +inline Cord::CharIterator& Cord::CharIterator::operator++() { + if (ABSL_PREDICT_TRUE(chunk_iterator_->size() > 1)) { + chunk_iterator_.RemoveChunkPrefix(1); + } else { + ++chunk_iterator_; + } + return *this; +} + +inline Cord::CharIterator Cord::CharIterator::operator++(int) { + CharIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::CharIterator::operator==(const CharIterator& other) const { + return chunk_iterator_ == other.chunk_iterator_; +} + +inline bool Cord::CharIterator::operator!=(const CharIterator& other) const { + return !(*this == other); +} + +inline Cord::CharIterator::reference Cord::CharIterator::operator*() const { + return *chunk_iterator_->data(); +} + +inline Cord::CharIterator::pointer Cord::CharIterator::operator->() const { + return chunk_iterator_->data(); +} + +inline Cord Cord::AdvanceAndRead(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + return it->chunk_iterator_.AdvanceAndReadBytes(n_bytes); +} + +inline void Cord::Advance(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + it->chunk_iterator_.AdvanceBytes(n_bytes); +} + +inline absl::string_view Cord::ChunkRemaining(const CharIterator& it) { + return *it.chunk_iterator_; +} + +inline Cord::CharIterator Cord::char_begin() const { + return CharIterator(this); +} + +inline Cord::CharIterator Cord::char_end() const { return CharIterator(); } + +inline Cord::CharIterator Cord::CharRange::begin() const { + return cord_->char_begin(); +} + +inline Cord::CharIterator Cord::CharRange::end() const { + return cord_->char_end(); +} + +inline Cord::CharRange Cord::Chars() const { return CharRange(this); } + +inline void Cord::ForEachChunk( + absl::FunctionRef<void(absl::string_view)> callback) const { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + callback(absl::string_view(contents_.data(), contents_.size())); + } else { + return ForEachChunkAux(rep, callback); + } +} + +// Nonmember Cord-to-Cord relational operarators. +inline bool operator==(const Cord& lhs, const Cord& rhs) { + if (lhs.contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (lhs.size() != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator!=(const Cord& x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, const Cord& y) { + return x.Compare(y) < 0; +} +inline bool operator>(const Cord& x, const Cord& y) { + return x.Compare(y) > 0; +} +inline bool operator<=(const Cord& x, const Cord& y) { + return x.Compare(y) <= 0; +} +inline bool operator>=(const Cord& x, const Cord& y) { + return x.Compare(y) >= 0; +} + +// Nonmember Cord-to-absl::string_view relational operators. +// +// Due to implicit conversions, these also enable comparisons of Cord with +// with std::string, ::string, and const char*. +inline bool operator==(const Cord& lhs, absl::string_view rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator==(absl::string_view x, const Cord& y) { return y == x; } +inline bool operator!=(const Cord& x, absl::string_view y) { return !(x == y); } +inline bool operator!=(absl::string_view x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, absl::string_view y) { + return x.Compare(y) < 0; +} +inline bool operator<(absl::string_view x, const Cord& y) { + return y.Compare(x) > 0; +} +inline bool operator>(const Cord& x, absl::string_view y) { return y < x; } +inline bool operator>(absl::string_view x, const Cord& y) { return y < x; } +inline bool operator<=(const Cord& x, absl::string_view y) { return !(y < x); } +inline bool operator<=(absl::string_view x, const Cord& y) { return !(y < x); } +inline bool operator>=(const Cord& x, absl::string_view y) { return !(x < y); } +inline bool operator>=(absl::string_view x, const Cord& y) { return !(x < y); } + +// Overload of swap for Cord. The use of non-const references is +// required. :( +inline void swap(Cord& x, Cord& y) noexcept { y.contents_.Swap(&x.contents_); } + +// Some internals exposed to test code. +namespace strings_internal { +class CordTestAccess { + public: + static size_t FlatOverhead(); + static size_t MaxFlatLength(); + static size_t SizeofCordRepConcat(); + static size_t SizeofCordRepExternal(); + static size_t SizeofCordRepSubstring(); + static size_t FlatTagToLength(uint8_t tag); + static uint8_t LengthToTag(size_t s); +}; +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORD_H_ diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc new file mode 100644 index 00000000..434f3a24 --- /dev/null +++ b/absl/strings/cord_test.cc @@ -0,0 +1,1526 @@ +#include "absl/strings/cord.h" + +#include <algorithm> +#include <climits> +#include <cstdio> +#include <iterator> +#include <map> +#include <numeric> +#include <random> +#include <sstream> +#include <type_traits> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/casts.h" +#include "absl/base/config.h" +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/container/fixed_array.h" +#include "absl/strings/cord_test_helpers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +typedef std::mt19937_64 RandomEngine; + +static std::string RandomLowercaseString(RandomEngine* rng); +static std::string RandomLowercaseString(RandomEngine* rng, size_t length); + +static int GetUniformRandomUpTo(RandomEngine* rng, int upper_bound) { + if (upper_bound > 0) { + std::uniform_int_distribution<int> uniform(0, upper_bound - 1); + return uniform(*rng); + } else { + return 0; + } +} + +static size_t GetUniformRandomUpTo(RandomEngine* rng, size_t upper_bound) { + if (upper_bound > 0) { + std::uniform_int_distribution<size_t> uniform(0, upper_bound - 1); + return uniform(*rng); + } else { + return 0; + } +} + +static int32_t GenerateSkewedRandom(RandomEngine* rng, int max_log) { + const uint32_t base = (*rng)() % (max_log + 1); + const uint32_t mask = ((base < 32) ? (1u << base) : 0u) - 1u; + return (*rng)() & mask; +} + +static std::string RandomLowercaseString(RandomEngine* rng) { + int length; + std::bernoulli_distribution one_in_1k(0.001); + std::bernoulli_distribution one_in_10k(0.0001); + // With low probability, make a large fragment + if (one_in_10k(*rng)) { + length = GetUniformRandomUpTo(rng, 1048576); + } else if (one_in_1k(*rng)) { + length = GetUniformRandomUpTo(rng, 10000); + } else { + length = GenerateSkewedRandom(rng, 10); + } + return RandomLowercaseString(rng, length); +} + +static std::string RandomLowercaseString(RandomEngine* rng, size_t length) { + std::string result(length, '\0'); + std::uniform_int_distribution<int> chars('a', 'z'); + std::generate(result.begin(), result.end(), [&]() { + return static_cast<char>(chars(*rng)); + }); + return result; +} + +static void DoNothing(absl::string_view /* data */, void* /* arg */) {} + +static void DeleteExternalString(absl::string_view data, void* arg) { + std::string* s = reinterpret_cast<std::string*>(arg); + EXPECT_EQ(data, *s); + delete s; +} + +// Add "s" to *dst via `MakeCordFromExternal` +static void AddExternalMemory(absl::string_view s, absl::Cord* dst) { + std::string* str = new std::string(s.data(), s.size()); + dst->Append(absl::MakeCordFromExternal(*str, [str](absl::string_view data) { + DeleteExternalString(data, str); + })); +} + +static void DumpGrowth() { + absl::Cord str; + for (int i = 0; i < 1000; i++) { + char c = 'a' + i % 26; + str.Append(absl::string_view(&c, 1)); + } +} + +// Make a Cord with some number of fragments. Return the size (in bytes) +// of the smallest fragment. +static size_t AppendWithFragments(const std::string& s, RandomEngine* rng, + absl::Cord* cord) { + size_t j = 0; + const size_t max_size = s.size() / 5; // Make approx. 10 fragments + size_t min_size = max_size; // size of smallest fragment + while (j < s.size()) { + size_t N = 1 + GetUniformRandomUpTo(rng, max_size); + if (N > (s.size() - j)) { + N = s.size() - j; + } + if (N < min_size) { + min_size = N; + } + + std::bernoulli_distribution coin_flip(0.5); + if (coin_flip(*rng)) { + // Grow by adding an external-memory. + AddExternalMemory(absl::string_view(s.data() + j, N), cord); + } else { + cord->Append(absl::string_view(s.data() + j, N)); + } + j += N; + } + return min_size; +} + +// Add an external memory that contains the specified std::string to cord +static void AddNewStringBlock(const std::string& str, absl::Cord* dst) { + char* data = new char[str.size()]; + memcpy(data, str.data(), str.size()); + dst->Append(absl::MakeCordFromExternal( + absl::string_view(data, str.size()), + [](absl::string_view s) { delete[] s.data(); })); +} + +// Make a Cord out of many different types of nodes. +static absl::Cord MakeComposite() { + absl::Cord cord; + cord.Append("the"); + AddExternalMemory(" quick brown", &cord); + AddExternalMemory(" fox jumped", &cord); + + absl::Cord full(" over"); + AddExternalMemory(" the lazy", &full); + AddNewStringBlock(" dog slept the whole day away", &full); + absl::Cord substring = full.Subcord(0, 18); + + // Make substring long enough to defeat the copying fast path in Append. + substring.Append(std::string(1000, '.')); + cord.Append(substring); + cord = cord.Subcord(0, cord.size() - 998); // Remove most of extra junk + + return cord; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class CordTestPeer { + public: + static void ForEachChunk( + const Cord& c, absl::FunctionRef<void(absl::string_view)> callback) { + c.ForEachChunk(callback); + } +}; + +ABSL_NAMESPACE_END +} // namespace absl + +TEST(Cord, AllFlatSizes) { + using absl::strings_internal::CordTestAccess; + + for (size_t s = 0; s < CordTestAccess::MaxFlatLength(); s++) { + // Make a std::string of length s. + std::string src; + while (src.size() < s) { + src.push_back('a' + (src.size() % 26)); + } + + absl::Cord dst(src); + EXPECT_EQ(std::string(dst), src) << s; + } +} + +// We create a Cord at least 128GB in size using the fact that Cords can +// internally reference-count; thus the Cord is enormous without actually +// consuming very much memory. +TEST(GigabyteCord, FromExternal) { + const size_t one_gig = 1024U * 1024U * 1024U; + size_t max_size = 2 * one_gig; + if (sizeof(max_size) > 4) max_size = 128 * one_gig; + + size_t length = 128 * 1024; + char* data = new char[length]; + absl::Cord from = absl::MakeCordFromExternal( + absl::string_view(data, length), + [](absl::string_view sv) { delete[] sv.data(); }); + + // This loop may seem odd due to its combination of exponential doubling of + // size and incremental size increases. We do it incrementally to be sure the + // Cord will need rebalancing and will exercise code that, in the past, has + // caused crashes in production. We grow exponentially so that the code will + // execute in a reasonable amount of time. + absl::Cord c; + ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); + c.Append(from); + while (c.size() < max_size) { + c.Append(c); + c.Append(from); + c.Append(from); + c.Append(from); + c.Append(from); + } + + for (int i = 0; i < 1024; ++i) { + c.Append(from); + } + ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); + // Note: on a 32-bit build, this comes out to 2,818,048,000 bytes. + // Note: on a 64-bit build, this comes out to 171,932,385,280 bytes. +} + +static absl::Cord MakeExternalCord(int size) { + char* buffer = new char[size]; + memset(buffer, 'x', size); + absl::Cord cord; + cord.Append(absl::MakeCordFromExternal( + absl::string_view(buffer, size), + [](absl::string_view s) { delete[] s.data(); })); + return cord; +} + +// Extern to fool clang that this is not constant. Needed to suppress +// a warning of unsafe code we want to test. +extern bool my_unique_true_boolean; +bool my_unique_true_boolean = true; + +TEST(Cord, Assignment) { + absl::Cord x(absl::string_view("hi there")); + absl::Cord y(x); + ASSERT_EQ(std::string(x), "hi there"); + ASSERT_EQ(std::string(y), "hi there"); + ASSERT_TRUE(x == y); + ASSERT_TRUE(x <= y); + ASSERT_TRUE(y <= x); + + x = absl::string_view("foo"); + ASSERT_EQ(std::string(x), "foo"); + ASSERT_EQ(std::string(y), "hi there"); + ASSERT_TRUE(x < y); + ASSERT_TRUE(y > x); + ASSERT_TRUE(x != y); + ASSERT_TRUE(x <= y); + ASSERT_TRUE(y >= x); + + x = "foo"; + ASSERT_EQ(x, "foo"); + + // Test that going from inline rep to tree we don't leak memory. + std::vector<std::pair<absl::string_view, absl::string_view>> + test_string_pairs = {{"hi there", "foo"}, + {"loooooong coooooord", "short cord"}, + {"short cord", "loooooong coooooord"}, + {"loooooong coooooord1", "loooooong coooooord2"}}; + for (std::pair<absl::string_view, absl::string_view> test_strings : + test_string_pairs) { + absl::Cord tmp(test_strings.first); + absl::Cord z(std::move(tmp)); + ASSERT_EQ(std::string(z), test_strings.first); + tmp = test_strings.second; + z = std::move(tmp); + ASSERT_EQ(std::string(z), test_strings.second); + } + { + // Test that self-move assignment doesn't crash/leak. + // Do not write such code! + absl::Cord my_small_cord("foo"); + absl::Cord my_big_cord("loooooong coooooord"); + // Bypass clang's warning on self move-assignment. + absl::Cord* my_small_alias = + my_unique_true_boolean ? &my_small_cord : &my_big_cord; + absl::Cord* my_big_alias = + !my_unique_true_boolean ? &my_small_cord : &my_big_cord; + + *my_small_alias = std::move(my_small_cord); + *my_big_alias = std::move(my_big_cord); + // my_small_cord and my_big_cord are in an unspecified but valid + // state, and will be correctly destroyed here. + } +} + +TEST(Cord, StartsEndsWith) { + absl::Cord x(absl::string_view("abcde")); + absl::Cord empty(""); + + ASSERT_TRUE(x.StartsWith(absl::Cord("abcde"))); + ASSERT_TRUE(x.StartsWith(absl::Cord("abc"))); + ASSERT_TRUE(x.StartsWith(absl::Cord(""))); + ASSERT_TRUE(empty.StartsWith(absl::Cord(""))); + ASSERT_TRUE(x.EndsWith(absl::Cord("abcde"))); + ASSERT_TRUE(x.EndsWith(absl::Cord("cde"))); + ASSERT_TRUE(x.EndsWith(absl::Cord(""))); + ASSERT_TRUE(empty.EndsWith(absl::Cord(""))); + + ASSERT_TRUE(!x.StartsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!empty.StartsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!x.EndsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!empty.EndsWith(absl::Cord("xyz"))); + + ASSERT_TRUE(x.StartsWith("abcde")); + ASSERT_TRUE(x.StartsWith("abc")); + ASSERT_TRUE(x.StartsWith("")); + ASSERT_TRUE(empty.StartsWith("")); + ASSERT_TRUE(x.EndsWith("abcde")); + ASSERT_TRUE(x.EndsWith("cde")); + ASSERT_TRUE(x.EndsWith("")); + ASSERT_TRUE(empty.EndsWith("")); + + ASSERT_TRUE(!x.StartsWith("xyz")); + ASSERT_TRUE(!empty.StartsWith("xyz")); + ASSERT_TRUE(!x.EndsWith("xyz")); + ASSERT_TRUE(!empty.EndsWith("xyz")); +} + +TEST(Cord, Subcord) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + const std::string s = RandomLowercaseString(&rng, 1024); + + absl::Cord a; + AppendWithFragments(s, &rng, &a); + ASSERT_EQ(s.size(), a.size()); + + // Check subcords of a, from a variety of interesting points. + std::set<size_t> positions; + for (int i = 0; i <= 32; ++i) { + positions.insert(i); + positions.insert(i * 32 - 1); + positions.insert(i * 32); + positions.insert(i * 32 + 1); + positions.insert(a.size() - i); + } + positions.insert(237); + positions.insert(732); + for (size_t pos : positions) { + if (pos > a.size()) continue; + for (size_t end_pos : positions) { + if (end_pos < pos || end_pos > a.size()) continue; + absl::Cord sa = a.Subcord(pos, end_pos - pos); + EXPECT_EQ(absl::string_view(s).substr(pos, end_pos - pos), + std::string(sa)) + << a; + } + } + + // Do the same thing for an inline cord. + const std::string sh = "short"; + absl::Cord c(sh); + for (size_t pos = 0; pos <= sh.size(); ++pos) { + for (size_t n = 0; n <= sh.size() - pos; ++n) { + absl::Cord sc = c.Subcord(pos, n); + EXPECT_EQ(sh.substr(pos, n), std::string(sc)) << c; + } + } + + // Check subcords of subcords. + absl::Cord sa = a.Subcord(0, a.size()); + std::string ss = s.substr(0, s.size()); + while (sa.size() > 1) { + sa = sa.Subcord(1, sa.size() - 2); + ss = ss.substr(1, ss.size() - 2); + EXPECT_EQ(ss, std::string(sa)) << a; + if (HasFailure()) break; // halt cascade + } + + // It is OK to ask for too much. + sa = a.Subcord(0, a.size() + 1); + EXPECT_EQ(s, std::string(sa)); + + // It is OK to ask for something beyond the end. + sa = a.Subcord(a.size() + 1, 0); + EXPECT_TRUE(sa.empty()); + sa = a.Subcord(a.size() + 1, 1); + EXPECT_TRUE(sa.empty()); +} + +TEST(Cord, Swap) { + absl::string_view a("Dexter"); + absl::string_view b("Mandark"); + absl::Cord x(a); + absl::Cord y(b); + swap(x, y); + ASSERT_EQ(x, absl::Cord(b)); + ASSERT_EQ(y, absl::Cord(a)); +} + +static void VerifyCopyToString(const absl::Cord& cord) { + std::string initially_empty; + absl::CopyCordToString(cord, &initially_empty); + EXPECT_EQ(initially_empty, cord); + + constexpr size_t kInitialLength = 1024; + std::string has_initial_contents(kInitialLength, 'x'); + const char* address_before_copy = has_initial_contents.data(); + absl::CopyCordToString(cord, &has_initial_contents); + EXPECT_EQ(has_initial_contents, cord); + + if (cord.size() <= kInitialLength) { + EXPECT_EQ(has_initial_contents.data(), address_before_copy) + << "CopyCordToString allocated new std::string storage; " + "has_initial_contents = \"" + << has_initial_contents << "\""; + } +} + +TEST(Cord, CopyToString) { + VerifyCopyToString(absl::Cord()); + VerifyCopyToString(absl::Cord("small cord")); + VerifyCopyToString( + absl::MakeFragmentedCord({"fragmented ", "cord ", "to ", "test ", + "copying ", "to ", "a ", "string."})); +} + +static bool IsFlat(const absl::Cord& c) { + return c.chunk_begin() == c.chunk_end() || ++c.chunk_begin() == c.chunk_end(); +} + +static void VerifyFlatten(absl::Cord c) { + std::string old_contents(c); + absl::string_view old_flat; + bool already_flat_and_non_empty = IsFlat(c) && !c.empty(); + if (already_flat_and_non_empty) { + old_flat = *c.chunk_begin(); + } + absl::string_view new_flat = c.Flatten(); + + // Verify that the contents of the flattened Cord are correct. + EXPECT_EQ(new_flat, old_contents); + EXPECT_EQ(std::string(c), old_contents); + + // If the Cord contained data and was already flat, verify that the data + // wasn't copied. + if (already_flat_and_non_empty) { + EXPECT_EQ(old_flat.data(), new_flat.data()) + << "Allocated new memory even though the Cord was already flat."; + } + + // Verify that the flattened Cord is in fact flat. + EXPECT_TRUE(IsFlat(c)); +} + +TEST(Cord, Flatten) { + VerifyFlatten(absl::Cord()); + VerifyFlatten(absl::Cord("small cord")); + VerifyFlatten(absl::Cord("larger than small buffer optimization")); + VerifyFlatten(absl::MakeFragmentedCord({"small ", "fragmented ", "cord"})); + + // Test with a cord that is longer than the largest flat buffer + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + VerifyFlatten(absl::Cord(RandomLowercaseString(&rng, 8192))); +} + +// Test data +namespace { +class TestData { + private: + std::vector<std::string> data_; + + // Return a std::string of the specified length. + static std::string MakeString(int length) { + std::string result; + char buf[30]; + snprintf(buf, sizeof(buf), "(%d)", length); + while (result.size() < length) { + result += buf; + } + result.resize(length); + return result; + } + + public: + TestData() { + // short strings increasing in length by one + for (int i = 0; i < 30; i++) { + data_.push_back(MakeString(i)); + } + + // strings around half kMaxFlatLength + static const int kMaxFlatLength = 4096 - 9; + static const int kHalf = kMaxFlatLength / 2; + + for (int i = -10; i <= +10; i++) { + data_.push_back(MakeString(kHalf + i)); + } + + for (int i = -10; i <= +10; i++) { + data_.push_back(MakeString(kMaxFlatLength + i)); + } + } + + size_t size() const { return data_.size(); } + const std::string& data(size_t i) const { return data_[i]; } +}; +} // namespace + +TEST(Cord, MultipleLengths) { + TestData d; + for (size_t i = 0; i < d.size(); i++) { + std::string a = d.data(i); + + { // Construct from Cord + absl::Cord tmp(a); + absl::Cord x(tmp); + EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; + } + + { // Construct from absl::string_view + absl::Cord x(a); + EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; + } + + { // Append cord to self + absl::Cord self(a); + self.Append(self); + EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; + } + + { // Prepend cord to self + absl::Cord self(a); + self.Prepend(self); + EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; + } + + // Try to append/prepend others + for (size_t j = 0; j < d.size(); j++) { + std::string b = d.data(j); + + { // CopyFrom Cord + absl::Cord x(a); + absl::Cord y(b); + x = y; + EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // CopyFrom absl::string_view + absl::Cord x(a); + x = b; + EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Append(Cord) + absl::Cord x(a); + absl::Cord y(b); + x.Append(y); + EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Append(absl::string_view) + absl::Cord x(a); + x.Append(b); + EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Prepend(Cord) + absl::Cord x(a); + absl::Cord y(b); + x.Prepend(y); + EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; + } + + { // Cord::Prepend(absl::string_view) + absl::Cord x(a); + x.Prepend(b); + EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; + } + } + } +} + +namespace { + +TEST(Cord, RemoveSuffixWithExternalOrSubstring) { + absl::Cord cord = absl::MakeCordFromExternal( + "foo bar baz", [](absl::string_view s) { DoNothing(s, nullptr); }); + + EXPECT_EQ("foo bar baz", std::string(cord)); + + // This RemoveSuffix() will wrap the EXTERNAL node in a SUBSTRING node. + cord.RemoveSuffix(4); + EXPECT_EQ("foo bar", std::string(cord)); + + // This RemoveSuffix() will adjust the SUBSTRING node in-place. + cord.RemoveSuffix(4); + EXPECT_EQ("foo", std::string(cord)); +} + +TEST(Cord, RemoveSuffixMakesZeroLengthNode) { + absl::Cord c; + c.Append(absl::Cord(std::string(100, 'x'))); + absl::Cord other_ref = c; // Prevent inplace appends + c.Append(absl::Cord(std::string(200, 'y'))); + c.RemoveSuffix(200); + EXPECT_EQ(std::string(100, 'x'), std::string(c)); +} + +} // namespace + +// CordSpliceTest contributed by hendrie. +namespace { + +// Create a cord with an external memory block filled with 'z' +absl::Cord CordWithZedBlock(size_t size) { + char* data = new char[size]; + if (size > 0) { + memset(data, 'z', size); + } + absl::Cord cord = absl::MakeCordFromExternal( + absl::string_view(data, size), + [](absl::string_view s) { delete[] s.data(); }); + return cord; +} + +// Establish that ZedBlock does what we think it does. +TEST(CordSpliceTest, ZedBlock) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + std::string s; + absl::CopyCordToString(blob, &s); + EXPECT_EQ("zzzzzzzzzz", s); +} + +TEST(CordSpliceTest, ZedBlock0) { + absl::Cord blob = CordWithZedBlock(0); + EXPECT_EQ(0, blob.size()); + std::string s; + absl::CopyCordToString(blob, &s); + EXPECT_EQ("", s); +} + +TEST(CordSpliceTest, ZedBlockSuffix1) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + absl::Cord suffix(blob); + suffix.RemovePrefix(9); + EXPECT_EQ(1, suffix.size()); + std::string s; + absl::CopyCordToString(suffix, &s); + EXPECT_EQ("z", s); +} + +// Remove all of a prefix block +TEST(CordSpliceTest, ZedBlockSuffix0) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + absl::Cord suffix(blob); + suffix.RemovePrefix(10); + EXPECT_EQ(0, suffix.size()); + std::string s; + absl::CopyCordToString(suffix, &s); + EXPECT_EQ("", s); +} + +absl::Cord BigCord(size_t len, char v) { + std::string s(len, v); + return absl::Cord(s); +} + +// Splice block into cord. +absl::Cord SpliceCord(const absl::Cord& blob, int64_t offset, + const absl::Cord& block) { + ABSL_RAW_CHECK(offset >= 0, ""); + ABSL_RAW_CHECK(offset + block.size() <= blob.size(), ""); + absl::Cord result(blob); + result.RemoveSuffix(blob.size() - offset); + result.Append(block); + absl::Cord suffix(blob); + suffix.RemovePrefix(offset + block.size()); + result.Append(suffix); + ABSL_RAW_CHECK(blob.size() == result.size(), ""); + return result; +} + +// Taking an empty suffix of a block breaks appending. +TEST(CordSpliceTest, RemoveEntireBlock1) { + absl::Cord zero = CordWithZedBlock(10); + absl::Cord suffix(zero); + suffix.RemovePrefix(10); + absl::Cord result; + result.Append(suffix); +} + +TEST(CordSpliceTest, RemoveEntireBlock2) { + absl::Cord zero = CordWithZedBlock(10); + absl::Cord prefix(zero); + prefix.RemoveSuffix(10); + absl::Cord suffix(zero); + suffix.RemovePrefix(10); + absl::Cord result(prefix); + result.Append(suffix); +} + +TEST(CordSpliceTest, RemoveEntireBlock3) { + absl::Cord blob = CordWithZedBlock(10); + absl::Cord block = BigCord(10, 'b'); + blob = SpliceCord(blob, 0, block); +} + +struct CordCompareTestCase { + template <typename LHS, typename RHS> + CordCompareTestCase(const LHS& lhs, const RHS& rhs) + : lhs_cord(lhs), rhs_cord(rhs) {} + + absl::Cord lhs_cord; + absl::Cord rhs_cord; +}; + +const auto sign = [](int x) { return x == 0 ? 0 : (x > 0 ? 1 : -1); }; + +void VerifyComparison(const CordCompareTestCase& test_case) { + std::string lhs_string(test_case.lhs_cord); + std::string rhs_string(test_case.rhs_cord); + int expected = sign(lhs_string.compare(rhs_string)); + EXPECT_EQ(expected, test_case.lhs_cord.Compare(test_case.rhs_cord)) + << "LHS=" << lhs_string << "; RHS=" << rhs_string; + EXPECT_EQ(expected, test_case.lhs_cord.Compare(rhs_string)) + << "LHS=" << lhs_string << "; RHS=" << rhs_string; + EXPECT_EQ(-expected, test_case.rhs_cord.Compare(test_case.lhs_cord)) + << "LHS=" << rhs_string << "; RHS=" << lhs_string; + EXPECT_EQ(-expected, test_case.rhs_cord.Compare(lhs_string)) + << "LHS=" << rhs_string << "; RHS=" << lhs_string; +} + +TEST(Cord, Compare) { + absl::Cord subcord("aaaaaBBBBBcccccDDDDD"); + subcord = subcord.Subcord(3, 10); + + absl::Cord tmp("aaaaaaaaaaaaaaaa"); + tmp.Append("BBBBBBBBBBBBBBBB"); + absl::Cord concat = absl::Cord("cccccccccccccccc"); + concat.Append("DDDDDDDDDDDDDDDD"); + concat.Prepend(tmp); + + absl::Cord concat2("aaaaaaaaaaaaa"); + concat2.Append("aaaBBBBBBBBBBBBBBBBccccc"); + concat2.Append("cccccccccccDDDDDDDDDDDDDD"); + concat2.Append("DD"); + + std::vector<CordCompareTestCase> test_cases = {{ + // Inline cords + {"abcdef", "abcdef"}, + {"abcdef", "abcdee"}, + {"abcdef", "abcdeg"}, + {"bbcdef", "abcdef"}, + {"bbcdef", "abcdeg"}, + {"abcdefa", "abcdef"}, + {"abcdef", "abcdefa"}, + + // Small flat cords + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBxccccDDDDD"}, + {"aaaaaBBBBBcxcccDDDDD", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBxccccDDDDD", "aaaaaBBBBBcccccDDDDX"}, + {"aaaaaBBBBBcccccDDDDDa", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDDa"}, + + // Subcords + {subcord, subcord}, + {subcord, "aaBBBBBccc"}, + {subcord, "aaBBBBBccd"}, + {subcord, "aaBBBBBccb"}, + {subcord, "aaBBBBBxcb"}, + {subcord, "aaBBBBBccca"}, + {subcord, "aaBBBBBcc"}, + + // Concats + {concat, concat}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBcccccccccccccccxDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBacccccccccccccccDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDDe"}, + + {concat, concat2}, + }}; + + for (const auto& tc : test_cases) { + VerifyComparison(tc); + } +} + +TEST(Cord, CompareAfterAssign) { + absl::Cord a("aaaaaa1111111"); + absl::Cord b("aaaaaa2222222"); + a = "cccccc"; + b = "cccccc"; + EXPECT_EQ(a, b); + EXPECT_FALSE(a < b); + + a = "aaaa"; + b = "bbbbb"; + a = ""; + b = ""; + EXPECT_EQ(a, b); + EXPECT_FALSE(a < b); +} + +// Test CompareTo() and ComparePrefix() against string and substring +// comparison methods from std::basic_string. +static void TestCompare(const absl::Cord& c, const absl::Cord& d, + RandomEngine* rng) { + typedef std::basic_string<uint8_t> ustring; + ustring cs(reinterpret_cast<const uint8_t*>(std::string(c).data()), c.size()); + ustring ds(reinterpret_cast<const uint8_t*>(std::string(d).data()), d.size()); + // ustring comparison is ideal because we expect Cord comparisons to be + // based on unsigned byte comparisons regardless of whether char is signed. + int expected = sign(cs.compare(ds)); + EXPECT_EQ(expected, sign(c.Compare(d))) << c << ", " << d; +} + +TEST(Compare, ComparisonIsUnsigned) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + std::uniform_int_distribution<uint32_t> uniform_uint8(0, 255); + char x = static_cast<char>(uniform_uint8(rng)); + TestCompare( + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x)), + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x ^ 0x80)), &rng); +} + +TEST(Compare, RandomComparisons) { + const int kIters = 5000; + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + + int n = GetUniformRandomUpTo(&rng, 5000); + absl::Cord a[] = {MakeExternalCord(n), + absl::Cord("ant"), + absl::Cord("elephant"), + absl::Cord("giraffe"), + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), + GetUniformRandomUpTo(&rng, 100))), + absl::Cord(""), + absl::Cord("x"), + absl::Cord("A"), + absl::Cord("B"), + absl::Cord("C")}; + for (int i = 0; i < kIters; i++) { + absl::Cord c, d; + for (int j = 0; j < (i % 7) + 1; j++) { + c.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); + d.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); + } + std::bernoulli_distribution coin_flip(0.5); + TestCompare(coin_flip(rng) ? c : absl::Cord(std::string(c)), + coin_flip(rng) ? d : absl::Cord(std::string(d)), &rng); + } +} + +template <typename T1, typename T2> +void CompareOperators() { + const T1 a("a"); + const T2 b("b"); + + EXPECT_TRUE(a == a); + // For pointer type (i.e. `const char*`), operator== compares the address + // instead of the std::string, so `a == const char*("a")` isn't necessarily true. + EXPECT_TRUE(std::is_pointer<T1>::value || a == T1("a")); + EXPECT_TRUE(std::is_pointer<T2>::value || a == T2("a")); + EXPECT_FALSE(a == b); + + EXPECT_TRUE(a != b); + EXPECT_FALSE(a != a); + + EXPECT_TRUE(a < b); + EXPECT_FALSE(b < a); + + EXPECT_TRUE(b > a); + EXPECT_FALSE(a > b); + + EXPECT_TRUE(a >= a); + EXPECT_TRUE(b >= a); + EXPECT_FALSE(a >= b); + + EXPECT_TRUE(a <= a); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(b <= a); +} + +TEST(ComparisonOperators, Cord_Cord) { + CompareOperators<absl::Cord, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_StringPiece) { + CompareOperators<absl::Cord, absl::string_view>(); +} + +TEST(ComparisonOperators, StringPiece_Cord) { + CompareOperators<absl::string_view, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_string) { + CompareOperators<absl::Cord, std::string>(); +} + +TEST(ComparisonOperators, string_Cord) { + CompareOperators<std::string, absl::Cord>(); +} + +TEST(ComparisonOperators, stdstring_Cord) { + CompareOperators<std::string, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_stdstring) { + CompareOperators<absl::Cord, std::string>(); +} + +TEST(ComparisonOperators, charstar_Cord) { + CompareOperators<const char*, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_charstar) { + CompareOperators<absl::Cord, const char*>(); +} + +TEST(ConstructFromExternal, ReleaserInvoked) { + // Empty external memory means the releaser should be called immediately. + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + auto c = absl::MakeCordFromExternal("", releaser); + EXPECT_TRUE(invoked); + } + } + + // If the size of the data is small enough, a future constructor + // implementation may copy the bytes and immediately invoke the releaser + // instead of creating an external node. We make a large dummy std::string to + // make this test independent of such an optimization. + std::string large_dummy(2048, 'c'); + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + auto c = absl::MakeCordFromExternal(large_dummy, releaser); + EXPECT_FALSE(invoked); + } + EXPECT_TRUE(invoked); + } + + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + absl::Cord copy; + { + auto c = absl::MakeCordFromExternal(large_dummy, releaser); + copy = c; + EXPECT_FALSE(invoked); + } + EXPECT_FALSE(invoked); + } + EXPECT_TRUE(invoked); + } +} + +TEST(ConstructFromExternal, CompareContents) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + + for (int length = 1; length <= 2048; length *= 2) { + std::string data = RandomLowercaseString(&rng, length); + auto* external = new std::string(data); + auto cord = + absl::MakeCordFromExternal(*external, [external](absl::string_view sv) { + EXPECT_EQ(external->data(), sv.data()); + EXPECT_EQ(external->size(), sv.size()); + delete external; + }); + EXPECT_EQ(data, cord); + } +} + +TEST(ConstructFromExternal, LargeReleaser) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + constexpr size_t kLength = 256; + std::string data = RandomLowercaseString(&rng, kLength); + std::array<char, kLength> data_array; + for (size_t i = 0; i < kLength; ++i) data_array[i] = data[i]; + bool invoked = false; + auto releaser = [data_array, &invoked](absl::string_view data) { + EXPECT_EQ(data, absl::string_view(data_array.data(), data_array.size())); + invoked = true; + }; + (void)absl::MakeCordFromExternal(data, releaser); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, FunctionPointerReleaser) { + static absl::string_view data("hello world"); + static bool invoked; + auto* releaser = + static_cast<void (*)(absl::string_view)>([](absl::string_view sv) { + EXPECT_EQ(data, sv); + invoked = true; + }); + invoked = false; + (void)absl::MakeCordFromExternal(data, releaser); + EXPECT_TRUE(invoked); + + invoked = false; + (void)absl::MakeCordFromExternal(data, *releaser); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, MoveOnlyReleaser) { + struct Releaser { + explicit Releaser(bool* invoked) : invoked(invoked) {} + Releaser(Releaser&& other) noexcept : invoked(other.invoked) {} + void operator()(absl::string_view) const { *invoked = true; } + + bool* invoked; + }; + + bool invoked = false; + (void)absl::MakeCordFromExternal("dummy", Releaser(&invoked)); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, NonTrivialReleaserDestructor) { + struct Releaser { + explicit Releaser(bool* destroyed) : destroyed(destroyed) {} + ~Releaser() { *destroyed = true; } + void operator()(absl::string_view) const {} + + bool* destroyed; + }; + + bool destroyed = false; + Releaser releaser(&destroyed); + (void)absl::MakeCordFromExternal("dummy", releaser); + EXPECT_TRUE(destroyed); +} + +TEST(ConstructFromExternal, ReferenceQualifierOverloads) { + struct Releaser { + void operator()(absl::string_view) & { *lvalue_invoked = true; } + void operator()(absl::string_view) && { *rvalue_invoked = true; } + + bool* lvalue_invoked; + bool* rvalue_invoked; + }; + + bool lvalue_invoked = false; + bool rvalue_invoked = false; + Releaser releaser = {&lvalue_invoked, &rvalue_invoked}; + (void)absl::MakeCordFromExternal("", releaser); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); + rvalue_invoked = false; + + (void)absl::MakeCordFromExternal("dummy", releaser); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); + rvalue_invoked = false; + + // NOLINTNEXTLINE: suppress clang-tidy std::move on trivially copyable type. + (void)absl::MakeCordFromExternal("dummy", std::move(releaser)); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); +} + +TEST(ExternalMemory, BasicUsage) { + static const char* strings[] = { "", "hello", "there" }; + for (const char* str : strings) { + absl::Cord dst("(prefix)"); + AddExternalMemory(str, &dst); + dst.Append("(suffix)"); + EXPECT_EQ((std::string("(prefix)") + str + std::string("(suffix)")), + std::string(dst)); + } +} + +TEST(ExternalMemory, RemovePrefixSuffix) { + // Exhaustively try all sub-strings. + absl::Cord cord = MakeComposite(); + std::string s = std::string(cord); + for (int offset = 0; offset <= s.size(); offset++) { + for (int length = 0; length <= s.size() - offset; length++) { + absl::Cord result(cord); + result.RemovePrefix(offset); + result.RemoveSuffix(result.size() - length); + EXPECT_EQ(s.substr(offset, length), std::string(result)) + << offset << " " << length; + } + } +} + +TEST(ExternalMemory, Get) { + absl::Cord cord("hello"); + AddExternalMemory(" world!", &cord); + AddExternalMemory(" how are ", &cord); + cord.Append(" you?"); + std::string s = std::string(cord); + for (int i = 0; i < s.size(); i++) { + EXPECT_EQ(s[i], cord[i]); + } +} + +// CordMemoryUsage tests verify the correctness of the EstimatedMemoryUsage() +// These tests take into account that the reported memory usage is approximate +// and non-deterministic. For all tests, We verify that the reported memory +// usage is larger than `size()`, and less than `size() * 1.5` as a cord should +// never reserve more 'extra' capacity than half of its size as it grows. +// Additionally we have some whiteboxed expectations based on our knowledge of +// the layout and size of empty and inlined cords, and flat nodes. + +TEST(CordMemoryUsage, Empty) { + EXPECT_EQ(sizeof(absl::Cord), absl::Cord().EstimatedMemoryUsage()); +} + +TEST(CordMemoryUsage, Embedded) { + absl::Cord a("hello"); + EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); +} + +TEST(CordMemoryUsage, EmbeddedAppend) { + absl::Cord a("a"); + absl::Cord b("bcd"); + EXPECT_EQ(b.EstimatedMemoryUsage(), sizeof(absl::Cord)); + a.Append(b); + EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); +} + +TEST(CordMemoryUsage, ExternalMemory) { + static const int kLength = 1000; + absl::Cord cord; + AddExternalMemory(std::string(kLength, 'x'), &cord); + EXPECT_GT(cord.EstimatedMemoryUsage(), kLength); + EXPECT_LE(cord.EstimatedMemoryUsage(), kLength * 1.5); +} + +TEST(CordMemoryUsage, Flat) { + static const int kLength = 125; + absl::Cord a(std::string(kLength, 'a')); + EXPECT_GT(a.EstimatedMemoryUsage(), kLength); + EXPECT_LE(a.EstimatedMemoryUsage(), kLength * 1.5); +} + +TEST(CordMemoryUsage, AppendFlat) { + using absl::strings_internal::CordTestAccess; + absl::Cord a(std::string(CordTestAccess::MaxFlatLength(), 'a')); + size_t length = a.EstimatedMemoryUsage(); + a.Append(std::string(CordTestAccess::MaxFlatLength(), 'b')); + size_t delta = a.EstimatedMemoryUsage() - length; + EXPECT_GT(delta, CordTestAccess::MaxFlatLength()); + EXPECT_LE(delta, CordTestAccess::MaxFlatLength() * 1.5); +} + +// Regtest for a change that had to be rolled back because it expanded out +// of the InlineRep too soon, which was observable through MemoryUsage(). +TEST(CordMemoryUsage, InlineRep) { + constexpr size_t kMaxInline = 15; // Cord::InlineRep::N + const std::string small_string(kMaxInline, 'x'); + absl::Cord c1(small_string); + + absl::Cord c2; + c2.Append(small_string); + EXPECT_EQ(c1, c2); + EXPECT_EQ(c1.EstimatedMemoryUsage(), c2.EstimatedMemoryUsage()); +} + +} // namespace + +// Regtest for 7510292 (fix a bug introduced by 7465150) +TEST(Cord, Concat_Append) { + // Create a rep of type CONCAT + absl::Cord s1("foobarbarbarbarbar"); + s1.Append("abcdefgabcdefgabcdefgabcdefgabcdefgabcdefgabcdefg"); + size_t size = s1.size(); + + // Create a copy of s1 and append to it. + absl::Cord s2 = s1; + s2.Append("x"); + + // 7465150 modifies s1 when it shouldn't. + EXPECT_EQ(s1.size(), size); + EXPECT_EQ(s2.size(), size + 1); +} + +TEST(MakeFragmentedCord, MakeFragmentedCordFromInitializerList) { + absl::Cord fragmented = + absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); + + EXPECT_EQ("A fragmented Cord", fragmented); + + auto chunk_it = fragmented.chunk_begin(); + + ASSERT_TRUE(chunk_it != fragmented.chunk_end()); + EXPECT_EQ("A ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("fragmented ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("Cord", *chunk_it); + + ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); +} + +TEST(MakeFragmentedCord, MakeFragmentedCordFromVector) { + std::vector<absl::string_view> chunks = {"A ", "fragmented ", "Cord"}; + absl::Cord fragmented = absl::MakeFragmentedCord(chunks); + + EXPECT_EQ("A fragmented Cord", fragmented); + + auto chunk_it = fragmented.chunk_begin(); + + ASSERT_TRUE(chunk_it != fragmented.chunk_end()); + EXPECT_EQ("A ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("fragmented ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("Cord", *chunk_it); + + ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); +} + +TEST(CordChunkIterator, Traits) { + static_assert(std::is_copy_constructible<absl::Cord::ChunkIterator>::value, + ""); + static_assert(std::is_copy_assignable<absl::Cord::ChunkIterator>::value, ""); + + // Move semantics to satisfy swappable via std::swap + static_assert(std::is_move_constructible<absl::Cord::ChunkIterator>::value, + ""); + static_assert(std::is_move_assignable<absl::Cord::ChunkIterator>::value, ""); + + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::ChunkIterator>::iterator_category, + std::input_iterator_tag>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::value_type, + absl::string_view>::value, + ""); + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::ChunkIterator>::difference_type, + ptrdiff_t>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::pointer, + const absl::string_view*>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::reference, + absl::string_view>::value, + ""); +} + +static void VerifyChunkIterator(const absl::Cord& cord, + size_t expected_chunks) { + EXPECT_EQ(cord.chunk_begin() == cord.chunk_end(), cord.empty()) << cord; + EXPECT_EQ(cord.chunk_begin() != cord.chunk_end(), !cord.empty()); + + absl::Cord::ChunkRange range = cord.Chunks(); + EXPECT_EQ(range.begin() == range.end(), cord.empty()); + EXPECT_EQ(range.begin() != range.end(), !cord.empty()); + + std::string content(cord); + size_t pos = 0; + auto pre_iter = cord.chunk_begin(), post_iter = cord.chunk_begin(); + size_t n_chunks = 0; + while (pre_iter != cord.chunk_end() && post_iter != cord.chunk_end()) { + EXPECT_FALSE(pre_iter == cord.chunk_end()); // NOLINT: explicitly test == + EXPECT_FALSE(post_iter == cord.chunk_end()); // NOLINT + + EXPECT_EQ(pre_iter, post_iter); + EXPECT_EQ(*pre_iter, *post_iter); + + EXPECT_EQ(pre_iter->data(), (*pre_iter).data()); + EXPECT_EQ(pre_iter->size(), (*pre_iter).size()); + + absl::string_view chunk = *pre_iter; + EXPECT_FALSE(chunk.empty()); + EXPECT_LE(pos + chunk.size(), content.size()); + EXPECT_EQ(absl::string_view(content.c_str() + pos, chunk.size()), chunk); + + int n_equal_iterators = 0; + for (absl::Cord::ChunkIterator it = range.begin(); it != range.end(); + ++it) { + n_equal_iterators += static_cast<int>(it == pre_iter); + } + EXPECT_EQ(n_equal_iterators, 1); + + ++pre_iter; + EXPECT_EQ(*post_iter++, chunk); + + pos += chunk.size(); + ++n_chunks; + } + EXPECT_EQ(expected_chunks, n_chunks); + EXPECT_EQ(pos, content.size()); + EXPECT_TRUE(pre_iter == cord.chunk_end()); // NOLINT: explicitly test == + EXPECT_TRUE(post_iter == cord.chunk_end()); // NOLINT +} + +TEST(CordChunkIterator, Operations) { + absl::Cord empty_cord; + VerifyChunkIterator(empty_cord, 0); + + absl::Cord small_buffer_cord("small cord"); + VerifyChunkIterator(small_buffer_cord, 1); + + absl::Cord flat_node_cord("larger than small buffer optimization"); + VerifyChunkIterator(flat_node_cord, 1); + + VerifyChunkIterator( + absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", + "testing ", "chunk ", "iterations."}), + 8); + + absl::Cord reused_nodes_cord(std::string(40, 'c')); + reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'b'))); + reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'a'))); + size_t expected_chunks = 3; + for (int i = 0; i < 8; ++i) { + reused_nodes_cord.Prepend(reused_nodes_cord); + expected_chunks *= 2; + VerifyChunkIterator(reused_nodes_cord, expected_chunks); + } + + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); + absl::Cord subcords; + for (int i = 0; i < 128; ++i) subcords.Prepend(flat_cord.Subcord(i, 128)); + VerifyChunkIterator(subcords, 128); +} + +TEST(CordCharIterator, Traits) { + static_assert(std::is_copy_constructible<absl::Cord::CharIterator>::value, + ""); + static_assert(std::is_copy_assignable<absl::Cord::CharIterator>::value, ""); + + // Move semantics to satisfy swappable via std::swap + static_assert(std::is_move_constructible<absl::Cord::CharIterator>::value, + ""); + static_assert(std::is_move_assignable<absl::Cord::CharIterator>::value, ""); + + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::CharIterator>::iterator_category, + std::input_iterator_tag>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::CharIterator>::value_type, + char>::value, + ""); + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::CharIterator>::difference_type, + ptrdiff_t>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::CharIterator>::pointer, + const char*>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::CharIterator>::reference, + const char&>::value, + ""); +} + +static void VerifyCharIterator(const absl::Cord& cord) { + EXPECT_EQ(cord.char_begin() == cord.char_end(), cord.empty()); + EXPECT_EQ(cord.char_begin() != cord.char_end(), !cord.empty()); + + absl::Cord::CharRange range = cord.Chars(); + EXPECT_EQ(range.begin() == range.end(), cord.empty()); + EXPECT_EQ(range.begin() != range.end(), !cord.empty()); + + size_t i = 0; + absl::Cord::CharIterator pre_iter = cord.char_begin(); + absl::Cord::CharIterator post_iter = cord.char_begin(); + std::string content(cord); + while (pre_iter != cord.char_end() && post_iter != cord.char_end()) { + EXPECT_FALSE(pre_iter == cord.char_end()); // NOLINT: explicitly test == + EXPECT_FALSE(post_iter == cord.char_end()); // NOLINT + + EXPECT_LT(i, cord.size()); + EXPECT_EQ(content[i], *pre_iter); + + EXPECT_EQ(pre_iter, post_iter); + EXPECT_EQ(*pre_iter, *post_iter); + EXPECT_EQ(&*pre_iter, &*post_iter); + + EXPECT_EQ(&*pre_iter, pre_iter.operator->()); + + const char* character_address = &*pre_iter; + absl::Cord::CharIterator copy = pre_iter; + ++copy; + EXPECT_EQ(character_address, &*pre_iter); + + int n_equal_iterators = 0; + for (absl::Cord::CharIterator it = range.begin(); it != range.end(); ++it) { + n_equal_iterators += static_cast<int>(it == pre_iter); + } + EXPECT_EQ(n_equal_iterators, 1); + + absl::Cord::CharIterator advance_iter = range.begin(); + absl::Cord::Advance(&advance_iter, i); + EXPECT_EQ(pre_iter, advance_iter); + + advance_iter = range.begin(); + EXPECT_EQ(absl::Cord::AdvanceAndRead(&advance_iter, i), cord.Subcord(0, i)); + EXPECT_EQ(pre_iter, advance_iter); + + advance_iter = pre_iter; + absl::Cord::Advance(&advance_iter, cord.size() - i); + EXPECT_EQ(range.end(), advance_iter); + + advance_iter = pre_iter; + EXPECT_EQ(absl::Cord::AdvanceAndRead(&advance_iter, cord.size() - i), + cord.Subcord(i, cord.size() - i)); + EXPECT_EQ(range.end(), advance_iter); + + ++i; + ++pre_iter; + post_iter++; + } + EXPECT_EQ(i, cord.size()); + EXPECT_TRUE(pre_iter == cord.char_end()); // NOLINT: explicitly test == + EXPECT_TRUE(post_iter == cord.char_end()); // NOLINT + + absl::Cord::CharIterator zero_advanced_end = cord.char_end(); + absl::Cord::Advance(&zero_advanced_end, 0); + EXPECT_EQ(zero_advanced_end, cord.char_end()); + + absl::Cord::CharIterator it = cord.char_begin(); + for (absl::string_view chunk : cord.Chunks()) { + while (!chunk.empty()) { + EXPECT_EQ(absl::Cord::ChunkRemaining(it), chunk); + chunk.remove_prefix(1); + ++it; + } + } +} + +TEST(CordCharIterator, Operations) { + absl::Cord empty_cord; + VerifyCharIterator(empty_cord); + + absl::Cord small_buffer_cord("small cord"); + VerifyCharIterator(small_buffer_cord); + + absl::Cord flat_node_cord("larger than small buffer optimization"); + VerifyCharIterator(flat_node_cord); + + VerifyCharIterator( + absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", + "testing ", "character ", "iteration."})); + + absl::Cord reused_nodes_cord("ghi"); + reused_nodes_cord.Prepend(absl::Cord("def")); + reused_nodes_cord.Prepend(absl::Cord("abc")); + for (int i = 0; i < 4; ++i) { + reused_nodes_cord.Prepend(reused_nodes_cord); + VerifyCharIterator(reused_nodes_cord); + } + + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); + absl::Cord subcords; + for (int i = 0; i < 4; ++i) subcords.Prepend(flat_cord.Subcord(16 * i, 128)); + VerifyCharIterator(subcords); +} + +TEST(Cord, StreamingOutput) { + absl::Cord c = + absl::MakeFragmentedCord({"A ", "small ", "fragmented ", "Cord", "."}); + std::stringstream output; + output << c; + EXPECT_EQ("A small fragmented Cord.", output.str()); +} + +TEST(Cord, ForEachChunk) { + for (int num_elements : {1, 10, 200}) { + SCOPED_TRACE(num_elements); + std::vector<std::string> cord_chunks; + for (int i = 0; i < num_elements; ++i) { + cord_chunks.push_back(absl::StrCat("[", i, "]")); + } + absl::Cord c = absl::MakeFragmentedCord(cord_chunks); + + std::vector<std::string> iterated_chunks; + absl::CordTestPeer::ForEachChunk(c, + [&iterated_chunks](absl::string_view sv) { + iterated_chunks.emplace_back(sv); + }); + EXPECT_EQ(iterated_chunks, cord_chunks); + } +} + +TEST(Cord, SmallBufferAssignFromOwnData) { + constexpr size_t kMaxInline = 15; + std::string contents = "small buff cord"; + EXPECT_EQ(contents.size(), kMaxInline); + for (size_t pos = 0; pos < contents.size(); ++pos) { + for (size_t count = contents.size() - pos; count > 0; --count) { + absl::Cord c(contents); + absl::string_view flat = c.Flatten(); + c = flat.substr(pos, count); + EXPECT_EQ(c, contents.substr(pos, count)) + << "pos = " << pos << "; count = " << count; + } + } +} diff --git a/absl/strings/cord_test_helpers.h b/absl/strings/cord_test_helpers.h new file mode 100644 index 00000000..f1036e3b --- /dev/null +++ b/absl/strings/cord_test_helpers.h @@ -0,0 +1,60 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef ABSL_STRINGS_CORD_TEST_HELPERS_H_ +#define ABSL_STRINGS_CORD_TEST_HELPERS_H_ + +#include "absl/strings/cord.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// Creates a multi-segment Cord from an iterable container of strings. The +// resulting Cord is guaranteed to have one segment for every string in the +// container. This allows code to be unit tested with multi-segment Cord +// inputs. +// +// Example: +// +// absl::Cord c = absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); +// EXPECT_FALSE(c.GetFlat(&unused)); +// +// The mechanism by which this Cord is created is an implementation detail. Any +// implementation that produces a multi-segment Cord may produce a flat Cord in +// the future as new optimizations are added to the Cord class. +// MakeFragmentedCord will, however, always be updated to return a multi-segment +// Cord. +template <typename Container> +Cord MakeFragmentedCord(const Container& c) { + Cord result; + for (const auto& s : c) { + auto* external = new std::string(s); + Cord tmp = absl::MakeCordFromExternal( + *external, [external](absl::string_view) { delete external; }); + tmp.Prepend(result); + result = tmp; + } + return result; +} + +inline Cord MakeFragmentedCord(std::initializer_list<absl::string_view> list) { + return MakeFragmentedCord<std::initializer_list<absl::string_view>>(list); +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORD_TEST_HELPERS_H_ diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h new file mode 100644 index 00000000..5b5d1083 --- /dev/null +++ b/absl/strings/internal/cord_internal.h @@ -0,0 +1,151 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ + +#include <atomic> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <type_traits> + +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Wraps std::atomic for reference counting. +class Refcount { + public: + Refcount() : count_{1} {} + ~Refcount() {} + + // Increments the reference count by 1. Imposes no memory ordering. + inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); } + + // Asserts that the current refcount is greater than 0. If the refcount is + // greater than 1, decrements the reference count by 1. + // + // Returns false if there are no references outstanding; true otherwise. + // Inserts barriers to ensure that state written before this method returns + // false will be visible to a thread that just observed this method returning + // false. + inline bool Decrement() { + int32_t refcount = count_.load(std::memory_order_acquire); + assert(refcount > 0); + return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1; + } + + // Same as Decrement but expect that refcount is greater than 1. + inline bool DecrementExpectHighRefcount() { + int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel); + assert(refcount > 0); + return refcount != 1; + } + + // Returns the current reference count using acquire semantics. + inline int32_t Get() const { return count_.load(std::memory_order_acquire); } + + // Returns whether the atomic integer is 1. + // If the reference count is used in the conventional way, a + // reference count of 1 implies that the current thread owns the + // reference and no other thread shares it. + // This call performs the test for a reference count of one, and + // performs the memory barrier needed for the owning thread + // to act on the object, knowing that it has exclusive access to the + // object. + inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; } + + private: + std::atomic<int32_t> count_; +}; + +// The overhead of a vtable is too much for Cord, so we roll our own subclasses +// using only a single byte to differentiate classes from each other - the "tag" +// byte. Define the subclasses first so we can provide downcasting helper +// functions in the base class. + +struct CordRepConcat; +struct CordRepSubstring; +struct CordRepExternal; + +struct CordRep { + // The following three fields have to be less than 32 bytes since + // that is the smallest supported flat node size. + // We use uint64_t for the length even in 32-bit binaries. + uint64_t length; + Refcount refcount; + // If tag < FLAT, it represents CordRepKind and indicates the type of node. + // Otherwise, the node type is CordRepFlat and the tag is the encoded size. + uint8_t tag; + char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep + + inline CordRepConcat* concat(); + inline const CordRepConcat* concat() const; + inline CordRepSubstring* substring(); + inline const CordRepSubstring* substring() const; + inline CordRepExternal* external(); + inline const CordRepExternal* external() const; +}; + +struct CordRepConcat : public CordRep { + CordRep* left; + CordRep* right; + + uint8_t depth() const { return static_cast<uint8_t>(data[0]); } + void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); } +}; + +struct CordRepSubstring : public CordRep { + size_t start; // Starting offset of substring in child + CordRep* child; +}; + +// TODO(strel): replace the following logic (and related functions in cord.cc) +// with container_internal::Layout. + +// Alignment requirement for CordRepExternal so that the type erased releaser +// will be stored at a suitably aligned address. +constexpr size_t ExternalRepAlignment() { +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + return __STDCPP_DEFAULT_NEW_ALIGNMENT__; +#else + return alignof(max_align_t); +#endif +} + +// Type for function pointer that will invoke and destroy the type-erased +// releaser function object. Accepts a pointer to the releaser and the +// `string_view` that were passed in to `NewExternalRep` below. The return value +// is the size of the `Releaser` type. +using ExternalReleaserInvoker = size_t (*)(void*, absl::string_view); + +// External CordReps are allocated together with a type erased releaser. The +// releaser is stored in the memory directly following the CordRepExternal. +struct alignas(ExternalRepAlignment()) CordRepExternal : public CordRep { + const char* base; + // Pointer to function that knows how to call and destroy the releaser. + ExternalReleaserInvoker releaser_invoker; +}; + +// TODO(strel): look into removing, it doesn't seem like anything relies on this +static_assert(sizeof(CordRepConcat) == sizeof(CordRepSubstring), ""); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl +#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index d3904124..4d0604e0 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -88,7 +88,7 @@ class ConvertedIntInfo { template <typename T> void UnsignedToStringRight(T u, ConversionChar conv) { char *p = end(); - switch (conv.radix()) { + switch (FormatConversionCharRadix(conv)) { default: case 10: for (; u; u /= 10) @@ -99,7 +99,7 @@ class ConvertedIntInfo { *--p = static_cast<char>('0' + static_cast<size_t>(u % 8)); break; case 16: { - const char *digits = kDigit[conv.upper() ? 1 : 0]; + const char *digits = kDigit[FormatConversionCharIsUpper(conv) ? 1 : 0]; for (; u; u /= 16) *--p = digits[static_cast<size_t>(u % 16)]; break; } @@ -121,21 +121,20 @@ class ConvertedIntInfo { string_view BaseIndicator(const ConvertedIntInfo &info, const ConversionSpec conv) { bool alt = conv.flags().alt; - int radix = conv.conv().radix(); - if (conv.conv().id() == ConversionChar::p) - alt = true; // always show 0x for %p. + int radix = FormatConversionCharRadix(conv.conv()); + if (conv.conv() == ConversionChar::p) alt = true; // always show 0x for %p. // From the POSIX description of '#' flag: // "For x or X conversion specifiers, a non-zero result shall have // 0x (or 0X) prefixed to it." if (alt && radix == 16 && !info.digits().empty()) { - if (conv.conv().upper()) return "0X"; + if (FormatConversionCharIsUpper(conv.conv())) return "0X"; return "0x"; } return {}; } string_view SignColumn(bool neg, const ConversionSpec conv) { - if (conv.conv().is_signed()) { + if (FormatConversionCharIsSigned(conv.conv())) { if (neg) return "-"; if (conv.flags().show_pos) return "+"; if (conv.flags().sign_col) return " "; @@ -175,7 +174,7 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, if (!precision_specified) precision = 1; - if (conv.flags().alt && conv.conv().id() == ConversionChar::o) { + if (conv.flags().alt && conv.conv() == ConversionChar::o) { // From POSIX description of the '#' (alt) flag: // "For o conversion, it increases the precision (if necessary) to // force the first digit of the result to be zero." @@ -211,7 +210,7 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, template <typename T> bool ConvertIntImplInner(T v, const ConversionSpec conv, FormatSinkImpl *sink) { ConvertedIntInfo info(v, conv.conv()); - if (conv.flags().basic && conv.conv().id() != ConversionChar::p) { + if (conv.flags().basic && (conv.conv() != ConversionChar::p)) { if (info.is_neg()) sink->Append(1, '-'); if (info.digits().empty()) { sink->Append(1, '0'); @@ -225,14 +224,13 @@ bool ConvertIntImplInner(T v, const ConversionSpec conv, FormatSinkImpl *sink) { template <typename T> bool ConvertIntArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().is_float()) { + if (FormatConversionCharIsFloat(conv.conv())) { return FormatConvertImpl(static_cast<double>(v), conv, sink).value; } - if (conv.conv().id() == ConversionChar::c) + if (conv.conv() == ConversionChar::c) return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); - if (!conv.conv().is_integral()) - return false; - if (!conv.conv().is_signed() && IsSigned<T>::value) { + if (!FormatConversionCharIsIntegral(conv.conv())) return false; + if (!FormatConversionCharIsSigned(conv.conv()) && IsSigned<T>::value) { using U = typename MakeUnsigned<T>::type; return FormatConvertImpl(static_cast<U>(v), conv, sink).value; } @@ -241,13 +239,13 @@ bool ConvertIntArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { template <typename T> bool ConvertFloatArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - return conv.conv().is_float() && ConvertFloatImpl(v, conv, sink); + return FormatConversionCharIsFloat(conv.conv()) && + ConvertFloatImpl(v, conv, sink); } inline bool ConvertStringArg(string_view v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().id() != ConversionChar::s) - return false; + if (conv.conv() != ConversionChar::s) return false; if (conv.flags().basic) { sink->Append(v); return true; @@ -274,7 +272,7 @@ ConvertResult<Conv::s> FormatConvertImpl(string_view v, ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().id() == ConversionChar::p) + if (conv.conv() == ConversionChar::p) return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; size_t len; if (v == nullptr) { @@ -291,8 +289,7 @@ ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, // ==================== Raw pointers ==================== ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().id() != ConversionChar::p) - return {false}; + if (conv.conv() != ConversionChar::p) return {false}; if (!v.value) { sink->Append("(nil)"); return {true}; diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index b672a229..7a937563 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -70,7 +70,7 @@ template <class AbslCord, ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, ConversionSpec conv, FormatSinkImpl* sink) { - if (conv.conv().id() != ConversionChar::s) return {false}; + if (conv.conv() != ConversionChar::s) return {false}; bool is_left = conv.flags().left; size_t space_remaining = 0; @@ -185,8 +185,7 @@ struct FormatCountCaptureHelper { FormatSinkImpl* sink) { const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; - if (conv.conv().id() != str_format_internal::ConversionChar::n) - return {false}; + if (conv.conv() != str_format_internal::ConversionChar::n) return {false}; *v2.p_ = static_cast<int>(sink->size()); return {true}; } @@ -378,7 +377,7 @@ class FormatArgImpl { template <typename T> static bool Dispatch(Data arg, ConversionSpec spec, void* out) { // A `none` conv indicates that we want the `int` conversion. - if (ABSL_PREDICT_FALSE(spec.conv().id() == ConversionChar::none)) { + if (ABSL_PREDICT_FALSE(spec.conv() == ConversionChar::none)) { return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(), std::is_enum<T>()); } diff --git a/absl/strings/internal/str_format/arg_test.cc b/absl/strings/internal/str_format/arg_test.cc index 96c9cfd3..04fa56cd 100644 --- a/absl/strings/internal/str_format/arg_test.cc +++ b/absl/strings/internal/str_format/arg_test.cc @@ -96,7 +96,7 @@ TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { std::string s; FormatSinkImpl sink(&s); ConversionSpec conv; - conv.set_conv(ConversionChar::FromChar('s')); + conv.set_conv(ConversionChar::s); conv.set_flags(Flags()); conv.set_width(-1); conv.set_precision(-1); diff --git a/absl/strings/internal/str_format/extension.cc b/absl/strings/internal/str_format/extension.cc index 21688e87..2e5bc2ce 100644 --- a/absl/strings/internal/str_format/extension.cc +++ b/absl/strings/internal/str_format/extension.cc @@ -23,15 +23,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -const ConversionChar::Spec ConversionChar::kSpecs[] = { -#define X_VAL(id) { ConversionChar::id, #id[0] } -#define X_SEP , - ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP), - {ConversionChar::none, '\0'}, -#undef X_VAL -#undef X_SEP -}; - std::string Flags::ToString() const { std::string s; s.append(left ? "-" : ""); @@ -42,8 +33,6 @@ std::string Flags::ToString() const { return s; } -const size_t ConversionChar::kNumValues; - bool FormatSinkImpl::PutPaddedString(string_view v, int w, int p, bool l) { size_t space_remaining = 0; if (w >= 0) space_remaining = w; diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h index 4868eac3..1a863c20 100644 --- a/absl/strings/internal/str_format/extension.h +++ b/absl/strings/internal/str_format/extension.h @@ -148,117 +148,122 @@ struct Flags { X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ /* misc */ \ X_VAL(n) X_SEP X_VAL(p) -// clang-format on -struct ABSL_DLL ConversionChar { - public: - enum Id : uint8_t { +enum class FormatConversionChar : uint8_t { c, C, s, S, // text d, i, o, u, x, X, // int f, F, e, E, g, G, a, A, // float n, p, // misc - none - }; - static const size_t kNumValues = none + 1; - - ConversionChar() : id_(none) {} - - public: - // Index into the opaque array of ConversionChar enums. - // Requires: i < kNumValues - static ConversionChar FromIndex(size_t i) { - return ConversionChar(kSpecs[i].value); - } + kNone, + none = kNone +}; +// clang-format on - static ConversionChar FromChar(char c) { - ConversionChar::Id out_id = ConversionChar::none; - switch (c) { -#define X_VAL(id) \ - case #id[0]: \ - out_id = ConversionChar::id; \ - break; - ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, ) +inline FormatConversionChar FormatConversionCharFromChar(char c) { + switch (c) { +#define X_VAL(id) \ + case #id[0]: \ + return FormatConversionChar::id; + ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, ) #undef X_VAL - default: - break; - } - return ConversionChar(out_id); } + return FormatConversionChar::kNone; +} - static ConversionChar FromId(Id id) { return ConversionChar(id); } - Id id() const { return id_; } - - int radix() const { - switch (id()) { - case x: case X: case a: case A: case p: return 16; - case o: return 8; - default: return 10; - } +inline int FormatConversionCharRadix(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::x: + case FormatConversionChar::X: + case FormatConversionChar::a: + case FormatConversionChar::A: + case FormatConversionChar::p: + return 16; + case FormatConversionChar::o: + return 8; + default: + return 10; } +} - bool upper() const { - switch (id()) { - case X: case F: case E: case G: case A: return true; - default: return false; - } +inline bool FormatConversionCharIsUpper(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::X: + case FormatConversionChar::F: + case FormatConversionChar::E: + case FormatConversionChar::G: + case FormatConversionChar::A: + return true; + default: + return false; } +} - bool is_signed() const { - switch (id()) { - case d: case i: return true; - default: return false; - } +inline bool FormatConversionCharIsSigned(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::d: + case FormatConversionChar::i: + return true; + default: + return false; } +} - bool is_integral() const { - switch (id()) { - case d: case i: case u: case o: case x: case X: - return true; - default: return false; - } +inline bool FormatConversionCharIsIntegral(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::d: + case FormatConversionChar::i: + case FormatConversionChar::u: + case FormatConversionChar::o: + case FormatConversionChar::x: + case FormatConversionChar::X: + return true; + default: + return false; } +} - bool is_float() const { - switch (id()) { - case a: case e: case f: case g: case A: case E: case F: case G: - return true; - default: return false; - } +inline bool FormatConversionCharIsFloat(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::a: + case FormatConversionChar::e: + case FormatConversionChar::f: + case FormatConversionChar::g: + case FormatConversionChar::A: + case FormatConversionChar::E: + case FormatConversionChar::F: + case FormatConversionChar::G: + return true; + default: + return false; } +} - bool IsValid() const { return id() != none; } - - // The associated char. - char Char() const { return kSpecs[id_].name; } - - friend bool operator==(const ConversionChar& a, const ConversionChar& b) { - return a.id() == b.id(); - } - friend bool operator!=(const ConversionChar& a, const ConversionChar& b) { - return !(a == b); - } - friend std::ostream& operator<<(std::ostream& os, const ConversionChar& v) { - char c = v.Char(); - if (!c) c = '?'; - return os << c; +inline char FormatConversionCharToChar(FormatConversionChar c) { + switch (c) { +#define X_VAL(e) \ + case FormatConversionChar::e: \ + return #e[0]; +#define X_SEP + ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) + case FormatConversionChar::kNone: + return '\0'; +#undef X_VAL +#undef X_SEP } + return '\0'; +} - private: - struct Spec { - Id value; - char name; - }; - static const Spec kSpecs[]; - - explicit ConversionChar(Id id) : id_(id) {} - - Id id_; -}; +// The associated char. +inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) { + char c = FormatConversionCharToChar(v); + if (!c) c = '?'; + return os << c; +} class ConversionSpec { public: Flags flags() const { return flags_; } - ConversionChar conv() const { + FormatConversionChar conv() const { // Keep this field first in the struct . It generates better code when // accessing it when ConversionSpec is passed by value in registers. static_assert(offsetof(ConversionSpec, conv_) == 0, ""); @@ -273,22 +278,24 @@ class ConversionSpec { int precision() const { return precision_; } void set_flags(Flags f) { flags_ = f; } - void set_conv(ConversionChar c) { conv_ = c; } + void set_conv(FormatConversionChar c) { conv_ = c; } void set_width(int w) { width_ = w; } void set_precision(int p) { precision_ = p; } void set_left(bool b) { flags_.left = b; } private: - ConversionChar conv_; + FormatConversionChar conv_ = FormatConversionChar::kNone; Flags flags_; int width_; int precision_; }; -constexpr uint64_t ConversionCharToConvValue(char conv) { +constexpr uint64_t FormatConversionCharToConvValue(char conv) { return -#define CONV_SET_CASE(c) \ - conv == #c[0] ? (uint64_t{1} << (1 + ConversionChar::Id::c)): +#define CONV_SET_CASE(c) \ + conv == #c[0] \ + ? (uint64_t{1} << (1 + static_cast<uint8_t>(FormatConversionChar::c))) \ + : ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) #undef CONV_SET_CASE conv == '*' @@ -297,12 +304,12 @@ constexpr uint64_t ConversionCharToConvValue(char conv) { } enum class Conv : uint64_t { -#define CONV_SET_CASE(c) c = ConversionCharToConvValue(#c[0]), +#define CONV_SET_CASE(c) c = FormatConversionCharToConvValue(#c[0]), ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) #undef CONV_SET_CASE // Used for width/precision '*' specification. - star = ConversionCharToConvValue('*'), + star = FormatConversionCharToConvValue('*'), // Some predefined values: integral = d | i | u | o | x | X, @@ -323,12 +330,12 @@ constexpr Conv operator|(Conv a, Conv b) { // Get a conversion with a single character in it. constexpr Conv ConversionCharToConv(char c) { - return Conv(ConversionCharToConvValue(c)); + return Conv(FormatConversionCharToConvValue(c)); } // Checks whether `c` exists in `set`. constexpr bool Contains(Conv set, char c) { - return (static_cast<uint64_t>(set) & ConversionCharToConvValue(c)) != 0; + return (static_cast<uint64_t>(set) & FormatConversionCharToConvValue(c)) != 0; } // Checks whether all the characters in `c` are contained in `set` @@ -353,6 +360,9 @@ inline size_t Excess(size_t used, size_t capacity) { return used < capacity ? capacity - used : 0; } +// Type alias for use during migration. +using ConversionChar = FormatConversionChar; + } // namespace str_format_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc index ebe4da5b..c98ed4ba 100644 --- a/absl/strings/internal/str_format/float_conversion.cc +++ b/absl/strings/internal/str_format/float_conversion.cc @@ -33,7 +33,7 @@ bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, if (std::is_same<long double, Float>()) { *fp++ = 'L'; } - *fp++ = conv.conv().Char(); + *fp++ = FormatConversionCharToChar(conv.conv()); *fp = 0; assert(fp < fmt + sizeof(fmt)); } @@ -100,9 +100,11 @@ bool ConvertNonNumericFloats(char sign_char, Float v, char text[4], *ptr = text; if (sign_char) *ptr++ = sign_char; if (std::isnan(v)) { - ptr = std::copy_n(conv.conv().upper() ? "NAN" : "nan", 3, ptr); + ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "NAN" : "nan", + 3, ptr); } else if (std::isinf(v)) { - ptr = std::copy_n(conv.conv().upper() ? "INF" : "inf", 3, ptr); + ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "INF" : "inf", + 3, ptr); } else { return false; } @@ -399,7 +401,7 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, Buffer buffer; - switch (conv.conv().id()) { + switch (conv.conv()) { case ConversionChar::f: case ConversionChar::F: if (!FloatToBuffer<FormatStyle::Fixed>(decomposed, precision, &buffer, @@ -416,7 +418,8 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, return FallbackToSnprintf(v, conv, sink); } if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); - PrintExponent(exp, conv.conv().upper() ? 'E' : 'e', &buffer); + PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', + &buffer); break; case ConversionChar::g: @@ -447,7 +450,10 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, while (buffer.back() == '0') buffer.pop_back(); if (buffer.back() == '.') buffer.pop_back(); } - if (exp) PrintExponent(exp, conv.conv().upper() ? 'E' : 'e', &buffer); + if (exp) { + PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', + &buffer); + } break; case ConversionChar::a: diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index c4b527bc..aab68db9 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -17,7 +17,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -using CC = ConversionChar::Id; +using CC = ConversionChar; using LM = LengthMod; ABSL_CONST_INIT const ConvTag kTags[256] = { @@ -322,7 +322,9 @@ bool ParsedFormatBase::MatchesConversions( if (conv.width.is_from_arg() && !add_if_valid_conv(conv.width.get_from_arg(), '*')) return false; - if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false; + if (!add_if_valid_conv(conv.arg_position, + FormatConversionCharToChar(conv.conv))) + return false; } return used.size() == convs.size() || allow_ignored; } diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index 6cbe2576..45c90d1d 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -67,7 +67,7 @@ struct UnboundConversion { Flags flags; LengthMod length_mod = LengthMod::none; - ConversionChar conv; + ConversionChar conv = FormatConversionChar::kNone; }; // Consume conversion spec prefix (not including '%') of [p, end) if valid. @@ -79,10 +79,12 @@ const char* ConsumeUnboundConversion(const char* p, const char* end, UnboundConversion* conv, int* next_arg); // Helper tag class for the table below. -// It allows fast `char -> ConversionChar/LengthMod` checking and conversions. +// It allows fast `char -> ConversionChar/LengthMod` checking and +// conversions. class ConvTag { public: - constexpr ConvTag(ConversionChar::Id id) : tag_(id) {} // NOLINT + constexpr ConvTag(ConversionChar conversion_char) // NOLINT + : tag_(static_cast<int8_t>(conversion_char)) {} // We invert the length modifiers to make them negative so that we can easily // test for them. constexpr ConvTag(LengthMod length_mod) // NOLINT @@ -94,7 +96,7 @@ class ConvTag { bool is_length() const { return tag_ < 0 && tag_ != -128; } ConversionChar as_conv() const { assert(is_conv()); - return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_)); + return static_cast<ConversionChar>(tag_); } LengthMod as_length() const { assert(is_length()); diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index 4a8efd08..1b1ee030 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -41,7 +41,7 @@ TEST(LengthModTest, Names) { TEST(ConversionCharTest, Names) { struct Expectation { - ConversionChar::Id id; + ConversionChar id; char name; }; // clang-format off @@ -55,12 +55,10 @@ TEST(ConversionCharTest, Names) { {ConversionChar::none, '\0'}, }; // clang-format on - EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), ConversionChar::kNumValues); for (auto e : kExpect) { SCOPED_TRACE(e.name); - ConversionChar v = ConversionChar::FromId(e.id); - EXPECT_EQ(e.id, v.id()); - EXPECT_EQ(e.name, v.Char()); + ConversionChar v = e.id; + EXPECT_EQ(e.name, FormatConversionCharToChar(v)); } } @@ -119,7 +117,7 @@ TEST_F(ConsumeUnboundConversionTest, BasicConversion) { EXPECT_FALSE(Run("dd")); // no excess allowed EXPECT_TRUE(Run("d")); - EXPECT_EQ('d', o.conv.Char()); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); EXPECT_FALSE(o.width.is_from_arg()); EXPECT_LT(o.width.value(), 0); EXPECT_FALSE(o.precision.is_from_arg()); @@ -160,7 +158,7 @@ TEST_F(ConsumeUnboundConversionTest, ArgPosition) { TEST_F(ConsumeUnboundConversionTest, WidthAndPrecision) { EXPECT_TRUE(Run("14d")); - EXPECT_EQ('d', o.conv.Char()); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); EXPECT_FALSE(o.width.is_from_arg()); EXPECT_EQ(14, o.width.value()); EXPECT_FALSE(o.precision.is_from_arg()); @@ -330,7 +328,7 @@ struct SummarizeConsumer { if (conv.precision.is_from_arg()) { *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; } - *out += conv.conv.Char(); + *out += FormatConversionCharToChar(conv.conv); *out += "}"; return true; } diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index d33bcaa2..acbdbf4a 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -450,7 +450,7 @@ struct SummarizeConsumer { if (conv.precision.is_from_arg()) { *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; } - *out += conv.conv.Char(); + *out += FormatConversionCharToChar(conv.conv); *out += "}"; return true; } |