diff options
Diffstat (limited to 'absl/strings')
55 files changed, 3578 insertions, 1115 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index b950ec76..64a13cef 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -258,6 +258,8 @@ cc_library( visibility = ["//visibility:private"], deps = [ ":strings", + "//absl/base:base_internal", + "//absl/container:compressed_tuple", "//absl/meta:type_traits", ], ) @@ -277,7 +279,6 @@ cc_library( ":str_format", ":strings", "//absl/base", - "//absl/base:base_internal", "//absl/base:core_headers", "//absl/base:endian", "//absl/base:raw_logging_internal", @@ -285,6 +286,7 @@ cc_library( "//absl/container:inlined_vector", "//absl/functional:function_ref", "//absl/meta:type_traits", + "//absl/types:optional", ], ) @@ -309,9 +311,11 @@ cc_test( deps = [ ":cord", ":cord_test_helpers", + ":str_format", ":strings", "//absl/base", "//absl/base:config", + "//absl/base:core_headers", "//absl/base:endian", "//absl/base:raw_logging_internal", "//absl/container:fixed_array", @@ -366,6 +370,8 @@ cc_test( ":strings", "//absl/base:core_headers", "//absl/base:dynamic_annotations", + "//absl/container:flat_hash_map", + "//absl/container:node_hash_map", "@com_google_googletest//:gtest_main", ], ) @@ -484,6 +490,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":internal", ":pow10_helper", ":strings", "//absl/base:config", @@ -632,10 +639,13 @@ cc_library( visibility = ["//visibility:private"], deps = [ ":strings", + "//absl/base:bits", "//absl/base:config", "//absl/base:core_headers", + "//absl/functional:function_ref", "//absl/meta:type_traits", "//absl/numeric:int128", + "//absl/types:optional", "//absl/types:span", ], ) @@ -646,6 +656,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":cord", ":str_format", ":strings", "//absl/base:core_headers", @@ -663,6 +674,7 @@ cc_test( deps = [ ":str_format", ":str_format_internal", + ":strings", "@com_google_googletest//:gtest_main", ], ) @@ -709,8 +721,9 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":str_format_internal", + ":strings", "//absl/base:raw_logging_internal", - "//absl/numeric:int128", + "//absl/types:optional", "@com_google_googletest//:gtest_main", ], ) @@ -721,6 +734,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":cord", ":str_format_internal", "@com_google_googletest//:gtest_main", ], diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index cebc5928..d6c2126d 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -210,6 +210,8 @@ absl_cc_test( absl::base absl::core_headers absl::dynamic_annotations + absl::flat_hash_map + absl::node_hash_map gmock_main ) @@ -284,6 +286,7 @@ absl_cc_test( absl::raw_logging_internal absl::random_random absl::random_distributions + absl::strings_internal gmock_main ) @@ -389,6 +392,7 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::bits absl::strings absl::config absl::core_headers @@ -406,6 +410,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format + absl::cord absl::strings absl::core_headers gmock_main @@ -421,6 +426,7 @@ absl_cc_test( DEPS absl::str_format absl::str_format_internal + absl::strings gmock_main ) @@ -469,6 +475,7 @@ absl_cc_test( COPTS ${ABSL_TEST_COPTS} DEPS + absl::strings absl::str_format_internal absl::raw_logging_internal absl::int128 @@ -484,6 +491,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format_internal + absl::cord gmock_main ) @@ -538,15 +546,18 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::strings_internal absl::base absl::base_internal + absl::compressed_tuple absl::core_headers absl::endian absl::fixed_array absl::function_ref absl::inlined_vector + absl::optional absl::raw_logging_internal + absl::strings + absl::strings_internal absl::type_traits PUBLIC ) @@ -572,9 +583,11 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::cord + absl::str_format absl::strings absl::base absl::config + absl::core_headers absl::endian absl::raw_logging_internal absl::fixed_array diff --git a/absl/strings/charconv.cc b/absl/strings/charconv.cc index bdba768d..3613a652 100644 --- a/absl/strings/charconv.cc +++ b/absl/strings/charconv.cc @@ -619,10 +619,10 @@ from_chars_result FromCharsImpl(const char* first, const char* last, // Either we failed to parse a hex float after the "0x", or we read // "0xinf" or "0xnan" which we don't want to match. // - // However, a std::string that begins with "0x" also begins with "0", which + // However, a string that begins with "0x" also begins with "0", which // is normally a valid match for the number zero. So we want these // strings to match zero unless fmt_flags is `scientific`. (This flag - // means an exponent is required, which the std::string "0" does not have.) + // means an exponent is required, which the string "0" does not have.) if (fmt_flags == chars_format::scientific) { result.ec = std::errc::invalid_argument; } else { diff --git a/absl/strings/charconv_benchmark.cc b/absl/strings/charconv_benchmark.cc index 644b2abd..e8c7371d 100644 --- a/absl/strings/charconv_benchmark.cc +++ b/absl/strings/charconv_benchmark.cc @@ -132,7 +132,7 @@ BENCHMARK(BM_Absl_HugeMantissa); std::string MakeHardCase(int length) { // The number 1.1521...e-297 is exactly halfway between 12345 * 2**-1000 and // the next larger representable number. The digits of this number are in - // the std::string below. + // the string below. const std::string digits = "1." "152113937042223790993097181572444900347587985074226836242307364987727724" diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index d9503ae3..763dcc45 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -15,10 +15,12 @@ #include "absl/strings/cord.h" #include <algorithm> +#include <atomic> #include <cstddef> #include <cstdio> #include <cstdlib> #include <iomanip> +#include <iostream> #include <limits> #include <ostream> #include <sstream> @@ -28,6 +30,7 @@ #include "absl/base/casts.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" #include "absl/base/port.h" #include "absl/container/fixed_array.h" #include "absl/container/inlined_vector.h" @@ -58,48 +61,6 @@ enum CordRepKind { FLAT = 3, }; -namespace { - -// Type used with std::allocator for allocating and deallocating -// `CordRepExternal`. std::allocator is used because it opaquely handles the -// different new / delete overloads available on a given platform. -struct alignas(absl::cord_internal::ExternalRepAlignment()) ExternalAllocType { - unsigned char value[absl::cord_internal::ExternalRepAlignment()]; -}; - -// Returns the number of objects to pass in to std::allocator<ExternalAllocType> -// allocate() and deallocate() to create enough room for `CordRepExternal` with -// `releaser_size` bytes on the end. -constexpr size_t GetExternalAllocNumObjects(size_t releaser_size) { - // Be sure to round up since `releaser_size` could be smaller than - // `sizeof(ExternalAllocType)`. - return (sizeof(CordRepExternal) + releaser_size + sizeof(ExternalAllocType) - - 1) / - sizeof(ExternalAllocType); -} - -// Allocates enough memory for `CordRepExternal` and a releaser with size -// `releaser_size` bytes. -void* AllocateExternal(size_t releaser_size) { - return std::allocator<ExternalAllocType>().allocate( - GetExternalAllocNumObjects(releaser_size)); -} - -// Deallocates the memory for a `CordRepExternal` assuming it was allocated with -// a releaser of given size and alignment. -void DeallocateExternal(CordRepExternal* p, size_t releaser_size) { - std::allocator<ExternalAllocType>().deallocate( - reinterpret_cast<ExternalAllocType*>(p), - GetExternalAllocNumObjects(releaser_size)); -} - -// Returns a pointer to the type erased releaser for the given CordRepExternal. -void* GetExternalReleaser(CordRepExternal* rep) { - return rep + 1; -} - -} // namespace - namespace cord_internal { inline CordRepConcat* CordRep::concat() { @@ -136,8 +97,6 @@ inline const CordRepExternal* CordRep::external() const { static const size_t kFlatOverhead = offsetof(CordRep, data); -static_assert(kFlatOverhead == 13, "Unittests assume kFlatOverhead == 13"); - // Largest and smallest flat node lengths we are willing to allocate // Flat allocation size is stored in tag, which currently can encode sizes up // to 4K, encoded as multiple of either 8 or 32 bytes. @@ -195,52 +154,18 @@ static_assert(Fibonacci(63) == 6557470319842, // The root node depth is allowed to become twice as large to reduce rebalancing // for larger strings (see IsRootBalanced). static constexpr uint64_t min_length[] = { - Fibonacci(2), - Fibonacci(3), - Fibonacci(4), - Fibonacci(5), - Fibonacci(6), - Fibonacci(7), - Fibonacci(8), - Fibonacci(9), - Fibonacci(10), - Fibonacci(11), - Fibonacci(12), - Fibonacci(13), - Fibonacci(14), - Fibonacci(15), - Fibonacci(16), - Fibonacci(17), - Fibonacci(18), - Fibonacci(19), - Fibonacci(20), - Fibonacci(21), - Fibonacci(22), - Fibonacci(23), - Fibonacci(24), - Fibonacci(25), - Fibonacci(26), - Fibonacci(27), - Fibonacci(28), - Fibonacci(29), - Fibonacci(30), - Fibonacci(31), - Fibonacci(32), - Fibonacci(33), - Fibonacci(34), - Fibonacci(35), - Fibonacci(36), - Fibonacci(37), - Fibonacci(38), - Fibonacci(39), - Fibonacci(40), - Fibonacci(41), - Fibonacci(42), - Fibonacci(43), - Fibonacci(44), - Fibonacci(45), - Fibonacci(46), - Fibonacci(47), + Fibonacci(2), Fibonacci(3), Fibonacci(4), Fibonacci(5), + Fibonacci(6), Fibonacci(7), Fibonacci(8), Fibonacci(9), + Fibonacci(10), Fibonacci(11), Fibonacci(12), Fibonacci(13), + Fibonacci(14), Fibonacci(15), Fibonacci(16), Fibonacci(17), + Fibonacci(18), Fibonacci(19), Fibonacci(20), Fibonacci(21), + Fibonacci(22), Fibonacci(23), Fibonacci(24), Fibonacci(25), + Fibonacci(26), Fibonacci(27), Fibonacci(28), Fibonacci(29), + Fibonacci(30), Fibonacci(31), Fibonacci(32), Fibonacci(33), + Fibonacci(34), Fibonacci(35), Fibonacci(36), Fibonacci(37), + Fibonacci(38), Fibonacci(39), Fibonacci(40), Fibonacci(41), + Fibonacci(42), Fibonacci(43), Fibonacci(44), Fibonacci(45), + Fibonacci(46), Fibonacci(47), 0xffffffffffffffffull, // Avoid overflow }; @@ -337,11 +262,7 @@ static void UnrefInternal(CordRep* rep) { } } else if (rep->tag == EXTERNAL) { CordRepExternal* rep_external = rep->external(); - absl::string_view data(rep_external->base, rep->length); - void* releaser = GetExternalReleaser(rep_external); - size_t releaser_size = rep_external->releaser_invoker(releaser, data); - rep_external->~CordRepExternal(); - DeallocateExternal(rep_external, releaser_size); + rep_external->releaser_invoker(rep_external); rep = nullptr; } else if (rep->tag == SUBSTRING) { CordRepSubstring* rep_substring = rep->substring(); @@ -491,18 +412,12 @@ static CordRep* NewTree(const char* data, namespace cord_internal { -ExternalRepReleaserPair NewExternalWithUninitializedReleaser( - absl::string_view data, ExternalReleaserInvoker invoker, - size_t releaser_size) { +void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) { assert(!data.empty()); - - void* raw_rep = AllocateExternal(releaser_size); - auto* rep = new (raw_rep) CordRepExternal(); rep->length = data.size(); rep->tag = EXTERNAL; rep->base = data.data(); - rep->releaser_invoker = invoker; - return {VerifyTree(rep), GetExternalReleaser(rep)}; + VerifyTree(rep); } } // namespace cord_internal @@ -526,10 +441,7 @@ static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) { // -------------------------------------------------------------------- // Cord::InlineRep functions -// This will trigger LNK2005 in MSVC. -#ifndef COMPILER_MSVC -const unsigned char Cord::InlineRep::kMaxInline; -#endif // COMPILER_MSVC +constexpr unsigned char Cord::InlineRep::kMaxInline; inline void Cord::InlineRep::set_data(const char* data, size_t n, bool nullify_tail) { @@ -585,7 +497,7 @@ void Cord::InlineRep::AppendTree(CordRep* tree) { } void Cord::InlineRep::PrependTree(CordRep* tree) { - if (tree == nullptr) return; + assert(tree != nullptr); size_t len = data_[kMaxInline]; if (len == 0) { set_tree(tree); @@ -738,6 +650,37 @@ Cord::Cord(absl::string_view src) { } } +template <typename T, Cord::EnableIfString<T>> +Cord::Cord(T&& src) { + if ( + // String is short: copy data to avoid external block overhead. + src.size() <= kMaxBytesToCopy || + // String is wasteful: copy data to avoid pinning too much unused memory. + src.size() < src.capacity() / 2 + ) { + if (src.size() <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), src.size(), false); + } else { + contents_.set_tree(NewTree(src.data(), src.size(), 0)); + } + } else { + struct StringReleaser { + void operator()(absl::string_view /* data */) {} + std::string data; + }; + const absl::string_view original_data = src; + auto* rep = static_cast< + ::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>( + absl::cord_internal::NewExternalRep( + original_data, StringReleaser{std::forward<T>(src)})); + // Moving src may have invalidated its data pointer, so adjust it. + rep->base = rep->template get<0>().data.data(); + contents_.set_tree(rep); + } +} + +template Cord::Cord(std::string&& src); + // The destruction code is separate so that the compiler can determine // that it does not need to call the destructor on a moved-from Cord. void Cord::DestroyCordSlow() { @@ -775,6 +718,18 @@ Cord& Cord::operator=(absl::string_view src) { return *this; } +template <typename T, Cord::EnableIfString<T>> +Cord& Cord::operator=(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + *this = absl::string_view(src); + } else { + *this = Cord(std::forward<T>(src)); + } + return *this; +} + +template Cord& Cord::operator=(std::string&& src); + // TODO(sanjay): Move to Cord::InlineRep section of file. For now, // we keep it here to make diffs easier. void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { @@ -886,6 +841,17 @@ void Cord::Append(const Cord& src) { AppendImpl(src); } void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); } +template <typename T, Cord::EnableIfString<T>> +void Cord::Append(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + Append(absl::string_view(src)); + } else { + Append(Cord(std::forward<T>(src))); + } +} + +template void Cord::Append(std::string&& src); + void Cord::Prepend(const Cord& src) { CordRep* src_tree = src.contents_.tree(); if (src_tree != nullptr) { @@ -915,6 +881,17 @@ void Cord::Prepend(absl::string_view src) { } } +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Prepend(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + Prepend(absl::string_view(src)); + } else { + Prepend(Cord(std::forward<T>(src))); + } +} + +template void Cord::Prepend(std::string&& src); + static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { if (n >= node->length) return nullptr; if (n == 0) return Ref(node); @@ -1175,7 +1152,7 @@ class CordForest { void AddNode(CordRep* node) { CordRep* sum = nullptr; - // Collect together everything with which we will merge node + // Collect together everything with which we will merge with node int i = 0; for (; node->length > min_length[i + 1]; ++i) { auto& tree_at_i = trees_[i]; @@ -1506,7 +1483,8 @@ void Cord::CopyToArraySlowPath(char* dst) const { } Cord::ChunkIterator& Cord::ChunkIterator::operator++() { - assert(bytes_remaining_ > 0 && "Attempted to iterate past `end()`"); + ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 && + "Attempted to iterate past `end()`"); assert(bytes_remaining_ >= current_chunk_.size()); bytes_remaining_ -= current_chunk_.size(); @@ -1545,7 +1523,8 @@ Cord::ChunkIterator& Cord::ChunkIterator::operator++() { } Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { - assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + ABSL_HARDENING_ASSERT(bytes_remaining_ >= n && + "Attempted to iterate past `end()`"); Cord subcord; if (n <= InlineRep::kMaxInline) { @@ -1716,7 +1695,7 @@ void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { } char Cord::operator[](size_t i) const { - assert(i < size()); + ABSL_HARDENING_ASSERT(i < size()); size_t offset = i; const CordRep* rep = contents_.tree(); if (rep == nullptr) { diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 40566cba..b8b251b0 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -11,25 +11,52 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - -// A Cord is a sequence of characters with some unusual access propreties. -// A Cord supports efficient insertions and deletions at the start and end of -// the byte sequence, but random access reads are slower, and random access -// modifications are not supported by the API. Cord also provides cheap copies -// (using a copy-on-write strategy) and cheap substring operations. // -// Thread safety -// ------------- +// ----------------------------------------------------------------------------- +// File: cord.h +// ----------------------------------------------------------------------------- +// +// This file defines the `absl::Cord` data structure and operations on that data +// structure. A Cord is a string-like sequence of characters optimized for +// specific use cases. Unlike a `std::string`, which stores an array of +// contiguous characters, Cord data is stored in a structure consisting of +// separate, reference-counted "chunks." (Currently, this implementation is a +// tree structure, though that implementation may change.) +// +// Because a Cord consists of these chunks, data can be added to or removed from +// a Cord during its lifetime. Chunks may also be shared between Cords. Unlike a +// `std::string`, a Cord can therefore accomodate data that changes over its +// lifetime, though it's not quite "mutable"; it can change only in the +// attachment, detachment, or rearrangement of chunks of its constituent data. +// +// A Cord provides some benefit over `std::string` under the following (albeit +// narrow) circumstances: +// +// * Cord data is designed to grow and shrink over a Cord's lifetime. Cord +// provides efficient insertions and deletions at the start and end of the +// character sequences, avoiding copies in those cases. Static data should +// generally be stored as strings. +// * External memory consisting of string-like data can be directly added to +// a Cord without requiring copies or allocations. +// * Cord data may be shared and copied cheaply. Cord provides a copy-on-write +// implementation and cheap sub-Cord operations. Copying a Cord is an O(1) +// operation. +// +// As a consequence to the above, Cord data is generally large. Small data +// should generally use strings, as construction of a Cord requires some +// overhead. Small Cords (<= 15 bytes) are represented inline, but most small +// Cords are expected to grow over their lifetimes. +// +// Note that because a Cord is made up of separate chunked data, random access +// to character data within a Cord is slower than within a `std::string`. +// +// Thread Safety +// // Cord has the same thread-safety properties as many other types like // std::string, std::vector<>, int, etc -- it is thread-compatible. In -// particular, if no thread may call a non-const method, then it is safe to -// concurrently call const methods. Copying a Cord produces a new instance that -// can be used concurrently with the original in arbitrary ways. -// -// Implementation is similar to the "Ropes" described in: -// Ropes: An alternative to strings -// Hans J. Boehm, Russ Atkinson, Michael Plass -// Software Practice and Experience, December 1995 +// particular, if threads do not call non-const methods, then it is safe to call +// const methods without synchronization. Copying a Cord produces a new instance +// that can be used concurrently with the original in arbitrary ways. #ifndef ABSL_STRINGS_CORD_H_ #define ABSL_STRINGS_CORD_H_ @@ -38,12 +65,12 @@ #include <cstddef> #include <cstdint> #include <cstring> -#include <iostream> +#include <iosfwd> #include <iterator> #include <string> +#include <type_traits> #include "absl/base/internal/endian.h" -#include "absl/base/internal/invoke.h" #include "absl/base/internal/per_thread_tls.h" #include "absl/base/macros.h" #include "absl/base/port.h" @@ -53,6 +80,7 @@ #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -61,12 +89,35 @@ class CordTestPeer; template <typename Releaser> Cord MakeCordFromExternal(absl::string_view, Releaser&&); void CopyCordToString(const Cord& src, std::string* dst); -namespace hash_internal { -template <typename H> -H HashFragmentedCord(H, const Cord&); -} -// A Cord is a sequence of characters. +// Cord +// +// A Cord is a sequence of characters, designed to be more efficient than a +// `std::string` in certain circumstances: namely, large string data that needs +// to change over its lifetime or shared, especially when such data is shared +// across API boundaries. +// +// A Cord stores its character data in a structure that allows efficient prepend +// and append operations. This makes a Cord useful for large string data sent +// over in a wire format that may need to be prepended or appended at some point +// during the data exchange (e.g. HTTP, protocol buffers). For example, a +// Cord is useful for storing an HTTP request, and prepending an HTTP header to +// such a request. +// +// Cords should not be used for storing general string data, however. They +// require overhead to construct and are slower than strings for random access. +// +// The Cord API provides the following common API operations: +// +// * Create or assign Cords out of existing string data, memory, or other Cords +// * Append and prepend data to an existing Cord +// * Create new Sub-Cords from existing Cord data +// * Swap Cord data and compare Cord equality +// * Write out Cord data by constructing a `std::string` +// +// Additionally, the API provides iterator utilities to iterate through Cord +// data via chunks or character bytes. +// class Cord { private: template <typename T> @@ -74,51 +125,53 @@ class Cord { absl::enable_if_t<std::is_same<T, std::string>::value, int>; public: - // -------------------------------------------------------------------- - // Constructors, destructors and helper factories + // Cord::Cord() Constructors. - // Create an empty cord + // Creates an empty Cord. constexpr Cord() noexcept; - // Cord is copyable and efficiently movable. - // The moved-from state is valid but unspecified. + // Creates a Cord from an existing Cord. Cord is copyable and efficiently + // movable. The moved-from state is valid but unspecified. Cord(const Cord& src); Cord(Cord&& src) noexcept; Cord& operator=(const Cord& x); Cord& operator=(Cord&& x) noexcept; - // Create a cord out of "src". This constructor is explicit on - // purpose so that people do not get automatic type conversions. + // Creates a Cord from a `src` string. This constructor is marked explicit to + // prevent implicit Cord constructions from arguments convertible to an + // `absl::string_view`. explicit Cord(absl::string_view src); Cord& operator=(absl::string_view src); - // These are templated to avoid ambiguities for types that are convertible to - // both `absl::string_view` and `std::string`, such as `const char*`. - // - // Note that these functions reserve the right to reuse the `string&&`'s - // memory and that they will do so in the future. + // Creates a Cord from a `std::string&&` rvalue. These constructors are + // templated to avoid ambiguities for types that are convertible to both + // `absl::string_view` and `std::string`, such as `const char*`. template <typename T, EnableIfString<T> = 0> - explicit Cord(T&& src) : Cord(absl::string_view(src)) {} + explicit Cord(T&& src); template <typename T, EnableIfString<T> = 0> Cord& operator=(T&& src); - // Destroy the cord + // Cord::~Cord() + // + // Destructs the Cord. ~Cord() { if (contents_.is_tree()) DestroyCordSlow(); } - // Creates a Cord that takes ownership of external memory. The contents of - // `data` are not copied. + // MakeCordFromExternal() // - // This function takes a callable that is invoked when all Cords are - // finished with `data`. The data must remain live and unchanging until the - // releaser is called. The requirements for the releaser are that it: - // * is move constructible, - // * supports `void operator()(absl::string_view) const`, - // * does not have alignment requirement greater than what is guaranteed by - // ::operator new. This is dictated by alignof(std::max_align_t) before - // C++17 and __STDCPP_DEFAULT_NEW_ALIGNMENT__ if compiling with C++17 or - // it is supported by the implementation. + // Creates a Cord that takes ownership of external string memory. The + // contents of `data` are not copied to the Cord; instead, the external + // memory is added to the Cord and reference-counted. This data may not be + // changed for the life of the Cord, though it may be prepended or appended + // to. + // + // `MakeCordFromExternal()` takes a callable "releaser" that is invoked when + // the reference count for `data` reaches zero. As noted above, this data must + // remain live until the releaser is invoked. The callable releaser also must: + // + // * be move constructible + // * support `void operator()(absl::string_view) const` or `void operator()` // // Example: // @@ -127,13 +180,13 @@ class Cord { // FillBlock(block); // return absl::MakeCordFromExternal( // block->ToStringView(), - // [pool, block](absl::string_view /*ignored*/) { - // pool->FreeBlock(block); + // [pool, block](absl::string_view v) { + // pool->FreeBlock(block, v); // }); // } // - // WARNING: It's likely a bug if your releaser doesn't do anything. - // For example, consider the following: + // WARNING: Because a Cord can be reference-counted, it's likely a bug if your + // releaser doesn't do anything. For example, consider the following: // // void Foo(const char* buffer, int len) { // auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len), @@ -147,97 +200,143 @@ class Cord { template <typename Releaser> friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser); - // -------------------------------------------------------------------- - // Mutations - + // Cord::Clear() + // + // Releases the Cord data. Any nodes that share data with other Cords, if + // applicable, will have their reference counts reduced by 1. void Clear(); + // Cord::Append() + // + // Appends data to the Cord, which may come from another Cord or other string + // data. void Append(const Cord& src); void Append(Cord&& src); void Append(absl::string_view src); template <typename T, EnableIfString<T> = 0> void Append(T&& src); + // Cord::Prepend() + // + // Prepends data to the Cord, which may come from another Cord or other string + // data. void Prepend(const Cord& src); void Prepend(absl::string_view src); template <typename T, EnableIfString<T> = 0> void Prepend(T&& src); + // Cord::RemovePrefix() + // + // Removes the first `n` bytes of a Cord. void RemovePrefix(size_t n); void RemoveSuffix(size_t n); - // Returns a new cord representing the subrange [pos, pos + new_size) of + // Cord::Subcord() + // + // Returns a new Cord representing the subrange [pos, pos + new_size) of // *this. If pos >= size(), the result is empty(). If // (pos + new_size) >= size(), the result is the subrange [pos, size()). Cord Subcord(size_t pos, size_t new_size) const; - friend void swap(Cord& x, Cord& y) noexcept; + // Cord::swap() + // + // Swaps the contents of the Cord with `other`. + void swap(Cord& other) noexcept; - // -------------------------------------------------------------------- - // Accessors + // swap() + // + // Swaps the contents of two Cords. + friend void swap(Cord& x, Cord& y) noexcept { + x.swap(y); + } + // Cord::size() + // + // Returns the size of the Cord. size_t size() const; + + // Cord::empty() + // + // Determines whether the given Cord is empty, returning `true` is so. bool empty() const; - // Returns the approximate number of bytes pinned by this Cord. Note that - // Cords that share memory could each be "charged" independently for the same - // shared memory. + // Cord::EstimatedMemoryUsage() + // + // Returns the *approximate* number of bytes held in full or in part by this + // Cord (which may not remain the same between invocations). Note that Cords + // that share memory could each be "charged" independently for the same shared + // memory. size_t EstimatedMemoryUsage() const; - // -------------------------------------------------------------------- - // Comparators - - // Compares 'this' Cord with rhs. This function and its relatives - // treat Cords as sequences of unsigned bytes. The comparison is a - // straightforward lexicographic comparison. Return value: + // Cord::Compare() + // + // Compares 'this' Cord with rhs. This function and its relatives treat Cords + // as sequences of unsigned bytes. The comparison is a straightforward + // lexicographic comparison. `Cord::Compare()` returns values as follows: + // // -1 'this' Cord is smaller // 0 two Cords are equal // 1 'this' Cord is larger int Compare(absl::string_view rhs) const; int Compare(const Cord& rhs) const; - // Does 'this' cord start/end with rhs + // Cord::StartsWith() + // + // Determines whether the Cord starts with the passed string data `rhs`. bool StartsWith(const Cord& rhs) const; bool StartsWith(absl::string_view rhs) const; + + // Cord::EndsWidth() + // + // Determines whether the Cord ends with the passed string data `rhs`. bool EndsWith(absl::string_view rhs) const; bool EndsWith(const Cord& rhs) const; - // -------------------------------------------------------------------- - // Conversion to other types - + // Cord::operator std::string() + // + // Converts a Cord into a `std::string()`. This operator is marked explicit to + // prevent unintended Cord usage in functions that take a string. explicit operator std::string() const; - // Copies the contents from `src` to `*dst`. + // CopyCordToString() + // + // Copies the contents of a `src` Cord into a `*dst` string. // - // This function optimizes the case of reusing the destination std::string since it + // This function optimizes the case of reusing the destination string since it // can reuse previously allocated capacity. However, this function does not // guarantee that pointers previously returned by `dst->data()` remain valid // even if `*dst` had enough capacity to hold `src`. If `*dst` is a new // object, prefer to simply use the conversion operator to `std::string`. friend void CopyCordToString(const Cord& src, std::string* dst); - // -------------------------------------------------------------------- - // Iteration - class CharIterator; - // Type for iterating over the chunks of a `Cord`. See comments for - // `Cord::chunk_begin()`, `Cord::chunk_end()` and `Cord::Chunks()` below for - // preferred usage. + //---------------------------------------------------------------------------- + // Cord::ChunkIterator + //---------------------------------------------------------------------------- // - // Additional notes: + // A `Cord::ChunkIterator` allows iteration over the constituent chunks of its + // Cord. Such iteration allows you to perform non-const operatons on the data + // of a Cord without modifying it. + // + // Generally, you do not instantiate a `Cord::ChunkIterator` directly; + // instead, you create one implicitly through use of the `Cord::Chunks()` + // member function. + // + // The `Cord::ChunkIterator` has the following properties: + // + // * The iterator is invalidated after any non-const operation on the + // Cord object over which it iterates. // * The `string_view` returned by dereferencing a valid, non-`end()` // iterator is guaranteed to be non-empty. - // * A `ChunkIterator` object is invalidated after any non-const - // operation on the `Cord` object over which it iterates. - // * Two `ChunkIterator` objects can be equality compared if and only if - // they remain valid and iterate over the same `Cord`. - // * This is a proxy iterator. This means the `string_view` returned by the - // iterator does not live inside the Cord, and its lifetime is limited to - // the lifetime of the iterator itself. To help prevent issues, - // `ChunkIterator::reference` is not a true reference type and is - // equivalent to `value_type`. - // * The iterator keeps state that can grow for `Cord`s that contain many + // * Two `ChunkIterator` objects can be compared equal if and only if they + // remain valid and iterate over the same Cord. + // * The iterator in this case is a proxy iterator; the `string_view` + // returned by the iterator does not live inside the Cord, and its + // lifetime is limited to the lifetime of the iterator itself. To help + // prevent lifetime issues, `ChunkIterator::reference` is not a true + // reference type and is equivalent to `value_type`. + // * The iterator keeps state that can grow for Cords that contain many // nodes and are imbalanced due to sharing. Prefer to pass this type by // const reference instead of by value. class ChunkIterator { @@ -286,10 +385,13 @@ class Cord { stack_of_right_children_; }; + // Cord::ChunkIterator::chunk_begin() + // // Returns an iterator to the first chunk of the `Cord`. // - // This is useful for getting a `ChunkIterator` outside the context of a - // range-based for-loop (in which case see `Cord::Chunks()` below). + // Generally, prefer using `Cord::Chunks()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `ChunkIterator` where range-based for-loops are not useful. // // Example: // @@ -298,13 +400,28 @@ class Cord { // return std::find(c.chunk_begin(), c.chunk_end(), s); // } ChunkIterator chunk_begin() const; + + // Cord::ChunkItertator::chunk_end() + // // Returns an iterator one increment past the last chunk of the `Cord`. + // + // Generally, prefer using `Cord::Chunks()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `ChunkIterator` where range-based for-loops may not be available. ChunkIterator chunk_end() const; - // Convenience wrapper over `Cord::chunk_begin()` and `Cord::chunk_end()` to - // enable range-based for-loop iteration over `Cord` chunks. + //---------------------------------------------------------------------------- + // Cord::ChunkIterator::ChunkRange + //---------------------------------------------------------------------------- + // + // `ChunkRange` is a helper class for iterating over the chunks of the `Cord`, + // producing an iterator which can be used within a range-based for loop. + // Construction of a `ChunkRange` will return an iterator pointing to the + // first chunk of the Cord. Generally, do not construct a `ChunkRange` + // directly; instead, prefer to use the `Cord::Chunks()` method. // - // Prefer to use `Cord::Chunks()` below instead of constructing this directly. + // Implementation note: `ChunkRange` is simply a convenience wrapper over + // `Cord::chunk_begin()` and `Cord::chunk_end()`. class ChunkRange { public: explicit ChunkRange(const Cord* cord) : cord_(cord) {} @@ -316,8 +433,11 @@ class Cord { const Cord* cord_; }; - // Returns a range for iterating over the chunks of a `Cord` with a - // range-based for-loop. + // Cord::Chunks() + // + // Returns a `Cord::ChunkIterator::ChunkRange` for iterating over the chunks + // of a `Cord` with a range-based for-loop. For most iteration tasks on a + // Cord, use `Cord::Chunks()` to retrieve this iterator. // // Example: // @@ -334,22 +454,30 @@ class Cord { // } ChunkRange Chunks() const; - // Type for iterating over the characters of a `Cord`. See comments for - // `Cord::char_begin()`, `Cord::char_end()` and `Cord::Chars()` below for - // preferred usage. + //---------------------------------------------------------------------------- + // Cord::CharIterator + //---------------------------------------------------------------------------- + // + // A `Cord::CharIterator` allows iteration over the constituent characters of + // a `Cord`. // - // Additional notes: - // * A `CharIterator` object is invalidated after any non-const - // operation on the `Cord` object over which it iterates. - // * Two `CharIterator` objects can be equality compared if and only if - // they remain valid and iterate over the same `Cord`. - // * The iterator keeps state that can grow for `Cord`s that contain many + // Generally, you do not instantiate a `Cord::CharIterator` directly; instead, + // you create one implicitly through use of the `Cord::Chars()` member + // function. + // + // A `Cord::CharIterator` has the following properties: + // + // * The iterator is invalidated after any non-const operation on the + // Cord object over which it iterates. + // * Two `CharIterator` objects can be compared equal if and only if they + // remain valid and iterate over the same Cord. + // * The iterator keeps state that can grow for Cords that contain many // nodes and are imbalanced due to sharing. Prefer to pass this type by // const reference instead of by value. - // * This type cannot be a forward iterator because a `Cord` can reuse - // sections of memory. This violates the requirement that if dereferencing - // two iterators returns the same object, the iterators must compare - // equal. + // * This type cannot act as a forward iterator because a `Cord` can reuse + // sections of memory. This fact violates the requirement for forward + // iterators to compare equal if dereferencing them returns the same + // object. class CharIterator { public: using iterator_category = std::input_iterator_tag; @@ -375,34 +503,56 @@ class Cord { ChunkIterator chunk_iterator_; }; - // Advances `*it` by `n_bytes` and returns the bytes passed as a `Cord`. + // Cord::CharIterator::AdvanceAndRead() // - // `n_bytes` must be less than or equal to the number of bytes remaining for - // iteration. Otherwise the behavior is undefined. It is valid to pass - // `char_end()` and 0. + // Advances the `Cord::CharIterator` by `n_bytes` and returns the bytes + // advanced as a separate `Cord`. `n_bytes` must be less than or equal to the + // number of bytes within the Cord; otherwise, behavior is undefined. It is + // valid to pass `char_end()` and `0`. static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes); - // Advances `*it` by `n_bytes`. + // Cord::CharIterator::Advance() // - // `n_bytes` must be less than or equal to the number of bytes remaining for - // iteration. Otherwise the behavior is undefined. It is valid to pass - // `char_end()` and 0. + // Advances the `Cord::CharIterator` by `n_bytes`. `n_bytes` must be less than + // or equal to the number of bytes remaining within the Cord; otherwise, + // behavior is undefined. It is valid to pass `char_end()` and `0`. static void Advance(CharIterator* it, size_t n_bytes); + // Cord::CharIterator::ChunkRemaining() + // // Returns the longest contiguous view starting at the iterator's position. // // `it` must be dereferenceable. static absl::string_view ChunkRemaining(const CharIterator& it); + // Cord::CharIterator::char_begin() + // // Returns an iterator to the first character of the `Cord`. + // + // Generally, prefer using `Cord::Chars()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `CharIterator` where range-based for-loops may not be available. CharIterator char_begin() const; + + // Cord::CharIterator::char_end() + // // Returns an iterator to one past the last character of the `Cord`. + // + // Generally, prefer using `Cord::Chars()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `CharIterator` where range-based for-loops are not useful. CharIterator char_end() const; - // Convenience wrapper over `Cord::char_begin()` and `Cord::char_end()` to - // enable range-based for-loop iterator over the characters of a `Cord`. + // Cord::CharIterator::CharRange + // + // `CharRange` is a helper class for iterating over the characters of a + // producing an iterator which can be used within a range-based for loop. + // Construction of a `CharRange` will return an iterator pointing to the first + // character of the Cord. Generally, do not construct a `CharRange` directly; + // instead, prefer to use the `Cord::Chars()` method show below. // - // Prefer to use `Cord::Chars()` below instead of constructing this directly. + // Implementation note: `CharRange` is simply a convenience wrapper over + // `Cord::char_begin()` and `Cord::char_end()`. class CharRange { public: explicit CharRange(const Cord* cord) : cord_(cord) {} @@ -414,8 +564,11 @@ class Cord { const Cord* cord_; }; - // Returns a range for iterating over the characters of a `Cord` with a - // range-based for-loop. + // Cord::CharIterator::Chars() + // + // Returns a `Cord::CharIterator` for iterating over the characters of a + // `Cord` with a range-based for-loop. For most character-based iteration + // tasks on a Cord, use `Cord::Chars()` to retrieve this iterator. // // Example: // @@ -432,32 +585,51 @@ class Cord { // } CharRange Chars() const; - // -------------------------------------------------------------------- - // Miscellaneous - - // Get the "i"th character of 'this' and return it. - // NOTE: This routine is reasonably efficient. It is roughly - // logarithmic in the number of nodes that make up the cord. Still, - // if you need to iterate over the contents of a cord, you should - // use a CharIterator/CordIterator rather than call operator[] or Get() - // repeatedly in a loop. + // Cord::operator[] + // + // Gets the "i"th character of the Cord and returns it, provided that + // 0 <= i < Cord.size(). // - // REQUIRES: 0 <= i < size() + // NOTE: This routine is reasonably efficient. It is roughly + // logarithmic based on the number of chunks that make up the cord. Still, + // if you need to iterate over the contents of a cord, you should + // use a CharIterator/ChunkIterator rather than call operator[] or Get() + // repeatedly in a loop. char operator[](size_t i) const; + // Cord::TryFlat() + // + // If this cord's representation is a single flat array, returns a + // string_view referencing that array. Otherwise returns nullopt. + absl::optional<absl::string_view> TryFlat() const; + + // Cord::Flatten() + // // Flattens the cord into a single array and returns a view of the data. // // If the cord was already flat, the contents are not modified. absl::string_view Flatten(); + // Supports absl::Cord as a sink object for absl::Format(). + friend void AbslFormatFlush(absl::Cord* cord, absl::string_view part) { + cord->Append(part); + } + + template <typename H> + friend H AbslHashValue(H hash_state, const absl::Cord& c) { + absl::optional<absl::string_view> maybe_flat = c.TryFlat(); + if (maybe_flat.has_value()) { + return H::combine(std::move(hash_state), *maybe_flat); + } + return c.HashFragmented(std::move(hash_state)); + } + private: friend class CordTestPeer; - template <typename H> - friend H absl::hash_internal::HashFragmentedCord(H, const Cord&); friend bool operator==(const Cord& lhs, const Cord& rhs); friend bool operator==(const Cord& lhs, absl::string_view rhs); - // Call the provided function once for each cord chunk, in order. Unlike + // Calls the provided function once for each cord chunk, in order. Unlike // Chunks(), this API will not allocate memory. void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const; @@ -469,16 +641,15 @@ class Cord { // class so that we can isolate the bulk of cord.cc from changes // to the representation. // - // InlineRep holds either either a tree pointer, or an array of kMaxInline - // bytes. + // InlineRep holds either a tree pointer, or an array of kMaxInline bytes. class InlineRep { public: - static const unsigned char kMaxInline = 15; + static constexpr unsigned char kMaxInline = 15; static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), ""); // Tag byte & kMaxInline means we are storing a pointer. - static const unsigned char kTreeFlag = 1 << 4; + static constexpr unsigned char kTreeFlag = 1 << 4; // Tag byte & kProfiledFlag means we are profiling the Cord. - static const unsigned char kProfiledFlag = 1 << 5; + static constexpr unsigned char kProfiledFlag = 1 << 5; constexpr InlineRep() : data_{} {} InlineRep(const InlineRep& src); @@ -502,7 +673,7 @@ class Cord { void replace_tree(absl::cord_internal::CordRep* rep); // Returns non-null iff was holding a pointer absl::cord_internal::CordRep* clear(); - // Convert to pointer if necessary + // Converts to pointer if necessary. absl::cord_internal::CordRep* force_tree(size_t extra_hint); void reduce_size(size_t n); // REQUIRES: holding data void remove_prefix(size_t n); // REQUIRES: holding data @@ -531,7 +702,7 @@ class Cord { } void CopyTo(std::string* dst) const { // memcpy is much faster when operating on a known size. On most supported - // platforms, the small std::string optimization is large enough that resizing + // platforms, the small string optimization is large enough that resizing // to 15 bytes does not cause a memory allocation. absl::strings_internal::STLStringResizeUninitialized(dst, sizeof(data_) - 1); @@ -560,14 +731,14 @@ class Cord { }; InlineRep contents_; - // Helper for MemoryUsage() + // Helper for MemoryUsage(). static size_t MemoryUsageAux(const absl::cord_internal::CordRep* rep); - // Helper for GetFlat() + // Helper for GetFlat() and TryFlat(). static bool GetFlatAux(absl::cord_internal::CordRep* rep, absl::string_view* fragment); - // Helper for ForEachChunk() + // Helper for ForEachChunk(). static void ForEachChunkAux( absl::cord_internal::CordRep* rep, absl::FunctionRef<void(absl::string_view)> callback); @@ -596,9 +767,20 @@ class Cord { absl::cord_internal::CordRep* TakeRep() const&; absl::cord_internal::CordRep* TakeRep() &&; - // Helper for Append() + // Helper for Append(). template <typename C> void AppendImpl(C&& src); + + // Helper for AbslHashValue(). + template <typename H> + H HashFragmented(H hash_state) const { + typename H::AbslInternalPiecewiseCombiner combiner; + ForEachChunk([&combiner, &hash_state](absl::string_view chunk) { + hash_state = combiner.add_buffer(std::move(hash_state), chunk.data(), + chunk.size()); + }); + return H::combine(combiner.finalize(std::move(hash_state)), size()); + } }; ABSL_NAMESPACE_END @@ -655,52 +837,27 @@ inline void SmallMemmove(char* dst, const char* src, size_t n, } } -struct ExternalRepReleaserPair { - CordRep* rep; - void* releaser_address; -}; - -// Allocates a new external `CordRep` and returns a pointer to it and a pointer -// to `releaser_size` bytes where the desired releaser can be constructed. +// Does non-template-specific `CordRepExternal` initialization. // Expects `data` to be non-empty. -ExternalRepReleaserPair NewExternalWithUninitializedReleaser( - absl::string_view data, ExternalReleaserInvoker invoker, - size_t releaser_size); +void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep); // Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer // to it, or `nullptr` if `data` was empty. template <typename Releaser> // NOLINTNEXTLINE - suppress clang-tidy raw pointer return. CordRep* NewExternalRep(absl::string_view data, Releaser&& releaser) { - static_assert( -#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) - alignof(Releaser) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__, -#else - alignof(Releaser) <= alignof(max_align_t), -#endif - "Releasers with alignment requirement greater than what is returned by " - "default `::operator new()` are not supported."); - using ReleaserType = absl::decay_t<Releaser>; if (data.empty()) { // Never create empty external nodes. - ::absl::base_internal::Invoke( - ReleaserType(std::forward<Releaser>(releaser)), data); + InvokeReleaser(Rank0{}, ReleaserType(std::forward<Releaser>(releaser)), + data); return nullptr; } - auto releaser_invoker = [](void* type_erased_releaser, absl::string_view d) { - auto* my_releaser = static_cast<ReleaserType*>(type_erased_releaser); - ::absl::base_internal::Invoke(std::move(*my_releaser), d); - my_releaser->~ReleaserType(); - return sizeof(Releaser); - }; - - ExternalRepReleaserPair external = NewExternalWithUninitializedReleaser( - data, releaser_invoker, sizeof(releaser)); - ::new (external.releaser_address) - ReleaserType(std::forward<Releaser>(releaser)); - return external.rep; + CordRepExternal* rep = new CordRepExternalImpl<ReleaserType>( + std::forward<Releaser>(releaser), 0); + InitializeCordRepExternal(data, rep); + return rep; } // Overload for function reference types that dispatches using a function @@ -834,16 +991,17 @@ inline Cord& Cord::operator=(const Cord& x) { inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} +inline void Cord::swap(Cord& other) noexcept { + contents_.Swap(&other.contents_); +} + inline Cord& Cord::operator=(Cord&& x) noexcept { contents_ = std::move(x.contents_); return *this; } -template <typename T, Cord::EnableIfString<T>> -inline Cord& Cord::operator=(T&& src) { - *this = absl::string_view(src); - return *this; -} +extern template Cord::Cord(std::string&& src); +extern template Cord& Cord::operator=(std::string&& src); inline size_t Cord::size() const { // Length is 1st field in str.rep_ @@ -860,6 +1018,18 @@ inline size_t Cord::EstimatedMemoryUsage() const { return result; } +inline absl::optional<absl::string_view> Cord::TryFlat() const { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return absl::string_view(contents_.data(), contents_.size()); + } + absl::string_view fragment; + if (GetFlatAux(rep, &fragment)) { + return fragment; + } + return absl::nullopt; +} + inline absl::string_view Cord::Flatten() { absl::cord_internal::CordRep* rep = contents_.tree(); if (rep == nullptr) { @@ -877,19 +1047,8 @@ inline void Cord::Append(absl::string_view src) { contents_.AppendArray(src.data(), src.size()); } -template <typename T, Cord::EnableIfString<T>> -inline void Cord::Append(T&& src) { - // Note that this function reserves the right to reuse the `string&&`'s - // memory and that it will do so in the future. - Append(absl::string_view(src)); -} - -template <typename T, Cord::EnableIfString<T>> -inline void Cord::Prepend(T&& src) { - // Note that this function reserves the right to reuse the `string&&`'s - // memory and that it will do so in the future. - Prepend(absl::string_view(src)); -} +extern template void Cord::Append(std::string&& src); +extern template void Cord::Prepend(std::string&& src); inline int Cord::Compare(const Cord& rhs) const { if (!contents_.is_tree() && !rhs.contents_.is_tree()) { @@ -939,12 +1098,12 @@ inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const { } inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const { - assert(bytes_remaining_ != 0); + ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); return current_chunk_; } inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const { - assert(bytes_remaining_ != 0); + ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); return ¤t_chunk_; } @@ -1098,10 +1257,6 @@ inline bool operator<=(absl::string_view x, const Cord& y) { return !(y < x); } inline bool operator>=(const Cord& x, absl::string_view y) { return !(x < y); } inline bool operator>=(absl::string_view x, const Cord& y) { return !(x < y); } -// Overload of swap for Cord. The use of non-const references is -// required. :( -inline void swap(Cord& x, Cord& y) noexcept { y.contents_.Swap(&x.contents_); } - // Some internals exposed to test code. namespace strings_internal { class CordTestAccess { diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 434f3a24..4443c828 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -18,9 +18,11 @@ #include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" #include "absl/container/fixed_array.h" #include "absl/strings/cord_test_helpers.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "absl/strings/string_view.h" typedef std::mt19937_64 RandomEngine; @@ -70,9 +72,8 @@ static std::string RandomLowercaseString(RandomEngine* rng) { static std::string RandomLowercaseString(RandomEngine* rng, size_t length) { std::string result(length, '\0'); std::uniform_int_distribution<int> chars('a', 'z'); - std::generate(result.begin(), result.end(), [&]() { - return static_cast<char>(chars(*rng)); - }); + std::generate(result.begin(), result.end(), + [&]() { return static_cast<char>(chars(*rng)); }); return result; } @@ -175,7 +176,7 @@ TEST(Cord, AllFlatSizes) { using absl::strings_internal::CordTestAccess; for (size_t s = 0; s < CordTestAccess::MaxFlatLength(); s++) { - // Make a std::string of length s. + // Make a string of length s. std::string src; while (src.size() < s) { src.push_back('a' + (src.size() % 26)); @@ -395,6 +396,9 @@ TEST(Cord, Swap) { swap(x, y); ASSERT_EQ(x, absl::Cord(b)); ASSERT_EQ(y, absl::Cord(a)); + x.swap(y); + ASSERT_EQ(x, absl::Cord(a)); + ASSERT_EQ(y, absl::Cord(b)); } static void VerifyCopyToString(const absl::Cord& cord) { @@ -410,7 +414,7 @@ static void VerifyCopyToString(const absl::Cord& cord) { if (cord.size() <= kInitialLength) { EXPECT_EQ(has_initial_contents.data(), address_before_copy) - << "CopyCordToString allocated new std::string storage; " + << "CopyCordToString allocated new string storage; " "has_initial_contents = \"" << has_initial_contents << "\""; } @@ -424,6 +428,50 @@ TEST(Cord, CopyToString) { "copying ", "to ", "a ", "string."})); } +TEST(TryFlat, Empty) { + absl::Cord c; + EXPECT_EQ(c.TryFlat(), ""); +} + +TEST(TryFlat, Flat) { + absl::Cord c("hello"); + EXPECT_EQ(c.TryFlat(), "hello"); +} + +TEST(TryFlat, SubstrInlined) { + absl::Cord c("hello"); + c.RemovePrefix(1); + EXPECT_EQ(c.TryFlat(), "ello"); +} + +TEST(TryFlat, SubstrFlat) { + absl::Cord c("longer than 15 bytes"); + c.RemovePrefix(1); + EXPECT_EQ(c.TryFlat(), "onger than 15 bytes"); +} + +TEST(TryFlat, Concat) { + absl::Cord c = absl::MakeFragmentedCord({"hel", "lo"}); + EXPECT_EQ(c.TryFlat(), absl::nullopt); +} + +TEST(TryFlat, External) { + absl::Cord c = absl::MakeCordFromExternal("hell", [](absl::string_view) {}); + EXPECT_EQ(c.TryFlat(), "hell"); +} + +TEST(TryFlat, SubstrExternal) { + absl::Cord c = absl::MakeCordFromExternal("hell", [](absl::string_view) {}); + c.RemovePrefix(1); + EXPECT_EQ(c.TryFlat(), "ell"); +} + +TEST(TryFlat, SubstrConcat) { + absl::Cord c = absl::MakeFragmentedCord({"hello", " world"}); + c.RemovePrefix(1); + EXPECT_EQ(c.TryFlat(), absl::nullopt); +} + static bool IsFlat(const absl::Cord& c) { return c.chunk_begin() == c.chunk_end() || ++c.chunk_begin() == c.chunk_end(); } @@ -511,24 +559,24 @@ TEST(Cord, MultipleLengths) { for (size_t i = 0; i < d.size(); i++) { std::string a = d.data(i); - { // Construct from Cord + { // Construct from Cord absl::Cord tmp(a); absl::Cord x(tmp); EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; } - { // Construct from absl::string_view + { // Construct from absl::string_view absl::Cord x(a); EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; } - { // Append cord to self + { // Append cord to self absl::Cord self(a); self.Append(self); EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; } - { // Prepend cord to self + { // Prepend cord to self absl::Cord self(a); self.Prepend(self); EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; @@ -538,40 +586,40 @@ TEST(Cord, MultipleLengths) { for (size_t j = 0; j < d.size(); j++) { std::string b = d.data(j); - { // CopyFrom Cord + { // CopyFrom Cord absl::Cord x(a); absl::Cord y(b); x = y; EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; } - { // CopyFrom absl::string_view + { // CopyFrom absl::string_view absl::Cord x(a); x = b; EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; } - { // Cord::Append(Cord) + { // Cord::Append(Cord) absl::Cord x(a); absl::Cord y(b); x.Append(y); EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; } - { // Cord::Append(absl::string_view) + { // Cord::Append(absl::string_view) absl::Cord x(a); x.Append(b); EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; } - { // Cord::Prepend(Cord) + { // Cord::Prepend(Cord) absl::Cord x(a); absl::Cord y(b); x.Prepend(y); EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; } - { // Cord::Prepend(absl::string_view) + { // Cord::Prepend(absl::string_view) absl::Cord x(a); x.Prepend(b); EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; @@ -813,7 +861,7 @@ TEST(Cord, CompareAfterAssign) { } // Test CompareTo() and ComparePrefix() against string and substring -// comparison methods from std::basic_string. +// comparison methods from basic_string. static void TestCompare(const absl::Cord& c, const absl::Cord& d, RandomEngine* rng) { typedef std::basic_string<uint8_t> ustring; @@ -869,7 +917,7 @@ void CompareOperators() { EXPECT_TRUE(a == a); // For pointer type (i.e. `const char*`), operator== compares the address - // instead of the std::string, so `a == const char*("a")` isn't necessarily true. + // instead of the string, so `a == const char*("a")` isn't necessarily true. EXPECT_TRUE(std::is_pointer<T1>::value || a == T1("a")); EXPECT_TRUE(std::is_pointer<T2>::value || a == T2("a")); EXPECT_FALSE(a == b); @@ -1032,6 +1080,19 @@ TEST(ConstructFromExternal, MoveOnlyReleaser) { EXPECT_TRUE(invoked); } +TEST(ConstructFromExternal, NoArgLambda) { + bool invoked = false; + (void)absl::MakeCordFromExternal("dummy", [&invoked]() { invoked = true; }); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, StringViewArgLambda) { + bool invoked = false; + (void)absl::MakeCordFromExternal( + "dummy", [&invoked](absl::string_view) { invoked = true; }); + EXPECT_TRUE(invoked); +} + TEST(ConstructFromExternal, NonTrivialReleaserDestructor) { struct Releaser { explicit Releaser(bool* destroyed) : destroyed(destroyed) {} @@ -1076,7 +1137,7 @@ TEST(ConstructFromExternal, ReferenceQualifierOverloads) { } TEST(ExternalMemory, BasicUsage) { - static const char* strings[] = { "", "hello", "there" }; + static const char* strings[] = {"", "hello", "there"}; for (const char* str : strings) { absl::Cord dst("(prefix)"); AddExternalMemory(str, &dst); @@ -1524,3 +1585,31 @@ TEST(Cord, SmallBufferAssignFromOwnData) { } } } + +TEST(Cord, Format) { + absl::Cord c; + absl::Format(&c, "There were %04d little %s.", 3, "pigs"); + EXPECT_EQ(c, "There were 0003 little pigs."); + absl::Format(&c, "And %-3llx bad wolf!", 1); + EXPECT_EQ(c, "There were 0003 little pigs.And 1 bad wolf!"); +} + +TEST(CordDeathTest, Hardening) { + absl::Cord cord("hello"); + // These statement should abort the program in all builds modes. + EXPECT_DEATH_IF_SUPPORTED(cord.RemovePrefix(6), ""); + EXPECT_DEATH_IF_SUPPORTED(cord.RemoveSuffix(6), ""); + + bool test_hardening = false; + ABSL_HARDENING_ASSERT([&]() { + // This only runs when ABSL_HARDENING_ASSERT is active. + test_hardening = true; + return true; + }()); + if (!test_hardening) return; + + EXPECT_DEATH_IF_SUPPORTED(cord[5], ""); + EXPECT_DEATH_IF_SUPPORTED(*cord.chunk_end(), ""); + EXPECT_DEATH_IF_SUPPORTED(static_cast<void>(cord.chunk_end()->empty()), ""); + EXPECT_DEATH_IF_SUPPORTED(++cord.chunk_end(), ""); +} diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 7adc1b65..9fceeef0 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -450,7 +450,7 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, // The GET_INPUT macro gets the next input character, skipping // over any whitespace, and stopping when we reach the end of the - // std::string or when we read any non-data character. The arguments are + // string or when we read any non-data character. The arguments are // an arbitrary identifier (used as a label for goto) and the number // of data bytes that must remain in the input to avoid aborting the // loop. @@ -473,18 +473,18 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, if (dest) { // This loop consumes 4 input bytes and produces 3 output bytes // per iteration. We can't know at the start that there is enough - // data left in the std::string for a full iteration, so the loop may + // data left in the string for a full iteration, so the loop may // break out in the middle; if so 'state' will be set to the // number of input bytes read. while (szsrc >= 4) { // We'll start by optimistically assuming that the next four - // bytes of the std::string (src[0..3]) are four good data bytes + // bytes of the string (src[0..3]) are four good data bytes // (that is, no nulls, whitespace, padding chars, or illegal // chars). We need to test src[0..2] for nulls individually // before constructing temp to preserve the property that we - // never read past a null in the std::string (no matter how long - // szsrc claims the std::string is). + // never read past a null in the string (no matter how long + // szsrc claims the string is). if (!src[0] || !src[1] || !src[2] || ((temp = ((unsigned(unbase64[src[0]]) << 18) | @@ -509,7 +509,7 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, temp = (temp << 6) | decode; } else { // We really did have four good data bytes, so advance four - // characters in the std::string. + // characters in the string. szsrc -= 4; src += 4; @@ -644,7 +644,7 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, state); } - // The remainder of the std::string should be all whitespace, mixed with + // The remainder of the string should be all whitespace, mixed with // exactly 0 equals signs, or exactly 'expected_equals' equals // signs. (Always accepting 0 equals signs is an Abseil extension // not covered in the RFC, as is accepting dot as the pad character.) @@ -771,7 +771,7 @@ constexpr char kWebSafeBase64Chars[] = template <typename String> bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, const signed char* unbase64) { - // Determine the size of the output std::string. Base64 encodes every 3 bytes into + // Determine the size of the output string. Base64 encodes every 3 bytes into // 4 characters. any leftover chars are added directly for good measure. // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548 const size_t dest_len = 3 * (slen / 4) + (slen % 4); @@ -779,7 +779,7 @@ bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, strings_internal::STLStringResizeUninitialized(dest, dest_len); // We are getting the destination buffer by getting the beginning of the - // std::string and converting it into a char *. + // string and converting it into a char *. size_t len; const bool ok = Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc index 1967975b..45671a0e 100644 --- a/absl/strings/escaping_test.cc +++ b/absl/strings/escaping_test.cc @@ -300,7 +300,7 @@ static struct { absl::string_view plaintext; absl::string_view cyphertext; } const base64_tests[] = { - // Empty std::string. + // Empty string. {{"", 0}, {"", 0}}, {{nullptr, 0}, {"", 0}}, // if length is zero, plaintext ptr must be ignored! @@ -586,7 +586,7 @@ void TestEscapeAndUnescape() { EXPECT_EQ(encoded, websafe); EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe); - // Let's try the std::string version of the decoder + // Let's try the string version of the decoder decoded = "this junk should be ignored"; EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded)); EXPECT_EQ(decoded, tc.plaintext); @@ -625,7 +625,7 @@ TEST(Base64, DISABLED_HugeData) { std::string escaped; absl::Base64Escape(huge, &escaped); - // Generates the std::string that should match a base64 encoded "xxx..." std::string. + // Generates the string that should match a base64 encoded "xxx..." string. // "xxx" in base64 is "eHh4". std::string expected_encoding; expected_encoding.reserve(kSize / 3 * 4); diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h index a76e6036..61484de0 100644 --- a/absl/strings/internal/char_map.h +++ b/absl/strings/internal/char_map.h @@ -72,7 +72,7 @@ class Charmap { CharMaskForWord(x, 2), CharMaskForWord(x, 3)); } - // Containing all the chars in the C-std::string 's'. + // Containing all the chars in the C-string 's'. // Note that this is expensively recursive because of the C++11 constexpr // formulation. Use only in constexpr initializers. static constexpr Charmap FromString(const char* s) { diff --git a/absl/strings/internal/charconv_bigint.cc b/absl/strings/internal/charconv_bigint.cc index 66f33e72..ebf8c079 100644 --- a/absl/strings/internal/charconv_bigint.cc +++ b/absl/strings/internal/charconv_bigint.cc @@ -208,7 +208,7 @@ int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end, ++dropped_digits; } if (begin < end && *std::prev(end) == '.') { - // If the std::string ends in '.', either before or after dropping zeroes, then + // If the string ends in '.', either before or after dropping zeroes, then // drop the decimal point and look for more digits to drop. dropped_digits = 0; --end; diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h index 999e9ae3..8f702976 100644 --- a/absl/strings/internal/charconv_bigint.h +++ b/absl/strings/internal/charconv_bigint.h @@ -66,7 +66,7 @@ class BigUnsigned { static_cast<uint32_t>(v >> 32)} {} // Constructs a BigUnsigned from the given string_view containing a decimal - // value. If the input std::string is not a decimal integer, constructs a 0 + // value. If the input string is not a decimal integer, constructs a 0 // instead. explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { // Check for valid input, returning a 0 otherwise. This is reasonable @@ -210,7 +210,7 @@ class BigUnsigned { return words_[index]; } - // Returns this integer as a decimal std::string. This is not used in the decimal- + // Returns this integer as a decimal string. This is not used in the decimal- // to-binary conversion; it is intended to aid in testing. std::string ToString() const; diff --git a/absl/strings/internal/charconv_parse.cc b/absl/strings/internal/charconv_parse.cc index d9a57a78..fd6d9480 100644 --- a/absl/strings/internal/charconv_parse.cc +++ b/absl/strings/internal/charconv_parse.cc @@ -302,7 +302,7 @@ bool ParseInfinityOrNan(const char* begin, const char* end, switch (*begin) { case 'i': case 'I': { - // An infinity std::string consists of the characters "inf" or "infinity", + // An infinity string consists of the characters "inf" or "infinity", // case insensitive. if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) { return false; @@ -326,7 +326,7 @@ bool ParseInfinityOrNan(const char* begin, const char* end, } out->type = strings_internal::FloatType::kNan; out->end = begin + 3; - // NaN is allowed to be followed by a parenthesized std::string, consisting of + // NaN is allowed to be followed by a parenthesized string, consisting of // only the characters [a-zA-Z0-9_]. Match that if it's present. begin += 3; if (begin < end && *begin == '(') { diff --git a/absl/strings/internal/charconv_parse_test.cc b/absl/strings/internal/charconv_parse_test.cc index 9511c987..bc2d1118 100644 --- a/absl/strings/internal/charconv_parse_test.cc +++ b/absl/strings/internal/charconv_parse_test.cc @@ -63,7 +63,7 @@ void ExpectParsedFloat(std::string s, absl::chars_format format_flags, } const std::string::size_type expected_characters_matched = s.find('$'); ABSL_RAW_CHECK(expected_characters_matched != std::string::npos, - "Input std::string must contain $"); + "Input string must contain $"); s.replace(expected_characters_matched, 1, ""); ParsedFloat parsed = diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index 5b5d1083..d456eef8 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -21,6 +21,8 @@ #include <cstdint> #include <type_traits> +#include "absl/base/internal/invoke.h" +#include "absl/container/internal/compressed_tuple.h" #include "absl/meta/type_traits.h" #include "absl/strings/string_view.h" @@ -86,8 +88,7 @@ struct CordRepExternal; struct CordRep { // The following three fields have to be less than 32 bytes since // that is the smallest supported flat node size. - // We use uint64_t for the length even in 32-bit binaries. - uint64_t length; + size_t length; Refcount refcount; // If tag < FLAT, it represents CordRepKind and indicates the type of node. // Otherwise, the node type is CordRepFlat and the tag is the encoded size. @@ -115,35 +116,56 @@ struct CordRepSubstring : public CordRep { CordRep* child; }; -// TODO(strel): replace the following logic (and related functions in cord.cc) -// with container_internal::Layout. - -// Alignment requirement for CordRepExternal so that the type erased releaser -// will be stored at a suitably aligned address. -constexpr size_t ExternalRepAlignment() { -#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) - return __STDCPP_DEFAULT_NEW_ALIGNMENT__; -#else - return alignof(max_align_t); -#endif -} - -// Type for function pointer that will invoke and destroy the type-erased -// releaser function object. Accepts a pointer to the releaser and the -// `string_view` that were passed in to `NewExternalRep` below. The return value -// is the size of the `Releaser` type. -using ExternalReleaserInvoker = size_t (*)(void*, absl::string_view); +// Type for function pointer that will invoke the releaser function and also +// delete the `CordRepExternalImpl` corresponding to the passed in +// `CordRepExternal`. +using ExternalReleaserInvoker = void (*)(CordRepExternal*); // External CordReps are allocated together with a type erased releaser. The // releaser is stored in the memory directly following the CordRepExternal. -struct alignas(ExternalRepAlignment()) CordRepExternal : public CordRep { +struct CordRepExternal : public CordRep { const char* base; // Pointer to function that knows how to call and destroy the releaser. ExternalReleaserInvoker releaser_invoker; }; -// TODO(strel): look into removing, it doesn't seem like anything relies on this -static_assert(sizeof(CordRepConcat) == sizeof(CordRepSubstring), ""); +struct Rank1 {}; +struct Rank0 : Rank1 {}; + +template <typename Releaser, typename = ::absl::base_internal::invoke_result_t< + Releaser, absl::string_view>> +void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) { + ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data); +} + +template <typename Releaser, + typename = ::absl::base_internal::invoke_result_t<Releaser>> +void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) { + ::absl::base_internal::invoke(std::forward<Releaser>(releaser)); +} + +// We use CompressedTuple so that we can benefit from EBCO. +template <typename Releaser> +struct CordRepExternalImpl + : public CordRepExternal, + public ::absl::container_internal::CompressedTuple<Releaser> { + // The extra int arg is so that we can avoid interfering with copy/move + // constructors while still benefitting from perfect forwarding. + template <typename T> + CordRepExternalImpl(T&& releaser, int) + : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) { + this->releaser_invoker = &Release; + } + + ~CordRepExternalImpl() { + InvokeReleaser(Rank0{}, std::move(this->template get<0>()), + absl::string_view(base, length)); + } + + static void Release(CordRepExternal* rep) { + delete static_cast<CordRepExternalImpl*>(rep); + } +}; } // namespace cord_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/numbers_test_common.h b/absl/strings/internal/numbers_test_common.h index 1a1e50c4..eaa88a88 100644 --- a/absl/strings/internal/numbers_test_common.h +++ b/absl/strings/internal/numbers_test_common.h @@ -170,7 +170,7 @@ inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() { {"0x1234", true, 16, 0x1234}, - // Base-10 std::string version. + // Base-10 string version. {"1234", true, 0, 1234}, {nullptr, false, 0, 0}, }}; diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index 4d0604e0..9feb2248 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -12,14 +12,13 @@ #include "absl/base/port.h" #include "absl/strings/internal/str_format/float_conversion.h" +#include "absl/strings/numbers.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { -const char kDigit[2][32] = { "0123456789abcdef", "0123456789ABCDEF" }; - // Reduce *capacity by s.size(), clipped to a 0 minimum. void ReducePadding(string_view s, size_t *capacity) { *capacity = Excess(s.size(), *capacity); @@ -48,125 +47,179 @@ struct IsSigned<absl::int128> : std::true_type {}; template <> struct IsSigned<absl::uint128> : std::false_type {}; -class ConvertedIntInfo { +// Integral digit printer. +// Call one of the PrintAs* routines after construction once. +// Use with_neg_and_zero/without_neg_or_zero/is_negative to access the results. +class IntDigits { public: + // Print the unsigned integer as octal. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsOct(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + do { + *--p = static_cast<char>('0' + (static_cast<size_t>(v) & 7)); + v >>= 3; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // Print the signed or unsigned integer as decimal. + // Supports all integral types. template <typename T> - ConvertedIntInfo(T v, ConversionChar conv) { - using Unsigned = typename MakeUnsigned<T>::type; - auto u = static_cast<Unsigned>(v); - if (IsNeg(v)) { - is_neg_ = true; - u = Unsigned{} - u; - } else { - is_neg_ = false; + void PrintAsDec(T v) { + static_assert(std::is_integral<T>::value, ""); + start_ = storage_; + size_ = numbers_internal::FastIntToBuffer(v, storage_) - storage_; + } + + void PrintAsDec(int128 v) { + auto u = static_cast<uint128>(v); + bool add_neg = false; + if (v < 0) { + add_neg = true; + u = uint128{} - u; } - UnsignedToStringRight(u, conv); + PrintAsDec(u, add_neg); } - string_view digits() const { - return {end() - size_, static_cast<size_t>(size_)}; + void PrintAsDec(uint128 v, bool add_neg = false) { + // This function can be sped up if needed. We can call FastIntToBuffer + // twice, or fix FastIntToBuffer to support uint128. + char *p = storage_ + sizeof(storage_); + do { + p -= 2; + numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p); + v /= 100; + } while (v); + if (p[0] == '0') { + // We printed one too many hexits. + ++p; + } + if (add_neg) { + *--p = '-'; + } + size_ = storage_ + sizeof(storage_) - p; + start_ = p; } - bool is_neg() const { return is_neg_; } - private: - template <typename T, bool IsSigned> - struct IsNegImpl { - static bool Eval(T v) { return v < 0; } - }; + // Print the unsigned integer as hex using lowercase. + // Supports unsigned integral types and uint128. template <typename T> - struct IsNegImpl<T, false> { - static bool Eval(T) { - return false; + void PrintAsHexLower(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + do { + p -= 2; + constexpr const char* table = numbers_internal::kHexTable; + std::memcpy(p, table + 2 * (static_cast<size_t>(v) & 0xFF), 2); + if (sizeof(T) == 1) break; + v >>= 8; + } while (v); + if (p[0] == '0') { + // We printed one too many digits. + ++p; } - }; + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + // Print the unsigned integer as hex using uppercase. + // Supports unsigned integral types and uint128. template <typename T> - bool IsNeg(T v) { - return IsNegImpl<T, IsSigned<T>::value>::Eval(v); + void PrintAsHexUpper(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + // kHexTable is only lowercase, so do it manually for uppercase. + do { + *--p = "0123456789ABCDEF"[static_cast<size_t>(v) & 15]; + v >>= 4; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; } - template <typename T> - void UnsignedToStringRight(T u, ConversionChar conv) { - char *p = end(); - switch (FormatConversionCharRadix(conv)) { - default: - case 10: - for (; u; u /= 10) - *--p = static_cast<char>('0' + static_cast<size_t>(u % 10)); - break; - case 8: - for (; u; u /= 8) - *--p = static_cast<char>('0' + static_cast<size_t>(u % 8)); - break; - case 16: { - const char *digits = kDigit[FormatConversionCharIsUpper(conv) ? 1 : 0]; - for (; u; u /= 16) *--p = digits[static_cast<size_t>(u % 16)]; - break; - } - } - size_ = static_cast<int>(end() - p); + // The printed value including the '-' sign if available. + // For inputs of value `0`, this will return "0" + string_view with_neg_and_zero() const { return {start_, size_}; } + + // The printed value not including the '-' sign. + // For inputs of value `0`, this will return "". + string_view without_neg_or_zero() const { + static_assert('-' < '0', "The check below verifies both."); + size_t advance = start_[0] <= '0' ? 1 : 0; + return {start_ + advance, size_ - advance}; } - const char *end() const { return storage_ + sizeof(storage_); } - char *end() { return storage_ + sizeof(storage_); } + bool is_negative() const { return start_[0] == '-'; } - bool is_neg_; - int size_; - // Max size: 128 bit value as octal -> 43 digits - char storage_[128 / 3 + 1]; + private: + const char *start_; + size_t size_; + // Max size: 128 bit value as octal -> 43 digits, plus sign char + char storage_[128 / 3 + 1 + 1]; }; // Note: 'o' conversions do not have a base indicator, it's just that // the '#' flag is specified to modify the precision for 'o' conversions. -string_view BaseIndicator(const ConvertedIntInfo &info, - const ConversionSpec conv) { - bool alt = conv.flags().alt; - int radix = FormatConversionCharRadix(conv.conv()); - if (conv.conv() == ConversionChar::p) alt = true; // always show 0x for %p. +string_view BaseIndicator(const IntDigits &as_digits, + const FormatConversionSpecImpl conv) { + // always show 0x for %p. + bool alt = conv.has_alt_flag() || + conv.conversion_char() == FormatConversionCharInternal::p; + bool hex = (conv.conversion_char() == FormatConversionCharInternal::x || + conv.conversion_char() == FormatConversionCharInternal::X || + conv.conversion_char() == FormatConversionCharInternal::p); // From the POSIX description of '#' flag: // "For x or X conversion specifiers, a non-zero result shall have // 0x (or 0X) prefixed to it." - if (alt && radix == 16 && !info.digits().empty()) { - if (FormatConversionCharIsUpper(conv.conv())) return "0X"; - return "0x"; + if (alt && hex && !as_digits.without_neg_or_zero().empty()) { + return conv.conversion_char() == FormatConversionCharInternal::X ? "0X" + : "0x"; } return {}; } -string_view SignColumn(bool neg, const ConversionSpec conv) { - if (FormatConversionCharIsSigned(conv.conv())) { +string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) { + if (conv.conversion_char() == FormatConversionCharInternal::d || + conv.conversion_char() == FormatConversionCharInternal::i) { if (neg) return "-"; - if (conv.flags().show_pos) return "+"; - if (conv.flags().sign_col) return " "; + if (conv.has_show_pos_flag()) return "+"; + if (conv.has_sign_col_flag()) return " "; } return {}; } -bool ConvertCharImpl(unsigned char v, const ConversionSpec conv, +bool ConvertCharImpl(unsigned char v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { size_t fill = 0; if (conv.width() >= 0) fill = conv.width(); ReducePadding(1, &fill); - if (!conv.flags().left) sink->Append(fill, ' '); + if (!conv.has_left_flag()) sink->Append(fill, ' '); sink->Append(1, v); - if (conv.flags().left) sink->Append(fill, ' '); + if (conv.has_left_flag()) sink->Append(fill, ' '); return true; } -bool ConvertIntImplInner(const ConvertedIntInfo &info, - const ConversionSpec conv, FormatSinkImpl *sink) { +bool ConvertIntImplInnerSlow(const IntDigits &as_digits, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { // Print as a sequence of Substrings: // [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces] size_t fill = 0; if (conv.width() >= 0) fill = conv.width(); - string_view formatted = info.digits(); + string_view formatted = as_digits.without_neg_or_zero(); ReducePadding(formatted, &fill); - string_view sign = SignColumn(info.is_neg(), conv); + string_view sign = SignColumn(as_digits.is_negative(), conv); ReducePadding(sign, &fill); - string_view base_indicator = BaseIndicator(info, conv); + string_view base_indicator = BaseIndicator(as_digits, conv); ReducePadding(base_indicator, &fill); int precision = conv.precision(); @@ -174,7 +227,8 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, if (!precision_specified) precision = 1; - if (conv.flags().alt && conv.conv() == ConversionChar::o) { + if (conv.has_alt_flag() && + conv.conversion_char() == FormatConversionCharInternal::o) { // From POSIX description of the '#' (alt) flag: // "For o conversion, it increases the precision (if necessary) to // force the first digit of the result to be zero." @@ -187,13 +241,13 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, size_t num_zeroes = Excess(formatted.size(), precision); ReducePadding(num_zeroes, &fill); - size_t num_left_spaces = !conv.flags().left ? fill : 0; - size_t num_right_spaces = conv.flags().left ? fill : 0; + size_t num_left_spaces = !conv.has_left_flag() ? fill : 0; + size_t num_right_spaces = conv.has_left_flag() ? fill : 0; // From POSIX description of the '0' (zero) flag: // "For d, i, o, u, x, and X conversion specifiers, if a precision // is specified, the '0' flag is ignored." - if (!precision_specified && conv.flags().zero) { + if (!precision_specified && conv.has_zero_flag()) { num_zeroes += num_left_spaces; num_left_spaces = 0; } @@ -208,71 +262,97 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, } template <typename T> -bool ConvertIntImplInner(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - ConvertedIntInfo info(v, conv.conv()); - if (conv.flags().basic && (conv.conv() != ConversionChar::p)) { - if (info.is_neg()) sink->Append(1, '-'); - if (info.digits().empty()) { - sink->Append(1, '0'); - } else { - sink->Append(info.digits()); - } - return true; +bool ConvertIntArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + using U = typename MakeUnsigned<T>::type; + IntDigits as_digits; + + // This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes + // it to complain about a switch/case type mismatch, even though both are + // FormatConverionChar. Likely this is because at this point + // FormatConversionChar is declared, but not defined. + switch (static_cast<uint8_t>(conv.conversion_char())) { + case static_cast<uint8_t>(FormatConversionCharInternal::c): + return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); + + case static_cast<uint8_t>(FormatConversionCharInternal::o): + as_digits.PrintAsOct(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::x): + as_digits.PrintAsHexLower(static_cast<U>(v)); + break; + case static_cast<uint8_t>(FormatConversionCharInternal::X): + as_digits.PrintAsHexUpper(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::u): + as_digits.PrintAsDec(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::d): + case static_cast<uint8_t>(FormatConversionCharInternal::i): + as_digits.PrintAsDec(v); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::a): + case static_cast<uint8_t>(FormatConversionCharInternal::e): + case static_cast<uint8_t>(FormatConversionCharInternal::f): + case static_cast<uint8_t>(FormatConversionCharInternal::g): + case static_cast<uint8_t>(FormatConversionCharInternal::A): + case static_cast<uint8_t>(FormatConversionCharInternal::E): + case static_cast<uint8_t>(FormatConversionCharInternal::F): + case static_cast<uint8_t>(FormatConversionCharInternal::G): + return ConvertFloatImpl(static_cast<double>(v), conv, sink); + + default: + ABSL_INTERNAL_ASSUME(false); } - return ConvertIntImplInner(info, conv, sink); -} -template <typename T> -bool ConvertIntArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (FormatConversionCharIsFloat(conv.conv())) { - return FormatConvertImpl(static_cast<double>(v), conv, sink).value; - } - if (conv.conv() == ConversionChar::c) - return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); - if (!FormatConversionCharIsIntegral(conv.conv())) return false; - if (!FormatConversionCharIsSigned(conv.conv()) && IsSigned<T>::value) { - using U = typename MakeUnsigned<T>::type; - return FormatConvertImpl(static_cast<U>(v), conv, sink).value; + if (conv.is_basic()) { + sink->Append(as_digits.with_neg_and_zero()); + return true; } - return ConvertIntImplInner(v, conv, sink); + return ConvertIntImplInnerSlow(as_digits, conv, sink); } template <typename T> -bool ConvertFloatArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - return FormatConversionCharIsFloat(conv.conv()) && +bool ConvertFloatArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return FormatConversionCharIsFloat(conv.conversion_char()) && ConvertFloatImpl(v, conv, sink); } -inline bool ConvertStringArg(string_view v, const ConversionSpec conv, +inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { - if (conv.conv() != ConversionChar::s) return false; - if (conv.flags().basic) { + if (conv.is_basic()) { sink->Append(v); return true; } return sink->PutPaddedString(v, conv.width(), conv.precision(), - conv.flags().left); + conv.has_left_flag()); } } // namespace // ==================== Strings ==================== -ConvertResult<Conv::s> FormatConvertImpl(const std::string &v, - const ConversionSpec conv, - FormatSinkImpl *sink) { +StringConvertResult FormatConvertImpl(const std::string &v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { return {ConvertStringArg(v, conv, sink)}; } -ConvertResult<Conv::s> FormatConvertImpl(string_view v, - const ConversionSpec conv, - FormatSinkImpl *sink) { +StringConvertResult FormatConvertImpl(string_view v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { return {ConvertStringArg(v, conv, sink)}; } -ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, - const ConversionSpec conv, - FormatSinkImpl *sink) { - if (conv.conv() == ConversionChar::p) +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.conversion_char() == FormatConversionCharInternal::p) return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; size_t len; if (v == nullptr) { @@ -287,93 +367,99 @@ ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, } // ==================== Raw pointers ==================== -ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, const ConversionSpec conv, - FormatSinkImpl *sink) { - if (conv.conv() != ConversionChar::p) return {false}; +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { if (!v.value) { sink->Append("(nil)"); return {true}; } - return {ConvertIntImplInner(v.value, conv, sink)}; + IntDigits as_digits; + as_digits.PrintAsHexLower(v.value); + return {ConvertIntImplInnerSlow(as_digits, conv, sink)}; } // ==================== Floats ==================== -FloatingConvertResult FormatConvertImpl(float v, const ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(float v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertFloatArg(v, conv, sink)}; } -FloatingConvertResult FormatConvertImpl(double v, const ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(double v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertFloatArg(v, conv, sink)}; } FloatingConvertResult FormatConvertImpl(long double v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertFloatArg(v, conv, sink)}; } // ==================== Chars ==================== -IntegralConvertResult FormatConvertImpl(char v, const ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(char v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(signed char v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned char v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } // ==================== Ints ==================== IntegralConvertResult FormatConvertImpl(short v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } -IntegralConvertResult FormatConvertImpl(int v, const ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(int v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } -IntegralConvertResult FormatConvertImpl(unsigned v, const ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(unsigned v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(long long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(absl::int128 v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(absl::uint128 v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index 7a937563..3dbc1526 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -25,16 +25,37 @@ class Cord; class FormatCountCapture; class FormatSink; +template <absl::FormatConversionCharSet C> +struct FormatConvertResult; +class FormatConversionSpec; + namespace str_format_internal { template <typename T, typename = void> struct HasUserDefinedConvert : std::false_type {}; template <typename T> -struct HasUserDefinedConvert< - T, void_t<decltype(AbslFormatConvert( - std::declval<const T&>(), std::declval<ConversionSpec>(), - std::declval<FormatSink*>()))>> : std::true_type {}; +struct HasUserDefinedConvert<T, void_t<decltype(AbslFormatConvert( + std::declval<const T&>(), + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>()))>> + : std::true_type {}; + +void AbslFormatConvert(); // Stops the lexical name lookup +template <typename T> +auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) + -> decltype(AbslFormatConvert(v, + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>())) { + using FormatConversionSpecT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatConversionSpec>; + using FormatSinkT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; + auto fcs = conv.Wrap<FormatConversionSpecT>(); + auto fs = sink->Wrap<FormatSinkT>(); + return AbslFormatConvert(v, fcs, &fs); +} template <typename T> class StreamedWrapper; @@ -43,6 +64,13 @@ class StreamedWrapper; // then convert it, appending to `sink` and return `true`. // Otherwise fail and return `false`. +// AbslFormatConvert(v, conv, sink) is intended to be found by ADL on 'v' +// as an extension mechanism. These FormatConvertImpl functions are the default +// implementations. +// The ADL search is augmented via the 'Sink*' parameter, which also +// serves as a disambiguator to reject possible unintended 'AbslFormatConvert' +// functions in the namespaces associated with 'v'. + // Raw pointers. struct VoidPtr { VoidPtr() = default; @@ -52,27 +80,45 @@ struct VoidPtr { : value(ptr ? reinterpret_cast<uintptr_t>(ptr) : 0) {} uintptr_t value; }; -ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, ConversionSpec conv, - FormatSinkImpl* sink); + +template <FormatConversionCharSet C> +struct ArgConvertResult { + bool value; +}; + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(FormatConvertResult<C>) { + return C; +} + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) { + return C; +} + +using StringConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::s>; +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Strings. -ConvertResult<Conv::s> FormatConvertImpl(const std::string& v, - ConversionSpec conv, - FormatSinkImpl* sink); -ConvertResult<Conv::s> FormatConvertImpl(string_view v, ConversionSpec conv, - FormatSinkImpl* sink); -ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char* v, - ConversionSpec conv, - FormatSinkImpl* sink); -template <class AbslCord, - typename std::enable_if< - std::is_same<AbslCord, absl::Cord>::value>::type* = nullptr> -ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, - ConversionSpec conv, - FormatSinkImpl* sink) { - if (conv.conv() != ConversionChar::s) return {false}; - - bool is_left = conv.flags().left; +StringConvertResult FormatConvertImpl(const std::string& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +StringConvertResult FormatConvertImpl(string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +template <class AbslCord, typename std::enable_if<std::is_same< + AbslCord, absl::Cord>::value>::type* = nullptr> +StringConvertResult FormatConvertImpl(const AbslCord& value, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + bool is_left = conv.has_left_flag(); size_t space_remaining = 0; int width = conv.width(); @@ -105,55 +151,63 @@ ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, return {true}; } -using IntegralConvertResult = - ConvertResult<Conv::c | Conv::numeric | Conv::star>; -using FloatingConvertResult = ConvertResult<Conv::floating>; +using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar)>; +using FloatingConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::kFloating>; // Floats. -FloatingConvertResult FormatConvertImpl(float v, ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -FloatingConvertResult FormatConvertImpl(double v, ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -FloatingConvertResult FormatConvertImpl(long double v, ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(long double v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Chars. -IntegralConvertResult FormatConvertImpl(char v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(signed char v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(signed char v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(unsigned char v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(unsigned char v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Ints. IntegralConvertResult FormatConvertImpl(short v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(int v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(unsigned v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(unsigned v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(long long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(int128 v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(uint128 v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(uint128 v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0> -IntegralConvertResult FormatConvertImpl(T v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink) { return FormatConvertImpl(static_cast<int>(v), conv, sink); } @@ -164,12 +218,12 @@ template <typename T> typename std::enable_if<std::is_enum<T>::value && !HasUserDefinedConvert<T>::value, IntegralConvertResult>::type -FormatConvertImpl(T v, ConversionSpec conv, FormatSinkImpl* sink); +FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); template <typename T> -ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<T>& v, - ConversionSpec conv, - FormatSinkImpl* out) { +StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* out) { std::ostringstream oss; oss << v.v_; if (!oss) return {false}; @@ -180,21 +234,24 @@ ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<T>& v, // until after FormatCountCapture is fully defined. struct FormatCountCaptureHelper { template <class T = int> - static ConvertResult<Conv::n> ConvertHelper(const FormatCountCapture& v, - ConversionSpec conv, - FormatSinkImpl* sink) { + static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; - if (conv.conv() != str_format_internal::ConversionChar::n) return {false}; + if (conv.conversion_char() != + str_format_internal::FormatConversionCharInternal::n) { + return {false}; + } *v2.p_ = static_cast<int>(sink->size()); return {true}; } }; template <class T = int> -ConvertResult<Conv::n> FormatConvertImpl(const FormatCountCapture& v, - ConversionSpec conv, - FormatSinkImpl* sink) { +ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { return FormatCountCaptureHelper::ConvertHelper(v, conv, sink); } @@ -203,13 +260,13 @@ ConvertResult<Conv::n> FormatConvertImpl(const FormatCountCapture& v, struct FormatArgImplFriend { template <typename Arg> static bool ToInt(Arg arg, int* out) { - // A value initialized ConversionSpec has a `none` conv, which tells the - // dispatcher to run the `int` conversion. + // A value initialized FormatConversionSpecImpl has a `none` conv, which + // tells the dispatcher to run the `int` conversion. return arg.dispatcher_(arg.data_, {}, out); } template <typename Arg> - static bool Convert(Arg arg, str_format_internal::ConversionSpec conv, + static bool Convert(Arg arg, FormatConversionSpecImpl conv, FormatSinkImpl* out) { return arg.dispatcher_(arg.data_, conv, out); } @@ -220,6 +277,15 @@ struct FormatArgImplFriend { } }; +template <typename Arg> +constexpr FormatConversionCharSet ArgumentToConv() { + return absl::str_format_internal::ExtractCharSet( + decltype(str_format_internal::FormatConvertImpl( + std::declval<const Arg&>(), + std::declval<const FormatConversionSpecImpl&>(), + std::declval<FormatSinkImpl*>())){}); +} + // A type-erased handle to a format argument. class FormatArgImpl { private: @@ -233,7 +299,7 @@ class FormatArgImpl { char buf[kInlinedSpace]; }; - using Dispatcher = bool (*)(Data, ConversionSpec, void* out); + using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out); template <typename T> struct store_by_value @@ -375,15 +441,20 @@ class FormatArgImpl { } template <typename T> - static bool Dispatch(Data arg, ConversionSpec spec, void* out) { + static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) { // A `none` conv indicates that we want the `int` conversion. - if (ABSL_PREDICT_FALSE(spec.conv() == ConversionChar::none)) { + if (ABSL_PREDICT_FALSE(spec.conversion_char() == + FormatConversionCharInternal::kNone)) { return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(), std::is_enum<T>()); } - + if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(), + spec.conversion_char()))) { + return false; + } return str_format_internal::FormatConvertImpl( - Manager<T>::Value(arg), spec, static_cast<FormatSinkImpl*>(out)) + Manager<T>::Value(arg), spec, + static_cast<FormatSinkImpl*>(out)) .value; } @@ -391,8 +462,9 @@ class FormatArgImpl { Dispatcher dispatcher_; }; -#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \ - E template bool FormatArgImpl::Dispatch<T>(Data, ConversionSpec, void*) +#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \ + E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \ + void*) #define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \ diff --git a/absl/strings/internal/str_format/arg_test.cc b/absl/strings/internal/str_format/arg_test.cc index 8d30d8b8..f53fd6bd 100644 --- a/absl/strings/internal/str_format/arg_test.cc +++ b/absl/strings/internal/str_format/arg_test.cc @@ -23,8 +23,17 @@ class FormatArgImplTest : public ::testing::Test { enum Color { kRed, kGreen, kBlue }; static const char *hi() { return "hi"; } + + struct X {}; + + X x_; }; +inline FormatConvertResult<FormatConversionCharSet{}> AbslFormatConvert( + const FormatArgImplTest::X &, const FormatConversionSpec &, FormatSink *) { + return {false}; +} + TEST_F(FormatArgImplTest, ToInt) { int out = 0; EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out)); @@ -59,6 +68,7 @@ TEST_F(FormatArgImplTest, ToInt) { FormatArgImpl(static_cast<int *>(nullptr)), &out)); EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out)); EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(x_), &out)); EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out)); EXPECT_EQ(2, out); } @@ -95,8 +105,9 @@ TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) { TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { std::string s; FormatSinkImpl sink(&s); - ConversionSpec conv; - FormatConversionSpecImplFriend::SetConversionChar(ConversionChar::s, &conv); + FormatConversionSpecImpl conv; + FormatConversionSpecImplFriend::SetConversionChar( + FormatConversionCharInternal::s, &conv); FormatConversionSpecImplFriend::SetFlags(Flags(), &conv); FormatConversionSpecImplFriend::SetWidth(-1, &conv); FormatConversionSpecImplFriend::SetPrecision(-1, &conv); diff --git a/absl/strings/internal/str_format/bind.cc b/absl/strings/internal/str_format/bind.cc index 27522fdb..6980ed1d 100644 --- a/absl/strings/internal/str_format/bind.cc +++ b/absl/strings/internal/str_format/bind.cc @@ -147,7 +147,7 @@ class SummarizingConverter { << FormatConversionSpecImplFriend::FlagsToString(bound); if (bound.width() >= 0) ss << bound.width(); if (bound.precision() >= 0) ss << "." << bound.precision(); - ss << bound.conv() << "}"; + ss << bound.conversion_char() << "}"; Append(ss.str()); return true; } diff --git a/absl/strings/internal/str_format/bind.h b/absl/strings/internal/str_format/bind.h index cf41b197..585246e7 100644 --- a/absl/strings/internal/str_format/bind.h +++ b/absl/strings/internal/str_format/bind.h @@ -19,7 +19,7 @@ class UntypedFormatSpec; namespace str_format_internal { -class BoundConversion : public ConversionSpec { +class BoundConversion : public FormatConversionSpecImpl { public: const FormatArgImpl* arg() const { return arg_; } void set_arg(const FormatArgImpl* a) { arg_ = a; } @@ -60,7 +60,7 @@ class UntypedFormatSpecImpl { size_t size_; }; -template <typename T, typename...> +template <typename T, FormatConversionCharSet...> struct MakeDependent { using type = T; }; @@ -68,7 +68,7 @@ struct MakeDependent { // Implicitly convertible from `const char*`, `string_view`, and the // `ExtendedParsedFormat` type. This abstraction allows all format functions to // operate on any without providing too many overloads. -template <typename... Args> +template <FormatConversionCharSet... Args> class FormatSpecTemplate : public MakeDependent<UntypedFormatSpec, Args...>::type { using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type; @@ -76,11 +76,11 @@ class FormatSpecTemplate public: #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER - // Honeypot overload for when the std::string is not constexpr. + // Honeypot overload for when the string is not constexpr. // We use the 'unavailable' attribute to give a better compiler error than // just 'method is deleted'. FormatSpecTemplate(...) // NOLINT - __attribute__((unavailable("Format std::string is not constexpr."))); + __attribute__((unavailable("Format string is not constexpr."))); // Honeypot overload for when the format is constexpr and invalid. // We use the 'unavailable' attribute to give a better compiler error than @@ -105,13 +105,11 @@ class FormatSpecTemplate // Good format overload. FormatSpecTemplate(const char* s) // NOLINT - __attribute__((enable_if(ValidFormatImpl<ArgumentToConv<Args>()...>(s), - "bad format trap"))) + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) : Base(s) {} FormatSpecTemplate(string_view s) // NOLINT - __attribute__((enable_if(ValidFormatImpl<ArgumentToConv<Args>()...>(s), - "bad format trap"))) + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) : Base(s) {} #else // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER @@ -121,19 +119,14 @@ class FormatSpecTemplate #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER - template <Conv... C, typename = typename std::enable_if< - AllOf(sizeof...(C) == sizeof...(Args), - Contains(ArgumentToConv<Args>(), - C)...)>::type> + template <FormatConversionCharSet... C, + typename = typename std::enable_if< + AllOf(sizeof...(C) == sizeof...(Args), Contains(Args, + C)...)>::type> FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT : Base(&pc) {} }; -template <typename... Args> -struct FormatSpecDeductionBarrier { - using type = FormatSpecTemplate<Args...>; -}; - class Streamable { public: Streamable(const UntypedFormatSpecImpl& format, @@ -196,9 +189,9 @@ class StreamedWrapper { private: template <typename S> - friend ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<S>& v, - ConversionSpec conv, - FormatSinkImpl* out); + friend ArgConvertResult<FormatConversionCharSetInternal::s> FormatConvertImpl( + const StreamedWrapper<S>& v, FormatConversionSpecImpl conv, + FormatSinkImpl* out); const T& v_; }; diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h index 8993a79b..424c51f7 100644 --- a/absl/strings/internal/str_format/checker.h +++ b/absl/strings/internal/str_format/checker.h @@ -24,13 +24,6 @@ constexpr bool AllOf(bool b, T... t) { return b && AllOf(t...); } -template <typename Arg> -constexpr Conv ArgumentToConv() { - return decltype(str_format_internal::FormatConvertImpl( - std::declval<const Arg&>(), std::declval<const ConversionSpec&>(), - std::declval<FormatSinkImpl*>()))::kConv; -} - #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER constexpr bool ContainsChar(const char* chars, char c) { @@ -39,14 +32,14 @@ constexpr bool ContainsChar(const char* chars, char c) { // A constexpr compatible list of Convs. struct ConvList { - const Conv* array; + const FormatConversionCharSet* array; int count; // We do the bound check here to avoid having to do it on the callers. - // Returning an empty Conv has the same effect as short circuiting because it - // will never match any conversion. - constexpr Conv operator[](int i) const { - return i < count ? array[i] : Conv{}; + // Returning an empty FormatConversionCharSet has the same effect as + // short circuiting because it will never match any conversion. + constexpr FormatConversionCharSet operator[](int i) const { + return i < count ? array[i] : FormatConversionCharSet{}; } constexpr ConvList without_front() const { @@ -57,7 +50,7 @@ struct ConvList { template <size_t count> struct ConvListT { // Make sure the array has size > 0. - Conv list[count ? count : 1]; + FormatConversionCharSet list[count ? count : 1]; }; constexpr char GetChar(string_view str, size_t index) { @@ -310,7 +303,7 @@ class FormatParser { ConvList args_; }; -template <Conv... C> +template <FormatConversionCharSet... C> constexpr bool ValidFormatImpl(string_view format) { return FormatParser(format, {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) diff --git a/absl/strings/internal/str_format/checker_test.cc b/absl/strings/internal/str_format/checker_test.cc index ea2a7681..a76d70b0 100644 --- a/absl/strings/internal/str_format/checker_test.cc +++ b/absl/strings/internal/str_format/checker_test.cc @@ -9,18 +9,22 @@ ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { -std::string ConvToString(Conv conv) { +std::string ConvToString(FormatConversionCharSet conv) { std::string out; -#define CONV_SET_CASE(c) \ - if (Contains(conv, Conv::c)) out += #c; +#define CONV_SET_CASE(c) \ + if (Contains(conv, FormatConversionCharSetInternal::c)) { \ + out += #c; \ + } ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) #undef CONV_SET_CASE - if (Contains(conv, Conv::star)) out += "*"; + if (Contains(conv, FormatConversionCharSetInternal::kStar)) { + out += "*"; + } return out; } TEST(StrFormatChecker, ArgumentToConv) { - Conv conv = ArgumentToConv<std::string>(); + FormatConversionCharSet conv = ArgumentToConv<std::string>(); EXPECT_EQ(ConvToString(conv), "s"); conv = ArgumentToConv<const char*>(); diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc index cbcd7caf..634ee78b 100644 --- a/absl/strings/internal/str_format/convert_test.cc +++ b/absl/strings/internal/str_format/convert_test.cc @@ -1,20 +1,32 @@ #include <errno.h> #include <stdarg.h> #include <stdio.h> + #include <cctype> #include <cmath> +#include <limits> #include <string> +#include <thread> // NOLINT #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/internal/raw_logging.h" #include "absl/strings/internal/str_format/bind.h" +#include "absl/strings/match.h" +#include "absl/types/optional.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { +struct NativePrintfTraits { + bool hex_float_has_glibc_rounding; + bool hex_float_prefers_denormal_repr; + bool hex_float_uses_minimal_precision_when_not_specified; + bool hex_float_optimizes_leading_digit_bit_count; +}; + template <typename T, size_t N> size_t ArraySize(T (&)[N]) { return N; @@ -57,7 +69,7 @@ std::string Esc(const T &v) { return oss.str(); } -void StrAppend(std::string *dst, const char *format, va_list ap) { +void StrAppendV(std::string *dst, const char *format, va_list ap) { // First try with a small fixed size buffer static const int kSpaceLength = 1024; char space[kSpaceLength]; @@ -98,15 +110,79 @@ void StrAppend(std::string *dst, const char *format, va_list ap) { delete[] buf; } +void StrAppend(std::string *out, const char *format, ...) { + va_list ap; + va_start(ap, format); + StrAppendV(out, format, ap); + va_end(ap); +} + std::string StrPrint(const char *format, ...) { va_list ap; va_start(ap, format); std::string result; - StrAppend(&result, format, ap); + StrAppendV(&result, format, ap); va_end(ap); return result; } +NativePrintfTraits VerifyNativeImplementationImpl() { + NativePrintfTraits result; + + // >>> hex_float_has_glibc_rounding. To have glibc's rounding behavior we need + // to meet three requirements: + // + // - The threshold for rounding up is 8 (for e.g. MSVC uses 9). + // - If the digits lower than than the 8 are non-zero then we round up. + // - If the digits lower than the 8 are all zero then we round toward even. + // + // The numbers below represent all the cases covering {below,at,above} the + // threshold (8) with both {zero,non-zero} lower bits and both {even,odd} + // preceding digits. + const double d0079 = 65657.0; // 0x1.0079p+16 + const double d0179 = 65913.0; // 0x1.0179p+16 + const double d0080 = 65664.0; // 0x1.0080p+16 + const double d0180 = 65920.0; // 0x1.0180p+16 + const double d0081 = 65665.0; // 0x1.0081p+16 + const double d0181 = 65921.0; // 0x1.0181p+16 + result.hex_float_has_glibc_rounding = + StartsWith(StrPrint("%.2a", d0079), "0x1.00") && + StartsWith(StrPrint("%.2a", d0179), "0x1.01") && + StartsWith(StrPrint("%.2a", d0080), "0x1.00") && + StartsWith(StrPrint("%.2a", d0180), "0x1.02") && + StartsWith(StrPrint("%.2a", d0081), "0x1.01") && + StartsWith(StrPrint("%.2a", d0181), "0x1.02"); + + // >>> hex_float_prefers_denormal_repr. Formatting `denormal` on glibc yields + // "0x0.0000000000001p-1022", whereas on std libs that don't use denormal + // representation it would either be 0x1p-1074 or 0x1.0000000000000-1074. + const double denormal = std::numeric_limits<double>::denorm_min(); + result.hex_float_prefers_denormal_repr = + StartsWith(StrPrint("%a", denormal), "0x0.0000000000001"); + + // >>> hex_float_uses_minimal_precision_when_not_specified. Some (non-glibc) + // libs will format the following as "0x1.0079000000000p+16". + result.hex_float_uses_minimal_precision_when_not_specified = + (StrPrint("%a", d0079) == "0x1.0079p+16"); + + // >>> hex_float_optimizes_leading_digit_bit_count. The number 1.5, when + // formatted by glibc should yield "0x1.8p+0" for `double` and "0xcp-3" for + // `long double`, i.e., number of bits in the leading digit is adapted to the + // number of bits in the mantissa. + const double d_15 = 1.5; + const long double ld_15 = 1.5; + result.hex_float_optimizes_leading_digit_bit_count = + StartsWith(StrPrint("%a", d_15), "0x1.8") && + StartsWith(StrPrint("%La", ld_15), "0xc"); + + return result; +} + +const NativePrintfTraits &VerifyNativeImplementation() { + static NativePrintfTraits native_traits = VerifyNativeImplementationImpl(); + return native_traits; +} + class FormatConvertTest : public ::testing::Test { }; template <typename T> @@ -463,6 +539,68 @@ TEST_F(FormatConvertTest, Uint128) { } } +template <typename Floating> +void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + // Reserve the space to ensure we don't allocate memory in the output itself. + std::string str_format_result; + str_format_result.reserve(1 << 20); + std::string string_printf_result; + string_printf_result.reserve(1 << 20); + + const char *const kFormats[] = { + "%", "%.3", "%8.5", "%500", "%.5000", "%.60", "%.30", "%03", + "%+", "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"}; + + for (const char *fmt : kFormats) { + for (char f : {'f', 'F', // + 'g', 'G', // + 'a', 'A', // + 'e', 'E'}) { + std::string fmt_str = std::string(fmt) + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' && + f != 'a' && f != 'A') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. + continue; + } + + if ((f == 'a' || f == 'A') && + !native_traits.hex_float_has_glibc_rounding) { + continue; + } + + for (Floating d : floats) { + if (!native_traits.hex_float_prefers_denormal_repr && + (f == 'a' || f == 'A') && std::fpclassify(d) == FP_SUBNORMAL) { + continue; + } + int i = -10; + FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; + UntypedFormatSpecImpl format(fmt_str); + + string_printf_result.clear(); + StrAppend(&string_printf_result, fmt_str.c_str(), d, i); + str_format_result.clear(); + + { + AppendPack(&str_format_result, format, absl::MakeSpan(args)); + } + + if (string_printf_result != str_format_result) { + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test + // to time out. + ASSERT_EQ(string_printf_result, str_format_result) + << fmt_str << " " << StrPrint("%.18g", d) << " " + << StrPrint("%a", d) << " " << StrPrint("%.50f", d); + } + } + } + } +} + TEST_F(FormatConvertTest, Float) { #ifdef _MSC_VER // MSVC has a different rounding policy than us so we can't test our @@ -470,9 +608,62 @@ TEST_F(FormatConvertTest, Float) { return; #endif // _MSC_VER - const char *const kFormats[] = { - "%", "%.3", "%8.5", "%9", "%.60", "%.30", "%03", "%+", - "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"}; + std::vector<float> floats = {0.0f, + -0.0f, + .9999999f, + 9999999.f, + std::numeric_limits<float>::max(), + -std::numeric_limits<float>::max(), + std::numeric_limits<float>::min(), + -std::numeric_limits<float>::min(), + std::numeric_limits<float>::lowest(), + -std::numeric_limits<float>::lowest(), + std::numeric_limits<float>::epsilon(), + std::numeric_limits<float>::epsilon() + 1.0f, + std::numeric_limits<float>::infinity(), + -std::numeric_limits<float>::infinity()}; + + // Some regression tests. + floats.push_back(0.999999989f); + + if (std::numeric_limits<float>::has_denorm != std::denorm_absent) { + floats.push_back(std::numeric_limits<float>::denorm_min()); + floats.push_back(-std::numeric_limits<float>::denorm_min()); + } + + for (float base : + {1.f, 12.f, 123.f, 1234.f, 12345.f, 123456.f, 1234567.f, 12345678.f, + 123456789.f, 1234567890.f, 12345678901.f, 12345678.f, 12345678.f}) { + for (int exp = -123; exp <= 123; ++exp) { + for (int sign : {1, -1}) { + floats.push_back(sign * std::ldexp(base, exp)); + } + } + } + + for (int exp = -300; exp <= 300; ++exp) { + const float all_ones_mantissa = 0xffffff; + floats.push_back(std::ldexp(all_ones_mantissa, exp)); + } + + // Remove duplicates to speed up the logic below. + std::sort(floats.begin(), floats.end()); + floats.erase(std::unique(floats.begin(), floats.end()), floats.end()); + +#ifndef __APPLE__ + // Apple formats NaN differently (+nan) vs. (nan) + floats.push_back(std::nan("")); +#endif + + TestWithMultipleFormatsHelper(floats); +} + +TEST_F(FormatConvertTest, Double) { +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + return; +#endif // _MSC_VER std::vector<double> doubles = {0.0, -0.0, @@ -489,11 +680,6 @@ TEST_F(FormatConvertTest, Float) { std::numeric_limits<double>::infinity(), -std::numeric_limits<double>::infinity()}; -#ifndef __APPLE__ - // Apple formats NaN differently (+nan) vs. (nan) - doubles.push_back(std::nan("")); -#endif - // Some regression tests. doubles.push_back(0.99999999999999989); @@ -512,43 +698,375 @@ TEST_F(FormatConvertTest, Float) { } } - for (const char *fmt : kFormats) { - for (char f : {'f', 'F', // - 'g', 'G', // - 'a', 'A', // - 'e', 'E'}) { - std::string fmt_str = std::string(fmt) + f; - for (double d : doubles) { - int i = -10; - FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; - UntypedFormatSpecImpl format(fmt_str); - // We use ASSERT_EQ here because failures are usually correlated and a - // bug would print way too many failed expectations causing the test to - // time out. - ASSERT_EQ(StrPrint(fmt_str.c_str(), d, i), - FormatPack(format, absl::MakeSpan(args))) - << fmt_str << " " << StrPrint("%.18g", d) << " " - << StrPrint("%.999f", d); + // Workaround libc bug. + // https://sourceware.org/bugzilla/show_bug.cgi?id=22142 + const bool gcc_bug_22142 = + StrPrint("%f", std::numeric_limits<double>::max()) != + "1797693134862315708145274237317043567980705675258449965989174768031" + "5726078002853876058955863276687817154045895351438246423432132688946" + "4182768467546703537516986049910576551282076245490090389328944075868" + "5084551339423045832369032229481658085593321233482747978262041447231" + "68738177180919299881250404026184124858368.000000"; + + if (!gcc_bug_22142) { + for (int exp = -300; exp <= 300; ++exp) { + const double all_ones_mantissa = 0x1fffffffffffff; + doubles.push_back(std::ldexp(all_ones_mantissa, exp)); + } + } + + if (gcc_bug_22142) { + for (auto &d : doubles) { + using L = std::numeric_limits<double>; + double d2 = std::abs(d); + if (d2 == L::max() || d2 == L::min() || d2 == L::denorm_min()) { + d = 0; } } } + + // Remove duplicates to speed up the logic below. + std::sort(doubles.begin(), doubles.end()); + doubles.erase(std::unique(doubles.begin(), doubles.end()), doubles.end()); + +#ifndef __APPLE__ + // Apple formats NaN differently (+nan) vs. (nan) + doubles.push_back(std::nan("")); +#endif + + TestWithMultipleFormatsHelper(doubles); +} + +TEST_F(FormatConvertTest, DoubleRound) { + std::string s; + const auto format = [&](const char *fmt, double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +#if !defined(_MSC_VER) + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + EXPECT_EQ(StrPrint(fmt, d), s); +#endif // _MSC_VER + + return s; + }; + // All of these values have to be exactly represented. + // Otherwise we might not be testing what we think we are testing. + + // These values can fit in a 64bit "fast" representation. + const double exact_value = 0.00000000000005684341886080801486968994140625; + assert(exact_value == std::pow(2, -44)); + // Round up at a 5xx. + EXPECT_EQ(format("%.13f", exact_value), "0.0000000000001"); + // Round up at a >5 + EXPECT_EQ(format("%.14f", exact_value), "0.00000000000006"); + // Round down at a <5 + EXPECT_EQ(format("%.16f", exact_value), "0.0000000000000568"); + // Nine handling + EXPECT_EQ(format("%.35f", exact_value), + "0.00000000000005684341886080801486969"); + EXPECT_EQ(format("%.36f", exact_value), + "0.000000000000056843418860808014869690"); + // Round down the last nine. + EXPECT_EQ(format("%.37f", exact_value), + "0.0000000000000568434188608080148696899"); + EXPECT_EQ(format("%.10f", 0.000003814697265625), "0.0000038147"); + // Round up the last nine + EXPECT_EQ(format("%.11f", 0.000003814697265625), "0.00000381470"); + EXPECT_EQ(format("%.12f", 0.000003814697265625), "0.000003814697"); + + // Round to even (down) + EXPECT_EQ(format("%.43f", exact_value), + "0.0000000000000568434188608080148696899414062"); + // Exact + EXPECT_EQ(format("%.44f", exact_value), + "0.00000000000005684341886080801486968994140625"); + // Round to even (up), let make the last digits 75 instead of 25 + EXPECT_EQ(format("%.43f", exact_value + std::pow(2, -43)), + "0.0000000000001705302565824240446090698242188"); + // Exact, just to check. + EXPECT_EQ(format("%.44f", exact_value + std::pow(2, -43)), + "0.00000000000017053025658242404460906982421875"); + + // This value has to be small enough that it won't fit in the uint128 + // representation for printing. + const double small_exact_value = + 0.000000000000000000000000000000000000752316384526264005099991383822237233803945956334136013765601092018187046051025390625; // NOLINT + assert(small_exact_value == std::pow(2, -120)); + // Round up at a 5xx. + EXPECT_EQ(format("%.37f", small_exact_value), + "0.0000000000000000000000000000000000008"); + // Round down at a <5 + EXPECT_EQ(format("%.38f", small_exact_value), + "0.00000000000000000000000000000000000075"); + // Round up at a >5 + EXPECT_EQ(format("%.41f", small_exact_value), + "0.00000000000000000000000000000000000075232"); + // Nine handling + EXPECT_EQ(format("%.55f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051"); + EXPECT_EQ(format("%.56f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400510"); + EXPECT_EQ(format("%.57f", small_exact_value), + "0.000000000000000000000000000000000000752316384526264005100"); + EXPECT_EQ(format("%.58f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051000"); + // Round down the last nine + EXPECT_EQ(format("%.59f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400509999"); + // Round up the last nine + EXPECT_EQ(format("%.79f", small_exact_value), + "0.000000000000000000000000000000000000" + "7523163845262640050999913838222372338039460"); + + // Round to even (down) + EXPECT_EQ(format("%.119f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "394595633413601376560109201818704605102539062"); + // Exact + EXPECT_EQ(format("%.120f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "3945956334136013765601092018187046051025390625"); + // Round to even (up), let make the last digits 75 instead of 25 + EXPECT_EQ(format("%.119f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "183786900240804129680327605456113815307617188"); + // Exact, just to check. + EXPECT_EQ(format("%.120f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "1837869002408041296803276054561138153076171875"); +} + +TEST_F(FormatConvertTest, DoubleRoundA) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + std::string s; + const auto format = [&](const char *fmt, double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); + if (native_traits.hex_float_has_glibc_rounding) { + EXPECT_EQ(StrPrint(fmt, d), s); + } + return s; + }; + + // 0x1.00018000p+100 + const double on_boundary_odd = 1267679614447900152596896153600.0; + EXPECT_EQ(format("%.0a", on_boundary_odd), "0x1p+100"); + EXPECT_EQ(format("%.1a", on_boundary_odd), "0x1.0p+100"); + EXPECT_EQ(format("%.2a", on_boundary_odd), "0x1.00p+100"); + EXPECT_EQ(format("%.3a", on_boundary_odd), "0x1.000p+100"); + EXPECT_EQ(format("%.4a", on_boundary_odd), "0x1.0002p+100"); // round + EXPECT_EQ(format("%.5a", on_boundary_odd), "0x1.00018p+100"); + EXPECT_EQ(format("%.6a", on_boundary_odd), "0x1.000180p+100"); + + // 0x1.00028000p-2 + const double on_boundary_even = 0.250009536743164062500; + EXPECT_EQ(format("%.0a", on_boundary_even), "0x1p-2"); + EXPECT_EQ(format("%.1a", on_boundary_even), "0x1.0p-2"); + EXPECT_EQ(format("%.2a", on_boundary_even), "0x1.00p-2"); + EXPECT_EQ(format("%.3a", on_boundary_even), "0x1.000p-2"); + EXPECT_EQ(format("%.4a", on_boundary_even), "0x1.0002p-2"); // no round + EXPECT_EQ(format("%.5a", on_boundary_even), "0x1.00028p-2"); + EXPECT_EQ(format("%.6a", on_boundary_even), "0x1.000280p-2"); + + // 0x1.00018001p+1 + const double slightly_over = 2.00004577683284878730773925781250; + EXPECT_EQ(format("%.0a", slightly_over), "0x1p+1"); + EXPECT_EQ(format("%.1a", slightly_over), "0x1.0p+1"); + EXPECT_EQ(format("%.2a", slightly_over), "0x1.00p+1"); + EXPECT_EQ(format("%.3a", slightly_over), "0x1.000p+1"); + EXPECT_EQ(format("%.4a", slightly_over), "0x1.0002p+1"); + EXPECT_EQ(format("%.5a", slightly_over), "0x1.00018p+1"); + EXPECT_EQ(format("%.6a", slightly_over), "0x1.000180p+1"); + + // 0x1.00017fffp+0 + const double slightly_under = 1.000022887950763106346130371093750; + EXPECT_EQ(format("%.0a", slightly_under), "0x1p+0"); + EXPECT_EQ(format("%.1a", slightly_under), "0x1.0p+0"); + EXPECT_EQ(format("%.2a", slightly_under), "0x1.00p+0"); + EXPECT_EQ(format("%.3a", slightly_under), "0x1.000p+0"); + EXPECT_EQ(format("%.4a", slightly_under), "0x1.0001p+0"); + EXPECT_EQ(format("%.5a", slightly_under), "0x1.00018p+0"); + EXPECT_EQ(format("%.6a", slightly_under), "0x1.000180p+0"); + EXPECT_EQ(format("%.7a", slightly_under), "0x1.0001800p+0"); + + // 0x1.1b3829ac28058p+3 + const double hex_value = 8.85060580848964661981881363317370414733886718750; + EXPECT_EQ(format("%.0a", hex_value), "0x1p+3"); + EXPECT_EQ(format("%.1a", hex_value), "0x1.2p+3"); + EXPECT_EQ(format("%.2a", hex_value), "0x1.1bp+3"); + EXPECT_EQ(format("%.3a", hex_value), "0x1.1b4p+3"); + EXPECT_EQ(format("%.4a", hex_value), "0x1.1b38p+3"); + EXPECT_EQ(format("%.5a", hex_value), "0x1.1b383p+3"); + EXPECT_EQ(format("%.6a", hex_value), "0x1.1b382ap+3"); + EXPECT_EQ(format("%.7a", hex_value), "0x1.1b3829bp+3"); + EXPECT_EQ(format("%.8a", hex_value), "0x1.1b3829acp+3"); + EXPECT_EQ(format("%.9a", hex_value), "0x1.1b3829ac3p+3"); + EXPECT_EQ(format("%.10a", hex_value), "0x1.1b3829ac28p+3"); + EXPECT_EQ(format("%.11a", hex_value), "0x1.1b3829ac280p+3"); + EXPECT_EQ(format("%.12a", hex_value), "0x1.1b3829ac2806p+3"); + EXPECT_EQ(format("%.13a", hex_value), "0x1.1b3829ac28058p+3"); + EXPECT_EQ(format("%.14a", hex_value), "0x1.1b3829ac280580p+3"); + EXPECT_EQ(format("%.15a", hex_value), "0x1.1b3829ac2805800p+3"); + EXPECT_EQ(format("%.16a", hex_value), "0x1.1b3829ac28058000p+3"); + EXPECT_EQ(format("%.17a", hex_value), "0x1.1b3829ac280580000p+3"); + EXPECT_EQ(format("%.18a", hex_value), "0x1.1b3829ac2805800000p+3"); + EXPECT_EQ(format("%.19a", hex_value), "0x1.1b3829ac28058000000p+3"); + EXPECT_EQ(format("%.20a", hex_value), "0x1.1b3829ac280580000000p+3"); + EXPECT_EQ(format("%.21a", hex_value), "0x1.1b3829ac2805800000000p+3"); + + // 0x1.0818283848586p+3 + const double hex_value2 = 8.2529488658208371987257123691961169242858886718750; + EXPECT_EQ(format("%.0a", hex_value2), "0x1p+3"); + EXPECT_EQ(format("%.1a", hex_value2), "0x1.1p+3"); + EXPECT_EQ(format("%.2a", hex_value2), "0x1.08p+3"); + EXPECT_EQ(format("%.3a", hex_value2), "0x1.082p+3"); + EXPECT_EQ(format("%.4a", hex_value2), "0x1.0818p+3"); + EXPECT_EQ(format("%.5a", hex_value2), "0x1.08183p+3"); + EXPECT_EQ(format("%.6a", hex_value2), "0x1.081828p+3"); + EXPECT_EQ(format("%.7a", hex_value2), "0x1.0818284p+3"); + EXPECT_EQ(format("%.8a", hex_value2), "0x1.08182838p+3"); + EXPECT_EQ(format("%.9a", hex_value2), "0x1.081828385p+3"); + EXPECT_EQ(format("%.10a", hex_value2), "0x1.0818283848p+3"); + EXPECT_EQ(format("%.11a", hex_value2), "0x1.08182838486p+3"); + EXPECT_EQ(format("%.12a", hex_value2), "0x1.081828384858p+3"); + EXPECT_EQ(format("%.13a", hex_value2), "0x1.0818283848586p+3"); + EXPECT_EQ(format("%.14a", hex_value2), "0x1.08182838485860p+3"); + EXPECT_EQ(format("%.15a", hex_value2), "0x1.081828384858600p+3"); + EXPECT_EQ(format("%.16a", hex_value2), "0x1.0818283848586000p+3"); + EXPECT_EQ(format("%.17a", hex_value2), "0x1.08182838485860000p+3"); + EXPECT_EQ(format("%.18a", hex_value2), "0x1.081828384858600000p+3"); + EXPECT_EQ(format("%.19a", hex_value2), "0x1.0818283848586000000p+3"); + EXPECT_EQ(format("%.20a", hex_value2), "0x1.08182838485860000000p+3"); + EXPECT_EQ(format("%.21a", hex_value2), "0x1.081828384858600000000p+3"); +} + +TEST_F(FormatConvertTest, LongDoubleRoundA) { + if (std::numeric_limits<long double>::digits % 4 != 0) { + // This test doesn't really make sense to run on platforms where a long + // double has a different mantissa size (mod 4) than Prod, since then the + // leading digit will be formatted differently. + return; + } + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + std::string s; + const auto format = [&](const char *fmt, long double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); + if (native_traits.hex_float_has_glibc_rounding && + native_traits.hex_float_optimizes_leading_digit_bit_count) { + EXPECT_EQ(StrPrint(fmt, d), s); + } + return s; + }; + + // 0x8.8p+4 + const long double on_boundary_even = 136.0; + EXPECT_EQ(format("%.0La", on_boundary_even), "0x8p+4"); + EXPECT_EQ(format("%.1La", on_boundary_even), "0x8.8p+4"); + EXPECT_EQ(format("%.2La", on_boundary_even), "0x8.80p+4"); + EXPECT_EQ(format("%.3La", on_boundary_even), "0x8.800p+4"); + EXPECT_EQ(format("%.4La", on_boundary_even), "0x8.8000p+4"); + EXPECT_EQ(format("%.5La", on_boundary_even), "0x8.80000p+4"); + EXPECT_EQ(format("%.6La", on_boundary_even), "0x8.800000p+4"); + + // 0x9.8p+4 + const long double on_boundary_odd = 152.0; + EXPECT_EQ(format("%.0La", on_boundary_odd), "0xap+4"); + EXPECT_EQ(format("%.1La", on_boundary_odd), "0x9.8p+4"); + EXPECT_EQ(format("%.2La", on_boundary_odd), "0x9.80p+4"); + EXPECT_EQ(format("%.3La", on_boundary_odd), "0x9.800p+4"); + EXPECT_EQ(format("%.4La", on_boundary_odd), "0x9.8000p+4"); + EXPECT_EQ(format("%.5La", on_boundary_odd), "0x9.80000p+4"); + EXPECT_EQ(format("%.6La", on_boundary_odd), "0x9.800000p+4"); + + // 0x8.80001p+24 + const long double slightly_over = 142606352.0; + EXPECT_EQ(format("%.0La", slightly_over), "0x9p+24"); + EXPECT_EQ(format("%.1La", slightly_over), "0x8.8p+24"); + EXPECT_EQ(format("%.2La", slightly_over), "0x8.80p+24"); + EXPECT_EQ(format("%.3La", slightly_over), "0x8.800p+24"); + EXPECT_EQ(format("%.4La", slightly_over), "0x8.8000p+24"); + EXPECT_EQ(format("%.5La", slightly_over), "0x8.80001p+24"); + EXPECT_EQ(format("%.6La", slightly_over), "0x8.800010p+24"); + + // 0x8.7ffffp+24 + const long double slightly_under = 142606320.0; + EXPECT_EQ(format("%.0La", slightly_under), "0x8p+24"); + EXPECT_EQ(format("%.1La", slightly_under), "0x8.8p+24"); + EXPECT_EQ(format("%.2La", slightly_under), "0x8.80p+24"); + EXPECT_EQ(format("%.3La", slightly_under), "0x8.800p+24"); + EXPECT_EQ(format("%.4La", slightly_under), "0x8.8000p+24"); + EXPECT_EQ(format("%.5La", slightly_under), "0x8.7ffffp+24"); + EXPECT_EQ(format("%.6La", slightly_under), "0x8.7ffff0p+24"); + EXPECT_EQ(format("%.7La", slightly_under), "0x8.7ffff00p+24"); + + // 0xc.0828384858688000p+128 + const long double eights = 4094231060438608800781871108094404067328.0; + EXPECT_EQ(format("%.0La", eights), "0xcp+128"); + EXPECT_EQ(format("%.1La", eights), "0xc.1p+128"); + EXPECT_EQ(format("%.2La", eights), "0xc.08p+128"); + EXPECT_EQ(format("%.3La", eights), "0xc.083p+128"); + EXPECT_EQ(format("%.4La", eights), "0xc.0828p+128"); + EXPECT_EQ(format("%.5La", eights), "0xc.08284p+128"); + EXPECT_EQ(format("%.6La", eights), "0xc.082838p+128"); + EXPECT_EQ(format("%.7La", eights), "0xc.0828385p+128"); + EXPECT_EQ(format("%.8La", eights), "0xc.08283848p+128"); + EXPECT_EQ(format("%.9La", eights), "0xc.082838486p+128"); + EXPECT_EQ(format("%.10La", eights), "0xc.0828384858p+128"); + EXPECT_EQ(format("%.11La", eights), "0xc.08283848587p+128"); + EXPECT_EQ(format("%.12La", eights), "0xc.082838485868p+128"); + EXPECT_EQ(format("%.13La", eights), "0xc.0828384858688p+128"); + EXPECT_EQ(format("%.14La", eights), "0xc.08283848586880p+128"); + EXPECT_EQ(format("%.15La", eights), "0xc.082838485868800p+128"); + EXPECT_EQ(format("%.16La", eights), "0xc.0828384858688000p+128"); +} + +// We don't actually store the results. This is just to exercise the rest of the +// machinery. +struct NullSink { + friend void AbslFormatFlush(NullSink *sink, string_view str) {} +}; + +template <typename... T> +bool FormatWithNullSink(absl::string_view fmt, const T &... a) { + NullSink sink; + FormatArgImpl args[] = {FormatArgImpl(a)...}; + return FormatUntyped(&sink, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +} + +TEST_F(FormatConvertTest, ExtremeWidthPrecision) { + for (const char *fmt : {"f"}) { + for (double d : {1e-100, 1.0, 1e100}) { + constexpr int max = std::numeric_limits<int>::max(); + EXPECT_TRUE(FormatWithNullSink(std::string("%.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%1.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*.*") + fmt, max, max, d)); + } + } } TEST_F(FormatConvertTest, LongDouble) { - const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + return; +#endif // _MSC_VER + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", "%.5000", "%.60", "%+", "% ", "%-10"}; - // This value is not representable in double, but it is in long double that - // uses the extended format. - // This is to verify that we are not truncating the value mistakenly through a - // double. - long double very_precise = 10000000000000000.25L; - std::vector<long double> doubles = { 0.0, -0.0, - very_precise, - 1 / very_precise, std::numeric_limits<long double>::max(), -std::numeric_limits<long double>::max(), std::numeric_limits<long double>::min(), @@ -556,28 +1074,65 @@ TEST_F(FormatConvertTest, LongDouble) { std::numeric_limits<long double>::infinity(), -std::numeric_limits<long double>::infinity()}; + for (long double base : {1.L, 12.L, 123.L, 1234.L, 12345.L, 123456.L, + 1234567.L, 12345678.L, 123456789.L, 1234567890.L, + 12345678901.L, 123456789012.L, 1234567890123.L, + // This value is not representable in double, but it + // is in long double that uses the extended format. + // This is to verify that we are not truncating the + // value mistakenly through a double. + 10000000000000000.25L}) { + for (int exp : {-1000, -500, 0, 500, 1000}) { + for (int sign : {1, -1}) { + doubles.push_back(sign * std::ldexp(base, exp)); + doubles.push_back(sign / std::ldexp(base, exp)); + } + } + } + + // Regression tests + // + // Using a string literal because not all platforms support hex literals or it + // might be out of range. + doubles.push_back(std::strtold("-0xf.ffffffb5feafffbp-16324L", nullptr)); + for (const char *fmt : kFormats) { for (char f : {'f', 'F', // 'g', 'G', // 'a', 'A', // 'e', 'E'}) { std::string fmt_str = std::string(fmt) + 'L' + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' && + f != 'a' && f != 'A') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. + continue; + } + + if (f == 'a' || f == 'A') { + if (!native_traits.hex_float_has_glibc_rounding || + !native_traits.hex_float_optimizes_leading_digit_bit_count) { + continue; + } + } + for (auto d : doubles) { FormatArgImpl arg(d); UntypedFormatSpecImpl format(fmt_str); // We use ASSERT_EQ here because failures are usually correlated and a // bug would print way too many failed expectations causing the test to // time out. - ASSERT_EQ(StrPrint(fmt_str.c_str(), d), - FormatPack(format, {&arg, 1})) + ASSERT_EQ(StrPrint(fmt_str.c_str(), d), FormatPack(format, {&arg, 1})) << fmt_str << " " << StrPrint("%.18Lg", d) << " " - << StrPrint("%.999Lf", d); + << StrPrint("%La", d) << " " << StrPrint("%.1080Lf", d); } } } } -TEST_F(FormatConvertTest, IntAsFloat) { +TEST_F(FormatConvertTest, IntAsDouble) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); const int kMin = std::numeric_limits<int>::min(); const int kMax = std::numeric_limits<int>::max(); const int ia[] = { @@ -593,14 +1148,17 @@ TEST_F(FormatConvertTest, IntAsFloat) { const char *fmt; }; const double dx = static_cast<double>(fx); - const Expectation kExpect[] = { - { __LINE__, StrPrint("%f", dx), "%f" }, - { __LINE__, StrPrint("%12f", dx), "%12f" }, - { __LINE__, StrPrint("%.12f", dx), "%.12f" }, - { __LINE__, StrPrint("%12a", dx), "%12a" }, - { __LINE__, StrPrint("%.12a", dx), "%.12a" }, + std::vector<Expectation> expect = { + {__LINE__, StrPrint("%f", dx), "%f"}, + {__LINE__, StrPrint("%12f", dx), "%12f"}, + {__LINE__, StrPrint("%.12f", dx), "%.12f"}, + {__LINE__, StrPrint("%.12a", dx), "%.12a"}, }; - for (const Expectation &e : kExpect) { + if (native_traits.hex_float_uses_minimal_precision_when_not_specified) { + Expectation ex = {__LINE__, StrPrint("%12a", dx), "%12a"}; + expect.push_back(ex); + } + for (const Expectation &e : expect) { SCOPED_TRACE(e.line); SCOPED_TRACE(e.fmt); UntypedFormatSpecImpl format(e.fmt); @@ -645,6 +1203,25 @@ TEST_F(FormatConvertTest, ExpectedFailures) { EXPECT_TRUE(FormatFails("%*d", "")); } +// Sanity check to make sure that we are testing what we think we're testing on +// e.g. the x86_64+glibc platform. +TEST_F(FormatConvertTest, GlibcHasCorrectTraits) { +#if !defined(__GLIBC__) || !defined(__x86_64__) + return; +#endif + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + // If one of the following tests break then it is either because the above PP + // macro guards failed to exclude a new platform (likely) or because something + // has changed in the implemention of glibc sprintf float formatting behavior. + // If the latter, then the code that computes these flags needs to be + // revisited and/or possibly the StrFormat implementation. + EXPECT_TRUE(native_traits.hex_float_has_glibc_rounding); + EXPECT_TRUE(native_traits.hex_float_prefers_denormal_repr); + EXPECT_TRUE( + native_traits.hex_float_uses_minimal_precision_when_not_specified); + EXPECT_TRUE(native_traits.hex_float_optimizes_leading_digit_bit_count); +} + } // namespace } // namespace str_format_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/str_format/extension.cc b/absl/strings/internal/str_format/extension.cc index 2e5bc2ce..bb0d96cf 100644 --- a/absl/strings/internal/str_format/extension.cc +++ b/absl/strings/internal/str_format/extension.cc @@ -33,16 +33,40 @@ std::string Flags::ToString() const { return s; } -bool FormatSinkImpl::PutPaddedString(string_view v, int w, int p, bool l) { +#define ABSL_INTERNAL_X_VAL(id) \ + constexpr absl::FormatConversionChar FormatConversionCharInternal::id; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr absl::FormatConversionChar FormatConversionCharInternal::kNone; + +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + constexpr FormatConversionCharSet FormatConversionCharSetInternal::c; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kStar; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kIntegral; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kFloating; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kNumeric; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kPointer; + +bool FormatSinkImpl::PutPaddedString(string_view value, int width, + int precision, bool left) { size_t space_remaining = 0; - if (w >= 0) space_remaining = w; - size_t n = v.size(); - if (p >= 0) n = std::min(n, static_cast<size_t>(p)); - string_view shown(v.data(), n); + if (width >= 0) space_remaining = width; + size_t n = value.size(); + if (precision >= 0) n = std::min(n, static_cast<size_t>(precision)); + string_view shown(value.data(), n); space_remaining = Excess(shown.size(), space_remaining); - if (!l) Append(space_remaining, ' '); + if (!left) Append(space_remaining, ' '); Append(shown); - if (l) Append(space_remaining, ' '); + if (left) Append(space_remaining, ' '); return true; } diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h index d1665753..a9b9e137 100644 --- a/absl/strings/internal/str_format/extension.h +++ b/absl/strings/internal/str_format/extension.h @@ -24,11 +24,16 @@ #include "absl/base/config.h" #include "absl/base/port.h" +#include "absl/meta/type_traits.h" #include "absl/strings/internal/str_format/output.h" #include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN + +enum class FormatConversionChar : uint8_t; +enum class FormatConversionCharSet : uint64_t; + namespace str_format_internal { class FormatRawSinkImpl { @@ -102,7 +107,7 @@ class FormatSinkImpl { size_t size() const { return size_; } // Put 'v' to 'sink' with specified width, precision, and left flag. - bool PutPaddedString(string_view v, int w, int p, bool l); + bool PutPaddedString(string_view v, int width, int precision, bool left); template <typename T> T Wrap() { @@ -139,7 +144,7 @@ struct Flags { // clang-format off #define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ /* text */ \ - X_VAL(c) X_SEP X_VAL(C) X_SEP X_VAL(s) X_SEP X_VAL(S) X_SEP \ + X_VAL(c) X_SEP X_VAL(s) X_SEP \ /* ints */ \ X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \ X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \ @@ -148,14 +153,39 @@ struct Flags { X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ /* misc */ \ X_VAL(n) X_SEP X_VAL(p) +// clang-format on + +// This type should not be referenced, it exists only to provide labels +// internally that match the values declared in FormatConversionChar in +// str_format.h. This is meant to allow internal libraries to use the same +// declared interface type as the public interface +// (absl::StrFormatConversionChar) while keeping the definition in a public +// header. +// Internal libraries should use the form +// `FormatConversionCharInternal::c`, `FormatConversionCharInternal::kNone` for +// comparisons. Use in switch statements is not recommended due to a bug in how +// gcc 4.9 -Wswitch handles declared but undefined enums. +struct FormatConversionCharInternal { + FormatConversionCharInternal() = delete; -enum class FormatConversionChar : uint8_t { - c, C, s, S, // text + private: + // clang-format off + enum class Enum : uint8_t { + c, s, // text d, i, o, u, x, X, // int f, F, e, E, g, G, a, A, // float n, p, // misc - kNone, - none = kNone + kNone + }; + // clang-format on + public: +#define ABSL_INTERNAL_X_VAL(id) \ + static constexpr FormatConversionChar id = \ + static_cast<FormatConversionChar>(Enum::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL + static constexpr FormatConversionChar kNone = + static_cast<FormatConversionChar>(Enum::kNone); }; // clang-format on @@ -163,95 +193,56 @@ inline FormatConversionChar FormatConversionCharFromChar(char c) { switch (c) { #define ABSL_INTERNAL_X_VAL(id) \ case #id[0]: \ - return FormatConversionChar::id; + return FormatConversionCharInternal::id; ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) #undef ABSL_INTERNAL_X_VAL } - return FormatConversionChar::kNone; -} - -inline int FormatConversionCharRadix(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::x: - case FormatConversionChar::X: - case FormatConversionChar::a: - case FormatConversionChar::A: - case FormatConversionChar::p: - return 16; - case FormatConversionChar::o: - return 8; - default: - return 10; - } + return FormatConversionCharInternal::kNone; } inline bool FormatConversionCharIsUpper(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::X: - case FormatConversionChar::F: - case FormatConversionChar::E: - case FormatConversionChar::G: - case FormatConversionChar::A: - return true; - default: - return false; - } -} - -inline bool FormatConversionCharIsSigned(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::d: - case FormatConversionChar::i: - return true; - default: - return false; - } -} - -inline bool FormatConversionCharIsIntegral(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::d: - case FormatConversionChar::i: - case FormatConversionChar::u: - case FormatConversionChar::o: - case FormatConversionChar::x: - case FormatConversionChar::X: - return true; - default: - return false; + if (c == FormatConversionCharInternal::X || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::G || + c == FormatConversionCharInternal::A) { + return true; + } else { + return false; } } inline bool FormatConversionCharIsFloat(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::a: - case FormatConversionChar::e: - case FormatConversionChar::f: - case FormatConversionChar::g: - case FormatConversionChar::A: - case FormatConversionChar::E: - case FormatConversionChar::F: - case FormatConversionChar::G: - return true; - default: - return false; + if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::A || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::G) { + return true; + } else { + return false; } } inline char FormatConversionCharToChar(FormatConversionChar c) { - switch (c) { -#define ABSL_INTERNAL_X_VAL(e) \ - case FormatConversionChar::e: \ + if (c == FormatConversionCharInternal::kNone) { + return '\0'; + +#define ABSL_INTERNAL_X_VAL(e) \ + } else if (c == FormatConversionCharInternal::e) { \ return #e[0]; #define ABSL_INTERNAL_X_SEP - ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, - ABSL_INTERNAL_X_SEP) - case FormatConversionChar::kNone: - return '\0'; + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, + ABSL_INTERNAL_X_SEP) + } else { + return '\0'; + } + #undef ABSL_INTERNAL_X_VAL #undef ABSL_INTERNAL_X_SEP - } - return '\0'; } // The associated char. @@ -263,7 +254,7 @@ inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) { struct FormatConversionSpecImplFriend; -class FormatConversionSpec { +class FormatConversionSpecImpl { public: // Width and precison are not specified, no flags are set. bool is_basic() const { return flags_.basic; } @@ -276,7 +267,7 @@ class FormatConversionSpec { FormatConversionChar conversion_char() const { // Keep this field first in the struct . It generates better code when // accessing it when ConversionSpec is passed by value in registers. - static_assert(offsetof(FormatConversionSpec, conv_) == 0, ""); + static_assert(offsetof(FormatConversionSpecImpl, conv_) == 0, ""); return conv_; } @@ -287,41 +278,65 @@ class FormatConversionSpec { // negative value. int precision() const { return precision_; } - // Deprecated (use has_x_flag() instead). - Flags flags() const { return flags_; } - // Deprecated - FormatConversionChar conv() const { return conversion_char(); } + template <typename T> + T Wrap() { + return T(*this); + } private: friend struct str_format_internal::FormatConversionSpecImplFriend; - FormatConversionChar conv_ = FormatConversionChar::kNone; + FormatConversionChar conv_ = FormatConversionCharInternal::kNone; Flags flags_; int width_; int precision_; }; struct FormatConversionSpecImplFriend final { - static void SetFlags(Flags f, FormatConversionSpec* conv) { + static void SetFlags(Flags f, FormatConversionSpecImpl* conv) { conv->flags_ = f; } static void SetConversionChar(FormatConversionChar c, - FormatConversionSpec* conv) { + FormatConversionSpecImpl* conv) { conv->conv_ = c; } - static void SetWidth(int w, FormatConversionSpec* conv) { conv->width_ = w; } - static void SetPrecision(int p, FormatConversionSpec* conv) { + static void SetWidth(int w, FormatConversionSpecImpl* conv) { + conv->width_ = w; + } + static void SetPrecision(int p, FormatConversionSpecImpl* conv) { conv->precision_ = p; } - static std::string FlagsToString(const FormatConversionSpec& spec) { + static std::string FlagsToString(const FormatConversionSpecImpl& spec) { return spec.flags_.ToString(); } }; -constexpr uint64_t FormatConversionCharToConvValue(char conv) { +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a) { + return a; +} + +template <typename... CharSet> +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a, CharSet... rest) { + return static_cast<FormatConversionCharSet>( + static_cast<uint64_t>(a) | + static_cast<uint64_t>(FormatConversionCharSetUnion(rest...))); +} + +constexpr uint64_t FormatConversionCharToConvInt(FormatConversionChar c) { + return uint64_t{1} << (1 + static_cast<uint8_t>(c)); +} + +constexpr uint64_t FormatConversionCharToConvInt(char conv) { return -#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ - conv == #c[0] \ - ? (uint64_t{1} << (1 + static_cast<uint8_t>(FormatConversionChar::c))) \ +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + conv == #c[0] \ + ? FormatConversionCharToConvInt(FormatConversionCharInternal::c) \ : ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) #undef ABSL_INTERNAL_CHAR_SET_CASE @@ -330,28 +345,29 @@ constexpr uint64_t FormatConversionCharToConvValue(char conv) { : 0; } -enum class FormatConversionCharSet : uint64_t { -#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ - c = FormatConversionCharToConvValue(#c[0]), +constexpr FormatConversionCharSet FormatConversionCharToConvValue(char conv) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvInt(conv)); +} + +struct FormatConversionCharSetInternal { +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + static constexpr FormatConversionCharSet c = \ + FormatConversionCharToConvValue(#c[0]); ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) #undef ABSL_INTERNAL_CHAR_SET_CASE // Used for width/precision '*' specification. - kStar = FormatConversionCharToConvValue('*'), - // Some predefined values: - kIntegral = d | i | u | o | x | X, - kFloating = a | e | f | g | A | E | F | G, - kNumeric = kIntegral | kFloating, - kString = s, - kPointer = p, - - // The following are deprecated - star = kStar, - integral = kIntegral, - floating = kFloating, - numeric = kNumeric, - string = kString, - pointer = kPointer + static constexpr FormatConversionCharSet kStar = + FormatConversionCharToConvValue('*'); + + static constexpr FormatConversionCharSet kIntegral = + FormatConversionCharSetUnion(d, i, u, o, x, X); + static constexpr FormatConversionCharSet kFloating = + FormatConversionCharSetUnion(a, e, f, g, A, E, F, G); + static constexpr FormatConversionCharSet kNumeric = + FormatConversionCharSetUnion(kIntegral, kFloating); + static constexpr FormatConversionCharSet kPointer = p; }; // Type safe OR operator. @@ -361,18 +377,29 @@ enum class FormatConversionCharSet : uint64_t { // 2. We use "enum class" which would not work even if we accepted the decay. constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, FormatConversionCharSet b) { - return FormatConversionCharSet(static_cast<uint64_t>(a) | - static_cast<uint64_t>(b)); + return FormatConversionCharSetUnion(a, b); +} + +// Overloaded conversion functions to support absl::ParsedFormat. +// Get a conversion with a single character in it. +constexpr FormatConversionCharSet ToFormatConversionCharSet(char c) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvValue(c)); } // Get a conversion with a single character in it. -constexpr FormatConversionCharSet ConversionCharToConv(char c) { - return FormatConversionCharSet(FormatConversionCharToConvValue(c)); +constexpr FormatConversionCharSet ToFormatConversionCharSet( + FormatConversionCharSet c) { + return c; } +template <typename T> +void ToFormatConversionCharSet(T) = delete; + // Checks whether `c` exists in `set`. constexpr bool Contains(FormatConversionCharSet set, char c) { - return (static_cast<uint64_t>(set) & FormatConversionCharToConvValue(c)) != 0; + return (static_cast<uint64_t>(set) & + static_cast<uint64_t>(FormatConversionCharToConvValue(c))) != 0; } // Checks whether all the characters in `c` are contained in `set` @@ -382,31 +409,16 @@ constexpr bool Contains(FormatConversionCharSet set, static_cast<uint64_t>(c); } -// Return type of the AbslFormatConvert() functions. -// The FormatConversionCharSet template parameter is used to inform the -// framework of what conversion characters are supported by that -// AbslFormatConvert routine. -template <FormatConversionCharSet C> -struct FormatConvertResult { - static constexpr FormatConversionCharSet kConv = C; - bool value; -}; - -template <FormatConversionCharSet C> -constexpr FormatConversionCharSet FormatConvertResult<C>::kConv; +// Checks whether all the characters in `c` are contained in `set` +constexpr bool Contains(FormatConversionCharSet set, FormatConversionChar c) { + return (static_cast<uint64_t>(set) & FormatConversionCharToConvInt(c)) != 0; +} // Return capacity - used, clipped to a minimum of 0. inline size_t Excess(size_t used, size_t capacity) { return used < capacity ? capacity - used : 0; } -// Type alias for use during migration. -using ConversionChar = FormatConversionChar; -using ConversionSpec = FormatConversionSpec; -using Conv = FormatConversionCharSet; -template <FormatConversionCharSet C> -using ConvertResult = FormatConvertResult<C>; - } // namespace str_format_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/str_format/extension_test.cc b/absl/strings/internal/str_format/extension_test.cc index 4e23fefb..1c93fdb1 100644 --- a/absl/strings/internal/str_format/extension_test.cc +++ b/absl/strings/internal/str_format/extension_test.cc @@ -19,9 +19,26 @@ #include <random> #include <string> +#include "gtest/gtest.h" #include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" -#include "gtest/gtest.h" +namespace my_namespace { +class UserDefinedType { + public: + UserDefinedType() = default; + + void Append(absl::string_view str) { value_.append(str.data(), str.size()); } + const std::string& Value() const { return value_; } + + friend void AbslFormatFlush(UserDefinedType* x, absl::string_view str) { + x->Append(str); + } + + private: + std::string value_; +}; +} // namespace my_namespace namespace { @@ -63,4 +80,19 @@ TEST(FormatExtensionTest, SinkAppendChars) { EXPECT_EQ(actual, expected); } } + +TEST(FormatExtensionTest, VerifyEnumEquality) { +#define X_VAL(id) \ + EXPECT_EQ(absl::FormatConversionChar::id, \ + absl::str_format_internal::FormatConversionCharInternal::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, ); +#undef X_VAL + +#define X_VAL(id) \ + EXPECT_EQ(absl::FormatConversionCharSet::id, \ + absl::str_format_internal::FormatConversionCharSetInternal::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, ); +#undef X_VAL +} + } // namespace diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc index d4c647c3..20aeada5 100644 --- a/absl/strings/internal/str_format/float_conversion.cc +++ b/absl/strings/internal/str_format/float_conversion.cc @@ -1,12 +1,23 @@ #include "absl/strings/internal/str_format/float_conversion.h" #include <string.h> + #include <algorithm> #include <cassert> #include <cmath> +#include <limits> #include <string> +#include "absl/base/attributes.h" #include "absl/base/config.h" +#include "absl/base/internal/bits.h" +#include "absl/base/optimization.h" +#include "absl/functional/function_ref.h" +#include "absl/meta/type_traits.h" +#include "absl/numeric/int128.h" +#include "absl/strings/numbers.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -14,13 +25,901 @@ namespace str_format_internal { namespace { -char *CopyStringTo(string_view v, char *out) { +// The code below wants to avoid heap allocations. +// To do so it needs to allocate memory on the stack. +// `StackArray` will allocate memory on the stack in the form of a uint32_t +// array and call the provided callback with said memory. +// It will allocate memory in increments of 512 bytes. We could allocate the +// largest needed unconditionally, but that is more than we need in most of +// cases. This way we use less stack in the common cases. +class StackArray { + using Func = absl::FunctionRef<void(absl::Span<uint32_t>)>; + static constexpr size_t kStep = 512 / sizeof(uint32_t); + // 5 steps is 2560 bytes, which is enough to hold a long double with the + // largest/smallest exponents. + // The operations below will static_assert their particular maximum. + static constexpr size_t kNumSteps = 5; + + // We do not want this function to be inlined. + // Otherwise the caller will allocate the stack space unnecessarily for all + // the variants even though it only calls one. + template <size_t steps> + ABSL_ATTRIBUTE_NOINLINE static void RunWithCapacityImpl(Func f) { + uint32_t values[steps * kStep]{}; + f(absl::MakeSpan(values)); + } + + public: + static constexpr size_t kMaxCapacity = kStep * kNumSteps; + + static void RunWithCapacity(size_t capacity, Func f) { + assert(capacity <= kMaxCapacity); + const size_t step = (capacity + kStep - 1) / kStep; + assert(step <= kNumSteps); + switch (step) { + case 1: + return RunWithCapacityImpl<1>(f); + case 2: + return RunWithCapacityImpl<2>(f); + case 3: + return RunWithCapacityImpl<3>(f); + case 4: + return RunWithCapacityImpl<4>(f); + case 5: + return RunWithCapacityImpl<5>(f); + } + + assert(false && "Invalid capacity"); + } +}; + +// Calculates `10 * (*v) + carry` and stores the result in `*v` and returns +// the carry. +template <typename Int> +inline Int MultiplyBy10WithCarry(Int *v, Int carry) { + using BiggerInt = absl::conditional_t<sizeof(Int) == 4, uint64_t, uint128>; + BiggerInt tmp = 10 * static_cast<BiggerInt>(*v) + carry; + *v = static_cast<Int>(tmp); + return static_cast<Int>(tmp >> (sizeof(Int) * 8)); +} + +// Calculates `(2^64 * carry + *v) / 10`. +// Stores the quotient in `*v` and returns the remainder. +// Requires: `0 <= carry <= 9` +inline uint64_t DivideBy10WithCarry(uint64_t *v, uint64_t carry) { + constexpr uint64_t divisor = 10; + // 2^64 / divisor = chunk_quotient + chunk_remainder / divisor + constexpr uint64_t chunk_quotient = (uint64_t{1} << 63) / (divisor / 2); + constexpr uint64_t chunk_remainder = uint64_t{} - chunk_quotient * divisor; + + const uint64_t mod = *v % divisor; + const uint64_t next_carry = chunk_remainder * carry + mod; + *v = *v / divisor + carry * chunk_quotient + next_carry / divisor; + return next_carry % divisor; +} + +// Generates the decimal representation for an integer of the form `v * 2^exp`, +// where `v` and `exp` are both positive integers. +// It generates the digits from the left (ie the most significant digit first) +// to allow for direct printing into the sink. +// +// Requires `0 <= exp` and `exp <= numeric_limits<long double>::max_exponent`. +class BinaryToDecimal { + static constexpr int ChunksNeeded(int exp) { + // We will left shift a uint128 by `exp` bits, so we need `128+exp` total + // bits. Round up to 32. + // See constructor for details about adding `10%` to the value. + return (128 + exp + 31) / 32 * 11 / 10; + } + + public: + // Run the conversion for `v * 2^exp` and call `f(binary_to_decimal)`. + // This function will allocate enough stack space to perform the conversion. + static void RunConversion(uint128 v, int exp, + absl::FunctionRef<void(BinaryToDecimal)> f) { + assert(exp > 0); + assert(exp <= std::numeric_limits<long double>::max_exponent); + static_assert( + StackArray::kMaxCapacity >= + ChunksNeeded(std::numeric_limits<long double>::max_exponent), + ""); + + StackArray::RunWithCapacity( + ChunksNeeded(exp), + [=](absl::Span<uint32_t> input) { f(BinaryToDecimal(input, v, exp)); }); + } + + int TotalDigits() const { + return static_cast<int>((decimal_end_ - decimal_start_) * kDigitsPerChunk + + CurrentDigits().size()); + } + + // See the current block of digits. + absl::string_view CurrentDigits() const { + return absl::string_view(digits_ + kDigitsPerChunk - size_, size_); + } + + // Advance the current view of digits. + // Returns `false` when no more digits are available. + bool AdvanceDigits() { + if (decimal_start_ >= decimal_end_) return false; + + uint32_t w = data_[decimal_start_++]; + for (size_ = 0; size_ < kDigitsPerChunk; w /= 10) { + digits_[kDigitsPerChunk - ++size_] = w % 10 + '0'; + } + return true; + } + + private: + BinaryToDecimal(absl::Span<uint32_t> data, uint128 v, int exp) : data_(data) { + // We need to print the digits directly into the sink object without + // buffering them all first. To do this we need two things: + // - to know the total number of digits to do padding when necessary + // - to generate the decimal digits from the left. + // + // In order to do this, we do a two pass conversion. + // On the first pass we convert the binary representation of the value into + // a decimal representation in which each uint32_t chunk holds up to 9 + // decimal digits. In the second pass we take each decimal-holding-uint32_t + // value and generate the ascii decimal digits into `digits_`. + // + // The binary and decimal representations actually share the same memory + // region. As we go converting the chunks from binary to decimal we free + // them up and reuse them for the decimal representation. One caveat is that + // the decimal representation is around 7% less efficient in space than the + // binary one. We allocate an extra 10% memory to account for this. See + // ChunksNeeded for this calculation. + int chunk_index = exp / 32; + decimal_start_ = decimal_end_ = ChunksNeeded(exp); + const int offset = exp % 32; + // Left shift v by exp bits. + data_[chunk_index] = static_cast<uint32_t>(v << offset); + for (v >>= (32 - offset); v; v >>= 32) + data_[++chunk_index] = static_cast<uint32_t>(v); + + while (chunk_index >= 0) { + // While we have more than one chunk available, go in steps of 1e9. + // `data_[chunk_index]` holds the highest non-zero binary chunk, so keep + // the variable updated. + uint32_t carry = 0; + for (int i = chunk_index; i >= 0; --i) { + uint64_t tmp = uint64_t{data_[i]} + (uint64_t{carry} << 32); + data_[i] = static_cast<uint32_t>(tmp / uint64_t{1000000000}); + carry = static_cast<uint32_t>(tmp % uint64_t{1000000000}); + } + + // If the highest chunk is now empty, remove it from view. + if (data_[chunk_index] == 0) --chunk_index; + + --decimal_start_; + assert(decimal_start_ != chunk_index); + data_[decimal_start_] = carry; + } + + // Fill the first set of digits. The first chunk might not be complete, so + // handle differently. + for (uint32_t first = data_[decimal_start_++]; first != 0; first /= 10) { + digits_[kDigitsPerChunk - ++size_] = first % 10 + '0'; + } + } + + private: + static constexpr int kDigitsPerChunk = 9; + + int decimal_start_; + int decimal_end_; + + char digits_[kDigitsPerChunk]; + int size_ = 0; + + absl::Span<uint32_t> data_; +}; + +// Converts a value of the form `x * 2^-exp` into a sequence of decimal digits. +// Requires `-exp < 0` and +// `-exp >= limits<long double>::min_exponent - limits<long double>::digits`. +class FractionalDigitGenerator { + public: + // Run the conversion for `v * 2^exp` and call `f(generator)`. + // This function will allocate enough stack space to perform the conversion. + static void RunConversion( + uint128 v, int exp, absl::FunctionRef<void(FractionalDigitGenerator)> f) { + using Limits = std::numeric_limits<long double>; + assert(-exp < 0); + assert(-exp >= Limits::min_exponent - 128); + static_assert(StackArray::kMaxCapacity >= + (Limits::digits + 128 - Limits::min_exponent + 31) / 32, + ""); + StackArray::RunWithCapacity((Limits::digits + exp + 31) / 32, + [=](absl::Span<uint32_t> input) { + f(FractionalDigitGenerator(input, v, exp)); + }); + } + + // Returns true if there are any more non-zero digits left. + bool HasMoreDigits() const { return next_digit_ != 0 || chunk_index_ >= 0; } + + // Returns true if the remainder digits are greater than 5000... + bool IsGreaterThanHalf() const { + return next_digit_ > 5 || (next_digit_ == 5 && chunk_index_ >= 0); + } + // Returns true if the remainder digits are exactly 5000... + bool IsExactlyHalf() const { return next_digit_ == 5 && chunk_index_ < 0; } + + struct Digits { + int digit_before_nine; + int num_nines; + }; + + // Get the next set of digits. + // They are composed by a non-9 digit followed by a runs of zero or more 9s. + Digits GetDigits() { + Digits digits{next_digit_, 0}; + + next_digit_ = GetOneDigit(); + while (next_digit_ == 9) { + ++digits.num_nines; + next_digit_ = GetOneDigit(); + } + + return digits; + } + + private: + // Return the next digit. + int GetOneDigit() { + if (chunk_index_ < 0) return 0; + + uint32_t carry = 0; + for (int i = chunk_index_; i >= 0; --i) { + carry = MultiplyBy10WithCarry(&data_[i], carry); + } + // If the lowest chunk is now empty, remove it from view. + if (data_[chunk_index_] == 0) --chunk_index_; + return carry; + } + + FractionalDigitGenerator(absl::Span<uint32_t> data, uint128 v, int exp) + : chunk_index_(exp / 32), data_(data) { + const int offset = exp % 32; + // Right shift `v` by `exp` bits. + data_[chunk_index_] = static_cast<uint32_t>(v << (32 - offset)); + v >>= offset; + // Make sure we don't overflow the data. We already calculated that + // non-zero bits fit, so we might not have space for leading zero bits. + for (int pos = chunk_index_; v; v >>= 32) + data_[--pos] = static_cast<uint32_t>(v); + + // Fill next_digit_, as GetDigits expects it to be populated always. + next_digit_ = GetOneDigit(); + } + + int next_digit_; + int chunk_index_; + absl::Span<uint32_t> data_; +}; + +// Count the number of leading zero bits. +int LeadingZeros(uint64_t v) { return base_internal::CountLeadingZeros64(v); } +int LeadingZeros(uint128 v) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + return high != 0 ? base_internal::CountLeadingZeros64(high) + : 64 + base_internal::CountLeadingZeros64(low); +} + +// Round up the text digits starting at `p`. +// The buffer must have an extra digit that is known to not need rounding. +// This is done below by having an extra '0' digit on the left. +void RoundUp(char *p) { + while (*p == '9' || *p == '.') { + if (*p == '9') *p = '0'; + --p; + } + ++*p; +} + +// Check the previous digit and round up or down to follow the round-to-even +// policy. +void RoundToEven(char *p) { + if (*p == '.') --p; + if (*p % 2 == 1) RoundUp(p); +} + +// Simple integral decimal digit printing for values that fit in 64-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint64_t v, char *p) { + do { + *--p = DivideBy10WithCarry(&v, 0) + '0'; + } while (v != 0); + return p; +} + +// Simple integral decimal digit printing for values that fit in 128-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint128 v, char *p) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + while (high != 0) { + uint64_t carry = DivideBy10WithCarry(&high, 0); + carry = DivideBy10WithCarry(&low, carry); + *--p = carry + '0'; + } + return PrintIntegralDigitsFromRightFast(low, p); +} + +// Simple fractional decimal digit printing for values that fir in 64-bits after +// shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint64_t v, char *start, int exp, + int precision) { + char *p = start; + v <<= (64 - exp); + while (precision > 0) { + if (!v) return p; + *p++ = MultiplyBy10WithCarry(&v, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (v < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (v > 0x8000000000000000) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +// Simple fractional decimal digit printing for values that fir in 128-bits +// after shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint128 v, char *start, int exp, + int precision) { + char *p = start; + v <<= (128 - exp); + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + // While we have digits to print and `low` is not empty, do the long + // multiplication. + while (precision > 0 && low != 0) { + uint64_t carry = MultiplyBy10WithCarry(&low, uint64_t{0}); + carry = MultiplyBy10WithCarry(&high, carry); + + *p++ = carry + '0'; + --precision; + } + + // Now `low` is empty, so use a faster approach for the rest of the digits. + // This block is pretty much the same as the main loop for the 64-bit case + // above. + while (precision > 0) { + if (!high) return p; + *p++ = MultiplyBy10WithCarry(&high, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (high < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (high > 0x8000000000000000 || low != 0) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +struct FormatState { + char sign_char; + int precision; + const FormatConversionSpecImpl &conv; + FormatSinkImpl *sink; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + bool ShouldPrintDot() const { return precision != 0 || conv.has_alt_flag(); } +}; + +struct Padding { + int left_spaces; + int zeros; + int right_spaces; +}; + +Padding ExtraWidthToPadding(size_t total_size, const FormatState &state) { + if (state.conv.width() < 0 || + static_cast<size_t>(state.conv.width()) <= total_size) { + return {0, 0, 0}; + } + int missing_chars = state.conv.width() - total_size; + if (state.conv.has_left_flag()) { + return {0, 0, missing_chars}; + } else if (state.conv.has_zero_flag()) { + return {0, missing_chars, 0}; + } else { + return {missing_chars, 0, 0}; + } +} + +void FinalPrint(const FormatState &state, absl::string_view data, + int padding_offset, int trailing_zeros, + absl::string_view data_postfix) { + if (state.conv.width() < 0) { + // No width specified. Fast-path. + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(data); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + return; + } + + auto padding = ExtraWidthToPadding((state.sign_char != '\0' ? 1 : 0) + + data.size() + data_postfix.size() + + static_cast<size_t>(trailing_zeros), + state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + // Padding in general needs to be inserted somewhere in the middle of `data`. + state.sink->Append(data.substr(0, padding_offset)); + state.sink->Append(padding.zeros, '0'); + state.sink->Append(data.substr(padding_offset)); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + state.sink->Append(padding.right_spaces, ' '); +} + +// Fastpath %f formatter for when the shifted value fits in a simple integral +// type. +// Prints `v*2^exp` with the options from `state`. +template <typename Int> +void FormatFFast(Int v, int exp, const FormatState &state) { + constexpr int input_bits = sizeof(Int) * 8; + + static constexpr size_t integral_size = + /* in case we need to round up an extra digit */ 1 + + /* decimal digits for uint128 */ 40 + 1; + char buffer[integral_size + /* . */ 1 + /* max digits uint128 */ 128]; + buffer[integral_size] = '.'; + char *const integral_digits_end = buffer + integral_size; + char *integral_digits_start; + char *const fractional_digits_start = buffer + integral_size + 1; + char *fractional_digits_end = fractional_digits_start; + + if (exp >= 0) { + const int total_bits = input_bits - LeadingZeros(v) + exp; + integral_digits_start = + total_bits <= 64 + ? PrintIntegralDigitsFromRightFast(static_cast<uint64_t>(v) << exp, + integral_digits_end) + : PrintIntegralDigitsFromRightFast(static_cast<uint128>(v) << exp, + integral_digits_end); + } else { + exp = -exp; + + integral_digits_start = PrintIntegralDigitsFromRightFast( + exp < input_bits ? v >> exp : 0, integral_digits_end); + // PrintFractionalDigits may pull a carried 1 all the way up through the + // integral portion. + integral_digits_start[-1] = '0'; + + fractional_digits_end = + exp <= 64 ? PrintFractionalDigitsFast(v, fractional_digits_start, exp, + state.precision) + : PrintFractionalDigitsFast(static_cast<uint128>(v), + fractional_digits_start, exp, + state.precision); + // There was a carry, so include the first digit too. + if (integral_digits_start[-1] != '0') --integral_digits_start; + } + + size_t size = fractional_digits_end - integral_digits_start; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + if (!state.ShouldPrintDot()) --size; + FinalPrint(state, absl::string_view(integral_digits_start, size), + /*padding_offset=*/0, + static_cast<int>(state.precision - (fractional_digits_end - + fractional_digits_start)), + /*data_postfix=*/""); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp > 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to not have fractional digits, so we don't have to +// worry about anything after the `.`. +void FormatFPositiveExpSlow(uint128 v, int exp, const FormatState &state) { + BinaryToDecimal::RunConversion(v, exp, [&](BinaryToDecimal btd) { + const size_t total_digits = + btd.TotalDigits() + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + + const auto padding = ExtraWidthToPadding( + total_digits + (state.sign_char != '\0' ? 1 : 0), state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + do { + state.sink->Append(btd.CurrentDigits()); + } while (btd.AdvanceDigits()); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + state.sink->Append(state.precision, '0'); + state.sink->Append(padding.right_spaces, ' '); + }); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp < 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to be < 1.0, so we don't have to worry about integral +// digits. +void FormatFNegativeExpSlow(uint128 v, int exp, const FormatState &state) { + const size_t total_digits = + /* 0 */ 1 + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + auto padding = + ExtraWidthToPadding(total_digits + (state.sign_char ? 1 : 0), state); + padding.zeros += 1; + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + + // Print digits + int digits_to_go = state.precision; + + FractionalDigitGenerator::RunConversion( + v, exp, [&](FractionalDigitGenerator digit_gen) { + // There are no digits to print here. + if (state.precision == 0) return; + + // We go one digit at a time, while keeping track of runs of nines. + // The runs of nines are used to perform rounding when necessary. + + while (digits_to_go > 0 && digit_gen.HasMoreDigits()) { + auto digits = digit_gen.GetDigits(); + + // Now we have a digit and a run of nines. + // See if we can print them all. + if (digits.num_nines + 1 < digits_to_go) { + // We don't have to round yet, so print them. + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits.num_nines, '9'); + digits_to_go -= digits.num_nines + 1; + + } else { + // We can't print all the nines, see where we have to truncate. + + bool round_up = false; + if (digits.num_nines + 1 > digits_to_go) { + // We round up at a nine. No need to print them. + round_up = true; + } else { + // We can fit all the nines, but truncate just after it. + if (digit_gen.IsGreaterThanHalf()) { + round_up = true; + } else if (digit_gen.IsExactlyHalf()) { + // Round to even + round_up = + digits.num_nines != 0 || digits.digit_before_nine % 2 == 1; + } + } + + if (round_up) { + state.sink->Append(1, digits.digit_before_nine + '1'); + --digits_to_go; + // The rest will be zeros. + } else { + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits_to_go - 1, '9'); + digits_to_go = 0; + } + return; + } + } + }); + + state.sink->Append(digits_to_go, '0'); + state.sink->Append(padding.right_spaces, ' '); +} + +template <typename Int> +void FormatF(Int mantissa, int exp, const FormatState &state) { + if (exp >= 0) { + const int total_bits = sizeof(Int) * 8 - LeadingZeros(mantissa) + exp; + + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(total_bits > 128)) { + return FormatFPositiveExpSlow(mantissa, exp, state); + } + } else { + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(exp < -128)) { + return FormatFNegativeExpSlow(mantissa, -exp, state); + } + } + return FormatFFast(mantissa, exp, state); +} + +// Grab the group of four bits (nibble) from `n`. E.g., nibble 1 corresponds to +// bits 4-7. +template <typename Int> +uint8_t GetNibble(Int n, int nibble_index) { + constexpr Int mask_low_nibble = Int{0xf}; + int shift = nibble_index * 4; + n &= mask_low_nibble << shift; + return static_cast<uint8_t>((n >> shift) & 0xf); +} + +// Add one to the given nibble, applying carry to higher nibbles. Returns true +// if overflow, false otherwise. +template <typename Int> +bool IncrementNibble(int nibble_index, Int *n) { + constexpr int kShift = sizeof(Int) * 8 - 1; + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + Int before = *n >> kShift; + // Here we essentially want to take the number 1 and move it into the requsted + // nibble, then add it to *n to effectively increment the nibble. However, + // ASan will complain if we try to shift the 1 beyond the limits of the Int, + // i.e., if the nibble_index is out of range. So therefore we check for this + // and if we are out of range we just add 0 which leaves *n unchanged, which + // seems like the reasonable thing to do in that case. + *n += ((nibble_index >= kNumNibbles) ? 0 : (Int{1} << (nibble_index * 4))); + Int after = *n >> kShift; + return (before && !after) || (nibble_index >= kNumNibbles); +} + +// Return a mask with 1's in the given nibble and all lower nibbles. +template <typename Int> +Int MaskUpToNibbleInclusive(int nibble_index) { + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + static const Int ones = ~Int{0}; + return ones >> std::max(0, 4 * (kNumNibbles - nibble_index - 1)); +} + +// Return a mask with 1's below the given nibble. +template <typename Int> +Int MaskUpToNibbleExclusive(int nibble_index) { + return nibble_index <= 0 ? 0 : MaskUpToNibbleInclusive<Int>(nibble_index - 1); +} + +template <typename Int> +Int MoveToNibble(uint8_t nibble, int nibble_index) { + return Int{nibble} << (4 * nibble_index); +} + +// Given mantissa size, find optimal # of mantissa bits to put in initial digit. +// +// In the hex representation we keep a single hex digit to the left of the dot. +// However, the question as to how many bits of the mantissa should be put into +// that hex digit in theory is arbitrary, but in practice it is optimal to +// choose based on the size of the mantissa. E.g., for a `double`, there are 53 +// mantissa bits, so that means that we should put 1 bit to the left of the dot, +// thereby leaving 52 bits to the right, which is evenly divisible by four and +// thus all fractional digits represent actual precision. For a `long double`, +// on the other hand, there are 64 bits of mantissa, thus we can use all four +// bits for the initial hex digit and still have a number left over (60) that is +// a multiple of four. Once again, the goal is to have all fractional digits +// represent real precision. +template <typename Float> +constexpr int HexFloatLeadingDigitSizeInBits() { + return std::numeric_limits<Float>::digits % 4 > 0 + ? std::numeric_limits<Float>::digits % 4 + : 4; +} + +// This function captures the rounding behavior of glibc for hex float +// representations. E.g. when rounding 0x1.ab800000 to a precision of .2 +// ("%.2a") glibc will round up because it rounds toward the even number (since +// 0xb is an odd number, it will round up to 0xc). However, when rounding at a +// point that is not followed by 800000..., it disregards the parity and rounds +// up if > 8 and rounds down if < 8. +template <typename Int> +bool HexFloatNeedsRoundUp(Int mantissa, int final_nibble_displayed, + uint8_t leading) { + // If the last nibble (hex digit) to be displayed is the lowest on in the + // mantissa then that means that we don't have any further nibbles to inform + // rounding, so don't round. + if (final_nibble_displayed <= 0) { + return false; + } + int rounding_nibble_idx = final_nibble_displayed - 1; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + assert(final_nibble_displayed <= kTotalNibbles); + Int mantissa_up_to_rounding_nibble_inclusive = + mantissa & MaskUpToNibbleInclusive<Int>(rounding_nibble_idx); + Int eight = MoveToNibble<Int>(8, rounding_nibble_idx); + if (mantissa_up_to_rounding_nibble_inclusive != eight) { + return mantissa_up_to_rounding_nibble_inclusive > eight; + } + // Nibble in question == 8. + uint8_t round_if_odd = (final_nibble_displayed == kTotalNibbles) + ? leading + : GetNibble(mantissa, final_nibble_displayed); + return round_if_odd % 2 == 1; +} + +// Stores values associated with a Float type needed by the FormatA +// implementation in order to avoid templatizing that function by the Float +// type. +struct HexFloatTypeParams { + template <typename Float> + explicit HexFloatTypeParams(Float) + : min_exponent(std::numeric_limits<Float>::min_exponent - 1), + leading_digit_size_bits(HexFloatLeadingDigitSizeInBits<Float>()) { + assert(leading_digit_size_bits >= 1 && leading_digit_size_bits <= 4); + } + + int min_exponent; + int leading_digit_size_bits; +}; + +// Hex Float Rounding. First check if we need to round; if so, then we do that +// by manipulating (incrementing) the mantissa, that way we can later print the +// mantissa digits by iterating through them in the same way regardless of +// whether a rounding happened. +template <typename Int> +void FormatARound(bool precision_specified, const FormatState &state, + uint8_t *leading, Int *mantissa, int *exp) { + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Index of the last nibble that we could display given precision. + int final_nibble_displayed = + precision_specified ? std::max(0, (kTotalNibbles - state.precision)) : 0; + if (HexFloatNeedsRoundUp(*mantissa, final_nibble_displayed, *leading)) { + // Need to round up. + bool overflow = IncrementNibble(final_nibble_displayed, mantissa); + *leading += (overflow ? 1 : 0); + if (ABSL_PREDICT_FALSE(*leading > 15)) { + // We have overflowed the leading digit. This would mean that we would + // need two hex digits to the left of the dot, which is not allowed. So + // adjust the mantissa and exponent so that the result is always 1.0eXXX. + *leading = 1; + *mantissa = 0; + *exp += 4; + } + } + // Now that we have handled a possible round-up we can go ahead and zero out + // all the nibbles of the mantissa that we won't need. + if (precision_specified) { + *mantissa &= ~MaskUpToNibbleExclusive<Int>(final_nibble_displayed); + } +} + +template <typename Int> +void FormatANormalize(const HexFloatTypeParams float_traits, uint8_t *leading, + Int *mantissa, int *exp) { + constexpr int kIntBits = sizeof(Int) * 8; + static const Int kHighIntBit = Int{1} << (kIntBits - 1); + const int kLeadDigitBitsCount = float_traits.leading_digit_size_bits; + // Normalize mantissa so that highest bit set is in MSB position, unless we + // get interrupted by the exponent threshold. + while (*mantissa && !(*mantissa & kHighIntBit)) { + if (ABSL_PREDICT_FALSE(*exp - 1 < float_traits.min_exponent)) { + *mantissa >>= (float_traits.min_exponent - *exp); + *exp = float_traits.min_exponent; + return; + } + *mantissa <<= 1; + --*exp; + } + // Extract bits for leading digit then shift them away leaving the + // fractional part. + *leading = + static_cast<uint8_t>(*mantissa >> (kIntBits - kLeadDigitBitsCount)); + *exp -= (*mantissa != 0) ? kLeadDigitBitsCount : *exp; + *mantissa <<= kLeadDigitBitsCount; +} + +template <typename Int> +void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp, + bool uppercase, const FormatState &state) { + // Int properties. + constexpr int kIntBits = sizeof(Int) * 8; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Did the user specify a precision explicitly? + const bool precision_specified = state.conv.precision() >= 0; + + // ========== Normalize/Denormalize ========== + exp += kIntBits; // make all digits fractional digits. + // This holds the (up to four) bits of leading digit, i.e., the '1' in the + // number 0x1.e6fp+2. It's always > 0 unless number is zero or denormal. + uint8_t leading = 0; + FormatANormalize(float_traits, &leading, &mantissa, &exp); + + // =============== Rounding ================== + // Check if we need to round; if so, then we do that by manipulating + // (incrementing) the mantissa before beginning to print characters. + FormatARound(precision_specified, state, &leading, &mantissa, &exp); + + // ============= Format Result =============== + // This buffer holds the "0x1.ab1de3" portion of "0x1.ab1de3pe+2". Compute the + // size with long double which is the largest of the floats. + constexpr size_t kBufSizeForHexFloatRepr = + 2 // 0x + + std::numeric_limits<long double>::digits / 4 // number of hex digits + + 1 // round up + + 1; // "." (dot) + char digits_buffer[kBufSizeForHexFloatRepr]; + char *digits_iter = digits_buffer; + const char *const digits = + static_cast<const char *>("0123456789ABCDEF0123456789abcdef") + + (uppercase ? 0 : 16); + + // =============== Hex Prefix ================ + *digits_iter++ = '0'; + *digits_iter++ = uppercase ? 'X' : 'x'; + + // ========== Non-Fractional Digit =========== + *digits_iter++ = digits[leading]; + + // ================== Dot ==================== + // There are three reasons we might need a dot. Keep in mind that, at this + // point, the mantissa holds only the fractional part. + if ((precision_specified && state.precision > 0) || + (!precision_specified && mantissa > 0) || state.conv.has_alt_flag()) { + *digits_iter++ = '.'; + } + + // ============ Fractional Digits ============ + int digits_emitted = 0; + while (mantissa > 0) { + *digits_iter++ = digits[GetNibble(mantissa, kTotalNibbles - 1)]; + mantissa <<= 4; + ++digits_emitted; + } + int trailing_zeros = + precision_specified ? state.precision - digits_emitted : 0; + assert(trailing_zeros >= 0); + auto digits_result = string_view(digits_buffer, digits_iter - digits_buffer); + + // =============== Exponent ================== + constexpr size_t kBufSizeForExpDecRepr = + numbers_internal::kFastToBufferSize // requred for FastIntToBuffer + + 1 // 'p' or 'P' + + 1; // '+' or '-' + char exp_buffer[kBufSizeForExpDecRepr]; + exp_buffer[0] = uppercase ? 'P' : 'p'; + exp_buffer[1] = exp >= 0 ? '+' : '-'; + numbers_internal::FastIntToBuffer(exp < 0 ? -exp : exp, exp_buffer + 2); + + // ============ Assemble Result ============== + FinalPrint(state, // + digits_result, // 0xN.NNN... + 2, // offset in `data` to start padding if needed. + trailing_zeros, // num remaining mantissa padding zeros + exp_buffer); // exponent +} + +char *CopyStringTo(absl::string_view v, char *out) { std::memcpy(out, v.data(), v.size()); return out + v.size(); } template <typename Float> -bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, +bool FallbackToSnprintf(const Float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { int w = conv.width() >= 0 ? conv.width() : 0; int p = conv.precision() >= 0 ? conv.precision() : -1; @@ -33,17 +932,17 @@ bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, if (std::is_same<long double, Float>()) { *fp++ = 'L'; } - *fp++ = FormatConversionCharToChar(conv.conv()); + *fp++ = FormatConversionCharToChar(conv.conversion_char()); *fp = 0; assert(fp < fmt + sizeof(fmt)); } std::string space(512, '\0'); - string_view result; + absl::string_view result; while (true) { int n = snprintf(&space[0], space.size(), fmt, w, p, v); if (n < 0) return false; if (static_cast<size_t>(n) < space.size()) { - result = string_view(space.data(), n); + result = absl::string_view(space.data(), n); break; } space.resize(n + 1); @@ -96,21 +995,24 @@ enum class FormatStyle { Fixed, Precision }; // Otherwise, return false. template <typename Float> bool ConvertNonNumericFloats(char sign_char, Float v, - const ConversionSpec &conv, FormatSinkImpl *sink) { + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { char text[4], *ptr = text; - if (sign_char) *ptr++ = sign_char; + if (sign_char != '\0') *ptr++ = sign_char; if (std::isnan(v)) { - ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "NAN" : "nan", - 3, ptr); + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "NAN" : "nan", 3, + ptr); } else if (std::isinf(v)) { - ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "INF" : "inf", - 3, ptr); + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "INF" : "inf", 3, + ptr); } else { return false; } return sink->PutPaddedString(string_view(text, ptr - text), conv.width(), -1, - conv.flags().left); + conv.has_left_flag()); } // Round up the last digit of the value. @@ -170,7 +1072,12 @@ constexpr bool CanFitMantissa() { template <typename Float> struct Decomposed { - Float mantissa; + using MantissaType = + absl::conditional_t<std::is_same<long double, Float>::value, uint128, + uint64_t>; + static_assert(std::numeric_limits<Float>::digits <= sizeof(MantissaType) * 8, + ""); + MantissaType mantissa; int exponent; }; @@ -181,7 +1088,8 @@ Decomposed<Float> Decompose(Float v) { Float m = std::frexp(v, &exp); m = std::ldexp(m, std::numeric_limits<Float>::digits); exp -= std::numeric_limits<Float>::digits; - return {m, exp}; + + return {static_cast<typename Decomposed<Float>::MantissaType>(m), exp}; } // Print 'digits' as decimal. @@ -350,31 +1258,32 @@ bool FloatToBuffer(Decomposed<Float> decomposed, int precision, Buffer *out, return false; } -void WriteBufferToSink(char sign_char, string_view str, - const ConversionSpec &conv, FormatSinkImpl *sink) { +void WriteBufferToSink(char sign_char, absl::string_view str, + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { int left_spaces = 0, zeros = 0, right_spaces = 0; int missing_chars = conv.width() >= 0 ? std::max(conv.width() - static_cast<int>(str.size()) - static_cast<int>(sign_char != 0), 0) : 0; - if (conv.flags().left) { + if (conv.has_left_flag()) { right_spaces = missing_chars; - } else if (conv.flags().zero) { + } else if (conv.has_zero_flag()) { zeros = missing_chars; } else { left_spaces = missing_chars; } sink->Append(left_spaces, ' '); - if (sign_char) sink->Append(1, sign_char); + if (sign_char != '\0') sink->Append(1, sign_char); sink->Append(zeros, '0'); sink->Append(str); sink->Append(right_spaces, ' '); } template <typename Float> -bool FloatToSink(const Float v, const ConversionSpec &conv, +bool FloatToSink(const Float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { // Print the sign or the sign column. Float abs_v = v; @@ -382,9 +1291,9 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, if (std::signbit(abs_v)) { sign_char = '-'; abs_v = -abs_v; - } else if (conv.flags().show_pos) { + } else if (conv.has_show_pos_flag()) { sign_char = '+'; - } else if (conv.flags().sign_col) { + } else if (conv.has_sign_col_flag()) { sign_char = ' '; } @@ -401,89 +1310,92 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, Buffer buffer; - switch (conv.conv()) { - case ConversionChar::f: - case ConversionChar::F: - if (!FloatToBuffer<FormatStyle::Fixed>(decomposed, precision, &buffer, - nullptr)) { - return FallbackToSnprintf(v, conv, sink); - } - if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); - break; - - case ConversionChar::e: - case ConversionChar::E: - if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, - &exp)) { - return FallbackToSnprintf(v, conv, sink); - } - if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); - PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', - &buffer); - break; - - case ConversionChar::g: - case ConversionChar::G: - precision = std::max(0, precision - 1); - if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, - &exp)) { - return FallbackToSnprintf(v, conv, sink); - } - if (precision + 1 > exp && exp >= -4) { - if (exp < 0) { - // Have 1.23456, needs 0.00123456 - // Move the first digit - buffer.begin[1] = *buffer.begin; - // Add some zeros - for (; exp < -1; ++exp) *buffer.begin-- = '0'; - *buffer.begin-- = '.'; - *buffer.begin = '0'; - } else if (exp > 0) { - // Have 1.23456, needs 1234.56 - // Move the '.' exp positions to the right. - std::rotate(buffer.begin + 1, buffer.begin + 2, - buffer.begin + exp + 2); - } - exp = 0; - } - if (!conv.flags().alt) { - while (buffer.back() == '0') buffer.pop_back(); - if (buffer.back() == '.') buffer.pop_back(); - } - if (exp) { - PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', - &buffer); - } - break; + FormatConversionChar c = conv.conversion_char(); - case ConversionChar::a: - case ConversionChar::A: + if (c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::F) { + FormatF(decomposed.mantissa, decomposed.exponent, + {sign_char, precision, conv, sink}); + return true; + } else if (c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::E) { + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { return FallbackToSnprintf(v, conv, sink); - - default: - return false; + } + if (!conv.has_alt_flag() && buffer.back() == '.') buffer.pop_back(); + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } else if (c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::G) { + precision = std::max(0, precision - 1); + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (precision + 1 > exp && exp >= -4) { + if (exp < 0) { + // Have 1.23456, needs 0.00123456 + // Move the first digit + buffer.begin[1] = *buffer.begin; + // Add some zeros + for (; exp < -1; ++exp) *buffer.begin-- = '0'; + *buffer.begin-- = '.'; + *buffer.begin = '0'; + } else if (exp > 0) { + // Have 1.23456, needs 1234.56 + // Move the '.' exp positions to the right. + std::rotate(buffer.begin + 1, buffer.begin + 2, buffer.begin + exp + 2); + } + exp = 0; + } + if (!conv.has_alt_flag()) { + while (buffer.back() == '0') buffer.pop_back(); + if (buffer.back() == '.') buffer.pop_back(); + } + if (exp) { + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } + } else if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::A) { + bool uppercase = (c == FormatConversionCharInternal::A); + FormatA(HexFloatTypeParams(Float{}), decomposed.mantissa, + decomposed.exponent, uppercase, {sign_char, precision, conv, sink}); + return true; + } else { + return false; } WriteBufferToSink(sign_char, - string_view(buffer.begin, buffer.end - buffer.begin), conv, - sink); + absl::string_view(buffer.begin, buffer.end - buffer.begin), + conv, sink); return true; } } // namespace -bool ConvertFloatImpl(long double v, const ConversionSpec &conv, +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { + if (std::numeric_limits<long double>::digits == + 2 * std::numeric_limits<double>::digits) { + // This is the `double-double` representation of `long double`. + // We do not handle it natively. Fallback to snprintf. + return FallbackToSnprintf(v, conv, sink); + } + return FloatToSink(v, conv, sink); } -bool ConvertFloatImpl(float v, const ConversionSpec &conv, +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { - return FloatToSink(v, conv, sink); + return FloatToSink(static_cast<double>(v), conv, sink); } -bool ConvertFloatImpl(double v, const ConversionSpec &conv, +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { return FloatToSink(v, conv, sink); } diff --git a/absl/strings/internal/str_format/float_conversion.h b/absl/strings/internal/str_format/float_conversion.h index 49a6a636..e78bc191 100644 --- a/absl/strings/internal/str_format/float_conversion.h +++ b/absl/strings/internal/str_format/float_conversion.h @@ -7,13 +7,13 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -bool ConvertFloatImpl(float v, const ConversionSpec &conv, +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink); -bool ConvertFloatImpl(double v, const ConversionSpec &conv, +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink); -bool ConvertFloatImpl(long double v, const ConversionSpec &conv, +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink); } // namespace str_format_internal diff --git a/absl/strings/internal/str_format/output.h b/absl/strings/internal/str_format/output.h index 28b288b7..8030dae0 100644 --- a/absl/strings/internal/str_format/output.h +++ b/absl/strings/internal/str_format/output.h @@ -30,9 +30,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN - -class Cord; - namespace str_format_internal { // RawSink implementation that writes into a char* buffer. @@ -77,12 +74,6 @@ inline void AbslFormatFlush(std::ostream* out, string_view s) { out->write(s.data(), s.size()); } -template <class AbslCord, typename = typename std::enable_if< - std::is_same<AbslCord, absl::Cord>::value>::type> -inline void AbslFormatFlush(AbslCord* out, string_view s) { - out->Append(s); -} - inline void AbslFormatFlush(FILERawSink* sink, string_view v) { sink->Write(v); } @@ -91,10 +82,11 @@ inline void AbslFormatFlush(BufferRawSink* sink, string_view v) { sink->Write(v); } +// This is a SFINAE to get a better compiler error message when the type +// is not supported. template <typename T> -auto InvokeFlush(T* out, string_view s) - -> decltype(str_format_internal::AbslFormatFlush(out, s)) { - str_format_internal::AbslFormatFlush(out, s); +auto InvokeFlush(T* out, string_view s) -> decltype(AbslFormatFlush(out, s)) { + AbslFormatFlush(out, s); } } // namespace str_format_internal diff --git a/absl/strings/internal/str_format/output_test.cc b/absl/strings/internal/str_format/output_test.cc index e54e6f70..ce2e91a0 100644 --- a/absl/strings/internal/str_format/output_test.cc +++ b/absl/strings/internal/str_format/output_test.cc @@ -19,6 +19,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/strings/cord.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -37,6 +38,12 @@ TEST(InvokeFlush, Stream) { EXPECT_EQ(str.str(), "ABCDEF"); } +TEST(InvokeFlush, Cord) { + absl::Cord str("ABC"); + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str, "ABCDEF"); +} + TEST(BufferRawSink, Limits) { char buf[16]; { @@ -70,4 +77,3 @@ TEST(BufferRawSink, Limits) { } // namespace ABSL_NAMESPACE_END } // namespace absl - diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index aab68db9..cc55dfa9 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -17,7 +17,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -using CC = ConversionChar; +using CC = FormatConversionCharInternal; using LM = LengthMod; ABSL_CONST_INIT const ConvTag kTags[256] = { @@ -29,9 +29,9 @@ ABSL_CONST_INIT const ConvTag kTags[256] = { {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f {}, {}, {}, {}, {}, {}, {}, {}, // 30-37 {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f - {}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO - {}, {}, {}, CC::S, {}, {}, {}, {}, // PQRSTUVW + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno @@ -296,15 +296,17 @@ struct ParsedFormatBase::ParsedFormatConsumer { char* data_pos; }; -ParsedFormatBase::ParsedFormatBase(string_view format, bool allow_ignored, - std::initializer_list<Conv> convs) +ParsedFormatBase::ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) : data_(format.empty() ? nullptr : new char[format.size()]) { has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) || !MatchesConversions(allow_ignored, convs); } bool ParsedFormatBase::MatchesConversions( - bool allow_ignored, std::initializer_list<Conv> convs) const { + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const { std::unordered_set<int> used; auto add_if_valid_conv = [&](int pos, char c) { if (static_cast<size_t>(pos) > convs.size() || diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index 45c90d1d..fffed04f 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -67,7 +67,7 @@ struct UnboundConversion { Flags flags; LengthMod length_mod = LengthMod::none; - ConversionChar conv = FormatConversionChar::kNone; + FormatConversionChar conv = FormatConversionCharInternal::kNone; }; // Consume conversion spec prefix (not including '%') of [p, end) if valid. @@ -83,7 +83,7 @@ const char* ConsumeUnboundConversion(const char* p, const char* end, // conversions. class ConvTag { public: - constexpr ConvTag(ConversionChar conversion_char) // NOLINT + constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT : tag_(static_cast<int8_t>(conversion_char)) {} // We invert the length modifiers to make them negative so that we can easily // test for them. @@ -94,9 +94,9 @@ class ConvTag { bool is_conv() const { return tag_ >= 0; } bool is_length() const { return tag_ < 0 && tag_ != -128; } - ConversionChar as_conv() const { + FormatConversionChar as_conv() const { assert(is_conv()); - return static_cast<ConversionChar>(tag_); + return static_cast<FormatConversionChar>(tag_); } LengthMod as_length() const { assert(is_length()); @@ -143,7 +143,7 @@ bool ParseFormatString(string_view src, Consumer consumer) { auto tag = GetTagForChar(percent[1]); if (tag.is_conv()) { if (ABSL_PREDICT_FALSE(next_arg < 0)) { - // This indicates an error in the format std::string. + // This indicates an error in the format string. // The only way to get `next_arg < 0` here is to have a positional // argument first which sets next_arg to -1 and then a non-positional // argument. @@ -186,8 +186,9 @@ constexpr bool EnsureConstexpr(string_view s) { class ParsedFormatBase { public: - explicit ParsedFormatBase(string_view format, bool allow_ignored, - std::initializer_list<Conv> convs); + explicit ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs); ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } @@ -234,8 +235,9 @@ class ParsedFormatBase { private: // Returns whether the conversions match and if !allow_ignored it verifies // that all conversions are used by the format. - bool MatchesConversions(bool allow_ignored, - std::initializer_list<Conv> convs) const; + bool MatchesConversions( + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const; struct ParsedFormatConsumer; @@ -280,14 +282,14 @@ class ParsedFormatBase { // This is the only API that allows the user to pass a runtime specified format // string. These factory functions will return NULL if the format does not match // the conversions requested by the user. -template <str_format_internal::Conv... C> +template <FormatConversionCharSet... C> class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { public: explicit ExtendedParsedFormat(string_view format) #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER __attribute__(( enable_if(str_format_internal::EnsureConstexpr(format), - "Format std::string is not constexpr."), + "Format string is not constexpr."), enable_if(str_format_internal::ValidFormatImpl<C...>(format), "Format specified does not match the template arguments."))) #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index 1b1ee030..5aced987 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -41,23 +41,23 @@ TEST(LengthModTest, Names) { TEST(ConversionCharTest, Names) { struct Expectation { - ConversionChar id; + FormatConversionChar id; char name; }; // clang-format off const Expectation kExpect[] = { -#define X(c) {ConversionChar::c, #c[0]} - X(c), X(C), X(s), X(S), // text +#define X(c) {FormatConversionCharInternal::c, #c[0]} + X(c), X(s), // text X(d), X(i), X(o), X(u), X(x), X(X), // int X(f), X(F), X(e), X(E), X(g), X(G), X(a), X(A), // float X(n), X(p), // misc #undef X - {ConversionChar::none, '\0'}, + {FormatConversionCharInternal::kNone, '\0'}, }; // clang-format on for (auto e : kExpect) { SCOPED_TRACE(e.name); - ConversionChar v = e.id; + FormatConversionChar v = e.id; EXPECT_EQ(e.name, FormatConversionCharToChar(v)); } } @@ -349,7 +349,8 @@ TEST_F(ParsedFormatTest, ValueSemantics) { ParsedFormatBase p2 = p1; // copy construct (empty) EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2)); - p1 = ParsedFormatBase("hello%s", true, {Conv::s}); // move assign + p1 = ParsedFormatBase("hello%s", true, + {FormatConversionCharSetInternal::s}); // move assign EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p1)); ParsedFormatBase p3 = p1; // copy construct (nonempty) @@ -367,7 +368,7 @@ TEST_F(ParsedFormatTest, ValueSemantics) { struct ExpectParse { const char* in; - std::initializer_list<Conv> conv_set; + std::initializer_list<FormatConversionCharSet> conv_set; const char* out; }; @@ -377,9 +378,9 @@ TEST_F(ParsedFormatTest, Parsing) { const ExpectParse kExpect[] = { {"", {}, ""}, {"ab", {}, "[ab]"}, - {"a%d", {Conv::d}, "[a]{d:1$d}"}, - {"a%+d", {Conv::d}, "[a]{+d:1$d}"}, - {"a% d", {Conv::d}, "[a]{ d:1$d}"}, + {"a%d", {FormatConversionCharSetInternal::d}, "[a]{d:1$d}"}, + {"a%+d", {FormatConversionCharSetInternal::d}, "[a]{+d:1$d}"}, + {"a% d", {FormatConversionCharSetInternal::d}, "[a]{ d:1$d}"}, {"a%b %d", {}, "[a]!"}, // stop after error }; for (const auto& e : kExpect) { @@ -391,13 +392,13 @@ TEST_F(ParsedFormatTest, Parsing) { TEST_F(ParsedFormatTest, ParsingFlagOrder) { const ExpectParse kExpect[] = { - {"a%+ 0d", {Conv::d}, "[a]{+ 0d:1$d}"}, - {"a%+0 d", {Conv::d}, "[a]{+0 d:1$d}"}, - {"a%0+ d", {Conv::d}, "[a]{0+ d:1$d}"}, - {"a% +0d", {Conv::d}, "[a]{ +0d:1$d}"}, - {"a%0 +d", {Conv::d}, "[a]{0 +d:1$d}"}, - {"a% 0+d", {Conv::d}, "[a]{ 0+d:1$d}"}, - {"a%+ 0+d", {Conv::d}, "[a]{+ 0+d:1$d}"}, + {"a%+ 0d", {FormatConversionCharSetInternal::d}, "[a]{+ 0d:1$d}"}, + {"a%+0 d", {FormatConversionCharSetInternal::d}, "[a]{+0 d:1$d}"}, + {"a%0+ d", {FormatConversionCharSetInternal::d}, "[a]{0+ d:1$d}"}, + {"a% +0d", {FormatConversionCharSetInternal::d}, "[a]{ +0d:1$d}"}, + {"a%0 +d", {FormatConversionCharSetInternal::d}, "[a]{0 +d:1$d}"}, + {"a% 0+d", {FormatConversionCharSetInternal::d}, "[a]{ 0+d:1$d}"}, + {"a%+ 0+d", {FormatConversionCharSetInternal::d}, "[a]{+ 0+d:1$d}"}, }; for (const auto& e : kExpect) { SCOPED_TRACE(e.in); diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h index b54f6ebe..6f5bc095 100644 --- a/absl/strings/internal/str_split_internal.h +++ b/absl/strings/internal/str_split_internal.h @@ -65,8 +65,8 @@ class ConvertibleToStringView { : value_(s) {} // Matches rvalue strings and moves their data to a member. -ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) - : copy_(std::move(s)), value_(copy_) {} + ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) + : copy_(std::move(s)), value_(copy_) {} ConvertibleToStringView(const ConvertibleToStringView& other) : copy_(other.copy_), diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index 68229b15..c2f03b63 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -40,6 +40,7 @@ #include "absl/random/distributions.h" #include "absl/random/random.h" #include "absl/strings/internal/numbers_test_common.h" +#include "absl/strings/internal/ostringstream.h" #include "absl/strings/internal/pow10_helper.h" #include "absl/strings/str_cat.h" @@ -358,6 +359,12 @@ TEST(NumbersTest, Atoi) { VerifySimpleAtoiGood<std::string::size_type>(42, 42); } +TEST(NumbersTest, Atod) { + double d; + EXPECT_TRUE(absl::SimpleAtod("nan", &d)); + EXPECT_TRUE(std::isnan(d)); +} + TEST(NumbersTest, Atoenum) { enum E01 { E01_zero = 0, @@ -481,7 +488,7 @@ TEST(stringtest, safe_strto32_base) { EXPECT_TRUE(safe_strto32_base(std::string("0x1234"), &value, 16)); EXPECT_EQ(0x1234, value); - // Base-10 std::string version. + // Base-10 string version. EXPECT_TRUE(safe_strto32_base("1234", &value, 10)); EXPECT_EQ(1234, value); } @@ -622,7 +629,7 @@ TEST(stringtest, safe_strto64_base) { EXPECT_TRUE(safe_strto64_base(std::string("0x1234"), &value, 16)); EXPECT_EQ(0x1234, value); - // Base-10 std::string version. + // Base-10 string version. EXPECT_TRUE(safe_strto64_base("1234", &value, 10)); EXPECT_EQ(1234, value); } diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc index d9afe2f3..dd5d25b0 100644 --- a/absl/strings/str_cat.cc +++ b/absl/strings/str_cat.cc @@ -141,12 +141,12 @@ namespace strings_internal { std::string CatPieces(std::initializer_list<absl::string_view> pieces) { std::string result; size_t total_size = 0; - for (const absl::string_view piece : pieces) total_size += piece.size(); + for (const absl::string_view& piece : pieces) total_size += piece.size(); strings_internal::STLStringResizeUninitialized(&result, total_size); char* const begin = &result[0]; char* out = begin; - for (const absl::string_view piece : pieces) { + for (const absl::string_view& piece : pieces) { const size_t this_size = piece.size(); if (this_size != 0) { memcpy(out, piece.data(), this_size); @@ -170,7 +170,7 @@ void AppendPieces(std::string* dest, std::initializer_list<absl::string_view> pieces) { size_t old_size = dest->size(); size_t total_size = old_size; - for (const absl::string_view piece : pieces) { + for (const absl::string_view& piece : pieces) { ASSERT_NO_OVERLAP(*dest, piece); total_size += piece.size(); } @@ -178,7 +178,7 @@ void AppendPieces(std::string* dest, char* const begin = &(*dest)[0]; char* out = begin + old_size; - for (const absl::string_view piece : pieces) { + for (const absl::string_view& piece : pieces) { const size_t this_size = piece.size(); if (this_size != 0) { memcpy(out, piece.data(), this_size); diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index 292fa235..a8a85c73 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -253,7 +253,7 @@ class AlphaNum { const std::basic_string<char, std::char_traits<char>, Allocator>& str) : piece_(str) {} - // Use std::string literals ":" instead of character literals ':'. + // Use string literals ":" instead of character literals ':'. AlphaNum(char c) = delete; // NOLINT(runtime/explicit) AlphaNum(const AlphaNum&) = delete; diff --git a/absl/strings/str_cat_benchmark.cc b/absl/strings/str_cat_benchmark.cc index 14c63b3f..02c4dbe6 100644 --- a/absl/strings/str_cat_benchmark.cc +++ b/absl/strings/str_cat_benchmark.cc @@ -23,7 +23,7 @@ namespace { const char kStringOne[] = "Once Upon A Time, "; -const char kStringTwo[] = "There was a std::string benchmark"; +const char kStringTwo[] = "There was a string benchmark"; // We want to include negative numbers in the benchmark, so this function // is used to count 0, 1, -1, 2, -2, 3, -3, ... @@ -137,4 +137,51 @@ void BM_DoubleToString_By_SixDigits(benchmark::State& state) { } BENCHMARK(BM_DoubleToString_By_SixDigits); +template <typename... Chunks> +void BM_StrAppendImpl(benchmark::State& state, size_t total_bytes, + Chunks... chunks) { + for (auto s : state) { + std::string result; + while (result.size() < total_bytes) { + absl::StrAppend(&result, chunks...); + benchmark::DoNotOptimize(result); + } + } +} + +void BM_StrAppend(benchmark::State& state) { + const int total_bytes = state.range(0); + const int chunks_at_a_time = state.range(1); + const absl::string_view kChunk = "0123456789"; + + switch (chunks_at_a_time) { + case 1: + return BM_StrAppendImpl(state, total_bytes, kChunk); + case 2: + return BM_StrAppendImpl(state, total_bytes, kChunk, kChunk); + case 4: + return BM_StrAppendImpl(state, total_bytes, kChunk, kChunk, kChunk, + kChunk); + case 8: + return BM_StrAppendImpl(state, total_bytes, kChunk, kChunk, kChunk, + kChunk, kChunk, kChunk, kChunk, kChunk); + default: + std::abort(); + } +} + +template <typename B> +void StrAppendConfig(B* benchmark) { + for (int bytes : {10, 100, 1000, 10000}) { + for (int chunks : {1, 2, 4, 8}) { + // Only add the ones that divide properly. Otherwise we are over counting. + if (bytes % (10 * chunks) == 0) { + benchmark->Args({bytes, chunks}); + } + } + } +} + +BENCHMARK(BM_StrAppend)->Apply(StrAppendConfig); + } // namespace diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc index be39880b..f3770dc0 100644 --- a/absl/strings/str_cat_test.cc +++ b/absl/strings/str_cat_test.cc @@ -162,7 +162,7 @@ TEST(StrCat, Basics) { EXPECT_EQ(result, "12345678910, 10987654321!"); std::string one = - "1"; // Actually, it's the size of this std::string that we want; a + "1"; // Actually, it's the size of this string that we want; a // 64-bit build distinguishes between size_t and uint64_t, // even though they're both unsigned 64-bit values. result = absl::StrCat("And a ", one.size(), " and a ", @@ -375,7 +375,7 @@ TEST(StrAppend, Basics) { EXPECT_EQ(result.substr(old_size), "12345678910, 10987654321!"); std::string one = - "1"; // Actually, it's the size of this std::string that we want; a + "1"; // Actually, it's the size of this string that we want; a // 64-bit build distinguishes between size_t and uint64_t, // even though they're both unsigned 64-bit values. old_size = result.size(); @@ -463,7 +463,7 @@ TEST(StrAppend, CornerCases) { } TEST(StrAppend, CornerCasesNonEmptyAppend) { - for (std::string result : {"hello", "a std::string too long to fit in the SSO"}) { + for (std::string result : {"hello", "a string too long to fit in the SSO"}) { const std::string expected = result; absl::StrAppend(&result, ""); EXPECT_EQ(result, expected); diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h index 2f9b4b27..01465107 100644 --- a/absl/strings/str_format.h +++ b/absl/strings/str_format.h @@ -19,7 +19,7 @@ // // The `str_format` library is a typesafe replacement for the family of // `printf()` string formatting routines within the `<cstdio>` standard library -// header. Like the `printf` family, the `str_format` uses a "format string" to +// header. Like the `printf` family, `str_format` uses a "format string" to // perform argument substitutions based on types. See the `FormatSpec` section // below for format string documentation. // @@ -57,8 +57,7 @@ // arbitrary sink types: // // * A generic `Format()` function to write outputs to arbitrary sink types, -// which must implement a `RawSinkFormat` interface. (See -// `str_format_sink.h` for more information.) +// which must implement a `FormatRawSink` interface. // // * A `FormatUntyped()` function that is similar to `Format()` except it is // loosely typed. `FormatUntyped()` is not a template and does not perform @@ -66,8 +65,7 @@ // boolean from a runtime check. // // In addition, the `str_format` library provides extension points for -// augmenting formatting to new types. These extensions are fully documented -// within the `str_format_extension.h` header file. +// augmenting formatting to new types. See "StrFormat Extensions" below. #ifndef ABSL_STRINGS_STR_FORMAT_H_ #define ABSL_STRINGS_STR_FORMAT_H_ @@ -255,8 +253,8 @@ class FormatCountCapture { // argument, etc. template <typename... Args> -using FormatSpec = - typename str_format_internal::FormatSpecDeductionBarrier<Args...>::type; +using FormatSpec = str_format_internal::FormatSpecTemplate< + str_format_internal::ArgumentToConv<Args>()...>; // ParsedFormat // @@ -283,9 +281,36 @@ using FormatSpec = // } else { // ... error case ... // } + +#if defined(__cpp_nontype_template_parameter_auto) +// If C++17 is available, an 'extended' format is also allowed that can specify +// multiple conversion characters per format argument, using a combination of +// `absl::FormatConversionCharSet` enum values (logically a set union) +// via the `|` operator. (Single character-based arguments are still accepted, +// but cannot be combined). Some common conversions also have predefined enum +// values, such as `absl::FormatConversionCharSet::kIntegral`. +// +// Example: +// // Extended format supports multiple conversion characters per argument, +// // specified via a combination of `FormatConversionCharSet` enums. +// using MyFormat = absl::ParsedFormat<absl::FormatConversionCharSet::d | +// absl::FormatConversionCharSet::x>; +// MyFormat GetFormat(bool use_hex) { +// if (use_hex) return MyFormat("foo %x bar"); +// return MyFormat("foo %d bar"); +// } +// // `format` can be used with any value that supports 'd' and 'x', +// // like `int`. +// auto format = GetFormat(use_hex); +// value = StringF(format, i); +template <auto... Conv> +using ParsedFormat = absl::str_format_internal::ExtendedParsedFormat< + absl::str_format_internal::ToFormatConversionCharSet(Conv)...>; +#else template <char... Conv> using ParsedFormat = str_format_internal::ExtendedParsedFormat< - str_format_internal::ConversionCharToConv(Conv)...>; + absl::str_format_internal::ToFormatConversionCharSet(Conv)...>; +#endif // defined(__cpp_nontype_template_parameter_auto) // StrFormat() // @@ -432,6 +457,16 @@ int SNPrintF(char* output, std::size_t size, const FormatSpec<Args...>& format, // // FormatRawSink is a type erased wrapper around arbitrary sink objects // specifically used as an argument to `Format()`. +// +// All the object has to do define an overload of `AbslFormatFlush()` for the +// sink, usually by adding a ADL-based free function in the same namespace as +// the sink: +// +// void AbslFormatFlush(MySink* dest, absl::string_view part); +// +// where `dest` is the pointer passed to `absl::Format()`. The function should +// append `part` to `dest`. +// // FormatRawSink does not own the passed sink object. The passed object must // outlive the FormatRawSink. class FormatRawSink { @@ -455,12 +490,13 @@ class FormatRawSink { // `absl::FormatRawSink` interface), using a format string and zero or more // additional arguments. // -// By default, `std::string` and `std::ostream` are supported as destination -// objects. If a `std::string` is used the formatted string is appended to it. +// By default, `std::string`, `std::ostream`, and `absl::Cord` are supported as +// destination objects. If a `std::string` is used the formatted string is +// appended to it. // -// `absl::Format()` is a generic version of `absl::StrFormat(), for custom -// sinks. The format string, like format strings for `StrFormat()`, is checked -// at compile-time. +// `absl::Format()` is a generic version of `absl::StrAppendFormat()`, for +// custom sinks. The format string, like format strings for `StrFormat()`, is +// checked at compile-time. // // On failure, this function returns `false` and the state of the sink is // unspecified. @@ -531,6 +567,246 @@ ABSL_MUST_USE_RESULT inline bool FormatUntyped( str_format_internal::UntypedFormatSpecImpl::Extract(format), args); } +//------------------------------------------------------------------------------ +// StrFormat Extensions +//------------------------------------------------------------------------------ +// +// AbslFormatConvert() +// +// The StrFormat library provides a customization API for formatting +// user-defined types using absl::StrFormat(). The API relies on detecting an +// overload in the user-defined type's namespace of a free (non-member) +// `AbslFormatConvert()` function, usually as a friend definition with the +// following signature: +// +// absl::FormatConvertResult<...> AbslFormatConvert( +// const X& value, +// const absl::FormatConversionSpec& spec, +// absl::FormatSink *sink); +// +// An `AbslFormatConvert()` overload for a type should only be declared in the +// same file and namespace as said type. +// +// The abstractions within this definition include: +// +// * An `absl::FormatConversionSpec` to specify the fields to pull from a +// user-defined type's format string +// * An `absl::FormatSink` to hold the converted string data during the +// conversion process. +// * An `absl::FormatConvertResult` to hold the status of the returned +// formatting operation +// +// The return type encodes all the conversion characters that your +// AbslFormatConvert() routine accepts. The return value should be {true}. +// A return value of {false} will result in `StrFormat()` returning +// an empty string. This result will be propagated to the result of +// `FormatUntyped`. +// +// Example: +// +// struct Point { +// // To add formatting support to `Point`, we simply need to add a free +// // (non-member) function `AbslFormatConvert()`. This method interprets +// // `spec` to print in the request format. The allowed conversion characters +// // can be restricted via the type of the result, in this example +// // string and integral formatting are allowed (but not, for instance +// // floating point characters like "%f"). You can add such a free function +// // using a friend declaration within the body of the class: +// friend absl::FormatConvertResult<absl::FormatConversionCharSet::kString | +// absl::FormatConversionCharSet::kIntegral> +// AbslFormatConvert(const Point& p, const absl::FormatConversionSpec& spec, +// absl::FormatSink* s) { +// if (spec.conversion_char() == absl::FormatConversionChar::s) { +// s->Append(absl::StrCat("x=", p.x, " y=", p.y)); +// } else { +// s->Append(absl::StrCat(p.x, ",", p.y)); +// } +// return {true}; +// } +// +// int x; +// int y; +// }; + +// clang-format off + +// FormatConversionChar +// +// Specifies the formatting character provided in the format string +// passed to `StrFormat()`. +enum class FormatConversionChar : uint8_t { + c, s, // text + d, i, o, u, x, X, // int + f, F, e, E, g, G, a, A, // float + n, p // misc +}; +// clang-format on + +// FormatConversionSpec +// +// Specifies modifications to the conversion of the format string, through use +// of one or more format flags in the source format string. +class FormatConversionSpec { + public: + // FormatConversionSpec::is_basic() + // + // Indicates that width and precision are not specified, and no additional + // flags are set for this conversion character in the format string. + bool is_basic() const { return impl_.is_basic(); } + + // FormatConversionSpec::has_left_flag() + // + // Indicates whether the result should be left justified for this conversion + // character in the format string. This flag is set through use of a '-' + // character in the format string. E.g. "%-s" + bool has_left_flag() const { return impl_.has_left_flag(); } + + // FormatConversionSpec::has_show_pos_flag() + // + // Indicates whether a sign column is prepended to the result for this + // conversion character in the format string, even if the result is positive. + // This flag is set through use of a '+' character in the format string. + // E.g. "%+d" + bool has_show_pos_flag() const { return impl_.has_show_pos_flag(); } + + // FormatConversionSpec::has_sign_col_flag() + // + // Indicates whether a mandatory sign column is added to the result for this + // conversion character. This flag is set through use of a space character + // (' ') in the format string. E.g. "% i" + bool has_sign_col_flag() const { return impl_.has_sign_col_flag(); } + + // FormatConversionSpec::has_alt_flag() + // + // Indicates whether an "alternate" format is applied to the result for this + // conversion character. Alternative forms depend on the type of conversion + // character, and unallowed alternatives are undefined. This flag is set + // through use of a '#' character in the format string. E.g. "%#h" + bool has_alt_flag() const { return impl_.has_alt_flag(); } + + // FormatConversionSpec::has_zero_flag() + // + // Indicates whether zeroes should be prepended to the result for this + // conversion character instead of spaces. This flag is set through use of the + // '0' character in the format string. E.g. "%0f" + bool has_zero_flag() const { return impl_.has_zero_flag(); } + + // FormatConversionSpec::conversion_char() + // + // Returns the underlying conversion character. + FormatConversionChar conversion_char() const { + return impl_.conversion_char(); + } + + // FormatConversionSpec::width() + // + // Returns the specified width (indicated through use of a non-zero integer + // value or '*' character) of the conversion character. If width is + // unspecified, it returns a negative value. + int width() const { return impl_.width(); } + + // FormatConversionSpec::precision() + // + // Returns the specified precision (through use of the '.' character followed + // by a non-zero integer value or '*' character) of the conversion character. + // If precision is unspecified, it returns a negative value. + int precision() const { return impl_.precision(); } + + private: + explicit FormatConversionSpec( + str_format_internal::FormatConversionSpecImpl impl) + : impl_(impl) {} + + friend str_format_internal::FormatConversionSpecImpl; + + absl::str_format_internal::FormatConversionSpecImpl impl_; +}; + +// Type safe OR operator for FormatConversionCharSet to allow accepting multiple +// conversion chars in custom format converters. +constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, + FormatConversionCharSet b) { + return static_cast<FormatConversionCharSet>(static_cast<uint64_t>(a) | + static_cast<uint64_t>(b)); +} + +// FormatConversionCharSet +// +// Specifies the _accepted_ conversion types as a template parameter to +// FormatConvertResult for custom implementations of `AbslFormatConvert`. +// Note the helper predefined alias definitions (kIntegral, etc.) below. +enum class FormatConversionCharSet : uint64_t { + // text + c = str_format_internal::FormatConversionCharToConvInt('c'), + s = str_format_internal::FormatConversionCharToConvInt('s'), + // integer + d = str_format_internal::FormatConversionCharToConvInt('d'), + i = str_format_internal::FormatConversionCharToConvInt('i'), + o = str_format_internal::FormatConversionCharToConvInt('o'), + u = str_format_internal::FormatConversionCharToConvInt('u'), + x = str_format_internal::FormatConversionCharToConvInt('x'), + X = str_format_internal::FormatConversionCharToConvInt('X'), + // Float + f = str_format_internal::FormatConversionCharToConvInt('f'), + F = str_format_internal::FormatConversionCharToConvInt('F'), + e = str_format_internal::FormatConversionCharToConvInt('e'), + E = str_format_internal::FormatConversionCharToConvInt('E'), + g = str_format_internal::FormatConversionCharToConvInt('g'), + G = str_format_internal::FormatConversionCharToConvInt('G'), + a = str_format_internal::FormatConversionCharToConvInt('a'), + A = str_format_internal::FormatConversionCharToConvInt('A'), + // misc + n = str_format_internal::FormatConversionCharToConvInt('n'), + p = str_format_internal::FormatConversionCharToConvInt('p'), + + // Used for width/precision '*' specification. + kStar = static_cast<uint64_t>( + absl::str_format_internal::FormatConversionCharSetInternal::kStar), + // Some predefined values: + kIntegral = d | i | u | o | x | X, + kFloating = a | e | f | g | A | E | F | G, + kNumeric = kIntegral | kFloating, + kString = s, + kPointer = p, +}; + +// FormatSink +// +// An abstraction to which conversions write their string data. +// +class FormatSink { + public: + // Appends `count` copies of `ch`. + void Append(size_t count, char ch) { sink_->Append(count, ch); } + + void Append(string_view v) { sink_->Append(v); } + + // Appends the first `precision` bytes of `v`. If this is less than + // `width`, spaces will be appended first (if `left` is false), or + // after (if `left` is true) to ensure the total amount appended is + // at least `width`. + bool PutPaddedString(string_view v, int width, int precision, bool left) { + return sink_->PutPaddedString(v, width, precision, left); + } + + private: + friend str_format_internal::FormatSinkImpl; + explicit FormatSink(str_format_internal::FormatSinkImpl* s) : sink_(s) {} + str_format_internal::FormatSinkImpl* sink_; +}; + +// FormatConvertResult +// +// Indicates whether a call to AbslFormatConvert() was successful. +// This return type informs the StrFormat extension framework (through +// ADL but using the return type) of what conversion characters are supported. +// It is strongly discouraged to return {false}, as this will result in an +// empty string in StrFormat. +template <FormatConversionCharSet C> +struct FormatConvertResult { + bool value; +}; + ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index acbdbf4a..d9fb25af 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -1,4 +1,6 @@ +#include "absl/strings/str_format.h" + #include <cstdarg> #include <cstdint> #include <cstdio> @@ -6,7 +8,8 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/strings/str_format.h" +#include "absl/strings/cord.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" namespace absl { @@ -242,7 +245,7 @@ class TempFile { std::FILE* file() const { return file_; } - // Read the file into a std::string. + // Read the file into a string. std::string ReadFile() { std::fseek(file_, 0, SEEK_END); int size = std::ftell(file_); @@ -345,11 +348,12 @@ TEST(StrFormat, BehavesAsDocumented) { EXPECT_EQ(StrFormat("%c", int{'a'}), "a"); EXPECT_EQ(StrFormat("%c", long{'a'}), "a"); // NOLINT EXPECT_EQ(StrFormat("%c", uint64_t{'a'}), "a"); - // "s" - std::string Eg: "C" -> "C", std::string("C++") -> "C++" + // "s" - string Eg: "C" -> "C", std::string("C++") -> "C++" // Formats std::string, char*, string_view, and Cord. EXPECT_EQ(StrFormat("%s", "C"), "C"); EXPECT_EQ(StrFormat("%s", std::string("C++")), "C++"); EXPECT_EQ(StrFormat("%s", string_view("view")), "view"); + EXPECT_EQ(StrFormat("%s", absl::Cord("cord")), "cord"); // Integral Conversion // These format integral types: char, int, long, uint64_t, etc. EXPECT_EQ(StrFormat("%d", char{10}), "10"); @@ -450,7 +454,7 @@ struct SummarizeConsumer { if (conv.precision.is_from_arg()) { *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; } - *out += FormatConversionCharToChar(conv.conv); + *out += str_format_internal::FormatConversionCharToChar(conv.conv); *out += "}"; return true; } @@ -532,76 +536,152 @@ TEST_F(ParsedFormatTest, SimpleUncheckedIncorrect) { EXPECT_FALSE((ParsedFormat<'s', 'd', 'g'>::New(format))); } -using str_format_internal::Conv; +#if defined(__cpp_nontype_template_parameter_auto) + +template <auto T> +std::true_type IsValidParsedFormatArgTest(ParsedFormat<T>*); + +template <auto T> +std::false_type IsValidParsedFormatArgTest(...); + +template <auto T> +using IsValidParsedFormatArg = decltype(IsValidParsedFormatArgTest<T>(nullptr)); + +TEST_F(ParsedFormatTest, OnlyValidTypesAllowed) { + ASSERT_TRUE(IsValidParsedFormatArg<'c'>::value); + + ASSERT_TRUE(IsValidParsedFormatArg<FormatConversionCharSet::d>::value); + + ASSERT_TRUE(IsValidParsedFormatArg<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::x>::value); + ASSERT_TRUE( + IsValidParsedFormatArg<absl::FormatConversionCharSet::kIntegral>::value); + + // This is an easy mistake to make, however, this will reduce to an integer + // which has no meaning, so we need to ensure it doesn't compile. + ASSERT_FALSE(IsValidParsedFormatArg<'x' | 'd'>::value); + + // For now, we disallow construction based on ConversionChar (rather than + // CharSet) + ASSERT_FALSE(IsValidParsedFormatArg<absl::FormatConversionChar::d>::value); +} + +TEST_F(ParsedFormatTest, ExtendedTyping) { + EXPECT_FALSE(ParsedFormat<FormatConversionCharSet::d>::New("")); + ASSERT_TRUE(ParsedFormat<absl::FormatConversionCharSet::d>::New("%d")); + auto v1 = ParsedFormat<'d', absl::FormatConversionCharSet::s>::New("%d%s"); + ASSERT_TRUE(v1); + auto v2 = ParsedFormat<absl::FormatConversionCharSet::d, 's'>::New("%d%s"); + ASSERT_TRUE(v2); + auto v3 = ParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::s, + 's'>::New("%d%s"); + ASSERT_TRUE(v3); + auto v4 = ParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::s, + 's'>::New("%s%s"); + ASSERT_TRUE(v4); +} +#endif TEST_F(ParsedFormatTest, UncheckedCorrect) { - auto f = ExtendedParsedFormat<Conv::d>::New("ABC%dDEF"); + auto f = + ExtendedParsedFormat<absl::FormatConversionCharSet::d>::New("ABC%dDEF"); ASSERT_TRUE(f); EXPECT_EQ("[ABC]{d:1$d}[DEF]", SummarizeParsedFormat(*f)); std::string format = "%sFFF%dZZZ%f"; - auto f2 = - ExtendedParsedFormat<Conv::string, Conv::d, Conv::floating>::New(format); + auto f2 = ExtendedParsedFormat< + absl::FormatConversionCharSet::kString, absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::kFloating>::New(format); ASSERT_TRUE(f2); EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", SummarizeParsedFormat(*f2)); - f2 = ExtendedParsedFormat<Conv::string, Conv::d, Conv::floating>::New( - "%s %d %f"); + f2 = ExtendedParsedFormat< + absl::FormatConversionCharSet::kString, absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::kFloating>::New("%s %d %f"); ASSERT_TRUE(f2); EXPECT_EQ("{s:1$s}[ ]{d:2$d}[ ]{f:3$f}", SummarizeParsedFormat(*f2)); - auto star = ExtendedParsedFormat<Conv::star, Conv::d>::New("%*d"); + auto star = + ExtendedParsedFormat<absl::FormatConversionCharSet::kStar, + absl::FormatConversionCharSet::d>::New("%*d"); ASSERT_TRUE(star); EXPECT_EQ("{*d:2$1$*d}", SummarizeParsedFormat(*star)); - auto dollar = ExtendedParsedFormat<Conv::d, Conv::s>::New("%2$s %1$d"); + auto dollar = + ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("%2$s %1$d"); ASSERT_TRUE(dollar); EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}", SummarizeParsedFormat(*dollar)); // with reuse - dollar = ExtendedParsedFormat<Conv::d, Conv::s>::New("%2$s %1$d %1$d"); + dollar = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("%2$s %1$d %1$d"); ASSERT_TRUE(dollar); EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}[ ]{1$d:1$d}", SummarizeParsedFormat(*dollar)); } TEST_F(ParsedFormatTest, UncheckedIgnoredArgs) { - EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::s>::New("ABC"))); - EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::s>::New("%dABC"))); - EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::s>::New("ABC%2$s"))); - auto f = ExtendedParsedFormat<Conv::d, Conv::s>::NewAllowIgnored("ABC"); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("ABC"))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("%dABC"))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("ABC%2$s"))); + auto f = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::NewAllowIgnored("ABC"); ASSERT_TRUE(f); EXPECT_EQ("[ABC]", SummarizeParsedFormat(*f)); - f = ExtendedParsedFormat<Conv::d, Conv::s>::NewAllowIgnored("%dABC"); + f = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::NewAllowIgnored("%dABC"); ASSERT_TRUE(f); EXPECT_EQ("{d:1$d}[ABC]", SummarizeParsedFormat(*f)); - f = ExtendedParsedFormat<Conv::d, Conv::s>::NewAllowIgnored("ABC%2$s"); + f = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::NewAllowIgnored("ABC%2$s"); ASSERT_TRUE(f); EXPECT_EQ("[ABC]{2$s:2$s}", SummarizeParsedFormat(*f)); } TEST_F(ParsedFormatTest, UncheckedMultipleTypes) { - auto dx = ExtendedParsedFormat<Conv::d | Conv::x>::New("%1$d %1$x"); + auto dx = + ExtendedParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::x>::New("%1$d %1$x"); EXPECT_TRUE(dx); EXPECT_EQ("{1$d:1$d}[ ]{1$x:1$x}", SummarizeParsedFormat(*dx)); - dx = ExtendedParsedFormat<Conv::d | Conv::x>::New("%1$d"); + dx = ExtendedParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::x>::New("%1$d"); EXPECT_TRUE(dx); EXPECT_EQ("{1$d:1$d}", SummarizeParsedFormat(*dx)); } TEST_F(ParsedFormatTest, UncheckedIncorrect) { - EXPECT_FALSE(ExtendedParsedFormat<Conv::d>::New("")); + EXPECT_FALSE(ExtendedParsedFormat<absl::FormatConversionCharSet::d>::New("")); - EXPECT_FALSE(ExtendedParsedFormat<Conv::d>::New("ABC%dDEF%d")); + EXPECT_FALSE(ExtendedParsedFormat<absl::FormatConversionCharSet::d>::New( + "ABC%dDEF%d")); std::string format = "%sFFF%dZZZ%f"; - EXPECT_FALSE((ExtendedParsedFormat<Conv::s, Conv::d, Conv::g>::New(format))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::s, + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::g>::New(format))); } TEST_F(ParsedFormatTest, RegressionMixPositional) { - EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::o>::New("%1$d %o"))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::o>::New("%1$d %o"))); } using FormatWrapperTest = ::testing::Test; @@ -626,6 +706,38 @@ TEST_F(FormatWrapperTest, ParsedFormat) { ABSL_NAMESPACE_END } // namespace absl +using FormatExtensionTest = ::testing::Test; + +struct Point { + friend absl::FormatConvertResult<absl::FormatConversionCharSet::kString | + absl::FormatConversionCharSet::kIntegral> + AbslFormatConvert(const Point& p, const absl::FormatConversionSpec& spec, + absl::FormatSink* s) { + if (spec.conversion_char() == absl::FormatConversionChar::s) { + s->Append(absl::StrCat("x=", p.x, " y=", p.y)); + } else { + s->Append(absl::StrCat(p.x, ",", p.y)); + } + return {true}; + } + + int x = 10; + int y = 20; +}; + +TEST_F(FormatExtensionTest, AbslFormatConvertExample) { + Point p; + EXPECT_EQ(absl::StrFormat("a %s z", p), "a x=10 y=20 z"); + EXPECT_EQ(absl::StrFormat("a %d z", p), "a 10,20 z"); + + // Typed formatting will fail to compile an invalid format. + // StrFormat("%f", p); // Does not compile. + std::string actual; + absl::UntypedFormatSpec f1("%f"); + // FormatUntyped will return false for bad character. + EXPECT_FALSE(absl::FormatUntyped(&actual, f1, {absl::FormatArg(p)})); +} + // Some codegen thunks that we can use to easily dump the generated assembly for // different StrFormat calls. diff --git a/absl/strings/str_join_test.cc b/absl/strings/str_join_test.cc index 921d9c2b..2be6256e 100644 --- a/absl/strings/str_join_test.cc +++ b/absl/strings/str_join_test.cc @@ -134,26 +134,26 @@ TEST(StrJoin, APIExamples) { // { - // Empty range yields an empty std::string. + // Empty range yields an empty string. std::vector<std::string> v; EXPECT_EQ("", absl::StrJoin(v, "-")); } { - // A range of 1 element gives a std::string with that element but no + // A range of 1 element gives a string with that element but no // separator. std::vector<std::string> v = {"foo"}; EXPECT_EQ("foo", absl::StrJoin(v, "-")); } { - // A range with a single empty std::string element + // A range with a single empty string element std::vector<std::string> v = {""}; EXPECT_EQ("", absl::StrJoin(v, "-")); } { - // A range with 2 elements, one of which is an empty std::string + // A range with 2 elements, one of which is an empty string std::vector<std::string> v = {"a", ""}; EXPECT_EQ("a-", absl::StrJoin(v, "-")); } diff --git a/absl/strings/str_replace_benchmark.cc b/absl/strings/str_replace_benchmark.cc index 95b2dc10..01331da2 100644 --- a/absl/strings/str_replace_benchmark.cc +++ b/absl/strings/str_replace_benchmark.cc @@ -62,7 +62,7 @@ void SetUpStrings() { } } // big_string->resize(50); - // OK, we've set up the std::string, now let's set up expectations - first by + // OK, we've set up the string, now let's set up expectations - first by // just replacing "the" with "box" after_replacing_the = new std::string(*big_string); for (size_t pos = 0; diff --git a/absl/strings/str_replace_test.cc b/absl/strings/str_replace_test.cc index 1ca23aff..9d8c7f75 100644 --- a/absl/strings/str_replace_test.cc +++ b/absl/strings/str_replace_test.cc @@ -25,7 +25,7 @@ TEST(StrReplaceAll, OneReplacement) { std::string s; - // Empty std::string. + // Empty string. s = absl::StrReplaceAll(s, {{"", ""}}); EXPECT_EQ(s, ""); s = absl::StrReplaceAll(s, {{"x", ""}}); @@ -47,7 +47,7 @@ TEST(StrReplaceAll, OneReplacement) { s = absl::StrReplaceAll("abc", {{"xyz", "123"}}); EXPECT_EQ(s, "abc"); - // Replace entire std::string. + // Replace entire string. s = absl::StrReplaceAll("abc", {{"abc", "xyz"}}); EXPECT_EQ(s, "xyz"); @@ -88,7 +88,7 @@ TEST(StrReplaceAll, OneReplacement) { TEST(StrReplaceAll, ManyReplacements) { std::string s; - // Empty std::string. + // Empty string. s = absl::StrReplaceAll("", {{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}); EXPECT_EQ(s, ""); @@ -96,7 +96,7 @@ TEST(StrReplaceAll, ManyReplacements) { s = absl::StrReplaceAll("abc", {{"", ""}, {"", "y"}, {"x", ""}}); EXPECT_EQ(s, "abc"); - // Replace entire std::string, one char at a time + // Replace entire string, one char at a time s = absl::StrReplaceAll("abc", {{"a", "x"}, {"b", "y"}, {"c", "z"}}); EXPECT_EQ(s, "xyz"); s = absl::StrReplaceAll("zxy", {{"z", "x"}, {"x", "y"}, {"y", "z"}}); @@ -264,7 +264,7 @@ TEST(StrReplaceAll, Inplace) { std::string s; int reps; - // Empty std::string. + // Empty string. s = ""; reps = absl::StrReplaceAll({{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}, &s); EXPECT_EQ(reps, 0); @@ -276,7 +276,7 @@ TEST(StrReplaceAll, Inplace) { EXPECT_EQ(reps, 0); EXPECT_EQ(s, "abc"); - // Replace entire std::string, one char at a time + // Replace entire string, one char at a time s = "abc"; reps = absl::StrReplaceAll({{"a", "x"}, {"b", "y"}, {"c", "z"}}, &s); EXPECT_EQ(reps, 3); diff --git a/absl/strings/str_split.cc b/absl/strings/str_split.cc index d0f86669..e08c26b6 100644 --- a/absl/strings/str_split.cc +++ b/absl/strings/str_split.cc @@ -42,7 +42,7 @@ absl::string_view GenericFind(absl::string_view text, absl::string_view delimiter, size_t pos, FindPolicy find_policy) { if (delimiter.empty() && text.length() > 0) { - // Special case for empty std::string delimiters: always return a zero-length + // Special case for empty string delimiters: always return a zero-length // absl::string_view referring to the item at position 1 past pos. return absl::string_view(text.data() + pos + 1, 0); } @@ -127,7 +127,7 @@ absl::string_view ByLength::Find(absl::string_view text, size_t pos) const { pos = std::min(pos, text.size()); // truncate `pos` absl::string_view substr = text.substr(pos); - // If the std::string is shorter than the chunk size we say we + // If the string is shorter than the chunk size we say we // "can't find the delimiter" so this will be the last chunk. if (substr.length() <= static_cast<size_t>(length_)) return absl::string_view(text.data() + text.size(), 0); diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h index a79cd4a0..1ce17f38 100644 --- a/absl/strings/str_split.h +++ b/absl/strings/str_split.h @@ -44,6 +44,7 @@ #include <vector> #include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" #include "absl/strings/internal/str_split_internal.h" #include "absl/strings/string_view.h" #include "absl/strings/strip.h" diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc index 02f27bc4..b5ce68de 100644 --- a/absl/strings/str_split_test.cc +++ b/absl/strings/str_split_test.cc @@ -27,8 +27,10 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/dynamic_annotations.h" // for RunningOnValgrind +#include "absl/base/dynamic_annotations.h" #include "absl/base/macros.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/node_hash_map.h" #include "absl/strings/numbers.h" namespace { @@ -71,7 +73,7 @@ TEST(Split, TraitsTest) { // namespaces just like callers will need to use. TEST(Split, APIExamples) { { - // Passes std::string delimiter. Assumes the default of ByString. + // Passes string delimiter. Assumes the default of ByString. std::vector<std::string> v = absl::StrSplit("a,b,c", ","); // NOLINT EXPECT_THAT(v, ElementsAre("a", "b", "c")); @@ -97,7 +99,7 @@ TEST(Split, APIExamples) { } { - // Uses the Literal std::string "=>" as the delimiter. + // Uses the Literal string "=>" as the delimiter. const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>"); EXPECT_THAT(v, ElementsAre("a", "b", "c")); } @@ -121,17 +123,17 @@ TEST(Split, APIExamples) { } { - // Splits the input std::string into individual characters by using an empty - // std::string as the delimiter. + // Splits the input string into individual characters by using an empty + // string as the delimiter. std::vector<std::string> v = absl::StrSplit("abc", ""); EXPECT_THAT(v, ElementsAre("a", "b", "c")); } { - // Splits std::string data with embedded NUL characters, using NUL as the + // Splits string data with embedded NUL characters, using NUL as the // delimiter. A simple delimiter of "\0" doesn't work because strlen() will - // say that's the empty std::string when constructing the absl::string_view - // delimiter. Instead, a non-empty std::string containing NUL can be used as the + // say that's the empty string when constructing the absl::string_view + // delimiter. Instead, a non-empty string containing NUL can be used as the // delimiter. std::string embedded_nulls("a\0b\0c", 5); std::string null_delim("\0", 1); @@ -421,6 +423,18 @@ TEST(Splitter, ConversionOperator) { TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter); TestMapConversionOperator<std::unordered_map<std::string, std::string>>( splitter); + TestMapConversionOperator< + absl::node_hash_map<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::node_hash_map<absl::string_view, std::string>>(splitter); + TestMapConversionOperator< + absl::node_hash_map<std::string, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::flat_hash_map<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::flat_hash_map<absl::string_view, std::string>>(splitter); + TestMapConversionOperator< + absl::flat_hash_map<std::string, absl::string_view>>(splitter); // Tests conversion to std::pair @@ -436,7 +450,7 @@ TEST(Splitter, ConversionOperator) { // less-than, equal-to, and more-than 2 strings. TEST(Splitter, ToPair) { { - // Empty std::string + // Empty string std::pair<std::string, std::string> p = absl::StrSplit("", ','); EXPECT_EQ("", p.first); EXPECT_EQ("", p.second); @@ -565,7 +579,7 @@ TEST(Split, AcceptsCertainTemporaries) { TEST(Split, Temporary) { // Use a std::string longer than the SSO length, so that when the temporary is - // destroyed, if the splitter keeps a reference to the std::string's contents, + // destroyed, if the splitter keeps a reference to the string's contents, // it'll reference freed memory instead of just dead on-stack memory. const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u"; EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input)) @@ -651,14 +665,14 @@ TEST(Split, UTF8) { // Tests splitting utf8 strings and utf8 delimiters. std::string utf8_string = u8"\u03BA\u1F79\u03C3\u03BC\u03B5"; { - // A utf8 input std::string with an ascii delimiter. + // A utf8 input string with an ascii delimiter. std::string to_split = "a," + utf8_string; std::vector<absl::string_view> v = absl::StrSplit(to_split, ','); EXPECT_THAT(v, ElementsAre("a", utf8_string)); } { - // A utf8 input std::string and a utf8 delimiter. + // A utf8 input string and a utf8 delimiter. std::string to_split = "a," + utf8_string + ",b"; std::string unicode_delimiter = "," + utf8_string + ","; std::vector<absl::string_view> v = @@ -667,7 +681,7 @@ TEST(Split, UTF8) { } { - // A utf8 input std::string and ByAnyChar with ascii chars. + // A utf8 input string and ByAnyChar with ascii chars. std::vector<absl::string_view> v = absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t")); EXPECT_THAT(v, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere")); @@ -814,10 +828,10 @@ TEST(Delimiter, ByString) { ByString comma_string(","); TestComma(comma_string); - // The first occurrence of empty std::string ("") in a std::string is at position 0. + // The first occurrence of empty string ("") in a string is at position 0. // There is a test below that demonstrates this for absl::string_view::find(). // If the ByString delimiter returned position 0 for this, there would - // be an infinite loop in the SplitIterator code. To avoid this, empty std::string + // be an infinite loop in the SplitIterator code. To avoid this, empty string // is a special case in that it always returns the item at position 1. absl::string_view abc("abc"); EXPECT_EQ(0, abc.find("")); // "" is found at position 0 @@ -876,7 +890,7 @@ TEST(Delimiter, ByAnyChar) { EXPECT_FALSE(IsFoundAt("=", two_delims, -1)); // ByAnyChar behaves just like ByString when given a delimiter of empty - // std::string. That is, it always returns a zero-length absl::string_view + // string. That is, it always returns a zero-length absl::string_view // referring to the item at position 1, not position 0. ByAnyChar empty(""); EXPECT_FALSE(IsFoundAt("", empty, 0)); @@ -913,7 +927,7 @@ TEST(Split, WorksWithLargeStrings) { std::vector<absl::string_view> v = absl::StrSplit(s, '-'); EXPECT_EQ(2, v.size()); // The first element will contain 2G of 'x's. - // testing::StartsWith is too slow with a 2G std::string. + // testing::StartsWith is too slow with a 2G string. EXPECT_EQ('x', v[0][0]); EXPECT_EQ('x', v[0][1]); EXPECT_EQ('x', v[0][3]); diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index 1861ea62..5260b5b7 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -48,7 +48,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN -using std::string_view; +using string_view = std::string_view; ABSL_NAMESPACE_END } // namespace absl @@ -111,6 +111,11 @@ ABSL_NAMESPACE_BEGIN // example, when splitting a string, `std::vector<absl::string_view>` is a // natural data type for the output. // +// For another example, a Cord is a non-contiguous, potentially very +// long string-like object. The Cord class has an interface that iteratively +// provides string_view objects that point to the successive pieces of a Cord +// object. +// // When constructed from a source which is NUL-terminated, the `string_view` // itself will not include the NUL-terminator unless a specific size (including // the NUL) is passed to the constructor. As a result, common idioms that work @@ -283,7 +288,7 @@ class string_view { // Returns the ith element of the `string_view` using the array operator. // Note that this operator does not perform any bounds checking. constexpr const_reference operator[](size_type i) const { - return ABSL_ASSERT(i < size()), ptr_[i]; + return ABSL_HARDENING_ASSERT(i < size()), ptr_[i]; } // string_view::at() @@ -303,14 +308,14 @@ class string_view { // // Returns the first element of a `string_view`. constexpr const_reference front() const { - return ABSL_ASSERT(!empty()), ptr_[0]; + return ABSL_HARDENING_ASSERT(!empty()), ptr_[0]; } // string_view::back() // // Returns the last element of a `string_view`. constexpr const_reference back() const { - return ABSL_ASSERT(!empty()), ptr_[size() - 1]; + return ABSL_HARDENING_ASSERT(!empty()), ptr_[size() - 1]; } // string_view::data() @@ -319,7 +324,7 @@ class string_view { // stored elsewhere). Note that `string_view::data()` may contain embedded nul // characters, but the returned buffer may or may not be NUL-terminated; // therefore, do not pass `data()` to a routine that expects a NUL-terminated - // std::string. + // string. constexpr const_pointer data() const noexcept { return ptr_; } // Modifiers @@ -327,9 +332,9 @@ class string_view { // string_view::remove_prefix() // // Removes the first `n` characters from the `string_view`. Note that the - // underlying std::string is not changed, only the view. + // underlying string is not changed, only the view. void remove_prefix(size_type n) { - assert(n <= length_); + ABSL_HARDENING_ASSERT(n <= length_); ptr_ += n; length_ -= n; } @@ -337,9 +342,9 @@ class string_view { // string_view::remove_suffix() // // Removes the last `n` characters from the `string_view`. Note that the - // underlying std::string is not changed, only the view. + // underlying string is not changed, only the view. void remove_suffix(size_type n) { - assert(n <= length_); + ABSL_HARDENING_ASSERT(n <= length_); length_ -= n; } @@ -382,18 +387,20 @@ class string_view { // Returns a "substring" of the `string_view` (at offset `pos` and length // `n`) as another string_view. This function throws `std::out_of_bounds` if // `pos > size`. - string_view substr(size_type pos, size_type n = npos) const { - if (ABSL_PREDICT_FALSE(pos > length_)) - base_internal::ThrowStdOutOfRange("absl::string_view::substr"); - n = (std::min)(n, length_ - pos); - return string_view(ptr_ + pos, n); + // Use absl::ClippedSubstr if you need a truncating substr operation. + constexpr string_view substr(size_type pos, size_type n = npos) const { + return ABSL_PREDICT_FALSE(pos > length_) + ? (base_internal::ThrowStdOutOfRange( + "absl::string_view::substr"), + string_view()) + : string_view(ptr_ + pos, Min(n, length_ - pos)); } // string_view::compare() // // Performs a lexicographical comparison between the `string_view` and // another `absl::string_view`, returning -1 if `this` is less than, 0 if - // `this` is equal to, and 1 if `this` is greater than the passed std::string + // `this` is equal to, and 1 if `this` is greater than the passed string // view. Note that in the case of data equality, a further comparison is made // on the respective sizes of the two `string_view`s to determine which is // smaller, equal, or greater. @@ -419,17 +426,17 @@ class string_view { } // Overload of `string_view::compare()` for comparing a `string_view` and a - // a different C-style std::string `s`. + // a different C-style string `s`. int compare(const char* s) const { return compare(string_view(s)); } // Overload of `string_view::compare()` for comparing a substring of the - // `string_view` and a different std::string C-style std::string `s`. + // `string_view` and a different string C-style string `s`. int compare(size_type pos1, size_type count1, const char* s) const { return substr(pos1, count1).compare(string_view(s)); } // Overload of `string_view::compare()` for comparing a substring of the - // `string_view` and a substring of a different C-style std::string `s`. + // `string_view` and a substring of a different C-style string `s`. int compare(size_type pos1, size_type count1, const char* s, size_type count2) const { return substr(pos1, count1).compare(string_view(s, count2)); @@ -519,7 +526,7 @@ class string_view { (std::numeric_limits<difference_type>::max)(); static constexpr size_type CheckLengthInternal(size_type len) { - return (void)ABSL_ASSERT(len <= kMaxSize), len; + return ABSL_HARDENING_ASSERT(len <= kMaxSize), len; } static constexpr size_type StrlenInternal(const char* str) { diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc index 7b1d56fa..dcebb150 100644 --- a/absl/strings/string_view_test.cc +++ b/absl/strings/string_view_test.cc @@ -28,6 +28,7 @@ #include "gtest/gtest.h" #include "absl/base/config.h" #include "absl/base/dynamic_annotations.h" +#include "absl/base/options.h" #if defined(ABSL_HAVE_STD_STRING_VIEW) || defined(__ANDROID__) // We don't control the death messaging when using std::string_view. @@ -410,7 +411,7 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(a.find(e, 17), 17); absl::string_view g("xx not found bb"); EXPECT_EQ(a.find(g), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.find(b), absl::string_view::npos); EXPECT_EQ(e.find(b), absl::string_view::npos); EXPECT_EQ(d.find(b, 4), absl::string_view::npos); @@ -438,7 +439,7 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(g.find('o', 4), 4); EXPECT_EQ(g.find('o', 5), 8); EXPECT_EQ(a.find('b', 5), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.find('\0'), absl::string_view::npos); EXPECT_EQ(e.find('\0'), absl::string_view::npos); EXPECT_EQ(d.find('\0', 4), absl::string_view::npos); @@ -465,7 +466,7 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(e.rfind(b), absl::string_view::npos); EXPECT_EQ(d.rfind(b, 4), absl::string_view::npos); EXPECT_EQ(e.rfind(b, 7), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.rfind(d, 4), std::string().rfind(std::string())); EXPECT_EQ(e.rfind(d, 7), std::string().rfind(std::string())); EXPECT_EQ(d.rfind(e, 4), std::string().rfind(std::string())); @@ -484,7 +485,7 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(f.rfind('\0', 12), 3); EXPECT_EQ(f.rfind('3'), 2); EXPECT_EQ(f.rfind('5'), 5); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.rfind('o'), absl::string_view::npos); EXPECT_EQ(e.rfind('o'), absl::string_view::npos); EXPECT_EQ(d.rfind('o', 4), absl::string_view::npos); @@ -520,7 +521,7 @@ TEST(StringViewTest, STL2FindFirst) { EXPECT_EQ(g.find_first_of(c), 0); EXPECT_EQ(a.find_first_of(f), absl::string_view::npos); EXPECT_EQ(f.find_first_of(a), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(a.find_first_of(d), absl::string_view::npos); EXPECT_EQ(a.find_first_of(e), absl::string_view::npos); EXPECT_EQ(d.find_first_of(b), absl::string_view::npos); @@ -538,7 +539,7 @@ TEST(StringViewTest, STL2FindFirst) { EXPECT_EQ(a.find_first_not_of(f), 0); EXPECT_EQ(a.find_first_not_of(d), 0); EXPECT_EQ(a.find_first_not_of(e), 0); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(a.find_first_not_of(d), 0); EXPECT_EQ(a.find_first_not_of(e), 0); EXPECT_EQ(a.find_first_not_of(d, 1), 1); @@ -566,7 +567,7 @@ TEST(StringViewTest, STL2FindFirst) { EXPECT_EQ(f.find_first_not_of('\0'), 0); EXPECT_EQ(f.find_first_not_of('\0', 3), 4); EXPECT_EQ(f.find_first_not_of('\0', 2), 2); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.find_first_not_of('x'), absl::string_view::npos); EXPECT_EQ(e.find_first_not_of('x'), absl::string_view::npos); EXPECT_EQ(d.find_first_not_of('\0'), absl::string_view::npos); @@ -606,7 +607,7 @@ TEST(StringViewTest, STL2FindLast) { EXPECT_EQ(f.find_last_of(i, 5), 5); EXPECT_EQ(f.find_last_of(i, 6), 6); EXPECT_EQ(f.find_last_of(a, 4), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(f.find_last_of(d), absl::string_view::npos); EXPECT_EQ(f.find_last_of(e), absl::string_view::npos); EXPECT_EQ(f.find_last_of(d, 4), absl::string_view::npos); @@ -632,7 +633,7 @@ TEST(StringViewTest, STL2FindLast) { EXPECT_EQ(a.find_last_not_of(c, 24), 22); EXPECT_EQ(a.find_last_not_of(b, 3), 3); EXPECT_EQ(a.find_last_not_of(b, 2), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(f.find_last_not_of(d), f.size()-1); EXPECT_EQ(f.find_last_not_of(e), f.size()-1); EXPECT_EQ(f.find_last_not_of(d, 4), 4); @@ -656,7 +657,7 @@ TEST(StringViewTest, STL2FindLast) { EXPECT_EQ(h.find_last_not_of('x', 2), 2); EXPECT_EQ(h.find_last_not_of('=', 2), absl::string_view::npos); EXPECT_EQ(b.find_last_not_of('b', 1), 0); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.find_last_not_of('x'), absl::string_view::npos); EXPECT_EQ(e.find_last_not_of('x'), absl::string_view::npos); EXPECT_EQ(d.find_last_not_of('\0'), absl::string_view::npos); @@ -678,7 +679,7 @@ TEST(StringViewTest, STL2Substr) { EXPECT_EQ(a.substr(23, 99), c); EXPECT_EQ(a.substr(0), a); EXPECT_EQ(a.substr(3, 2), "de"); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.substr(0, 99), e); // use of npos EXPECT_EQ(a.substr(0, absl::string_view::npos), a); @@ -820,7 +821,7 @@ TEST(StringViewTest, FrontBackSingleChar) { TEST(StringViewTest, FrontBackEmpty) { #ifndef ABSL_USES_STD_STRING_VIEW -#ifndef NDEBUG +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED // Abseil's string_view implementation has debug assertions that check that // front() and back() are not called on an empty string_view. absl::string_view sv; @@ -859,7 +860,7 @@ TEST(StringViewTest, NULLInput) { EXPECT_EQ(s.size(), 0); // .ToString() on a absl::string_view with nullptr should produce the empty - // std::string. + // string. EXPECT_EQ("", std::string(s)); #endif // ABSL_HAVE_STRING_VIEW_FROM_NULLPTR } @@ -977,7 +978,7 @@ TEST(StringViewTest, ConstexprCompiles) { #if defined(ABSL_USES_STD_STRING_VIEW) // In libstdc++ (as of 7.2), `std::string_view::string_view(const char*)` - // calls `std::char_traits<char>::length(const char*)` to get the std::string + // calls `std::char_traits<char>::length(const char*)` to get the string // length, but it is not marked constexpr yet. See GCC bug: // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78156 // Also, there is a LWG issue that adds constexpr to length() which was just @@ -1086,6 +1087,14 @@ TEST(StringViewTest, ConstexprCompiles) { EXPECT_EQ(sp_npos, -1); } +TEST(StringViewTest, ConstexprSubstr) { + constexpr absl::string_view foobar("foobar", 6); + constexpr absl::string_view foo = foobar.substr(0, 3); + constexpr absl::string_view bar = foobar.substr(3); + EXPECT_EQ(foo, "foo"); + EXPECT_EQ(bar, "bar"); +} + TEST(StringViewTest, Noexcept) { EXPECT_TRUE((std::is_nothrow_constructible<absl::string_view, const std::string&>::value)); @@ -1122,7 +1131,7 @@ TEST(StringViewTest, Noexcept) { TEST(StringViewTest, BoundsCheck) { #ifndef ABSL_USES_STD_STRING_VIEW -#ifndef NDEBUG +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED // Abseil's string_view implementation has bounds-checking in debug mode. absl::string_view h = "hello"; ABSL_EXPECT_DEATH_IF_SUPPORTED(h[5], ""); @@ -1168,11 +1177,11 @@ TEST(FindOneCharTest, EdgeCases) { EXPECT_EQ(absl::string_view::npos, a.rfind('x')); } -#ifndef THREAD_SANITIZER // Allocates too much memory for tsan. +#ifndef ABSL_HAVE_THREAD_SANITIZER // Allocates too much memory for tsan. TEST(HugeStringView, TwoPointTwoGB) { - if (sizeof(size_t) <= 4 || RunningOnValgrind()) + if (sizeof(size_t) <= 4) return; - // Try a huge std::string piece. + // Try a huge string piece. const size_t size = size_t{2200} * 1000 * 1000; std::string s(size, 'a'); absl::string_view sp(s); @@ -1182,7 +1191,7 @@ TEST(HugeStringView, TwoPointTwoGB) { sp.remove_suffix(2); EXPECT_EQ(size - 1 - 2, sp.length()); } -#endif // THREAD_SANITIZER +#endif // ABSL_HAVE_THREAD_SANITIZER #if !defined(NDEBUG) && !defined(ABSL_USES_STD_STRING_VIEW) TEST(NonNegativeLenTest, NonNegativeLen) { diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc index 5b69a3ef..1f3c7409 100644 --- a/absl/strings/substitute.cc +++ b/absl/strings/substitute.cc @@ -36,7 +36,7 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, if (i + 1 >= format.size()) { #ifndef NDEBUG ABSL_RAW_LOG(FATAL, - "Invalid absl::Substitute() format std::string: \"%s\".", + "Invalid absl::Substitute() format string: \"%s\".", absl::CEscape(format).c_str()); #endif return; @@ -46,8 +46,8 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, #ifndef NDEBUG ABSL_RAW_LOG( FATAL, - "Invalid absl::Substitute() format std::string: asked for \"$" - "%d\", but only %d args were given. Full format std::string was: " + "Invalid absl::Substitute() format string: asked for \"$" + "%d\", but only %d args were given. Full format string was: " "\"%s\".", index, static_cast<int>(num_args), absl::CEscape(format).c_str()); #endif @@ -61,7 +61,7 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, } else { #ifndef NDEBUG ABSL_RAW_LOG(FATAL, - "Invalid absl::Substitute() format std::string: \"%s\".", + "Invalid absl::Substitute() format string: \"%s\".", absl::CEscape(format).c_str()); #endif return; @@ -73,7 +73,7 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, if (size == 0) return; - // Build the std::string. + // Build the string. size_t original_size = output->size(); strings_internal::STLStringResizeUninitialized(output, original_size + size); char* target = &(*output)[original_size]; diff --git a/absl/strings/substitute.h b/absl/strings/substitute.h index 4d0984d3..c6da4dc6 100644 --- a/absl/strings/substitute.h +++ b/absl/strings/substitute.h @@ -50,7 +50,7 @@ // // Supported types: // * absl::string_view, std::string, const char* (null is equivalent to "") -// * int32_t, int64_t, uint32_t, uint64 +// * int32_t, int64_t, uint32_t, uint64_t // * float, double // * bool (Printed as "true" or "false") // * pointer types other than char* (Printed as "0x<lower case hex string>", @@ -99,7 +99,7 @@ namespace substitute_internal { // This class has implicit constructors. class Arg { public: - // Overloads for std::string-y things + // Overloads for string-y things // // Explicitly overload `const char*` so the compiler doesn't cast to `bool`. Arg(const char* value) // NOLINT(runtime/explicit) @@ -120,7 +120,9 @@ class Arg { // representation. However, we can't really know, so we make the caller decide // what to do. Arg(char value) // NOLINT(runtime/explicit) - : piece_(scratch_, 1) { scratch_[0] = value; } + : piece_(scratch_, 1) { + scratch_[0] = value; + } Arg(short value) // NOLINT(*) : piece_(scratch_, numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} @@ -203,10 +205,11 @@ constexpr const char* SkipNumber(const char* format) { } constexpr int PlaceholderBitmask(const char* format) { - return !*format ? 0 : *format != '$' - ? PlaceholderBitmask(format + 1) - : (CalculateOneBit(format + 1) | - PlaceholderBitmask(SkipNumber(format + 1))); + return !*format + ? 0 + : *format != '$' ? PlaceholderBitmask(format + 1) + : (CalculateOneBit(format + 1) | + PlaceholderBitmask(SkipNumber(format + 1))); } #endif // ABSL_BAD_CALL_IF @@ -360,13 +363,13 @@ inline void SubstituteAndAppend( void SubstituteAndAppend(std::string* output, const char* format) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, "There were no substitution arguments " - "but this format std::string has a $[0-9] in it"); + "but this format string has a $[0-9] in it"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, "There was 1 substitution argument given, but " - "this format std::string is either missing its $0, or " + "this format string is either missing its $0, or " "contains one of $1-$9"); void SubstituteAndAppend(std::string* output, const char* format, @@ -374,7 +377,7 @@ void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a1) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3, "There were 2 substitution arguments given, but " - "this format std::string is either missing its $0/$1, or " + "this format string is either missing its $0/$1, or " "contains one of $2-$9"); void SubstituteAndAppend(std::string* output, const char* format, @@ -383,7 +386,7 @@ void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a2) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7, "There were 3 substitution arguments given, but " - "this format std::string is either missing its $0/$1/$2, or " + "this format string is either missing its $0/$1/$2, or " "contains one of $3-$9"); void SubstituteAndAppend(std::string* output, const char* format, @@ -393,7 +396,7 @@ void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a3) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15, "There were 4 substitution arguments given, but " - "this format std::string is either missing its $0-$3, or " + "this format string is either missing its $0-$3, or " "contains one of $4-$9"); void SubstituteAndAppend(std::string* output, const char* format, @@ -404,7 +407,7 @@ void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a4) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31, "There were 5 substitution arguments given, but " - "this format std::string is either missing its $0-$4, or " + "this format string is either missing its $0-$4, or " "contains one of $5-$9"); void SubstituteAndAppend(std::string* output, const char* format, @@ -416,7 +419,7 @@ void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a5) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63, "There were 6 substitution arguments given, but " - "this format std::string is either missing its $0-$5, or " + "this format string is either missing its $0-$5, or " "contains one of $6-$9"); void SubstituteAndAppend( @@ -426,7 +429,7 @@ void SubstituteAndAppend( const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127, "There were 7 substitution arguments given, but " - "this format std::string is either missing its $0-$6, or " + "this format string is either missing its $0-$6, or " "contains one of $7-$9"); void SubstituteAndAppend( @@ -437,7 +440,7 @@ void SubstituteAndAppend( const substitute_internal::Arg& a7) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255, "There were 8 substitution arguments given, but " - "this format std::string is either missing its $0-$7, or " + "this format string is either missing its $0-$7, or " "contains one of $8-$9"); void SubstituteAndAppend( @@ -449,7 +452,7 @@ void SubstituteAndAppend( ABSL_BAD_CALL_IF( substitute_internal::PlaceholderBitmask(format) != 511, "There were 9 substitution arguments given, but " - "this format std::string is either missing its $0-$8, or contains a $9"); + "this format string is either missing its $0-$8, or contains a $9"); void SubstituteAndAppend( std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -460,7 +463,7 @@ void SubstituteAndAppend( const substitute_internal::Arg& a9) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023, "There were 10 substitution arguments given, but this " - "format std::string doesn't contain all of $0 through $9"); + "format string doesn't contain all of $0 through $9"); #endif // ABSL_BAD_CALL_IF // Substitute() @@ -586,19 +589,19 @@ ABSL_MUST_USE_RESULT inline std::string Substitute( std::string Substitute(const char* format) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, "There were no substitution arguments " - "but this format std::string has a $[0-9] in it"); + "but this format string has a $[0-9] in it"); std::string Substitute(const char* format, const substitute_internal::Arg& a0) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, "There was 1 substitution argument given, but " - "this format std::string is either missing its $0, or " + "this format string is either missing its $0, or " "contains one of $1-$9"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3, "There were 2 substitution arguments given, but " - "this format std::string is either missing its $0/$1, or " + "this format string is either missing its $0/$1, or " "contains one of $2-$9"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, @@ -606,7 +609,7 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a2) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7, "There were 3 substitution arguments given, but " - "this format std::string is either missing its $0/$1/$2, or " + "this format string is either missing its $0/$1/$2, or " "contains one of $3-$9"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, @@ -615,7 +618,7 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a3) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15, "There were 4 substitution arguments given, but " - "this format std::string is either missing its $0-$3, or " + "this format string is either missing its $0-$3, or " "contains one of $4-$9"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, @@ -625,7 +628,7 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a4) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31, "There were 5 substitution arguments given, but " - "this format std::string is either missing its $0-$4, or " + "this format string is either missing its $0-$4, or " "contains one of $5-$9"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, @@ -636,7 +639,7 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a5) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63, "There were 6 substitution arguments given, but " - "this format std::string is either missing its $0-$5, or " + "this format string is either missing its $0-$5, or " "contains one of $6-$9"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, @@ -648,7 +651,7 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a6) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127, "There were 7 substitution arguments given, but " - "this format std::string is either missing its $0-$6, or " + "this format string is either missing its $0-$6, or " "contains one of $7-$9"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, @@ -661,7 +664,7 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a7) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255, "There were 8 substitution arguments given, but " - "this format std::string is either missing its $0-$7, or " + "this format string is either missing its $0-$7, or " "contains one of $8-$9"); std::string Substitute( @@ -673,7 +676,7 @@ std::string Substitute( ABSL_BAD_CALL_IF( substitute_internal::PlaceholderBitmask(format) != 511, "There were 9 substitution arguments given, but " - "this format std::string is either missing its $0-$8, or contains a $9"); + "this format string is either missing its $0-$8, or contains a $9"); std::string Substitute( const char* format, const substitute_internal::Arg& a0, @@ -684,7 +687,7 @@ std::string Substitute( const substitute_internal::Arg& a9) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023, "There were 10 substitution arguments given, but this " - "format std::string doesn't contain all of $0 through $9"); + "format string doesn't contain all of $0 through $9"); #endif // ABSL_BAD_CALL_IF ABSL_NAMESPACE_END diff --git a/absl/strings/substitute_test.cc b/absl/strings/substitute_test.cc index 450cd2bc..442c9215 100644 --- a/absl/strings/substitute_test.cc +++ b/absl/strings/substitute_test.cc @@ -89,7 +89,7 @@ TEST(SubstituteTest, Substitute) { str = absl::Substitute("$0", char_buf); EXPECT_EQ("print me too", str); - // null char* is "doubly" special. Represented as the empty std::string. + // null char* is "doubly" special. Represented as the empty string. char_p = nullptr; str = absl::Substitute("$0", char_p); EXPECT_EQ("", str); @@ -189,14 +189,14 @@ TEST(SubstituteTest, VectorBoolRef) { TEST(SubstituteDeathTest, SubstituteDeath) { EXPECT_DEBUG_DEATH( static_cast<void>(absl::Substitute(absl::string_view("-$2"), "a", "b")), - "Invalid absl::Substitute\\(\\) format std::string: asked for \"\\$2\", " + "Invalid absl::Substitute\\(\\) format string: asked for \"\\$2\", " "but only 2 args were given."); EXPECT_DEBUG_DEATH( static_cast<void>(absl::Substitute(absl::string_view("-$z-"))), - "Invalid absl::Substitute\\(\\) format std::string: \"-\\$z-\""); + "Invalid absl::Substitute\\(\\) format string: \"-\\$z-\""); EXPECT_DEBUG_DEATH( static_cast<void>(absl::Substitute(absl::string_view("-$"))), - "Invalid absl::Substitute\\(\\) format std::string: \"-\\$\""); + "Invalid absl::Substitute\\(\\) format string: \"-\\$\""); } #endif // GTEST_HAS_DEATH_TEST diff --git a/absl/strings/testdata/getline-1.txt b/absl/strings/testdata/getline-1.txt deleted file mode 100644 index 19b90973..00000000 --- a/absl/strings/testdata/getline-1.txt +++ /dev/null @@ -1,3 +0,0 @@ -alpha - -beta gamma diff --git a/absl/strings/testdata/getline-2.txt b/absl/strings/testdata/getline-2.txt deleted file mode 100644 index d6842d8e..00000000 --- a/absl/strings/testdata/getline-2.txt +++ /dev/null @@ -1 +0,0 @@ -one.two.three |