diff options
Diffstat (limited to 'absl/strings')
43 files changed, 923 insertions, 1156 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 8e8371b3..2cc014ed 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -77,7 +77,6 @@ cc_library( "escaping.h", "has_absl_stringify.h", "internal/damerau_levenshtein_distance.h", - "internal/has_absl_stringify.h", "internal/string_constant.h", "match.h", "numbers.h", @@ -359,6 +358,7 @@ cc_test( "//absl/base:config", "//absl/base:core_headers", "//absl/base:dynamic_annotations", + "//absl/meta:type_traits", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", ], @@ -599,6 +599,7 @@ cc_library( "//absl/functional:function_ref", "//absl/meta:type_traits", "//absl/numeric:bits", + "//absl/types:compare", "//absl/types:optional", "//absl/types:span", ], @@ -614,8 +615,8 @@ cc_library( "//absl:__subpackages__", ], deps = [ - "//absl/base", "//absl/base:config", + "//absl/base:no_destructor", "//absl/base:raw_logging_internal", "//absl/synchronization", ], @@ -829,7 +830,7 @@ cc_test( cc_library( name = "cord_test_helpers", - testonly = 1, + testonly = True, hdrs = [ "cord_test_helpers.h", ], @@ -845,7 +846,7 @@ cc_library( cc_library( name = "cord_rep_test_util", - testonly = 1, + testonly = True, hdrs = ["internal/cord_rep_test_util.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, @@ -859,7 +860,7 @@ cc_library( cc_library( name = "cordz_test_helpers", - testonly = 1, + testonly = True, hdrs = ["cordz_test_helpers.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, @@ -899,6 +900,7 @@ cc_test( cc_test( name = "cord_test", size = "medium", + timeout = "long", srcs = ["cord_test.cc"], copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], @@ -915,12 +917,15 @@ cc_test( "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian", + "//absl/base:no_destructor", "//absl/container:fixed_array", "//absl/functional:function_ref", "//absl/hash", + "//absl/hash:hash_testing", "//absl/log", "//absl/log:check", "//absl/random", + "//absl/types:compare", "//absl/types:optional", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", @@ -1376,6 +1381,7 @@ cc_test( cc_test( name = "str_format_convert_test", size = "medium", + timeout = "long", srcs = ["internal/str_format/convert_test.cc"], copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], @@ -1448,7 +1454,7 @@ cc_test( cc_binary( name = "atod_manual_test", - testonly = 1, + testonly = True, srcs = ["atod_manual_test.cc"], copts = ABSL_TEST_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 1b245362..3a1619e8 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -18,9 +18,9 @@ absl_cc_library( NAME string_view HDRS - string_view.h + "string_view.h" SRCS - string_view.cc + "string_view.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS @@ -42,7 +42,6 @@ absl_cc_library( "has_absl_stringify.h" "internal/damerau_levenshtein_distance.h" "internal/string_constant.h" - "internal/has_absl_stringify.h" "match.h" "numbers.h" "str_cat.h" @@ -274,6 +273,7 @@ absl_cc_test( absl::config absl::core_headers absl::dynamic_annotations + absl::type_traits GTest::gmock_main ) @@ -704,6 +704,7 @@ absl_cc_library( absl::compressed_tuple absl::config absl::container_memory + absl::compare absl::core_headers absl::crc_cord_state absl::endian @@ -799,6 +800,7 @@ absl_cc_library( DEPS absl::base absl::config + absl::no_destructor absl::raw_logging_internal absl::synchronization ) @@ -1072,6 +1074,8 @@ absl_cc_test( absl::fixed_array absl::function_ref absl::hash + absl::hash_testing + absl::no_destructor absl::log absl::optional absl::random_random diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc index 5460b2c6..20a696a1 100644 --- a/absl/strings/ascii.cc +++ b/absl/strings/ascii.cc @@ -15,13 +15,14 @@ #include "absl/strings/ascii.h" #include <climits> -#include <cstdint> +#include <cstddef> #include <cstring> #include <string> -#include <type_traits> +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/nullability.h" +#include "absl/base/optimization.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -162,19 +163,6 @@ ABSL_DLL const char kToUpper[256] = { }; // clang-format on -template <class T> -static constexpr T BroadcastByte(unsigned char value) { - static_assert(std::is_integral<T>::value && sizeof(T) <= sizeof(uint64_t) && - std::is_unsigned<T>::value, - "only unsigned integers up to 64-bit allowed"); - T result = value; - constexpr size_t result_bit_width = sizeof(result) * CHAR_BIT; - result |= result << ((CHAR_BIT << 0) & (result_bit_width - 1)); - result |= result << ((CHAR_BIT << 1) & (result_bit_width - 1)); - result |= result << ((CHAR_BIT << 2) & (result_bit_width - 1)); - return result; -} - // Returns whether `c` is in the a-z/A-Z range (w.r.t. `ToUpper`). // Implemented by: // 1. Pushing the a-z/A-Z range to [SCHAR_MIN, SCHAR_MIN + 26). @@ -189,47 +177,10 @@ constexpr bool AsciiInAZRange(unsigned char c) { return static_cast<signed char>(u) < threshold; } +// Force-inline so the compiler won't merge the short and long implementations. template <bool ToUpper> -static constexpr char* PartialAsciiStrCaseFold(absl::Nonnull<char*> p, - absl::Nonnull<char*> end) { - using vec_t = size_t; - const size_t n = static_cast<size_t>(end - p); - - // SWAR algorithm: http://0x80.pl/notesen/2016-01-06-swar-swap-case.html - constexpr char ch_a = ToUpper ? 'a' : 'A', ch_z = ToUpper ? 'z' : 'Z'; - char* const swar_end = p + (n / sizeof(vec_t)) * sizeof(vec_t); - while (p < swar_end) { - vec_t v = vec_t(); - - // memcpy the vector, but constexpr - for (size_t i = 0; i < sizeof(vec_t); ++i) { - v |= static_cast<vec_t>(static_cast<unsigned char>(p[i])) - << (i * CHAR_BIT); - } - - constexpr unsigned int msb = 1u << (CHAR_BIT - 1); - const vec_t v_msb = v & BroadcastByte<vec_t>(msb); - const vec_t v_nonascii_mask = (v_msb << 1) - (v_msb >> (CHAR_BIT - 1)); - const vec_t v_nonascii = v & v_nonascii_mask; - const vec_t v_ascii = v & ~v_nonascii_mask; - const vec_t a = v_ascii + BroadcastByte<vec_t>(msb - ch_a - 0), - z = v_ascii + BroadcastByte<vec_t>(msb - ch_z - 1); - v = v_nonascii | (v_ascii ^ ((a ^ z) & BroadcastByte<vec_t>(msb)) >> 2); - - // memcpy the vector, but constexpr - for (size_t i = 0; i < sizeof(vec_t); ++i) { - p[i] = static_cast<char>(v >> (i * CHAR_BIT)); - } - - p += sizeof(v); - } - - return p; -} - -template <bool ToUpper> -static constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p, - absl::Nonnull<char*> end) { +ABSL_ATTRIBUTE_ALWAYS_INLINE inline constexpr void AsciiStrCaseFoldImpl( + absl::Nonnull<char*> p, size_t size) { // The upper- and lowercase versions of ASCII characters differ by only 1 bit. // When we need to flip the case, we can xor with this bit to achieve the // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We @@ -237,20 +188,32 @@ static constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p, // have the same single bit difference. constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A'; - using vec_t = size_t; - // TODO(b/316380338): When FDO becomes able to vectorize these, - // revert this manual optimization and just leave the naive loop. - if (static_cast<size_t>(end - p) >= sizeof(vec_t)) { - p = ascii_internal::PartialAsciiStrCaseFold<ToUpper>(p, end); - } - while (p < end) { - unsigned char v = static_cast<unsigned char>(*p); + for (size_t i = 0; i < size; ++i) { + unsigned char v = static_cast<unsigned char>(p[i]); v ^= AsciiInAZRange<ToUpper>(v) ? kAsciiCaseBitFlip : 0; - *p = static_cast<char>(v); - ++p; + p[i] = static_cast<char>(v); } } +// The string size threshold for starting using the long string version. +constexpr size_t kCaseFoldThreshold = 16; + +// No-inline so the compiler won't merge the short and long implementations. +template <bool ToUpper> +ABSL_ATTRIBUTE_NOINLINE constexpr void AsciiStrCaseFoldLong( + absl::Nonnull<char*> p, size_t size) { + ABSL_ASSUME(size >= kCaseFoldThreshold); + AsciiStrCaseFoldImpl<ToUpper>(p, size); +} + +// Splitting to short and long strings to allow vectorization decisions +// to be made separately in the long and short cases. +template <bool ToUpper> +constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p, size_t size) { + size < kCaseFoldThreshold ? AsciiStrCaseFoldImpl<ToUpper>(p, size) + : AsciiStrCaseFoldLong<ToUpper>(p, size); +} + static constexpr size_t ValidateAsciiCasefold() { constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN; size_t incorrect_index = 0; @@ -259,8 +222,8 @@ static constexpr size_t ValidateAsciiCasefold() { for (unsigned int i = 0; i < num_chars; ++i) { uppered[i] = lowered[i] = static_cast<char>(i); } - AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]); - AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]); + AsciiStrCaseFold<false>(&lowered[0], num_chars); + AsciiStrCaseFold<true>(&uppered[0], num_chars); for (size_t i = 0; i < num_chars; ++i) { const char ch = static_cast<char>(i), ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch), @@ -278,13 +241,11 @@ static_assert(ValidateAsciiCasefold() == 0, "error in case conversion"); } // namespace ascii_internal void AsciiStrToLower(absl::Nonnull<std::string*> s) { - char* p = &(*s)[0]; // Guaranteed to be valid for empty strings - return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size()); + return ascii_internal::AsciiStrCaseFold<false>(&(*s)[0], s->size()); } void AsciiStrToUpper(absl::Nonnull<std::string*> s) { - char* p = &(*s)[0]; // Guaranteed to be valid for empty strings - return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size()); + return ascii_internal::AsciiStrCaseFold<true>(&(*s)[0], s->size()); } void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str) { diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc index 117140c1..8885bb15 100644 --- a/absl/strings/ascii_test.cc +++ b/absl/strings/ascii_test.cc @@ -190,11 +190,13 @@ TEST(AsciiStrTo, Lower) { const std::string str("GHIJKL"); const std::string str2("MNOPQR"); const absl::string_view sp(str2); + const std::string long_str("ABCDEFGHIJKLMNOPQRSTUVWXYZ1!a"); std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ"); EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); + EXPECT_EQ("abcdefghijklmnopqrstuvwxyz1!a", absl::AsciiStrToLower(long_str)); absl::AsciiStrToLower(&mutable_str); EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str); @@ -210,10 +212,12 @@ TEST(AsciiStrTo, Upper) { const std::string str("ghijkl"); const std::string str2("_`?@[{amnopqrstuvwxyz"); const absl::string_view sp(str2); + const std::string long_str("abcdefghijklmnopqrstuvwxyz1!A"); EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf)); EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str)); EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp)); + EXPECT_EQ("ABCDEFGHIJKLMNOPQRSTUVWXYZ1!A", absl::AsciiStrToUpper(long_str)); char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index f67326fd..f0f4f31a 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -75,7 +75,7 @@ using ::absl::cord_internal::kMinFlatLength; using ::absl::cord_internal::kInlinedVectorSize; using ::absl::cord_internal::kMaxBytesToCopy; -static void DumpNode(absl::Nonnull<CordRep*> rep, bool include_data, +static void DumpNode(absl::Nonnull<CordRep*> nonnull_rep, bool include_data, absl::Nonnull<std::ostream*> os, int indent = 0); static bool VerifyNode(absl::Nonnull<CordRep*> root, absl::Nonnull<CordRep*> start_node); @@ -425,8 +425,8 @@ Cord& Cord::operator=(absl::string_view src) { // we keep it here to make diffs easier. void Cord::InlineRep::AppendArray(absl::string_view src, MethodIdentifier method) { - MaybeRemoveEmptyCrcNode(); if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + MaybeRemoveEmptyCrcNode(); size_t appended = 0; CordRep* rep = tree(); @@ -1062,6 +1062,15 @@ void CopyCordToString(const Cord& src, absl::Nonnull<std::string*> dst) { } } +void AppendCordToString(const Cord& src, absl::Nonnull<std::string*> dst) { + const size_t cur_dst_size = dst->size(); + const size_t new_dst_size = cur_dst_size + src.size(); + absl::strings_internal::STLStringResizeUninitializedAmortized(dst, + new_dst_size); + char* append_ptr = &(*dst)[cur_dst_size]; + src.CopyToArrayImpl(append_ptr); +} + void Cord::CopyToArraySlowPath(absl::Nonnull<char*> dst) const { assert(contents_.is_tree()); absl::string_view fragment; @@ -1448,14 +1457,13 @@ absl::string_view Cord::FlattenSlowPath() { } } -static void DumpNode(absl::Nonnull<CordRep*> rep, bool include_data, +static void DumpNode(absl::Nonnull<CordRep*> nonnull_rep, bool include_data, absl::Nonnull<std::ostream*> os, int indent) { + CordRep* rep = nonnull_rep; const int kIndentStep = 1; - absl::InlinedVector<CordRep*, kInlinedVectorSize> stack; - absl::InlinedVector<int, kInlinedVectorSize> indents; for (;;) { - *os << std::setw(3) << rep->refcount.Get(); - *os << " " << std::setw(7) << rep->length; + *os << std::setw(3) << (rep == nullptr ? 0 : rep->refcount.Get()); + *os << " " << std::setw(7) << (rep == nullptr ? 0 : rep->length); *os << " ["; if (include_data) *os << static_cast<void*>(rep); *os << "]"; @@ -1477,26 +1485,23 @@ static void DumpNode(absl::Nonnull<CordRep*> rep, bool include_data, if (rep->IsExternal()) { *os << "EXTERNAL ["; if (include_data) - *os << absl::CEscape(std::string(rep->external()->base, rep->length)); + *os << absl::CEscape( + absl::string_view(rep->external()->base, rep->length)); *os << "]\n"; } else if (rep->IsFlat()) { *os << "FLAT cap=" << rep->flat()->Capacity() << " ["; if (include_data) - *os << absl::CEscape(std::string(rep->flat()->Data(), rep->length)); + *os << absl::CEscape( + absl::string_view(rep->flat()->Data(), rep->length)); *os << "]\n"; } else { CordRepBtree::Dump(rep, /*label=*/"", include_data, *os); } } if (leaf) { - if (stack.empty()) break; - rep = stack.back(); - stack.pop_back(); - indent = indents.back(); - indents.pop_back(); + break; } } - ABSL_INTERNAL_CHECK(indents.empty(), ""); } static std::string ReportError(absl::Nonnull<CordRep*> root, diff --git a/absl/strings/cord.h b/absl/strings/cord.h index b3e556b6..69aa8ef4 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -75,6 +75,7 @@ #include "absl/base/internal/per_thread_tls.h" #include "absl/base/macros.h" #include "absl/base/nullability.h" +#include "absl/base/optimization.h" #include "absl/base/port.h" #include "absl/container/inlined_vector.h" #include "absl/crc/internal/crc_cord_state.h" @@ -95,6 +96,7 @@ #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/internal/string_constant.h" #include "absl/strings/string_view.h" +#include "absl/types/compare.h" #include "absl/types/optional.h" namespace absl { @@ -104,6 +106,7 @@ class CordTestPeer; template <typename Releaser> Cord MakeCordFromExternal(absl::string_view, Releaser&&); void CopyCordToString(const Cord& src, absl::Nonnull<std::string*> dst); +void AppendCordToString(const Cord& src, absl::Nonnull<std::string*> dst); // Cord memory accounting modes enum class CordMemoryAccounting { @@ -420,6 +423,18 @@ class Cord { friend void CopyCordToString(const Cord& src, absl::Nonnull<std::string*> dst); + // AppendCordToString() + // + // Appends the contents of a `src` Cord to a `*dst` string. + // + // This function optimizes the case of appending to a non-empty destination + // string. If `*dst` already has capacity to store the contents of the cord, + // this function does not invalidate pointers previously returned by + // `dst->data()`. If `*dst` is a new object, prefer to simply use the + // conversion operator to `std::string`. + friend void AppendCordToString(const Cord& src, + absl::Nonnull<std::string*> dst); + class CharIterator; //---------------------------------------------------------------------------- @@ -757,7 +772,7 @@ class Cord { // Cord::Find() // - // Returns an iterator to the first occurrance of the substring `needle`. + // Returns an iterator to the first occurrence of the substring `needle`. // // If the substring `needle` does not occur, `Cord::char_end()` is returned. CharIterator Find(absl::string_view needle) const; @@ -835,6 +850,38 @@ class Cord { friend bool operator==(const Cord& lhs, const Cord& rhs); friend bool operator==(const Cord& lhs, absl::string_view rhs); +#ifdef __cpp_impl_three_way_comparison + + // Cords support comparison with other Cords and string_views via operator< + // and others; here we provide a wrapper for the C++20 three-way comparison + // <=> operator. + + static inline std::strong_ordering ConvertCompareResultToStrongOrdering( + int c) { + if (c == 0) { + return std::strong_ordering::equal; + } else if (c < 0) { + return std::strong_ordering::less; + } else { + return std::strong_ordering::greater; + } + } + + friend inline std::strong_ordering operator<=>(const Cord& x, const Cord& y) { + return ConvertCompareResultToStrongOrdering(x.Compare(y)); + } + + friend inline std::strong_ordering operator<=>(const Cord& lhs, + absl::string_view rhs) { + return ConvertCompareResultToStrongOrdering(lhs.Compare(rhs)); + } + + friend inline std::strong_ordering operator<=>(absl::string_view lhs, + const Cord& rhs) { + return ConvertCompareResultToStrongOrdering(-rhs.Compare(lhs)); + } +#endif + friend absl::Nullable<const CordzInfo*> GetCordzInfoForTesting( const Cord& cord); @@ -1065,6 +1112,8 @@ class Cord { const; CharIterator FindImpl(CharIterator it, absl::string_view needle) const; + + void CopyToArrayImpl(absl::Nonnull<char*> dst) const; }; ABSL_NAMESPACE_END @@ -1103,8 +1152,8 @@ absl::Nonnull<CordRep*> NewExternalRep(absl::string_view data, // Overload for function reference types that dispatches using a function // pointer because there are no `alignof()` or `sizeof()` a function reference. // NOLINTNEXTLINE - suppress clang-tidy raw pointer return. -inline absl::Nonnull<CordRep*> NewExternalRep(absl::string_view data, - void (&releaser)(absl::string_view)) { +inline absl::Nonnull<CordRep*> NewExternalRep( + absl::string_view data, void (&releaser)(absl::string_view)) { return NewExternalRep(data, &releaser); } @@ -1120,7 +1169,7 @@ Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { } else { using ReleaserType = absl::decay_t<Releaser>; cord_internal::InvokeReleaser( - cord_internal::Rank0{}, ReleaserType(std::forward<Releaser>(releaser)), + cord_internal::Rank1{}, ReleaserType(std::forward<Releaser>(releaser)), data); } return cord; @@ -1170,7 +1219,8 @@ inline void Cord::InlineRep::Swap(absl::Nonnull<Cord::InlineRep*> rhs) { if (rhs == this) { return; } - std::swap(data_, rhs->data_); + using std::swap; + swap(data_, rhs->data_); } inline absl::Nullable<const char*> Cord::InlineRep::data() const { @@ -1352,7 +1402,8 @@ inline size_t Cord::EstimatedMemoryUsage( return result; } -inline absl::optional<absl::string_view> Cord::TryFlat() const { +inline absl::optional<absl::string_view> Cord::TryFlat() const + ABSL_ATTRIBUTE_LIFETIME_BOUND { absl::cord_internal::CordRep* rep = contents_.tree(); if (rep == nullptr) { return absl::string_view(contents_.data(), contents_.size()); @@ -1364,7 +1415,7 @@ inline absl::optional<absl::string_view> Cord::TryFlat() const { return absl::nullopt; } -inline absl::string_view Cord::Flatten() { +inline absl::string_view Cord::Flatten() ABSL_ATTRIBUTE_LIFETIME_BOUND { absl::cord_internal::CordRep* rep = contents_.tree(); if (rep == nullptr) { return absl::string_view(contents_.data(), contents_.size()); @@ -1387,6 +1438,7 @@ inline void Cord::Prepend(absl::string_view src) { inline void Cord::Append(CordBuffer buffer) { if (ABSL_PREDICT_FALSE(buffer.length() == 0)) return; + contents_.MaybeRemoveEmptyCrcNode(); absl::string_view short_value; if (CordRep* rep = buffer.ConsumeValue(short_value)) { contents_.AppendTree(rep, CordzUpdateTracker::kAppendCordBuffer); @@ -1397,6 +1449,7 @@ inline void Cord::Append(CordBuffer buffer) { inline void Cord::Prepend(CordBuffer buffer) { if (ABSL_PREDICT_FALSE(buffer.length() == 0)) return; + contents_.MaybeRemoveEmptyCrcNode(); absl::string_view short_value; if (CordRep* rep = buffer.ConsumeValue(short_value)) { contents_.PrependTree(rep, CordzUpdateTracker::kPrependCordBuffer); @@ -1445,6 +1498,14 @@ inline bool Cord::StartsWith(absl::string_view rhs) const { return EqualsImpl(rhs, rhs_size); } +inline void Cord::CopyToArrayImpl(absl::Nonnull<char*> dst) const { + if (!contents_.is_tree()) { + if (!empty()) contents_.CopyToArray(dst); + } else { + CopyToArraySlowPath(dst); + } +} + inline void Cord::ChunkIterator::InitTree( absl::Nonnull<cord_internal::CordRep*> tree) { tree = cord_internal::SkipCrcNode(tree); diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index f1a5f39c..eaf6d719 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -38,10 +38,12 @@ #include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/macros.h" +#include "absl/base/no_destructor.h" #include "absl/base/options.h" #include "absl/container/fixed_array.h" #include "absl/functional/function_ref.h" #include "absl/hash/hash.h" +#include "absl/hash/hash_testing.h" #include "absl/log/check.h" #include "absl/log/log.h" #include "absl/random/random.h" @@ -58,6 +60,7 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" +#include "absl/types/compare.h" #include "absl/types/optional.h" // convenience local constants @@ -241,12 +244,14 @@ class CordTestPeer { ABSL_NAMESPACE_END } // namespace absl -// The CordTest fixture runs all tests with and without Cord Btree enabled, -// and with our without expected CRCs being set on the subject Cords. -class CordTest : public testing::TestWithParam<int> { + + +// The CordTest fixture runs all tests with and without expected CRCs being set +// on the subject Cords. +class CordTest : public testing::TestWithParam<bool /*useCrc*/> { public: - // Returns true if test is running with btree enabled. - bool UseCrc() const { return GetParam() == 2 || GetParam() == 3; } + // Returns true if test is running with Crc enabled. + bool UseCrc() const { return GetParam(); } void MaybeHarden(absl::Cord& c) { if (UseCrc()) { c.SetExpectedChecksum(1); @@ -258,20 +263,16 @@ class CordTest : public testing::TestWithParam<int> { } // Returns human readable string representation of the test parameter. - static std::string ToString(testing::TestParamInfo<int> param) { - switch (param.param) { - case 0: - return "Btree"; - case 1: - return "BtreeHardened"; - default: - assert(false); - return "???"; + static std::string ToString(testing::TestParamInfo<bool> useCrc) { + if (useCrc.param) { + return "BtreeHardened"; + } else { + return "Btree"; } } }; -INSTANTIATE_TEST_SUITE_P(WithParam, CordTest, testing::Values(0, 1), +INSTANTIATE_TEST_SUITE_P(WithParam, CordTest, testing::Bool(), CordTest::ToString); TEST(CordRepFlat, AllFlatCapacities) { @@ -702,6 +703,38 @@ TEST_P(CordTest, CopyToString) { "copying ", "to ", "a ", "string."}))); } +static void VerifyAppendCordToString(const absl::Cord& cord) { + std::string initially_empty; + absl::AppendCordToString(cord, &initially_empty); + EXPECT_EQ(initially_empty, cord); + + const absl::string_view kInitialContents = "initial contents."; + std::string expected_after_append = + absl::StrCat(kInitialContents, std::string(cord)); + + std::string no_reserve(kInitialContents); + absl::AppendCordToString(cord, &no_reserve); + EXPECT_EQ(no_reserve, expected_after_append); + + std::string has_reserved_capacity(kInitialContents); + has_reserved_capacity.reserve(has_reserved_capacity.size() + cord.size()); + const char* address_before_copy = has_reserved_capacity.data(); + absl::AppendCordToString(cord, &has_reserved_capacity); + EXPECT_EQ(has_reserved_capacity, expected_after_append); + EXPECT_EQ(has_reserved_capacity.data(), address_before_copy) + << "AppendCordToString allocated new string storage; " + "has_reserved_capacity = \"" + << has_reserved_capacity << "\""; +} + +TEST_P(CordTest, AppendToString) { + VerifyAppendCordToString(absl::Cord()); // empty cords cannot carry CRCs + VerifyAppendCordToString(MaybeHardened(absl::Cord("small cord"))); + VerifyAppendCordToString(MaybeHardened( + absl::MakeFragmentedCord({"fragmented ", "cord ", "to ", "test ", + "appending ", "to ", "a ", "string."}))); +} + TEST_P(CordTest, AppendEmptyBuffer) { absl::Cord cord; cord.Append(absl::CordBuffer()); @@ -1512,12 +1545,11 @@ TEST_P(CordTest, CompareAfterAssign) { // comparison methods from basic_string. static void TestCompare(const absl::Cord& c, const absl::Cord& d, RandomEngine* rng) { - typedef std::basic_string<uint8_t> ustring; - ustring cs(reinterpret_cast<const uint8_t*>(std::string(c).data()), c.size()); - ustring ds(reinterpret_cast<const uint8_t*>(std::string(d).data()), d.size()); - // ustring comparison is ideal because we expect Cord comparisons to be - // based on unsigned byte comparisons regardless of whether char is signed. - int expected = sign(cs.compare(ds)); + // char_traits<char>::lt is guaranteed to do an unsigned comparison: + // https://en.cppreference.com/w/cpp/string/char_traits/cmp. We also expect + // Cord comparisons to be based on unsigned byte comparisons regardless of + // whether char is signed. + int expected = sign(std::string(c).compare(std::string(d))); EXPECT_EQ(expected, sign(c.Compare(d))) << c << ", " << d; } @@ -2013,6 +2045,26 @@ TEST(CordTest, CordMemoryUsageBTree) { rep2_size); } +TEST(CordTest, TestHashFragmentation) { + // Make sure we hit these boundary cases precisely. + EXPECT_EQ(1024, absl::hash_internal::PiecewiseChunkSize()); + EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly({ + absl::Cord(), + absl::MakeFragmentedCord({std::string(600, 'a'), std::string(600, 'a')}), + absl::MakeFragmentedCord({std::string(1200, 'a')}), + absl::MakeFragmentedCord({std::string(900, 'b'), std::string(900, 'b')}), + absl::MakeFragmentedCord({std::string(1800, 'b')}), + absl::MakeFragmentedCord( + {std::string(2000, 'c'), std::string(2000, 'c')}), + absl::MakeFragmentedCord({std::string(4000, 'c')}), + absl::MakeFragmentedCord({std::string(1024, 'd')}), + absl::MakeFragmentedCord({std::string(1023, 'd'), "d"}), + absl::MakeFragmentedCord({std::string(1025, 'e')}), + absl::MakeFragmentedCord({std::string(1024, 'e'), "e"}), + absl::MakeFragmentedCord({std::string(1023, 'e'), "e", "e"}), + })); +} + // Regtest for a change that had to be rolled back because it expanded out // of the InlineRep too soon, which was observable through MemoryUsage(). TEST_P(CordTest, CordMemoryUsageInlineRep) { @@ -2744,34 +2796,15 @@ class AfterExitCordTester { absl::string_view expected_; }; -// Deliberately prevents the destructor for an absl::Cord from running. The cord -// is accessible via the cord member during the lifetime of the CordLeaker. -// After the CordLeaker is destroyed, pointers to the cord will remain valid -// until the CordLeaker's memory is deallocated. -struct CordLeaker { - union { - absl::Cord cord; - }; - - template <typename Str> - constexpr explicit CordLeaker(const Str& str) : cord(str) {} - - ~CordLeaker() { - // Don't do anything, including running cord's destructor. (cord's - // destructor won't run automatically because cord is hidden inside a - // union.) - } -}; - template <typename Str> -void TestConstinitConstructor(Str) { +void TestAfterExit(Str) { const auto expected = Str::value; // Defined before `cord` to be destroyed after it. static AfterExitCordTester exit_tester; // NOLINT - ABSL_CONST_INIT static CordLeaker cord_leaker(Str{}); // NOLINT + static absl::NoDestructor<absl::Cord> cord_leaker(Str{}); // cord_leaker is static, so this reference will remain valid through the end // of program execution. - static absl::Cord& cord = cord_leaker.cord; + static absl::Cord& cord = *cord_leaker; static bool init_exit_tester = exit_tester.Set(&cord, expected); (void)init_exit_tester; @@ -2823,11 +2856,9 @@ struct LongView { }; -TEST_P(CordTest, ConstinitConstructor) { - TestConstinitConstructor( - absl::strings_internal::MakeStringConstant(ShortView{})); - TestConstinitConstructor( - absl::strings_internal::MakeStringConstant(LongView{})); +TEST_P(CordTest, AfterExit) { + TestAfterExit(absl::strings_internal::MakeStringConstant(ShortView{})); + TestAfterExit(absl::strings_internal::MakeStringConstant(LongView{})); } namespace { @@ -3253,6 +3284,31 @@ TEST(CrcCordTest, ChecksummedEmptyCordEstimateMemoryUsage) { EXPECT_NE(cord.EstimatedMemoryUsage(), 0); } +TEST(CordThreeWayComparisonTest, CompareCords) { +#ifndef __cpp_impl_three_way_comparison + GTEST_SKIP() << "C++20 three-way <=> comparison not supported"; +#else + EXPECT_EQ(absl::Cord("a") <=> absl::Cord("a"), std::strong_ordering::equal); + EXPECT_EQ(absl::Cord("aaaa") <=> absl::Cord("aaab"), + std::strong_ordering::less); + EXPECT_EQ(absl::Cord("baaa") <=> absl::Cord("a"), + std::strong_ordering::greater); +#endif +} + +TEST(CordThreeWayComparisonTest, CompareCordsAndStringViews) { +#ifndef __cpp_impl_three_way_comparison + GTEST_SKIP() << "C++20 three-way <=> comparison not supported"; +#else + EXPECT_EQ(absl::string_view("a") <=> absl::Cord("a"), + std::strong_ordering::equal); + EXPECT_EQ(absl::Cord("a") <=> absl::string_view("b"), + std::strong_ordering::less); + EXPECT_EQ(absl::string_view("b") <=> absl::Cord("a"), + std::strong_ordering::greater); +#endif +} + #if defined(GTEST_HAS_DEATH_TEST) && defined(ABSL_INTERNAL_CORD_HAVE_SANITIZER) // Returns an expected poison / uninitialized death message expression. diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 27d3d98c..4ffef94b 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -21,10 +21,12 @@ #include <cstring> #include <limits> #include <string> +#include <utility> #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/unaligned_access.h" +#include "absl/base/nullability.h" #include "absl/strings/ascii.h" #include "absl/strings/charset.h" #include "absl/strings/internal/escaping.h" @@ -54,7 +56,8 @@ inline unsigned int hex_digit_to_int(char c) { return x & 0xf; } -inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) { +inline bool IsSurrogate(char32_t c, absl::string_view src, + absl::Nullable<std::string*> error) { if (c >= 0xD800 && c <= 0xDFFF) { if (error) { *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\", @@ -83,7 +86,9 @@ inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) { // UnescapeCEscapeSequences(). // ---------------------------------------------------------------------- bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, - char* dest, ptrdiff_t* dest_len, std::string* error) { + absl::Nonnull<char*> dest, + absl::Nonnull<ptrdiff_t*> dest_len, + absl::Nullable<std::string*> error) { char* d = dest; const char* p = source.data(); const char* end = p + source.size(); @@ -290,7 +295,8 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, // may be the same. // ---------------------------------------------------------------------- bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, - std::string* dest, std::string* error) { + absl::Nonnull<std::string*> dest, + absl::Nullable<std::string*> error) { strings_internal::STLStringResizeUninitialized(dest, source.size()); ptrdiff_t dest_size; @@ -407,7 +413,8 @@ inline size_t CEscapedLength(absl::string_view src) { return escaped_len; } -void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { +void CEscapeAndAppendInternal(absl::string_view src, + absl::Nonnull<std::string*> dest) { size_t escaped_len = CEscapedLength(src); if (escaped_len == src.size()) { dest->append(src.data(), src.size()); @@ -464,9 +471,10 @@ void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { // Reverses the mapping in Base64EscapeInternal; see that method's // documentation for details of the mapping. -bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, - size_t szdest, const signed char* unbase64, - size_t* len) { +bool Base64UnescapeInternal(absl::Nullable<const char*> src_param, size_t szsrc, + absl::Nullable<char*> dest, size_t szdest, + absl::Nonnull<const signed char*> unbase64, + absl::Nonnull<size_t*> len) { static const char kPad64Equals = '='; static const char kPad64Dot = '.'; @@ -802,8 +810,9 @@ constexpr signed char kUnWebSafeBase64[] = { /* clang-format on */ template <typename String> -bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, - const signed char* unbase64) { +bool Base64UnescapeInternal(absl::Nullable<const char*> src, size_t slen, + absl::Nonnull<String*> dest, + absl::Nonnull<const signed char*> unbase64) { // Determine the size of the output string. Base64 encodes every 3 bytes into // 4 characters. Any leftover chars are added directly for good measure. const size_t dest_len = 3 * (slen / 4) + (slen % 4); @@ -847,13 +856,32 @@ constexpr char kHexValueLenient[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +constexpr signed char kHexValueStrict[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // '0'..'9' + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'A'..'F' + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'a'..'f' + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; /* clang-format on */ // This is a templated function so that T can be either a char* // or a string. This works because we use the [] operator to access // individual characters at a time. template <typename T> -void HexStringToBytesInternal(const char* from, T to, size_t num) { +void HexStringToBytesInternal(absl::Nullable<const char*> from, T to, + size_t num) { for (size_t i = 0; i < num; i++) { to[i] = static_cast<char>(kHexValueLenient[from[i * 2] & 0xFF] << 4) + (kHexValueLenient[from[i * 2 + 1] & 0xFF]); @@ -863,7 +891,8 @@ void HexStringToBytesInternal(const char* from, T to, size_t num) { // This is a templated function so that T can be either a char* or a // std::string. template <typename T> -void BytesToHexStringInternal(const unsigned char* src, T dest, size_t num) { +void BytesToHexStringInternal(absl::Nullable<const unsigned char*> src, T dest, + size_t num) { auto dest_ptr = &dest[0]; for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2]; @@ -878,8 +907,8 @@ void BytesToHexStringInternal(const unsigned char* src, T dest, size_t num) { // // See CUnescapeInternal() for implementation details. // ---------------------------------------------------------------------- -bool CUnescape(absl::string_view source, std::string* dest, - std::string* error) { +bool CUnescape(absl::string_view source, absl::Nonnull<std::string*> dest, + absl::Nullable<std::string*> error) { return CUnescapeInternal(source, kUnescapeNulls, dest, error); } @@ -901,21 +930,23 @@ std::string Utf8SafeCHexEscape(absl::string_view src) { return CEscapeInternal(src, true, true); } -bool Base64Unescape(absl::string_view src, std::string* dest) { +bool Base64Unescape(absl::string_view src, absl::Nonnull<std::string*> dest) { return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); } -bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) { +bool WebSafeBase64Unescape(absl::string_view src, + absl::Nonnull<std::string*> dest) { return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64); } -void Base64Escape(absl::string_view src, std::string* dest) { +void Base64Escape(absl::string_view src, absl::Nonnull<std::string*> dest) { strings_internal::Base64EscapeInternal( reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, true, strings_internal::kBase64Chars); } -void WebSafeBase64Escape(absl::string_view src, std::string* dest) { +void WebSafeBase64Escape(absl::string_view src, + absl::Nonnull<std::string*> dest) { strings_internal::Base64EscapeInternal( reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, false, strings_internal::kWebSafeBase64Chars); @@ -937,6 +968,32 @@ std::string WebSafeBase64Escape(absl::string_view src) { return dest; } +bool HexStringToBytes(absl::string_view hex, + absl::Nonnull<std::string*> bytes) { + std::string output; + + size_t num_bytes = hex.size() / 2; + if (hex.size() != num_bytes * 2) { + return false; + } + + absl::strings_internal::STLStringResizeUninitialized(&output, num_bytes); + auto hex_p = hex.cbegin(); + for (std::string::iterator bin_p = output.begin(); bin_p != output.end(); + ++bin_p) { + int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)]; + int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)]; + if (h1 == -1 || h2 == -1) { + output.resize(static_cast<size_t>(bin_p - output.begin())); + return false; + } + *bin_p = static_cast<char>((h1 << 4) + h2); + } + + *bytes = std::move(output); + return true; +} + std::string HexStringToBytes(absl::string_view from) { std::string result; const auto num = from.size() / 2; diff --git a/absl/strings/escaping.h b/absl/strings/escaping.h index bf2a5898..3f34fbfc 100644 --- a/absl/strings/escaping.h +++ b/absl/strings/escaping.h @@ -27,7 +27,9 @@ #include <string> #include <vector> +#include "absl/base/attributes.h" #include "absl/base/macros.h" +#include "absl/base/nullability.h" #include "absl/strings/ascii.h" #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" @@ -65,14 +67,16 @@ ABSL_NAMESPACE_BEGIN // // std::string s = "foo\\rbar\\nbaz\\t"; // std::string unescaped_s; -// if (!absl::CUnescape(s, &unescaped_s) { +// if (!absl::CUnescape(s, &unescaped_s)) { // ... // } // EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t"); -bool CUnescape(absl::string_view source, std::string* dest, std::string* error); +bool CUnescape(absl::string_view source, absl::Nonnull<std::string*> dest, + absl::Nullable<std::string*> error); // Overload of `CUnescape()` with no error reporting. -inline bool CUnescape(absl::string_view source, std::string* dest) { +inline bool CUnescape(absl::string_view source, + absl::Nonnull<std::string*> dest) { return CUnescape(source, dest, nullptr); } @@ -122,7 +126,7 @@ std::string Utf8SafeCHexEscape(absl::string_view src); // Encodes a `src` string into a base64-encoded 'dest' string with padding // characters. This function conforms with RFC 4648 section 4 (base64) and RFC // 2045. -void Base64Escape(absl::string_view src, std::string* dest); +void Base64Escape(absl::string_view src, absl::Nonnull<std::string*> dest); std::string Base64Escape(absl::string_view src); // WebSafeBase64Escape() @@ -130,7 +134,8 @@ std::string Base64Escape(absl::string_view src); // Encodes a `src` string into a base64 string, like Base64Escape() does, but // outputs '-' instead of '+' and '_' instead of '/', and does not pad 'dest'. // This function conforms with RFC 4648 section 5 (base64url). -void WebSafeBase64Escape(absl::string_view src, std::string* dest); +void WebSafeBase64Escape(absl::string_view src, + absl::Nonnull<std::string*> dest); std::string WebSafeBase64Escape(absl::string_view src); // Base64Unescape() @@ -140,7 +145,7 @@ std::string WebSafeBase64Escape(absl::string_view src); // `src` contains invalid characters, `dest` is cleared and returns `false`. // If padding is included (note that `Base64Escape()` does produce it), it must // be correct. In the padding, '=' and '.' are treated identically. -bool Base64Unescape(absl::string_view src, std::string* dest); +bool Base64Unescape(absl::string_view src, absl::Nonnull<std::string*> dest); // WebSafeBase64Unescape() // @@ -149,12 +154,24 @@ bool Base64Unescape(absl::string_view src, std::string* dest); // invalid characters, `dest` is cleared and returns `false`. If padding is // included (note that `WebSafeBase64Escape()` does not produce it), it must be // correct. In the padding, '=' and '.' are treated identically. -bool WebSafeBase64Unescape(absl::string_view src, std::string* dest); +bool WebSafeBase64Unescape(absl::string_view src, + absl::Nonnull<std::string*> dest); + +// HexStringToBytes() +// +// Converts the hexadecimal encoded data in `hex` into raw bytes in the `bytes` +// output string. If `hex` does not consist of valid hexadecimal data, this +// function returns false and leaves `bytes` in an unspecified state. Returns +// true on success. +ABSL_MUST_USE_RESULT bool HexStringToBytes(absl::string_view hex, + absl::Nonnull<std::string*> bytes); // HexStringToBytes() // // Converts an ASCII hex string into bytes, returning binary data of length -// `from.size()/2`. +// `from.size()/2`. The input must be valid hexadecimal data, otherwise the +// return value is unspecified. +ABSL_DEPRECATED("Use the HexStringToBytes() that returns a bool") std::string HexStringToBytes(absl::string_view from); // BytesToHexString() diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc index ca1ee45c..25cb685b 100644 --- a/absl/strings/escaping_test.cc +++ b/absl/strings/escaping_test.cc @@ -689,6 +689,42 @@ TEST(Base64, DISABLED_HugeData) { EXPECT_EQ(huge, unescaped); } +TEST(Escaping, HexStringToBytesBackToHex) { + std::string bytes, hex; + + constexpr absl::string_view kTestHexLower = "1c2f0032f40123456789abcdef"; + constexpr absl::string_view kTestHexUpper = "1C2F0032F40123456789ABCDEF"; + constexpr absl::string_view kTestBytes = absl::string_view( + "\x1c\x2f\x00\x32\xf4\x01\x23\x45\x67\x89\xab\xcd\xef", 13); + + EXPECT_TRUE(absl::HexStringToBytes(kTestHexLower, &bytes)); + EXPECT_EQ(bytes, kTestBytes); + + EXPECT_TRUE(absl::HexStringToBytes(kTestHexUpper, &bytes)); + EXPECT_EQ(bytes, kTestBytes); + + hex = absl::BytesToHexString(kTestBytes); + EXPECT_EQ(hex, kTestHexLower); + + // Same buffer. + // We do not care if this works since we do not promise it in the contract. + // The purpose of this test is to to see if the program will crash or if + // sanitizers will catch anything. + bytes = std::string(kTestHexUpper); + (void)absl::HexStringToBytes(bytes, &bytes); + + // Length not a multiple of two. + EXPECT_FALSE(absl::HexStringToBytes("1c2f003", &bytes)); + + // Not hex. + EXPECT_FALSE(absl::HexStringToBytes("1c2f00ft", &bytes)); + + // Empty input. + bytes = "abc"; + EXPECT_TRUE(absl::HexStringToBytes("", &bytes)); + EXPECT_EQ("", bytes); // Results in empty output. +} + TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) { std::string hex_mixed = "0123456789abcdefABCDEF"; std::string bytes_expected = "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF"; diff --git a/absl/strings/has_absl_stringify.h b/absl/strings/has_absl_stringify.h index 274a7865..9af0191d 100644 --- a/absl/strings/has_absl_stringify.h +++ b/absl/strings/has_absl_stringify.h @@ -18,6 +18,7 @@ #include <type_traits> #include <utility> +#include "absl/base/config.h" #include "absl/strings/string_view.h" namespace absl { diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h index 5c0c375d..cb297676 100644 --- a/absl/strings/internal/charconv_bigint.h +++ b/absl/strings/internal/charconv_bigint.h @@ -109,7 +109,17 @@ class BigUnsigned { size_ = (std::min)(size_ + word_shift, max_words); count %= 32; if (count == 0) { +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=warray-bounds +// shows a lot of bogus -Warray-bounds warnings under GCC. +// This is not the only one in Abseil. +#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(14, 0) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); +#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(14, 0) +#pragma GCC diagnostic pop +#endif } else { for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) { words_[i] = (words_[i - word_shift] << count) | diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index 8744540e..f0060f10 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -85,7 +85,7 @@ enum Constants { }; // Emits a fatal error "Unexpected node type: xyz" and aborts the program. -ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep); +[[noreturn]] void LogFatalNodeType(CordRep* rep); // Fast implementation of memmove for up to 15 bytes. This implementation is // safe for overlapping regions. If nullify_tail is true, the destination is @@ -259,7 +259,7 @@ struct CordRep { // on the specific layout of these fields. Notably: the non-trivial field // `refcount` being preceded by `length`, and being tailed by POD data // members only. - // # LINT.IfChange + // LINT.IfChange size_t length; RefcountAndFlags refcount; // If tag < FLAT, it represents CordRepKind and indicates the type of node. @@ -275,7 +275,7 @@ struct CordRep { // allocate room for these in the derived class, as not all compilers reuse // padding space from the base class (clang and gcc do, MSVC does not, etc) uint8_t storage[3]; - // # LINT.ThenChange(cord_rep_btree.h:copy_raw) + // LINT.ThenChange(cord_rep_btree.h:copy_raw) // Returns true if this instance's tag matches the requested type. constexpr bool IsSubstring() const { return tag == SUBSTRING; } @@ -352,18 +352,19 @@ struct CordRepExternal : public CordRep { static void Delete(CordRep* rep); }; -struct Rank1 {}; -struct Rank0 : Rank1 {}; +// Use go/ranked-overloads for dispatching. +struct Rank0 {}; +struct Rank1 : Rank0 {}; template <typename Releaser, typename = ::absl::base_internal::invoke_result_t< Releaser, absl::string_view>> -void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) { +void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view data) { ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data); } template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<Releaser>> -void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) { +void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view) { ::absl::base_internal::invoke(std::forward<Releaser>(releaser)); } @@ -381,7 +382,7 @@ struct CordRepExternalImpl } ~CordRepExternalImpl() { - InvokeReleaser(Rank0{}, std::move(this->template get<0>()), + InvokeReleaser(Rank1{}, std::move(this->template get<0>()), absl::string_view(base, length)); } @@ -398,7 +399,6 @@ inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos, assert(pos < child->length); assert(n <= child->length - pos); - // TODO(b/217376272): Harden internal logic. // Move to strategical places inside the Cord logic and make this an assert. if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) { LogFatalNodeType(child); @@ -520,6 +520,7 @@ class InlineData { constexpr InlineData(const InlineData& rhs) noexcept; InlineData& operator=(const InlineData& rhs) noexcept; + friend void swap(InlineData& lhs, InlineData& rhs) noexcept; friend bool operator==(const InlineData& lhs, const InlineData& rhs) { #ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER @@ -770,6 +771,12 @@ class InlineData { char data[kMaxInline + 1]; AsTree as_tree; }; + + // TODO(b/145829486): see swap(InlineData, InlineData) for more info. + inline void SwapValue(Rep rhs, Rep& refrhs) { + memcpy(&refrhs, this, sizeof(*this)); + memcpy(this, &rhs, sizeof(*this)); + } }; // Private implementation of `Compare()` @@ -884,6 +891,19 @@ inline void CordRep::Unref(CordRep* rep) { } } +inline void swap(InlineData& lhs, InlineData& rhs) noexcept { + lhs.unpoison(); + rhs.unpoison(); + // TODO(b/145829486): `std::swap(lhs.rep_, rhs.rep_)` results in bad codegen + // on clang, spilling the temporary swap value on the stack. Since `Rep` is + // trivial, we can make clang DTRT by calling a hand-rolled `SwapValue` where + // we pass `rhs` both by value (register allocated) and by reference. The IR + // then folds and inlines correctly into an optimized swap without spill. + lhs.rep_.SwapValue(rhs.rep_, rhs.rep_); + rhs.poison(); + lhs.poison(); +} + } // namespace cord_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index be94b62e..ab259afe 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -684,14 +684,14 @@ inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const { // except `refcount` is trivially copyable, and the compiler does not // efficiently coalesce member-wise copy of these members. // See https://gcc.godbolt.org/z/qY8zsca6z - // # LINT.IfChange(copy_raw) + // LINT.IfChange(copy_raw) tree->length = new_length; uint8_t* dst = &tree->tag; const uint8_t* src = &tag; const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this); memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset)); return tree; - // # LINT.ThenChange() + // LINT.ThenChange() } inline CordRepBtree* CordRepBtree::Copy() const { diff --git a/absl/strings/internal/cordz_functions.cc b/absl/strings/internal/cordz_functions.cc index 20d314f0..6033d046 100644 --- a/absl/strings/internal/cordz_functions.cc +++ b/absl/strings/internal/cordz_functions.cc @@ -40,13 +40,15 @@ std::atomic<int> g_cordz_mean_interval(50000); // Special negative 'not initialized' per thread value for cordz_next_sample. static constexpr int64_t kInitCordzNextSample = -1; -ABSL_CONST_INIT thread_local int64_t cordz_next_sample = kInitCordzNextSample; +ABSL_CONST_INIT thread_local SamplingState cordz_next_sample = { + kInitCordzNextSample, 1}; // kIntervalIfDisabled is the number of profile-eligible events need to occur // before the code will confirm that cordz is still disabled. constexpr int64_t kIntervalIfDisabled = 1 << 16; -ABSL_ATTRIBUTE_NOINLINE bool cordz_should_profile_slow() { +ABSL_ATTRIBUTE_NOINLINE int64_t +cordz_should_profile_slow(SamplingState& state) { thread_local absl::profiling_internal::ExponentialBiased exponential_biased_generator; @@ -55,30 +57,34 @@ ABSL_ATTRIBUTE_NOINLINE bool cordz_should_profile_slow() { // Check if we disabled profiling. If so, set the next sample to a "large" // number to minimize the overhead of the should_profile codepath. if (mean_interval <= 0) { - cordz_next_sample = kIntervalIfDisabled; - return false; + state = {kIntervalIfDisabled, kIntervalIfDisabled}; + return 0; } // Check if we're always sampling. if (mean_interval == 1) { - cordz_next_sample = 1; - return true; + state = {1, 1}; + return 1; } - if (cordz_next_sample <= 0) { + if (cordz_next_sample.next_sample <= 0) { // If first check on current thread, check cordz_should_profile() // again using the created (initial) stride in cordz_next_sample. - const bool initialized = cordz_next_sample != kInitCordzNextSample; - cordz_next_sample = exponential_biased_generator.GetStride(mean_interval); - return initialized || cordz_should_profile(); + const bool initialized = + cordz_next_sample.next_sample != kInitCordzNextSample; + auto old_stride = state.sample_stride; + auto stride = exponential_biased_generator.GetStride(mean_interval); + state = {stride, stride}; + bool should_sample = initialized || cordz_should_profile() > 0; + return should_sample ? old_stride : 0; } - --cordz_next_sample; - return false; + --state.next_sample; + return 0; } void cordz_set_next_sample_for_testing(int64_t next_sample) { - cordz_next_sample = next_sample; + cordz_next_sample = {next_sample, next_sample}; } #endif // ABSL_INTERNAL_CORDZ_ENABLED diff --git a/absl/strings/internal/cordz_functions.h b/absl/strings/internal/cordz_functions.h index ed108bf1..84c185e4 100644 --- a/absl/strings/internal/cordz_functions.h +++ b/absl/strings/internal/cordz_functions.h @@ -41,23 +41,33 @@ void set_cordz_mean_interval(int32_t mean_interval); #ifdef ABSL_INTERNAL_CORDZ_ENABLED +struct SamplingState { + int64_t next_sample; + int64_t sample_stride; +}; + // cordz_next_sample is the number of events until the next sample event. If // the value is 1 or less, the code will check on the next event if cordz is // enabled, and if so, will sample the Cord. cordz is only enabled when we can // use thread locals. -ABSL_CONST_INIT extern thread_local int64_t cordz_next_sample; - -// Determines if the next sample should be profiled. If it is, the value pointed -// at by next_sample will be set with the interval until the next sample. -bool cordz_should_profile_slow(); - -// Returns true if the next cord should be sampled. -inline bool cordz_should_profile() { - if (ABSL_PREDICT_TRUE(cordz_next_sample > 1)) { - cordz_next_sample--; - return false; +ABSL_CONST_INIT extern thread_local SamplingState cordz_next_sample; + +// Determines if the next sample should be profiled. +// Returns: +// 0: Do not sample +// >0: Sample with the stride of the last sampling period +int64_t cordz_should_profile_slow(SamplingState& state); + +// Determines if the next sample should be profiled. +// Returns: +// 0: Do not sample +// >0: Sample with the stride of the last sampling period +inline int64_t cordz_should_profile() { + if (ABSL_PREDICT_TRUE(cordz_next_sample.next_sample > 1)) { + cordz_next_sample.next_sample--; + return 0; } - return cordz_should_profile_slow(); + return cordz_should_profile_slow(cordz_next_sample); } // Sets the interval until the next sample (for testing only) @@ -65,7 +75,7 @@ void cordz_set_next_sample_for_testing(int64_t next_sample); #else // ABSL_INTERNAL_CORDZ_ENABLED -inline bool cordz_should_profile() { return false; } +inline int64_t cordz_should_profile() { return 0; } inline void cordz_set_next_sample_for_testing(int64_t) {} #endif // ABSL_INTERNAL_CORDZ_ENABLED diff --git a/absl/strings/internal/cordz_functions_test.cc b/absl/strings/internal/cordz_functions_test.cc index b70a685e..8fb93d53 100644 --- a/absl/strings/internal/cordz_functions_test.cc +++ b/absl/strings/internal/cordz_functions_test.cc @@ -47,9 +47,9 @@ TEST(CordzFunctionsTest, ShouldProfileDisable) { set_cordz_mean_interval(0); cordz_set_next_sample_for_testing(0); - EXPECT_FALSE(cordz_should_profile()); + EXPECT_EQ(cordz_should_profile(), 0); // 1 << 16 is from kIntervalIfDisabled in cordz_functions.cc. - EXPECT_THAT(cordz_next_sample, Eq(1 << 16)); + EXPECT_THAT(cordz_next_sample.next_sample, Eq(1 << 16)); set_cordz_mean_interval(orig_sample_rate); } @@ -59,8 +59,8 @@ TEST(CordzFunctionsTest, ShouldProfileAlways) { set_cordz_mean_interval(1); cordz_set_next_sample_for_testing(1); - EXPECT_TRUE(cordz_should_profile()); - EXPECT_THAT(cordz_next_sample, Le(1)); + EXPECT_GT(cordz_should_profile(), 0); + EXPECT_THAT(cordz_next_sample.next_sample, Le(1)); set_cordz_mean_interval(orig_sample_rate); } @@ -74,9 +74,7 @@ TEST(CordzFunctionsTest, DoesNotAlwaysSampleFirstCord) { do { ++tries; ASSERT_THAT(tries, Le(1000)); - std::thread thread([&sampled] { - sampled = cordz_should_profile(); - }); + std::thread thread([&sampled] { sampled = cordz_should_profile() > 0; }); thread.join(); } while (sampled); } @@ -94,7 +92,7 @@ TEST(CordzFunctionsTest, ShouldProfileRate) { // new value for next_sample each iteration. cordz_set_next_sample_for_testing(0); cordz_should_profile(); - sum_of_intervals += cordz_next_sample; + sum_of_intervals += cordz_next_sample.next_sample; } // The sum of independent exponential variables is an Erlang distribution, diff --git a/absl/strings/internal/cordz_handle.cc b/absl/strings/internal/cordz_handle.cc index a7061dbe..53d5f529 100644 --- a/absl/strings/internal/cordz_handle.cc +++ b/absl/strings/internal/cordz_handle.cc @@ -16,6 +16,7 @@ #include <atomic> #include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK +#include "absl/base/no_destructor.h" #include "absl/synchronization/mutex.h" namespace absl { @@ -43,33 +44,32 @@ struct Queue { } }; -static Queue* GlobalQueue() { - static Queue* global_queue = new Queue; - return global_queue; +static Queue& GlobalQueue() { + static absl::NoDestructor<Queue> global_queue; + return *global_queue; } } // namespace CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) { - Queue* global_queue = GlobalQueue(); + Queue& global_queue = GlobalQueue(); if (is_snapshot) { - MutexLock lock(&global_queue->mutex); - CordzHandle* dq_tail = - global_queue->dq_tail.load(std::memory_order_acquire); + MutexLock lock(&global_queue.mutex); + CordzHandle* dq_tail = global_queue.dq_tail.load(std::memory_order_acquire); if (dq_tail != nullptr) { dq_prev_ = dq_tail; dq_tail->dq_next_ = this; } - global_queue->dq_tail.store(this, std::memory_order_release); + global_queue.dq_tail.store(this, std::memory_order_release); } } CordzHandle::~CordzHandle() { - Queue* global_queue = GlobalQueue(); + Queue& global_queue = GlobalQueue(); if (is_snapshot_) { std::vector<CordzHandle*> to_delete; { - MutexLock lock(&global_queue->mutex); + MutexLock lock(&global_queue.mutex); CordzHandle* next = dq_next_; if (dq_prev_ == nullptr) { // We were head of the queue, delete every CordzHandle until we reach @@ -85,7 +85,7 @@ CordzHandle::~CordzHandle() { if (next) { next->dq_prev_ = dq_prev_; } else { - global_queue->dq_tail.store(dq_prev_, std::memory_order_release); + global_queue.dq_tail.store(dq_prev_, std::memory_order_release); } } for (CordzHandle* handle : to_delete) { @@ -95,20 +95,20 @@ CordzHandle::~CordzHandle() { } bool CordzHandle::SafeToDelete() const { - return is_snapshot_ || GlobalQueue()->IsEmpty(); + return is_snapshot_ || GlobalQueue().IsEmpty(); } void CordzHandle::Delete(CordzHandle* handle) { assert(handle); if (handle) { - Queue* const queue = GlobalQueue(); + Queue& queue = GlobalQueue(); if (!handle->SafeToDelete()) { - MutexLock lock(&queue->mutex); - CordzHandle* dq_tail = queue->dq_tail.load(std::memory_order_acquire); + MutexLock lock(&queue.mutex); + CordzHandle* dq_tail = queue.dq_tail.load(std::memory_order_acquire); if (dq_tail != nullptr) { handle->dq_prev_ = dq_tail; dq_tail->dq_next_ = handle; - queue->dq_tail.store(handle, std::memory_order_release); + queue.dq_tail.store(handle, std::memory_order_release); return; } } @@ -118,9 +118,9 @@ void CordzHandle::Delete(CordzHandle* handle) { std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() { std::vector<const CordzHandle*> handles; - Queue* global_queue = GlobalQueue(); - MutexLock lock(&global_queue->mutex); - CordzHandle* dq_tail = global_queue->dq_tail.load(std::memory_order_acquire); + Queue& global_queue = GlobalQueue(); + MutexLock lock(&global_queue.mutex); + CordzHandle* dq_tail = global_queue.dq_tail.load(std::memory_order_acquire); for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) { handles.push_back(p); } @@ -133,9 +133,9 @@ bool CordzHandle::DiagnosticsHandleIsSafeToInspect( if (handle == nullptr) return true; if (handle->is_snapshot_) return false; bool snapshot_found = false; - Queue* global_queue = GlobalQueue(); - MutexLock lock(&global_queue->mutex); - for (const CordzHandle* p = global_queue->dq_tail; p; p = p->dq_prev_) { + Queue& global_queue = GlobalQueue(); + MutexLock lock(&global_queue.mutex); + for (const CordzHandle* p = global_queue.dq_tail; p; p = p->dq_prev_) { if (p == handle) return !snapshot_found; if (p == this) snapshot_found = true; } @@ -150,8 +150,8 @@ CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() { return handles; } - Queue* global_queue = GlobalQueue(); - MutexLock lock(&global_queue->mutex); + Queue& global_queue = GlobalQueue(); + MutexLock lock(&global_queue.mutex); for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) { if (!p->is_snapshot()) { handles.push_back(p); diff --git a/absl/strings/internal/cordz_info.cc b/absl/strings/internal/cordz_info.cc index b24c3da7..b7c7fed9 100644 --- a/absl/strings/internal/cordz_info.cc +++ b/absl/strings/internal/cordz_info.cc @@ -14,6 +14,8 @@ #include "absl/strings/internal/cordz_info.h" +#include <cstdint> + #include "absl/base/config.h" #include "absl/base/internal/spinlock.h" #include "absl/container/inlined_vector.h" @@ -247,10 +249,12 @@ CordzInfo* CordzInfo::Next(const CordzSnapshot& snapshot) const { return next; } -void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method) { +void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method, + int64_t sampling_stride) { assert(cord.is_tree()); assert(!cord.is_profiled()); - CordzInfo* cordz_info = new CordzInfo(cord.as_tree(), nullptr, method); + CordzInfo* cordz_info = + new CordzInfo(cord.as_tree(), nullptr, method, sampling_stride); cord.set_cordz_info(cordz_info); cordz_info->Track(); } @@ -266,7 +270,8 @@ void CordzInfo::TrackCord(InlineData& cord, const InlineData& src, if (cordz_info != nullptr) cordz_info->Untrack(); // Start new cord sample - cordz_info = new CordzInfo(cord.as_tree(), src.cordz_info(), method); + cordz_info = new CordzInfo(cord.as_tree(), src.cordz_info(), method, + src.cordz_info()->sampling_stride()); cord.set_cordz_info(cordz_info); cordz_info->Track(); } @@ -298,9 +303,8 @@ size_t CordzInfo::FillParentStack(const CordzInfo* src, void** stack) { return src->stack_depth_; } -CordzInfo::CordzInfo(CordRep* rep, - const CordzInfo* src, - MethodIdentifier method) +CordzInfo::CordzInfo(CordRep* rep, const CordzInfo* src, + MethodIdentifier method, int64_t sampling_stride) : rep_(rep), stack_depth_( static_cast<size_t>(absl::GetStackTrace(stack_, @@ -309,7 +313,8 @@ CordzInfo::CordzInfo(CordRep* rep, parent_stack_depth_(FillParentStack(src, parent_stack_)), method_(method), parent_method_(GetParentMethod(src)), - create_time_(absl::Now()) { + create_time_(absl::Now()), + sampling_stride_(sampling_stride) { update_tracker_.LossyAdd(method); if (src) { // Copy parent counters. diff --git a/absl/strings/internal/cordz_info.h b/absl/strings/internal/cordz_info.h index 17eaa91c..2dc9d16d 100644 --- a/absl/strings/internal/cordz_info.h +++ b/absl/strings/internal/cordz_info.h @@ -60,7 +60,8 @@ class ABSL_LOCKABLE CordzInfo : public CordzHandle { // and/or deleted. `method` identifies the Cord public API method initiating // the cord to be sampled. // Requires `cord` to hold a tree, and `cord.cordz_info()` to be null. - static void TrackCord(InlineData& cord, MethodIdentifier method); + static void TrackCord(InlineData& cord, MethodIdentifier method, + int64_t sampling_stride); // Identical to TrackCord(), except that this function fills the // `parent_stack` and `parent_method` properties of the returned CordzInfo @@ -181,6 +182,8 @@ class ABSL_LOCKABLE CordzInfo : public CordzHandle { // or RemovePrefix. CordzStatistics GetCordzStatistics() const; + int64_t sampling_stride() const { return sampling_stride_; } + private: using SpinLock = absl::base_internal::SpinLock; using SpinLockHolder = ::absl::base_internal::SpinLockHolder; @@ -199,7 +202,7 @@ class ABSL_LOCKABLE CordzInfo : public CordzHandle { static constexpr size_t kMaxStackDepth = 64; explicit CordzInfo(CordRep* rep, const CordzInfo* src, - MethodIdentifier method); + MethodIdentifier method, int64_t weight); ~CordzInfo() override; // Sets `rep_` without holding a lock. @@ -250,12 +253,14 @@ class ABSL_LOCKABLE CordzInfo : public CordzHandle { const MethodIdentifier parent_method_; CordzUpdateTracker update_tracker_; const absl::Time create_time_; + const int64_t sampling_stride_; }; inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord( InlineData& cord, MethodIdentifier method) { - if (ABSL_PREDICT_FALSE(cordz_should_profile())) { - TrackCord(cord, method); + auto stride = cordz_should_profile(); + if (ABSL_PREDICT_FALSE(stride > 0)) { + TrackCord(cord, method, stride); } } diff --git a/absl/strings/internal/cordz_info_statistics_test.cc b/absl/strings/internal/cordz_info_statistics_test.cc index d55773f2..3e6a8a09 100644 --- a/absl/strings/internal/cordz_info_statistics_test.cc +++ b/absl/strings/internal/cordz_info_statistics_test.cc @@ -152,7 +152,7 @@ size_t FairShare(CordRep* rep, size_t ref = 1) { // Samples the cord and returns CordzInfo::GetStatistics() CordzStatistics SampleCord(CordRep* rep) { InlineData cord(rep); - CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown, 1); CordzStatistics stats = cord.cordz_info()->GetCordzStatistics(); cord.cordz_info()->Untrack(); return stats; @@ -480,7 +480,7 @@ TEST(CordzInfoStatisticsTest, ThreadSafety) { // 50/50 sample if (coin_toss(gen) != 0) { - CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown, 1); } } } diff --git a/absl/strings/internal/cordz_info_test.cc b/absl/strings/internal/cordz_info_test.cc index cd226c3e..81ecce2c 100644 --- a/absl/strings/internal/cordz_info_test.cc +++ b/absl/strings/internal/cordz_info_test.cc @@ -65,7 +65,7 @@ std::string FormatStack(absl::Span<void* const> raw_stack) { TEST(CordzInfoTest, TrackCord) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); ASSERT_THAT(info, Ne(nullptr)); EXPECT_FALSE(info->is_snapshot()); @@ -91,7 +91,7 @@ TEST(CordzInfoTest, MaybeTrackChildCordWithSampling) { TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingParentSampled) { CordzSamplingIntervalHelper sample_none(99999); TestCordData parent, child; - CordzInfo::TrackCord(parent.data, kTrackCordMethod); + CordzInfo::TrackCord(parent.data, kTrackCordMethod, 1); CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); CordzInfo* parent_info = parent.data.cordz_info(); CordzInfo* child_info = child.data.cordz_info(); @@ -105,7 +105,7 @@ TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingParentSampled) { TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingChildSampled) { CordzSamplingIntervalHelper sample_none(99999); TestCordData parent, child; - CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::TrackCord(child.data, kTrackCordMethod, 1); CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); } @@ -113,14 +113,14 @@ TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingChildSampled) { TEST(CordzInfoTest, MaybeTrackChildCordWithSamplingChildSampled) { CordzSamplingIntervalHelper sample_all(1); TestCordData parent, child; - CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::TrackCord(child.data, kTrackCordMethod, 1); CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); } TEST(CordzInfoTest, UntrackCord) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); info->Untrack(); @@ -129,7 +129,7 @@ TEST(CordzInfoTest, UntrackCord) { TEST(CordzInfoTest, UntrackCordWithSnapshot) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); CordzSnapshot snapshot; @@ -141,7 +141,7 @@ TEST(CordzInfoTest, UntrackCordWithSnapshot) { TEST(CordzInfoTest, SetCordRep) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); TestCordRep rep; @@ -155,7 +155,7 @@ TEST(CordzInfoTest, SetCordRep) { TEST(CordzInfoTest, SetCordRepNullUntracksCordOnUnlock) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); info->Lock(CordzUpdateTracker::kAppendString); @@ -169,7 +169,7 @@ TEST(CordzInfoTest, SetCordRepNullUntracksCordOnUnlock) { TEST(CordzInfoTest, RefCordRep) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); size_t refcount = data.rep.rep->refcount.Get(); @@ -183,7 +183,7 @@ TEST(CordzInfoTest, RefCordRep) { TEST(CordzInfoTest, SetCordRepRequiresMutex) { TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); TestCordRep rep; EXPECT_DEBUG_DEATH(info->SetCordRep(rep.rep), ".*"); @@ -197,13 +197,13 @@ TEST(CordzInfoTest, TrackUntrackHeadFirstV2) { EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info1 = data.data.cordz_info(); ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); TestCordData data2; - CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo::TrackCord(data2.data, kTrackCordMethod, 1); CordzInfo* info2 = data2.data.cordz_info(); ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); EXPECT_THAT(info2->Next(snapshot), Eq(info1)); @@ -222,13 +222,13 @@ TEST(CordzInfoTest, TrackUntrackTailFirstV2) { EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); TestCordData data; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info1 = data.data.cordz_info(); ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); TestCordData data2; - CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo::TrackCord(data2.data, kTrackCordMethod, 1); CordzInfo* info2 = data2.data.cordz_info(); ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); EXPECT_THAT(info2->Next(snapshot), Eq(info1)); @@ -254,7 +254,7 @@ TEST(CordzInfoTest, StackV2) { // makes small modifications to its testing stack. 50 is sufficient to prove // that we got a decent stack. static constexpr int kMaxStackDepth = 50; - CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo::TrackCord(data.data, kTrackCordMethod, 1); CordzInfo* info = data.data.cordz_info(); std::vector<void*> local_stack; local_stack.resize(kMaxStackDepth); @@ -284,7 +284,7 @@ CordzInfo* TrackChildCord(InlineData& data, const InlineData& parent) { return data.cordz_info(); } CordzInfo* TrackParentCord(InlineData& data) { - CordzInfo::TrackCord(data, kTrackCordMethod); + CordzInfo::TrackCord(data, kTrackCordMethod, 1); return data.cordz_info(); } diff --git a/absl/strings/internal/cordz_sample_token_test.cc b/absl/strings/internal/cordz_sample_token_test.cc index 6be1770d..7152603d 100644 --- a/absl/strings/internal/cordz_sample_token_test.cc +++ b/absl/strings/internal/cordz_sample_token_test.cc @@ -81,11 +81,11 @@ TEST(CordzSampleTokenTest, IteratorEmpty) { TEST(CordzSampleTokenTest, Iterator) { TestCordData cord1, cord2, cord3; - CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo::TrackCord(cord1.data, kTrackCordMethod, 1); CordzInfo* info1 = cord1.data.cordz_info(); - CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo::TrackCord(cord2.data, kTrackCordMethod, 1); CordzInfo* info2 = cord2.data.cordz_info(); - CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo::TrackCord(cord3.data, kTrackCordMethod, 1); CordzInfo* info3 = cord3.data.cordz_info(); CordzSampleToken token; @@ -105,21 +105,21 @@ TEST(CordzSampleTokenTest, IteratorEquality) { TestCordData cord1; TestCordData cord2; TestCordData cord3; - CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo::TrackCord(cord1.data, kTrackCordMethod, 1); CordzInfo* info1 = cord1.data.cordz_info(); CordzSampleToken token1; // lhs starts with the CordzInfo corresponding to cord1 at the head. CordzSampleToken::Iterator lhs = token1.begin(); - CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo::TrackCord(cord2.data, kTrackCordMethod, 1); CordzInfo* info2 = cord2.data.cordz_info(); CordzSampleToken token2; // rhs starts with the CordzInfo corresponding to cord2 at the head. CordzSampleToken::Iterator rhs = token2.begin(); - CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo::TrackCord(cord3.data, kTrackCordMethod, 1); CordzInfo* info3 = cord3.data.cordz_info(); // lhs is on cord1 while rhs is on cord2. @@ -170,7 +170,7 @@ TEST(CordzSampleTokenTest, MultiThreaded) { cord.data.clear_cordz_info(); } else { // 2) Track - CordzInfo::TrackCord(cord.data, kTrackCordMethod); + CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1); } } else { std::unique_ptr<CordzSampleToken>& token = tokens[index]; diff --git a/absl/strings/internal/cordz_update_scope_test.cc b/absl/strings/internal/cordz_update_scope_test.cc index 3d08c622..1b4701f1 100644 --- a/absl/strings/internal/cordz_update_scope_test.cc +++ b/absl/strings/internal/cordz_update_scope_test.cc @@ -37,7 +37,7 @@ TEST(CordzUpdateScopeTest, ScopeNullptr) { TEST(CordzUpdateScopeTest, ScopeSampledCord) { TestCordData cord; - CordzInfo::TrackCord(cord.data, kTrackCordMethod); + CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1); CordzUpdateScope scope(cord.data.cordz_info(), kTrackCordMethod); cord.data.cordz_info()->SetCordRep(nullptr); } diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc index 56a4cbed..d2abe669 100644 --- a/absl/strings/internal/escaping.cc +++ b/absl/strings/internal/escaping.cc @@ -14,6 +14,8 @@ #include "absl/strings/internal/escaping.h" +#include <limits> + #include "absl/base/internal/endian.h" #include "absl/base/internal/raw_logging.h" @@ -31,12 +33,14 @@ ABSL_CONST_INIT const char kBase64Chars[] = ABSL_CONST_INIT const char kWebSafeBase64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; - size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { // Base64 encodes three bytes of input at a time. If the input is not // divisible by three, we pad as appropriate. // // Base64 encodes each three bytes of input into four bytes of output. + constexpr size_t kMaxSize = (std::numeric_limits<size_t>::max() - 1) / 4 * 3; + ABSL_INTERNAL_CHECK(input_len <= kMaxSize, + "CalculateBase64EscapedLenInternal() overflow"); size_t len = (input_len / 3) * 4; // Since all base 64 input is an integral number of octets, only the following @@ -66,7 +70,6 @@ size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { } } - assert(len >= input_len); // make sure we didn't overflow return len; } diff --git a/absl/strings/internal/has_absl_stringify.h b/absl/strings/internal/has_absl_stringify.h deleted file mode 100644 index f82cfe26..00000000 --- a/absl/strings/internal/has_absl_stringify.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2024 The Abseil Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ -#define ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ - -#include "absl/strings/has_absl_stringify.h" - -#include "absl/base/config.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN - -namespace strings_internal { - -// This exists to fix a circular dependency problem with the GoogleTest release. -// GoogleTest referenced this internal file and this internal trait. Since -// simultaneous releases are not possible since once release must reference -// another, we will temporarily add this back. -// https://github.com/google/googletest/blob/v1.14.x/googletest/include/gtest/gtest-printers.h#L119 -// -// This file can be deleted after the next Abseil and GoogleTest release. -// -// https://github.com/google/googletest/pull/4368#issuecomment-1717699895 -// https://github.com/google/googletest/pull/4368#issuecomment-1717699895 -using ::absl::HasAbslStringify; - -} // namespace strings_internal - -ABSL_NAMESPACE_END -} // namespace absl - -#endif // ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc index 7f222778..baffe052 100644 --- a/absl/strings/internal/str_format/convert_test.cc +++ b/absl/strings/internal/str_format/convert_test.cc @@ -785,8 +785,7 @@ TEST_F(FormatConvertTest, Uint128) { } template <typename Floating> -void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats, - const std::set<Floating> &skip_verify) { +void TestWithMultipleFormatsHelper(Floating tested_float) { const NativePrintfTraits &native_traits = VerifyNativeImplementation(); // Reserve the space to ensure we don't allocate memory in the output itself. std::string str_format_result; @@ -817,41 +816,41 @@ void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats, continue; } - for (Floating d : floats) { - if (!native_traits.hex_float_prefers_denormal_repr && - (f == 'a' || f == 'A') && std::fpclassify(d) == FP_SUBNORMAL) { - continue; - } + if (!native_traits.hex_float_prefers_denormal_repr && + (f == 'a' || f == 'A') && + std::fpclassify(tested_float) == FP_SUBNORMAL) { + continue; + } int i = -10; - FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; + FormatArgImpl args[2] = {FormatArgImpl(tested_float), FormatArgImpl(i)}; UntypedFormatSpecImpl format(fmt_str); string_printf_result.clear(); - StrAppend(&string_printf_result, fmt_str.c_str(), d, i); + StrAppend(&string_printf_result, fmt_str.c_str(), tested_float, i); str_format_result.clear(); { AppendPack(&str_format_result, format, absl::MakeSpan(args)); } + // For values that we know won't match the standard library + // implementation we skip verification, but still run the algorithm to + // catch asserts/sanitizer bugs. #ifdef _MSC_VER // MSVC has a different rounding policy than us so we can't test our // implementation against the native one there. continue; #elif defined(__APPLE__) // Apple formats NaN differently (+nan) vs. (nan) - if (std::isnan(d)) continue; + if (std::isnan(tested_float)) continue; #endif - if (string_printf_result != str_format_result && - skip_verify.find(d) == skip_verify.end()) { - // We use ASSERT_EQ here because failures are usually correlated and a - // bug would print way too many failed expectations causing the test - // to time out. - ASSERT_EQ(string_printf_result, str_format_result) - << fmt_str << " " << StrPrint("%.18g", d) << " " - << StrPrint("%a", d) << " " << StrPrint("%.50f", d); - } - } + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test + // to time out. + ASSERT_EQ(string_printf_result, str_format_result) + << fmt_str << " " << StrPrint("%.18g", tested_float) << " " + << StrPrint("%a", tested_float) << " " + << StrPrint("%.50f", tested_float); } } } @@ -904,14 +903,12 @@ TEST_F(FormatConvertTest, Float) { }); floats.erase(std::unique(floats.begin(), floats.end()), floats.end()); - TestWithMultipleFormatsHelper(floats, {}); + for (float f : floats) { + TestWithMultipleFormatsHelper(f); + } } TEST_F(FormatConvertTest, Double) { - // For values that we know won't match the standard library implementation we - // skip verification, but still run the algorithm to catch asserts/sanitizer - // bugs. - std::set<double> skip_verify; std::vector<double> doubles = {0.0, -0.0, .99999999999999, @@ -946,32 +943,9 @@ TEST_F(FormatConvertTest, Double) { } } - // Workaround libc bug. - // https://sourceware.org/bugzilla/show_bug.cgi?id=22142 - const bool gcc_bug_22142 = - StrPrint("%f", std::numeric_limits<double>::max()) != - "1797693134862315708145274237317043567980705675258449965989174768031" - "5726078002853876058955863276687817154045895351438246423432132688946" - "4182768467546703537516986049910576551282076245490090389328944075868" - "5084551339423045832369032229481658085593321233482747978262041447231" - "68738177180919299881250404026184124858368.000000"; - for (int exp = -300; exp <= 300; ++exp) { const double all_ones_mantissa = 0x1fffffffffffff; doubles.push_back(std::ldexp(all_ones_mantissa, exp)); - if (gcc_bug_22142) { - skip_verify.insert(doubles.back()); - } - } - - if (gcc_bug_22142) { - using L = std::numeric_limits<double>; - skip_verify.insert(L::max()); - skip_verify.insert(L::min()); // NOLINT - skip_verify.insert(L::denorm_min()); - skip_verify.insert(-L::max()); - skip_verify.insert(-L::min()); // NOLINT - skip_verify.insert(-L::denorm_min()); } // Remove duplicates to speed up the logic below. @@ -982,7 +956,9 @@ TEST_F(FormatConvertTest, Double) { }); doubles.erase(std::unique(doubles.begin(), doubles.end()), doubles.end()); - TestWithMultipleFormatsHelper(doubles, skip_verify); + for (double d : doubles) { + TestWithMultipleFormatsHelper(d); + } } TEST_F(FormatConvertTest, DoubleRound) { diff --git a/absl/strings/internal/str_join_internal.h b/absl/strings/internal/str_join_internal.h index d97d5033..3e730c7a 100644 --- a/absl/strings/internal/str_join_internal.h +++ b/absl/strings/internal/str_join_internal.h @@ -31,16 +31,23 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ #define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ +#include <cstdint> #include <cstring> +#include <initializer_list> #include <iterator> +#include <limits> #include <memory> #include <string> +#include <tuple> #include <type_traits> #include <utility> +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" #include "absl/strings/internal/ostringstream.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -230,14 +237,19 @@ std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, if (start != end) { // Sums size auto&& start_value = *start; - size_t result_size = start_value.size(); + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + uint64_t result_size = start_value.size(); for (Iterator it = start; ++it != end;) { result_size += s.size(); result_size += (*it).size(); } if (result_size > 0) { - STLStringResizeUninitialized(&result, result_size); + constexpr uint64_t kMaxSize = + uint64_t{(std::numeric_limits<size_t>::max)()}; + ABSL_INTERNAL_CHECK(result_size <= kMaxSize, "size_t overflow"); + STLStringResizeUninitialized(&result, static_cast<size_t>(result_size)); // Joins strings char* result_buf = &*result.begin(); @@ -310,6 +322,15 @@ std::string JoinRange(const Range& range, absl::string_view separator) { return JoinRange(begin(range), end(range), separator); } +template <typename Tuple, std::size_t... I> +std::string JoinTuple(const Tuple& value, absl::string_view separator, + std::index_sequence<I...>) { + return JoinRange( + std::initializer_list<absl::string_view>{ + static_cast<const AlphaNum&>(std::get<I>(value)).Piece()...}, + separator); +} + } // namespace strings_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h index 081ad85a..11ea96f2 100644 --- a/absl/strings/internal/str_split_internal.h +++ b/absl/strings/internal/str_split_internal.h @@ -30,6 +30,7 @@ #define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ #include <array> +#include <cstddef> #include <initializer_list> #include <iterator> #include <tuple> @@ -402,7 +403,10 @@ class Splitter { ar[index].size = it->size(); ++it; } while (++index != ar.size() && !it.at_end()); - v.insert(v.end(), ar.begin(), ar.begin() + index); + // We static_cast index to a signed type to work around overzealous + // compiler warnings about signedness. + v.insert(v.end(), ar.begin(), + ar.begin() + static_cast<ptrdiff_t>(index)); } return v; } diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 882c3a8b..b57d9e82 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -20,9 +20,7 @@ #include <algorithm> #include <cassert> #include <cfloat> // for DBL_DIG and FLT_DIG -#include <climits> #include <cmath> // for HUGE_VAL -#include <cstddef> #include <cstdint> #include <cstdio> #include <cstdlib> @@ -30,7 +28,6 @@ #include <iterator> #include <limits> #include <system_error> // NOLINT(build/c++11) -#include <type_traits> #include <utility> #include "absl/base/attributes.h" @@ -159,71 +156,28 @@ constexpr uint32_t kTwoZeroBytes = 0x0101 * '0'; constexpr uint64_t kFourZeroBytes = 0x01010101 * '0'; constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0'; -template <typename T> -constexpr T Pow(T base, uint32_t n) { - // Exponentiation by squaring - return static_cast<T>((n > 1 ? Pow(base * base, n >> 1) : static_cast<T>(1)) * - ((n & 1) ? base : static_cast<T>(1))); -} - -// Given n, calculates C where the following holds for all 0 <= x < Pow(100, n): -// x / Pow(10, n) == x * C / Pow(2, n * 10) -// In other words, it allows us to divide by a power of 10 via a single -// multiplication and bit shifts, assuming the input will be smaller than the -// square of that power of 10. -template <typename T> -constexpr T ComputePowerOf100DivisionCoefficient(uint32_t n) { - if (n > 4) { - // This doesn't work for large powers of 100, due to overflow - abort(); - } - T denom = 16 - 1; - T num = (denom + 1) - 10; - T gcd = 3; // Greatest common divisor of numerator and denominator - denom = Pow(denom / gcd, n); - num = Pow(num / gcd, 9 * n); - T quotient = num / denom; - if (num % denom >= denom / 2) { - // Round up, since the remainder is more than half the denominator - ++quotient; - } - return quotient; -} - -// * kDivisionBy10Mul / kDivisionBy10Div is a division by 10 for values from 0 -// to 99. It's also a division of a structure [k takes 2 bytes][m takes 2 -// bytes], then * kDivisionBy10Mul / kDivisionBy10Div will be [k / 10][m / 10]. -// It allows parallel division. -constexpr uint64_t kDivisionBy10Mul = - ComputePowerOf100DivisionCoefficient<uint64_t>(1); -static_assert(kDivisionBy10Mul == 103, - "division coefficient for 10 is incorrect"); +// * 103 / 1024 is a division by 10 for values from 0 to 99. It's also a +// division of a structure [k takes 2 bytes][m takes 2 bytes], then * 103 / 1024 +// will be [k / 10][m / 10]. It allows parallel division. +constexpr uint64_t kDivisionBy10Mul = 103u; constexpr uint64_t kDivisionBy10Div = 1 << 10; -// * kDivisionBy100Mul / kDivisionBy100Div is a division by 100 for values from -// 0 to 9999. -constexpr uint64_t kDivisionBy100Mul = - ComputePowerOf100DivisionCoefficient<uint64_t>(2); -static_assert(kDivisionBy100Mul == 10486, - "division coefficient for 100 is incorrect"); +// * 10486 / 1048576 is a division by 100 for values from 0 to 9999. +constexpr uint64_t kDivisionBy100Mul = 10486u; constexpr uint64_t kDivisionBy100Div = 1 << 20; -static_assert(ComputePowerOf100DivisionCoefficient<uint64_t>(3) == 1073742, - "division coefficient for 1000 is incorrect"); - -// Same as `PrepareEightDigits`, but produces 2 digits for integers < 100. -inline uint32_t PrepareTwoDigitsImpl(uint32_t i, bool reversed) { - assert(i < 100); - uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div; - uint32_t mod10 = i - 10u * div10; - return (div10 << (reversed ? 8 : 0)) + (mod10 << (reversed ? 0 : 8)); -} -inline uint32_t PrepareTwoDigits(uint32_t i) { - return PrepareTwoDigitsImpl(i, false); +// Encode functions write the ASCII output of input `n` to `out_str`. +inline char* EncodeHundred(uint32_t n, absl::Nonnull<char*> out_str) { + int num_digits = static_cast<int>(n - 10) >> 8; + uint32_t div10 = (n * kDivisionBy10Mul) / kDivisionBy10Div; + uint32_t mod10 = n - 10u * div10; + uint32_t base = kTwoZeroBytes + div10 + (mod10 << 8); + base >>= num_digits & 8; + little_endian::Store16(out_str, static_cast<uint16_t>(base)); + return out_str + 2 + num_digits; } -// Same as `PrepareEightDigits`, but produces 4 digits for integers < 10000. -inline uint32_t PrepareFourDigitsImpl(uint32_t n, bool reversed) { +inline char* EncodeTenThousand(uint32_t n, absl::Nonnull<char*> out_str) { // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive // blocks. 123 -> [\0\1][\0\23]. We divide by 10 both blocks // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in @@ -231,19 +185,22 @@ inline uint32_t PrepareFourDigitsImpl(uint32_t n, bool reversed) { // strip trailing zeros, add ASCII '0000' and return. uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div; uint32_t mod100 = n - 100ull * div100; - uint32_t hundreds = - (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0)); + uint32_t hundreds = (mod100 << 16) + div100; uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div; tens &= (0xFull << 16) | 0xFull; - tens = (tens << (reversed ? 8 : 0)) + - static_cast<uint32_t>((hundreds - 10ull * tens) << (reversed ? 0 : 8)); - return tens; -} -inline uint32_t PrepareFourDigits(uint32_t n) { - return PrepareFourDigitsImpl(n, false); -} -inline uint32_t PrepareFourDigitsReversed(uint32_t n) { - return PrepareFourDigitsImpl(n, true); + tens += (hundreds - 10ull * tens) << 8; + ABSL_ASSUME(tens != 0); + // The result can contain trailing zero bits, we need to strip them to a first + // significant byte in a final representation. For example, for n = 123, we + // have tens to have representation \0\1\2\3. We do `& -8` to round + // to a multiple to 8 to strip zero bytes, not all zero bits. + // countr_zero to help. + // 0 minus 8 to make MSVC happy. + uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(tens)) & (0 - 8u); + tens += kFourZeroBytes; + tens >>= zeroes; + little_endian::Store32(out_str, tens); + return out_str + sizeof(tens) - zeroes / 8; } // Helper function to produce an ASCII representation of `i`. @@ -259,309 +216,126 @@ inline uint32_t PrepareFourDigitsReversed(uint32_t n) { // // Note two leading zeros: // EXPECT_EQ(absl::string_view(ascii, 8), "00102030"); // -// If `Reversed` is set to true, the result becomes reversed to "03020100". -// // Pre-condition: `i` must be less than 100000000. -inline uint64_t PrepareEightDigitsImpl(uint32_t i, bool reversed) { +inline uint64_t PrepareEightDigits(uint32_t i) { ABSL_ASSUME(i < 10000'0000); // Prepare 2 blocks of 4 digits "in parallel". uint32_t hi = i / 10000; uint32_t lo = i % 10000; - uint64_t merged = (uint64_t{hi} << (reversed ? 32 : 0)) | - (uint64_t{lo} << (reversed ? 0 : 32)); + uint64_t merged = hi | (uint64_t{lo} << 32); uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) & ((0x7Full << 32) | 0x7Full); uint64_t mod100 = merged - 100ull * div100; - uint64_t hundreds = - (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0)); + uint64_t hundreds = (mod100 << 16) + div100; uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div; tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull; - tens = (tens << (reversed ? 8 : 0)) + - ((hundreds - 10ull * tens) << (reversed ? 0 : 8)); + tens += (hundreds - 10ull * tens) << 8; return tens; } -inline uint64_t PrepareEightDigits(uint32_t i) { - return PrepareEightDigitsImpl(i, false); -} -inline uint64_t PrepareEightDigitsReversed(uint32_t i) { - return PrepareEightDigitsImpl(i, true); -} -template <typename T, typename BackwardIt> -class FastUIntToStringConverter { - static_assert( - std::is_same<T, decltype(+std::declval<T>())>::value, - "to avoid code bloat, only instantiate this for int and larger types"); - static_assert(std::is_unsigned<T>::value, - "this class is only for unsigned types"); - - public: - // Outputs the given number backward (like with std::copy_backward), - // starting from the end of the string. - // The number of digits in the number must have been already measured and - // passed *exactly*, otherwise the behavior is undefined. - // (This is an optimization, as calculating the number of digits again would - // slow down the hot path.) - // Returns an iterator to the start of the suffix that was appended. - static BackwardIt FastIntToBufferBackward(T v, BackwardIt end) { - // THIS IS A HOT FUNCTION with a very deliberate structure to exploit branch - // prediction and shorten the critical path for smaller numbers. - // Do not move around the if/else blocks or attempt to simplify it - // without benchmarking any changes. - - if (v < 10) { - goto AT_LEAST_1 /* NOTE: mandatory for the 0 case */; - } - if (v < 1000) { - goto AT_LEAST_10; - } - if (v < 10000000) { - goto AT_LEAST_1000; - } - - if (v >= 100000000 / 10) { - if (v >= 10000000000000000 / 10) { - DoFastIntToBufferBackward<8>(v, end); - } - DoFastIntToBufferBackward<8>(v, end); - } - - if (v >= 10000 / 10) { - AT_LEAST_1000: - DoFastIntToBufferBackward<4>(v, end); - } - - if (v >= 100 / 10) { - AT_LEAST_10: - DoFastIntToBufferBackward<2>(v, end); - } - - if (v >= 10 / 10) { - AT_LEAST_1: - end = DoFastIntToBufferBackward(v, end, std::integral_constant<int, 1>()); - } - return end; +inline ABSL_ATTRIBUTE_ALWAYS_INLINE absl::Nonnull<char*> EncodeFullU32( + uint32_t n, absl::Nonnull<char*> out_str) { + if (n < 10) { + *out_str = static_cast<char>('0' + n); + return out_str + 1; } - - private: - // Only assume pointers are contiguous for now. String and vector iterators - // could be special-cased as well, but there's no need for them here. - // With C++20 we can probably switch to std::contiguous_iterator_tag. - static constexpr bool kIsContiguousIterator = - std::is_pointer<BackwardIt>::value; - - template <int Exponent> - static void DoFastIntToBufferBackward(T& v, BackwardIt& end) { - constexpr T kModulus = Pow<T>(10, Exponent); - T remainder = static_cast<T>(v % kModulus); - v = static_cast<T>(v / kModulus); - end = DoFastIntToBufferBackward(remainder, end, - std::integral_constant<int, Exponent>()); - } - - static BackwardIt DoFastIntToBufferBackward(const T&, BackwardIt end, - std::integral_constant<int, 0>) { - return end; - } - - static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end, - std::integral_constant<int, 1>) { - *--end = static_cast<char>('0' + v); - return DoFastIntToBufferBackward(v, end, std::integral_constant<int, 0>()); - } - - static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end, - std::integral_constant<int, 4>) { - if (kIsContiguousIterator) { - const uint32_t digits = - PrepareFourDigits(static_cast<uint32_t>(v)) + kFourZeroBytes; - end -= sizeof(digits); - little_endian::Store32(&*end, digits); - } else { - uint32_t digits = - PrepareFourDigitsReversed(static_cast<uint32_t>(v)) + kFourZeroBytes; - for (size_t i = 0; i < sizeof(digits); ++i) { - *--end = static_cast<char>(digits); - digits >>= CHAR_BIT; - } - } - return end; + if (n < 100'000'000) { + uint64_t bottom = PrepareEightDigits(n); + ABSL_ASSUME(bottom != 0); + // 0 minus 8 to make MSVC happy. + uint32_t zeroes = + static_cast<uint32_t>(absl::countr_zero(bottom)) & (0 - 8u); + little_endian::Store64(out_str, (bottom + kEightZeroBytes) >> zeroes); + return out_str + sizeof(bottom) - zeroes / 8; } - - static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end, - std::integral_constant<int, 8>) { - if (kIsContiguousIterator) { - const uint64_t digits = - PrepareEightDigits(static_cast<uint32_t>(v)) + kEightZeroBytes; - end -= sizeof(digits); - little_endian::Store64(&*end, digits); - } else { - uint64_t digits = PrepareEightDigitsReversed(static_cast<uint32_t>(v)) + - kEightZeroBytes; - for (size_t i = 0; i < sizeof(digits); ++i) { - *--end = static_cast<char>(digits); - digits >>= CHAR_BIT; - } - } - return end; - } - - template <int Digits> - static BackwardIt DoFastIntToBufferBackward( - T v, BackwardIt end, std::integral_constant<int, Digits>) { - constexpr int kLogModulus = Digits - Digits / 2; - constexpr T kModulus = Pow(static_cast<T>(10), kLogModulus); - bool is_safe_to_use_division_trick = Digits <= 8; - T quotient, remainder; - if (is_safe_to_use_division_trick) { - constexpr uint64_t kCoefficient = - ComputePowerOf100DivisionCoefficient<uint64_t>(kLogModulus); - quotient = (v * kCoefficient) >> (10 * kLogModulus); - remainder = v - quotient * kModulus; - } else { - quotient = v / kModulus; - remainder = v % kModulus; - } - end = DoFastIntToBufferBackward(remainder, end, - std::integral_constant<int, kLogModulus>()); - return DoFastIntToBufferBackward( - quotient, end, std::integral_constant<int, Digits - kLogModulus>()); - } -}; - -// Returns an iterator to the start of the suffix that was appended -template <typename T, typename BackwardIt> -std::enable_if_t<std::is_unsigned<T>::value, BackwardIt> -DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) { - using PromotedT = std::decay_t<decltype(+v)>; - using Converter = FastUIntToStringConverter<PromotedT, BackwardIt>; - (void)digits; - return Converter().FastIntToBufferBackward(v, end); + uint32_t div08 = n / 100'000'000; + uint32_t mod08 = n % 100'000'000; + uint64_t bottom = PrepareEightDigits(mod08) + kEightZeroBytes; + out_str = EncodeHundred(div08, out_str); + little_endian::Store64(out_str, bottom); + return out_str + sizeof(bottom); } -template <typename T, typename BackwardIt> -std::enable_if_t<std::is_signed<T>::value, BackwardIt> -DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) { - if (absl::numbers_internal::IsNegative(v)) { - // Store the minus sign *before* we produce the number itself, not after. - // This gets us a tail call. - end[-static_cast<ptrdiff_t>(digits) - 1] = '-'; +inline ABSL_ATTRIBUTE_ALWAYS_INLINE char* EncodeFullU64(uint64_t i, + char* buffer) { + if (i <= std::numeric_limits<uint32_t>::max()) { + return EncodeFullU32(static_cast<uint32_t>(i), buffer); } - return DoFastIntToBufferBackward( - absl::numbers_internal::UnsignedAbsoluteValue(v), end, digits); -} - -template <class T> -std::enable_if_t<std::is_integral<T>::value, int> -GetNumDigitsOrNegativeIfNegativeImpl(T v) { - const auto /* either bool or std::false_type */ is_negative = - absl::numbers_internal::IsNegative(v); - const int digits = static_cast<int>(absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(v))); - return is_negative ? ~digits : digits; + uint32_t mod08; + if (i < 1'0000'0000'0000'0000ull) { + uint32_t div08 = static_cast<uint32_t>(i / 100'000'000ull); + mod08 = static_cast<uint32_t>(i % 100'000'000ull); + buffer = EncodeFullU32(div08, buffer); + } else { + uint64_t div08 = i / 100'000'000ull; + mod08 = static_cast<uint32_t>(i % 100'000'000ull); + uint32_t div016 = static_cast<uint32_t>(div08 / 100'000'000ull); + uint32_t div08mod08 = static_cast<uint32_t>(div08 % 100'000'000ull); + uint64_t mid_result = PrepareEightDigits(div08mod08) + kEightZeroBytes; + buffer = EncodeTenThousand(div016, buffer); + little_endian::Store64(buffer, mid_result); + buffer += sizeof(mid_result); + } + uint64_t mod_result = PrepareEightDigits(mod08) + kEightZeroBytes; + little_endian::Store64(buffer, mod_result); + return buffer + sizeof(mod_result); } } // namespace void numbers_internal::PutTwoDigits(uint32_t i, absl::Nonnull<char*> buf) { - little_endian::Store16( - buf, static_cast<uint16_t>(PrepareTwoDigits(i) + kTwoZeroBytes)); + assert(i < 100); + uint32_t base = kTwoZeroBytes; + uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div; + uint32_t mod10 = i - 10u * div10; + base += div10 + (mod10 << 8); + little_endian::Store16(buf, static_cast<uint16_t>(base)); } absl::Nonnull<char*> numbers_internal::FastIntToBuffer( - uint32_t i, absl::Nonnull<char*> buffer) { - const uint32_t digits = absl::numbers_internal::Base10Digits(i); - buffer += digits; - *buffer = '\0'; // We're going backward, so store this first - FastIntToBufferBackward(i, buffer, digits); - return buffer; + uint32_t n, absl::Nonnull<char*> out_str) { + out_str = EncodeFullU32(n, out_str); + *out_str = '\0'; + return out_str; } absl::Nonnull<char*> numbers_internal::FastIntToBuffer( int32_t i, absl::Nonnull<char*> buffer) { - buffer += static_cast<int>(i < 0); - uint32_t digits = absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(i)); - buffer += digits; - *buffer = '\0'; // We're going backward, so store this first - FastIntToBufferBackward(i, buffer, digits); + uint32_t u = static_cast<uint32_t>(i); + if (i < 0) { + *buffer++ = '-'; + // We need to do the negation in modular (i.e., "unsigned") + // arithmetic; MSVC++ apparently warns for plain "-u", so + // we write the equivalent expression "0 - u" instead. + u = 0 - u; + } + buffer = EncodeFullU32(u, buffer); + *buffer = '\0'; return buffer; } absl::Nonnull<char*> numbers_internal::FastIntToBuffer( uint64_t i, absl::Nonnull<char*> buffer) { - uint32_t digits = absl::numbers_internal::Base10Digits(i); - buffer += digits; - *buffer = '\0'; // We're going backward, so store this first - FastIntToBufferBackward(i, buffer, digits); + buffer = EncodeFullU64(i, buffer); + *buffer = '\0'; return buffer; } absl::Nonnull<char*> numbers_internal::FastIntToBuffer( int64_t i, absl::Nonnull<char*> buffer) { - buffer += static_cast<int>(i < 0); - uint32_t digits = absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(i)); - buffer += digits; - *buffer = '\0'; // We're going backward, so store this first - FastIntToBufferBackward(i, buffer, digits); + uint64_t u = static_cast<uint64_t>(i); + if (i < 0) { + *buffer++ = '-'; + // We need to do the negation in modular (i.e., "unsigned") + // arithmetic; MSVC++ apparently warns for plain "-u", so + // we write the equivalent expression "0 - u" instead. + u = 0 - u; + } + buffer = EncodeFullU64(u, buffer); + *buffer = '\0'; return buffer; } -absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward( - uint32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) { - return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count); -} - -absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward( - int32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) { - return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count); -} - -absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward( - uint64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) { - return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count); -} - -absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward( - int64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) { - return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count); -} - -int numbers_internal::GetNumDigitsOrNegativeIfNegative(signed char v) { - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned char v) { - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(short v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative( - unsigned short v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(int v) { - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned int v) { - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(long v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative( - unsigned long v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative(long long v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} -int numbers_internal::GetNumDigitsOrNegativeIfNegative( - unsigned long long v) { // NOLINT - return GetNumDigitsOrNegativeIfNegativeImpl(v); -} - // Given a 128-bit number expressed as a pair of uint64_t, high half first, // return that number multiplied by the given 32-bit value. If the result is // too large to fit in a 128-bit number, divide it by 2 until it fits. diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h index ad4e66b6..739dbb28 100644 --- a/absl/strings/numbers.h +++ b/absl/strings/numbers.h @@ -32,7 +32,6 @@ #endif #include <cstddef> -#include <cstdint> #include <cstdlib> #include <cstring> #include <ctime> @@ -40,12 +39,10 @@ #include <string> #include <type_traits> -#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/macros.h" #include "absl/base/nullability.h" -#include "absl/base/optimization.h" #include "absl/base/port.h" #include "absl/numeric/bits.h" #include "absl/numeric/int128.h" @@ -161,96 +158,6 @@ bool safe_strtou128_base(absl::string_view text, static const int kFastToBufferSize = 32; static const int kSixDigitsToBufferSize = 16; -template <class T> -std::enable_if_t<!std::is_unsigned<T>::value, bool> IsNegative(const T& v) { - return v < T(); -} - -template <class T> -std::enable_if_t<std::is_unsigned<T>::value, std::false_type> IsNegative( - const T&) { - // The integer is unsigned, so return a compile-time constant. - // This can help the optimizer avoid having to prove bool to be false later. - return std::false_type(); -} - -template <class T> -std::enable_if_t<std::is_unsigned<std::decay_t<T>>::value, T&&> -UnsignedAbsoluteValue(T&& v ABSL_ATTRIBUTE_LIFETIME_BOUND) { - // The value is unsigned; just return the original. - return std::forward<T>(v); -} - -template <class T> -ABSL_ATTRIBUTE_CONST_FUNCTION - std::enable_if_t<!std::is_unsigned<T>::value, std::make_unsigned_t<T>> - UnsignedAbsoluteValue(T v) { - using U = std::make_unsigned_t<T>; - return IsNegative(v) ? U() - static_cast<U>(v) : static_cast<U>(v); -} - -// Returns the number of base-10 digits in the given number. -// Note that this strictly counts digits. It does not count the sign. -// The `initial_digits` parameter is the starting point, which is normally equal -// to 1 because the number of digits in 0 is 1 (a special case). -// However, callers may e.g. wish to change it to 2 to account for the sign. -template <typename T> -std::enable_if_t<std::is_unsigned<T>::value, uint32_t> Base10Digits( - T v, const uint32_t initial_digits = 1) { - uint32_t r = initial_digits; - // If code size becomes an issue, the 'if' stage can be removed for a minor - // performance loss. - for (;;) { - if (ABSL_PREDICT_TRUE(v < 10 * 10)) { - r += (v >= 10); - break; - } - if (ABSL_PREDICT_TRUE(v < 1000 * 10)) { - r += (v >= 1000) + 2; - break; - } - if (ABSL_PREDICT_TRUE(v < 100000 * 10)) { - r += (v >= 100000) + 4; - break; - } - r += 6; - v = static_cast<T>(v / 1000000); - } - return r; -} - -template <typename T> -std::enable_if_t<std::is_signed<T>::value, uint32_t> Base10Digits( - T v, uint32_t r = 1) { - // Branchlessly add 1 to account for a minus sign. - r += static_cast<uint32_t>(IsNegative(v)); - return Base10Digits(UnsignedAbsoluteValue(v), r); -} - -// These functions return the number of base-10 digits, but multiplied by -1 if -// the input itself is negative. This is handy and efficient for later usage, -// since the bitwise complement of the result becomes equal to the number of -// characters required. -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - signed char v); -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned char v); -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - short v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned short v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(int v); -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned int v); -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - long v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned long v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - long long v); // NOLINT -ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( - unsigned long long v); // NOLINT - // Helper function for fast formatting of floating-point values. // The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six // significant digits are returned, trailing zeros are removed, and numbers @@ -259,18 +166,24 @@ ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative( // Required buffer size is `kSixDigitsToBufferSize`. size_t SixDigitsToBuffer(double d, absl::Nonnull<char*> buffer); -// All of these functions take an output buffer +// WARNING: These functions may write more characters than necessary, because +// they are intended for speed. All functions take an output buffer // as an argument and return a pointer to the last byte they wrote, which is the // terminating '\0'. At most `kFastToBufferSize` bytes are written. -absl::Nonnull<char*> FastIntToBuffer(int32_t i, absl::Nonnull<char*> buffer); -absl::Nonnull<char*> FastIntToBuffer(uint32_t i, absl::Nonnull<char*> buffer); -absl::Nonnull<char*> FastIntToBuffer(int64_t i, absl::Nonnull<char*> buffer); -absl::Nonnull<char*> FastIntToBuffer(uint64_t i, absl::Nonnull<char*> buffer); +absl::Nonnull<char*> FastIntToBuffer(int32_t i, absl::Nonnull<char*> buffer) + ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); +absl::Nonnull<char*> FastIntToBuffer(uint32_t n, absl::Nonnull<char*> out_str) + ABSL_INTERNAL_NEED_MIN_SIZE(out_str, kFastToBufferSize); +absl::Nonnull<char*> FastIntToBuffer(int64_t i, absl::Nonnull<char*> buffer) + ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); +absl::Nonnull<char*> FastIntToBuffer(uint64_t i, absl::Nonnull<char*> buffer) + ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); // For enums and integer types that are not an exact match for the types above, // use templates to call the appropriate one of the four overloads above. template <typename int_type> -absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer) { +absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer) + ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize) { static_assert(sizeof(i) <= 64 / 8, "FastIntToBuffer works only with 64-bit-or-less integers."); // TODO(jorg): This signed-ness check is used because it works correctly @@ -294,58 +207,6 @@ absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer) { } } -// These functions do NOT add any null-terminator. -// They return a pointer to the beginning of the written string. -// The digit counts provided must *exactly* match the number of base-10 digits -// in the number, or the behavior is undefined. -// (i.e. do NOT count the minus sign, or over- or under-count the digits.) -absl::Nonnull<char*> FastIntToBufferBackward(int32_t i, - absl::Nonnull<char*> buffer_end, - uint32_t exact_digit_count); -absl::Nonnull<char*> FastIntToBufferBackward(uint32_t i, - absl::Nonnull<char*> buffer_end, - uint32_t exact_digit_count); -absl::Nonnull<char*> FastIntToBufferBackward(int64_t i, - absl::Nonnull<char*> buffer_end, - uint32_t exact_digit_count); -absl::Nonnull<char*> FastIntToBufferBackward(uint64_t i, - absl::Nonnull<char*> buffer_end, - uint32_t exact_digit_count); - -// For enums and integer types that are not an exact match for the types above, -// use templates to call the appropriate one of the four overloads above. -template <typename int_type> -absl::Nonnull<char*> FastIntToBufferBackward(int_type i, - absl::Nonnull<char*> buffer_end, - uint32_t exact_digit_count) { - static_assert( - sizeof(i) <= 64 / 8, - "FastIntToBufferBackward works only with 64-bit-or-less integers."); - // This signed-ness check is used because it works correctly - // with enums, and it also serves to check that int_type is not a pointer. - // If one day something like std::is_signed<enum E> works, switch to it. - // These conditions are constexpr bools to suppress MSVC warning C4127. - constexpr bool kIsSigned = static_cast<int_type>(1) - 2 < 0; - constexpr bool kUse64Bit = sizeof(i) > 32 / 8; - if (kIsSigned) { - if (kUse64Bit) { - return FastIntToBufferBackward(static_cast<int64_t>(i), buffer_end, - exact_digit_count); - } else { - return FastIntToBufferBackward(static_cast<int32_t>(i), buffer_end, - exact_digit_count); - } - } else { - if (kUse64Bit) { - return FastIntToBufferBackward(static_cast<uint64_t>(i), buffer_end, - exact_digit_count); - } else { - return FastIntToBufferBackward(static_cast<uint32_t>(i), buffer_end, - exact_digit_count); - } - } -} - // Implementation of SimpleAtoi, generalized to support arbitrary base (used // with base different from 10 elsewhere in Abseil implementation). template <typename int_type> diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index 1ceff70f..75c2dcf2 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -231,15 +231,10 @@ TEST(Numbers, TestFastPrints) { CheckInt32(INT_MIN); CheckInt32(INT_MAX); CheckInt64(LONG_MIN); - CheckInt64(uint64_t{10000000}); - CheckInt64(uint64_t{100000000}); CheckInt64(uint64_t{1000000000}); CheckInt64(uint64_t{9999999999}); CheckInt64(uint64_t{100000000000000}); CheckInt64(uint64_t{999999999999999}); - CheckInt64(uint64_t{1000000000000000}); - CheckInt64(uint64_t{10000000000000000}); - CheckInt64(uint64_t{100000000000000000}); CheckInt64(uint64_t{1000000000000000000}); CheckInt64(uint64_t{1199999999999999999}); CheckInt64(int64_t{-700000000000000000}); @@ -251,8 +246,6 @@ TEST(Numbers, TestFastPrints) { CheckUInt64(uint64_t{999999999999999}); CheckUInt64(uint64_t{1000000000000000000}); CheckUInt64(uint64_t{1199999999999999999}); - CheckUInt64(uint64_t{10000000000000000000u}); - CheckUInt64(uint64_t{10200300040000500006u}); CheckUInt64(std::numeric_limits<uint64_t>::max()); for (int i = 0; i < 10000; i++) { diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc index 098ab183..c51c1373 100644 --- a/absl/strings/str_cat.cc +++ b/absl/strings/str_cat.cc @@ -20,19 +20,18 @@ #include <cstdint> #include <cstring> #include <initializer_list> +#include <limits> #include <string> -#include <type_traits> #include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" #include "absl/base/nullability.h" #include "absl/strings/internal/resize_uninitialized.h" -#include "absl/strings/numbers.h" #include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN - // ---------------------------------------------------------------------- // StrCat() // This merges the given strings or integers, with no delimiter. This @@ -43,7 +42,8 @@ ABSL_NAMESPACE_BEGIN namespace { // Append is merely a version of memcpy that returns the address of the byte // after the area just overwritten. -absl::Nonnull<char*> Append(absl::Nonnull<char*> out, const AlphaNum& x) { +inline absl::Nonnull<char*> Append(absl::Nonnull<char*> out, + const AlphaNum& x) { // memcpy is allowed to overwrite arbitrary memory, so doing this after the // call would force an extra fetch of x.size(). char* after = out + x.size(); @@ -53,12 +53,23 @@ absl::Nonnull<char*> Append(absl::Nonnull<char*> out, const AlphaNum& x) { return after; } +inline void STLStringAppendUninitializedAmortized(std::string* dest, + size_t to_append) { + strings_internal::AppendUninitializedTraits<std::string>::Append(dest, + to_append); +} } // namespace std::string StrCat(const AlphaNum& a, const AlphaNum& b) { std::string result; - absl::strings_internal::STLStringResizeUninitialized(&result, - a.size() + b.size()); + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + constexpr uint64_t kMaxSize = uint64_t{std::numeric_limits<size_t>::max()}; + const uint64_t result_size = + static_cast<uint64_t>(a.size()) + static_cast<uint64_t>(b.size()); + ABSL_INTERNAL_CHECK(result_size <= kMaxSize, "size_t overflow"); + absl::strings_internal::STLStringResizeUninitialized( + &result, static_cast<size_t>(result_size)); char* const begin = &result[0]; char* out = begin; out = Append(out, a); @@ -69,8 +80,15 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b) { std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { std::string result; + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + constexpr uint64_t kMaxSize = uint64_t{std::numeric_limits<size_t>::max()}; + const uint64_t result_size = static_cast<uint64_t>(a.size()) + + static_cast<uint64_t>(b.size()) + + static_cast<uint64_t>(c.size()); + ABSL_INTERNAL_CHECK(result_size <= kMaxSize, "size_t overflow"); strings_internal::STLStringResizeUninitialized( - &result, a.size() + b.size() + c.size()); + &result, static_cast<size_t>(result_size)); char* const begin = &result[0]; char* out = begin; out = Append(out, a); @@ -83,8 +101,16 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d) { std::string result; + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + constexpr uint64_t kMaxSize = uint64_t{std::numeric_limits<size_t>::max()}; + const uint64_t result_size = static_cast<uint64_t>(a.size()) + + static_cast<uint64_t>(b.size()) + + static_cast<uint64_t>(c.size()) + + static_cast<uint64_t>(d.size()); + ABSL_INTERNAL_CHECK(result_size <= kMaxSize, "size_t overflow"); strings_internal::STLStringResizeUninitialized( - &result, a.size() + b.size() + c.size() + d.size()); + &result, static_cast<size_t>(result_size)); char* const begin = &result[0]; char* out = begin; out = Append(out, a); @@ -98,135 +124,18 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, namespace strings_internal { // Do not call directly - these are not part of the public API. -void STLStringAppendUninitializedAmortized(std::string* dest, - size_t to_append) { - strings_internal::AppendUninitializedTraits<std::string>::Append(dest, - to_append); -} - -template <typename Integer> -std::enable_if_t<std::is_integral<Integer>::value, std::string> IntegerToString( - Integer i) { - std::string str; - const auto /* either bool or std::false_type */ is_negative = - absl::numbers_internal::IsNegative(i); - const uint32_t digits = absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(i)); - absl::strings_internal::STLStringResizeUninitialized( - &str, digits + static_cast<uint32_t>(is_negative)); - absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits); - return str; -} - -template <> -std::string IntegerToString(long i) { // NOLINT - if (sizeof(i) <= sizeof(int)) { - return IntegerToString(static_cast<int>(i)); - } else { - return IntegerToString(static_cast<long long>(i)); // NOLINT - } -} - -template <> -std::string IntegerToString(unsigned long i) { // NOLINT - if (sizeof(i) <= sizeof(unsigned int)) { - return IntegerToString(static_cast<unsigned int>(i)); - } else { - return IntegerToString(static_cast<unsigned long long>(i)); // NOLINT - } -} - -template <typename Float> -std::enable_if_t<std::is_floating_point<Float>::value, std::string> -FloatToString(Float f) { - std::string result; - strings_internal::STLStringResizeUninitialized( - &result, numbers_internal::kSixDigitsToBufferSize); - char* start = &result[0]; - result.erase(numbers_internal::SixDigitsToBuffer(f, start)); - return result; -} - -std::string SingleArgStrCat(int x) { return IntegerToString(x); } -std::string SingleArgStrCat(unsigned int x) { return IntegerToString(x); } -// NOLINTNEXTLINE -std::string SingleArgStrCat(long x) { return IntegerToString(x); } -// NOLINTNEXTLINE -std::string SingleArgStrCat(unsigned long x) { return IntegerToString(x); } -// NOLINTNEXTLINE -std::string SingleArgStrCat(long long x) { return IntegerToString(x); } -// NOLINTNEXTLINE -std::string SingleArgStrCat(unsigned long long x) { return IntegerToString(x); } -std::string SingleArgStrCat(float x) { return FloatToString(x); } -std::string SingleArgStrCat(double x) { return FloatToString(x); } - -template <class Integer> -std::enable_if_t<std::is_integral<Integer>::value, void> AppendIntegerToString( - std::string& str, Integer i) { - const auto /* either bool or std::false_type */ is_negative = - absl::numbers_internal::IsNegative(i); - const uint32_t digits = absl::numbers_internal::Base10Digits( - absl::numbers_internal::UnsignedAbsoluteValue(i)); - absl::strings_internal::STLStringAppendUninitializedAmortized( - &str, digits + static_cast<uint32_t>(is_negative)); - absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits); -} - -template <> -void AppendIntegerToString(std::string& str, long i) { // NOLINT - if (sizeof(i) <= sizeof(int)) { - return AppendIntegerToString(str, static_cast<int>(i)); - } else { - return AppendIntegerToString(str, static_cast<long long>(i)); // NOLINT - } -} - -template <> -void AppendIntegerToString(std::string& str, - unsigned long i) { // NOLINT - if (sizeof(i) <= sizeof(unsigned int)) { - return AppendIntegerToString(str, static_cast<unsigned int>(i)); - } else { - return AppendIntegerToString(str, - static_cast<unsigned long long>(i)); // NOLINT - } -} - -// `SingleArgStrAppend` overloads are defined here for the same reasons as with -// `SingleArgStrCat` above. -void SingleArgStrAppend(std::string& str, int x) { - return AppendIntegerToString(str, x); -} - -void SingleArgStrAppend(std::string& str, unsigned int x) { - return AppendIntegerToString(str, x); -} - -// NOLINTNEXTLINE -void SingleArgStrAppend(std::string& str, long x) { - return AppendIntegerToString(str, x); -} - -// NOLINTNEXTLINE -void SingleArgStrAppend(std::string& str, unsigned long x) { - return AppendIntegerToString(str, x); -} - -// NOLINTNEXTLINE -void SingleArgStrAppend(std::string& str, long long x) { - return AppendIntegerToString(str, x); -} - -// NOLINTNEXTLINE -void SingleArgStrAppend(std::string& str, unsigned long long x) { - return AppendIntegerToString(str, x); -} - std::string CatPieces(std::initializer_list<absl::string_view> pieces) { std::string result; - size_t total_size = 0; - for (absl::string_view piece : pieces) total_size += piece.size(); - strings_internal::STLStringResizeUninitialized(&result, total_size); + // Use uint64_t to prevent size_t overflow. We assume it is not possible for + // in memory strings to overflow a uint64_t. + constexpr uint64_t kMaxSize = uint64_t{std::numeric_limits<size_t>::max()}; + uint64_t total_size = 0; + for (absl::string_view piece : pieces) { + total_size += piece.size(); + } + ABSL_INTERNAL_CHECK(total_size <= kMaxSize, "size_t overflow"); + strings_internal::STLStringResizeUninitialized( + &result, static_cast<size_t>(total_size)); char* const begin = &result[0]; char* out = begin; @@ -258,7 +167,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest, ASSERT_NO_OVERLAP(*dest, piece); to_append += piece.size(); } - strings_internal::STLStringAppendUninitializedAmortized(dest, to_append); + STLStringAppendUninitializedAmortized(dest, to_append); char* const begin = &(*dest)[0]; char* out = begin + old_size; @@ -277,7 +186,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest, void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a) { ASSERT_NO_OVERLAP(*dest, a); std::string::size_type old_size = dest->size(); - strings_internal::STLStringAppendUninitializedAmortized(dest, a.size()); + STLStringAppendUninitializedAmortized(dest, a.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; out = Append(out, a); @@ -289,8 +198,7 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a, ASSERT_NO_OVERLAP(*dest, a); ASSERT_NO_OVERLAP(*dest, b); std::string::size_type old_size = dest->size(); - strings_internal::STLStringAppendUninitializedAmortized(dest, - a.size() + b.size()); + STLStringAppendUninitializedAmortized(dest, a.size() + b.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; out = Append(out, a); @@ -304,8 +212,7 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a, ASSERT_NO_OVERLAP(*dest, b); ASSERT_NO_OVERLAP(*dest, c); std::string::size_type old_size = dest->size(); - strings_internal::STLStringAppendUninitializedAmortized( - dest, a.size() + b.size() + c.size()); + STLStringAppendUninitializedAmortized(dest, a.size() + b.size() + c.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; out = Append(out, a); @@ -321,7 +228,7 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a, ASSERT_NO_OVERLAP(*dest, c); ASSERT_NO_OVERLAP(*dest, d); std::string::size_type old_size = dest->size(); - strings_internal::STLStringAppendUninitializedAmortized( + STLStringAppendUninitializedAmortized( dest, a.size() + b.size() + c.size() + d.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index 1b52a36f..b98adc02 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -93,6 +93,8 @@ #include <cstddef> #include <cstdint> #include <cstring> +#include <initializer_list> +#include <limits> #include <string> #include <type_traits> #include <utility> @@ -312,6 +314,10 @@ class AlphaNum { // No bool ctor -- bools convert to an integral type. // A bool ctor would also convert incoming pointers (bletch). + // Prevent brace initialization + template <typename T> + AlphaNum(std::initializer_list<T>) = delete; // NOLINT(runtime/explicit) + AlphaNum(int x) // NOLINT(runtime/explicit) : piece_(digits_, static_cast<size_t>( numbers_internal::FastIntToBuffer(x, digits_) - @@ -448,36 +454,77 @@ std::string CatPieces(std::initializer_list<absl::string_view> pieces); void AppendPieces(absl::Nonnull<std::string*> dest, std::initializer_list<absl::string_view> pieces); -void STLStringAppendUninitializedAmortized(std::string* dest, size_t to_append); +template <typename Integer> +std::string IntegerToString(Integer i) { + // Any integer (signed/unsigned) up to 64 bits can be formatted into a buffer + // with 22 bytes (including NULL at the end). + constexpr size_t kMaxDigits10 = 22; + std::string result; + strings_internal::STLStringResizeUninitialized(&result, kMaxDigits10); + char* start = &result[0]; + // note: this can be optimized to not write last zero. + char* end = numbers_internal::FastIntToBuffer(i, start); + auto size = static_cast<size_t>(end - start); + assert((size < result.size()) && + "StrCat(Integer) does not fit into kMaxDigits10"); + result.erase(size); + return result; +} +template <typename Float> +std::string FloatToString(Float f) { + std::string result; + strings_internal::STLStringResizeUninitialized( + &result, numbers_internal::kSixDigitsToBufferSize); + char* start = &result[0]; + result.erase(numbers_internal::SixDigitsToBuffer(f, start)); + return result; +} // `SingleArgStrCat` overloads take built-in `int`, `long` and `long long` types // (signed / unsigned) to avoid ambiguity on the call side. If we used int32_t // and int64_t, then at least one of the three (`int` / `long` / `long long`) // would have been ambiguous when passed to `SingleArgStrCat`. -std::string SingleArgStrCat(int x); -std::string SingleArgStrCat(unsigned int x); -std::string SingleArgStrCat(long x); // NOLINT -std::string SingleArgStrCat(unsigned long x); // NOLINT -std::string SingleArgStrCat(long long x); // NOLINT -std::string SingleArgStrCat(unsigned long long x); // NOLINT -std::string SingleArgStrCat(float x); -std::string SingleArgStrCat(double x); - -// `SingleArgStrAppend` overloads are defined here for the same reasons as with -// `SingleArgStrCat` above. -void SingleArgStrAppend(std::string& str, int x); -void SingleArgStrAppend(std::string& str, unsigned int x); -void SingleArgStrAppend(std::string& str, long x); // NOLINT -void SingleArgStrAppend(std::string& str, unsigned long x); // NOLINT -void SingleArgStrAppend(std::string& str, long long x); // NOLINT -void SingleArgStrAppend(std::string& str, unsigned long long x); // NOLINT - -template <typename T, - typename = std::enable_if_t<std::is_arithmetic<T>::value && - !std::is_same<T, char>::value && - !std::is_same<T, bool>::value>> +inline std::string SingleArgStrCat(int x) { return IntegerToString(x); } +inline std::string SingleArgStrCat(unsigned int x) { + return IntegerToString(x); +} +// NOLINTNEXTLINE +inline std::string SingleArgStrCat(long x) { return IntegerToString(x); } +// NOLINTNEXTLINE +inline std::string SingleArgStrCat(unsigned long x) { + return IntegerToString(x); +} +// NOLINTNEXTLINE +inline std::string SingleArgStrCat(long long x) { return IntegerToString(x); } +// NOLINTNEXTLINE +inline std::string SingleArgStrCat(unsigned long long x) { + return IntegerToString(x); +} +inline std::string SingleArgStrCat(float x) { return FloatToString(x); } +inline std::string SingleArgStrCat(double x) { return FloatToString(x); } + +// As of September 2023, the SingleArgStrCat() optimization is only enabled for +// libc++. The reasons for this are: +// 1) The SSO size for libc++ is 23, while libstdc++ and MSSTL have an SSO size +// of 15. Since IntegerToString unconditionally resizes the string to 22 bytes, +// this causes both libstdc++ and MSSTL to allocate. +// 2) strings_internal::STLStringResizeUninitialized() only has an +// implementation that avoids initialization when using libc++. This isn't as +// relevant as (1), and the cost should be benchmarked if (1) ever changes on +// libstc++ or MSSTL. +#ifdef _LIBCPP_VERSION +#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE true +#else +#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE false +#endif + +template <typename T, typename = std::enable_if_t< + ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE && + std::is_arithmetic<T>{} && !std::is_same<T, char>{}>> using EnableIfFastCase = T; +#undef ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE + } // namespace strings_internal ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); } @@ -553,67 +600,6 @@ inline void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a, static_cast<const AlphaNum&>(args).Piece()...}); } -template <class String, class T> -std::enable_if_t< - std::is_integral<absl::strings_internal::EnableIfFastCase<T>>::value, void> -StrAppend(absl::Nonnull<String*> result, T i) { - return absl::strings_internal::SingleArgStrAppend(*result, i); -} - -// This overload is only selected if all the parameters are numbers that can be -// handled quickly. -// Later we can look into how we can extend this to more general argument -// mixtures without bloating codegen too much, or copying unnecessarily. -template <typename String, typename... T> -std::enable_if_t< - (sizeof...(T) > 1), - std::common_type_t<std::conditional_t< - true, void, absl::strings_internal::EnableIfFastCase<T>>...>> -StrAppend(absl::Nonnull<String*> str, T... args) { - // Do not add unnecessary variables, logic, or even "free" lambdas here. - // They can add overhead for the compiler and/or at run time. - // Furthermore, assume this function will be inlined. - // This function is carefully tailored to be able to be largely optimized away - // so that it becomes near-equivalent to the caller handling each argument - // individually while minimizing register pressure, so that the compiler - // can inline it with minimal overhead. - - // First, calculate the total length, so we can perform just a single resize. - // Save all the lengths for later. - size_t total_length = 0; - const ptrdiff_t lengths[] = { - absl::numbers_internal::GetNumDigitsOrNegativeIfNegative(args)...}; - for (const ptrdiff_t possibly_negative_length : lengths) { - // Lengths are negative for negative numbers. Keep them for later use, but - // take their absolute values for calculating total lengths; - total_length += possibly_negative_length < 0 - ? static_cast<size_t>(-possibly_negative_length) - : static_cast<size_t>(possibly_negative_length); - } - - // Now reserve space for all the arguments. - const size_t old_size = str->size(); - absl::strings_internal::STLStringAppendUninitializedAmortized(str, - total_length); - - // Finally, output each argument one-by-one, from left to right. - size_t i = 0; // The current argument we're processing - ptrdiff_t n; // The length of the current argument - typename String::pointer pos = &(*str)[old_size]; - using SomeTrivialEmptyType = std::false_type; - const SomeTrivialEmptyType dummy; - // Ugly code due to the lack of C++17 fold expressions - const SomeTrivialEmptyType dummies[] = { - (/* Comma expressions are poor man's C++17 fold expression for C++14 */ - (void)(n = lengths[i]), - (void)(n < 0 ? (void)(*pos++ = '-'), (n = ~n) : 0), - (void)absl::numbers_internal::FastIntToBufferBackward( - absl::numbers_internal::UnsignedAbsoluteValue(std::move(args)), - pos += n, static_cast<uint32_t>(n)), - (void)++i, dummy)...}; - (void)dummies; // Remove & migrate to fold expressions in C++17 -} - // Helper function for the future StrCat default floating-point format, %.6g // This is fast. inline strings_internal::AlphaNumBuffer< diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc index b30a86fe..66eddf0d 100644 --- a/absl/strings/str_cat_test.cc +++ b/absl/strings/str_cat_test.cc @@ -39,24 +39,6 @@ namespace { -template <typename Integer> -void VerifyInteger(Integer value) { - const std::string expected = std::to_string(value); - - EXPECT_EQ(absl::StrCat(value), expected); - - const char* short_prefix = "x"; - const char* long_prefix = "2;k.msabxiuow2[09i;o3k21-93-9=29]"; - - std::string short_str = short_prefix; - absl::StrAppend(&short_str, value); - EXPECT_EQ(short_str, short_prefix + expected); - - std::string long_str = long_prefix; - absl::StrAppend(&long_str, value); - EXPECT_EQ(long_str, long_prefix + expected); -} - // Test absl::StrCat of ints and longs of various sizes and signdedness. TEST(StrCat, Ints) { const short s = -1; // NOLINT(runtime/int) @@ -86,34 +68,6 @@ TEST(StrCat, Ints) { EXPECT_EQ(answer, "-9-12"); answer = absl::StrCat(uintptr, 0); EXPECT_EQ(answer, "130"); - - for (const uint32_t base : {2u, 10u}) { - for (const int extra_shift : {0, 12}) { - for (uint64_t i = 0; i < (1 << 8); ++i) { - uint64_t j = i; - while (true) { - uint64_t v = j ^ (extra_shift != 0 ? (j << extra_shift) * base : 0); - VerifyInteger(static_cast<bool>(v)); - VerifyInteger(static_cast<wchar_t>(v)); - VerifyInteger(static_cast<signed char>(v)); - VerifyInteger(static_cast<unsigned char>(v)); - VerifyInteger(static_cast<short>(v)); // NOLINT - VerifyInteger(static_cast<unsigned short>(v)); // NOLINT - VerifyInteger(static_cast<int>(v)); // NOLINT - VerifyInteger(static_cast<unsigned int>(v)); // NOLINT - VerifyInteger(static_cast<long>(v)); // NOLINT - VerifyInteger(static_cast<unsigned long>(v)); // NOLINT - VerifyInteger(static_cast<long long>(v)); // NOLINT - VerifyInteger(static_cast<unsigned long long>(v)); // NOLINT - const uint64_t next = j == 0 ? 1 : j * base; - if (next <= j) { - break; - } - j = next; - } - } - } - } } TEST(StrCat, Enums) { diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h index 66b6af58..76904d32 100644 --- a/absl/strings/str_format.h +++ b/absl/strings/str_format.h @@ -181,7 +181,7 @@ class FormatCountCapture { // For a `FormatSpec` to be valid at compile-time, it must be provided as // either: // -// * A `constexpr` literal or `absl::string_view`, which is how it most often +// * A `constexpr` literal or `absl::string_view`, which is how it is most often // used. // * A `ParsedFormat` instantiation, which ensures the format string is // valid before use. (See below.) diff --git a/absl/strings/str_join.h b/absl/strings/str_join.h index 6a92c0f5..8d7bc6ba 100644 --- a/absl/strings/str_join.h +++ b/absl/strings/str_join.h @@ -247,12 +247,20 @@ std::string StrJoin(const Range& range, absl::string_view separator, return strings_internal::JoinRange(range, separator, fmt); } -template <typename T, typename Formatter> +template <typename T, typename Formatter, + typename = typename std::enable_if< + !std::is_convertible<T, absl::string_view>::value>::type> std::string StrJoin(std::initializer_list<T> il, absl::string_view separator, Formatter&& fmt) { return strings_internal::JoinRange(il, separator, fmt); } +template <typename Formatter> +inline std::string StrJoin(std::initializer_list<absl::string_view> il, + absl::string_view separator, Formatter&& fmt) { + return strings_internal::JoinRange(il, separator, fmt); +} + template <typename... T, typename Formatter> std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator, Formatter&& fmt) { @@ -269,16 +277,22 @@ std::string StrJoin(const Range& range, absl::string_view separator) { return strings_internal::JoinRange(range, separator); } -template <typename T> -std::string StrJoin(std::initializer_list<T> il, - absl::string_view separator) { +template <typename T, typename = typename std::enable_if<!std::is_convertible< + T, absl::string_view>::value>::type> +std::string StrJoin(std::initializer_list<T> il, absl::string_view separator) { + return strings_internal::JoinRange(il, separator); +} + +inline std::string StrJoin(std::initializer_list<absl::string_view> il, + absl::string_view separator) { return strings_internal::JoinRange(il, separator); } template <typename... T> std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator) { - return strings_internal::JoinAlgorithm(value, separator, AlphaNumFormatter()); + return strings_internal::JoinTuple(value, separator, + std::index_sequence_for<T...>{}); } ABSL_NAMESPACE_END diff --git a/absl/strings/str_join_benchmark.cc b/absl/strings/str_join_benchmark.cc index d6f689ff..be7a7257 100644 --- a/absl/strings/str_join_benchmark.cc +++ b/absl/strings/str_join_benchmark.cc @@ -16,6 +16,7 @@ #include "absl/strings/str_join.h" #include <string> +#include <tuple> #include <vector> #include <utility> @@ -94,4 +95,13 @@ BENCHMARK(BM_JoinStreamable) ->ArgPair(16, 256) ->ArgPair(256, 256); +void BM_JoinTuple(benchmark::State& state) { + for (auto _ : state) { + std::string s = + absl::StrJoin(std::make_tuple(123456789, 987654321, 24680, 13579), "/"); + benchmark::DoNotOptimize(s); + } +} +BENCHMARK(BM_JoinTuple); + } // namespace diff --git a/absl/strings/str_join_test.cc b/absl/strings/str_join_test.cc index 449f95be..cd52e11d 100644 --- a/absl/strings/str_join_test.cc +++ b/absl/strings/str_join_test.cc @@ -428,6 +428,42 @@ TEST(StrJoin, InitializerList) { } } +TEST(StrJoin, StringViewInitializerList) { + { + // Tests initializer_list of string_views + std::string b = "b"; + EXPECT_EQ("a-b-c", absl::StrJoin({"a", b, "c"}, "-")); + } + { + // Tests initializer_list of string_views with a non-default formatter + TestingParenFormatter f; + std::string b = "b"; + EXPECT_EQ("(a)-(b)-(c)", absl::StrJoin({"a", b, "c"}, "-", f)); + } + + class NoCopy { + public: + explicit NoCopy(absl::string_view view) : view_(view) {} + NoCopy(const NoCopy&) = delete; + operator absl::string_view() { return view_; } // NOLINT + private: + absl::string_view view_; + }; + { + // Tests initializer_list of string_views preferred over initializer_list<T> + // for T that is implicitly convertible to string_view + EXPECT_EQ("a-b-c", + absl::StrJoin({NoCopy("a"), NoCopy("b"), NoCopy("c")}, "-")); + } + { + // Tests initializer_list of string_views preferred over initializer_list<T> + // for T that is implicitly convertible to string_view + TestingParenFormatter f; + EXPECT_EQ("(a)-(b)-(c)", + absl::StrJoin({NoCopy("a"), NoCopy("b"), NoCopy("c")}, "-", f)); + } +} + TEST(StrJoin, Tuple) { EXPECT_EQ("", absl::StrJoin(std::make_tuple(), "-")); EXPECT_EQ("hello", absl::StrJoin(std::make_tuple("hello"), "-")); diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h index 87540278..ba176fca 100644 --- a/absl/strings/str_split.h +++ b/absl/strings/str_split.h @@ -456,7 +456,7 @@ using EnableSplitIfString = // // Stores results in a std::set<std::string>, which also performs // // de-duplication and orders the elements in ascending order. // std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ','); -// // v[0] == "a", v[1] == "b", v[2] = "c" +// // a[0] == "a", a[1] == "b", a[2] == "c" // // // `StrSplit()` can be used within a range-based for loop, in which case // // each element will be of type `absl::string_view`. @@ -544,7 +544,7 @@ StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, typename strings_internal::SelectDelimiter<Delimiter>::type; return strings_internal::Splitter<DelimiterType, Predicate, absl::string_view>( - text.value(), DelimiterType(d), std::move(p)); + text.value(), DelimiterType(std::move(d)), std::move(p)); } template <typename Delimiter, typename Predicate, typename StringType, diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index 04ca0a38..ff760014 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -159,7 +159,7 @@ ABSL_NAMESPACE_BEGIN // // absl::string_view() == absl::string_view("", 0) // absl::string_view(nullptr, 0) == absl::string_view("abcdef"+6, 0) -class string_view { +class ABSL_INTERNAL_ATTRIBUTE_VIEW string_view { public: using traits_type = std::char_traits<char>; using value_type = char; @@ -173,6 +173,7 @@ class string_view { using reverse_iterator = const_reverse_iterator; using size_type = size_t; using difference_type = std::ptrdiff_t; + using absl_internal_is_view = std::true_type; static constexpr size_type npos = static_cast<size_type>(-1); @@ -670,7 +671,7 @@ class string_view { } static constexpr size_type StrlenInternal(absl::Nonnull<const char*> str) { -#if defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__) +#if defined(_MSC_VER) && !defined(__clang__) // MSVC 2017+ can evaluate this at compile-time. const char* begin = str; while (*str != '\0') ++str; diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc index 5b1eb01a..e978fc3f 100644 --- a/absl/strings/string_view_test.cc +++ b/absl/strings/string_view_test.cc @@ -32,6 +32,7 @@ #include "gtest/gtest.h" #include "absl/base/config.h" +#include "absl/meta/type_traits.h" #if defined(ABSL_HAVE_STD_STRING_VIEW) || defined(__ANDROID__) // We don't control the death messaging when using std::string_view. @@ -46,6 +47,14 @@ namespace { +static_assert(!absl::type_traits_internal::IsOwner<absl::string_view>::value && + absl::type_traits_internal::IsView<absl::string_view>::value, + "string_view is a view, not an owner"); + +static_assert(absl::type_traits_internal::IsLifetimeBoundAssignment< + absl::string_view, std::string>::value, + "lifetimebound assignment not detected"); + // A minimal allocator that uses malloc(). template <typename T> struct Mallocator { @@ -1051,9 +1060,6 @@ TEST(StringViewTest, ConstexprNullSafeStringView) { EXPECT_EQ(0u, s.size()); EXPECT_EQ(absl::string_view(), s); } -#if !defined(_MSC_VER) || _MSC_VER >= 1910 - // MSVC 2017+ is required for good constexpr string_view support. - // See the implementation of `absl::string_view::StrlenInternal()`. { static constexpr char kHi[] = "hi"; absl::string_view s = absl::NullSafeStringView(kHi); @@ -1066,7 +1072,6 @@ TEST(StringViewTest, ConstexprNullSafeStringView) { EXPECT_EQ(s.size(), 5u); EXPECT_EQ("hello", s); } -#endif } TEST(StringViewTest, ConstexprCompiles) { diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc index dd32c75f..a71f565a 100644 --- a/absl/strings/substitute.cc +++ b/absl/strings/substitute.cc @@ -18,6 +18,7 @@ #include <cassert> #include <cstddef> #include <cstdint> +#include <limits> #include <string> #include "absl/base/config.h" @@ -84,6 +85,9 @@ void SubstituteAndAppendArray( // Build the string. size_t original_size = output->size(); + ABSL_INTERNAL_CHECK( + size <= std::numeric_limits<size_t>::max() - original_size, + "size_t overflow"); strings_internal::STLStringResizeUninitializedAmortized(output, original_size + size); char* target = &(*output)[original_size]; |