diff options
Diffstat (limited to 'absl/strings')
68 files changed, 2445 insertions, 1288 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 5b12c010..e48a9a0a 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -37,11 +37,14 @@ cc_library( "internal/charconv_bigint.h", "internal/charconv_parse.cc", "internal/charconv_parse.h", + "internal/damerau_levenshtein_distance.cc", "internal/memutil.cc", "internal/memutil.h", "internal/stl_type_traits.h", "internal/str_join_internal.h", "internal/str_split_internal.h", + "internal/stringify_sink.cc", + "internal/stringify_sink.h", "match.cc", "numbers.cc", "str_cat.cc", @@ -54,6 +57,8 @@ cc_library( "ascii.h", "charconv.h", "escaping.h", + "internal/damerau_levenshtein_distance.h", + "internal/has_absl_stringify.h", "internal/string_constant.h", "match.h", "numbers.h", @@ -179,6 +184,19 @@ cc_test( ) cc_test( + name = "damerau_levenshtein_distance_test", + size = "small", + srcs = [ + "internal/damerau_levenshtein_distance_test.cc", + ], + copts = ABSL_TEST_COPTS, + deps = [ + "//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( name = "memutil_benchmark", srcs = [ "internal/memutil.h", @@ -304,8 +322,10 @@ cc_library( "//absl/base:raw_logging_internal", "//absl/base:throw_delegate", "//absl/container:compressed_tuple", + "//absl/container:container_memory", "//absl/container:inlined_vector", "//absl/container:layout", + "//absl/crc:crc_cord_state", "//absl/functional:function_ref", "//absl/meta:type_traits", "//absl/types:span", @@ -330,6 +350,7 @@ cc_test( cc_test( name = "cord_rep_btree_test", size = "medium", + timeout = "long", srcs = ["internal/cord_rep_btree_test.cc"], copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], @@ -387,6 +408,7 @@ cc_test( ":cord_internal", ":cord_rep_test_util", "//absl/base:config", + "//absl/crc:crc_cord_state", "@com_google_googletest//:gtest_main", ], ) @@ -436,7 +458,6 @@ cc_library( ":cordz_update_scope", ":cordz_update_tracker", ":internal", - ":str_format", ":strings", "//absl/base", "//absl/base:config", @@ -445,6 +466,7 @@ cc_library( "//absl/base:raw_logging_internal", "//absl/container:fixed_array", "//absl/container:inlined_vector", + "//absl/crc:crc_cord_state", "//absl/functional:function_ref", "//absl/meta:type_traits", "//absl/numeric:bits", @@ -492,6 +514,7 @@ cc_library( "//absl/container:inlined_vector", "//absl/debugging:stacktrace", "//absl/synchronization", + "//absl/time", "//absl/types:span", ], ) @@ -641,6 +664,7 @@ cc_test( ":cordz_update_scope", ":cordz_update_tracker", "//absl/base:config", + "//absl/crc:crc_cord_state", "//absl/synchronization", "//absl/synchronization:thread_pool", "@com_google_googletest//:gtest_main", @@ -770,8 +794,8 @@ cc_test( "no_test_android_arm64", "no_test_android_x86", "no_test_ios_x86_64", + "no_test_lexan", "no_test_loonix", - "no_test_msvc_x64", ], visibility = ["//visibility:private"], deps = [ @@ -1132,6 +1156,7 @@ cc_library( "internal/str_format/arg.h", "internal/str_format/bind.h", "internal/str_format/checker.h", + "internal/str_format/constexpr_parser.h", "internal/str_format/extension.h", "internal/str_format/float_conversion.h", "internal/str_format/output.h", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 01f86184..d2928bd7 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -21,7 +21,9 @@ absl_cc_library( "ascii.h" "charconv.h" "escaping.h" + "internal/damerau_levenshtein_distance.h" "internal/string_constant.h" + "internal/has_absl_stringify.h" "match.h" "numbers.h" "str_cat.h" @@ -39,8 +41,11 @@ absl_cc_library( "internal/charconv_bigint.h" "internal/charconv_parse.cc" "internal/charconv_parse.h" + "internal/damerau_levenshtein_distance.cc" "internal/memutil.cc" "internal/memutil.h" + "internal/stringify_sink.h" + "internal/stringify_sink.cc" "internal/stl_type_traits.h" "internal/str_join_internal.h" "internal/str_split_internal.h" @@ -134,6 +139,19 @@ absl_cc_test( absl_cc_test( NAME + damerau_levenshtein_distance_test + SRCS + "internal/damerau_levenshtein_distance_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::base + GTest::gmock_main +) + +absl_cc_test( + NAME memutil_test SRCS "internal/memutil.h" @@ -395,6 +413,7 @@ absl_cc_library( "internal/str_format/arg.h" "internal/str_format/bind.h" "internal/str_format/checker.h" + "internal/str_format/constexpr_parser.h" "internal/str_format/extension.h" "internal/str_format/float_conversion.h" "internal/str_format/output.h" @@ -585,7 +604,9 @@ absl_cc_library( absl::base_internal absl::compressed_tuple absl::config + absl::container_memory absl::core_headers + absl::crc_cord_state absl::endian absl::inlined_vector absl::layout @@ -724,6 +745,7 @@ absl_cc_library( absl::raw_logging_internal absl::stacktrace absl::synchronization + absl::time ) absl_cc_test( @@ -764,6 +786,7 @@ absl_cc_test( absl::cordz_statistics absl::cordz_update_scope absl::cordz_update_tracker + absl::crc_cord_state absl::thread_pool GTest::gmock_main ) @@ -863,6 +886,7 @@ absl_cc_library( absl::cordz_update_scope absl::cordz_update_tracker absl::core_headers + absl::crc_cord_state absl::endian absl::fixed_array absl::function_ref @@ -1035,6 +1059,7 @@ absl_cc_test( absl::config absl::cord_internal absl::cord_rep_test_util + absl::crc_cord_state GTest::gmock_main ) diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc index 868df2d1..16c96899 100644 --- a/absl/strings/ascii.cc +++ b/absl/strings/ascii.cc @@ -14,6 +14,10 @@ #include "absl/strings/ascii.h" +#include <climits> +#include <cstring> +#include <string> + namespace absl { ABSL_NAMESPACE_BEGIN namespace ascii_internal { @@ -153,18 +157,62 @@ ABSL_DLL const char kToUpper[256] = { }; // clang-format on +template <bool ToUpper> +constexpr void AsciiStrCaseFold(char* p, char* end) { + // The upper- and lowercase versions of ASCII characters differ by only 1 bit. + // When we need to flip the case, we can xor with this bit to achieve the + // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We + // could have chosen 'z' and 'Z', or any other pair of characters as they all + // have the same single bit difference. + constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A'; + + constexpr char ch_a = ToUpper ? 'a' : 'A'; + constexpr char ch_z = ToUpper ? 'z' : 'Z'; + for (; p < end; ++p) { + unsigned char v = static_cast<unsigned char>(*p); + // We use & instead of && to ensure this always stays branchless + // We use static_cast<int> to suppress -Wbitwise-instead-of-logical + bool is_in_range = static_cast<bool>(static_cast<int>(ch_a <= v) & + static_cast<int>(v <= ch_z)); + v ^= is_in_range ? kAsciiCaseBitFlip : 0; + *p = static_cast<char>(v); + } +} + +static constexpr size_t ValidateAsciiCasefold() { + constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN; + size_t incorrect_index = 0; + char lowered[num_chars] = {}; + char uppered[num_chars] = {}; + for (unsigned int i = 0; i < num_chars; ++i) { + uppered[i] = lowered[i] = static_cast<char>(i); + } + AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]); + AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]); + for (size_t i = 0; i < num_chars; ++i) { + const char ch = static_cast<char>(i), + ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch), + ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch); + if (uppered[i] != ch_upper || lowered[i] != ch_lower) { + incorrect_index = i > 0 ? i : num_chars; + break; + } + } + return incorrect_index; +} + +static_assert(ValidateAsciiCasefold() == 0, "error in case conversion"); + } // namespace ascii_internal void AsciiStrToLower(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_tolower(static_cast<unsigned char>(ch)); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size()); } void AsciiStrToUpper(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_toupper(static_cast<unsigned char>(ch)); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size()); } void RemoveExtraAsciiWhitespace(std::string* str) { diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc index dfed114c..4ea262f1 100644 --- a/absl/strings/ascii_test.cc +++ b/absl/strings/ascii_test.cc @@ -14,6 +14,7 @@ #include "absl/strings/ascii.h" +#include <algorithm> #include <cctype> #include <clocale> #include <cstring> @@ -189,14 +190,14 @@ TEST(AsciiStrTo, Lower) { const std::string str("GHIJKL"); const std::string str2("MNOPQR"); const absl::string_view sp(str2); - std::string mutable_str("STUVWX"); + std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ"); EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); absl::AsciiStrToLower(&mutable_str); - EXPECT_EQ("stuvwx", mutable_str); + EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str); char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), @@ -207,12 +208,12 @@ TEST(AsciiStrTo, Lower) { TEST(AsciiStrTo, Upper) { const char buf[] = "abcdef"; const std::string str("ghijkl"); - const std::string str2("mnopqr"); + const std::string str2("_`?@[{amnopqrstuvwxyz"); const absl::string_view sp(str2); EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf)); EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str)); - EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp)); + EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp)); char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), diff --git a/absl/strings/charconv.cc b/absl/strings/charconv.cc index 25ac4499..778a1c75 100644 --- a/absl/strings/charconv.cc +++ b/absl/strings/charconv.cc @@ -21,6 +21,7 @@ #include <limits> #include "absl/base/casts.h" +#include "absl/base/config.h" #include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/strings/internal/charconv_bigint.h" @@ -118,10 +119,17 @@ struct FloatTraits<double> { static constexpr int kEiselLemireMaxExclusiveExp10 = 309; static double MakeNan(const char* tagp) { +#if ABSL_HAVE_BUILTIN(__builtin_nan) + // Use __builtin_nan() if available since it has a fix for + // https://bugs.llvm.org/show_bug.cgi?id=37778 + // std::nan may use the glibc implementation. + return __builtin_nan(tagp); +#else // Support nan no matter which namespace it's in. Some platforms // incorrectly don't put it in namespace std. using namespace std; // NOLINT return nan(tagp); +#endif } // Builds a nonzero floating point number out of the provided parts. @@ -184,10 +192,17 @@ struct FloatTraits<float> { static constexpr int kEiselLemireMaxExclusiveExp10 = 39; static float MakeNan(const char* tagp) { +#if ABSL_HAVE_BUILTIN(__builtin_nanf) + // Use __builtin_nanf() if available since it has a fix for + // https://bugs.llvm.org/show_bug.cgi?id=37778 + // std::nanf may use the glibc implementation. + return __builtin_nanf(tagp); +#else // Support nanf no matter which namespace it's in. Some platforms // incorrectly don't put it in namespace std. using namespace std; // NOLINT - return nanf(tagp); + return std::nanf(tagp); +#endif } static float Make(mantissa_t mantissa, int exponent, bool sign) { @@ -203,7 +218,8 @@ struct FloatTraits<float> { if (mantissa > kMantissaMask) { // Normal value. // Adjust by 127 for the exponent representation bias, and an additional - // 23 due to the implied decimal point in the IEEE mantissa represenation. + // 23 due to the implied decimal point in the IEEE mantissa + // representation. flt += static_cast<uint32_t>(exponent + 127 + kTargetMantissaBits - 1) << 23; mantissa &= kMantissaMask; @@ -298,7 +314,9 @@ struct CalculatedFloat { // minus the number of leading zero bits.) int BitWidth(uint128 value) { if (Uint128High64(value) == 0) { - return bit_width(Uint128Low64(value)); + // This static_cast is only needed when using a std::bit_width() + // implementation that does not have the fix for LWG 3656 applied. + return static_cast<int>(bit_width(Uint128Low64(value))); } return 128 - countl_zero(Uint128High64(value)); } @@ -461,7 +479,7 @@ uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact, // the low bit of `value` is set. // // In inexact mode, the nonzero error means the actual value is greater - // than the halfway point and we must alway round up. + // than the halfway point and we must always round up. if ((value & 1) == 1 || !input_exact) { ++value; } @@ -581,7 +599,9 @@ CalculatedFloat CalculateFromParsedHexadecimal( const strings_internal::ParsedFloat& parsed_hex) { uint64_t mantissa = parsed_hex.mantissa; int exponent = parsed_hex.exponent; - int mantissa_width = bit_width(mantissa); + // This static_cast is only needed when using a std::bit_width() + // implementation that does not have the fix for LWG 3656 applied. + int mantissa_width = static_cast<int>(bit_width(mantissa)); const int shift = NormalizedShiftSize<FloatType>(mantissa_width, exponent); bool result_exact; exponent += shift; diff --git a/absl/strings/charconv.h b/absl/strings/charconv.h index 7c509812..111c7120 100644 --- a/absl/strings/charconv.h +++ b/absl/strings/charconv.h @@ -22,7 +22,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN -// Workalike compatibilty version of std::chars_format from C++17. +// Workalike compatibility version of std::chars_format from C++17. // // This is an bitfield enumerator which can be passed to absl::from_chars to // configure the string-to-float conversion. @@ -48,7 +48,7 @@ struct from_chars_result { std::errc ec; }; -// Workalike compatibilty version of std::from_chars from C++17. Currently +// Workalike compatibility version of std::from_chars from C++17. Currently // this only supports the `double` and `float` types. // // This interface incorporates the proposed resolutions for library issues diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 66f45fef..14976aef 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -35,6 +35,7 @@ #include "absl/base/port.h" #include "absl/container/fixed_array.h" #include "absl/container/inlined_vector.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/cord_buffer.h" #include "absl/strings/escaping.h" #include "absl/strings/internal/cord_data_edge.h" @@ -47,7 +48,6 @@ #include "absl/strings/internal/cordz_update_tracker.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_format.h" #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" @@ -167,9 +167,7 @@ constexpr unsigned char Cord::InlineRep::kMaxInline; inline void Cord::InlineRep::set_data(const char* data, size_t n) { static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); - - cord_internal::SmallMemmove<true>(data_.as_chars(), data, n); - set_inline_size(n); + data_.set_inline_data(data, n); } inline char* Cord::InlineRep::set_data(size_t n) { @@ -420,6 +418,7 @@ Cord& Cord::operator=(absl::string_view src) { // we keep it here to make diffs easier. void Cord::InlineRep::AppendArray(absl::string_view src, MethodIdentifier method) { + MaybeRemoveEmptyCrcNode(); if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. size_t appended = 0; @@ -437,8 +436,8 @@ void Cord::InlineRep::AppendArray(absl::string_view src, size_t inline_length = inline_size(); if (src.size() <= kMaxInline - inline_length) { // Append new data to embedded array - memcpy(data_.as_chars() + inline_length, src.data(), src.size()); set_inline_size(inline_length + src.size()); + memcpy(data_.as_chars() + inline_length, src.data(), src.size()); return; } @@ -479,6 +478,10 @@ inline CordRep* Cord::TakeRep() && { template <typename C> inline void Cord::AppendImpl(C&& src) { auto constexpr method = CordzUpdateTracker::kAppendCord; + + contents_.MaybeRemoveEmptyCrcNode(); + if (src.empty()) return; + if (empty()) { // Since destination is empty, we can avoid allocating a node, if (src.contents_.is_tree()) { @@ -591,6 +594,9 @@ void Cord::Append(T&& src) { template void Cord::Append(std::string&& src); void Cord::Prepend(const Cord& src) { + contents_.MaybeRemoveEmptyCrcNode(); + if (src.empty()) return; + CordRep* src_tree = src.contents_.tree(); if (src_tree != nullptr) { CordRep::Ref(src_tree); @@ -605,15 +611,17 @@ void Cord::Prepend(const Cord& src) { } void Cord::PrependArray(absl::string_view src, MethodIdentifier method) { + contents_.MaybeRemoveEmptyCrcNode(); if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + if (!contents_.is_tree()) { size_t cur_size = contents_.inline_size(); if (cur_size + src.size() <= InlineRep::kMaxInline) { // Use embedded storage. InlineData data; + data.set_inline_size(cur_size + src.size()); memcpy(data.as_chars(), src.data(), src.size()); memcpy(data.as_chars() + src.size(), contents_.data(), cur_size); - data.set_inline_size(cur_size + src.size()); contents_.data_ = data; return; } @@ -627,8 +635,8 @@ void Cord::AppendPrecise(absl::string_view src, MethodIdentifier method) { assert(src.size() <= cord_internal::kMaxFlatLength); if (contents_.remaining_inline_capacity() >= src.size()) { const size_t inline_length = contents_.inline_size(); - memcpy(contents_.data_.as_chars() + inline_length, src.data(), src.size()); contents_.set_inline_size(inline_length + src.size()); + memcpy(contents_.data_.as_chars() + inline_length, src.data(), src.size()); } else { contents_.AppendTree(CordRepFlat::Create(src), method); } @@ -640,9 +648,9 @@ void Cord::PrependPrecise(absl::string_view src, MethodIdentifier method) { if (contents_.remaining_inline_capacity() >= src.size()) { const size_t cur_size = contents_.inline_size(); InlineData data; + data.set_inline_size(cur_size + src.size()); memcpy(data.as_chars(), src.data(), src.size()); memcpy(data.as_chars() + src.size(), contents_.data(), cur_size); - data.set_inline_size(cur_size + src.size()); contents_.data_ = data; } else { contents_.PrependTree(CordRepFlat::Create(src), method); @@ -665,6 +673,7 @@ void Cord::RemovePrefix(size_t n) { ABSL_INTERNAL_CHECK(n <= size(), absl::StrCat("Requested prefix size ", n, " exceeds Cord's size ", size())); + contents_.MaybeRemoveEmptyCrcNode(); CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.remove_prefix(n); @@ -695,6 +704,7 @@ void Cord::RemoveSuffix(size_t n) { ABSL_INTERNAL_CHECK(n <= size(), absl::StrCat("Requested suffix size ", n, " exceeds Cord's size ", size())); + contents_.MaybeRemoveEmptyCrcNode(); CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.reduce_size(n); @@ -733,6 +743,7 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { } if (new_size <= InlineRep::kMaxInline) { + sub_cord.contents_.set_inline_size(new_size); char* dest = sub_cord.contents_.data_.as_chars(); Cord::ChunkIterator it = chunk_begin(); it.AdvanceBytes(pos); @@ -744,7 +755,6 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { ++it; } cord_internal::SmallMemmove(dest, it->data(), remaining_size); - sub_cord.contents_.set_inline_size(new_size); return sub_cord; } @@ -784,7 +794,7 @@ int CompareChunks(absl::string_view* lhs, absl::string_view* rhs, } // This overload set computes comparison results from memcmp result. This -// interface is used inside GenericCompare below. Differet implementations +// interface is used inside GenericCompare below. Different implementations // are specialized for int and bool. For int we clamp result to {-1, 0, 1} // set. For bool we just interested in "value == 0". template <typename ResultType> @@ -842,26 +852,44 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { return absl::string_view(node->external()->base + offset, length); } -void Cord::SetExpectedChecksum(uint32_t crc) { +void Cord::SetCrcCordState(crc_internal::CrcCordState state) { auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum; - if (empty()) return; - - if (!contents_.is_tree()) { + if (empty()) { + contents_.MaybeRemoveEmptyCrcNode(); + CordRep* rep = CordRepCrc::New(nullptr, std::move(state)); + contents_.EmplaceTree(rep, method); + } else if (!contents_.is_tree()) { CordRep* rep = contents_.MakeFlatWithExtraCapacity(0); - rep = CordRepCrc::New(rep, crc); + rep = CordRepCrc::New(rep, std::move(state)); contents_.EmplaceTree(rep, method); } else { const CordzUpdateScope scope(contents_.data_.cordz_info(), method); - CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), crc); + CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), std::move(state)); contents_.SetTree(rep, scope); } } +void Cord::SetExpectedChecksum(uint32_t crc) { + // Construct a CrcCordState with a single chunk. + crc_internal::CrcCordState state; + state.mutable_rep()->prefix_crc.push_back( + crc_internal::CrcCordState::PrefixCrc(size(), absl::crc32c_t{crc})); + SetCrcCordState(std::move(state)); +} + +const crc_internal::CrcCordState* Cord::MaybeGetCrcCordState() const { + if (!contents_.is_tree() || !contents_.tree()->IsCrc()) { + return nullptr; + } + return &contents_.tree()->crc()->crc_cord_state; +} + absl::optional<uint32_t> Cord::ExpectedChecksum() const { if (!contents_.is_tree() || !contents_.tree()->IsCrc()) { return absl::nullopt; } - return contents_.tree()->crc()->crc; + return static_cast<uint32_t>( + contents_.tree()->crc()->crc_cord_state.Checksum()); } inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size, @@ -929,6 +957,7 @@ inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size, } inline absl::string_view Cord::GetFirstChunk(const Cord& c) { + if (c.empty()) return {}; return c.contents_.FindFlatStartPiece(); } inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) { @@ -1166,6 +1195,10 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { assert(rep != nullptr); + if (rep->length == 0) { + *fragment = absl::string_view(); + return true; + } rep = cord_internal::SkipCrcNode(rep); if (rep->IsFlat()) { *fragment = absl::string_view(rep->flat()->Data(), rep->length); @@ -1197,6 +1230,7 @@ absl::string_view Cord::FlattenSlowPath() { absl::cord_internal::CordRep* rep, absl::FunctionRef<void(absl::string_view)> callback) { assert(rep != nullptr); + if (rep->length == 0) return; rep = cord_internal::SkipCrcNode(rep); if (rep->IsBtree()) { @@ -1230,8 +1264,12 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, if (include_data) *os << static_cast<void*>(rep); *os << "]"; *os << " " << std::setw(indent) << ""; - if (rep->IsCrc()) { - *os << "CRC crc=" << rep->crc()->crc << "\n"; + bool leaf = false; + if (rep == nullptr) { + *os << "NULL\n"; + leaf = true; + } else if (rep->IsCrc()) { + *os << "CRC crc=" << rep->crc()->crc_cord_state.Checksum() << "\n"; indent += kIndentStep; rep = rep->crc()->child; } else if (rep->IsSubstring()) { @@ -1239,6 +1277,7 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, indent += kIndentStep; rep = rep->substring()->child; } else { // Leaf or ring + leaf = true; if (rep->IsExternal()) { *os << "EXTERNAL ["; if (include_data) @@ -1252,6 +1291,8 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, } else { CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os); } + } + if (leaf) { if (stack.empty()) break; rep = stack.back(); stack.pop_back(); @@ -1297,11 +1338,14 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, node->substring()->child->length, ReportError(root, node)); } else if (node->IsCrc()) { - ABSL_INTERNAL_CHECK(node->crc()->child != nullptr, - ReportError(root, node)); - ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length, - ReportError(root, node)); - worklist.push_back(node->crc()->child); + ABSL_INTERNAL_CHECK( + node->crc()->child != nullptr || node->crc()->length == 0, + ReportError(root, node)); + if (node->crc()->child != nullptr) { + ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length, + ReportError(root, node)); + worklist.push_back(node->crc()->child); + } } } while (!worklist.empty()); return true; diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 88e1c85d..f5a2da97 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -76,6 +76,7 @@ #include "absl/base/macros.h" #include "absl/base/port.h" #include "absl/container/inlined_vector.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/functional/function_ref.h" #include "absl/meta/type_traits.h" #include "absl/strings/cord_analysis.h" @@ -660,7 +661,7 @@ class Cord { class CharRange { public: // Fulfill minimum c++ container requirements [container.requirements] - // Theses (partial) container type definitions allow CharRange to be used + // These (partial) container type definitions allow CharRange to be used // in various utilities expecting a subset of [container.requirements]. // For example, the below enables using `::testing::ElementsAre(...)` using value_type = char; @@ -814,7 +815,7 @@ class Cord { InlineRep& operator=(const InlineRep& src); InlineRep& operator=(InlineRep&& src) noexcept; - explicit constexpr InlineRep(cord_internal::InlineData data); + explicit constexpr InlineRep(absl::string_view sv, CordRep* rep); void Swap(InlineRep* rhs); bool empty() const; @@ -873,15 +874,14 @@ class Cord { void PrependTreeToTree(CordRep* tree, MethodIdentifier method); void PrependTree(CordRep* tree, MethodIdentifier method); - bool IsSame(const InlineRep& other) const { - return memcmp(&data_, &other.data_, sizeof(data_)) == 0; - } + bool IsSame(const InlineRep& other) const { return data_ == other.data_; } + void CopyTo(std::string* dst) const { // memcpy is much faster when operating on a known size. On most supported // platforms, the small string optimization is large enough that resizing // to 15 bytes does not cause a memory allocation. absl::strings_internal::STLStringResizeUninitialized(dst, kMaxInline); - memcpy(&(*dst)[0], data_.as_chars(), kMaxInline); + data_.copy_max_inline_to(&(*dst)[0]); // erase is faster than resize because the logic for memory allocation is // not needed. dst->erase(inline_size()); @@ -926,6 +926,13 @@ class Cord { void set_inline_size(size_t size) { data_.set_inline_size(size); } size_t inline_size() const { return data_.inline_size(); } + // Empty cords that carry a checksum have a CordRepCrc node with a null + // child node. The code can avoid lots of special cases where it would + // otherwise transition from tree to inline storage if we just remove the + // CordRepCrc node before mutations. Must never be called inside a + // CordzUpdateScope since it untracks the cordz info. + void MaybeRemoveEmptyCrcNode(); + cord_internal::InlineData data_; }; InlineRep contents_; @@ -995,6 +1002,10 @@ class Cord { }); return H::combine(combiner.finalize(std::move(hash_state)), size()); } + + friend class CrcCord; + void SetCrcCordState(crc_internal::CrcCordState state); + const crc_internal::CrcCordState* MaybeGetCrcCordState() const; }; ABSL_NAMESPACE_END @@ -1011,46 +1022,6 @@ extern std::ostream& operator<<(std::ostream& out, const Cord& cord); namespace cord_internal { -// Fast implementation of memmove for up to 15 bytes. This implementation is -// safe for overlapping regions. If nullify_tail is true, the destination is -// padded with '\0' up to 15 bytes. -template <bool nullify_tail = false> -inline void SmallMemmove(char* dst, const char* src, size_t n) { - if (n >= 8) { - assert(n <= 15); - uint64_t buf1; - uint64_t buf2; - memcpy(&buf1, src, 8); - memcpy(&buf2, src + n - 8, 8); - if (nullify_tail) { - memset(dst + 7, 0, 8); - } - memcpy(dst, &buf1, 8); - memcpy(dst + n - 8, &buf2, 8); - } else if (n >= 4) { - uint32_t buf1; - uint32_t buf2; - memcpy(&buf1, src, 4); - memcpy(&buf2, src + n - 4, 4); - if (nullify_tail) { - memset(dst + 4, 0, 4); - memset(dst + 7, 0, 8); - } - memcpy(dst, &buf1, 4); - memcpy(dst + n - 4, &buf2, 4); - } else { - if (n != 0) { - dst[0] = src[0]; - dst[n / 2] = src[n / 2]; - dst[n - 1] = src[n - 1]; - } - if (nullify_tail) { - memset(dst + 7, 0, 8); - memset(dst + n, 0, 8); - } - } -} - // Does non-template-specific `CordRepExternal` initialization. // Requires `data` to be non-empty. void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep); @@ -1094,8 +1065,8 @@ Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { return cord; } -constexpr Cord::InlineRep::InlineRep(cord_internal::InlineData data) - : data_(data) {} +constexpr Cord::InlineRep::InlineRep(absl::string_view sv, CordRep* rep) + : data_(sv, rep) {} inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) : data_(InlineData::kDefaultInit) { @@ -1174,7 +1145,7 @@ inline cord_internal::CordRepFlat* Cord::InlineRep::MakeFlatWithExtraCapacity( size_t len = data_.inline_size(); auto* result = CordRepFlat::New(len + extra); result->length = len; - memcpy(result->Data(), data_.as_chars(), InlineRep::kMaxInline); + data_.copy_max_inline_to(result->Data()); return result; } @@ -1236,6 +1207,18 @@ inline void Cord::InlineRep::CopyToArray(char* dst) const { cord_internal::SmallMemmove(dst, data_.as_chars(), n); } +inline void Cord::InlineRep::MaybeRemoveEmptyCrcNode() { + CordRep* rep = tree(); + if (rep == nullptr || ABSL_PREDICT_TRUE(rep->length > 0)) { + return; + } + assert(rep->IsCrc()); + assert(rep->crc()->child == nullptr); + CordzInfo::MaybeUntrackCord(cordz_info()); + CordRep::Unref(rep); + ResetToEmpty(); +} + constexpr inline Cord::Cord() noexcept {} inline Cord::Cord(absl::string_view src) @@ -1243,13 +1226,12 @@ inline Cord::Cord(absl::string_view src) template <typename T> constexpr Cord::Cord(strings_internal::StringConstant<T>) - : contents_(strings_internal::StringConstant<T>::value.size() <= + : contents_(strings_internal::StringConstant<T>::value, + strings_internal::StringConstant<T>::value.size() <= cord_internal::kMaxInline - ? cord_internal::InlineData( - strings_internal::StringConstant<T>::value) - : cord_internal::InlineData( - &cord_internal::ConstInitExternalStorage< - strings_internal::StringConstant<T>>::value)) {} + ? nullptr + : &cord_internal::ConstInitExternalStorage< + strings_internal::StringConstant<T>>::value) {} inline Cord& Cord::operator=(const Cord& x) { contents_ = x.contents_; @@ -1285,7 +1267,7 @@ inline size_t Cord::size() const { return contents_.size(); } -inline bool Cord::empty() const { return contents_.empty(); } +inline bool Cord::empty() const { return size() == 0; } inline size_t Cord::EstimatedMemoryUsage( CordMemoryAccounting accounting_method) const { @@ -1411,7 +1393,11 @@ inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) { inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) { if (CordRep* tree = cord->contents_.tree()) { bytes_remaining_ = tree->length; - InitTree(tree); + if (ABSL_PREDICT_TRUE(bytes_remaining_ != 0)) { + InitTree(tree); + } else { + current_chunk_ = {}; + } } else { bytes_remaining_ = cord->contents_.inline_size(); current_chunk_ = {cord->contents_.data(), bytes_remaining_}; @@ -1580,7 +1566,7 @@ inline void Cord::ForEachChunk( if (rep == nullptr) { callback(absl::string_view(contents_.data(), contents_.size())); } else { - return ForEachChunkAux(rep, callback); + ForEachChunkAux(rep, callback); } } diff --git a/absl/strings/cord_buffer.h b/absl/strings/cord_buffer.h index 15494b31..bc0e4e45 100644 --- a/absl/strings/cord_buffer.h +++ b/absl/strings/cord_buffer.h @@ -160,7 +160,6 @@ class CordBuffer { // for more information on buffer capacities and intended usage. static CordBuffer CreateWithDefaultLimit(size_t capacity); - // CordBuffer::CreateWithCustomLimit() // // Creates a CordBuffer instance of the desired `capacity` rounded to an @@ -336,7 +335,7 @@ class CordBuffer { } // Returns the available area of the internal SSO data - absl::Span<char> long_available() { + absl::Span<char> long_available() const { assert(!is_short()); const size_t length = long_rep.rep->length; return absl::Span<char>(long_rep.rep->Data() + length, @@ -460,9 +459,7 @@ inline constexpr size_t CordBuffer::MaximumPayload() { } inline constexpr size_t CordBuffer::MaximumPayload(size_t block_size) { - // TODO(absl-team): Use std::min when C++11 support is dropped. - return (kCustomLimit < block_size ? kCustomLimit : block_size) - - cord_internal::kFlatOverhead; + return (std::min)(kCustomLimit, block_size) - cord_internal::kFlatOverhead; } inline CordBuffer CordBuffer::CreateWithDefaultLimit(size_t capacity) { diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 1fc4be6e..3fe3967f 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -58,6 +58,8 @@ using absl::cord_internal::CordRepSubstring; using absl::cord_internal::CordzUpdateTracker; using absl::cord_internal::kFlatOverhead; using absl::cord_internal::kMaxFlatLength; +using ::testing::ElementsAre; +using ::testing::Le; static std::string RandomLowercaseString(RandomEngine* rng); static std::string RandomLowercaseString(RandomEngine* rng, size_t length); @@ -618,7 +620,7 @@ TEST_P(CordTest, AppendEmptyBufferToTree) { TEST_P(CordTest, AppendSmallBuffer) { absl::Cord cord; absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3); - ASSERT_THAT(buffer.capacity(), ::testing::Le(15)); + ASSERT_THAT(buffer.capacity(), Le(15)); memcpy(buffer.data(), "Abc", 3); buffer.SetLength(3); cord.Append(std::move(buffer)); @@ -632,7 +634,7 @@ TEST_P(CordTest, AppendSmallBuffer) { EXPECT_EQ(buffer.length(), 0); // NOLINT EXPECT_GT(buffer.capacity(), 0); // NOLINT - EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("Abcdefgh")); + EXPECT_THAT(cord.Chunks(), ElementsAre("Abcdefgh")); } TEST_P(CordTest, AppendAndPrependBufferArePrecise) { @@ -671,7 +673,7 @@ TEST_P(CordTest, AppendAndPrependBufferArePrecise) { TEST_P(CordTest, PrependSmallBuffer) { absl::Cord cord; absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3); - ASSERT_THAT(buffer.capacity(), ::testing::Le(15)); + ASSERT_THAT(buffer.capacity(), Le(15)); memcpy(buffer.data(), "Abc", 3); buffer.SetLength(3); cord.Prepend(std::move(buffer)); @@ -685,7 +687,7 @@ TEST_P(CordTest, PrependSmallBuffer) { EXPECT_EQ(buffer.length(), 0); // NOLINT EXPECT_GT(buffer.capacity(), 0); // NOLINT - EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("defghAbc")); + EXPECT_THAT(cord.Chunks(), ElementsAre("defghAbc")); } TEST_P(CordTest, AppendLargeBuffer) { @@ -707,7 +709,7 @@ TEST_P(CordTest, AppendLargeBuffer) { EXPECT_EQ(buffer.length(), 0); // NOLINT EXPECT_GT(buffer.capacity(), 0); // NOLINT - EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s1, s2)); + EXPECT_THAT(cord.Chunks(), ElementsAre(s1, s2)); } TEST_P(CordTest, PrependLargeBuffer) { @@ -729,7 +731,7 @@ TEST_P(CordTest, PrependLargeBuffer) { EXPECT_EQ(buffer.length(), 0); // NOLINT EXPECT_GT(buffer.capacity(), 0); // NOLINT - EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s2, s1)); + EXPECT_THAT(cord.Chunks(), ElementsAre(s2, s1)); } class CordAppendBufferTest : public testing::TestWithParam<bool> { @@ -1988,6 +1990,12 @@ TEST_P(CordTest, HugeCord) { // Tests that Append() works ok when handed a self reference TEST_P(CordTest, AppendSelf) { + // Test the empty case. + absl::Cord empty; + MaybeHarden(empty); + empty.Append(empty); + ASSERT_EQ(empty, ""); + // We run the test until data is ~16K // This guarantees it covers small, medium and large data. std::string control_data = "Abc"; @@ -2712,7 +2720,7 @@ class CordMutator { // clang-format off // This array is constant-initialized in conformant compilers. -CordMutator cord_mutators[] ={ +CordMutator cord_mutators[] = { {"clear", [](absl::Cord& c) { c.Clear(); }}, {"overwrite", [](absl::Cord& c) { c = "overwritten"; }}, { @@ -2742,6 +2750,25 @@ CordMutator cord_mutators[] ={ [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); } }, { + "append empty string", + [](absl::Cord& c) { c.Append(""); }, + [](absl::Cord& c) { } + }, + { + "append empty cord", + [](absl::Cord& c) { c.Append(absl::Cord()); }, + [](absl::Cord& c) { } + }, + { + "append empty checksummed cord", + [](absl::Cord& c) { + absl::Cord to_append; + to_append.SetExpectedChecksum(999); + c.Append(to_append); + }, + [](absl::Cord& c) { } + }, + { "prepend string", [](absl::Cord& c) { c.Prepend("9876543210"); }, [](absl::Cord& c) { c.RemovePrefix(10); } @@ -2763,12 +2790,33 @@ CordMutator cord_mutators[] ={ [](absl::Cord& c) { c.RemovePrefix(10); } }, { + "prepend empty string", + [](absl::Cord& c) { c.Prepend(""); }, + [](absl::Cord& c) { } + }, + { + "prepend empty cord", + [](absl::Cord& c) { c.Prepend(absl::Cord()); }, + [](absl::Cord& c) { } + }, + { + "prepend empty checksummed cord", + [](absl::Cord& c) { + absl::Cord to_prepend; + to_prepend.SetExpectedChecksum(999); + c.Prepend(to_prepend); + }, + [](absl::Cord& c) { } + }, + { "prepend self", [](absl::Cord& c) { c.Prepend(c); }, [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); } }, - {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(2); }}, - {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(2); }}, + {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); }}, + {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); }}, + {"remove 0-prefix", [](absl::Cord& c) { c.RemovePrefix(0); }}, + {"remove 0-suffix", [](absl::Cord& c) { c.RemoveSuffix(0); }}, {"subcord", [](absl::Cord& c) { c = c.Subcord(1, c.size() - 2); }}, { "swap inline", @@ -2810,6 +2858,12 @@ TEST_P(CordTest, ExpectedChecksum) { EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); EXPECT_EQ(c1, base_value); + // Test that setting an expected checksum again doesn't crash or leak + // memory. + c1.SetExpectedChecksum(12345); + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + EXPECT_EQ(c1, base_value); + // CRC persists through copies, assignments, and moves: absl::Cord c1_copy_construct = c1; EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345); @@ -2834,6 +2888,13 @@ TEST_P(CordTest, ExpectedChecksum) { c2.SetExpectedChecksum(24680); mutator.Mutate(c2); + + if (c1 == c2) { + // Not a mutation (for example, appending the empty string). + // Whether the checksum is removed is not defined. + continue; + } + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); if (mutator.CanUndo()) { @@ -2903,3 +2964,164 @@ TEST_P(CordTest, ExpectedChecksum) { } } } + +// Test the special cases encountered with an empty checksummed cord. +TEST_P(CordTest, ChecksummedEmptyCord) { + absl::Cord c1; + EXPECT_FALSE(c1.ExpectedChecksum().has_value()); + + // Setting an expected checksum works. + c1.SetExpectedChecksum(12345); + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + EXPECT_EQ(c1, ""); + EXPECT_TRUE(c1.empty()); + + // Test that setting an expected checksum again doesn't crash or leak memory. + c1.SetExpectedChecksum(12345); + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + EXPECT_EQ(c1, ""); + EXPECT_TRUE(c1.empty()); + + // CRC persists through copies, assignments, and moves: + absl::Cord c1_copy_construct = c1; + EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_copy_assign; + c1_copy_assign = c1; + EXPECT_EQ(c1_copy_assign.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_move(std::move(c1_copy_assign)); + EXPECT_EQ(c1_move.ExpectedChecksum().value_or(0), 12345); + + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + + // A CRC Cord compares equal to its non-CRC value. + EXPECT_EQ(c1, absl::Cord()); + + for (const CordMutator& mutator : cord_mutators) { + SCOPED_TRACE(mutator.Name()); + + // Exercise mutating an empty checksummed cord to catch crashes and exercise + // memory sanitizers. + absl::Cord c2; + c2.SetExpectedChecksum(24680); + mutator.Mutate(c2); + + if (c2.empty()) { + // Not a mutation + continue; + } + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); + + if (mutator.CanUndo()) { + mutator.Undo(c2); + } + } + + absl::Cord c3; + c3.SetExpectedChecksum(999); + const absl::Cord& cc3 = c3; + + // Test that all cord reading operations function in the face of an + // expected checksum. + EXPECT_TRUE(cc3.StartsWith("")); + EXPECT_TRUE(cc3.EndsWith("")); + EXPECT_TRUE(cc3.empty()); + EXPECT_EQ(cc3, ""); + EXPECT_EQ(cc3, absl::Cord()); + EXPECT_EQ(cc3.size(), 0); + EXPECT_EQ(cc3.Compare(absl::Cord()), 0); + EXPECT_EQ(cc3.Compare(c1), 0); + EXPECT_EQ(cc3.Compare(cc3), 0); + EXPECT_EQ(cc3.Compare(""), 0); + EXPECT_EQ(cc3.Compare("wxyz"), -1); + EXPECT_EQ(cc3.Compare(absl::Cord("wxyz")), -1); + EXPECT_EQ(absl::Cord("wxyz").Compare(cc3), 1); + EXPECT_EQ(std::string(cc3), ""); + + std::string dest; + absl::CopyCordToString(cc3, &dest); + EXPECT_EQ(dest, ""); + + for (absl::string_view chunk : cc3.Chunks()) { // NOLINT(unreachable loop) + static_cast<void>(chunk); + GTEST_FAIL() << "no chunks expected"; + } + EXPECT_TRUE(cc3.chunk_begin() == cc3.chunk_end()); + + for (char ch : cc3.Chars()) { // NOLINT(unreachable loop) + static_cast<void>(ch); + GTEST_FAIL() << "no chars expected"; + } + EXPECT_TRUE(cc3.char_begin() == cc3.char_end()); + + EXPECT_EQ(cc3.TryFlat(), ""); + EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::Cord())); + EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::string_view())); +} + +#if defined(GTEST_HAS_DEATH_TEST) && defined(ABSL_INTERNAL_CORD_HAVE_SANITIZER) + +// Returns an expected poison / uninitialized death message expression. +const char* MASanDeathExpr() { + return "(use-after-poison|use-of-uninitialized-value)"; +} + +TEST(CordSanitizerTest, SanitizesEmptyCord) { + absl::Cord cord; + const char* data = cord.Flatten().data(); + EXPECT_DEATH(EXPECT_EQ(data[0], 0), MASanDeathExpr()); +} + +TEST(CordSanitizerTest, SanitizesSmallCord) { + absl::Cord cord("Hello"); + const char* data = cord.Flatten().data(); + EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr()); +} + +TEST(CordSanitizerTest, SanitizesCordOnSetSSOValue) { + absl::Cord cord("String that is too big to be an SSO value"); + cord = "Hello"; + const char* data = cord.Flatten().data(); + EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr()); +} + +TEST(CordSanitizerTest, SanitizesCordOnCopyCtor) { + absl::Cord src("hello"); + absl::Cord dst(src); + const char* data = dst.Flatten().data(); + EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr()); +} + +TEST(CordSanitizerTest, SanitizesCordOnMoveCtor) { + absl::Cord src("hello"); + absl::Cord dst(std::move(src)); + const char* data = dst.Flatten().data(); + EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr()); +} + +TEST(CordSanitizerTest, SanitizesCordOnAssign) { + absl::Cord src("hello"); + absl::Cord dst; + dst = src; + const char* data = dst.Flatten().data(); + EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr()); +} + +TEST(CordSanitizerTest, SanitizesCordOnMoveAssign) { + absl::Cord src("hello"); + absl::Cord dst; + dst = std::move(src); + const char* data = dst.Flatten().data(); + EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr()); +} + +TEST(CordSanitizerTest, SanitizesCordOnSsoAssign) { + absl::Cord src("hello"); + absl::Cord dst("String that is too big to be an SSO value"); + dst = src; + const char* data = dst.Flatten().data(); + EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr()); +} + +#endif // GTEST_HAS_DEATH_TEST && ABSL_INTERNAL_CORD_HAVE_SANITIZER diff --git a/absl/strings/cord_test_helpers.h b/absl/strings/cord_test_helpers.h index 31a1dc89..ca52240a 100644 --- a/absl/strings/cord_test_helpers.h +++ b/absl/strings/cord_test_helpers.h @@ -51,7 +51,7 @@ enum class TestCordSize { // existing inputs rather than copying contents of the input. kMedium = cord_internal::kMaxFlatLength / 2 + 1, - // A string value large enough to cause it to be stored in mutliple flats. + // A string value large enough to cause it to be stored in multiple flats. kLarge = cord_internal::kMaxFlatLength * 4 }; diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 7d97944e..2827fbaa 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -443,6 +443,8 @@ void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { } } +// Reverses the mapping in Base64EscapeInternal; see that method's +// documentation for details of the mapping. bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, size_t szdest, const signed char* unbase64, size_t* len) { @@ -676,7 +678,10 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, return ok; } -// The arrays below were generated by the following code +// The arrays below map base64-escaped characters back to their original values. +// For the inverse case, see k(WebSafe)Base64Chars in the internal +// escaping.cc. +// These arrays were generated by the following inversion code: // #include <sys/time.h> // #include <stdlib.h> // #include <string.h> @@ -703,8 +708,8 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, // } // } // -// where the value of "Base64[]" was replaced by one of the base-64 conversion -// tables from the functions below. +// where the value of "Base64[]" was replaced by one of k(WebSafe)Base64Chars +// in the internal escaping.cc. /* clang-format off */ constexpr signed char kUnBase64[] = { -1, -1, -1, -1, -1, -1, -1, -1, @@ -777,16 +782,11 @@ constexpr signed char kUnWebSafeBase64[] = { }; /* clang-format on */ -constexpr char kWebSafeBase64Chars[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; - template <typename String> bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, const signed char* unbase64) { // Determine the size of the output string. Base64 encodes every 3 bytes into - // 4 characters. any leftover chars are added directly for good measure. - // This is documented in the base64 RFC: - // https://datatracker.ietf.org/doc/html/rfc3548 + // 4 characters. Any leftover chars are added directly for good measure. const size_t dest_len = 3 * (slen / 4) + (slen % 4); strings_internal::STLStringResizeUninitialized(dest, dest_len); @@ -882,30 +882,6 @@ std::string Utf8SafeCHexEscape(absl::string_view src) { return CEscapeInternal(src, true, true); } -// ---------------------------------------------------------------------- -// Base64Unescape() - base64 decoder -// Base64Escape() - base64 encoder -// WebSafeBase64Unescape() - Google's variation of base64 decoder -// WebSafeBase64Escape() - Google's variation of base64 encoder -// -// Check out -// https://datatracker.ietf.org/doc/html/rfc2045 for formal description, but -// what we care about is that... -// Take the encoded stuff in groups of 4 characters and turn each -// character into a code 0 to 63 thus: -// A-Z map to 0 to 25 -// a-z map to 26 to 51 -// 0-9 map to 52 to 61 -// +(- for WebSafe) maps to 62 -// /(_ for WebSafe) maps to 63 -// There will be four numbers, all less than 64 which can be represented -// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). -// Arrange the 6 digit binary numbers into three bytes as such: -// aaaaaabb bbbbcccc ccdddddd -// Equals signs (one or two) are used at the end of the encoded block to -// indicate that the text was not an integer multiple of three bytes long. -// ---------------------------------------------------------------------- - bool Base64Unescape(absl::string_view src, std::string* dest) { return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); } @@ -923,7 +899,7 @@ void Base64Escape(absl::string_view src, std::string* dest) { void WebSafeBase64Escape(absl::string_view src, std::string* dest) { strings_internal::Base64EscapeInternal( reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, - false, kWebSafeBase64Chars); + false, strings_internal::kWebSafeBase64Chars); } std::string Base64Escape(absl::string_view src) { @@ -938,7 +914,7 @@ std::string WebSafeBase64Escape(absl::string_view src) { std::string dest; strings_internal::Base64EscapeInternal( reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, - false, kWebSafeBase64Chars); + false, strings_internal::kWebSafeBase64Chars); return dest; } diff --git a/absl/strings/escaping.h b/absl/strings/escaping.h index f5ca26c5..bf2a5898 100644 --- a/absl/strings/escaping.h +++ b/absl/strings/escaping.h @@ -117,35 +117,40 @@ std::string Utf8SafeCEscape(absl::string_view src); // conversion. std::string Utf8SafeCHexEscape(absl::string_view src); -// Base64Unescape() -// -// Converts a `src` string encoded in Base64 to its binary equivalent, writing -// it to a `dest` buffer, returning `true` on success. If `src` contains invalid -// characters, `dest` is cleared and returns `false`. -bool Base64Unescape(absl::string_view src, std::string* dest); - -// WebSafeBase64Unescape() -// -// Converts a `src` string encoded in Base64 to its binary equivalent, writing -// it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'. -// If `src` contains invalid characters, `dest` is cleared and returns `false`. -bool WebSafeBase64Unescape(absl::string_view src, std::string* dest); - // Base64Escape() // -// Encodes a `src` string into a base64-encoded string, with padding characters. -// This function conforms with RFC 4648 section 4 (base64). +// Encodes a `src` string into a base64-encoded 'dest' string with padding +// characters. This function conforms with RFC 4648 section 4 (base64) and RFC +// 2045. void Base64Escape(absl::string_view src, std::string* dest); std::string Base64Escape(absl::string_view src); // WebSafeBase64Escape() // -// Encodes a `src` string into a base64-like string, using '-' instead of '+' -// and '_' instead of '/', and without padding. This function conforms with RFC -// 4648 section 5 (base64url). +// Encodes a `src` string into a base64 string, like Base64Escape() does, but +// outputs '-' instead of '+' and '_' instead of '/', and does not pad 'dest'. +// This function conforms with RFC 4648 section 5 (base64url). void WebSafeBase64Escape(absl::string_view src, std::string* dest); std::string WebSafeBase64Escape(absl::string_view src); +// Base64Unescape() +// +// Converts a `src` string encoded in Base64 (RFC 4648 section 4) to its binary +// equivalent, writing it to a `dest` buffer, returning `true` on success. If +// `src` contains invalid characters, `dest` is cleared and returns `false`. +// If padding is included (note that `Base64Escape()` does produce it), it must +// be correct. In the padding, '=' and '.' are treated identically. +bool Base64Unescape(absl::string_view src, std::string* dest); + +// WebSafeBase64Unescape() +// +// Converts a `src` string encoded in "web safe" Base64 (RFC 4648 section 5) to +// its binary equivalent, writing it to a `dest` buffer. If `src` contains +// invalid characters, `dest` is cleared and returns `false`. If padding is +// included (note that `WebSafeBase64Escape()` does not produce it), it must be +// correct. In the padding, '=' and '.' are treated identically. +bool WebSafeBase64Unescape(absl::string_view src, std::string* dest); + // HexStringToBytes() // // Converts an ASCII hex string into bytes, returning binary data of length diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc index 45671a0e..9f62c1ee 100644 --- a/absl/strings/escaping_test.cc +++ b/absl/strings/escaping_test.cc @@ -562,6 +562,7 @@ template <typename StringType> void TestEscapeAndUnescape() { // Check the short strings; this tests the math (and boundaries) for (const auto& tc : base64_tests) { + // Test plain base64. StringType encoded("this junk should be ignored"); absl::Base64Escape(tc.plaintext, &encoded); EXPECT_EQ(encoded, tc.cyphertext); @@ -571,22 +572,26 @@ void TestEscapeAndUnescape() { EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded)); EXPECT_EQ(decoded, tc.plaintext); - StringType websafe(tc.cyphertext); - for (int c = 0; c < websafe.size(); ++c) { - if ('+' == websafe[c]) websafe[c] = '-'; - if ('/' == websafe[c]) websafe[c] = '_'; + StringType websafe_with_padding(tc.cyphertext); + for (unsigned int c = 0; c < websafe_with_padding.size(); ++c) { + if ('+' == websafe_with_padding[c]) websafe_with_padding[c] = '-'; + if ('/' == websafe_with_padding[c]) websafe_with_padding[c] = '_'; + // Intentionally keeping padding aka '='. + } + + // Test plain websafe (aka without padding). + StringType websafe(websafe_with_padding); + for (unsigned int c = 0; c < websafe.size(); ++c) { if ('=' == websafe[c]) { websafe.resize(c); break; } } - encoded = "this junk should be ignored"; absl::WebSafeBase64Escape(tc.plaintext, &encoded); EXPECT_EQ(encoded, websafe); EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe); - // Let's try the string version of the decoder decoded = "this junk should be ignored"; EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded)); EXPECT_EQ(decoded, tc.plaintext); @@ -617,6 +622,48 @@ TEST(Base64, EscapeAndUnescape) { TestEscapeAndUnescape<std::string>(); } +TEST(Base64, Padding) { + // Padding is optional. + // '.' is an acceptable padding character, just like '='. + std::initializer_list<absl::string_view> good_padding = { + "YQ", + "YQ==", + "YQ=.", + "YQ.=", + "YQ..", + }; + for (absl::string_view b64 : good_padding) { + std::string decoded; + EXPECT_TRUE(absl::Base64Unescape(b64, &decoded)); + EXPECT_EQ(decoded, "a"); + std::string websafe_decoded; + EXPECT_TRUE(absl::WebSafeBase64Unescape(b64, &websafe_decoded)); + EXPECT_EQ(websafe_decoded, "a"); + } + std::initializer_list<absl::string_view> bad_padding = { + "YQ=", + "YQ.", + "YQ===", + "YQ==.", + "YQ=.=", + "YQ=..", + "YQ.==", + "YQ.=.", + "YQ..=", + "YQ...", + "YQ====", + "YQ....", + "YQ=====", + "YQ.....", + }; + for (absl::string_view b64 : bad_padding) { + std::string decoded; + EXPECT_FALSE(absl::Base64Unescape(b64, &decoded)); + std::string websafe_decoded; + EXPECT_FALSE(absl::WebSafeBase64Unescape(b64, &websafe_decoded)); + } +} + TEST(Base64, DISABLED_HugeData) { const size_t kSize = size_t(3) * 1000 * 1000 * 1000; static_assert(kSize % 3 == 0, "kSize must be divisible by 3"); diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h index 5aabc1fc..70a90343 100644 --- a/absl/strings/internal/char_map.h +++ b/absl/strings/internal/char_map.h @@ -73,10 +73,10 @@ class Charmap { } // Containing all the chars in the C-string 's'. - // Note that this is expensively recursive because of the C++11 constexpr - // formulation. Use only in constexpr initializers. static constexpr Charmap FromString(const char* s) { - return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1)); + Charmap ret; + while (*s) ret = ret | Char(*s++); + return ret; } // Containing all the chars in the closed interval [lo,hi]. diff --git a/absl/strings/internal/charconv_bigint.cc b/absl/strings/internal/charconv_bigint.cc index 282b639e..46b5289a 100644 --- a/absl/strings/internal/charconv_bigint.cc +++ b/absl/strings/internal/charconv_bigint.cc @@ -296,10 +296,8 @@ template <int max_words> std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex); if (first_pass) { // just copy, rather than multiplying by 1 - std::copy( - LargePowerOfFiveData(big_power), - LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power), - answer.words_); + std::copy_n(LargePowerOfFiveData(big_power), + LargePowerOfFiveSize(big_power), answer.words_); answer.size_ = LargePowerOfFiveSize(big_power); first_pass = false; } else { diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h index 8f702976..5c0c375d 100644 --- a/absl/strings/internal/charconv_bigint.h +++ b/absl/strings/internal/charconv_bigint.h @@ -92,7 +92,7 @@ class BigUnsigned { // numbers with this many decimal digits or fewer are representable by this // type. // - // Analagous to std::numeric_limits<BigUnsigned>::digits10. + // Analogous to std::numeric_limits<BigUnsigned>::digits10. static constexpr int Digits10() { // 9975007/1035508 is very slightly less than log10(2**32). return static_cast<uint64_t>(max_words) * 9975007 / 1035508; @@ -121,7 +121,7 @@ class BigUnsigned { ++size_; } } - std::fill(words_, words_ + word_shift, 0u); + std::fill_n(words_, word_shift, 0u); } } @@ -197,7 +197,7 @@ class BigUnsigned { } void SetToZero() { - std::fill(words_, words_ + size_, 0u); + std::fill_n(words_, size_, 0u); size_ = 0; } diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc index b6b06cfa..b7874385 100644 --- a/absl/strings/internal/cord_internal.cc +++ b/absl/strings/internal/cord_internal.cc @@ -33,7 +33,6 @@ ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled( kCordEnableRingBufferDefault); ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled( kCordShallowSubcordsDefault); -ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false); void LogFatalNodeType(CordRep* rep) { ABSL_INTERNAL_LOG(FATAL, absl::StrCat("Unexpected node type: ", diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index f32fd416..8d9836ba 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -27,9 +27,20 @@ #include "absl/base/internal/invoke.h" #include "absl/base/optimization.h" #include "absl/container/internal/compressed_tuple.h" +#include "absl/container/internal/container_memory.h" #include "absl/meta/type_traits.h" #include "absl/strings/string_view.h" +// We can only add poisoning if we can detect consteval executions. +#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \ + (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER)) +#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1 +#endif + +#define ABSL_CORD_INTERNAL_NO_SANITIZE \ + ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY + namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { @@ -58,12 +69,6 @@ enum CordFeatureDefaults { extern std::atomic<bool> cord_ring_buffer_enabled; extern std::atomic<bool> shallow_subcords_enabled; -// `cord_btree_exhaustive_validation` can be set to force exhaustive validation -// in debug assertions, and code that calls `IsValid()` explicitly. By default, -// assertions should be relatively cheap and AssertValid() can easily lead to -// O(n^2) complexity as recursive / full tree validation is O(n). -extern std::atomic<bool> cord_btree_exhaustive_validation; - inline void enable_cord_ring_buffer(bool enable) { cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed); } @@ -91,6 +96,46 @@ enum Constants { // Emits a fatal error "Unexpected node type: xyz" and aborts the program. ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep); +// Fast implementation of memmove for up to 15 bytes. This implementation is +// safe for overlapping regions. If nullify_tail is true, the destination is +// padded with '\0' up to 15 bytes. +template <bool nullify_tail = false> +inline void SmallMemmove(char* dst, const char* src, size_t n) { + if (n >= 8) { + assert(n <= 15); + uint64_t buf1; + uint64_t buf2; + memcpy(&buf1, src, 8); + memcpy(&buf2, src + n - 8, 8); + if (nullify_tail) { + memset(dst + 7, 0, 8); + } + memcpy(dst, &buf1, 8); + memcpy(dst + n - 8, &buf2, 8); + } else if (n >= 4) { + uint32_t buf1; + uint32_t buf2; + memcpy(&buf1, src, 4); + memcpy(&buf2, src + n - 4, 4); + if (nullify_tail) { + memset(dst + 4, 0, 4); + memset(dst + 7, 0, 8); + } + memcpy(dst, &buf1, 4); + memcpy(dst + n - 4, &buf2, 4); + } else { + if (n != 0) { + dst[0] = src[0]; + dst[n / 2] = src[n / 2]; + dst[n - 1] = src[n - 1]; + } + if (nullify_tail) { + memset(dst + 7, 0, 8); + memset(dst + n, 0, 8); + } + } +} + // Compact class for tracking the reference count and state flags for CordRep // instances. Data is stored in an atomic int32_t for compactness and speed. class RefcountAndFlags { @@ -225,7 +270,11 @@ struct CordRep { : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} // The following three fields have to be less than 32 bytes since - // that is the smallest supported flat node size. + // that is the smallest supported flat node size. Some code optimizations rely + // on the specific layout of these fields. Notably: the non-trivial field + // `refcount` being preceded by `length`, and being tailed by POD data + // members only. + // # LINT.IfChange size_t length; RefcountAndFlags refcount; // If tag < FLAT, it represents CordRepKind and indicates the type of node. @@ -241,6 +290,7 @@ struct CordRep { // allocate room for these in the derived class, as not all compilers reuse // padding space from the base class (clang and gcc do, MSVC does not, etc) uint8_t storage[3]; + // # LINT.ThenChange(cord_rep_btree.h:copy_raw) // Returns true if this instance's tag matches the requested type. constexpr bool IsRing() const { return tag == RING; } @@ -423,25 +473,25 @@ constexpr char GetOrNull(absl::string_view data, size_t pos) { return pos < data.size() ? data[pos] : '\0'; } -// We store cordz_info as 64 bit pointer value in big endian format. This -// guarantees that the least significant byte of cordz_info matches the last -// byte of the inline data representation in as_chars_, which holds the inlined +// We store cordz_info as 64 bit pointer value in little endian format. This +// guarantees that the least significant byte of cordz_info matches the first +// byte of the inline data representation in `data`, which holds the inlined // size or the 'is_tree' bit. using cordz_info_t = int64_t; // Assert that the `cordz_info` pointer value perfectly overlaps the last half -// of `as_chars_` and can hold a pointer value. +// of `data` and can hold a pointer value. static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, ""); static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), ""); -// BigEndianByte() creates a big endian representation of 'value', i.e.: a big -// endian value where the last byte in the host's representation holds 'value`, -// with all other bytes being 0. -static constexpr cordz_info_t BigEndianByte(unsigned char value) { +// LittleEndianByte() creates a little endian representation of 'value', i.e.: +// a little endian value where the first byte in the host's representation +// holds 'value`, with all other bytes being 0. +static constexpr cordz_info_t LittleEndianByte(unsigned char value) { #if defined(ABSL_IS_BIG_ENDIAN) - return value; -#else return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8); +#else + return value; #endif } @@ -450,38 +500,80 @@ class InlineData { // DefaultInitType forces the use of the default initialization constructor. enum DefaultInitType { kDefaultInit }; - // kNullCordzInfo holds the big endian representation of intptr_t(1) + // kNullCordzInfo holds the little endian representation of intptr_t(1) // This is the 'null' / initial value of 'cordz_info'. The null value // is specifically big endian 1 as with 64-bit pointers, the last // byte of cordz_info overlaps with the last byte holding the tag. - static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1); - - constexpr InlineData() : as_chars_{0} {} - explicit InlineData(DefaultInitType) {} - explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {} - explicit constexpr InlineData(absl::string_view chars) - : as_chars_{ - GetOrNull(chars, 0), GetOrNull(chars, 1), - GetOrNull(chars, 2), GetOrNull(chars, 3), - GetOrNull(chars, 4), GetOrNull(chars, 5), - GetOrNull(chars, 6), GetOrNull(chars, 7), - GetOrNull(chars, 8), GetOrNull(chars, 9), - GetOrNull(chars, 10), GetOrNull(chars, 11), - GetOrNull(chars, 12), GetOrNull(chars, 13), - GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {} + static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1); + + // kTagOffset contains the offset of the control byte / tag. This constant is + // intended mostly for debugging purposes: do not remove this constant as it + // is actively inspected and used by gdb pretty printing code. + static constexpr size_t kTagOffset = 0; + + // Implement `~InlineData()` conditionally: we only need this destructor to + // unpoison poisoned instances under *SAN, and it will only compile correctly + // if the current compiler supports `absl::is_constant_evaluated()`. +#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER + ~InlineData() noexcept { unpoison(); } +#endif + + constexpr InlineData() noexcept { poison_this(); } + + explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) { + poison_this(); + } + + explicit InlineData(CordRep* rep) noexcept : rep_(rep) { + ABSL_ASSERT(rep != nullptr); + } + + // Explicit constexpr constructor to create a constexpr InlineData + // value. Creates an inlined SSO value if `rep` is null, otherwise + // creates a tree instance value. + constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept + : rep_(rep ? Rep(rep) : Rep(sv)) { + poison(); + } + + constexpr InlineData(const InlineData& rhs) noexcept; + InlineData& operator=(const InlineData& rhs) noexcept; + + friend bool operator==(const InlineData& lhs, const InlineData& rhs) { +#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER + const Rep l = lhs.rep_.SanitizerSafeCopy(); + const Rep r = rhs.rep_.SanitizerSafeCopy(); + return memcmp(&l, &r, sizeof(l)) == 0; +#else + return memcmp(&lhs, &rhs, sizeof(lhs)) == 0; +#endif + } + friend bool operator!=(const InlineData& lhs, const InlineData& rhs) { + return !operator==(lhs, rhs); + } + + // Poisons the unused inlined SSO data if the current instance + // is inlined, else un-poisons the entire instance. + constexpr void poison(); + + // Un-poisons this instance. + constexpr void unpoison(); + + // Poisons the current instance. This is used on default initialization. + constexpr void poison_this(); // Returns true if the current instance is empty. // The 'empty value' is an inlined data value of zero length. - bool is_empty() const { return tag() == 0; } + bool is_empty() const { return rep_.tag() == 0; } // Returns true if the current instance holds a tree value. - bool is_tree() const { return (tag() & 1) != 0; } + bool is_tree() const { return (rep_.tag() & 1) != 0; } // Returns true if the current instance holds a cordz_info value. // Requires the current instance to hold a tree value. bool is_profiled() const { assert(is_tree()); - return as_tree_.cordz_info != kNullCordzInfo; + return rep_.cordz_info() != kNullCordzInfo; } // Returns true if either of the provided instances hold a cordz_info value. @@ -490,7 +582,7 @@ class InlineData { static bool is_either_profiled(const InlineData& data1, const InlineData& data2) { assert(data1.is_tree() && data2.is_tree()); - return (data1.as_tree_.cordz_info | data2.as_tree_.cordz_info) != + return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) != kNullCordzInfo; } @@ -499,8 +591,8 @@ class InlineData { // Requires the current instance to hold a tree value. CordzInfo* cordz_info() const { assert(is_tree()); - intptr_t info = static_cast<intptr_t>( - absl::big_endian::ToHost64(static_cast<uint64_t>(as_tree_.cordz_info))); + intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64( + static_cast<uint64_t>(rep_.cordz_info()))); assert(info & 1); return reinterpret_cast<CordzInfo*>(info - 1); } @@ -511,21 +603,21 @@ class InlineData { void set_cordz_info(CordzInfo* cordz_info) { assert(is_tree()); uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1; - as_tree_.cordz_info = - static_cast<cordz_info_t>(absl::big_endian::FromHost64(info)); + rep_.set_cordz_info( + static_cast<cordz_info_t>(absl::little_endian::FromHost64(info))); } // Resets the current cordz_info to null / empty. void clear_cordz_info() { assert(is_tree()); - as_tree_.cordz_info = kNullCordzInfo; + rep_.set_cordz_info(kNullCordzInfo); } // Returns a read only pointer to the character data inside this instance. // Requires the current instance to hold inline data. const char* as_chars() const { assert(!is_tree()); - return as_chars_; + return rep_.as_chars(); } // Returns a mutable pointer to the character data inside this instance. @@ -543,20 +635,33 @@ class InlineData { // // It's an error to read from the returned pointer without a preceding write // if the current instance does not hold inline data, i.e.: is_tree() == true. - char* as_chars() { return as_chars_; } + char* as_chars() { return rep_.as_chars(); } // Returns the tree value of this value. // Requires the current instance to hold a tree value. CordRep* as_tree() const { assert(is_tree()); - return as_tree_.rep; + return rep_.tree(); + } + + void set_inline_data(const char* data, size_t n) { + ABSL_ASSERT(n <= kMaxInline); + unpoison(); + rep_.set_tag(static_cast<int8_t>(n << 1)); + SmallMemmove<true>(rep_.as_chars(), data, n); + poison(); + } + + void copy_max_inline_to(char* dst) const { + assert(!is_tree()); + memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline); } // Initialize this instance to holding the tree value `rep`, // initializing the cordz_info to null, i.e.: 'not profiled'. void make_tree(CordRep* rep) { - as_tree_.rep = rep; - as_tree_.cordz_info = kNullCordzInfo; + unpoison(); + rep_.make_tree(rep); } // Set the tree value of this instance to 'rep`. @@ -564,22 +669,20 @@ class InlineData { // Does not affect the value of cordz_info. void set_tree(CordRep* rep) { assert(is_tree()); - as_tree_.rep = rep; + rep_.set_tree(rep); } // Returns the size of the inlined character data inside this instance. // Requires the current instance to hold inline data. - size_t inline_size() const { - assert(!is_tree()); - return static_cast<size_t>(tag()) >> 1; - } + size_t inline_size() const { return rep_.inline_size(); } // Sets the size of the inlined character data inside this instance. // Requires `size` to be <= kMaxInline. // See the documentation on 'as_chars()' for more information and examples. void set_inline_size(size_t size) { - ABSL_ASSERT(size <= kMaxInline); - tag() = static_cast<char>(size << 1); + unpoison(); + rep_.set_inline_size(size); + poison(); } // Compares 'this' inlined data with rhs. The comparison is a straightforward @@ -589,15 +692,115 @@ class InlineData { // 0 the InlineData instances are equal // 1 'this' InlineData instance larger int Compare(const InlineData& rhs) const { + return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy()); + } + + private: + struct Rep { + // See cordz_info_t for forced alignment and size of `cordz_info` details. + struct AsTree { + explicit constexpr AsTree(absl::cord_internal::CordRep* tree) + : rep(tree) {} + cordz_info_t cordz_info = kNullCordzInfo; + absl::cord_internal::CordRep* rep; + }; + + explicit Rep(DefaultInitType) {} + constexpr Rep() : data{0} {} + constexpr Rep(const Rep&) = default; + constexpr Rep& operator=(const Rep&) = default; + + explicit constexpr Rep(CordRep* rep) : as_tree(rep) {} + + explicit constexpr Rep(absl::string_view chars) + : data{static_cast<char>((chars.size() << 1)), + GetOrNull(chars, 0), + GetOrNull(chars, 1), + GetOrNull(chars, 2), + GetOrNull(chars, 3), + GetOrNull(chars, 4), + GetOrNull(chars, 5), + GetOrNull(chars, 6), + GetOrNull(chars, 7), + GetOrNull(chars, 8), + GetOrNull(chars, 9), + GetOrNull(chars, 10), + GetOrNull(chars, 11), + GetOrNull(chars, 12), + GetOrNull(chars, 13), + GetOrNull(chars, 14)} {} + + // Disable sanitizer as we must always be able to read `tag`. + ABSL_CORD_INTERNAL_NO_SANITIZE + int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; } + void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; } + + char* as_chars() { return data + 1; } + const char* as_chars() const { return data + 1; } + + bool is_tree() const { return (tag() & 1) != 0; } + + size_t inline_size() const { + ABSL_ASSERT(!is_tree()); + return static_cast<size_t>(tag()) >> 1; + } + + void set_inline_size(size_t size) { + ABSL_ASSERT(size <= kMaxInline); + set_tag(static_cast<int8_t>(size << 1)); + } + + CordRep* tree() const { return as_tree.rep; } + void set_tree(CordRep* rhs) { as_tree.rep = rhs; } + + cordz_info_t cordz_info() const { return as_tree.cordz_info; } + void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; } + + void make_tree(CordRep* tree) { + as_tree.rep = tree; + as_tree.cordz_info = kNullCordzInfo; + } + +#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER + constexpr Rep SanitizerSafeCopy() const { + if (!absl::is_constant_evaluated()) { + Rep res; + if (is_tree()) { + res = *this; + } else { + res.set_tag(tag()); + memcpy(res.as_chars(), as_chars(), inline_size()); + } + return res; + } else { + return *this; + } + } +#else + constexpr const Rep& SanitizerSafeCopy() const { return *this; } +#endif + + // If the data has length <= kMaxInline, we store it in `data`, and + // store the size in the first char of `data` shifted left + 1. + // Else we store it in a tree and store a pointer to that tree in + // `as_tree.rep` with a tagged pointer to make `tag() & 1` non zero. + union { + char data[kMaxInline + 1]; + AsTree as_tree; + }; + }; + + // Private implementation of `Compare()` + static inline int Compare(const Rep& lhs, const Rep& rhs) { uint64_t x, y; - memcpy(&x, as_chars(), sizeof(x)); + memcpy(&x, lhs.as_chars(), sizeof(x)); memcpy(&y, rhs.as_chars(), sizeof(y)); if (x == y) { - memcpy(&x, as_chars() + 7, sizeof(x)); + memcpy(&x, lhs.as_chars() + 7, sizeof(x)); memcpy(&y, rhs.as_chars() + 7, sizeof(y)); if (x == y) { - if (inline_size() == rhs.inline_size()) return 0; - return inline_size() < rhs.inline_size() ? -1 : 1; + if (lhs.inline_size() == rhs.inline_size()) return 0; + return lhs.inline_size() < rhs.inline_size() ? -1 : 1; } } x = absl::big_endian::FromHost64(x); @@ -605,36 +808,63 @@ class InlineData { return x < y ? -1 : 1; } - private: - // See cordz_info_t for forced alignment and size of `cordz_info` details. - struct AsTree { - explicit constexpr AsTree(absl::cord_internal::CordRep* tree) - : rep(tree), cordz_info(kNullCordzInfo) {} - // This union uses up extra space so that whether rep is 32 or 64 bits, - // cordz_info will still start at the eighth byte, and the last - // byte of cordz_info will still be the last byte of InlineData. - union { - absl::cord_internal::CordRep* rep; - cordz_info_t unused_aligner; - }; - cordz_info_t cordz_info; - }; - - char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; } - char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; } - - // If the data has length <= kMaxInline, we store it in `as_chars_`, and - // store the size in the last char of `as_chars_` shifted left + 1. - // Else we store it in a tree and store a pointer to that tree in - // `as_tree_.rep` and store a tag in `tagged_size`. - union { - char as_chars_[kMaxInline + 1]; - AsTree as_tree_; - }; + Rep rep_; }; static_assert(sizeof(InlineData) == kMaxInline + 1, ""); +#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER + +constexpr InlineData::InlineData(const InlineData& rhs) noexcept + : rep_(rhs.rep_.SanitizerSafeCopy()) { + poison(); +} + +inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept { + unpoison(); + rep_ = rhs.rep_.SanitizerSafeCopy(); + poison(); + return *this; +} + +constexpr void InlineData::poison_this() { + if (!absl::is_constant_evaluated()) { + container_internal::SanitizerPoisonObject(this); + } +} + +constexpr void InlineData::unpoison() { + if (!absl::is_constant_evaluated()) { + container_internal::SanitizerUnpoisonObject(this); + } +} + +constexpr void InlineData::poison() { + if (!absl::is_constant_evaluated()) { + if (is_tree()) { + container_internal::SanitizerUnpoisonObject(this); + } else if (const size_t size = inline_size()) { + if (size < kMaxInline) { + const char* end = rep_.as_chars() + size; + container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size); + } + } else { + container_internal::SanitizerPoisonObject(this); + } + } +} + +#else // ABSL_INTERNAL_CORD_HAVE_SANITIZER + +constexpr InlineData::InlineData(const InlineData&) noexcept = default; +inline InlineData& InlineData::operator=(const InlineData&) noexcept = default; + +constexpr void InlineData::poison_this() {} +constexpr void InlineData::unpoison() {} +constexpr void InlineData::poison() {} + +#endif // ABSL_INTERNAL_CORD_HAVE_SANITIZER + inline CordRepSubstring* CordRep::substring() { assert(IsSubstring()); return static_cast<CordRepSubstring*>(this); diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc index 7ce36128..05bd0e20 100644 --- a/absl/strings/internal/cord_rep_btree.cc +++ b/absl/strings/internal/cord_rep_btree.cc @@ -14,6 +14,7 @@ #include "absl/strings/internal/cord_rep_btree.h" +#include <atomic> #include <cassert> #include <cstdint> #include <iostream> @@ -23,6 +24,7 @@ #include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/optimization.h" #include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_consume.h" @@ -48,9 +50,7 @@ using CopyResult = CordRepBtree::CopyResult; constexpr auto kFront = CordRepBtree::kFront; constexpr auto kBack = CordRepBtree::kBack; -inline bool exhaustive_validation() { - return cord_btree_exhaustive_validation.load(std::memory_order_relaxed); -} +ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false); // Implementation of the various 'Dump' functions. // Prints the entire tree structure or 'rep'. External callers should @@ -286,7 +286,7 @@ struct StackOperations { case CordRepBtree::kSelf: return result.tree; } - ABSL_INTERNAL_UNREACHABLE; + ABSL_UNREACHABLE(); return result.tree; } @@ -361,6 +361,15 @@ struct StackOperations { } // namespace +void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation) { + cord_btree_exhaustive_validation.store(do_exaustive_validation, + std::memory_order_relaxed); +} + +bool IsCordBtreeExhaustiveValidationEnabled() { + return cord_btree_exhaustive_validation.load(std::memory_order_relaxed); +} + void CordRepBtree::Dump(const CordRep* rep, absl::string_view label, bool include_contents, std::ostream& stream) { stream << "===================================\n"; @@ -449,7 +458,8 @@ bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) { child_length += edge->length; } NODE_CHECK_EQ(child_length, tree->length); - if ((!shallow || exhaustive_validation()) && tree->height() > 0) { + if ((!shallow || IsCordBtreeExhaustiveValidationEnabled()) && + tree->height() > 0) { for (CordRep* edge : tree->Edges()) { if (!IsValid(edge->btree(), shallow)) return false; } @@ -502,7 +512,7 @@ OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) { // open interval [begin, back) or [begin + 1, end) depending on `edge_type`. // We conveniently cover both case using a constexpr `shift` being 0 or 1 // as `end :== back + 1`. - result = {CopyRaw(), kCopied}; + result = {CopyRaw(length), kCopied}; constexpr int shift = edge_type == kFront ? 1 : 0; for (CordRep* r : Edges(begin() + shift, back() + shift)) { CordRep::Ref(r); diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index eed5609e..be94b62e 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -32,6 +32,14 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { +// `SetCordBtreeExhaustiveValidation()` can be set to force exhaustive +// validation in debug assertions, and code that calls `IsValid()` +// explicitly. By default, assertions should be relatively cheap and +// AssertValid() can easily lead to O(n^2) complexity as recursive / full tree +// validation is O(n). +void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation); +bool IsCordBtreeExhaustiveValidationEnabled(); + class CordRepBtreeNavigator; // CordRepBtree is as the name implies a btree implementation of a Cordrep tree. @@ -446,9 +454,9 @@ class CordRepBtree : public CordRep { template <EdgeType edge_type> static CordRepBtree* NewLeaf(absl::string_view data, size_t extra); - // Creates a raw copy of this Btree node, copying all properties, but - // without adding any references to existing edges. - CordRepBtree* CopyRaw() const; + // Creates a raw copy of this Btree node with the specified length, copying + // all properties, but without adding any references to existing edges. + CordRepBtree* CopyRaw(size_t new_length) const; // Creates a full copy of this Btree node, adding a reference on all edges. CordRepBtree* Copy() const; @@ -666,15 +674,28 @@ inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) { } } -inline CordRepBtree* CordRepBtree::CopyRaw() const { - auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree))); - memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree)); - new (&tree->refcount) RefcountAndFlags; +inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const { + CordRepBtree* tree = new CordRepBtree; + + // `length` and `refcount` are the first members of `CordRepBtree`. + // We initialize `length` using the given length, have `refcount` be set to + // ref = 1 through its default constructor, and copy all data beyond + // 'refcount' which starts with `tag` using a single memcpy: all contents + // except `refcount` is trivially copyable, and the compiler does not + // efficiently coalesce member-wise copy of these members. + // See https://gcc.godbolt.org/z/qY8zsca6z + // # LINT.IfChange(copy_raw) + tree->length = new_length; + uint8_t* dst = &tree->tag; + const uint8_t* src = &tag; + const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this); + memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset)); return tree; + // # LINT.ThenChange() } inline CordRepBtree* CordRepBtree::Copy() const { - CordRepBtree* tree = CopyRaw(); + CordRepBtree* tree = CopyRaw(length); for (CordRep* rep : Edges()) CordRep::Ref(rep); return tree; } @@ -683,8 +704,7 @@ inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin, size_t new_length) const { assert(begin >= this->begin()); assert(begin <= this->end()); - CordRepBtree* tree = CopyRaw(); - tree->length = new_length; + CordRepBtree* tree = CopyRaw(new_length); tree->set_begin(begin); for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); return tree; @@ -694,8 +714,7 @@ inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end, size_t new_length) const { assert(end <= capacity()); assert(end >= this->begin()); - CordRepBtree* tree = CopyRaw(); - tree->length = new_length; + CordRepBtree* tree = CopyRaw(new_length); tree->set_end(end); for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); return tree; diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc index 9d6ce484..840acf9f 100644 --- a/absl/strings/internal/cord_rep_btree_test.cc +++ b/absl/strings/internal/cord_rep_btree_test.cc @@ -507,7 +507,7 @@ TEST_P(CordRepBtreeTest, AppendToTreeTwoDeep) { for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { // Ref top level tree based on param. // Ref child node once every 16 iterations, and leaf node every 4 - // iterrations which which should not have an observable effect other than + // iterations which which should not have an observable effect other than // the node and/or the leaf below it being copied. refs.RefIf(shared(), tree); refs.RefIf(i % 16 == 0, tree->Edges().back()); @@ -568,7 +568,7 @@ TEST_P(CordRepBtreeTest, PrependToTreeTwoDeep) { for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { // Ref top level tree based on param. // Ref child node once every 16 iterations, and leaf node every 4 - // iterrations which which should not have an observable effect other than + // iterations which which should not have an observable effect other than // the node and/or the leaf below it being copied. refs.RefIf(shared(), tree); refs.RefIf(i % 16 == 0, tree->Edges().back()); @@ -1355,9 +1355,9 @@ TEST(CordRepBtreeTest, AssertValid) { TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { // Restore exhaustive validation on any exit. - const bool exhaustive_validation = cord_btree_exhaustive_validation.load(); + const bool exhaustive_validation = IsCordBtreeExhaustiveValidationEnabled(); auto cleanup = absl::MakeCleanup([exhaustive_validation] { - cord_btree_exhaustive_validation.store(exhaustive_validation); + SetCordBtreeExhaustiveValidation(exhaustive_validation); }); // Create a tree of at least 2 levels, and mess with the original flat, which @@ -1372,7 +1372,7 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { } flat->length = 100; - cord_btree_exhaustive_validation.store(false); + SetCordBtreeExhaustiveValidation(false); EXPECT_FALSE(CordRepBtree::IsValid(tree)); EXPECT_TRUE(CordRepBtree::IsValid(tree, true)); EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); @@ -1382,7 +1382,7 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, false), ".*"); #endif - cord_btree_exhaustive_validation.store(true); + SetCordBtreeExhaustiveValidation(true); EXPECT_FALSE(CordRepBtree::IsValid(tree)); EXPECT_FALSE(CordRepBtree::IsValid(tree, true)); EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); diff --git a/absl/strings/internal/cord_rep_consume.cc b/absl/strings/internal/cord_rep_consume.cc index 20a55797..db7d4fef 100644 --- a/absl/strings/internal/cord_rep_consume.cc +++ b/absl/strings/internal/cord_rep_consume.cc @@ -42,7 +42,8 @@ CordRep* ClipSubstring(CordRepSubstring* substring) { } // namespace -void Consume(CordRep* rep, ConsumeFn consume_fn) { +void Consume(CordRep* rep, + FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) { size_t offset = 0; size_t length = rep->length; @@ -53,8 +54,9 @@ void Consume(CordRep* rep, ConsumeFn consume_fn) { consume_fn(rep, offset, length); } -void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) { - return Consume(rep, std::move(consume_fn)); +void ReverseConsume(CordRep* rep, + FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) { + return Consume(rep, consume_fn); } } // namespace cord_internal diff --git a/absl/strings/internal/cord_rep_consume.h b/absl/strings/internal/cord_rep_consume.h index d46fca2b..bece1874 100644 --- a/absl/strings/internal/cord_rep_consume.h +++ b/absl/strings/internal/cord_rep_consume.h @@ -24,11 +24,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -// Functor for the Consume() and ReverseConsume() functions: -// void ConsumeFunc(CordRep* rep, size_t offset, size_t length); -// See the Consume() and ReverseConsume() function comments for documentation. -using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>; - // Consume() and ReverseConsume() consume CONCAT based trees and invoke the // provided functor with the contained nodes in the proper forward or reverse // order, which is used to convert CONCAT trees into other tree or cord data. @@ -40,8 +35,10 @@ using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>; // violations, we can not 100% guarantee that all code respects 'new format' // settings and flags, so we need to be able to parse old data on the fly until // all old code is deprecated / no longer the default format. -void Consume(CordRep* rep, ConsumeFn consume_fn); -void ReverseConsume(CordRep* rep, ConsumeFn consume_fn); +void Consume(CordRep* rep, + FunctionRef<void(CordRep*, size_t, size_t)> consume_fn); +void ReverseConsume(CordRep* rep, + FunctionRef<void(CordRep*, size_t, size_t)> consume_fn); } // namespace cord_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/cord_rep_crc.cc b/absl/strings/internal/cord_rep_crc.cc index ee140354..dbe54cc4 100644 --- a/absl/strings/internal/cord_rep_crc.cc +++ b/absl/strings/internal/cord_rep_crc.cc @@ -16,6 +16,7 @@ #include <cassert> #include <cstdint> +#include <utility> #include "absl/base/config.h" #include "absl/strings/internal/cord_internal.h" @@ -24,11 +25,10 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { - assert(child != nullptr); - if (child->IsCrc()) { +CordRepCrc* CordRepCrc::New(CordRep* child, crc_internal::CrcCordState state) { + if (child != nullptr && child->IsCrc()) { if (child->refcount.IsOne()) { - child->crc()->crc = crc; + child->crc()->crc_cord_state = std::move(state); return child->crc(); } CordRep* old = child; @@ -37,15 +37,17 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { CordRep::Unref(old); } auto* new_cordrep = new CordRepCrc; - new_cordrep->length = child->length; + new_cordrep->length = child != nullptr ? child->length : 0; new_cordrep->tag = cord_internal::CRC; new_cordrep->child = child; - new_cordrep->crc = crc; + new_cordrep->crc_cord_state = std::move(state); return new_cordrep; } void CordRepCrc::Destroy(CordRepCrc* node) { - CordRep::Unref(node->child); + if (node->child != nullptr) { + CordRep::Unref(node->child); + } delete node; } diff --git a/absl/strings/internal/cord_rep_crc.h b/absl/strings/internal/cord_rep_crc.h index 5294b0d1..379d7a60 100644 --- a/absl/strings/internal/cord_rep_crc.h +++ b/absl/strings/internal/cord_rep_crc.h @@ -20,6 +20,7 @@ #include "absl/base/config.h" #include "absl/base/optimization.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/internal/cord_internal.h" namespace absl { @@ -34,14 +35,14 @@ namespace cord_internal { // the contained checksum is the user's responsibility. struct CordRepCrc : public CordRep { CordRep* child; - uint32_t crc; + absl::crc_internal::CrcCordState crc_cord_state; // Consumes `child` and returns a CordRepCrc prefixed tree containing `child`. // If the specified `child` is itself a CordRepCrc node, then this method - // either replaces the existing node, or directly updates the crc value in it + // either replaces the existing node, or directly updates the crc state in it // depending on the node being shared or not, i.e.: refcount.IsOne(). - // `child` must not be null. Never returns null. - static CordRepCrc* New(CordRep* child, uint32_t crc); + // `child` must only be null if the Cord is empty. Never returns null. + static CordRepCrc* New(CordRep* child, crc_internal::CrcCordState state); // Destroys (deletes) the provided node. `node` must not be null. static void Destroy(CordRepCrc* node); diff --git a/absl/strings/internal/cord_rep_crc_test.cc b/absl/strings/internal/cord_rep_crc_test.cc index d73ea7b3..3d27c33c 100644 --- a/absl/strings/internal/cord_rep_crc_test.cc +++ b/absl/strings/internal/cord_rep_crc_test.cc @@ -17,6 +17,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/config.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_test_util.h" @@ -27,47 +28,51 @@ namespace { using ::absl::cordrep_testing::MakeFlat; using ::testing::Eq; +using ::testing::IsNull; using ::testing::Ne; #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST -TEST(CordRepCrc, NewWithNullPtr) { - EXPECT_DEATH(CordRepCrc::New(nullptr, 0), ""); -} - TEST(CordRepCrc, RemoveCrcWithNullptr) { EXPECT_DEATH(RemoveCrcNode(nullptr), ""); } #endif // !NDEBUG && GTEST_HAS_DEATH_TEST +absl::crc_internal::CrcCordState MakeCrcCordState(uint32_t crc) { + crc_internal::CrcCordState state; + state.mutable_rep()->prefix_crc.push_back( + crc_internal::CrcCordState::PrefixCrc(42, crc32c_t{crc})); + return state; +} + TEST(CordRepCrc, NewDestroy) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); EXPECT_TRUE(crc->refcount.IsOne()); EXPECT_THAT(crc->child, Eq(rep)); - EXPECT_THAT(crc->crc, Eq(12345u)); + EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u})); EXPECT_TRUE(rep->refcount.IsOne()); CordRepCrc::Destroy(crc); } TEST(CordRepCrc, NewExistingCrcNotShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); - CordRepCrc* new_crc = CordRepCrc::New(crc, 54321); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); + CordRepCrc* new_crc = CordRepCrc::New(crc, MakeCrcCordState(54321)); EXPECT_THAT(new_crc, Eq(crc)); EXPECT_TRUE(new_crc->refcount.IsOne()); EXPECT_THAT(new_crc->child, Eq(rep)); - EXPECT_THAT(new_crc->crc, Eq(54321u)); + EXPECT_THAT(new_crc->crc_cord_state.Checksum(), Eq(crc32c_t{54321u})); EXPECT_TRUE(rep->refcount.IsOne()); CordRepCrc::Destroy(new_crc); } TEST(CordRepCrc, NewExistingCrcShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); CordRep::Ref(crc); - CordRepCrc* new_crc = CordRepCrc::New(crc, 54321); + CordRepCrc* new_crc = CordRepCrc::New(crc, MakeCrcCordState(54321)); EXPECT_THAT(new_crc, Ne(crc)); EXPECT_TRUE(new_crc->refcount.IsOne()); @@ -75,13 +80,23 @@ TEST(CordRepCrc, NewExistingCrcShared) { EXPECT_FALSE(rep->refcount.IsOne()); EXPECT_THAT(crc->child, Eq(rep)); EXPECT_THAT(new_crc->child, Eq(rep)); - EXPECT_THAT(crc->crc, Eq(12345u)); - EXPECT_THAT(new_crc->crc, Eq(54321u)); + EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u})); + EXPECT_THAT(new_crc->crc_cord_state.Checksum(), Eq(crc32c_t{54321u})); CordRep::Unref(crc); CordRep::Unref(new_crc); } +TEST(CordRepCrc, NewEmpty) { + CordRepCrc* crc = CordRepCrc::New(nullptr, MakeCrcCordState(12345)); + EXPECT_TRUE(crc->refcount.IsOne()); + EXPECT_THAT(crc->child, IsNull()); + EXPECT_THAT(crc->length, Eq(0u)); + EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u})); + EXPECT_TRUE(crc->refcount.IsOne()); + CordRepCrc::Destroy(crc); +} + TEST(CordRepCrc, RemoveCrcNotCrc) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); CordRep* nocrc = RemoveCrcNode(rep); @@ -91,7 +106,7 @@ TEST(CordRepCrc, RemoveCrcNotCrc) { TEST(CordRepCrc, RemoveCrcNotShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); CordRep* nocrc = RemoveCrcNode(crc); EXPECT_THAT(nocrc, Eq(rep)); EXPECT_TRUE(rep->refcount.IsOne()); @@ -100,7 +115,7 @@ TEST(CordRepCrc, RemoveCrcNotShared) { TEST(CordRepCrc, RemoveCrcShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); CordRep::Ref(crc); CordRep* nocrc = RemoveCrcNode(crc); EXPECT_THAT(nocrc, Eq(rep)); diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h index 2000e21e..79a2fdb1 100644 --- a/absl/strings/internal/cord_rep_ring.h +++ b/absl/strings/internal/cord_rep_ring.h @@ -430,7 +430,7 @@ class CordRepRing : public CordRep { // capacity to satisfy `extra` extra nodes, and unref the old `rep` instance. // // If a new CordRepRing can not be allocated, or the new capacity would exceed - // the maxmimum capacity, then the input is consumed only, and an exception is + // the maximum capacity, then the input is consumed only, and an exception is // thrown. static CordRepRing* Mutable(CordRepRing* rep, size_t extra); @@ -472,7 +472,7 @@ class CordRepRing : public CordRep { // Increases the data offset for entry `index` by `n`. void AddDataOffset(index_type index, size_t n); - // Descreases the length for entry `index` by `n`. + // Decreases the length for entry `index` by `n`. void SubLength(index_type index, size_t n); index_type head_; diff --git a/absl/strings/internal/cordz_functions.h b/absl/strings/internal/cordz_functions.h index 93f46ec6..ed108bf1 100644 --- a/absl/strings/internal/cordz_functions.h +++ b/absl/strings/internal/cordz_functions.h @@ -32,18 +32,10 @@ int32_t get_cordz_mean_interval(); // Sets the sample rate with the average interval between samples. void set_cordz_mean_interval(int32_t mean_interval); -// Enable cordz unless any of the following applies: -// - no thread local support -// - MSVC build -// - Android build -// - Apple build -// - DLL build -// Hashtablez is turned off completely in opensource builds. -// MSVC's static atomics are dynamically initialized in debug mode, which breaks -// sampling. -#if defined(ABSL_HAVE_THREAD_LOCAL) && !defined(_MSC_VER) && \ - !defined(ABSL_BUILD_DLL) && !defined(ABSL_CONSUME_DLL) && \ - !defined(__ANDROID__) && !defined(__APPLE__) +// Cordz is only enabled on Linux with thread_local support. +#if defined(ABSL_INTERNAL_CORDZ_ENABLED) +#error ABSL_INTERNAL_CORDZ_ENABLED cannot be set directly +#elif defined(__linux__) && defined(ABSL_HAVE_THREAD_LOCAL) #define ABSL_INTERNAL_CORDZ_ENABLED 1 #endif diff --git a/absl/strings/internal/cordz_functions_test.cc b/absl/strings/internal/cordz_functions_test.cc index 350623c1..b70a685e 100644 --- a/absl/strings/internal/cordz_functions_test.cc +++ b/absl/strings/internal/cordz_functions_test.cc @@ -38,7 +38,7 @@ TEST(CordzFunctionsTest, SampleRate) { } // Cordz is disabled when we don't have thread_local. All calls to -// should_profile will return false when cordz is diabled, so we might want to +// should_profile will return false when cordz is disabled, so we might want to // avoid those tests. #ifdef ABSL_INTERNAL_CORDZ_ENABLED diff --git a/absl/strings/internal/cordz_handle.cc b/absl/strings/internal/cordz_handle.cc index a73fefed..a7061dbe 100644 --- a/absl/strings/internal/cordz_handle.cc +++ b/absl/strings/internal/cordz_handle.cc @@ -16,34 +16,60 @@ #include <atomic> #include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK -#include "absl/base/internal/spinlock.h" +#include "absl/synchronization/mutex.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -using ::absl::base_internal::SpinLockHolder; +namespace { -ABSL_CONST_INIT CordzHandle::Queue CordzHandle::global_queue_(absl::kConstInit); +struct Queue { + Queue() = default; + + absl::Mutex mutex; + std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr}; + + // Returns true if this delete queue is empty. This method does not acquire + // the lock, but does a 'load acquire' observation on the delete queue tail. + // It is used inside Delete() to check for the presence of a delete queue + // without holding the lock. The assumption is that the caller is in the + // state of 'being deleted', and can not be newly discovered by a concurrent + // 'being constructed' snapshot instance. Practically, this means that any + // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens + // before / after' semantics and atomic fences. + bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return dq_tail.load(std::memory_order_acquire) == nullptr; + } +}; + +static Queue* GlobalQueue() { + static Queue* global_queue = new Queue; + return global_queue; +} + +} // namespace CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) { + Queue* global_queue = GlobalQueue(); if (is_snapshot) { - SpinLockHolder lock(&queue_->mutex); - CordzHandle* dq_tail = queue_->dq_tail.load(std::memory_order_acquire); + MutexLock lock(&global_queue->mutex); + CordzHandle* dq_tail = + global_queue->dq_tail.load(std::memory_order_acquire); if (dq_tail != nullptr) { dq_prev_ = dq_tail; dq_tail->dq_next_ = this; } - queue_->dq_tail.store(this, std::memory_order_release); + global_queue->dq_tail.store(this, std::memory_order_release); } } CordzHandle::~CordzHandle() { - ODRCheck(); + Queue* global_queue = GlobalQueue(); if (is_snapshot_) { std::vector<CordzHandle*> to_delete; { - SpinLockHolder lock(&queue_->mutex); + MutexLock lock(&global_queue->mutex); CordzHandle* next = dq_next_; if (dq_prev_ == nullptr) { // We were head of the queue, delete every CordzHandle until we reach @@ -59,7 +85,7 @@ CordzHandle::~CordzHandle() { if (next) { next->dq_prev_ = dq_prev_; } else { - queue_->dq_tail.store(dq_prev_, std::memory_order_release); + global_queue->dq_tail.store(dq_prev_, std::memory_order_release); } } for (CordzHandle* handle : to_delete) { @@ -69,16 +95,15 @@ CordzHandle::~CordzHandle() { } bool CordzHandle::SafeToDelete() const { - return is_snapshot_ || queue_->IsEmpty(); + return is_snapshot_ || GlobalQueue()->IsEmpty(); } void CordzHandle::Delete(CordzHandle* handle) { assert(handle); if (handle) { - handle->ODRCheck(); - Queue* const queue = handle->queue_; + Queue* const queue = GlobalQueue(); if (!handle->SafeToDelete()) { - SpinLockHolder lock(&queue->mutex); + MutexLock lock(&queue->mutex); CordzHandle* dq_tail = queue->dq_tail.load(std::memory_order_acquire); if (dq_tail != nullptr) { handle->dq_prev_ = dq_tail; @@ -93,8 +118,9 @@ void CordzHandle::Delete(CordzHandle* handle) { std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() { std::vector<const CordzHandle*> handles; - SpinLockHolder lock(&global_queue_.mutex); - CordzHandle* dq_tail = global_queue_.dq_tail.load(std::memory_order_acquire); + Queue* global_queue = GlobalQueue(); + MutexLock lock(&global_queue->mutex); + CordzHandle* dq_tail = global_queue->dq_tail.load(std::memory_order_acquire); for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) { handles.push_back(p); } @@ -103,13 +129,13 @@ std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() { bool CordzHandle::DiagnosticsHandleIsSafeToInspect( const CordzHandle* handle) const { - ODRCheck(); if (!is_snapshot_) return false; if (handle == nullptr) return true; if (handle->is_snapshot_) return false; bool snapshot_found = false; - SpinLockHolder lock(&queue_->mutex); - for (const CordzHandle* p = queue_->dq_tail; p; p = p->dq_prev_) { + Queue* global_queue = GlobalQueue(); + MutexLock lock(&global_queue->mutex); + for (const CordzHandle* p = global_queue->dq_tail; p; p = p->dq_prev_) { if (p == handle) return !snapshot_found; if (p == this) snapshot_found = true; } @@ -119,13 +145,13 @@ bool CordzHandle::DiagnosticsHandleIsSafeToInspect( std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() { - ODRCheck(); std::vector<const CordzHandle*> handles; if (!is_snapshot()) { return handles; } - SpinLockHolder lock(&queue_->mutex); + Queue* global_queue = GlobalQueue(); + MutexLock lock(&global_queue->mutex); for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) { if (!p->is_snapshot()) { handles.push_back(p); diff --git a/absl/strings/internal/cordz_handle.h b/absl/strings/internal/cordz_handle.h index 3c800b43..08e3f0d3 100644 --- a/absl/strings/internal/cordz_handle.h +++ b/absl/strings/internal/cordz_handle.h @@ -20,8 +20,6 @@ #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" -#include "absl/base/internal/spinlock.h" -#include "absl/synchronization/mutex.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -34,7 +32,7 @@ namespace cord_internal { // has gained visibility into a CordzInfo object, that CordzInfo object will not // be deleted prematurely. This allows the profiler to inspect all CordzInfo // objects that are alive without needing to hold a global lock. -class CordzHandle { +class ABSL_DLL CordzHandle { public: CordzHandle() : CordzHandle(false) {} @@ -79,37 +77,6 @@ class CordzHandle { virtual ~CordzHandle(); private: - // Global queue data. CordzHandle stores a pointer to the global queue - // instance to harden against ODR violations. - struct Queue { - constexpr explicit Queue(absl::ConstInitType) - : mutex(absl::kConstInit, - absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} - - absl::base_internal::SpinLock mutex; - std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr}; - - // Returns true if this delete queue is empty. This method does not acquire - // the lock, but does a 'load acquire' observation on the delete queue tail. - // It is used inside Delete() to check for the presence of a delete queue - // without holding the lock. The assumption is that the caller is in the - // state of 'being deleted', and can not be newly discovered by a concurrent - // 'being constructed' snapshot instance. Practically, this means that any - // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens - // before / after' semantics and atomic fences. - bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS { - return dq_tail.load(std::memory_order_acquire) == nullptr; - } - }; - - void ODRCheck() const { -#ifndef NDEBUG - ABSL_RAW_CHECK(queue_ == &global_queue_, "ODR violation in Cord"); -#endif - } - - ABSL_CONST_INIT static Queue global_queue_; - Queue* const queue_ = &global_queue_; const bool is_snapshot_; // dq_prev_ and dq_next_ require the global queue mutex to be held. diff --git a/absl/strings/internal/cordz_info.cc b/absl/strings/internal/cordz_info.cc index 530f33be..515dfafb 100644 --- a/absl/strings/internal/cordz_info.cc +++ b/absl/strings/internal/cordz_info.cc @@ -26,6 +26,7 @@ #include "absl/strings/internal/cordz_statistics.h" #include "absl/strings/internal/cordz_update_tracker.h" #include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" #include "absl/types/span.h" namespace absl { @@ -53,7 +54,7 @@ namespace { // The top level node is treated specially: we assume the current thread // (typically called from the CordzHandler) to hold a reference purely to // perform a safe analysis, and not being part of the application. So we -// substract 1 from the reference count of the top node to compute the +// subtract 1 from the reference count of the top node to compute the // 'application fair share' excluding the reference of the current thread. // // An example of fair sharing, and why we multiply reference counts: diff --git a/absl/strings/internal/cordz_info_statistics_test.cc b/absl/strings/internal/cordz_info_statistics_test.cc index 6d6feb52..53d2f2ea 100644 --- a/absl/strings/internal/cordz_info_statistics_test.cc +++ b/absl/strings/internal/cordz_info_statistics_test.cc @@ -19,6 +19,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/config.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/cord.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" @@ -451,7 +452,8 @@ TEST(CordzInfoStatisticsTest, BtreeNodeShared) { TEST(CordzInfoStatisticsTest, Crc) { RefHelper ref; auto* left = Flat(1000); - auto* crc = ref.NeedsUnref(CordRepCrc::New(left, 12345)); + auto* crc = + ref.NeedsUnref(CordRepCrc::New(left, crc_internal::CrcCordState())); CordzStatistics expected; expected.size = left->length; diff --git a/absl/strings/internal/cordz_sample_token.h b/absl/strings/internal/cordz_sample_token.h index b58022c3..2a86bc3b 100644 --- a/absl/strings/internal/cordz_sample_token.h +++ b/absl/strings/internal/cordz_sample_token.h @@ -33,11 +33,11 @@ namespace cord_internal { // ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail // // This list tracks that CH1 and CH2 were created after ST1, so the thread -// holding ST1 might have a referece to CH1, CH2, ST2, and CH3. However, ST2 was -// created later, so the thread holding the ST2 token cannot have a reference to -// ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will delete ST1, -// CH1, and CH2. If instead ST2 is cleaned up first, that thread will only -// delete ST2. +// holding ST1 might have a reference to CH1, CH2, ST2, and CH3. However, ST2 +// was created later, so the thread holding the ST2 token cannot have a +// reference to ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will +// delete ST1, CH1, and CH2. If instead ST2 is cleaned up first, that thread +// will only delete ST2. // // If ST1 is cleaned up first, the new list will be: // ST2 <- CH3 <- global_delete_queue_tail diff --git a/absl/strings/internal/damerau_levenshtein_distance.cc b/absl/strings/internal/damerau_levenshtein_distance.cc new file mode 100644 index 00000000..a084568f --- /dev/null +++ b/absl/strings/internal/damerau_levenshtein_distance.cc @@ -0,0 +1,93 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/damerau_levenshtein_distance.h" + +#include <algorithm> +#include <array> +#include <numeric> + +#include "absl/strings/string_view.h" +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { +// Calculate DamerauLevenshtein (adjacent transpositions) distance +// between two strings, +// https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance. The +// algorithm follows the condition that no substring is edited more than once. +// While this can reduce is larger distance, it's a) a much simpler algorithm +// and b) more realistic for the case that typographic mistakes should be +// detected. +// When the distance is larger than cutoff, or one of the strings has more +// than MAX_SIZE=100 characters, the code returns min(MAX_SIZE, cutoff) + 1. +uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1, + absl::string_view s2, uint8_t cutoff) { + const uint8_t MAX_SIZE = 100; + const uint8_t _cutoff = std::min(MAX_SIZE, cutoff); + const uint8_t cutoff_plus_1 = static_cast<uint8_t>(_cutoff + 1); + + if (s1.size() > s2.size()) std::swap(s1, s2); + if (s1.size() + _cutoff < s2.size() || s2.size() > MAX_SIZE) + return cutoff_plus_1; + + if (s1.empty()) + return static_cast<uint8_t>(s2.size()); + + // Lower diagonal bound: y = x - lower_diag + const uint8_t lower_diag = + _cutoff - static_cast<uint8_t>(s2.size() - s1.size()); + // Upper diagonal bound: y = x + upper_diag + const uint8_t upper_diag = _cutoff; + + // d[i][j] is the number of edits required to convert s1[0, i] to s2[0, j] + std::array<std::array<uint8_t, MAX_SIZE + 2>, MAX_SIZE + 2> d; + std::iota(d[0].begin(), d[0].begin() + upper_diag + 1, 0); + d[0][cutoff_plus_1] = cutoff_plus_1; + for (size_t i = 1; i <= s1.size(); ++i) { + // Deduce begin of relevant window. + size_t j_begin = 1; + if (i > lower_diag) { + j_begin = i - lower_diag; + d[i][j_begin - 1] = cutoff_plus_1; + } else { + d[i][0] = static_cast<uint8_t>(i); + } + + // Deduce end of relevant window. + size_t j_end = i + upper_diag; + if (j_end > s2.size()) { + j_end = s2.size(); + } else { + d[i][j_end + 1] = cutoff_plus_1; + } + + for (size_t j = j_begin; j <= j_end; ++j) { + const uint8_t deletion_distance = d[i - 1][j] + 1; + const uint8_t insertion_distance = d[i][j - 1] + 1; + const uint8_t mismatched_tail_cost = s1[i - 1] == s2[j - 1] ? 0 : 1; + const uint8_t mismatch_distance = d[i - 1][j - 1] + mismatched_tail_cost; + uint8_t transposition_distance = _cutoff + 1; + if (i > 1 && j > 1 && s1[i - 1] == s2[j - 2] && s1[i - 2] == s2[j - 1]) + transposition_distance = d[i - 2][j - 2] + 1; + d[i][j] = std::min({cutoff_plus_1, deletion_distance, insertion_distance, + mismatch_distance, transposition_distance}); + } + } + return d[s1.size()][s2.size()]; +} + +} // namespace strings_internal + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/damerau_levenshtein_distance.h b/absl/strings/internal/damerau_levenshtein_distance.h new file mode 100644 index 00000000..7a4bd644 --- /dev/null +++ b/absl/strings/internal/damerau_levenshtein_distance.h @@ -0,0 +1,34 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_DAMERAU_LEVENSHTEIN_DISTANCE_H_ +#define ABSL_STRINGS_INTERNAL_DAMERAU_LEVENSHTEIN_DISTANCE_H_ + +#include <cstdint> + +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { +// Calculate DamerauLevenshtein distance between two strings. +// When the distance is larger than cutoff, the code just returns cutoff + 1. +uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1, + absl::string_view s2, uint8_t cutoff); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_DAMERAU_LEVENSHTEIN_DISTANCE_H_ diff --git a/absl/strings/internal/damerau_levenshtein_distance_test.cc b/absl/strings/internal/damerau_levenshtein_distance_test.cc new file mode 100644 index 00000000..49dd105b --- /dev/null +++ b/absl/strings/internal/damerau_levenshtein_distance_test.cc @@ -0,0 +1,99 @@ +// Copyright 2022 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/damerau_levenshtein_distance.h" + +#include <cstdint> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +using absl::strings_internal::CappedDamerauLevenshteinDistance; + +TEST(Distance, TestDistances) { + EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 6), uint8_t{0}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "b", 6), uint8_t{1}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("ca", "abc", 6), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "ad", 6), uint8_t{2}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "cadb", 6), uint8_t{4}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "bdac", 6), uint8_t{4}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 0), uint8_t{0}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("", "", 0), uint8_t{0}); + // combinations for 3-character strings: + // 1, 2, 3 removals, insertions or replacements and transpositions + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", "abc", 6), uint8_t{0}); + for (auto res : + {"", "ca", "efg", "ea", "ce", "ceb", "eca", "cae", "cea", "bea"}) { + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{3}); + } + for (auto res : + {"a", "b", "c", "ba", "cb", "bca", "cab", "cba", "ace", + "efc", "ebf", "aef", "ae", "be", "eb", "ec", "ecb", "bec", + "bce", "cbe", "ace", "eac", "aeb", "bae", "eab", "eba"}) { + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{2}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{2}); + } + for (auto res : {"ab", "ac", "bc", "acb", "bac", "ebc", "aec", "abe"}) { + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{1}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{1}); + } +} + +TEST(Distance, TestCutoff) { + // Returning cutoff + 1 if the value is larger than cutoff or string longer + // than MAX_SIZE. + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 3), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 2), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 1), uint8_t{2}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcdefg", "a", 2), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "abcde", 2), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(102, 'a'), + std::string(102, 'a'), 105), + uint8_t{101}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), + std::string(100, 'a'), 100), + uint8_t{0}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), + std::string(100, 'b'), 100), + uint8_t{100}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), + std::string(99, 'a'), 2), + uint8_t{1}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), + std::string(101, 'a'), 2), + uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), + std::string(101, 'a'), 2), + uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX + 1, 'a'), + std::string(UINT8_MAX + 1, 'b'), + UINT8_MAX), + uint8_t{101}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'), + std::string(UINT8_MAX - 1, 'b'), + UINT8_MAX), + uint8_t{101}); + EXPECT_THAT( + CappedDamerauLevenshteinDistance(std::string(UINT8_MAX, 'a'), + std::string(UINT8_MAX, 'b'), UINT8_MAX), + uint8_t{101}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'), + std::string(UINT8_MAX - 1, 'a'), + UINT8_MAX), + uint8_t{101}); +} +} // namespace diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc index cfea0961..56a4cbed 100644 --- a/absl/strings/internal/escaping.cc +++ b/absl/strings/internal/escaping.cc @@ -21,26 +21,26 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { +// The two strings below provide maps from normal 6-bit characters to their +// base64-escaped equivalent. +// For the inverse case, see kUn(WebSafe)Base64 in the external +// escaping.cc. ABSL_CONST_INIT const char kBase64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +ABSL_CONST_INIT const char kWebSafeBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + + size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { // Base64 encodes three bytes of input at a time. If the input is not // divisible by three, we pad as appropriate. // - // (from https://tools.ietf.org/html/rfc3548) - // Special processing is performed if fewer than 24 bits are available - // at the end of the data being encoded. A full encoding quantum is - // always completed at the end of a quantity. When fewer than 24 input - // bits are available in an input group, zero bits are added (on the - // right) to form an integral number of 6-bit groups. Padding at the - // end of the data is performed using the '=' character. Since all base - // 64 input is an integral number of octets, only the following cases - // can arise: - // Base64 encodes each three bytes of input into four bytes of output. size_t len = (input_len / 3) * 4; + // Since all base 64 input is an integral number of octets, only the following + // cases can arise: if (input_len % 3 == 0) { // (from https://tools.ietf.org/html/rfc3548) // (1) the final quantum of encoding input is an integral multiple of 24 @@ -70,6 +70,21 @@ size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { return len; } +// ---------------------------------------------------------------------- +// Take the input in groups of 4 characters and turn each +// character into a code 0 to 63 thus: +// A-Z map to 0 to 25 +// a-z map to 26 to 51 +// 0-9 map to 52 to 61 +// +(- for WebSafe) maps to 62 +// /(_ for WebSafe) maps to 63 +// There will be four numbers, all less than 64 which can be represented +// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). +// Arrange the 6 digit binary numbers into three bytes as such: +// aaaaaabb bbbbcccc ccdddddd +// Equals signs (one or two) are used at the end of the encoded block to +// indicate that the text was not an integer multiple of three bytes long. +// ---------------------------------------------------------------------- size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, size_t szdest, const char* base64, bool do_padding) { @@ -83,6 +98,16 @@ size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, char* const limit_dest = dest + szdest; const unsigned char* const limit_src = src + szsrc; + // (from https://tools.ietf.org/html/rfc3548) + // Special processing is performed if fewer than 24 bits are available + // at the end of the data being encoded. A full encoding quantum is + // always completed at the end of a quantity. When fewer than 24 input + // bits are available in an input group, zero bits are added (on the + // right) to form an integral number of 6-bit groups. + // + // If do_padding is true, padding at the end of the data is performed. This + // output padding uses the '=' character. + // Three bytes of data encodes to four characters of cyphertext. // So we can pump through three-byte chunks atomically. if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h index 6a9ce602..2186f778 100644 --- a/absl/strings/internal/escaping.h +++ b/absl/strings/internal/escaping.h @@ -24,20 +24,19 @@ ABSL_NAMESPACE_BEGIN namespace strings_internal { ABSL_CONST_INIT extern const char kBase64Chars[]; +ABSL_CONST_INIT extern const char kWebSafeBase64Chars[]; -// Calculates how long a string will be when it is base64 encoded given its -// length and whether or not the result should be padded. +// Calculates the length of a Base64 encoding (RFC 4648) of a string of length +// `input_len`, with or without padding per `do_padding`. Note that 'web-safe' +// encoding (section 5 of the RFC) does not change this length. size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding); -// Base64-encodes `src` using the alphabet provided in `base64` and writes the -// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars -// until its length is a multiple of 3. Returns the length of `dest`. +// Base64-encodes `src` using the alphabet provided in `base64` (which +// determines whether to do web-safe encoding or not) and writes the result to +// `dest`. If `do_padding` is true, `dest` is padded with '=' chars until its +// length is a multiple of 3. Returns the length of `dest`. size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, size_t szdest, const char* base64, bool do_padding); - -// Base64-encodes `src` using the alphabet provided in `base64` and writes the -// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars -// until its length is a multiple of 3. template <typename String> void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, bool do_padding, const char* base64_chars) { diff --git a/absl/strings/internal/has_absl_stringify.h b/absl/strings/internal/has_absl_stringify.h new file mode 100644 index 00000000..55a08508 --- /dev/null +++ b/absl/strings/internal/has_absl_stringify.h @@ -0,0 +1,55 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ +#define ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ +#include <string> +#include <type_traits> +#include <utility> + +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +namespace strings_internal { + +// This is an empty class not intended to be used. It exists so that +// `HasAbslStringify` can reference a universal class rather than needing to be +// copied for each new sink. +class UnimplementedSink { + public: + void Append(size_t count, char ch); + + void Append(string_view v); + + // Support `absl::Format(&sink, format, args...)`. + friend void AbslFormatFlush(UnimplementedSink* sink, absl::string_view v); +}; + +template <typename T, typename = void> +struct HasAbslStringify : std::false_type {}; + +template <typename T> +struct HasAbslStringify< + T, std::enable_if_t<std::is_void<decltype(AbslStringify( + std::declval<strings_internal::UnimplementedSink&>(), + std::declval<const T&>()))>::value>> : std::true_type {}; + +} // namespace strings_internal + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ diff --git a/absl/strings/internal/stl_type_traits.h b/absl/strings/internal/stl_type_traits.h index 6035ca45..e50468b0 100644 --- a/absl/strings/internal/stl_type_traits.h +++ b/absl/strings/internal/stl_type_traits.h @@ -13,7 +13,7 @@ // limitations under the License. // -// Thie file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type +// The file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type // trait metafunction to assist in working with the _GLIBCXX_DEBUG debug // wrappers of STL containers. // diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index 967fe9ca..018dd052 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -297,6 +297,37 @@ constexpr auto ConvertV(T) { } template <typename T> +bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { + if (conv.conversion_char() == FormatConversionCharInternal::v) { + conv.set_conversion_char(FormatConversionCharInternal::g); + } + + return FormatConversionCharIsFloat(conv.conversion_char()) && + ConvertFloatImpl(v, conv, sink); +} + +inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.is_basic()) { + sink->Append(v); + return true; + } + return sink->PutPaddedString(v, conv.width(), conv.precision(), + conv.has_left_flag()); +} + +} // namespace + +bool ConvertBoolArg(bool v, FormatSinkImpl *sink) { + if (v) { + sink->Append("true"); + } else { + sink->Append("false"); + } + return true; +} + +template <typename T> bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { using U = typename MakeUnsigned<T>::type; IntDigits as_digits; @@ -354,36 +385,37 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return ConvertIntImplInnerSlow(as_digits, conv, sink); } -template <typename T> -bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { - if (conv.conversion_char() == FormatConversionCharInternal::v) { - conv.set_conversion_char(FormatConversionCharInternal::g); - } - - return FormatConversionCharIsFloat(conv.conversion_char()) && - ConvertFloatImpl(v, conv, sink); -} - -inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, - FormatSinkImpl *sink) { - if (conv.is_basic()) { - sink->Append(v); - return true; - } - return sink->PutPaddedString(v, conv.width(), conv.precision(), - conv.has_left_flag()); -} - -} // namespace - -bool ConvertBoolArg(bool v, FormatSinkImpl *sink) { - if (v) { - sink->Append("true"); - } else { - sink->Append("false"); - } - return true; -} +template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<signed char>(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned char>(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<short>(short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned short>(unsigned short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned int>(unsigned int v, + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<long>(long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<long long>(long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned long long>(unsigned long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); // ==================== Strings ==================== StringConvertResult FormatConvertImpl(const std::string &v, diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index b3e4ff15..e4b16628 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -18,6 +18,7 @@ #include <string.h> #include <wchar.h> +#include <algorithm> #include <cstdio> #include <iomanip> #include <limits> @@ -25,10 +26,12 @@ #include <sstream> #include <string> #include <type_traits> +#include <utility> #include "absl/base/port.h" #include "absl/meta/type_traits.h" #include "absl/numeric/int128.h" +#include "absl/strings/internal/has_absl_stringify.h" #include "absl/strings/internal/str_format/extension.h" #include "absl/strings/string_view.h" @@ -50,6 +53,19 @@ struct ArgConvertResult { bool value; }; +using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar, + FormatConversionCharSetInternal::v)>; +using FloatingConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::kFloating, + FormatConversionCharSetInternal::v)>; +using CharConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar)>; + template <typename T, typename = void> struct HasUserDefinedConvert : std::false_type {}; @@ -67,6 +83,44 @@ void AbslFormatConvert(); void AbslStringify(); template <typename T> +bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); + +// Forward declarations of internal `ConvertIntArg` function template +// instantiations are here to avoid including the template body in the headers +// and instantiating it in large numbers of translation units. Explicit +// instantiations can be found in "absl/strings/internal/str_format/arg.cc" +extern template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<signed char>(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned char>(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<short>(short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned short>( // NOLINT + unsigned short v, FormatConversionSpecImpl conv, // NOLINT + FormatSinkImpl* sink); +extern template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned int>(unsigned int v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<long>( // NOLINT + long v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); // NOLINT +extern template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<long long>(long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned long long>( // NOLINT + unsigned long long v, FormatConversionSpecImpl conv, // NOLINT + FormatSinkImpl* sink); + +template <typename T> auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, FormatSinkImpl* sink) -> decltype(AbslFormatConvert(v, @@ -82,10 +136,30 @@ auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, } template <typename T> +auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) + -> std::enable_if_t<std::is_enum<T>::value && + std::is_void<decltype(AbslStringify( + std::declval<FormatSink&>(), v))>::value, + IntegralConvertResult> { + if (conv.conversion_char() == FormatConversionCharInternal::v) { + using FormatSinkT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; + auto fs = sink->Wrap<FormatSinkT>(); + AbslStringify(fs, v); + return {true}; + } else { + return {ConvertIntArg( + static_cast<typename std::underlying_type<T>::type>(v), conv, sink)}; + } +} + +template <typename T> auto FormatConvertImpl(const T& v, FormatConversionSpecImpl, FormatSinkImpl* sink) - -> std::enable_if_t<std::is_void<decltype(AbslStringify( - std::declval<FormatSink&>(), v))>::value, + -> std::enable_if_t<!std::is_enum<T>::value && + std::is_void<decltype(AbslStringify( + std::declval<FormatSink&>(), v))>::value, ArgConvertResult<FormatConversionCharSetInternal::v>> { using FormatSinkT = absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; @@ -191,19 +265,6 @@ StringConvertResult FormatConvertImpl(const AbslCord& value, return {true}; } -using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::c, - FormatConversionCharSetInternal::kNumeric, - FormatConversionCharSetInternal::kStar, - FormatConversionCharSetInternal::v)>; -using FloatingConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::kFloating, - FormatConversionCharSetInternal::v)>; -using CharConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::c, - FormatConversionCharSetInternal::kNumeric, - FormatConversionCharSetInternal::kStar)>; - bool ConvertBoolArg(bool v, FormatSinkImpl* sink); // Floats. @@ -271,7 +332,8 @@ IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv, // FormatArgImpl will use the underlying Convert functions instead. template <typename T> typename std::enable_if<std::is_enum<T>::value && - !HasUserDefinedConvert<T>::value, + !HasUserDefinedConvert<T>::value && + !strings_internal::HasAbslStringify<T>::value, IntegralConvertResult>::type FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); @@ -384,7 +446,8 @@ class FormatArgImpl { template <typename T, typename = void> struct DecayType { static constexpr bool kHasUserDefined = - str_format_internal::HasUserDefinedConvert<T>::value; + str_format_internal::HasUserDefinedConvert<T>::value || + strings_internal::HasAbslStringify<T>::value; using type = typename std::conditional< !kHasUserDefined && std::is_convertible<T, const char*>::value, const char*, @@ -396,6 +459,7 @@ class FormatArgImpl { struct DecayType<T, typename std::enable_if< !str_format_internal::HasUserDefinedConvert<T>::value && + !strings_internal::HasAbslStringify<T>::value && std::is_enum<T>::value>::type> { using type = typename std::underlying_type<T>::type; }; diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h index aeb9d48d..eab6ab9d 100644 --- a/absl/strings/internal/str_format/checker.h +++ b/absl/strings/internal/str_format/checker.h @@ -15,8 +15,11 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ +#include <algorithm> + #include "absl/base/attributes.h" #include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" #include "absl/strings/internal/str_format/extension.h" // Compile time check support for entry points. @@ -36,333 +39,56 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -constexpr bool AllOf() { return true; } - -template <typename... T> -constexpr bool AllOf(bool b, T... t) { - return b && AllOf(t...); -} - #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER -constexpr bool ContainsChar(const char* chars, char c) { - return *chars == c || (*chars && ContainsChar(chars + 1, c)); -} - -// A constexpr compatible list of Convs. -struct ConvList { - const FormatConversionCharSet* array; - int count; - - // We do the bound check here to avoid having to do it on the callers. - // Returning an empty FormatConversionCharSet has the same effect as - // short circuiting because it will never match any conversion. - constexpr FormatConversionCharSet operator[](int i) const { - return i < count ? array[i] : FormatConversionCharSet{}; - } - - constexpr ConvList without_front() const { - return count != 0 ? ConvList{array + 1, count - 1} : *this; - } -}; - -template <size_t count> -struct ConvListT { - // Make sure the array has size > 0. - FormatConversionCharSet list[count ? count : 1]; -}; - -constexpr char GetChar(string_view str, size_t index) { - return index < str.size() ? str[index] : char{}; -} - -constexpr string_view ConsumeFront(string_view str, size_t len = 1) { - return len <= str.size() ? string_view(str.data() + len, str.size() - len) - : string_view(); -} - -constexpr string_view ConsumeAnyOf(string_view format, const char* chars) { - while (ContainsChar(chars, GetChar(format, 0))) { - format = ConsumeFront(format); - } - return format; -} - -constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; } - -// Helper class for the ParseDigits function. -// It encapsulates the two return values we need there. -struct Integer { - string_view format; - int value; - - // If the next character is a '$', consume it. - // Otherwise, make `this` an invalid positional argument. - constexpr Integer ConsumePositionalDollar() const { - if (GetChar(format, 0) == '$') { - return Integer{ConsumeFront(format), value}; - } else { - return Integer{format, 0}; - } - } -}; - -constexpr Integer ParseDigits(string_view format) { - int value = 0; - while (IsDigit(GetChar(format, 0))) { - value = 10 * value + GetChar(format, 0) - '0'; - format = ConsumeFront(format); - } - - return Integer{format, value}; -} - -// Parse digits for a positional argument. -// The parsing also consumes the '$'. -constexpr Integer ParsePositional(string_view format) { - return ParseDigits(format).ConsumePositionalDollar(); -} - -// Parses a single conversion specifier. -// See ConvParser::Run() for post conditions. -class ConvParser { - constexpr ConvParser SetFormat(string_view format) const { - return ConvParser(format, args_, error_, arg_position_, is_positional_); - } - - constexpr ConvParser SetArgs(ConvList args) const { - return ConvParser(format_, args, error_, arg_position_, is_positional_); - } - - constexpr ConvParser SetError(bool error) const { - return ConvParser(format_, args_, error_ || error, arg_position_, - is_positional_); - } - - constexpr ConvParser SetArgPosition(int arg_position) const { - return ConvParser(format_, args_, error_, arg_position, is_positional_); - } - - // Consumes the next arg and verifies that it matches `conv`. - // `error_` is set if there is no next arg or if it doesn't match `conv`. - constexpr ConvParser ConsumeNextArg(char conv) const { - return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv)); - } - - // Verify that positional argument `i.value` matches `conv`. - // `error_` is set if `i.value` is not a valid argument or if it doesn't - // match. - constexpr ConvParser VerifyPositional(Integer i, char conv) const { - return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv)); - } - - // Parse the position of the arg and store it in `arg_position_`. - constexpr ConvParser ParseArgPosition(Integer arg) const { - return SetFormat(arg.format).SetArgPosition(arg.value); - } - - // Consume the flags. - constexpr ConvParser ParseFlags() const { - return SetFormat(ConsumeAnyOf(format_, "-+ #0")); - } - - // Consume the width. - // If it is '*', we verify that it matches `args_`. `error_` is set if it - // doesn't match. - constexpr ConvParser ParseWidth() const { - char first_char = GetChar(format_, 0); - - if (IsDigit(first_char)) { - return SetFormat(ParseDigits(format_).format); - } else if (first_char == '*') { - if (is_positional_) { - return VerifyPositional(ParsePositional(ConsumeFront(format_)), '*'); - } else { - return SetFormat(ConsumeFront(format_)).ConsumeNextArg('*'); - } - } else { - return *this; +template <FormatConversionCharSet... C> +constexpr bool ValidFormatImpl(string_view format) { + int next_arg = 0; + const char* p = format.data(); + const char* const end = p + format.size(); + constexpr FormatConversionCharSet + kAllowedConvs[(std::max)(sizeof...(C), size_t{1})] = {C...}; + bool used[(std::max)(sizeof...(C), size_t{1})]{}; + constexpr int kNumArgs = sizeof...(C); + while (p != end) { + while (p != end && *p != '%') ++p; + if (p == end) { + break; } - } - - // Consume the precision. - // If it is '*', we verify that it matches `args_`. `error_` is set if it - // doesn't match. - constexpr ConvParser ParsePrecision() const { - if (GetChar(format_, 0) != '.') { - return *this; - } else if (GetChar(format_, 1) == '*') { - if (is_positional_) { - return VerifyPositional(ParsePositional(ConsumeFront(format_, 2)), '*'); - } else { - return SetFormat(ConsumeFront(format_, 2)).ConsumeNextArg('*'); - } - } else { - return SetFormat(ParseDigits(ConsumeFront(format_)).format); + if (p + 1 >= end) return false; + if (p[1] == '%') { + // %% + p += 2; + continue; } - } - - // Consume the length characters. - constexpr ConvParser ParseLength() const { - return SetFormat(ConsumeAnyOf(format_, "lLhjztq")); - } - - // Consume the conversion character and verify that it matches `args_`. - // `error_` is set if it doesn't match. - constexpr ConvParser ParseConversion() const { - char first_char = GetChar(format_, 0); - if (first_char == 'v' && *(format_.data() - 1) != '%') { - return SetError(true); + UnboundConversion conv(absl::kConstInit); + p = ConsumeUnboundConversion(p + 1, end, &conv, &next_arg); + if (p == nullptr) return false; + if (conv.arg_position <= 0 || conv.arg_position > kNumArgs) { + return false; } - - if (is_positional_) { - return VerifyPositional({ConsumeFront(format_), arg_position_}, - first_char); - } else { - return ConsumeNextArg(first_char).SetFormat(ConsumeFront(format_)); + if (!Contains(kAllowedConvs[conv.arg_position - 1], conv.conv)) { + return false; } - } - - constexpr ConvParser(string_view format, ConvList args, bool error, - int arg_position, bool is_positional) - : format_(format), - args_(args), - error_(error), - arg_position_(arg_position), - is_positional_(is_positional) {} - - public: - constexpr ConvParser(string_view format, ConvList args, bool is_positional) - : format_(format), - args_(args), - error_(false), - arg_position_(0), - is_positional_(is_positional) {} - - // Consume the whole conversion specifier. - // `format()` will be set to the character after the conversion character. - // `error()` will be set if any of the arguments do not match. - constexpr ConvParser Run() const { - ConvParser parser = *this; - - if (is_positional_) { - parser = ParseArgPosition(ParsePositional(format_)); - } - - return parser.ParseFlags() - .ParseWidth() - .ParsePrecision() - .ParseLength() - .ParseConversion(); - } - - constexpr string_view format() const { return format_; } - constexpr ConvList args() const { return args_; } - constexpr bool error() const { return error_; } - constexpr bool is_positional() const { return is_positional_; } - - private: - string_view format_; - // Current list of arguments. If we are not in positional mode we will consume - // from the front. - ConvList args_; - bool error_; - // Holds the argument position of the conversion character, if we are in - // positional mode. Otherwise, it is unspecified. - int arg_position_; - // Whether we are in positional mode. - // It changes the behavior of '*' and where to find the converted argument. - bool is_positional_; -}; - -// Parses a whole format expression. -// See FormatParser::Run(). -class FormatParser { - static constexpr bool FoundPercent(string_view format) { - return format.empty() || - (GetChar(format, 0) == '%' && GetChar(format, 1) != '%'); - } - - // We use an inner function to increase the recursion limit. - // The inner function consumes up to `limit` characters on every run. - // This increases the limit from 512 to ~512*limit. - static constexpr string_view ConsumeNonPercentInner(string_view format) { - int limit = 20; - while (!FoundPercent(format) && limit != 0) { - size_t len = 0; - - if (GetChar(format, 0) == '%' && GetChar(format, 1) == '%') { - len = 2; - } else { - len = 1; + used[conv.arg_position - 1] = true; + for (auto extra : {conv.width, conv.precision}) { + if (extra.is_from_arg()) { + int pos = extra.get_from_arg(); + if (pos <= 0 || pos > kNumArgs) return false; + used[pos - 1] = true; + if (!Contains(kAllowedConvs[pos - 1], '*')) { + return false; + } } - - format = ConsumeFront(format, len); - --limit; } - - return format; } - - // Consume characters until the next conversion spec %. - // It skips %%. - static constexpr string_view ConsumeNonPercent(string_view format) { - while (!FoundPercent(format)) { - format = ConsumeNonPercentInner(format); + if (sizeof...(C) != 0) { + for (bool b : used) { + if (!b) return false; } - - return format; - } - - static constexpr bool IsPositional(string_view format) { - while (IsDigit(GetChar(format, 0))) { - format = ConsumeFront(format); - } - - return GetChar(format, 0) == '$'; } - - constexpr bool RunImpl(bool is_positional) const { - // In non-positional mode we require all arguments to be consumed. - // In positional mode just reaching the end of the format without errors is - // enough. - return (format_.empty() && (is_positional || args_.count == 0)) || - (!format_.empty() && - ValidateArg( - ConvParser(ConsumeFront(format_), args_, is_positional).Run())); - } - - constexpr bool ValidateArg(ConvParser conv) const { - return !conv.error() && FormatParser(conv.format(), conv.args()) - .RunImpl(conv.is_positional()); - } - - public: - constexpr FormatParser(string_view format, ConvList args) - : format_(ConsumeNonPercent(format)), args_(args) {} - - // Runs the parser for `format` and `args`. - // It verifies that the format is valid and that all conversion specifiers - // match the arguments passed. - // In non-positional mode it also verfies that all arguments are consumed. - constexpr bool Run() const { - return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_))); - } - - private: - string_view format_; - // Current list of arguments. - // If we are not in positional mode we will consume from the front and will - // have to be empty in the end. - ConvList args_; -}; - -template <FormatConversionCharSet... C> -constexpr bool ValidFormatImpl(string_view format) { - return FormatParser(format, - {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) - .Run(); + return true; } #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER diff --git a/absl/strings/internal/str_format/checker_test.cc b/absl/strings/internal/str_format/checker_test.cc index 680517f7..a86bed38 100644 --- a/absl/strings/internal/str_format/checker_test.cc +++ b/absl/strings/internal/str_format/checker_test.cc @@ -93,6 +93,7 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<void (*)(), volatile int*>("%p %p"), // ValidFormat<string_view, const char*, double, void*>( "string_view=%s const char*=%s double=%f void*=%p)"), + ValidFormat<int>("%v"), // ValidFormat<int>("%% %1$d"), // ValidFormat<int>("%1$ld"), // @@ -109,7 +110,9 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<int, double>("%2$.*1$f"), // ValidFormat<void*, string_view, const char*, double>( "string_view=%2$s const char*=%3$s double=%4$f void*=%1$p " - "repeat=%3$s)")}; + "repeat=%3$s)"), + ValidFormat<std::string>("%1$v"), + }; for (Case c : trues) { EXPECT_TRUE(c.result) << c.format; @@ -130,6 +133,8 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<int>("%*d"), // ValidFormat<std::string>("%p"), // ValidFormat<int (*)(int)>("%d"), // + ValidFormat<int>("%1v"), // + ValidFormat<int>("%.1v"), // ValidFormat<>("%3$d"), // ValidFormat<>("%1$r"), // @@ -138,13 +143,14 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<int>("%1$*2$1d"), // ValidFormat<int>("%1$1-d"), // ValidFormat<std::string, int>("%2$*1$s"), // - ValidFormat<std::string>("%1$p"), + ValidFormat<std::string>("%1$p"), // + ValidFormat<int>("%1$*2$v"), // ValidFormat<int, int>("%d %2$d"), // }; for (Case c : falses) { - EXPECT_FALSE(c.result) << c.format; + EXPECT_FALSE(c.result) << "format<" << c.format << ">"; } } diff --git a/absl/strings/internal/str_format/constexpr_parser.h b/absl/strings/internal/str_format/constexpr_parser.h new file mode 100644 index 00000000..3dc1776b --- /dev/null +++ b/absl/strings/internal/str_format/constexpr_parser.h @@ -0,0 +1,351 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ + +#include <cassert> +#include <cstdint> +#include <limits> + +#include "absl/base/const_init.h" +#include "absl/strings/internal/str_format/extension.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; + +// The analyzed properties of a single specified conversion. +struct UnboundConversion { + // This is a user defined default constructor on purpose to skip the + // initialization of parts of the object that are not necessary. + UnboundConversion() {} // NOLINT + + // This constructor is provided for the static checker. We don't want to do + // the unnecessary initialization in the normal case. + explicit constexpr UnboundConversion(absl::ConstInitType) + : arg_position{}, width{}, precision{} {} + + class InputValue { + public: + constexpr void set_value(int value) { + assert(value >= 0); + value_ = value; + } + constexpr int value() const { return value_; } + + // Marks the value as "from arg". aka the '*' format. + // Requires `value >= 1`. + // When set, is_from_arg() return true and get_from_arg() returns the + // original value. + // `value()`'s return value is unspecified in this state. + constexpr void set_from_arg(int value) { + assert(value > 0); + value_ = -value - 1; + } + constexpr bool is_from_arg() const { return value_ < -1; } + constexpr int get_from_arg() const { + assert(is_from_arg()); + return -value_ - 1; + } + + private: + int value_ = -1; + }; + + // No need to initialize. It will always be set in the parser. + int arg_position; + + InputValue width; + InputValue precision; + + Flags flags = Flags::kBasic; + LengthMod length_mod = LengthMod::none; + FormatConversionChar conv = FormatConversionCharInternal::kNone; +}; + +// Helper tag class for the table below. +// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and +// conversions. +class ConvTag { + public: + constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT + : tag_(static_cast<uint8_t>(conversion_char)) {} + constexpr ConvTag(LengthMod length_mod) // NOLINT + : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} + constexpr ConvTag(Flags flags) // NOLINT + : tag_(0xc0 | static_cast<uint8_t>(flags)) {} + constexpr ConvTag() : tag_(0xFF) {} + + constexpr bool is_conv() const { return (tag_ & 0x80) == 0; } + constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; } + constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } + + constexpr FormatConversionChar as_conv() const { + assert(is_conv()); + assert(!is_length()); + assert(!is_flags()); + return static_cast<FormatConversionChar>(tag_); + } + constexpr LengthMod as_length() const { + assert(!is_conv()); + assert(is_length()); + assert(!is_flags()); + return static_cast<LengthMod>(tag_ & 0x3F); + } + constexpr Flags as_flags() const { + assert(!is_conv()); + assert(!is_length()); + assert(is_flags()); + return static_cast<Flags>(tag_ & 0x1F); + } + + private: + uint8_t tag_; +}; + +struct ConvTagHolder { + using CC = FormatConversionCharInternal; + using LM = LengthMod; + + // Abbreviations to fit in the table below. + static constexpr auto kFSign = Flags::kSignCol; + static constexpr auto kFAlt = Flags::kAlt; + static constexpr auto kFPos = Flags::kShowPos; + static constexpr auto kFLeft = Flags::kLeft; + static constexpr auto kFZero = Flags::kZero; + + static constexpr ConvTag value[256] = { + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + kFSign, {}, {}, kFAlt, {}, {}, {}, {}, // !"#$%&' + {}, {}, {}, kFPos, {}, kFLeft, {}, {}, // ()*+,-./ + kFZero, {}, {}, {}, {}, {}, {}, {}, // 01234567 + {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? + {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! + {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff + }; +}; + +// Keep a single table for all the conversion chars and length modifiers. +constexpr ConvTag GetTagForChar(char c) { + return ConvTagHolder::value[static_cast<unsigned char>(c)]; +} + +constexpr bool CheckFastPathSetting(const UnboundConversion& conv) { + bool width_precision_needed = + conv.width.value() >= 0 || conv.precision.value() >= 0; + if (width_precision_needed && conv.flags == Flags::kBasic) { +#if defined(__clang__) + // Some compilers complain about this in constexpr even when not executed, + // so only enable the error dump in clang. + fprintf(stderr, + "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " + "width=%d precision=%d\n", + conv.flags == Flags::kBasic ? 1 : 0, + FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, + FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, + FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, + FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, + FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(), + conv.precision.value()); +#endif // defined(__clang__) + return false; + } + return true; +} + +constexpr int ParseDigits(char& c, const char*& pos, const char* const end) { + int digits = c - '0'; + // We do not want to overflow `digits` so we consume at most digits10 + // digits. If there are more digits the parsing will fail later on when the + // digit doesn't match the expected characters. + int num_digits = std::numeric_limits<int>::digits10; + for (;;) { + if (ABSL_PREDICT_FALSE(pos == end)) break; + c = *pos++; + if ('0' > c || c > '9') break; + --num_digits; + if (ABSL_PREDICT_FALSE(!num_digits)) break; + digits = 10 * digits + c - '0'; + } + return digits; +} + +template <bool is_positional> +constexpr const char* ConsumeConversion(const char* pos, const char* const end, + UnboundConversion* conv, + int* next_arg) { + const char* const original_pos = pos; + char c = 0; + // Read the next char into `c` and update `pos`. Returns false if there are + // no more chars to read. +#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ + do { \ + if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ + c = *pos++; \ + } while (0) + + if (is_positional) { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->arg_position = ParseDigits(c, pos, end); + assert(conv->arg_position > 0); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + } + + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + + // We should start with the basic flag on. + assert(conv->flags == Flags::kBasic); + + // Any non alpha character makes this conversion not basic. + // This includes flags (-+ #0), width (1-9, *) or precision (.). + // All conversion characters and length modifiers are alpha characters. + if (c < 'A') { + while (c <= '0') { + auto tag = GetTagForChar(c); + if (tag.is_flags()) { + conv->flags = conv->flags | tag.as_flags(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + break; + } + } + + if (c <= '9') { + if (c >= '0') { + int maybe_width = ParseDigits(c, pos, end); + if (!is_positional && c == '$') { + if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; + // Positional conversion. + *next_arg = -1; + return ConsumeConversion<true>(original_pos, end, conv, next_arg); + } + conv->flags = conv->flags | Flags::kNonBasic; + conv->width.set_value(maybe_width); + } else if (c == '*') { + conv->flags = conv->flags | Flags::kNonBasic; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->width.set_from_arg(ParseDigits(c, pos, end)); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->width.set_from_arg(++*next_arg); + } + } + } + + if (c == '.') { + conv->flags = conv->flags | Flags::kNonBasic; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if ('0' <= c && c <= '9') { + conv->precision.set_value(ParseDigits(c, pos, end)); + } else if (c == '*') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->precision.set_from_arg(ParseDigits(c, pos, end)); + if (c != '$') return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->precision.set_from_arg(++*next_arg); + } + } else { + conv->precision.set_value(0); + } + } + } + + auto tag = GetTagForChar(c); + + if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) { + return nullptr; + } + + if (ABSL_PREDICT_FALSE(!tag.is_conv())) { + if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; + + // It is a length modifier. + using str_format_internal::LengthMod; + LengthMod length_mod = tag.as_length(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (c == 'h' && length_mod == LengthMod::h) { + conv->length_mod = LengthMod::hh; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else if (c == 'l' && length_mod == LengthMod::l) { + conv->length_mod = LengthMod::ll; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->length_mod = length_mod; + } + tag = GetTagForChar(c); + + if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr; + if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; + } + + assert(CheckFastPathSetting(*conv)); + (void)(&CheckFastPathSetting); + + conv->conv = tag.as_conv(); + if (!is_positional) conv->arg_position = ++*next_arg; + return pos; +} + +// Consume conversion spec prefix (not including '%') of [p, end) if valid. +// Examples of valid specs would be e.g.: "s", "d", "-12.6f". +// If valid, it returns the first character following the conversion spec, +// and the spec part is broken down and returned in 'conv'. +// If invalid, returns nullptr. +constexpr const char* ConsumeUnboundConversion(const char* p, const char* end, + UnboundConversion* conv, + int* next_arg) { + if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); + return ConsumeConversion<false>(p, end, conv, next_arg); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc index 300612b7..8b5a27ed 100644 --- a/absl/strings/internal/str_format/convert_test.cc +++ b/absl/strings/internal/str_format/convert_test.cc @@ -1241,9 +1241,9 @@ TEST_F(FormatConvertTest, GlibcHasCorrectTraits) { const NativePrintfTraits &native_traits = VerifyNativeImplementation(); // If one of the following tests break then it is either because the above PP // macro guards failed to exclude a new platform (likely) or because something - // has changed in the implemention of glibc sprintf float formatting behavior. - // If the latter, then the code that computes these flags needs to be - // revisited and/or possibly the StrFormat implementation. + // has changed in the implementation of glibc sprintf float formatting + // behavior. If the latter, then the code that computes these flags needs to + // be revisited and/or possibly the StrFormat implementation. EXPECT_TRUE(native_traits.hex_float_has_glibc_rounding); EXPECT_TRUE(native_traits.hex_float_prefers_denormal_repr); EXPECT_TRUE( diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h index 603bd49d..8de42d2c 100644 --- a/absl/strings/internal/str_format/extension.h +++ b/absl/strings/internal/str_format/extension.h @@ -273,7 +273,7 @@ struct FormatConversionSpecImplFriend; class FormatConversionSpecImpl { public: - // Width and precison are not specified, no flags are set. + // Width and precision are not specified, no flags are set. bool is_basic() const { return flags_ == Flags::kBasic; } bool has_left_flag() const { return FlagsContains(flags_, Flags::kLeft); } bool has_show_pos_flag() const { diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc index 8e497852..8edf520d 100644 --- a/absl/strings/internal/str_format/float_conversion.cc +++ b/absl/strings/internal/str_format/float_conversion.cc @@ -711,12 +711,12 @@ bool IncrementNibble(size_t nibble_index, Int* n) { constexpr size_t kShift = sizeof(Int) * 8 - 1; constexpr size_t kNumNibbles = sizeof(Int) * 8 / 4; Int before = *n >> kShift; - // Here we essentially want to take the number 1 and move it into the requsted - // nibble, then add it to *n to effectively increment the nibble. However, - // ASan will complain if we try to shift the 1 beyond the limits of the Int, - // i.e., if the nibble_index is out of range. So therefore we check for this - // and if we are out of range we just add 0 which leaves *n unchanged, which - // seems like the reasonable thing to do in that case. + // Here we essentially want to take the number 1 and move it into the + // requested nibble, then add it to *n to effectively increment the nibble. + // However, ASan will complain if we try to shift the 1 beyond the limits of + // the Int, i.e., if the nibble_index is out of range. So therefore we check + // for this and if we are out of range we just add 0 which leaves *n + // unchanged, which seems like the reasonable thing to do in that case. *n += ((nibble_index >= kNumNibbles) ? 0 : (Int{1} << static_cast<int>(nibble_index * 4))); @@ -937,7 +937,7 @@ void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp, // =============== Exponent ================== constexpr size_t kBufSizeForExpDecRepr = - numbers_internal::kFastToBufferSize // requred for FastIntToBuffer + numbers_internal::kFastToBufferSize // required for FastIntToBuffer + 1 // 'p' or 'P' + 1; // '+' or '-' char exp_buffer[kBufSizeForExpDecRepr]; @@ -1015,7 +1015,7 @@ struct Buffer { --end; } - char &back() { + char &back() const { assert(begin < end); return end[-1]; } @@ -1102,7 +1102,7 @@ void PrintExponent(int exp, char e, Buffer *out) { template <typename Float, typename Int> constexpr bool CanFitMantissa() { return -#if defined(__clang__) && !defined(__SSE3__) +#if defined(__clang__) && (__clang_major__ < 9) && !defined(__SSE3__) // Workaround for clang bug: https://bugs.llvm.org/show_bug.cgi?id=38289 // Casting from long double to uint64_t is miscompiled and drops bits. (!std::is_same<Float, long double>::value || diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index f9bb6615..5aaab698 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -31,211 +31,14 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -using CC = FormatConversionCharInternal; -using LM = LengthMod; +// Define the array for non-constexpr uses. +constexpr ConvTag ConvTagHolder::value[256]; -// Abbreviations to fit in the table below. -constexpr auto f_sign = Flags::kSignCol; -constexpr auto f_alt = Flags::kAlt; -constexpr auto f_pos = Flags::kShowPos; -constexpr auto f_left = Flags::kLeft; -constexpr auto f_zero = Flags::kZero; - -ABSL_CONST_INIT const ConvTag kTags[256] = { - {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 - {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f - {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 - {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f - f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&' - {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./ - f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567 - {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? - {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG - {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO - {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW - CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ - {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg - LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno - CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw - CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! - {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 - {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f - {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 - {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f - {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 - {}, {}, {}, {}, {}, {}, {}, {}, // a8-af - {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 - {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf - {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 - {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf - {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 - {}, {}, {}, {}, {}, {}, {}, {}, // d8-df - {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 - {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef - {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 - {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff -}; - -namespace { - -bool CheckFastPathSetting(const UnboundConversion& conv) { - bool width_precision_needed = - conv.width.value() >= 0 || conv.precision.value() >= 0; - if (width_precision_needed && conv.flags == Flags::kBasic) { - fprintf(stderr, - "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " - "width=%d precision=%d\n", - conv.flags == Flags::kBasic ? 1 : 0, - FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, - FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, - FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, - FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, - FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(), - conv.precision.value()); - return false; - } - return true; -} - -template <bool is_positional> -const char *ConsumeConversion(const char *pos, const char *const end, - UnboundConversion *conv, int *next_arg) { - const char* const original_pos = pos; - char c; - // Read the next char into `c` and update `pos`. Returns false if there are - // no more chars to read. -#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ - do { \ - if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ - c = *pos++; \ - } while (0) - - const auto parse_digits = [&] { - int digits = c - '0'; - // We do not want to overflow `digits` so we consume at most digits10 - // digits. If there are more digits the parsing will fail later on when the - // digit doesn't match the expected characters. - int num_digits = std::numeric_limits<int>::digits10; - for (;;) { - if (ABSL_PREDICT_FALSE(pos == end)) break; - c = *pos++; - if (!std::isdigit(c)) break; - --num_digits; - if (ABSL_PREDICT_FALSE(!num_digits)) break; - digits = 10 * digits + c - '0'; - } - return digits; - }; - - if (is_positional) { - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; - conv->arg_position = parse_digits(); - assert(conv->arg_position > 0); - if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; - } - - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - - // We should start with the basic flag on. - assert(conv->flags == Flags::kBasic); - - // Any non alpha character makes this conversion not basic. - // This includes flags (-+ #0), width (1-9, *) or precision (.). - // All conversion characters and length modifiers are alpha characters. - if (c < 'A') { - while (c <= '0') { - auto tag = GetTagForChar(c); - if (tag.is_flags()) { - conv->flags = conv->flags | tag.as_flags(); - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - break; - } - } - - if (c <= '9') { - if (c >= '0') { - int maybe_width = parse_digits(); - if (!is_positional && c == '$') { - if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; - // Positional conversion. - *next_arg = -1; - return ConsumeConversion<true>(original_pos, end, conv, next_arg); - } - conv->flags = conv->flags | Flags::kNonBasic; - conv->width.set_value(maybe_width); - } else if (c == '*') { - conv->flags = conv->flags | Flags::kNonBasic; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; - conv->width.set_from_arg(parse_digits()); - if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - conv->width.set_from_arg(++*next_arg); - } - } - } - - if (c == '.') { - conv->flags = conv->flags | Flags::kNonBasic; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (std::isdigit(c)) { - conv->precision.set_value(parse_digits()); - } else if (c == '*') { - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; - conv->precision.set_from_arg(parse_digits()); - if (c != '$') return nullptr; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - conv->precision.set_from_arg(++*next_arg); - } - } else { - conv->precision.set_value(0); - } - } - } - - auto tag = GetTagForChar(c); - - if (*(pos - 1) == 'v' && *(pos - 2) != '%') { - return nullptr; - } - - if (ABSL_PREDICT_FALSE(!tag.is_conv())) { - if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; - - // It is a length modifier. - using str_format_internal::LengthMod; - LengthMod length_mod = tag.as_length(); - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (c == 'h' && length_mod == LengthMod::h) { - conv->length_mod = LengthMod::hh; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else if (c == 'l' && length_mod == LengthMod::l) { - conv->length_mod = LengthMod::ll; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - conv->length_mod = length_mod; - } - tag = GetTagForChar(c); - if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; - } - - assert(CheckFastPathSetting(*conv)); - (void)(&CheckFastPathSetting); - - conv->conv = tag.as_conv(); - if (!is_positional) conv->arg_position = ++*next_arg; - return pos; +ABSL_ATTRIBUTE_NOINLINE const char* ConsumeUnboundConversionNoInline( + const char* p, const char* end, UnboundConversion* conv, int* next_arg) { + return ConsumeUnboundConversion(p, end, conv, next_arg); } -} // namespace - std::string LengthModToString(LengthMod v) { switch (v) { case LengthMod::h: @@ -262,12 +65,6 @@ std::string LengthModToString(LengthMod v) { return ""; } -const char *ConsumeUnboundConversion(const char *p, const char *end, - UnboundConversion *conv, int *next_arg) { - if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); - return ConsumeConversion<false>(p, end, conv, next_arg); -} - struct ParsedFormatBase::ParsedFormatConsumer { explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat) : parsed(parsedformat), data_pos(parsedformat->data_.get()) {} diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index a81bac83..35b6d49c 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -29,111 +29,18 @@ #include <vector> #include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" #include "absl/strings/internal/str_format/extension.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; - std::string LengthModToString(LengthMod v); -// The analyzed properties of a single specified conversion. -struct UnboundConversion { - UnboundConversion() {} - - class InputValue { - public: - void set_value(int value) { - assert(value >= 0); - value_ = value; - } - int value() const { return value_; } - - // Marks the value as "from arg". aka the '*' format. - // Requires `value >= 1`. - // When set, is_from_arg() return true and get_from_arg() returns the - // original value. - // `value()`'s return value is unspecfied in this state. - void set_from_arg(int value) { - assert(value > 0); - value_ = -value - 1; - } - bool is_from_arg() const { return value_ < -1; } - int get_from_arg() const { - assert(is_from_arg()); - return -value_ - 1; - } - - private: - int value_ = -1; - }; - - // No need to initialize. It will always be set in the parser. - int arg_position; - - InputValue width; - InputValue precision; - - Flags flags = Flags::kBasic; - LengthMod length_mod = LengthMod::none; - FormatConversionChar conv = FormatConversionCharInternal::kNone; -}; - -// Consume conversion spec prefix (not including '%') of [p, end) if valid. -// Examples of valid specs would be e.g.: "s", "d", "-12.6f". -// If valid, it returns the first character following the conversion spec, -// and the spec part is broken down and returned in 'conv'. -// If invalid, returns nullptr. -const char* ConsumeUnboundConversion(const char* p, const char* end, - UnboundConversion* conv, int* next_arg); - -// Helper tag class for the table below. -// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and -// conversions. -class ConvTag { - public: - constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT - : tag_(static_cast<uint8_t>(conversion_char)) {} - constexpr ConvTag(LengthMod length_mod) // NOLINT - : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} - constexpr ConvTag(Flags flags) // NOLINT - : tag_(0xc0 | static_cast<uint8_t>(flags)) {} - constexpr ConvTag() : tag_(0xFF) {} - - bool is_conv() const { return (tag_ & 0x80) == 0; } - bool is_length() const { return (tag_ & 0xC0) == 0x80; } - bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } - - FormatConversionChar as_conv() const { - assert(is_conv()); - assert(!is_length()); - assert(!is_flags()); - return static_cast<FormatConversionChar>(tag_); - } - LengthMod as_length() const { - assert(!is_conv()); - assert(is_length()); - assert(!is_flags()); - return static_cast<LengthMod>(tag_ & 0x3F); - } - Flags as_flags() const { - assert(!is_conv()); - assert(!is_length()); - assert(is_flags()); - return static_cast<Flags>(tag_ & 0x1F); - } - - private: - uint8_t tag_; -}; - -extern const ConvTag kTags[256]; -// Keep a single table for all the conversion chars and length modifiers. -inline ConvTag GetTagForChar(char c) { - return kTags[static_cast<unsigned char>(c)]; -} +const char* ConsumeUnboundConversionNoInline(const char* p, const char* end, + UnboundConversion* conv, + int* next_arg); // Parse the format string provided in 'src' and pass the identified items into // 'consumer'. @@ -187,7 +94,7 @@ bool ParseFormatString(string_view src, Consumer consumer) { } } else if (percent[1] != '%') { UnboundConversion conv; - p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg); + p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg); if (ABSL_PREDICT_FALSE(p == nullptr)) return false; if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( conv, string_view(percent + 1, diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index fe0d2963..021f6a87 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -110,10 +110,14 @@ TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) { {__LINE__, "ba", "", "ba"}, // 'b' is invalid {__LINE__, "l", "", "l" }, // just length mod isn't okay {__LINE__, "d", "d", "" }, // basic + {__LINE__, "v", "v", "" }, // basic {__LINE__, "d ", "d", " " }, // leave suffix {__LINE__, "dd", "d", "d" }, // don't be greedy {__LINE__, "d9", "d", "9" }, // leave non-space suffix {__LINE__, "dzz", "d", "zz"}, // length mod as suffix + {__LINE__, "3v", "", "3v"}, // 'v' cannot have modifiers + {__LINE__, "hv", "", "hv"}, // 'v' cannot have modifiers + {__LINE__, "1$v", "1$v", ""}, // 'v' can have use posix syntax {__LINE__, "1$*2$d", "1$*2$d", "" }, // arg indexing and * allowed. {__LINE__, "0-14.3hhd", "0-14.3hhd", ""}, // precision, width {__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""}, // flags diff --git a/absl/strings/internal/stringify_sink.cc b/absl/strings/internal/stringify_sink.cc new file mode 100644 index 00000000..7c6995ab --- /dev/null +++ b/absl/strings/internal/stringify_sink.cc @@ -0,0 +1,28 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/stringify_sink.h" +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +void StringifySink::Append(size_t count, char ch) { buffer_.append(count, ch); } + +void StringifySink::Append(string_view v) { + buffer_.append(v.data(), v.size()); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/stringify_sink.h b/absl/strings/internal/stringify_sink.h new file mode 100644 index 00000000..fc3747bb --- /dev/null +++ b/absl/strings/internal/stringify_sink.h @@ -0,0 +1,57 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STRINGIFY_SINK_H_ +#define ABSL_STRINGS_INTERNAL_STRINGIFY_SINK_H_ + +#include <string> +#include <type_traits> +#include <utility> + +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +namespace strings_internal { +class StringifySink { + public: + void Append(size_t count, char ch); + + void Append(string_view v); + + // Support `absl::Format(&sink, format, args...)`. + friend void AbslFormatFlush(StringifySink* sink, absl::string_view v) { + sink->Append(v); + } + + private: + template <typename T> + friend string_view ExtractStringification(StringifySink& sink, const T& v); + + std::string buffer_; +}; + +template <typename T> +string_view ExtractStringification(StringifySink& sink, const T& v) { + AbslStringify(sink, v); + return sink.buffer_; +} + +} // namespace strings_internal + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STRINGIFY_SINK_H_ diff --git a/absl/strings/match.cc b/absl/strings/match.cc index 2d672509..b65cbc67 100644 --- a/absl/strings/match.cc +++ b/absl/strings/match.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "absl/strings/match.h" +#include "absl/strings/ascii.h" #include "absl/strings/internal/memutil.h" @@ -27,6 +28,27 @@ bool EqualsIgnoreCase(absl::string_view piece1, // memcasecmp uses absl::ascii_tolower(). } +bool StrContainsIgnoreCase(absl::string_view haystack, + absl::string_view needle) noexcept { + while (haystack.size() >= needle.size()) { + if (StartsWithIgnoreCase(haystack, needle)) return true; + haystack.remove_prefix(1); + } + return false; +} + +bool StrContainsIgnoreCase(absl::string_view haystack, + char needle) noexcept { + char upper_needle = absl::ascii_toupper(static_cast<unsigned char>(needle)); + char lower_needle = absl::ascii_tolower(static_cast<unsigned char>(needle)); + if (upper_needle == lower_needle) { + return StrContains(haystack, needle); + } else { + const char both_cstr[3] = {lower_needle, upper_needle, '\0'}; + return haystack.find_first_of(both_cstr) != absl::string_view::npos; + } +} + bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix) noexcept { return (text.size() >= prefix.size()) && diff --git a/absl/strings/match.h b/absl/strings/match.h index 038cbb3f..1dc0beaf 100644 --- a/absl/strings/match.h +++ b/absl/strings/match.h @@ -72,6 +72,15 @@ inline bool EndsWith(absl::string_view text, memcmp(text.data() + (text.size() - suffix.size()), suffix.data(), suffix.size()) == 0); } +// StrContainsIgnoreCase() +// +// Returns whether a given ASCII string `haystack` contains the ASCII substring +// `needle`, ignoring case in the comparison. +bool StrContainsIgnoreCase(absl::string_view haystack, + absl::string_view needle) noexcept; + +bool StrContainsIgnoreCase(absl::string_view haystack, + char needle) noexcept; // EqualsIgnoreCase() // diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc index 5841bc1b..f063b4ea 100644 --- a/absl/strings/match_test.cc +++ b/absl/strings/match_test.cc @@ -124,4 +124,48 @@ TEST(MatchTest, EndsWithIgnoreCase) { EXPECT_FALSE(absl::EndsWithIgnoreCase("", "fo")); } +TEST(MatchTest, ContainsIgnoreCase) { + EXPECT_TRUE(absl::StrContainsIgnoreCase("foo", "foo")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("FOO", "Foo")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("--FOO", "Foo")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("FOO--", "Foo")); + EXPECT_FALSE(absl::StrContainsIgnoreCase("BAR", "Foo")); + EXPECT_FALSE(absl::StrContainsIgnoreCase("BAR", "Foo")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("123456", "123456")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("123456", "234")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("", "")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("abc", "")); + EXPECT_FALSE(absl::StrContainsIgnoreCase("", "a")); +} + +TEST(MatchTest, ContainsCharIgnoreCase) { + absl::string_view a("AaBCdefg!"); + absl::string_view b("AaBCd!"); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'a')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'A')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'b')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'B')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'e')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'E')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(a, 'h')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(a, 'H')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, '!')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(a, '?')); + + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'a')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'A')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'b')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'B')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'e')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'E')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'h')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'H')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, '!')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, '?')); + + EXPECT_FALSE(absl::StrContainsIgnoreCase("", 'a')); + EXPECT_FALSE(absl::StrContainsIgnoreCase("", 'A')); + EXPECT_FALSE(absl::StrContainsIgnoreCase("", '0')); +} + } // namespace diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 2987158e..c2b861ae 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -219,7 +219,7 @@ char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) { if (i < 0) { *buffer++ = '-'; // We need to do the negation in modular (i.e., "unsigned") - // arithmetic; MSVC++ apprently warns for plain "-u", so + // arithmetic; MSVC++ apparently warns for plain "-u", so // we write the equivalent expression "0 - u" instead. u = 0 - u; } diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc index 6981347a..6c198f85 100644 --- a/absl/strings/str_cat.cc +++ b/absl/strings/str_cat.cc @@ -30,88 +30,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN -namespace strings_internal { -void StringifySink::Append(size_t count, char ch) { buffer_.append(count, ch); } - -void StringifySink::Append(string_view v) { - buffer_.append(v.data(), v.size()); -} - -bool StringifySink::PutPaddedString(string_view v, int width, int precision, - bool left) { - size_t space_remaining = 0; - - if (width >= 0) space_remaining = static_cast<size_t>(width); - - size_t n = v.size(); - - if (precision >= 0) n = (std::min)(n, static_cast<size_t>(precision)); - - string_view shown(v.data(), n); - - if (shown.size() < space_remaining) { - space_remaining = space_remaining - shown.size(); - } else { - space_remaining = 0; - } - - if (!left) Append(space_remaining, ' '); - Append(shown); - if (left) Append(space_remaining, ' '); - return true; -} - -} // namespace strings_internal - -AlphaNum::AlphaNum(Hex hex) { - static_assert(numbers_internal::kFastToBufferSize >= 32, - "This function only works when output buffer >= 32 bytes long"); - char* const end = &digits_[numbers_internal::kFastToBufferSize]; - auto real_width = - absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16); - if (real_width >= hex.width) { - piece_ = absl::string_view(end - real_width, real_width); - } else { - // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and - // max pad width can be up to 20. - std::memset(end - 32, hex.fill, 16); - // Patch up everything else up to the real_width. - std::memset(end - real_width - 16, hex.fill, 16); - piece_ = absl::string_view(end - hex.width, hex.width); - } -} - -AlphaNum::AlphaNum(Dec dec) { - assert(dec.width <= numbers_internal::kFastToBufferSize); - char* const end = &digits_[numbers_internal::kFastToBufferSize]; - char* const minfill = end - dec.width; - char* writer = end; - uint64_t value = dec.value; - bool neg = dec.neg; - while (value > 9) { - *--writer = '0' + (value % 10); - value /= 10; - } - *--writer = '0' + static_cast<char>(value); - if (neg) *--writer = '-'; - - ptrdiff_t fillers = writer - minfill; - if (fillers > 0) { - // Tricky: if the fill character is ' ', then it's <fill><+/-><digits> - // But...: if the fill character is '0', then it's <+/-><fill><digits> - bool add_sign_again = false; - if (neg && dec.fill == '0') { // If filling with '0', - ++writer; // ignore the sign we just added - add_sign_again = true; // and re-add the sign later. - } - writer -= fillers; - std::fill_n(writer, fillers, dec.fill); - if (add_sign_again) *--writer = '-'; - } - - piece_ = absl::string_view(writer, static_cast<size_t>(end - writer)); -} - // ---------------------------------------------------------------------- // StrCat() // This merges the given strings or integers, with no delimiter. This @@ -177,12 +95,12 @@ namespace strings_internal { std::string CatPieces(std::initializer_list<absl::string_view> pieces) { std::string result; size_t total_size = 0; - for (const absl::string_view& piece : pieces) total_size += piece.size(); + for (absl::string_view piece : pieces) total_size += piece.size(); strings_internal::STLStringResizeUninitialized(&result, total_size); char* const begin = &result[0]; char* out = begin; - for (const absl::string_view& piece : pieces) { + for (absl::string_view piece : pieces) { const size_t this_size = piece.size(); if (this_size != 0) { memcpy(out, piece.data(), this_size); @@ -206,7 +124,7 @@ void AppendPieces(std::string* dest, std::initializer_list<absl::string_view> pieces) { size_t old_size = dest->size(); size_t total_size = old_size; - for (const absl::string_view& piece : pieces) { + for (absl::string_view piece : pieces) { ASSERT_NO_OVERLAP(*dest, piece); total_size += piece.size(); } @@ -214,7 +132,7 @@ void AppendPieces(std::string* dest, char* const begin = &(*dest)[0]; char* out = begin + old_size; - for (const absl::string_view& piece : pieces) { + for (absl::string_view piece : pieces) { const size_t this_size = piece.size(); if (this_size != 0) { memcpy(out, piece.data(), this_size); diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index 6ee88f14..fcd48c4e 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -48,19 +48,58 @@ // `StrCat()` or `StrAppend()`. You may specify a minimum hex field width using // a `PadSpec` enum. // +// User-defined types can be formatted with the `AbslStringify()` customization +// point. The API relies on detecting an overload in the user-defined type's +// namespace of a free (non-member) `AbslStringify()` function as a definition +// (typically declared as a friend and implemented in-line. +// with the following signature: +// +// class MyClass { ... }; +// +// template <typename Sink> +// void AbslStringify(Sink& sink, const MyClass& value); +// +// An `AbslStringify()` overload for a type should only be declared in the same +// file and namespace as said type. +// +// Note that `AbslStringify()` also supports use with `absl::StrFormat()` and +// `absl::Substitute()`. +// +// Example: +// +// struct Point { +// // To add formatting support to `Point`, we simply need to add a free +// // (non-member) function `AbslStringify()`. This method specifies how +// // Point should be printed when absl::StrCat() is called on it. You can add +// // such a free function using a friend declaration within the body of the +// // class. The sink parameter is a templated type to avoid requiring +// // dependencies. +// template <typename Sink> friend void AbslStringify(Sink& +// sink, const Point& p) { +// absl::Format(&sink, "(%v, %v)", p.x, p.y); +// } +// +// int x; +// int y; +// }; // ----------------------------------------------------------------------------- #ifndef ABSL_STRINGS_STR_CAT_H_ #define ABSL_STRINGS_STR_CAT_H_ +#include <algorithm> #include <array> #include <cstdint> +#include <cstring> #include <string> #include <type_traits> #include <utility> #include <vector> +#include "absl/base/attributes.h" #include "absl/base/port.h" +#include "absl/strings/internal/has_absl_stringify.h" +#include "absl/strings/internal/stringify_sink.h" #include "absl/strings/numbers.h" #include "absl/strings/string_view.h" @@ -77,32 +116,6 @@ struct AlphaNumBuffer { size_t size; }; -class StringifySink { - public: - void Append(size_t count, char ch); - - void Append(string_view v); - - bool PutPaddedString(string_view v, int width, int precision, bool left); - - // Support `absl::Format(&sink, format, args...)`. - friend void AbslFormatFlush(StringifySink* sink, absl::string_view v) { - sink->Append(v); - } - - template <typename T> - friend string_view ExtractStringification(StringifySink& sink, const T& v); - - private: - std::string buffer_; -}; - -template <typename T> -string_view ExtractStringification(StringifySink& sink, const T& v) { - AbslStringify(sink, v); - return sink.buffer_; -} - } // namespace strings_internal // Enum that specifies the number of significant digits to return in a `Hex` or @@ -191,6 +204,27 @@ struct Hex { explicit Hex(Pointee* v, PadSpec spec = absl::kNoPad) : Hex(spec, reinterpret_cast<uintptr_t>(v)) {} + template <typename S> + friend void AbslStringify(S& sink, Hex hex) { + static_assert( + numbers_internal::kFastToBufferSize >= 32, + "This function only works when output buffer >= 32 bytes long"); + char buffer[numbers_internal::kFastToBufferSize]; + char* const end = &buffer[numbers_internal::kFastToBufferSize]; + auto real_width = + absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16); + if (real_width >= hex.width) { + sink.Append(absl::string_view(end - real_width, real_width)); + } else { + // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and + // max pad width can be up to 20. + std::memset(end - 32, hex.fill, 16); + // Patch up everything else up to the real_width. + std::memset(end - real_width - 16, hex.fill, 16); + sink.Append(absl::string_view(end - hex.width, hex.width)); + } + } + private: Hex(PadSpec spec, uint64_t v) : value(v), @@ -225,6 +259,38 @@ struct Dec { : spec - absl::kZeroPad2 + 2), fill(spec >= absl::kSpacePad2 ? ' ' : '0'), neg(v < 0) {} + + template <typename S> + friend void AbslStringify(S& sink, Dec dec) { + assert(dec.width <= numbers_internal::kFastToBufferSize); + char buffer[numbers_internal::kFastToBufferSize]; + char* const end = &buffer[numbers_internal::kFastToBufferSize]; + char* const minfill = end - dec.width; + char* writer = end; + uint64_t val = dec.value; + while (val > 9) { + *--writer = '0' + (val % 10); + val /= 10; + } + *--writer = '0' + static_cast<char>(val); + if (dec.neg) *--writer = '-'; + + ptrdiff_t fillers = writer - minfill; + if (fillers > 0) { + // Tricky: if the fill character is ' ', then it's <fill><+/-><digits> + // But...: if the fill character is '0', then it's <+/-><fill><digits> + bool add_sign_again = false; + if (dec.neg && dec.fill == '0') { // If filling with '0', + ++writer; // ignore the sign we just added + add_sign_again = true; // and re-add the sign later. + } + writer -= fillers; + std::fill_n(writer, fillers, dec.fill); + if (add_sign_again) *--writer = '-'; + } + + sink.Append(absl::string_view(writer, static_cast<size_t>(end - writer))); + } }; // ----------------------------------------------------------------------------- @@ -232,17 +298,10 @@ struct Dec { // ----------------------------------------------------------------------------- // // The `AlphaNum` class acts as the main parameter type for `StrCat()` and -// `StrAppend()`, providing efficient conversion of numeric, boolean, and -// hexadecimal values (through the `Hex` type) into strings. - -template <typename T, typename = void> -struct HasAbslStringify : std::false_type {}; - -template <typename T> -struct HasAbslStringify<T, std::enable_if_t<std::is_void<decltype(AbslStringify( - std::declval<strings_internal::StringifySink&>(), - std::declval<const T&>()))>::value>> - : std::true_type {}; +// `StrAppend()`, providing efficient conversion of numeric, boolean, decimal, +// and hexadecimal values (through the `Dec` and `Hex` types) into strings. +// `AlphaNum` should only be used as a function parameter. Do not instantiate +// `AlphaNum` directly as a stack variable. class AlphaNum { public: @@ -279,28 +338,30 @@ class AlphaNum { AlphaNum(double f) // NOLINT(runtime/explicit) : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {} - AlphaNum(Hex hex); // NOLINT(runtime/explicit) - AlphaNum(Dec dec); // NOLINT(runtime/explicit) - template <size_t size> AlphaNum( // NOLINT(runtime/explicit) - const strings_internal::AlphaNumBuffer<size>& buf) + const strings_internal::AlphaNumBuffer<size>& buf + ABSL_ATTRIBUTE_LIFETIME_BOUND) : piece_(&buf.data[0], buf.size) {} - AlphaNum(const char* c_str) // NOLINT(runtime/explicit) - : piece_(NullSafeStringView(c_str)) {} // NOLINT(runtime/explicit) - AlphaNum(absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit) + AlphaNum(const char* c_str // NOLINT(runtime/explicit) + ABSL_ATTRIBUTE_LIFETIME_BOUND) + : piece_(NullSafeStringView(c_str)) {} + AlphaNum(absl::string_view pc // NOLINT(runtime/explicit) + ABSL_ATTRIBUTE_LIFETIME_BOUND) + : piece_(pc) {} template <typename T, typename = typename std::enable_if< - HasAbslStringify<T>::value>::type> - AlphaNum( // NOLINT(runtime/explicit) - const T& v, // NOLINT(runtime/explicit) - strings_internal::StringifySink&& sink = {}) // NOLINT(runtime/explicit) + strings_internal::HasAbslStringify<T>::value>::type> + AlphaNum( // NOLINT(runtime/explicit) + const T& v ABSL_ATTRIBUTE_LIFETIME_BOUND, + strings_internal::StringifySink&& sink ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) : piece_(strings_internal::ExtractStringification(sink, v)) {} template <typename Allocator> AlphaNum( // NOLINT(runtime/explicit) - const std::basic_string<char, std::char_traits<char>, Allocator>& str) + const std::basic_string<char, std::char_traits<char>, Allocator>& str + ABSL_ATTRIBUTE_LIFETIME_BOUND) : piece_(str) {} // Use string literals ":" instead of character literals ':'. @@ -317,7 +378,8 @@ class AlphaNum { // This overload matches only scoped enums. template <typename T, typename = typename std::enable_if< - std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type> + std::is_enum<T>{} && !std::is_convertible<T, int>{} && + !strings_internal::HasAbslStringify<T>::value>::type> AlphaNum(T e) // NOLINT(runtime/explicit) : AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {} diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc index 1b3b7ece..2d74245e 100644 --- a/absl/strings/str_cat_test.cc +++ b/absl/strings/str_cat_test.cc @@ -443,7 +443,7 @@ TEST(StrCat, AvoidsMemcpyWithNullptr) { EXPECT_EQ(result, "12345"); } -#ifdef GTEST_HAS_DEATH_TEST +#if GTEST_HAS_DEATH_TEST TEST(StrAppend, Death) { std::string s = "self"; // on linux it's "assertion", on mac it's "Assertion", @@ -650,4 +650,16 @@ TEST(StrCat, AbslStringifyExampleUsingFormat) { EXPECT_EQ(absl::StrCat("a ", p, " z"), "a (10, 20) z"); } +enum class EnumWithStringify { Many = 0, Choices = 1 }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumWithStringify e) { + absl::Format(&sink, "%s", e == EnumWithStringify::Many ? "Many" : "Choices"); +} + +TEST(StrCat, AbslStringifyWithEnum) { + const auto e = EnumWithStringify::Choices; + EXPECT_EQ(absl::StrCat(e), "Choices"); +} + } // namespace diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h index ffbcb9af..fc4bf39e 100644 --- a/absl/strings/str_format.h +++ b/absl/strings/str_format.h @@ -36,10 +36,12 @@ // * `absl::StreamFormat()` to more efficiently write a format string to a // stream, such as`std::cout`. // * `absl::PrintF()`, `absl::FPrintF()` and `absl::SNPrintF()` as -// replacements for `std::printf()`, `std::fprintf()` and `std::snprintf()`. +// drop-in replacements for `std::printf()`, `std::fprintf()` and +// `std::snprintf()`. // -// Note: a version of `std::sprintf()` is not supported as it is -// generally unsafe due to buffer overflows. +// Note: An `absl::SPrintF()` drop-in replacement is not supported as it +// is generally unsafe due to buffer overflows. Use `absl::StrFormat` which +// returns the string as output instead of expecting a pre-allocated buffer. // // Additionally, you can provide a format string (and its associated arguments) // using one of the following abstractions: @@ -191,9 +193,9 @@ class FormatCountCapture { // absl::StrFormat(formatString, "TheVillage", 6); // // A format string generally follows the POSIX syntax as used within the POSIX -// `printf` specification. +// `printf` specification. (Exceptions are noted below.) // -// (See http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html.) +// (See http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html) // // In specific, the `FormatSpec` supports the following type specifiers: // * `c` for characters @@ -211,6 +213,10 @@ class FormatCountCapture { // * `n` for the special case of writing out the number of characters // written to this point. The resulting value must be captured within an // `absl::FormatCountCapture` type. +// * `v` for values using the default format for a deduced type. These deduced +// types include many of the primitive types denoted here as well as +// user-defined types containing the proper extensions. (See below for more +// information.) // // Implementation-defined behavior: // * A null pointer provided to "%s" or "%p" is output as "(nil)". @@ -239,6 +245,15 @@ class FormatCountCapture { // "%s%d%n", "hello", 123, absl::FormatCountCapture(&n)); // EXPECT_EQ(8, n); // +// NOTE: the `v` specifier (for "value") is a type specifier not present in the +// POSIX specification. %v will format values according to their deduced type. +// `v` uses `d` for signed integer values, `u` for unsigned integer values, `g` +// for floating point values, and formats boolean values as "true"/"false" +// (instead of 1 or 0 for booleans formatted using d). `const char*` is not +// supported; please use `std:string` and `string_view`. `char` is also not +// supported due to ambiguity of the type. This specifier does not support +// modifiers. +// // The `FormatSpec` intrinsically supports all of these fundamental C++ types: // // * Characters: `char`, `signed char`, `unsigned char` @@ -570,6 +585,41 @@ ABSL_MUST_USE_RESULT inline bool FormatUntyped( // StrFormat Extensions //------------------------------------------------------------------------------ // +// AbslStringify() +// +// A simpler customization API for formatting user-defined types using +// absl::StrFormat(). The API relies on detecting an overload in the +// user-defined type's namespace of a free (non-member) `AbslStringify()` +// function as a friend definition with the following signature: +// +// template <typename Sink> +// void AbslStringify(Sink& sink, const X& value); +// +// An `AbslStringify()` overload for a type should only be declared in the same +// file and namespace as said type. +// +// Note that unlike with AbslFormatConvert(), AbslStringify() does not allow +// customization of allowed conversion characters. AbslStringify() uses `%v` as +// the underlying conversion specififer. Additionally, AbslStringify() supports +// use with absl::StrCat while AbslFormatConvert() does not. +// +// Example: +// +// struct Point { +// // To add formatting support to `Point`, we simply need to add a free +// // (non-member) function `AbslStringify()`. This method prints in the +// // request format using the underlying `%v` specifier. You can add such a +// // free function using a friend declaration within the body of the class. +// // The sink parameter is a templated type to avoid requiring dependencies. +// template <typename Sink> +// friend void AbslStringify(Sink& sink, const Point& p) { +// absl::Format(&sink, "(%v, %v)", p.x, p.y); +// } +// +// int x; +// int y; +// }; +// // AbslFormatConvert() // // The StrFormat library provides a customization API for formatting @@ -616,9 +666,9 @@ ABSL_MUST_USE_RESULT inline bool FormatUntyped( // AbslFormatConvert(const Point& p, const absl::FormatConversionSpec& spec, // absl::FormatSink* s) { // if (spec.conversion_char() == absl::FormatConversionChar::s) { -// s->Append(absl::StrCat("x=", p.x, " y=", p.y)); +// absl::Format(s, "x=%vy=%v", p.x, p.y); // } else { -// s->Append(absl::StrCat(p.x, ",", p.y)); +// absl::Format(s, "%v,%v", p.x, p.y); // } // return {true}; // } @@ -772,17 +822,25 @@ enum class FormatConversionCharSet : uint64_t { // FormatSink // -// An abstraction to which conversions write their string data. +// A format sink is a generic abstraction to which conversions may write their +// formatted string data. `absl::FormatConvert()` uses this sink to write its +// formatted string. // class FormatSink { public: - // Appends `count` copies of `ch`. + // FormatSink::Append() + // + // Appends `count` copies of `ch` to the format sink. void Append(size_t count, char ch) { sink_->Append(count, ch); } + // Overload of FormatSink::Append() for appending the characters of a string + // view to a format sink. void Append(string_view v) { sink_->Append(v); } - // Appends the first `precision` bytes of `v`. If this is less than - // `width`, spaces will be appended first (if `left` is false), or + // FormatSink::PutPaddedString() + // + // Appends `precision` number of bytes of `v` to the format sink. If this is + // less than `width`, spaces will be appended first (if `left` is false), or // after (if `left` is true) to ensure the total amount appended is // at least `width`. bool PutPaddedString(string_view v, int width, int precision, bool left) { diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index 62ed262d..5198fb33 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -143,13 +143,20 @@ TEST_F(FormatEntryPointTest, AppendFormatFailWithV) { } TEST_F(FormatEntryPointTest, ManyArgs) { - EXPECT_EQ("24", StrFormat("%24$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); - EXPECT_EQ("60", StrFormat("%60$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, - 53, 54, 55, 56, 57, 58, 59, 60)); + EXPECT_EQ( + "60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 " + "36 35 34 33 32 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 " + "12 11 10 9 8 7 6 5 4 3 2 1", + StrFormat("%60$d %59$d %58$d %57$d %56$d %55$d %54$d %53$d %52$d %51$d " + "%50$d %49$d %48$d %47$d %46$d %45$d %44$d %43$d %42$d %41$d " + "%40$d %39$d %38$d %37$d %36$d %35$d %34$d %33$d %32$d %31$d " + "%30$d %29$d %28$d %27$d %26$d %25$d %24$d %23$d %22$d %21$d " + "%20$d %19$d %18$d %17$d %16$d %15$d %14$d %13$d %12$d %11$d " + "%10$d %9$d %8$d %7$d %6$d %5$d %4$d %3$d %2$d %1$d", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); } TEST_F(FormatEntryPointTest, Preparsed) { @@ -1135,6 +1142,51 @@ TEST_F(FormatExtensionTest, AbslStringifyExampleUsingFormat) { EXPECT_EQ(absl::StrFormat("a %v z", p), "a (10, 20) z"); } +enum class EnumClassWithStringify { Many = 0, Choices = 1 }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumClassWithStringify e) { + absl::Format(&sink, "%s", + e == EnumClassWithStringify::Many ? "Many" : "Choices"); +} + +enum EnumWithStringify { Many, Choices }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumWithStringify e) { + absl::Format(&sink, "%s", e == EnumWithStringify::Many ? "Many" : "Choices"); +} + +TEST_F(FormatExtensionTest, AbslStringifyWithEnumWithV) { + const auto e_class = EnumClassWithStringify::Choices; + EXPECT_EQ(absl::StrFormat("My choice is %v", e_class), + "My choice is Choices"); + + const auto e = EnumWithStringify::Choices; + EXPECT_EQ(absl::StrFormat("My choice is %v", e), "My choice is Choices"); +} + +TEST_F(FormatExtensionTest, AbslStringifyEnumWithD) { + const auto e_class = EnumClassWithStringify::Many; + EXPECT_EQ(absl::StrFormat("My choice is %d", e_class), "My choice is 0"); + + const auto e = EnumWithStringify::Choices; + EXPECT_EQ(absl::StrFormat("My choice is %d", e), "My choice is 1"); +} + +enum class EnumWithLargerValue { x = 32 }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumWithLargerValue e) { + absl::Format(&sink, "%s", "Many"); +} + +TEST_F(FormatExtensionTest, AbslStringifyEnumOtherSpecifiers) { + const auto e = EnumWithLargerValue::x; + EXPECT_EQ(absl::StrFormat("My choice is %g", e), "My choice is 32"); + EXPECT_EQ(absl::StrFormat("My choice is %x", e), "My choice is 20"); +} + } // namespace // Some codegen thunks that we can use to easily dump the generated assembly for diff --git a/absl/strings/str_split.cc b/absl/strings/str_split.cc index e08c26b6..72ba7c02 100644 --- a/absl/strings/str_split.cc +++ b/absl/strings/str_split.cc @@ -60,19 +60,23 @@ absl::string_view GenericFind(absl::string_view text, // Finds using absl::string_view::find(), therefore the length of the found // delimiter is delimiter.length(). struct LiteralPolicy { - size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) { + static size_t Find(absl::string_view text, absl::string_view delimiter, + size_t pos) { return text.find(delimiter, pos); } - size_t Length(absl::string_view delimiter) { return delimiter.length(); } + static size_t Length(absl::string_view delimiter) { + return delimiter.length(); + } }; // Finds using absl::string_view::find_first_of(), therefore the length of the // found delimiter is 1. struct AnyOfPolicy { - size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) { + static size_t Find(absl::string_view text, absl::string_view delimiter, + size_t pos) { return text.find_first_of(delimiter, pos); } - size_t Length(absl::string_view /* delimiter */) { return 1; } + static size_t Length(absl::string_view /* delimiter */) { return 1; } }; } // namespace @@ -123,8 +127,7 @@ ByLength::ByLength(ptrdiff_t length) : length_(length) { ABSL_RAW_CHECK(length > 0, ""); } -absl::string_view ByLength::Find(absl::string_view text, - size_t pos) const { +absl::string_view ByLength::Find(absl::string_view text, size_t pos) const { pos = std::min(pos, text.size()); // truncate `pos` absl::string_view substr = text.substr(pos); // If the string is shorter than the chunk size we say we diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc index 1b4427b8..04a64a42 100644 --- a/absl/strings/str_split_test.cc +++ b/absl/strings/str_split_test.cc @@ -369,7 +369,7 @@ TEST(SplitIterator, EqualityAsEndCondition) { TEST(Splitter, RangeIterators) { auto splitter = absl::StrSplit("a,b,c", ','); std::vector<absl::string_view> output; - for (const absl::string_view& p : splitter) { + for (absl::string_view p : splitter) { output.push_back(p); } EXPECT_THAT(output, ElementsAre("a", "b", "c")); diff --git a/absl/strings/substitute.h b/absl/strings/substitute.h index 692fd03c..d6a5a690 100644 --- a/absl/strings/substitute.h +++ b/absl/strings/substitute.h @@ -55,6 +55,8 @@ // * bool (Printed as "true" or "false") // * pointer types other than char* (Printed as "0x<lower case hex string>", // except that null is printed as "NULL") +// * user-defined types via the `AbslStringify()` customization point. See the +// documentation for `absl::StrCat` for an explanation on how to use this. // // If an invalid format string is provided, Substitute returns an empty string // and SubstituteAndAppend does not change the provided output string. @@ -79,6 +81,7 @@ #include "absl/base/port.h" #include "absl/strings/ascii.h" #include "absl/strings/escaping.h" +#include "absl/strings/internal/stringify_sink.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" @@ -102,14 +105,14 @@ class Arg { // Overloads for string-y things // // Explicitly overload `const char*` so the compiler doesn't cast to `bool`. - Arg(const char* value) // NOLINT(runtime/explicit) + Arg(const char* value) // NOLINT(google-explicit-constructor) : piece_(absl::NullSafeStringView(value)) {} template <typename Allocator> Arg( // NOLINT const std::basic_string<char, std::char_traits<char>, Allocator>& value) noexcept : piece_(value) {} - Arg(absl::string_view value) // NOLINT(runtime/explicit) + Arg(absl::string_view value) // NOLINT(google-explicit-constructor) : piece_(value) {} // Overloads for primitives @@ -119,7 +122,7 @@ class Arg { // probably using them as 8-bit integers and would probably prefer an integer // representation. However, we can't really know, so we make the caller decide // what to do. - Arg(char value) // NOLINT(runtime/explicit) + Arg(char value) // NOLINT(google-explicit-constructor) : piece_(scratch_, 1) { scratch_[0] = value; } @@ -133,12 +136,12 @@ class Arg { static_cast<size_t>( numbers_internal::FastIntToBuffer(value, scratch_) - scratch_)) {} - Arg(int value) // NOLINT(runtime/explicit) + Arg(int value) // NOLINT(google-explicit-constructor) : piece_(scratch_, static_cast<size_t>( numbers_internal::FastIntToBuffer(value, scratch_) - scratch_)) {} - Arg(unsigned int value) // NOLINT(runtime/explicit) + Arg(unsigned int value) // NOLINT(google-explicit-constructor) : piece_(scratch_, static_cast<size_t>( numbers_internal::FastIntToBuffer(value, scratch_) - @@ -163,17 +166,23 @@ class Arg { static_cast<size_t>( numbers_internal::FastIntToBuffer(value, scratch_) - scratch_)) {} - Arg(float value) // NOLINT(runtime/explicit) + Arg(float value) // NOLINT(google-explicit-constructor) : piece_(scratch_, numbers_internal::SixDigitsToBuffer(value, scratch_)) { } - Arg(double value) // NOLINT(runtime/explicit) + Arg(double value) // NOLINT(google-explicit-constructor) : piece_(scratch_, numbers_internal::SixDigitsToBuffer(value, scratch_)) { } - Arg(bool value) // NOLINT(runtime/explicit) + Arg(bool value) // NOLINT(google-explicit-constructor) : piece_(value ? "true" : "false") {} - Arg(Hex hex); // NOLINT(runtime/explicit) - Arg(Dec dec); // NOLINT(runtime/explicit) + template <typename T, typename = typename std::enable_if< + strings_internal::HasAbslStringify<T>::value>::type> + Arg( // NOLINT(google-explicit-constructor) + const T& v, strings_internal::StringifySink&& sink = {}) + : piece_(strings_internal::ExtractStringification(sink, v)) {} + + Arg(Hex hex); // NOLINT(google-explicit-constructor) + Arg(Dec dec); // NOLINT(google-explicit-constructor) // vector<bool>::reference and const_reference require special help to convert // to `Arg` because it requires two user defined conversions. @@ -188,13 +197,14 @@ class Arg { // `void*` values, with the exception of `char*`, are printed as // "0x<hex value>". However, in the case of `nullptr`, "NULL" is printed. - Arg(const void* value); // NOLINT(runtime/explicit) + Arg(const void* value); // NOLINT(google-explicit-constructor) // Normal enums are already handled by the integer formatters. // This overload matches only scoped enums. template <typename T, typename = typename std::enable_if< - std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type> + std::is_enum<T>{} && !std::is_convertible<T, int>{} && + !strings_internal::HasAbslStringify<T>::value>::type> Arg(T value) // NOLINT(google-explicit-constructor) : Arg(static_cast<typename std::underlying_type<T>::type>(value)) {} diff --git a/absl/strings/substitute_test.cc b/absl/strings/substitute_test.cc index 9e6b9403..ecf78d6b 100644 --- a/absl/strings/substitute_test.cc +++ b/absl/strings/substitute_test.cc @@ -22,6 +22,16 @@ namespace { +struct MyStruct { + template <typename Sink> + friend void AbslStringify(Sink& sink, const MyStruct& s) { + sink.Append("MyStruct{.value = "); + sink.Append(absl::StrCat(s.value)); + sink.Append("}"); + } + int value; +}; + TEST(SubstituteTest, Substitute) { // Basic. EXPECT_EQ("Hello, world!", absl::Substitute("$0, $1!", "Hello", "world")); @@ -70,7 +80,7 @@ TEST(SubstituteTest, Substitute) { // Volatile Pointer. // Like C++ streamed I/O, such pointers implicitly become bool volatile int vol = 237; - volatile int *volatile volptr = &vol; + volatile int* volatile volptr = &vol; str = absl::Substitute("$0", volptr); EXPECT_EQ("true", str); @@ -128,6 +138,11 @@ TEST(SubstituteTest, Substitute) { const char* null_cstring = nullptr; EXPECT_EQ("Text: ''", absl::Substitute("Text: '$0'", null_cstring)); + + MyStruct s1 = MyStruct{17}; + MyStruct s2 = MyStruct{1043}; + EXPECT_EQ("MyStruct{.value = 17}, MyStruct{.value = 1043}", + absl::Substitute("$0, $1", s1, s2)); } TEST(SubstituteTest, SubstituteAndAppend) { @@ -171,6 +186,12 @@ TEST(SubstituteTest, SubstituteAndAppend) { absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j"); EXPECT_EQ("a b c d e f g h i j", str); + + str.clear(); + MyStruct s1 = MyStruct{17}; + MyStruct s2 = MyStruct{1043}; + absl::SubstituteAndAppend(&str, "$0, $1", s1, s2); + EXPECT_EQ("MyStruct{.value = 17}, MyStruct{.value = 1043}", str); } TEST(SubstituteTest, VectorBoolRef) { @@ -232,7 +253,19 @@ TEST(SubstituteTest, Enums) { ScopedEnumUInt16::kEnum1)); } -#ifdef GTEST_HAS_DEATH_TEST +enum class EnumWithStringify { Many = 0, Choices = 1 }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumWithStringify e) { + sink.Append(e == EnumWithStringify::Many ? "Many" : "Choices"); +} + +TEST(SubstituteTest, AbslStringifyWithEnum) { + const auto e = EnumWithStringify::Choices; + EXPECT_EQ(absl::Substitute("$0", e), "Choices"); +} + +#if GTEST_HAS_DEATH_TEST TEST(SubstituteDeathTest, SubstituteDeath) { EXPECT_DEBUG_DEATH( |