diff options
Diffstat (limited to 'absl/strings')
62 files changed, 1362 insertions, 1048 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 53c57718..819bbe69 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -28,6 +28,20 @@ package( licenses(["notice"]) cc_library( + name = "string_view", + srcs = ["string_view.cc"], + hdrs = ["string_view.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + deps = [ + "//absl/base", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:throw_delegate", + ], +) + +cc_library( name = "strings", srcs = [ "ascii.cc", @@ -50,7 +64,6 @@ cc_library( "str_cat.cc", "str_replace.cc", "str_split.cc", - "string_view.cc", "substitute.cc", ], hdrs = [ @@ -72,8 +85,15 @@ cc_library( ], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, + textual_hdrs = [ + # string_view.h was once part of :strings, so string_view.h is + # re-exported for backwards compatibility. + # New code should directly depend on :string_view. + "string_view.h", + ], deps = [ ":internal", + ":string_view", "//absl/base", "//absl/base:config", "//absl/base:core_headers", @@ -263,6 +283,7 @@ cc_test( tags = ["benchmark"], visibility = ["//visibility:private"], deps = [ + ":string_view", ":strings", "//absl/base:core_headers", "//absl/base:raw_logging_internal", @@ -277,7 +298,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ - ":strings", + ":string_view", "//absl/base:config", "//absl/base:core_headers", "//absl/base:dynamic_annotations", @@ -350,6 +371,7 @@ cc_test( cc_test( name = "cord_rep_btree_test", size = "medium", + timeout = "long", srcs = ["internal/cord_rep_btree_test.cc"], copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], @@ -457,7 +479,6 @@ cc_library( ":cordz_update_scope", ":cordz_update_tracker", ":internal", - ":str_format", ":strings", "//absl/base", "//absl/base:config", @@ -514,6 +535,7 @@ cc_library( "//absl/container:inlined_vector", "//absl/debugging:stacktrace", "//absl/synchronization", + "//absl/time", "//absl/types:span", ], ) @@ -773,10 +795,10 @@ cc_test( "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian", - "//absl/base:raw_logging_internal", "//absl/container:fixed_array", "//absl/hash", "//absl/log", + "//absl/log:check", "//absl/random", "@com_google_googletest//:gtest_main", ], @@ -1018,7 +1040,7 @@ cc_test( ":pow10_helper", ":strings", "//absl/base:config", - "//absl/base:raw_logging_internal", + "//absl/log", "//absl/random", "//absl/random:distributions", "@com_google_googletest//:gtest_main", @@ -1095,7 +1117,7 @@ cc_test( deps = [ ":strings", "//absl/base:config", - "//absl/base:raw_logging_internal", + "//absl/log:check", "@com_google_googletest//:gtest_main", ], ) @@ -1168,6 +1190,7 @@ cc_library( ":strings", "//absl/base:config", "//absl/base:core_headers", + "//absl/container:inlined_vector", "//absl/functional:function_ref", "//absl/meta:type_traits", "//absl/numeric:bits", @@ -1252,6 +1275,7 @@ cc_test( ":strings", "//absl/base:core_headers", "//absl/base:raw_logging_internal", + "//absl/log", "//absl/types:optional", "@com_google_googletest//:gtest_main", ], @@ -1317,3 +1341,15 @@ cc_binary( "//absl/types:optional", ], ) + +cc_test( + name = "char_formatting_test", + srcs = [ + "char_formatting_test.cc", + ], + deps = [ + ":str_format", + ":strings", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index a0f7cc54..1959dc91 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -16,6 +16,23 @@ absl_cc_library( NAME + string_view + HDRS + string_view.h + SRCS + string_view.cc + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::base + absl::config + absl::core_headers + absl::throw_delegate + PUBLIC +) + +absl_cc_library( + NAME strings HDRS "ascii.h" @@ -30,7 +47,6 @@ absl_cc_library( "str_join.h" "str_replace.h" "str_split.h" - "string_view.h" "strip.h" "substitute.h" SRCS @@ -54,11 +70,11 @@ absl_cc_library( "str_cat.cc" "str_replace.cc" "str_split.cc" - "string_view.cc" "substitute.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::string_view absl::strings_internal absl::base absl::bits @@ -317,7 +333,7 @@ absl_cc_test( absl::core_headers absl::pow10_helper absl::config - absl::raw_logging_internal + absl::log absl::random_random absl::random_distributions absl::strings_internal @@ -372,9 +388,9 @@ absl_cc_test( COPTS ${ABSL_TEST_COPTS} DEPS - absl::strings + absl::check absl::config - absl::raw_logging_internal + absl::strings GTest::gmock_main ) @@ -432,6 +448,7 @@ absl_cc_library( absl::strings absl::config absl::core_headers + absl::inlined_vector absl::numeric_representation absl::type_traits absl::utility @@ -516,6 +533,7 @@ absl_cc_test( absl::strings absl::str_format_internal absl::core_headers + absl::log absl::raw_logging_internal absl::int128 GTest::gmock_main @@ -547,6 +565,20 @@ absl_cc_test( GTest::gmock_main ) +absl_cc_test( + NAME + char_formatting_test + SRCS + "char_formatting_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::str_format + absl::strings + GTest::gmock_main +) + # Internal-only target, do not depend on directly. absl_cc_library( NAME @@ -745,6 +777,7 @@ absl_cc_library( absl::raw_logging_internal absl::stacktrace absl::synchronization + absl::time ) absl_cc_test( @@ -959,19 +992,20 @@ absl_cc_test( COPTS ${ABSL_TEST_COPTS} DEPS - absl::cord - absl::str_format - absl::strings absl::base + absl::check absl::config + absl::cord absl::cord_test_helpers absl::cordz_test_helpers absl::core_headers absl::endian + absl::fixed_array absl::hash + absl::log absl::random_random - absl::raw_logging_internal - absl::fixed_array + absl::str_format + absl::strings GTest::gmock_main ) diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc index 868df2d1..16c96899 100644 --- a/absl/strings/ascii.cc +++ b/absl/strings/ascii.cc @@ -14,6 +14,10 @@ #include "absl/strings/ascii.h" +#include <climits> +#include <cstring> +#include <string> + namespace absl { ABSL_NAMESPACE_BEGIN namespace ascii_internal { @@ -153,18 +157,62 @@ ABSL_DLL const char kToUpper[256] = { }; // clang-format on +template <bool ToUpper> +constexpr void AsciiStrCaseFold(char* p, char* end) { + // The upper- and lowercase versions of ASCII characters differ by only 1 bit. + // When we need to flip the case, we can xor with this bit to achieve the + // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We + // could have chosen 'z' and 'Z', or any other pair of characters as they all + // have the same single bit difference. + constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A'; + + constexpr char ch_a = ToUpper ? 'a' : 'A'; + constexpr char ch_z = ToUpper ? 'z' : 'Z'; + for (; p < end; ++p) { + unsigned char v = static_cast<unsigned char>(*p); + // We use & instead of && to ensure this always stays branchless + // We use static_cast<int> to suppress -Wbitwise-instead-of-logical + bool is_in_range = static_cast<bool>(static_cast<int>(ch_a <= v) & + static_cast<int>(v <= ch_z)); + v ^= is_in_range ? kAsciiCaseBitFlip : 0; + *p = static_cast<char>(v); + } +} + +static constexpr size_t ValidateAsciiCasefold() { + constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN; + size_t incorrect_index = 0; + char lowered[num_chars] = {}; + char uppered[num_chars] = {}; + for (unsigned int i = 0; i < num_chars; ++i) { + uppered[i] = lowered[i] = static_cast<char>(i); + } + AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]); + AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]); + for (size_t i = 0; i < num_chars; ++i) { + const char ch = static_cast<char>(i), + ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch), + ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch); + if (uppered[i] != ch_upper || lowered[i] != ch_lower) { + incorrect_index = i > 0 ? i : num_chars; + break; + } + } + return incorrect_index; +} + +static_assert(ValidateAsciiCasefold() == 0, "error in case conversion"); + } // namespace ascii_internal void AsciiStrToLower(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_tolower(static_cast<unsigned char>(ch)); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size()); } void AsciiStrToUpper(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_toupper(static_cast<unsigned char>(ch)); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size()); } void RemoveExtraAsciiWhitespace(std::string* str) { diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc index dfed114c..4ea262f1 100644 --- a/absl/strings/ascii_test.cc +++ b/absl/strings/ascii_test.cc @@ -14,6 +14,7 @@ #include "absl/strings/ascii.h" +#include <algorithm> #include <cctype> #include <clocale> #include <cstring> @@ -189,14 +190,14 @@ TEST(AsciiStrTo, Lower) { const std::string str("GHIJKL"); const std::string str2("MNOPQR"); const absl::string_view sp(str2); - std::string mutable_str("STUVWX"); + std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ"); EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); absl::AsciiStrToLower(&mutable_str); - EXPECT_EQ("stuvwx", mutable_str); + EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str); char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), @@ -207,12 +208,12 @@ TEST(AsciiStrTo, Lower) { TEST(AsciiStrTo, Upper) { const char buf[] = "abcdef"; const std::string str("ghijkl"); - const std::string str2("mnopqr"); + const std::string str2("_`?@[{amnopqrstuvwxyz"); const absl::string_view sp(str2); EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf)); EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str)); - EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp)); + EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp)); char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), diff --git a/absl/strings/char_formatting_test.cc b/absl/strings/char_formatting_test.cc new file mode 100644 index 00000000..1692da70 --- /dev/null +++ b/absl/strings/char_formatting_test.cc @@ -0,0 +1,169 @@ +// Copyright 2023 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstddef> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/substitute.h" + +namespace { + +TEST(CharFormatting, Char) { + const char v = 'A'; + + // Desired behavior: does not compile: + // EXPECT_EQ(absl::StrCat(v, "B"), "AB"); + // EXPECT_EQ(absl::StrFormat("%vB", v), "AB"); + + // Legacy behavior: format as char: + EXPECT_EQ(absl::Substitute("$0B", v), "AB"); +} + +enum CharEnum : char {}; +TEST(CharFormatting, CharEnum) { + auto v = static_cast<CharEnum>('A'); + + // Desired behavior: format as decimal + EXPECT_EQ(absl::StrFormat("%vB", v), "65B"); + EXPECT_EQ(absl::StrCat(v, "B"), "65B"); + + // Legacy behavior: format as character: + + // Some older versions of gcc behave differently in this one case +#if !defined(__GNUC__) || defined(__clang__) + EXPECT_EQ(absl::Substitute("$0B", v), "AB"); +#endif +} + +enum class CharEnumClass: char {}; +TEST(CharFormatting, CharEnumClass) { + auto v = static_cast<CharEnumClass>('A'); + + // Desired behavior: format as decimal: + EXPECT_EQ(absl::StrFormat("%vB", v), "65B"); + EXPECT_EQ(absl::StrCat(v, "B"), "65B"); + + // Legacy behavior: format as character: + EXPECT_EQ(absl::Substitute("$0B", v), "AB"); +} + +TEST(CharFormatting, UnsignedChar) { + const unsigned char v = 'A'; + + // Desired behavior: format as decimal: + EXPECT_EQ(absl::StrCat(v, "B"), "65B"); + EXPECT_EQ(absl::Substitute("$0B", v), "65B"); + EXPECT_EQ(absl::StrFormat("%vB", v), "65B"); + + // Signedness check + const unsigned char w = 255; + EXPECT_EQ(absl::StrCat(w, "B"), "255B"); + EXPECT_EQ(absl::Substitute("$0B", w), "255B"); + // EXPECT_EQ(absl::StrFormat("%vB", v), "255B"); +} + +TEST(CharFormatting, SignedChar) { + const signed char v = 'A'; + + // Desired behavior: format as decimal: + EXPECT_EQ(absl::StrCat(v, "B"), "65B"); + EXPECT_EQ(absl::Substitute("$0B", v), "65B"); + EXPECT_EQ(absl::StrFormat("%vB", v), "65B"); + + // Signedness check + const signed char w = -128; + EXPECT_EQ(absl::StrCat(w, "B"), "-128B"); + EXPECT_EQ(absl::Substitute("$0B", w), "-128B"); +} + +enum UnsignedCharEnum : unsigned char {}; +TEST(CharFormatting, UnsignedCharEnum) { + auto v = static_cast<UnsignedCharEnum>('A'); + + // Desired behavior: format as decimal: + EXPECT_EQ(absl::StrCat(v, "B"), "65B"); + EXPECT_EQ(absl::Substitute("$0B", v), "65B"); + EXPECT_EQ(absl::StrFormat("%vB", v), "65B"); + + // Signedness check + auto w = static_cast<UnsignedCharEnum>(255); + EXPECT_EQ(absl::StrCat(w, "B"), "255B"); + EXPECT_EQ(absl::Substitute("$0B", w), "255B"); + EXPECT_EQ(absl::StrFormat("%vB", w), "255B"); +} + +enum SignedCharEnum : signed char {}; +TEST(CharFormatting, SignedCharEnum) { + auto v = static_cast<SignedCharEnum>('A'); + + // Desired behavior: format as decimal: + EXPECT_EQ(absl::StrCat(v, "B"), "65B"); + EXPECT_EQ(absl::Substitute("$0B", v), "65B"); + EXPECT_EQ(absl::StrFormat("%vB", v), "65B"); + + // Signedness check + auto w = static_cast<SignedCharEnum>(-128); + EXPECT_EQ(absl::StrCat(w, "B"), "-128B"); + EXPECT_EQ(absl::Substitute("$0B", w), "-128B"); + EXPECT_EQ(absl::StrFormat("%vB", w), "-128B"); +} + +enum class UnsignedCharEnumClass : unsigned char {}; +TEST(CharFormatting, UnsignedCharEnumClass) { + auto v = static_cast<UnsignedCharEnumClass>('A'); + + // Desired behavior: format as decimal: + EXPECT_EQ(absl::StrCat(v, "B"), "65B"); + EXPECT_EQ(absl::Substitute("$0B", v), "65B"); + EXPECT_EQ(absl::StrFormat("%vB", v), "65B"); + + // Signedness check + auto w = static_cast<UnsignedCharEnumClass>(255); + EXPECT_EQ(absl::StrCat(w, "B"), "255B"); + EXPECT_EQ(absl::Substitute("$0B", w), "255B"); + EXPECT_EQ(absl::StrFormat("%vB", w), "255B"); +} + +enum SignedCharEnumClass : signed char {}; +TEST(CharFormatting, SignedCharEnumClass) { + auto v = static_cast<SignedCharEnumClass>('A'); + + // Desired behavior: format as decimal: + EXPECT_EQ(absl::StrCat(v, "B"), "65B"); + EXPECT_EQ(absl::Substitute("$0B", v), "65B"); + EXPECT_EQ(absl::StrFormat("%vB", v), "65B"); + + // Signedness check + auto w = static_cast<SignedCharEnumClass>(-128); + EXPECT_EQ(absl::StrCat(w, "B"), "-128B"); + EXPECT_EQ(absl::Substitute("$0B", w), "-128B"); + EXPECT_EQ(absl::StrFormat("%vB", w), "-128B"); +} + +#ifdef __cpp_lib_byte +TEST(CharFormatting, StdByte) { + auto v = static_cast<std::byte>('A'); + // Desired behavior: format as 0xff + // (No APIs do this today.) + + // Legacy behavior: format as decimal: + EXPECT_EQ(absl::StrCat(v, "B"), "65B"); + EXPECT_EQ(absl::Substitute("$0B", v), "65B"); + EXPECT_EQ(absl::StrFormat("%vB", v), "65B"); +} +#endif // _cpp_lib_byte + +} // namespace diff --git a/absl/strings/charconv.cc b/absl/strings/charconv.cc index 69d420bc..778a1c75 100644 --- a/absl/strings/charconv.cc +++ b/absl/strings/charconv.cc @@ -21,6 +21,7 @@ #include <limits> #include "absl/base/casts.h" +#include "absl/base/config.h" #include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/strings/internal/charconv_bigint.h" @@ -118,10 +119,17 @@ struct FloatTraits<double> { static constexpr int kEiselLemireMaxExclusiveExp10 = 309; static double MakeNan(const char* tagp) { +#if ABSL_HAVE_BUILTIN(__builtin_nan) + // Use __builtin_nan() if available since it has a fix for + // https://bugs.llvm.org/show_bug.cgi?id=37778 + // std::nan may use the glibc implementation. + return __builtin_nan(tagp); +#else // Support nan no matter which namespace it's in. Some platforms // incorrectly don't put it in namespace std. using namespace std; // NOLINT return nan(tagp); +#endif } // Builds a nonzero floating point number out of the provided parts. @@ -184,10 +192,17 @@ struct FloatTraits<float> { static constexpr int kEiselLemireMaxExclusiveExp10 = 39; static float MakeNan(const char* tagp) { +#if ABSL_HAVE_BUILTIN(__builtin_nanf) + // Use __builtin_nanf() if available since it has a fix for + // https://bugs.llvm.org/show_bug.cgi?id=37778 + // std::nanf may use the glibc implementation. + return __builtin_nanf(tagp); +#else // Support nanf no matter which namespace it's in. Some platforms // incorrectly don't put it in namespace std. using namespace std; // NOLINT - return nanf(tagp); + return std::nanf(tagp); +#endif } static float Make(mantissa_t mantissa, int exponent, bool sign) { @@ -203,7 +218,8 @@ struct FloatTraits<float> { if (mantissa > kMantissaMask) { // Normal value. // Adjust by 127 for the exponent representation bias, and an additional - // 23 due to the implied decimal point in the IEEE mantissa represenation. + // 23 due to the implied decimal point in the IEEE mantissa + // representation. flt += static_cast<uint32_t>(exponent + 127 + kTargetMantissaBits - 1) << 23; mantissa &= kMantissaMask; @@ -349,7 +365,8 @@ bool HandleEdgeCase(const strings_internal::ParsedFloat& input, bool negative, // https://bugs.llvm.org/show_bug.cgi?id=37778 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86113 constexpr ptrdiff_t kNanBufferSize = 128; -#if defined(__GNUC__) || (defined(__clang__) && __clang_major__ < 7) +#if (defined(__GNUC__) && !defined(__clang__)) || \ + (defined(__clang__) && __clang_major__ < 7) volatile char n_char_sequence[kNanBufferSize]; #else char n_char_sequence[kNanBufferSize]; @@ -462,7 +479,7 @@ uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact, // the low bit of `value` is set. // // In inexact mode, the nonzero error means the actual value is greater - // than the halfway point and we must alway round up. + // than the halfway point and we must always round up. if ((value & 1) == 1 || !input_exact) { ++value; } diff --git a/absl/strings/charconv.h b/absl/strings/charconv.h index 7c509812..111c7120 100644 --- a/absl/strings/charconv.h +++ b/absl/strings/charconv.h @@ -22,7 +22,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN -// Workalike compatibilty version of std::chars_format from C++17. +// Workalike compatibility version of std::chars_format from C++17. // // This is an bitfield enumerator which can be passed to absl::from_chars to // configure the string-to-float conversion. @@ -48,7 +48,7 @@ struct from_chars_result { std::errc ec; }; -// Workalike compatibilty version of std::from_chars from C++17. Currently +// Workalike compatibility version of std::from_chars from C++17. Currently // this only supports the `double` and `float` types. // // This interface incorporates the proposed resolutions for library issues diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 1d33dd83..14976aef 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -48,7 +48,6 @@ #include "absl/strings/internal/cordz_update_tracker.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_format.h" #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" @@ -795,7 +794,7 @@ int CompareChunks(absl::string_view* lhs, absl::string_view* rhs, } // This overload set computes comparison results from memcmp result. This -// interface is used inside GenericCompare below. Differet implementations +// interface is used inside GenericCompare below. Different implementations // are specialized for int and bool. For int we clamp result to {-1, 0, 1} // set. For bool we just interested in "value == 0". template <typename ResultType> diff --git a/absl/strings/cord.h b/absl/strings/cord.h index c4a0d5aa..457ccf06 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -110,9 +110,30 @@ enum class CordMemoryAccounting { // Counts the *approximate* number of bytes held in full or in part by this // Cord (which may not remain the same between invocations). Cords that share // memory could each be "charged" independently for the same shared memory. + // See also comment on `kTotalMorePrecise` on internally shared memory. kTotal, // Counts the *approximate* number of bytes held in full or in part by this + // Cord for the distinct memory held by this cord. This option is similar + // to `kTotal`, except that if the cord has multiple references to the same + // memory, that memory is only counted once. + // + // For example: + // absl::Cord cord; + // cord.append(some_other_cord); + // cord.append(some_other_cord); + // // Counts `some_other_cord` twice: + // cord.EstimatedMemoryUsage(kTotal); + // // Counts `some_other_cord` once: + // cord.EstimatedMemoryUsage(kTotalMorePrecise); + // + // The `kTotalMorePrecise` number is more expensive to compute as it requires + // deduplicating all memory references. Applications should prefer to use + // `kFairShare` or `kTotal` unless they really need a more precise estimate + // on "how much memory is potentially held / kept alive by this cord?" + kTotalMorePrecise, + + // Counts the *approximate* number of bytes held in full or in part by this // Cord weighted by the sharing ratio of that data. For example, if some data // edge is shared by 4 different Cords, then each cord is attributed 1/4th of // the total memory usage as a 'fair share' of the total memory usage. @@ -661,7 +682,7 @@ class Cord { class CharRange { public: // Fulfill minimum c++ container requirements [container.requirements] - // Theses (partial) container type definitions allow CharRange to be used + // These (partial) container type definitions allow CharRange to be used // in various utilities expecting a subset of [container.requirements]. // For example, the below enables using `::testing::ElementsAre(...)` using value_type = char; @@ -1273,10 +1294,16 @@ inline size_t Cord::EstimatedMemoryUsage( CordMemoryAccounting accounting_method) const { size_t result = sizeof(Cord); if (const absl::cord_internal::CordRep* rep = contents_.tree()) { - if (accounting_method == CordMemoryAccounting::kFairShare) { - result += cord_internal::GetEstimatedFairShareMemoryUsage(rep); - } else { - result += cord_internal::GetEstimatedMemoryUsage(rep); + switch (accounting_method) { + case CordMemoryAccounting::kFairShare: + result += cord_internal::GetEstimatedFairShareMemoryUsage(rep); + break; + case CordMemoryAccounting::kTotalMorePrecise: + result += cord_internal::GetMorePreciseMemoryUsage(rep); + break; + case CordMemoryAccounting::kTotal: + result += cord_internal::GetEstimatedMemoryUsage(rep); + break; } } return result; diff --git a/absl/strings/cord_analysis.cc b/absl/strings/cord_analysis.cc index 73d3c4e6..e859b0db 100644 --- a/absl/strings/cord_analysis.cc +++ b/absl/strings/cord_analysis.cc @@ -16,6 +16,7 @@ #include <cstddef> #include <cstdint> +#include <unordered_set> #include "absl/base/attributes.h" #include "absl/base/config.h" @@ -37,7 +38,7 @@ namespace cord_internal { namespace { // Accounting mode for analyzing memory usage. -enum class Mode { kTotal, kFairShare }; +enum class Mode { kFairShare, kTotal, kTotalMorePrecise }; // CordRepRef holds a `const CordRep*` reference in rep, and depending on mode, // holds a 'fraction' representing a cumulative inverse refcount weight. @@ -62,6 +63,23 @@ struct RawUsage { void Add(size_t size, CordRepRef<mode>) { total += size; } }; +// Overloaded representation of RawUsage that tracks the set of objects +// counted, and avoids double-counting objects referenced more than once +// by the same Cord. +template <> +struct RawUsage<Mode::kTotalMorePrecise> { + size_t total = 0; + // TODO(b/289250880): Replace this with a flat_hash_set. + std::unordered_set<const CordRep*> counted; + + void Add(size_t size, CordRepRef<Mode::kTotalMorePrecise> repref) { + if (counted.find(repref.rep) == counted.end()) { + counted.insert(repref.rep); + total += size; + } + } +}; + // Returns n / refcount avoiding a div for the common refcount == 1. template <typename refcount_t> double MaybeDiv(double d, refcount_t refcount) { @@ -183,6 +201,10 @@ size_t GetEstimatedFairShareMemoryUsage(const CordRep* rep) { return GetEstimatedUsage<Mode::kFairShare>(rep); } +size_t GetMorePreciseMemoryUsage(const CordRep* rep) { + return GetEstimatedUsage<Mode::kTotalMorePrecise>(rep); +} + } // namespace cord_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/cord_analysis.h b/absl/strings/cord_analysis.h index 7041ad1a..9b9527a5 100644 --- a/absl/strings/cord_analysis.h +++ b/absl/strings/cord_analysis.h @@ -31,6 +31,24 @@ namespace cord_internal { size_t GetEstimatedMemoryUsage(const CordRep* rep); // Returns the *approximate* number of bytes held in full or in part by this +// Cord for the distinct memory held by this cord. This is similar to +// `GetEstimatedMemoryUsage()`, except that if the cord has multiple references +// to the same memory, that memory is only counted once. +// +// For example: +// absl::Cord cord; +// cord.append(some_other_cord); +// cord.append(some_other_cord); +// // Calls GetEstimatedMemoryUsage() and counts `other_cord` twice: +// cord.EstimatedMemoryUsage(kTotal); +// // Calls GetMorePreciseMemoryUsage() and counts `other_cord` once: +// cord.EstimatedMemoryUsage(kTotalMorePrecise); +// +// This is more expensive than `GetEstimatedMemoryUsage()` as it requires +// deduplicating all memory references. +size_t GetMorePreciseMemoryUsage(const CordRep* rep); + +// Returns the *approximate* number of bytes held in full or in part by this // CordRep weighted by the sharing ratio of that data. For example, if some data // edge is shared by 4 different Cords, then each cord is attribute 1/4th of // the total memory usage as a 'fair share' of the total memory usage. diff --git a/absl/strings/cord_buffer.h b/absl/strings/cord_buffer.h index 15494b31..bc0e4e45 100644 --- a/absl/strings/cord_buffer.h +++ b/absl/strings/cord_buffer.h @@ -160,7 +160,6 @@ class CordBuffer { // for more information on buffer capacities and intended usage. static CordBuffer CreateWithDefaultLimit(size_t capacity); - // CordBuffer::CreateWithCustomLimit() // // Creates a CordBuffer instance of the desired `capacity` rounded to an @@ -336,7 +335,7 @@ class CordBuffer { } // Returns the available area of the internal SSO data - absl::Span<char> long_available() { + absl::Span<char> long_available() const { assert(!is_short()); const size_t length = long_rep.rep->length; return absl::Span<char>(long_rep.rep->Data() + length, @@ -460,9 +459,7 @@ inline constexpr size_t CordBuffer::MaximumPayload() { } inline constexpr size_t CordBuffer::MaximumPayload(size_t block_size) { - // TODO(absl-team): Use std::min when C++11 support is dropped. - return (kCustomLimit < block_size ? kCustomLimit : block_size) - - cord_internal::kFlatOverhead; + return (std::min)(kCustomLimit, block_size) - cord_internal::kFlatOverhead; } inline CordBuffer CordBuffer::CreateWithDefaultLimit(size_t capacity) { diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 5603e94c..36e397ed 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -30,10 +30,11 @@ #include "gtest/gtest.h" #include "absl/base/config.h" #include "absl/base/internal/endian.h" -#include "absl/base/internal/raw_logging.h" #include "absl/base/macros.h" #include "absl/container/fixed_array.h" #include "absl/hash/hash.h" +#include "absl/log/check.h" +#include "absl/log/log.h" #include "absl/random/random.h" #include "absl/strings/cord_test_helpers.h" #include "absl/strings/cordz_test_helpers.h" @@ -58,6 +59,8 @@ using absl::cord_internal::CordRepSubstring; using absl::cord_internal::CordzUpdateTracker; using absl::cord_internal::kFlatOverhead; using absl::cord_internal::kMaxFlatLength; +using ::testing::ElementsAre; +using ::testing::Le; static std::string RandomLowercaseString(RandomEngine* rng); static std::string RandomLowercaseString(RandomEngine* rng, size_t length); @@ -208,9 +211,8 @@ class CordTestPeer { } static Cord MakeSubstring(Cord src, size_t offset, size_t length) { - ABSL_RAW_CHECK(src.contents_.is_tree(), "Can not be inlined"); - ABSL_RAW_CHECK(src.ExpectedChecksum() == absl::nullopt, - "Can not be hardened"); + CHECK(src.contents_.is_tree()) << "Can not be inlined"; + CHECK(!src.ExpectedChecksum().has_value()) << "Can not be hardened"; Cord cord; auto* tree = cord_internal::SkipCrcNode(src.contents_.tree()); auto* rep = CordRepSubstring::Create(CordRep::Ref(tree), offset, length); @@ -372,7 +374,7 @@ TEST_P(CordTest, GigabyteCordFromExternal) { for (int i = 0; i < 1024; ++i) { c.Append(from); } - ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); + LOG(INFO) << "Made a Cord with " << c.size() << " bytes!"; // Note: on a 32-bit build, this comes out to 2,818,048,000 bytes. // Note: on a 64-bit build, this comes out to 171,932,385,280 bytes. } @@ -618,7 +620,7 @@ TEST_P(CordTest, AppendEmptyBufferToTree) { TEST_P(CordTest, AppendSmallBuffer) { absl::Cord cord; absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3); - ASSERT_THAT(buffer.capacity(), ::testing::Le(15)); + ASSERT_THAT(buffer.capacity(), Le(15)); memcpy(buffer.data(), "Abc", 3); buffer.SetLength(3); cord.Append(std::move(buffer)); @@ -632,7 +634,7 @@ TEST_P(CordTest, AppendSmallBuffer) { EXPECT_EQ(buffer.length(), 0); // NOLINT EXPECT_GT(buffer.capacity(), 0); // NOLINT - EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("Abcdefgh")); + EXPECT_THAT(cord.Chunks(), ElementsAre("Abcdefgh")); } TEST_P(CordTest, AppendAndPrependBufferArePrecise) { @@ -671,7 +673,7 @@ TEST_P(CordTest, AppendAndPrependBufferArePrecise) { TEST_P(CordTest, PrependSmallBuffer) { absl::Cord cord; absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3); - ASSERT_THAT(buffer.capacity(), ::testing::Le(15)); + ASSERT_THAT(buffer.capacity(), Le(15)); memcpy(buffer.data(), "Abc", 3); buffer.SetLength(3); cord.Prepend(std::move(buffer)); @@ -685,7 +687,7 @@ TEST_P(CordTest, PrependSmallBuffer) { EXPECT_EQ(buffer.length(), 0); // NOLINT EXPECT_GT(buffer.capacity(), 0); // NOLINT - EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("defghAbc")); + EXPECT_THAT(cord.Chunks(), ElementsAre("defghAbc")); } TEST_P(CordTest, AppendLargeBuffer) { @@ -707,7 +709,7 @@ TEST_P(CordTest, AppendLargeBuffer) { EXPECT_EQ(buffer.length(), 0); // NOLINT EXPECT_GT(buffer.capacity(), 0); // NOLINT - EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s1, s2)); + EXPECT_THAT(cord.Chunks(), ElementsAre(s1, s2)); } TEST_P(CordTest, PrependLargeBuffer) { @@ -729,7 +731,7 @@ TEST_P(CordTest, PrependLargeBuffer) { EXPECT_EQ(buffer.length(), 0); // NOLINT EXPECT_GT(buffer.capacity(), 0); // NOLINT - EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s2, s1)); + EXPECT_THAT(cord.Chunks(), ElementsAre(s2, s1)); } class CordAppendBufferTest : public testing::TestWithParam<bool> { @@ -1245,15 +1247,15 @@ absl::Cord BigCord(size_t len, char v) { // Splice block into cord. absl::Cord SpliceCord(const absl::Cord& blob, int64_t offset, const absl::Cord& block) { - ABSL_RAW_CHECK(offset >= 0, ""); - ABSL_RAW_CHECK(offset + block.size() <= blob.size(), ""); + CHECK_GE(offset, 0); + CHECK_LE(static_cast<size_t>(offset) + block.size(), blob.size()); absl::Cord result(blob); result.RemoveSuffix(blob.size() - offset); result.Append(block); absl::Cord suffix(blob); suffix.RemovePrefix(offset + block.size()); result.Append(suffix); - ABSL_RAW_CHECK(blob.size() == result.size(), ""); + CHECK_EQ(blob.size(), result.size()); return result; } @@ -1763,6 +1765,8 @@ TEST_P(CordTest, ExternalMemoryGet) { // of empty and inlined cords, and flat nodes. constexpr auto kFairShare = absl::CordMemoryAccounting::kFairShare; +constexpr auto kTotalMorePrecise = + absl::CordMemoryAccounting::kTotalMorePrecise; // Creates a cord of `n` `c` values, making sure no string stealing occurs. absl::Cord MakeCord(size_t n, char c) { @@ -1774,12 +1778,14 @@ TEST(CordTest, CordMemoryUsageEmpty) { absl::Cord cord; EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage()); EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kFairShare)); + EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kTotalMorePrecise)); } TEST(CordTest, CordMemoryUsageInlined) { absl::Cord a("hello"); EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); EXPECT_EQ(a.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord)); + EXPECT_EQ(a.EstimatedMemoryUsage(kTotalMorePrecise), sizeof(absl::Cord)); } TEST(CordTest, CordMemoryUsageExternalMemory) { @@ -1789,6 +1795,7 @@ TEST(CordTest, CordMemoryUsageExternalMemory) { sizeof(absl::Cord) + 1000 + sizeof(CordRepExternal) + sizeof(intptr_t); EXPECT_EQ(cord.EstimatedMemoryUsage(), expected); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), expected); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), expected); } TEST(CordTest, CordMemoryUsageFlat) { @@ -1798,6 +1805,8 @@ TEST(CordTest, CordMemoryUsageFlat) { EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + flat_size); } TEST(CordTest, CordMemoryUsageSubStringSharedFlat) { @@ -1807,6 +1816,8 @@ TEST(CordTest, CordMemoryUsageSubStringSharedFlat) { absl::Cord cord = flat.Subcord(500, 1000); EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size / 2); } @@ -1817,6 +1828,8 @@ TEST(CordTest, CordMemoryUsageFlatShared) { const size_t flat_size = absl::CordTestPeer::Tree(cord)->flat()->AllocatedSize(); EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + flat_size); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + flat_size / 2); } @@ -1835,6 +1848,8 @@ TEST(CordTest, CordMemoryUsageFlatHardenedAndShared) { absl::Cord cord2(cord); EXPECT_EQ(cord2.EstimatedMemoryUsage(), sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size); + EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size); EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + (sizeof(CordRepCrc) + flat_size / 2) / 2); } @@ -1853,7 +1868,7 @@ TEST(CordTest, CordMemoryUsageBTree) { // windows DLL, we may have ODR like effects on the flag, meaning the DLL // code will run with the picked up default. if (!absl::CordTestPeer::Tree(cord1)->IsBtree()) { - ABSL_RAW_LOG(WARNING, "Cord library code not respecting btree flag"); + LOG(WARNING) << "Cord library code not respecting btree flag"; return; } @@ -1861,6 +1876,8 @@ TEST(CordTest, CordMemoryUsageBTree) { size_t rep1_shared_size = sizeof(CordRepBtree) + flats1_size / 2; EXPECT_EQ(cord1.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep1_size); + EXPECT_EQ(cord1.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + rep1_size); EXPECT_EQ(cord1.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + rep1_shared_size); @@ -1875,6 +1892,8 @@ TEST(CordTest, CordMemoryUsageBTree) { size_t rep2_size = sizeof(CordRepBtree) + flats2_size; EXPECT_EQ(cord2.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep2_size); + EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + rep2_size); EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + rep2_size); @@ -1883,6 +1902,8 @@ TEST(CordTest, CordMemoryUsageBTree) { EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_shared_size / 2 + rep2_size); @@ -1901,6 +1922,66 @@ TEST_P(CordTest, CordMemoryUsageInlineRep) { EXPECT_EQ(c1.EstimatedMemoryUsage(), c2.EstimatedMemoryUsage()); } +TEST_P(CordTest, CordMemoryUsageTotalMorePreciseMode) { + constexpr size_t kChunkSize = 2000; + std::string tmp_str(kChunkSize, 'x'); + const absl::Cord flat(std::move(tmp_str)); + + // Construct `fragmented` with two references into the same + // underlying buffer shared with `flat`: + absl::Cord fragmented(flat); + fragmented.Append(flat); + + // Memory usage of `flat`, minus the top-level Cord object: + const size_t flat_internal_usage = + flat.EstimatedMemoryUsage() - sizeof(absl::Cord); + + // `fragmented` holds a Cord and a CordRepBtree. That tree points to two + // copies of flat's internals, which we expect to dedup: + EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + + sizeof(CordRepBtree) + + flat_internal_usage); + + // This is a case where kTotal produces an overestimate: + EXPECT_EQ(fragmented.EstimatedMemoryUsage(), + sizeof(absl::Cord) + + sizeof(CordRepBtree) + + 2 * flat_internal_usage); +} + +TEST_P(CordTest, CordMemoryUsageTotalMorePreciseModeWithSubstring) { + constexpr size_t kChunkSize = 2000; + std::string tmp_str(kChunkSize, 'x'); + const absl::Cord flat(std::move(tmp_str)); + + // Construct `fragmented` with two references into the same + // underlying buffer shared with `flat`. + // + // This time, each reference is through a Subcord(): + absl::Cord fragmented; + fragmented.Append(flat.Subcord(1, kChunkSize - 2)); + fragmented.Append(flat.Subcord(1, kChunkSize - 2)); + + // Memory usage of `flat`, minus the top-level Cord object: + const size_t flat_internal_usage = + flat.EstimatedMemoryUsage() - sizeof(absl::Cord); + + // `fragmented` holds a Cord and a CordRepBtree. That tree points to two + // CordRepSubstrings, each pointing at flat's internals. + EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + + sizeof(CordRepBtree) + + 2 * sizeof(CordRepSubstring) + + flat_internal_usage); + + // This is a case where kTotal produces an overestimate: + EXPECT_EQ(fragmented.EstimatedMemoryUsage(), + sizeof(absl::Cord) + + sizeof(CordRepBtree) + + 2 * sizeof(CordRepSubstring) + + 2 * flat_internal_usage); +} } // namespace // Regtest for 7510292 (fix a bug introduced by 7465150) @@ -1938,8 +2019,7 @@ TEST_P(CordTest, DiabolicalGrowth) { std::string value; absl::CopyCordToString(cord, &value); EXPECT_EQ(value, expected); - ABSL_RAW_LOG(INFO, "Diabolical size allocated = %zu", - cord.EstimatedMemoryUsage()); + LOG(INFO) << "Diabolical size allocated = " << cord.EstimatedMemoryUsage(); } // The following tests check support for >4GB cords in 64-bit binaries, and diff --git a/absl/strings/cord_test_helpers.h b/absl/strings/cord_test_helpers.h index 31a1dc89..ca52240a 100644 --- a/absl/strings/cord_test_helpers.h +++ b/absl/strings/cord_test_helpers.h @@ -51,7 +51,7 @@ enum class TestCordSize { // existing inputs rather than copying contents of the input. kMedium = cord_internal::kMaxFlatLength / 2 + 1, - // A string value large enough to cause it to be stored in mutliple flats. + // A string value large enough to cause it to be stored in multiple flats. kLarge = cord_internal::kMaxFlatLength * 4 }; diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 93966846..2827fbaa 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -443,6 +443,8 @@ void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { } } +// Reverses the mapping in Base64EscapeInternal; see that method's +// documentation for details of the mapping. bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, size_t szdest, const signed char* unbase64, size_t* len) { @@ -676,7 +678,10 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, return ok; } -// The arrays below were generated by the following code +// The arrays below map base64-escaped characters back to their original values. +// For the inverse case, see k(WebSafe)Base64Chars in the internal +// escaping.cc. +// These arrays were generated by the following inversion code: // #include <sys/time.h> // #include <stdlib.h> // #include <string.h> @@ -703,8 +708,8 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, // } // } // -// where the value of "Base64[]" was replaced by one of the base-64 conversion -// tables from the functions below. +// where the value of "Base64[]" was replaced by one of k(WebSafe)Base64Chars +// in the internal escaping.cc. /* clang-format off */ constexpr signed char kUnBase64[] = { -1, -1, -1, -1, -1, -1, -1, -1, @@ -777,9 +782,6 @@ constexpr signed char kUnWebSafeBase64[] = { }; /* clang-format on */ -constexpr char kWebSafeBase64Chars[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; - template <typename String> bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, const signed char* unbase64) { @@ -880,30 +882,6 @@ std::string Utf8SafeCHexEscape(absl::string_view src) { return CEscapeInternal(src, true, true); } -// ---------------------------------------------------------------------- -// Base64Unescape() - base64 decoder -// Base64Escape() - base64 encoder -// WebSafeBase64Unescape() - Google's variation of base64 decoder -// WebSafeBase64Escape() - Google's variation of base64 encoder -// -// Check out -// https://datatracker.ietf.org/doc/html/rfc2045 for formal description, but -// what we care about is that... -// Take the encoded stuff in groups of 4 characters and turn each -// character into a code 0 to 63 thus: -// A-Z map to 0 to 25 -// a-z map to 26 to 51 -// 0-9 map to 52 to 61 -// +(- for WebSafe) maps to 62 -// /(_ for WebSafe) maps to 63 -// There will be four numbers, all less than 64 which can be represented -// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). -// Arrange the 6 digit binary numbers into three bytes as such: -// aaaaaabb bbbbcccc ccdddddd -// Equals signs (one or two) are used at the end of the encoded block to -// indicate that the text was not an integer multiple of three bytes long. -// ---------------------------------------------------------------------- - bool Base64Unescape(absl::string_view src, std::string* dest) { return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); } @@ -921,7 +899,7 @@ void Base64Escape(absl::string_view src, std::string* dest) { void WebSafeBase64Escape(absl::string_view src, std::string* dest) { strings_internal::Base64EscapeInternal( reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, - false, kWebSafeBase64Chars); + false, strings_internal::kWebSafeBase64Chars); } std::string Base64Escape(absl::string_view src) { @@ -936,7 +914,7 @@ std::string WebSafeBase64Escape(absl::string_view src) { std::string dest; strings_internal::Base64EscapeInternal( reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, - false, kWebSafeBase64Chars); + false, strings_internal::kWebSafeBase64Chars); return dest; } diff --git a/absl/strings/escaping.h b/absl/strings/escaping.h index 7c082fef..bf2a5898 100644 --- a/absl/strings/escaping.h +++ b/absl/strings/escaping.h @@ -121,7 +121,7 @@ std::string Utf8SafeCHexEscape(absl::string_view src); // // Encodes a `src` string into a base64-encoded 'dest' string with padding // characters. This function conforms with RFC 4648 section 4 (base64) and RFC -// 2045. See also CalculateBase64EscapedLen(). +// 2045. void Base64Escape(absl::string_view src, std::string* dest); std::string Base64Escape(absl::string_view src); diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc index 44ffcba7..9f62c1ee 100644 --- a/absl/strings/escaping_test.cc +++ b/absl/strings/escaping_test.cc @@ -562,6 +562,7 @@ template <typename StringType> void TestEscapeAndUnescape() { // Check the short strings; this tests the math (and boundaries) for (const auto& tc : base64_tests) { + // Test plain base64. StringType encoded("this junk should be ignored"); absl::Base64Escape(tc.plaintext, &encoded); EXPECT_EQ(encoded, tc.cyphertext); @@ -571,22 +572,26 @@ void TestEscapeAndUnescape() { EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded)); EXPECT_EQ(decoded, tc.plaintext); - StringType websafe(tc.cyphertext); - for (int c = 0; c < websafe.size(); ++c) { - if ('+' == websafe[c]) websafe[c] = '-'; - if ('/' == websafe[c]) websafe[c] = '_'; + StringType websafe_with_padding(tc.cyphertext); + for (unsigned int c = 0; c < websafe_with_padding.size(); ++c) { + if ('+' == websafe_with_padding[c]) websafe_with_padding[c] = '-'; + if ('/' == websafe_with_padding[c]) websafe_with_padding[c] = '_'; + // Intentionally keeping padding aka '='. + } + + // Test plain websafe (aka without padding). + StringType websafe(websafe_with_padding); + for (unsigned int c = 0; c < websafe.size(); ++c) { if ('=' == websafe[c]) { websafe.resize(c); break; } } - encoded = "this junk should be ignored"; absl::WebSafeBase64Escape(tc.plaintext, &encoded); EXPECT_EQ(encoded, websafe); EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe); - // Let's try the string version of the decoder decoded = "this junk should be ignored"; EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded)); EXPECT_EQ(decoded, tc.plaintext); diff --git a/absl/strings/internal/charconv_bigint.cc b/absl/strings/internal/charconv_bigint.cc index 282b639e..46b5289a 100644 --- a/absl/strings/internal/charconv_bigint.cc +++ b/absl/strings/internal/charconv_bigint.cc @@ -296,10 +296,8 @@ template <int max_words> std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex); if (first_pass) { // just copy, rather than multiplying by 1 - std::copy( - LargePowerOfFiveData(big_power), - LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power), - answer.words_); + std::copy_n(LargePowerOfFiveData(big_power), + LargePowerOfFiveSize(big_power), answer.words_); answer.size_ = LargePowerOfFiveSize(big_power); first_pass = false; } else { diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h index 8f702976..5c0c375d 100644 --- a/absl/strings/internal/charconv_bigint.h +++ b/absl/strings/internal/charconv_bigint.h @@ -92,7 +92,7 @@ class BigUnsigned { // numbers with this many decimal digits or fewer are representable by this // type. // - // Analagous to std::numeric_limits<BigUnsigned>::digits10. + // Analogous to std::numeric_limits<BigUnsigned>::digits10. static constexpr int Digits10() { // 9975007/1035508 is very slightly less than log10(2**32). return static_cast<uint64_t>(max_words) * 9975007 / 1035508; @@ -121,7 +121,7 @@ class BigUnsigned { ++size_; } } - std::fill(words_, words_ + word_shift, 0u); + std::fill_n(words_, word_shift, 0u); } } @@ -197,7 +197,7 @@ class BigUnsigned { } void SetToZero() { - std::fill(words_, words_ + size_, 0u); + std::fill_n(words_, size_, 0u); size_ = 0; } diff --git a/absl/strings/internal/charconv_parse_test.cc b/absl/strings/internal/charconv_parse_test.cc index bc2d1118..2b7b0821 100644 --- a/absl/strings/internal/charconv_parse_test.cc +++ b/absl/strings/internal/charconv_parse_test.cc @@ -19,7 +19,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/internal/raw_logging.h" +#include "absl/log/check.h" #include "absl/strings/str_cat.h" using absl::chars_format; @@ -56,14 +56,14 @@ void ExpectParsedFloat(std::string s, absl::chars_format format_flags, begin_subrange = static_cast<int>(open_bracket_pos); s.replace(open_bracket_pos, 1, ""); std::string::size_type close_bracket_pos = s.find(']'); - ABSL_RAW_CHECK(close_bracket_pos != absl::string_view::npos, - "Test input contains [ without matching ]"); + CHECK_NE(close_bracket_pos, absl::string_view::npos) + << "Test input contains [ without matching ]"; end_subrange = static_cast<int>(close_bracket_pos); s.replace(close_bracket_pos, 1, ""); } const std::string::size_type expected_characters_matched = s.find('$'); - ABSL_RAW_CHECK(expected_characters_matched != std::string::npos, - "Input string must contain $"); + CHECK_NE(expected_characters_matched, std::string::npos) + << "Input string must contain $"; s.replace(expected_characters_matched, 1, ""); ParsedFloat parsed = diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc index b6b06cfa..b7874385 100644 --- a/absl/strings/internal/cord_internal.cc +++ b/absl/strings/internal/cord_internal.cc @@ -33,7 +33,6 @@ ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled( kCordEnableRingBufferDefault); ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled( kCordShallowSubcordsDefault); -ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false); void LogFatalNodeType(CordRep* rep) { ABSL_INTERNAL_LOG(FATAL, absl::StrCat("Unexpected node type: ", diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index e6f0d544..20dd008c 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -69,12 +69,6 @@ enum CordFeatureDefaults { extern std::atomic<bool> cord_ring_buffer_enabled; extern std::atomic<bool> shallow_subcords_enabled; -// `cord_btree_exhaustive_validation` can be set to force exhaustive validation -// in debug assertions, and code that calls `IsValid()` explicitly. By default, -// assertions should be relatively cheap and AssertValid() can easily lead to -// O(n^2) complexity as recursive / full tree validation is O(n). -extern std::atomic<bool> cord_btree_exhaustive_validation; - inline void enable_cord_ring_buffer(bool enable) { cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed); } @@ -163,20 +157,19 @@ class RefcountAndFlags { // false will be visible to a thread that just observed this method returning // false. Always returns false when the immortal bit is set. inline bool Decrement() { - int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask; - assert(refcount > 0 || refcount & kImmortalFlag); + int32_t refcount = count_.load(std::memory_order_acquire); + assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag); return refcount != kRefIncrement && (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & - kRefcountMask) != kRefIncrement; + kHighRefcountMask) != 0; } // Same as Decrement but expect that refcount is greater than 1. inline bool DecrementExpectHighRefcount() { int32_t refcount = - count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & - kRefcountMask; - assert(refcount > 0 || refcount & kImmortalFlag); - return refcount != kRefIncrement; + count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel); + assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag); + return (refcount & kHighRefcountMask) != 0; } // Returns the current reference count using acquire semantics. @@ -220,6 +213,15 @@ class RefcountAndFlags { // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1 // if the immortal bit is set.) kRefcountMask = ~kReservedFlag, + + // Bitmask to use when checking if refcount is equal to 1 and not + // immortal when decrementing the refcount. This masks out kRefIncrement and + // all flags except kImmortalFlag. If the masked RefcountAndFlags is 0, we + // assume the refcount is equal to 1, since we know it's not immortal and + // not greater than 1. If the masked RefcountAndFlags is not 0, we can + // assume the refcount is not equal to 1 since either a higher bit in the + // refcount is set, or kImmortal is set. + kHighRefcountMask = kRefcountMask & ~kRefIncrement, }; std::atomic<int32_t> count_; diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc index a86fdc0b..05bd0e20 100644 --- a/absl/strings/internal/cord_rep_btree.cc +++ b/absl/strings/internal/cord_rep_btree.cc @@ -14,6 +14,7 @@ #include "absl/strings/internal/cord_rep_btree.h" +#include <atomic> #include <cassert> #include <cstdint> #include <iostream> @@ -49,9 +50,7 @@ using CopyResult = CordRepBtree::CopyResult; constexpr auto kFront = CordRepBtree::kFront; constexpr auto kBack = CordRepBtree::kBack; -inline bool exhaustive_validation() { - return cord_btree_exhaustive_validation.load(std::memory_order_relaxed); -} +ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false); // Implementation of the various 'Dump' functions. // Prints the entire tree structure or 'rep'. External callers should @@ -362,6 +361,15 @@ struct StackOperations { } // namespace +void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation) { + cord_btree_exhaustive_validation.store(do_exaustive_validation, + std::memory_order_relaxed); +} + +bool IsCordBtreeExhaustiveValidationEnabled() { + return cord_btree_exhaustive_validation.load(std::memory_order_relaxed); +} + void CordRepBtree::Dump(const CordRep* rep, absl::string_view label, bool include_contents, std::ostream& stream) { stream << "===================================\n"; @@ -450,7 +458,8 @@ bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) { child_length += edge->length; } NODE_CHECK_EQ(child_length, tree->length); - if ((!shallow || exhaustive_validation()) && tree->height() > 0) { + if ((!shallow || IsCordBtreeExhaustiveValidationEnabled()) && + tree->height() > 0) { for (CordRep* edge : tree->Edges()) { if (!IsValid(edge->btree(), shallow)) return false; } diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index 4209e512..be94b62e 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -32,6 +32,14 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { +// `SetCordBtreeExhaustiveValidation()` can be set to force exhaustive +// validation in debug assertions, and code that calls `IsValid()` +// explicitly. By default, assertions should be relatively cheap and +// AssertValid() can easily lead to O(n^2) complexity as recursive / full tree +// validation is O(n). +void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation); +bool IsCordBtreeExhaustiveValidationEnabled(); + class CordRepBtreeNavigator; // CordRepBtree is as the name implies a btree implementation of a Cordrep tree. diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc index 9d6ce484..840acf9f 100644 --- a/absl/strings/internal/cord_rep_btree_test.cc +++ b/absl/strings/internal/cord_rep_btree_test.cc @@ -507,7 +507,7 @@ TEST_P(CordRepBtreeTest, AppendToTreeTwoDeep) { for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { // Ref top level tree based on param. // Ref child node once every 16 iterations, and leaf node every 4 - // iterrations which which should not have an observable effect other than + // iterations which which should not have an observable effect other than // the node and/or the leaf below it being copied. refs.RefIf(shared(), tree); refs.RefIf(i % 16 == 0, tree->Edges().back()); @@ -568,7 +568,7 @@ TEST_P(CordRepBtreeTest, PrependToTreeTwoDeep) { for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { // Ref top level tree based on param. // Ref child node once every 16 iterations, and leaf node every 4 - // iterrations which which should not have an observable effect other than + // iterations which which should not have an observable effect other than // the node and/or the leaf below it being copied. refs.RefIf(shared(), tree); refs.RefIf(i % 16 == 0, tree->Edges().back()); @@ -1355,9 +1355,9 @@ TEST(CordRepBtreeTest, AssertValid) { TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { // Restore exhaustive validation on any exit. - const bool exhaustive_validation = cord_btree_exhaustive_validation.load(); + const bool exhaustive_validation = IsCordBtreeExhaustiveValidationEnabled(); auto cleanup = absl::MakeCleanup([exhaustive_validation] { - cord_btree_exhaustive_validation.store(exhaustive_validation); + SetCordBtreeExhaustiveValidation(exhaustive_validation); }); // Create a tree of at least 2 levels, and mess with the original flat, which @@ -1372,7 +1372,7 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { } flat->length = 100; - cord_btree_exhaustive_validation.store(false); + SetCordBtreeExhaustiveValidation(false); EXPECT_FALSE(CordRepBtree::IsValid(tree)); EXPECT_TRUE(CordRepBtree::IsValid(tree, true)); EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); @@ -1382,7 +1382,7 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, false), ".*"); #endif - cord_btree_exhaustive_validation.store(true); + SetCordBtreeExhaustiveValidation(true); EXPECT_FALSE(CordRepBtree::IsValid(tree)); EXPECT_FALSE(CordRepBtree::IsValid(tree, true)); EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); diff --git a/absl/strings/internal/cord_rep_consume.cc b/absl/strings/internal/cord_rep_consume.cc index 20a55797..db7d4fef 100644 --- a/absl/strings/internal/cord_rep_consume.cc +++ b/absl/strings/internal/cord_rep_consume.cc @@ -42,7 +42,8 @@ CordRep* ClipSubstring(CordRepSubstring* substring) { } // namespace -void Consume(CordRep* rep, ConsumeFn consume_fn) { +void Consume(CordRep* rep, + FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) { size_t offset = 0; size_t length = rep->length; @@ -53,8 +54,9 @@ void Consume(CordRep* rep, ConsumeFn consume_fn) { consume_fn(rep, offset, length); } -void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) { - return Consume(rep, std::move(consume_fn)); +void ReverseConsume(CordRep* rep, + FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) { + return Consume(rep, consume_fn); } } // namespace cord_internal diff --git a/absl/strings/internal/cord_rep_consume.h b/absl/strings/internal/cord_rep_consume.h index d46fca2b..bece1874 100644 --- a/absl/strings/internal/cord_rep_consume.h +++ b/absl/strings/internal/cord_rep_consume.h @@ -24,11 +24,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -// Functor for the Consume() and ReverseConsume() functions: -// void ConsumeFunc(CordRep* rep, size_t offset, size_t length); -// See the Consume() and ReverseConsume() function comments for documentation. -using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>; - // Consume() and ReverseConsume() consume CONCAT based trees and invoke the // provided functor with the contained nodes in the proper forward or reverse // order, which is used to convert CONCAT trees into other tree or cord data. @@ -40,8 +35,10 @@ using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>; // violations, we can not 100% guarantee that all code respects 'new format' // settings and flags, so we need to be able to parse old data on the fly until // all old code is deprecated / no longer the default format. -void Consume(CordRep* rep, ConsumeFn consume_fn); -void ReverseConsume(CordRep* rep, ConsumeFn consume_fn); +void Consume(CordRep* rep, + FunctionRef<void(CordRep*, size_t, size_t)> consume_fn); +void ReverseConsume(CordRep* rep, + FunctionRef<void(CordRep*, size_t, size_t)> consume_fn); } // namespace cord_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/cord_rep_flat.h b/absl/strings/internal/cord_rep_flat.h index e3e27fcd..27c4b21e 100644 --- a/absl/strings/internal/cord_rep_flat.h +++ b/absl/strings/internal/cord_rep_flat.h @@ -120,8 +120,16 @@ struct CordRepFlat : public CordRep { // Round size up so it matches a size we can exactly express in a tag. const size_t size = RoundUpForTag(len + kFlatOverhead); void* const raw_rep = ::operator new(size); + // GCC 13 has a false-positive -Wstringop-overflow warning here. + #if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(13, 0) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wstringop-overflow" + #endif CordRepFlat* rep = new (raw_rep) CordRepFlat(); rep->tag = AllocatedSizeToTag(size); + #if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(13, 0) + #pragma GCC diagnostic pop + #endif return rep; } diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h index 2000e21e..79a2fdb1 100644 --- a/absl/strings/internal/cord_rep_ring.h +++ b/absl/strings/internal/cord_rep_ring.h @@ -430,7 +430,7 @@ class CordRepRing : public CordRep { // capacity to satisfy `extra` extra nodes, and unref the old `rep` instance. // // If a new CordRepRing can not be allocated, or the new capacity would exceed - // the maxmimum capacity, then the input is consumed only, and an exception is + // the maximum capacity, then the input is consumed only, and an exception is // thrown. static CordRepRing* Mutable(CordRepRing* rep, size_t extra); @@ -472,7 +472,7 @@ class CordRepRing : public CordRep { // Increases the data offset for entry `index` by `n`. void AddDataOffset(index_type index, size_t n); - // Descreases the length for entry `index` by `n`. + // Decreases the length for entry `index` by `n`. void SubLength(index_type index, size_t n); index_type head_; diff --git a/absl/strings/internal/cordz_functions_test.cc b/absl/strings/internal/cordz_functions_test.cc index 350623c1..b70a685e 100644 --- a/absl/strings/internal/cordz_functions_test.cc +++ b/absl/strings/internal/cordz_functions_test.cc @@ -38,7 +38,7 @@ TEST(CordzFunctionsTest, SampleRate) { } // Cordz is disabled when we don't have thread_local. All calls to -// should_profile will return false when cordz is diabled, so we might want to +// should_profile will return false when cordz is disabled, so we might want to // avoid those tests. #ifdef ABSL_INTERNAL_CORDZ_ENABLED diff --git a/absl/strings/internal/cordz_handle.cc b/absl/strings/internal/cordz_handle.cc index a73fefed..a7061dbe 100644 --- a/absl/strings/internal/cordz_handle.cc +++ b/absl/strings/internal/cordz_handle.cc @@ -16,34 +16,60 @@ #include <atomic> #include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK -#include "absl/base/internal/spinlock.h" +#include "absl/synchronization/mutex.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -using ::absl::base_internal::SpinLockHolder; +namespace { -ABSL_CONST_INIT CordzHandle::Queue CordzHandle::global_queue_(absl::kConstInit); +struct Queue { + Queue() = default; + + absl::Mutex mutex; + std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr}; + + // Returns true if this delete queue is empty. This method does not acquire + // the lock, but does a 'load acquire' observation on the delete queue tail. + // It is used inside Delete() to check for the presence of a delete queue + // without holding the lock. The assumption is that the caller is in the + // state of 'being deleted', and can not be newly discovered by a concurrent + // 'being constructed' snapshot instance. Practically, this means that any + // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens + // before / after' semantics and atomic fences. + bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return dq_tail.load(std::memory_order_acquire) == nullptr; + } +}; + +static Queue* GlobalQueue() { + static Queue* global_queue = new Queue; + return global_queue; +} + +} // namespace CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) { + Queue* global_queue = GlobalQueue(); if (is_snapshot) { - SpinLockHolder lock(&queue_->mutex); - CordzHandle* dq_tail = queue_->dq_tail.load(std::memory_order_acquire); + MutexLock lock(&global_queue->mutex); + CordzHandle* dq_tail = + global_queue->dq_tail.load(std::memory_order_acquire); if (dq_tail != nullptr) { dq_prev_ = dq_tail; dq_tail->dq_next_ = this; } - queue_->dq_tail.store(this, std::memory_order_release); + global_queue->dq_tail.store(this, std::memory_order_release); } } CordzHandle::~CordzHandle() { - ODRCheck(); + Queue* global_queue = GlobalQueue(); if (is_snapshot_) { std::vector<CordzHandle*> to_delete; { - SpinLockHolder lock(&queue_->mutex); + MutexLock lock(&global_queue->mutex); CordzHandle* next = dq_next_; if (dq_prev_ == nullptr) { // We were head of the queue, delete every CordzHandle until we reach @@ -59,7 +85,7 @@ CordzHandle::~CordzHandle() { if (next) { next->dq_prev_ = dq_prev_; } else { - queue_->dq_tail.store(dq_prev_, std::memory_order_release); + global_queue->dq_tail.store(dq_prev_, std::memory_order_release); } } for (CordzHandle* handle : to_delete) { @@ -69,16 +95,15 @@ CordzHandle::~CordzHandle() { } bool CordzHandle::SafeToDelete() const { - return is_snapshot_ || queue_->IsEmpty(); + return is_snapshot_ || GlobalQueue()->IsEmpty(); } void CordzHandle::Delete(CordzHandle* handle) { assert(handle); if (handle) { - handle->ODRCheck(); - Queue* const queue = handle->queue_; + Queue* const queue = GlobalQueue(); if (!handle->SafeToDelete()) { - SpinLockHolder lock(&queue->mutex); + MutexLock lock(&queue->mutex); CordzHandle* dq_tail = queue->dq_tail.load(std::memory_order_acquire); if (dq_tail != nullptr) { handle->dq_prev_ = dq_tail; @@ -93,8 +118,9 @@ void CordzHandle::Delete(CordzHandle* handle) { std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() { std::vector<const CordzHandle*> handles; - SpinLockHolder lock(&global_queue_.mutex); - CordzHandle* dq_tail = global_queue_.dq_tail.load(std::memory_order_acquire); + Queue* global_queue = GlobalQueue(); + MutexLock lock(&global_queue->mutex); + CordzHandle* dq_tail = global_queue->dq_tail.load(std::memory_order_acquire); for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) { handles.push_back(p); } @@ -103,13 +129,13 @@ std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() { bool CordzHandle::DiagnosticsHandleIsSafeToInspect( const CordzHandle* handle) const { - ODRCheck(); if (!is_snapshot_) return false; if (handle == nullptr) return true; if (handle->is_snapshot_) return false; bool snapshot_found = false; - SpinLockHolder lock(&queue_->mutex); - for (const CordzHandle* p = queue_->dq_tail; p; p = p->dq_prev_) { + Queue* global_queue = GlobalQueue(); + MutexLock lock(&global_queue->mutex); + for (const CordzHandle* p = global_queue->dq_tail; p; p = p->dq_prev_) { if (p == handle) return !snapshot_found; if (p == this) snapshot_found = true; } @@ -119,13 +145,13 @@ bool CordzHandle::DiagnosticsHandleIsSafeToInspect( std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() { - ODRCheck(); std::vector<const CordzHandle*> handles; if (!is_snapshot()) { return handles; } - SpinLockHolder lock(&queue_->mutex); + Queue* global_queue = GlobalQueue(); + MutexLock lock(&global_queue->mutex); for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) { if (!p->is_snapshot()) { handles.push_back(p); diff --git a/absl/strings/internal/cordz_handle.h b/absl/strings/internal/cordz_handle.h index 3c800b43..08e3f0d3 100644 --- a/absl/strings/internal/cordz_handle.h +++ b/absl/strings/internal/cordz_handle.h @@ -20,8 +20,6 @@ #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" -#include "absl/base/internal/spinlock.h" -#include "absl/synchronization/mutex.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -34,7 +32,7 @@ namespace cord_internal { // has gained visibility into a CordzInfo object, that CordzInfo object will not // be deleted prematurely. This allows the profiler to inspect all CordzInfo // objects that are alive without needing to hold a global lock. -class CordzHandle { +class ABSL_DLL CordzHandle { public: CordzHandle() : CordzHandle(false) {} @@ -79,37 +77,6 @@ class CordzHandle { virtual ~CordzHandle(); private: - // Global queue data. CordzHandle stores a pointer to the global queue - // instance to harden against ODR violations. - struct Queue { - constexpr explicit Queue(absl::ConstInitType) - : mutex(absl::kConstInit, - absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} - - absl::base_internal::SpinLock mutex; - std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr}; - - // Returns true if this delete queue is empty. This method does not acquire - // the lock, but does a 'load acquire' observation on the delete queue tail. - // It is used inside Delete() to check for the presence of a delete queue - // without holding the lock. The assumption is that the caller is in the - // state of 'being deleted', and can not be newly discovered by a concurrent - // 'being constructed' snapshot instance. Practically, this means that any - // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens - // before / after' semantics and atomic fences. - bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS { - return dq_tail.load(std::memory_order_acquire) == nullptr; - } - }; - - void ODRCheck() const { -#ifndef NDEBUG - ABSL_RAW_CHECK(queue_ == &global_queue_, "ODR violation in Cord"); -#endif - } - - ABSL_CONST_INIT static Queue global_queue_; - Queue* const queue_ = &global_queue_; const bool is_snapshot_; // dq_prev_ and dq_next_ require the global queue mutex to be held. diff --git a/absl/strings/internal/cordz_info.cc b/absl/strings/internal/cordz_info.cc index 530f33be..515dfafb 100644 --- a/absl/strings/internal/cordz_info.cc +++ b/absl/strings/internal/cordz_info.cc @@ -26,6 +26,7 @@ #include "absl/strings/internal/cordz_statistics.h" #include "absl/strings/internal/cordz_update_tracker.h" #include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" #include "absl/types/span.h" namespace absl { @@ -53,7 +54,7 @@ namespace { // The top level node is treated specially: we assume the current thread // (typically called from the CordzHandler) to hold a reference purely to // perform a safe analysis, and not being part of the application. So we -// substract 1 from the reference count of the top node to compute the +// subtract 1 from the reference count of the top node to compute the // 'application fair share' excluding the reference of the current thread. // // An example of fair sharing, and why we multiply reference counts: diff --git a/absl/strings/internal/cordz_sample_token.h b/absl/strings/internal/cordz_sample_token.h index b58022c3..2a86bc3b 100644 --- a/absl/strings/internal/cordz_sample_token.h +++ b/absl/strings/internal/cordz_sample_token.h @@ -33,11 +33,11 @@ namespace cord_internal { // ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail // // This list tracks that CH1 and CH2 were created after ST1, so the thread -// holding ST1 might have a referece to CH1, CH2, ST2, and CH3. However, ST2 was -// created later, so the thread holding the ST2 token cannot have a reference to -// ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will delete ST1, -// CH1, and CH2. If instead ST2 is cleaned up first, that thread will only -// delete ST2. +// holding ST1 might have a reference to CH1, CH2, ST2, and CH3. However, ST2 +// was created later, so the thread holding the ST2 token cannot have a +// reference to ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will +// delete ST1, CH1, and CH2. If instead ST2 is cleaned up first, that thread +// will only delete ST2. // // If ST1 is cleaned up first, the new list will be: // ST2 <- CH3 <- global_delete_queue_tail diff --git a/absl/strings/internal/damerau_levenshtein_distance_test.cc b/absl/strings/internal/damerau_levenshtein_distance_test.cc index a342b7db..49dd105b 100644 --- a/absl/strings/internal/damerau_levenshtein_distance_test.cc +++ b/absl/strings/internal/damerau_levenshtein_distance_test.cc @@ -54,7 +54,7 @@ TEST(Distance, TestDistances) { } TEST(Distance, TestCutoff) { - // Returing cutoff + 1 if the value is larger than cutoff or string longer + // Returning cutoff + 1 if the value is larger than cutoff or string longer // than MAX_SIZE. EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 3), uint8_t{3}); EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 2), uint8_t{3}); diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc index 8bd0890d..56a4cbed 100644 --- a/absl/strings/internal/escaping.cc +++ b/absl/strings/internal/escaping.cc @@ -21,9 +21,17 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { +// The two strings below provide maps from normal 6-bit characters to their +// base64-escaped equivalent. +// For the inverse case, see kUn(WebSafe)Base64 in the external +// escaping.cc. ABSL_CONST_INIT const char kBase64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +ABSL_CONST_INIT const char kWebSafeBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + + size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { // Base64 encodes three bytes of input at a time. If the input is not // divisible by three, we pad as appropriate. @@ -62,6 +70,21 @@ size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { return len; } +// ---------------------------------------------------------------------- +// Take the input in groups of 4 characters and turn each +// character into a code 0 to 63 thus: +// A-Z map to 0 to 25 +// a-z map to 26 to 51 +// 0-9 map to 52 to 61 +// +(- for WebSafe) maps to 62 +// /(_ for WebSafe) maps to 63 +// There will be four numbers, all less than 64 which can be represented +// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). +// Arrange the 6 digit binary numbers into three bytes as such: +// aaaaaabb bbbbcccc ccdddddd +// Equals signs (one or two) are used at the end of the encoded block to +// indicate that the text was not an integer multiple of three bytes long. +// ---------------------------------------------------------------------- size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, size_t szdest, const char* base64, bool do_padding) { diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h index b04033ff..2186f778 100644 --- a/absl/strings/internal/escaping.h +++ b/absl/strings/internal/escaping.h @@ -24,6 +24,7 @@ ABSL_NAMESPACE_BEGIN namespace strings_internal { ABSL_CONST_INIT extern const char kBase64Chars[]; +ABSL_CONST_INIT extern const char kWebSafeBase64Chars[]; // Calculates the length of a Base64 encoding (RFC 4648) of a string of length // `input_len`, with or without padding per `do_padding`. Note that 'web-safe' diff --git a/absl/strings/internal/memutil.cc b/absl/strings/internal/memutil.cc index 44996a75..e2e7347c 100644 --- a/absl/strings/internal/memutil.cc +++ b/absl/strings/internal/memutil.cc @@ -16,6 +16,8 @@ #include <cstdlib> +#include "absl/strings/ascii.h" + namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { @@ -33,83 +35,6 @@ int memcasecmp(const char* s1, const char* s2, size_t len) { return 0; } -char* memdup(const char* s, size_t slen) { - void* copy; - if ((copy = malloc(slen)) == nullptr) return nullptr; - memcpy(copy, s, slen); - return reinterpret_cast<char*>(copy); -} - -char* memrchr(const char* s, int c, size_t slen) { - for (const char* e = s + slen - 1; e >= s; e--) { - if (*e == c) return const_cast<char*>(e); - } - return nullptr; -} - -size_t memspn(const char* s, size_t slen, const char* accept) { - const char* p = s; - const char* spanp; - char c, sc; - -cont: - c = *p++; - if (slen-- == 0) - return static_cast<size_t>(p - 1 - s); - for (spanp = accept; (sc = *spanp++) != '\0';) - if (sc == c) goto cont; - return static_cast<size_t>(p - 1 - s); -} - -size_t memcspn(const char* s, size_t slen, const char* reject) { - const char* p = s; - const char* spanp; - char c, sc; - - while (slen-- != 0) { - c = *p++; - for (spanp = reject; (sc = *spanp++) != '\0';) - if (sc == c) - return static_cast<size_t>(p - 1 - s); - } - return static_cast<size_t>(p - s); -} - -char* mempbrk(const char* s, size_t slen, const char* accept) { - const char* scanp; - int sc; - - for (; slen; ++s, --slen) { - for (scanp = accept; (sc = *scanp++) != '\0';) - if (sc == *s) return const_cast<char*>(s); - } - return nullptr; -} - -// This is significantly faster for case-sensitive matches with very -// few possible matches. See unit test for benchmarks. -const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, - size_t neelen) { - if (0 == neelen) { - return phaystack; // even if haylen is 0 - } - if (haylen < neelen) return nullptr; - - const char* match; - const char* hayend = phaystack + haylen - neelen + 1; - // A static cast is used here to work around the fact that memchr returns - // a void* on Posix-compliant systems and const void* on Windows. - while ( - (match = static_cast<const char*>(memchr( - phaystack, pneedle[0], static_cast<size_t>(hayend - phaystack))))) { - if (memcmp(match, pneedle, neelen) == 0) - return match; - else - phaystack = match + 1; - } - return nullptr; -} - } // namespace strings_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/memutil.h b/absl/strings/internal/memutil.h index 9ad05358..b5911a01 100644 --- a/absl/strings/internal/memutil.h +++ b/absl/strings/internal/memutil.h @@ -14,51 +14,6 @@ // limitations under the License. // -// These routines provide mem versions of standard C string routines, -// such as strpbrk. They function exactly the same as the str versions, -// so if you wonder what they are, replace the word "mem" by -// "str" and check out the man page. I could return void*, as the -// strutil.h mem*() routines tend to do, but I return char* instead -// since this is by far the most common way these functions are called. -// -// The difference between the mem and str versions is the mem version -// takes a pointer and a length, rather than a '\0'-terminated string. -// The memcase* routines defined here assume the locale is "C" -// (they use absl::ascii_tolower instead of tolower). -// -// These routines are based on the BSD library. -// -// Here's a list of routines from string.h, and their mem analogues. -// Functions in lowercase are defined in string.h; those in UPPERCASE -// are defined here: -// -// strlen -- -// strcat strncat MEMCAT -// strcpy strncpy memcpy -// -- memccpy (very cool function, btw) -// -- memmove -// -- memset -// strcmp strncmp memcmp -// strcasecmp strncasecmp MEMCASECMP -// strchr memchr -// strcoll -- -// strxfrm -- -// strdup strndup MEMDUP -// strrchr MEMRCHR -// strspn MEMSPN -// strcspn MEMCSPN -// strpbrk MEMPBRK -// strstr MEMSTR MEMMEM -// (g)strcasestr MEMCASESTR MEMCASEMEM -// strtok -- -// strprefix MEMPREFIX (strprefix is from strutil.h) -// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h) -// strsuffix MEMSUFFIX (strsuffix is from strutil.h) -// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h) -// -- MEMIS -// -- MEMCASEIS -// strcount MEMCOUNT (strcount is from strutil.h) - #ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_ #define ABSL_STRINGS_INTERNAL_MEMUTIL_H_ @@ -72,74 +27,11 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { -inline char* memcat(char* dest, size_t destlen, const char* src, - size_t srclen) { - return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen)); -} - +// Performs a byte-by-byte comparison of `len` bytes of the strings `s1` and +// `s2`, ignoring the case of the characters. It returns an integer less than, +// equal to, or greater than zero if `s1` is found, respectively, to be less +// than, to match, or be greater than `s2`. int memcasecmp(const char* s1, const char* s2, size_t len); -char* memdup(const char* s, size_t slen); -char* memrchr(const char* s, int c, size_t slen); -size_t memspn(const char* s, size_t slen, const char* accept); -size_t memcspn(const char* s, size_t slen, const char* reject); -char* mempbrk(const char* s, size_t slen, const char* accept); - -// This is for internal use only. Don't call this directly -template <bool case_sensitive> -const char* int_memmatch(const char* haystack, size_t haylen, - const char* needle, size_t neelen) { - if (0 == neelen) { - return haystack; // even if haylen is 0 - } - const char* hayend = haystack + haylen; - const char* needlestart = needle; - const char* needleend = needlestart + neelen; - - for (; haystack < hayend; ++haystack) { - char hay = case_sensitive - ? *haystack - : absl::ascii_tolower(static_cast<unsigned char>(*haystack)); - char nee = case_sensitive - ? *needle - : absl::ascii_tolower(static_cast<unsigned char>(*needle)); - if (hay == nee) { - if (++needle == needleend) { - return haystack + 1 - neelen; - } - } else if (needle != needlestart) { - // must back up haystack in case a prefix matched (find "aab" in "aaab") - haystack -= needle - needlestart; // for loop will advance one more - needle = needlestart; - } - } - return nullptr; -} - -// These are the guys you can call directly -inline const char* memstr(const char* phaystack, size_t haylen, - const char* pneedle) { - return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle)); -} - -inline const char* memcasestr(const char* phaystack, size_t haylen, - const char* pneedle) { - return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle)); -} - -inline const char* memmem(const char* phaystack, size_t haylen, - const char* pneedle, size_t needlelen) { - return int_memmatch<true>(phaystack, haylen, pneedle, needlelen); -} - -inline const char* memcasemem(const char* phaystack, size_t haylen, - const char* pneedle, size_t needlelen) { - return int_memmatch<false>(phaystack, haylen, pneedle, needlelen); -} - -// This is significantly faster for case-sensitive matches with very -// few possible matches. See unit test for benchmarks. -const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, - size_t neelen); } // namespace strings_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/memutil_benchmark.cc b/absl/strings/internal/memutil_benchmark.cc index dc95c3e5..61e323a4 100644 --- a/absl/strings/internal/memutil_benchmark.cc +++ b/absl/strings/internal/memutil_benchmark.cc @@ -25,62 +25,6 @@ // - an easy search: 'b' // - a medium search: 'ab'. That means every letter is a possible match. // - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haytack) -// We benchmark case-sensitive and case-insensitive versions of -// three memmem implementations: -// - memmem() from memutil.h -// - search() from STL -// - memmatch(), a custom implementation using memchr and memcmp. -// Here are sample results: -// -// Run on (12 X 3800 MHz CPU s) -// CPU Caches: -// L1 Data 32K (x6) -// L1 Instruction 32K (x6) -// L2 Unified 256K (x6) -// L3 Unified 15360K (x1) -// ---------------------------------------------------------------- -// Benchmark Time CPU Iterations -// ---------------------------------------------------------------- -// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s -// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s -// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s -// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s -// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s -// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s -// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s -// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s -// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s -// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s -// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s -// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s -// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s -// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s -// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s -// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s -// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s -// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s -// BM_MemmemStartup 5 ns 5 ns 150821500 -// BM_SearchStartup 5 ns 5 ns 150644203 -// BM_MemmatchStartup 7 ns 7 ns 97068802 -// -// Conclusions: -// -// The following recommendations are based on the sample results above. However, -// we have found that the performance of STL search can vary significantly -// depending on compiler and standard library implementation. We recommend you -// run the benchmarks for yourself on relevant platforms. -// -// If you need case-insensitive, STL search is slightly better than memmem for -// all cases. -// -// Case-sensitive is more subtle: -// Custom memmatch is _very_ fast at scanning, so if you have very few possible -// matches in your haystack, that's the way to go. Performance drops -// significantly with more matches. -// -// STL search is slightly faster than memmem in the medium and pathological -// benchmarks. However, the performance of memmem is currently more dependable -// across platforms and build configurations. namespace { @@ -94,96 +38,10 @@ const char* MakeHaystack() { } const char* const kHaystack = MakeHaystack(); -void BM_Memmem(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_Memmem); - -void BM_MemmemMedium(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemmemMedium); - -void BM_MemmemPathological(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memmem( - kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, - kHaystackSize - kHaystackSize / 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemmemPathological); - -void BM_Memcasemem(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_Memcasemem); - -void BM_MemcasememMedium(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemcasememMedium); - -void BM_MemcasememPathological(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memcasemem( - kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, - kHaystackSize - kHaystackSize / 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemcasememPathological); - bool case_eq(const char a, const char b) { return absl::ascii_tolower(a) == absl::ascii_tolower(b); } -void BM_Search(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, - kHaystack + kHaystackSize - 1, - kHaystack + kHaystackSize)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_Search); - -void BM_SearchMedium(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, - kHaystack + kHaystackSize - 2, - kHaystack + kHaystackSize)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_SearchMedium); - -void BM_SearchPathological(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, - kHaystack + kHaystackSize / 2, - kHaystack + kHaystackSize)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_SearchPathological); - void BM_Searchcase(benchmark::State& state) { for (auto _ : state) { benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, @@ -241,34 +99,6 @@ const char* memcasematch(const char* phaystack, size_t haylen, return nullptr; } -void BM_Memmatch(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_Memmatch); - -void BM_MemmatchMedium(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemmatchMedium); - -void BM_MemmatchPathological(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memmatch( - kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, - kHaystackSize - kHaystackSize / 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemmatchPathological); - void BM_Memcasematch(benchmark::State& state) { for (auto _ : state) { benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1)); @@ -295,29 +125,4 @@ void BM_MemcasematchPathological(benchmark::State& state) { } BENCHMARK(BM_MemcasematchPathological); -void BM_MemmemStartup(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memmem( - kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); - } -} -BENCHMARK(BM_MemmemStartup); - -void BM_SearchStartup(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize, - kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize)); - } -} -BENCHMARK(BM_SearchStartup); - -void BM_MemmatchStartup(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memmatch( - kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); - } -} -BENCHMARK(BM_MemmatchStartup); - } // namespace diff --git a/absl/strings/internal/memutil_test.cc b/absl/strings/internal/memutil_test.cc index d8681ddf..277be2c4 100644 --- a/absl/strings/internal/memutil_test.cc +++ b/absl/strings/internal/memutil_test.cc @@ -19,42 +19,12 @@ #include <cstdlib> #include "gtest/gtest.h" -#include "absl/strings/ascii.h" namespace { -static char* memcasechr(const char* s, int c, size_t slen) { - c = absl::ascii_tolower(c); - for (; slen; ++s, --slen) { - if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s); - } - return nullptr; -} - -static const char* memcasematch(const char* phaystack, size_t haylen, - const char* pneedle, size_t neelen) { - if (0 == neelen) { - return phaystack; // even if haylen is 0 - } - if (haylen < neelen) return nullptr; - - const char* match; - const char* hayend = phaystack + haylen - neelen + 1; - while ((match = static_cast<char*>( - memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { - if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) - return match; - else - phaystack = match + 1; - } - return nullptr; -} - -TEST(MemUtilTest, AllTests) { +TEST(MemUtil, memcasecmp) { // check memutil functions - char a[1000]; - absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1); - absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1); + const char a[] = "hello there"; EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there", sizeof("hello there") - 1), @@ -66,114 +36,6 @@ TEST(MemUtilTest, AllTests) { sizeof("hello there") - 2), 0); EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0); - - char* p = absl::strings_internal::memdup("hello", 5); - free(p); - - p = absl::strings_internal::memrchr("hello there", 'e', - sizeof("hello there") - 1); - EXPECT_TRUE(p && p[-1] == 'r'); - p = absl::strings_internal::memrchr("hello there", 'e', - sizeof("hello there") - 2); - EXPECT_TRUE(p && p[-1] == 'h'); - p = absl::strings_internal::memrchr("hello there", 'u', - sizeof("hello there") - 1); - EXPECT_TRUE(p == nullptr); - - int len = absl::strings_internal::memspn("hello there", - sizeof("hello there") - 1, "hole"); - EXPECT_EQ(len, sizeof("hello") - 1); - len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, - "u"); - EXPECT_EQ(len, 0); - len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, - ""); - EXPECT_EQ(len, 0); - len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, - "trole h"); - EXPECT_EQ(len, sizeof("hello there") - 1); - len = absl::strings_internal::memspn("hello there!", - sizeof("hello there!") - 1, "trole h"); - EXPECT_EQ(len, sizeof("hello there") - 1); - len = absl::strings_internal::memspn("hello there!", - sizeof("hello there!") - 2, "trole h!"); - EXPECT_EQ(len, sizeof("hello there!") - 2); - - len = absl::strings_internal::memcspn("hello there", - sizeof("hello there") - 1, "leho"); - EXPECT_EQ(len, 0); - len = absl::strings_internal::memcspn("hello there", - sizeof("hello there") - 1, "u"); - EXPECT_EQ(len, sizeof("hello there") - 1); - len = absl::strings_internal::memcspn("hello there", - sizeof("hello there") - 1, ""); - EXPECT_EQ(len, sizeof("hello there") - 1); - len = absl::strings_internal::memcspn("hello there", - sizeof("hello there") - 1, " "); - EXPECT_EQ(len, 5); - - p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, - "leho"); - EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l'); - p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, - "nu"); - EXPECT_TRUE(p == nullptr); - p = absl::strings_internal::mempbrk("hello there!", - sizeof("hello there!") - 2, "!"); - EXPECT_TRUE(p == nullptr); - p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, - " t "); - EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't'); - - { - const char kHaystack[] = "0123456789"; - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack); - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3), - kHaystack); - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1), - kHaystack); - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3), - kHaystack + 7); - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1), - kHaystack + 9); - EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) == - nullptr); - EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) == - nullptr); - } - { - const char kHaystack[] = "aBcDeFgHiJ"; - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0), - kHaystack); - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3), - kHaystack); - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1), - kHaystack); - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3), - kHaystack + 7); - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1), - kHaystack + 9); - EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) == - nullptr); - EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) == - nullptr); - } - { - const char kHaystack[] = "0123456789"; - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack); - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3), - kHaystack); - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1), - kHaystack); - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3), - kHaystack + 7); - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1), - kHaystack + 9); - EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) == - nullptr); - EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) == - nullptr); - } } } // namespace diff --git a/absl/strings/internal/stl_type_traits.h b/absl/strings/internal/stl_type_traits.h index 6035ca45..e50468b0 100644 --- a/absl/strings/internal/stl_type_traits.h +++ b/absl/strings/internal/stl_type_traits.h @@ -13,7 +13,7 @@ // limitations under the License. // -// Thie file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type +// The file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type // trait metafunction to assist in working with the _GLIBCXX_DEBUG debug // wrappers of STL containers. // diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index 018dd052..c0a9a28e 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -106,7 +106,7 @@ class IntDigits { char *p = storage_ + sizeof(storage_); do { p -= 2; - numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p); + numbers_internal::PutTwoDigits(static_cast<uint32_t>(v % 100), p); v /= 100; } while (v); if (p[0] == '0') { @@ -278,24 +278,6 @@ bool ConvertIntImplInnerSlow(const IntDigits &as_digits, return true; } -template <typename T, - typename std::enable_if<(std::is_integral<T>::value && - std::is_signed<T>::value) || - std::is_same<T, int128>::value, - int>::type = 0> -constexpr auto ConvertV(T) { - return FormatConversionCharInternal::d; -} - -template <typename T, - typename std::enable_if<(std::is_integral<T>::value && - std::is_unsigned<T>::value) || - std::is_same<T, uint128>::value, - int>::type = 0> -constexpr auto ConvertV(T) { - return FormatConversionCharInternal::u; -} - template <typename T> bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { if (conv.conversion_char() == FormatConversionCharInternal::v) { @@ -332,10 +314,6 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { using U = typename MakeUnsigned<T>::type; IntDigits as_digits; - if (conv.conversion_char() == FormatConversionCharInternal::v) { - conv.set_conversion_char(ConvertV(T{})); - } - // This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes // it to complain about a switch/case type mismatch, even though both are // FormatConverionChar. Likely this is because at this point @@ -361,6 +339,7 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { case static_cast<uint8_t>(FormatConversionCharInternal::d): case static_cast<uint8_t>(FormatConversionCharInternal::i): + case static_cast<uint8_t>(FormatConversionCharInternal::v): as_digits.PrintAsDec(v); break; @@ -482,18 +461,18 @@ CharConvertResult FormatConvertImpl(char v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } -CharConvertResult FormatConvertImpl(signed char v, - const FormatConversionSpecImpl conv, - FormatSinkImpl *sink) { + +// ==================== Ints ==================== +IntegralConvertResult FormatConvertImpl(signed char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } -CharConvertResult FormatConvertImpl(unsigned char v, - const FormatConversionSpecImpl conv, - FormatSinkImpl *sink) { +IntegralConvertResult FormatConvertImpl(unsigned char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } - -// ==================== Ints ==================== IntegralConvertResult FormatConvertImpl(short v, // NOLINT const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index e4b16628..3ce30feb 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -279,14 +279,14 @@ FloatingConvertResult FormatConvertImpl(long double v, // Chars. CharConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -CharConvertResult FormatConvertImpl(signed char v, - FormatConversionSpecImpl conv, - FormatSinkImpl* sink); -CharConvertResult FormatConvertImpl(unsigned char v, - FormatConversionSpecImpl conv, - FormatSinkImpl* sink); // Ints. +IntegralConvertResult FormatConvertImpl(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(short v, // NOLINT FormatConversionSpecImpl conv, FormatSinkImpl* sink); @@ -441,7 +441,7 @@ class FormatArgImpl { // For everything else: // - Decay char* and char arrays into `const char*` // - Decay any other pointer to `const void*` - // - Decay all enums to their underlying type. + // - Decay all enums to the integral promotion of their underlying type. // - Decay function pointers to void*. template <typename T, typename = void> struct DecayType { @@ -461,7 +461,7 @@ class FormatArgImpl { !str_format_internal::HasUserDefinedConvert<T>::value && !strings_internal::HasAbslStringify<T>::value && std::is_enum<T>::value>::type> { - using type = typename std::underlying_type<T>::type; + using type = decltype(+typename std::underlying_type<T>::type()); }; public: diff --git a/absl/strings/internal/str_format/bind.h b/absl/strings/internal/str_format/bind.h index b73c5028..5e2a43d5 100644 --- a/absl/strings/internal/str_format/bind.h +++ b/absl/strings/internal/str_format/bind.h @@ -21,6 +21,7 @@ #include <string> #include "absl/base/port.h" +#include "absl/container/inlined_vector.h" #include "absl/strings/internal/str_format/arg.h" #include "absl/strings/internal/str_format/checker.h" #include "absl/strings/internal/str_format/parser.h" @@ -177,17 +178,7 @@ class Streamable { public: Streamable(const UntypedFormatSpecImpl& format, absl::Span<const FormatArgImpl> args) - : format_(format) { - if (args.size() <= ABSL_ARRAYSIZE(few_args_)) { - for (size_t i = 0; i < args.size(); ++i) { - few_args_[i] = args[i]; - } - args_ = absl::MakeSpan(few_args_, args.size()); - } else { - many_args_.assign(args.begin(), args.end()); - args_ = many_args_; - } - } + : format_(format), args_(args.begin(), args.end()) {} std::ostream& Print(std::ostream& os) const; @@ -197,12 +188,7 @@ class Streamable { private: const UntypedFormatSpecImpl& format_; - absl::Span<const FormatArgImpl> args_; - // if args_.size() is 4 or less: - FormatArgImpl few_args_[4] = {FormatArgImpl(0), FormatArgImpl(0), - FormatArgImpl(0), FormatArgImpl(0)}; - // if args_.size() is more than 4: - std::vector<FormatArgImpl> many_args_; + absl::InlinedVector<FormatArgImpl, 4> args_; }; // for testing @@ -211,8 +197,7 @@ std::string Summarize(UntypedFormatSpecImpl format, bool BindWithPack(const UnboundConversion* props, absl::Span<const FormatArgImpl> pack, BoundConversion* bound); -bool FormatUntyped(FormatRawSinkImpl raw_sink, - UntypedFormatSpecImpl format, +bool FormatUntyped(FormatRawSinkImpl raw_sink, UntypedFormatSpecImpl format, absl::Span<const FormatArgImpl> args); std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format, @@ -231,7 +216,7 @@ int SnprintF(char* output, size_t size, UntypedFormatSpecImpl format, template <typename T> class StreamedWrapper { public: - explicit StreamedWrapper(const T& v) : v_(v) { } + explicit StreamedWrapper(const T& v) : v_(v) {} private: template <typename S> diff --git a/absl/strings/internal/str_format/constexpr_parser.h b/absl/strings/internal/str_format/constexpr_parser.h index 3dc1776b..b70a16e4 100644 --- a/absl/strings/internal/str_format/constexpr_parser.h +++ b/absl/strings/internal/str_format/constexpr_parser.h @@ -323,6 +323,7 @@ constexpr const char* ConsumeConversion(const char* pos, const char* const end, if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr; if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; } +#undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR assert(CheckFastPathSetting(*conv)); (void)(&CheckFastPathSetting); diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc index 300612b7..16ff9879 100644 --- a/absl/strings/internal/str_format/convert_test.cc +++ b/absl/strings/internal/str_format/convert_test.cc @@ -26,6 +26,7 @@ #include "gtest/gtest.h" #include "absl/base/attributes.h" #include "absl/base/internal/raw_logging.h" +#include "absl/log/log.h" #include "absl/strings/internal/str_format/bind.h" #include "absl/strings/match.h" #include "absl/types/optional.h" @@ -264,7 +265,7 @@ MATCHER_P(MatchesPointerString, ptr, "") { } void* parsed = nullptr; if (sscanf(arg.c_str(), "%p", &parsed) != 1) { - ABSL_RAW_LOG(FATAL, "Could not parse %s", arg.c_str()); + LOG(FATAL) << "Could not parse " << arg; } return ptr == parsed; } @@ -1241,9 +1242,9 @@ TEST_F(FormatConvertTest, GlibcHasCorrectTraits) { const NativePrintfTraits &native_traits = VerifyNativeImplementation(); // If one of the following tests break then it is either because the above PP // macro guards failed to exclude a new platform (likely) or because something - // has changed in the implemention of glibc sprintf float formatting behavior. - // If the latter, then the code that computes these flags needs to be - // revisited and/or possibly the StrFormat implementation. + // has changed in the implementation of glibc sprintf float formatting + // behavior. If the latter, then the code that computes these flags needs to + // be revisited and/or possibly the StrFormat implementation. EXPECT_TRUE(native_traits.hex_float_has_glibc_rounding); EXPECT_TRUE(native_traits.hex_float_prefers_denormal_repr); EXPECT_TRUE( diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h index 603bd49d..8de42d2c 100644 --- a/absl/strings/internal/str_format/extension.h +++ b/absl/strings/internal/str_format/extension.h @@ -273,7 +273,7 @@ struct FormatConversionSpecImplFriend; class FormatConversionSpecImpl { public: - // Width and precison are not specified, no flags are set. + // Width and precision are not specified, no flags are set. bool is_basic() const { return flags_ == Flags::kBasic; } bool has_left_flag() const { return FlagsContains(flags_, Flags::kLeft); } bool has_show_pos_flag() const { diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc index 8e497852..8edf520d 100644 --- a/absl/strings/internal/str_format/float_conversion.cc +++ b/absl/strings/internal/str_format/float_conversion.cc @@ -711,12 +711,12 @@ bool IncrementNibble(size_t nibble_index, Int* n) { constexpr size_t kShift = sizeof(Int) * 8 - 1; constexpr size_t kNumNibbles = sizeof(Int) * 8 / 4; Int before = *n >> kShift; - // Here we essentially want to take the number 1 and move it into the requsted - // nibble, then add it to *n to effectively increment the nibble. However, - // ASan will complain if we try to shift the 1 beyond the limits of the Int, - // i.e., if the nibble_index is out of range. So therefore we check for this - // and if we are out of range we just add 0 which leaves *n unchanged, which - // seems like the reasonable thing to do in that case. + // Here we essentially want to take the number 1 and move it into the + // requested nibble, then add it to *n to effectively increment the nibble. + // However, ASan will complain if we try to shift the 1 beyond the limits of + // the Int, i.e., if the nibble_index is out of range. So therefore we check + // for this and if we are out of range we just add 0 which leaves *n + // unchanged, which seems like the reasonable thing to do in that case. *n += ((nibble_index >= kNumNibbles) ? 0 : (Int{1} << static_cast<int>(nibble_index * 4))); @@ -937,7 +937,7 @@ void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp, // =============== Exponent ================== constexpr size_t kBufSizeForExpDecRepr = - numbers_internal::kFastToBufferSize // requred for FastIntToBuffer + numbers_internal::kFastToBufferSize // required for FastIntToBuffer + 1 // 'p' or 'P' + 1; // '+' or '-' char exp_buffer[kBufSizeForExpDecRepr]; @@ -1015,7 +1015,7 @@ struct Buffer { --end; } - char &back() { + char &back() const { assert(begin < end); return end[-1]; } @@ -1102,7 +1102,7 @@ void PrintExponent(int exp, char e, Buffer *out) { template <typename Float, typename Int> constexpr bool CanFitMantissa() { return -#if defined(__clang__) && !defined(__SSE3__) +#if defined(__clang__) && (__clang_major__ < 9) && !defined(__SSE3__) // Workaround for clang bug: https://bugs.llvm.org/show_bug.cgi?id=38289 // Casting from long double to uint64_t is miscompiled and drops bits. (!std::is_same<Float, long double>::value || diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h index 35edf3aa..081ad85a 100644 --- a/absl/strings/internal/str_split_internal.h +++ b/absl/strings/internal/str_split_internal.h @@ -235,6 +235,24 @@ struct SplitterIsConvertibleTo HasMappedType<C>::value> { }; +template <typename StringType, typename Container, typename = void> +struct ShouldUseLifetimeBound : std::false_type {}; + +template <typename StringType, typename Container> +struct ShouldUseLifetimeBound< + StringType, Container, + std::enable_if_t< + std::is_same<StringType, std::string>::value && + std::is_same<typename Container::value_type, absl::string_view>::value>> + : std::true_type {}; + +template <typename StringType, typename First, typename Second> +using ShouldUseLifetimeBoundForPair = std::integral_constant< + bool, std::is_same<StringType, std::string>::value && + (std::is_same<First, absl::string_view>::value || + std::is_same<Second, absl::string_view>::value)>; + + // This class implements the range that is returned by absl::StrSplit(). This // class has templated conversion operators that allow it to be implicitly // converted to a variety of types that the caller may have specified on the @@ -281,10 +299,24 @@ class Splitter { // An implicit conversion operator that is restricted to only those containers // that the splitter is convertible to. - template <typename Container, - typename = typename std::enable_if< - SplitterIsConvertibleTo<Container>::value>::type> - operator Container() const { // NOLINT(runtime/explicit) + template < + typename Container, + std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value && + SplitterIsConvertibleTo<Container>::value, + std::nullptr_t> = nullptr> + // NOLINTNEXTLINE(google-explicit-constructor) + operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND { + return ConvertToContainer<Container, typename Container::value_type, + HasMappedType<Container>::value>()(*this); + } + + template < + typename Container, + std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value && + SplitterIsConvertibleTo<Container>::value, + std::nullptr_t> = nullptr> + // NOLINTNEXTLINE(google-explicit-constructor) + operator Container() const { return ConvertToContainer<Container, typename Container::value_type, HasMappedType<Container>::value>()(*this); } @@ -293,8 +325,27 @@ class Splitter { // strings returned by the begin() iterator. Either/both of .first and .second // will be constructed with empty strings if the iterator doesn't have a // corresponding value. + template <typename First, typename Second, + std::enable_if_t< + ShouldUseLifetimeBoundForPair<StringType, First, Second>::value, + std::nullptr_t> = nullptr> + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND { + return ConvertToPair<First, Second>(); + } + + template <typename First, typename Second, + std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First, + Second>::value, + std::nullptr_t> = nullptr> + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::pair<First, Second>() const { + return ConvertToPair<First, Second>(); + } + + private: template <typename First, typename Second> - operator std::pair<First, Second>() const { // NOLINT(runtime/explicit) + std::pair<First, Second> ConvertToPair() const { absl::string_view first, second; auto it = begin(); if (it != end()) { @@ -306,7 +357,6 @@ class Splitter { return {First(first), Second(second)}; } - private: // ConvertToContainer is a functor converting a Splitter to the requested // Container of ValueType. It is specialized below to optimize splitting to // certain combinations of Container and ValueType. diff --git a/absl/strings/match.cc b/absl/strings/match.cc index 2d672509..3b81b2c0 100644 --- a/absl/strings/match.cc +++ b/absl/strings/match.cc @@ -14,6 +14,12 @@ #include "absl/strings/match.h" +#include <algorithm> +#include <cstdint> + +#include "absl/base/internal/endian.h" +#include "absl/numeric/bits.h" +#include "absl/strings/ascii.h" #include "absl/strings/internal/memutil.h" namespace absl { @@ -27,6 +33,27 @@ bool EqualsIgnoreCase(absl::string_view piece1, // memcasecmp uses absl::ascii_tolower(). } +bool StrContainsIgnoreCase(absl::string_view haystack, + absl::string_view needle) noexcept { + while (haystack.size() >= needle.size()) { + if (StartsWithIgnoreCase(haystack, needle)) return true; + haystack.remove_prefix(1); + } + return false; +} + +bool StrContainsIgnoreCase(absl::string_view haystack, + char needle) noexcept { + char upper_needle = absl::ascii_toupper(static_cast<unsigned char>(needle)); + char lower_needle = absl::ascii_tolower(static_cast<unsigned char>(needle)); + if (upper_needle == lower_needle) { + return StrContains(haystack, needle); + } else { + const char both_cstr[3] = {lower_needle, upper_needle, '\0'}; + return haystack.find_first_of(both_cstr) != absl::string_view::npos; + } +} + bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix) noexcept { return (text.size() >= prefix.size()) && @@ -39,5 +66,65 @@ bool EndsWithIgnoreCase(absl::string_view text, EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix); } +absl::string_view FindLongestCommonPrefix(absl::string_view a, + absl::string_view b) { + const absl::string_view::size_type limit = std::min(a.size(), b.size()); + const char* const pa = a.data(); + const char* const pb = b.data(); + absl::string_view::size_type count = (unsigned) 0; + + if (ABSL_PREDICT_FALSE(limit < 8)) { + while (ABSL_PREDICT_TRUE(count + 2 <= limit)) { + uint16_t xor_bytes = absl::little_endian::Load16(pa + count) ^ + absl::little_endian::Load16(pb + count); + if (ABSL_PREDICT_FALSE(xor_bytes != 0)) { + if (ABSL_PREDICT_TRUE((xor_bytes & 0xff) == 0)) ++count; + return absl::string_view(pa, count); + } + count += 2; + } + if (ABSL_PREDICT_TRUE(count != limit)) { + if (ABSL_PREDICT_TRUE(pa[count] == pb[count])) ++count; + } + return absl::string_view(pa, count); + } + + do { + uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^ + absl::little_endian::Load64(pb + count); + if (ABSL_PREDICT_FALSE(xor_bytes != 0)) { + count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3); + return absl::string_view(pa, count); + } + count += 8; + } while (ABSL_PREDICT_TRUE(count + 8 < limit)); + + count = limit - 8; + uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^ + absl::little_endian::Load64(pb + count); + if (ABSL_PREDICT_TRUE(xor_bytes != 0)) { + count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3); + return absl::string_view(pa, count); + } + return absl::string_view(pa, limit); +} + +absl::string_view FindLongestCommonSuffix(absl::string_view a, + absl::string_view b) { + const absl::string_view::size_type limit = std::min(a.size(), b.size()); + if (limit == 0) return absl::string_view(); + + const char* pa = a.data() + a.size() - 1; + const char* pb = b.data() + b.size() - 1; + absl::string_view::size_type count = (unsigned) 0; + while (count < limit && *pa == *pb) { + --pa; + --pb; + ++count; + } + + return absl::string_view(++pa, count); +} + ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/match.h b/absl/strings/match.h index 038cbb3f..1eeafbbf 100644 --- a/absl/strings/match.h +++ b/absl/strings/match.h @@ -72,6 +72,15 @@ inline bool EndsWith(absl::string_view text, memcmp(text.data() + (text.size() - suffix.size()), suffix.data(), suffix.size()) == 0); } +// StrContainsIgnoreCase() +// +// Returns whether a given ASCII string `haystack` contains the ASCII substring +// `needle`, ignoring case in the comparison. +bool StrContainsIgnoreCase(absl::string_view haystack, + absl::string_view needle) noexcept; + +bool StrContainsIgnoreCase(absl::string_view haystack, + char needle) noexcept; // EqualsIgnoreCase() // @@ -94,6 +103,16 @@ bool StartsWithIgnoreCase(absl::string_view text, bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix) noexcept; +// Yields the longest prefix in common between both input strings. +// Pointer-wise, the returned result is a subset of input "a". +absl::string_view FindLongestCommonPrefix(absl::string_view a, + absl::string_view b); + +// Yields the longest suffix in common between both input strings. +// Pointer-wise, the returned result is a subset of input "a". +absl::string_view FindLongestCommonSuffix(absl::string_view a, + absl::string_view b); + ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc index 5841bc1b..71618f71 100644 --- a/absl/strings/match_test.cc +++ b/absl/strings/match_test.cc @@ -124,4 +124,165 @@ TEST(MatchTest, EndsWithIgnoreCase) { EXPECT_FALSE(absl::EndsWithIgnoreCase("", "fo")); } +TEST(MatchTest, ContainsIgnoreCase) { + EXPECT_TRUE(absl::StrContainsIgnoreCase("foo", "foo")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("FOO", "Foo")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("--FOO", "Foo")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("FOO--", "Foo")); + EXPECT_FALSE(absl::StrContainsIgnoreCase("BAR", "Foo")); + EXPECT_FALSE(absl::StrContainsIgnoreCase("BAR", "Foo")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("123456", "123456")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("123456", "234")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("", "")); + EXPECT_TRUE(absl::StrContainsIgnoreCase("abc", "")); + EXPECT_FALSE(absl::StrContainsIgnoreCase("", "a")); +} + +TEST(MatchTest, ContainsCharIgnoreCase) { + absl::string_view a("AaBCdefg!"); + absl::string_view b("AaBCd!"); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'a')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'A')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'b')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'B')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'e')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'E')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(a, 'h')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(a, 'H')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(a, '!')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(a, '?')); + + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'a')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'A')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'b')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'B')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'e')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'E')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'h')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'H')); + EXPECT_TRUE(absl::StrContainsIgnoreCase(b, '!')); + EXPECT_FALSE(absl::StrContainsIgnoreCase(b, '?')); + + EXPECT_FALSE(absl::StrContainsIgnoreCase("", 'a')); + EXPECT_FALSE(absl::StrContainsIgnoreCase("", 'A')); + EXPECT_FALSE(absl::StrContainsIgnoreCase("", '0')); +} + +TEST(MatchTest, FindLongestCommonPrefix) { + EXPECT_EQ(absl::FindLongestCommonPrefix("", ""), ""); + EXPECT_EQ(absl::FindLongestCommonPrefix("", "abc"), ""); + EXPECT_EQ(absl::FindLongestCommonPrefix("abc", ""), ""); + EXPECT_EQ(absl::FindLongestCommonPrefix("ab", "abc"), "ab"); + EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "ab"), "ab"); + EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "abd"), "ab"); + EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "abcd"), "abc"); + EXPECT_EQ(absl::FindLongestCommonPrefix("abcd", "abcd"), "abcd"); + EXPECT_EQ(absl::FindLongestCommonPrefix("abcd", "efgh"), ""); + + // "abcde" v. "abc" but in the middle of other data + EXPECT_EQ(absl::FindLongestCommonPrefix( + absl::string_view("1234 abcdef").substr(5, 5), + absl::string_view("5678 abcdef").substr(5, 3)), + "abc"); +} + +// Since the little-endian implementation involves a bit of if-else and various +// return paths, the following tests aims to provide full test coverage of the +// implementation. +TEST(MatchTest, FindLongestCommonPrefixLoad16Mismatch) { + const std::string x1 = "abcdefgh"; + const std::string x2 = "abcde_"; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde"); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde"); +} + +TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesNoLast) { + const std::string x1 = "abcdef"; + const std::string x2 = "abcdef"; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdef"); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdef"); +} + +TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastCharMismatches) { + const std::string x1 = "abcdefg"; + const std::string x2 = "abcdef_h"; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdef"); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdef"); +} + +TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastMatches) { + const std::string x1 = "abcde"; + const std::string x2 = "abcdefgh"; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde"); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde"); +} + +TEST(MatchTest, FindLongestCommonPrefixSize8Load64Mismatches) { + const std::string x1 = "abcdefghijk"; + const std::string x2 = "abcde_g_"; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde"); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde"); +} + +TEST(MatchTest, FindLongestCommonPrefixSize8Load64Matches) { + const std::string x1 = "abcdefgh"; + const std::string x2 = "abcdefgh"; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdefgh"); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdefgh"); +} + +TEST(MatchTest, FindLongestCommonPrefixSize15Load64Mismatches) { + const std::string x1 = "012345670123456"; + const std::string x2 = "0123456701_34_6"; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "0123456701"); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "0123456701"); +} + +TEST(MatchTest, FindLongestCommonPrefixSize15Load64Matches) { + const std::string x1 = "012345670123456"; + const std::string x2 = "0123456701234567"; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "012345670123456"); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "012345670123456"); +} + +TEST(MatchTest, FindLongestCommonPrefixSizeFirstByteOfLast8BytesMismatch) { + const std::string x1 = "012345670123456701234567"; + const std::string x2 = "0123456701234567_1234567"; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "0123456701234567"); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "0123456701234567"); +} + +TEST(MatchTest, FindLongestCommonPrefixLargeLastCharMismatches) { + const std::string x1(300, 'x'); + std::string x2 = x1; + x2.back() = '#'; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), std::string(299, 'x')); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), std::string(299, 'x')); +} + +TEST(MatchTest, FindLongestCommonPrefixLargeFullMatch) { + const std::string x1(300, 'x'); + const std::string x2 = x1; + EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), std::string(300, 'x')); + EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), std::string(300, 'x')); +} + +TEST(MatchTest, FindLongestCommonSuffix) { + EXPECT_EQ(absl::FindLongestCommonSuffix("", ""), ""); + EXPECT_EQ(absl::FindLongestCommonSuffix("", "abc"), ""); + EXPECT_EQ(absl::FindLongestCommonSuffix("abc", ""), ""); + EXPECT_EQ(absl::FindLongestCommonSuffix("bc", "abc"), "bc"); + EXPECT_EQ(absl::FindLongestCommonSuffix("abc", "bc"), "bc"); + EXPECT_EQ(absl::FindLongestCommonSuffix("abc", "dbc"), "bc"); + EXPECT_EQ(absl::FindLongestCommonSuffix("bcd", "abcd"), "bcd"); + EXPECT_EQ(absl::FindLongestCommonSuffix("abcd", "abcd"), "abcd"); + EXPECT_EQ(absl::FindLongestCommonSuffix("abcd", "efgh"), ""); + + // "abcde" v. "cde" but in the middle of other data + EXPECT_EQ(absl::FindLongestCommonSuffix( + absl::string_view("1234 abcdef").substr(5, 5), + absl::string_view("5678 abcdef").substr(7, 3)), + "cde"); +} + } // namespace diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 2987158e..c43c6bcc 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -31,7 +31,9 @@ #include <utility> #include "absl/base/attributes.h" +#include "absl/base/internal/endian.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/optimization.h" #include "absl/numeric/bits.h" #include "absl/strings/ascii.h" #include "absl/strings/charconv.h" @@ -136,82 +138,132 @@ bool SimpleAtob(absl::string_view str, bool* out) { namespace { -// Used to optimize printing a decimal number's final digit. -const char one_ASCII_final_digits[10][2] { - {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0}, - {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0}, -}; +// Various routines to encode integers to strings. + +// We split data encodings into a group of 2 digits, 4 digits, 8 digits as +// it's easier to combine powers of two into scalar arithmetic. + +// Previous implementation used a lookup table of 200 bytes for every 2 bytes +// and it was memory bound, any L1 cache miss would result in a much slower +// result. When benchmarking with a cache eviction rate of several percent, +// this implementation proved to be better. + +// These constants represent '00', '0000' and '00000000' as ascii strings in +// integers. We can add these numbers if we encode to bytes from 0 to 9. as +// 'i' = '0' + i for 0 <= i <= 9. +constexpr uint32_t kTwoZeroBytes = 0x0101 * '0'; +constexpr uint64_t kFourZeroBytes = 0x01010101 * '0'; +constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0'; + +// * 103 / 1024 is a division by 10 for values from 0 to 99. It's also a +// division of a structure [k takes 2 bytes][m takes 2 bytes], then * 103 / 1024 +// will be [k / 10][m / 10]. It allows parallel division. +constexpr uint64_t kDivisionBy10Mul = 103u; +constexpr uint64_t kDivisionBy10Div = 1 << 10; + +// * 10486 / 1048576 is a division by 100 for values from 0 to 9999. +constexpr uint64_t kDivisionBy100Mul = 10486u; +constexpr uint64_t kDivisionBy100Div = 1 << 20; + +// Encode functions write the ASCII output of input `n` to `out_str`. +inline char* EncodeHundred(uint32_t n, char* out_str) { + int num_digits = static_cast<int>(n - 10) >> 8; + uint32_t base = kTwoZeroBytes; + uint32_t div10 = (n * kDivisionBy10Mul) / kDivisionBy10Div; + uint32_t mod10 = n - 10u * div10; + base += div10 + (mod10 << 8); + base >>= num_digits & 8; + little_endian::Store16(out_str, static_cast<uint16_t>(base)); + return out_str + 2 + num_digits; +} -} // namespace +inline char* EncodeTenThousand(uint32_t n, char* out_str) { + // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive + // blocks. 123 -> [\0\1][\0\23]. We divide by 10 both blocks + // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in + // parallel". Then we combine both results to have both ASCII digits, + // strip trailing zeros, add ASCII '0000' and return. + uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div; + uint32_t mod100 = n - 100ull * div100; + uint32_t hundreds = (mod100 << 16) + div100; + uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div; + tens &= (0xFull << 16) | 0xFull; + tens += (hundreds - 10ull * tens) << 8; + ABSL_ASSUME(tens != 0); + // The result can contain trailing zero bits, we need to strip them to a first + // significant byte in a final representation. For example, for n = 123, we + // have tens to have representation \0\1\2\3. We do `& -8` to round + // to a multiple to 8 to strip zero bytes, not all zero bits. + // countr_zero to help. + // 0 minus 8 to make MSVC happy. + uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(tens)) & (0 - 8ull); + tens += kFourZeroBytes; + tens >>= zeroes; + little_endian::Store32(out_str, tens); + return out_str + sizeof(tens) - zeroes / 8; +} -char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) { - uint32_t digits; - // The idea of this implementation is to trim the number of divides to as few - // as possible, and also reducing memory stores and branches, by going in - // steps of two digits at a time rather than one whenever possible. - // The huge-number case is first, in the hopes that the compiler will output - // that case in one branch-free block of code, and only output conditional - // branches into it from below. - if (i >= 1000000000) { // >= 1,000,000,000 - digits = i / 100000000; // 100,000,000 - i -= digits * 100000000; - PutTwoDigits(digits, buffer); - buffer += 2; - lt100_000_000: - digits = i / 1000000; // 1,000,000 - i -= digits * 1000000; - PutTwoDigits(digits, buffer); - buffer += 2; - lt1_000_000: - digits = i / 10000; // 10,000 - i -= digits * 10000; - PutTwoDigits(digits, buffer); - buffer += 2; - lt10_000: - digits = i / 100; - i -= digits * 100; - PutTwoDigits(digits, buffer); - buffer += 2; - lt100: - digits = i; - PutTwoDigits(digits, buffer); - buffer += 2; - *buffer = 0; - return buffer; - } +// Prepare functions return an integer that should be written to out_str +// (but possibly include trailing zeros). +// For hi < 10000, lo < 10000 returns uint64_t as encoded in ASCII with +// possibly trailing zeroes of the number hi * 10000 + lo. +inline uint64_t PrepareTenThousands(uint64_t hi, uint64_t lo) { + uint64_t merged = hi | (lo << 32); + uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) & + ((0x7Full << 32) | 0x7Full); + uint64_t mod100 = merged - 100ull * div100; + uint64_t hundreds = (mod100 << 16) + div100; + uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div; + tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull; + tens += (hundreds - 10ull * tens) << 8; + return tens; +} - if (i < 100) { - digits = i; - if (i >= 10) goto lt100; - memcpy(buffer, one_ASCII_final_digits[i], 2); - return buffer + 1; +inline char* EncodeFullU32(uint32_t n, char* out_str) { + if (n < 100'000'000) { + uint64_t bottom = PrepareTenThousands(n / 10000, n % 10000); + ABSL_ASSUME(bottom != 0); + // 0 minus 8 to make MSVC happy. + uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(bottom)) + & (0 - 8ull); + uint64_t bottom_res = bottom + kEightZeroBytes; + bottom_res >>= zeroes; + little_endian::Store64(out_str, bottom_res); + return out_str + sizeof(bottom) - zeroes / 8; } - if (i < 10000) { // 10,000 - if (i >= 1000) goto lt10_000; - digits = i / 100; - i -= digits * 100; - *buffer++ = '0' + static_cast<char>(digits); - goto lt100; - } - if (i < 1000000) { // 1,000,000 - if (i >= 100000) goto lt1_000_000; - digits = i / 10000; // 10,000 - i -= digits * 10000; - *buffer++ = '0' + static_cast<char>(digits); - goto lt10_000; + uint32_t top = n / 100'000'000; + n %= 100'000'000; + uint64_t bottom = PrepareTenThousands(n / 10000, n % 10000); + uint64_t bottom_res = bottom + kEightZeroBytes; + out_str = EncodeHundred(top, out_str); + little_endian::Store64(out_str, bottom_res); + return out_str + sizeof(bottom); +} + +} // namespace + +void numbers_internal::PutTwoDigits(uint32_t i, char* buf) { + assert(i < 100); + uint32_t base = kTwoZeroBytes; + uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div; + uint32_t mod10 = i - 10u * div10; + base += div10 + (mod10 << 8); + little_endian::Store16(buf, static_cast<uint16_t>(base)); +} + +char* numbers_internal::FastIntToBuffer(uint32_t n, char* out_str) { + if (n < 100) { + out_str = EncodeHundred(n, out_str); + goto set_last_zero; } - if (i < 100000000) { // 100,000,000 - if (i >= 10000000) goto lt100_000_000; - digits = i / 1000000; // 1,000,000 - i -= digits * 1000000; - *buffer++ = '0' + static_cast<char>(digits); - goto lt1_000_000; + if (n < 10000) { + out_str = EncodeTenThousand(n, out_str); + goto set_last_zero; } - // we already know that i < 1,000,000,000 - digits = i / 100000000; // 100,000,000 - i -= digits * 100000000; - *buffer++ = '0' + static_cast<char>(digits); - goto lt100_000_000; + out_str = EncodeFullU32(n, out_str); +set_last_zero: + *out_str = '\0'; + return out_str; } char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) { @@ -219,7 +271,7 @@ char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) { if (i < 0) { *buffer++ = '-'; // We need to do the negation in modular (i.e., "unsigned") - // arithmetic; MSVC++ apprently warns for plain "-u", so + // arithmetic; MSVC++ apparently warns for plain "-u", so // we write the equivalent expression "0 - u" instead. u = 0 - u; } @@ -230,41 +282,40 @@ char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) { uint32_t u32 = static_cast<uint32_t>(i); if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer); - // Here we know i has at least 10 decimal digits. - uint64_t top_1to11 = i / 1000000000; - u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000); - uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11); + // 10**9 < 2**32 <= i < 10**10, we can do 2+8 + uint64_t div08 = i / 100'000'000ull; + uint64_t mod08 = i % 100'000'000ull; + uint64_t mod_result = + PrepareTenThousands(mod08 / 10000, mod08 % 10000) + kEightZeroBytes; + if (i < 10'000'000'000ull) { + buffer = EncodeHundred(static_cast<uint32_t>(div08), buffer); + little_endian::Store64(buffer, mod_result); + buffer += 8; + goto set_last_zero; + } - if (top_1to11_32 == top_1to11) { - buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer); + // i < 10**16, in this case 8+8 + if (i < 10'000'000'000'000'000ull) { + buffer = EncodeFullU32(static_cast<uint32_t>(div08), buffer); + little_endian::Store64(buffer, mod_result); + buffer += 8; + goto set_last_zero; } else { - // top_1to11 has more than 32 bits too; print it in two steps. - uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100); - uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100); - buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer); - PutTwoDigits(mid_2, buffer); - buffer += 2; + // 4 + 8 + 8 + uint64_t div016 = i / 10'000'000'000'000'000ull; + buffer = EncodeTenThousand(static_cast<uint32_t>(div016), buffer); + uint64_t mid_result = div08 - div016 * 100'000'000ull; + mid_result = PrepareTenThousands(mid_result / 10000, mid_result % 10000) + + kEightZeroBytes; + little_endian::Store64(buffer, mid_result); + buffer += 8; + little_endian::Store64(buffer, mod_result); + buffer += 8; + goto set_last_zero; } - - // We have only 9 digits now, again the maximum uint32_t can handle fully. - uint32_t digits = u32 / 10000000; // 10,000,000 - u32 -= digits * 10000000; - PutTwoDigits(digits, buffer); - buffer += 2; - digits = u32 / 100000; // 100,000 - u32 -= digits * 100000; - PutTwoDigits(digits, buffer); - buffer += 2; - digits = u32 / 1000; // 1,000 - u32 -= digits * 1000; - PutTwoDigits(digits, buffer); - buffer += 2; - digits = u32 / 10; - u32 -= digits * 10; - PutTwoDigits(digits, buffer); - buffer += 2; - memcpy(buffer, one_ASCII_final_digits[u32], 2); - return buffer + 1; +set_last_zero: + *buffer = '\0'; + return buffer; } char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) { @@ -1048,25 +1099,6 @@ ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] = "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; -ABSL_CONST_INIT ABSL_DLL const char two_ASCII_digits[100][2] = { - {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, - {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, - {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, - {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, - {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, - {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, - {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, - {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, - {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, - {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, - {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, - {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'}, - {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, - {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, - {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, - {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, - {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; - bool safe_strto32_base(absl::string_view text, int32_t* value, int base) { return safe_int_internal<int32_t>(text, value, base); } diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h index 86c84ed3..d7630cef 100644 --- a/absl/strings/numbers.h +++ b/absl/strings/numbers.h @@ -125,8 +125,6 @@ namespace numbers_internal { ABSL_DLL extern const char kHexChar[17]; // 0123456789abcdef ABSL_DLL extern const char kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011... -ABSL_DLL extern const char - two_ASCII_digits[100][2]; // 00, 01, 02, 03... // Writes a two-character representation of 'i' to 'buf'. 'i' must be in the // range 0 <= i < 100, and buf must have space for two characters. Example: @@ -134,10 +132,7 @@ ABSL_DLL extern const char // PutTwoDigits(42, buf); // // buf[0] == '4' // // buf[1] == '2' -inline void PutTwoDigits(size_t i, char* buf) { - assert(i < 100); - memcpy(buf, two_ASCII_digits[i], 2); -} +void PutTwoDigits(uint32_t i, char* buf); // safe_strto?() functions for implementing SimpleAtoi() diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index b3c098d1..2864bda2 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -37,7 +37,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/internal/raw_logging.h" +#include "absl/log/log.h" #include "absl/random/distributions.h" #include "absl/random/random.h" #include "absl/strings/internal/numbers_test_common.h" @@ -1337,11 +1337,9 @@ TEST_F(SimpleDtoaTest, ExhaustiveDoubleToSixDigits) { if (strcmp(sixdigitsbuf, snprintfbuf) != 0) { mismatches.push_back(d); if (mismatches.size() < 10) { - ABSL_RAW_LOG(ERROR, "%s", - absl::StrCat("Six-digit failure with double. ", "d=", d, - "=", d, " sixdigits=", sixdigitsbuf, - " printf(%g)=", snprintfbuf) - .c_str()); + LOG(ERROR) << "Six-digit failure with double. d=" << d + << " sixdigits=" << sixdigitsbuf + << " printf(%g)=" << snprintfbuf; } } }; @@ -1389,12 +1387,10 @@ TEST_F(SimpleDtoaTest, ExhaustiveDoubleToSixDigits) { if (kFloatNumCases >= 1e9) { // The exhaustive test takes a very long time, so log progress. char buf[kSixDigitsToBufferSize]; - ABSL_RAW_LOG( - INFO, "%s", - absl::StrCat("Exp ", exponent, " powten=", powten, "(", powten, - ") (", - std::string(buf, SixDigitsToBuffer(powten, buf)), ")") - .c_str()); + LOG(INFO) << "Exp " << exponent << " powten=" << powten << "(" << powten + << ") (" + << absl::string_view(buf, SixDigitsToBuffer(powten, buf)) + << ")"; } for (int digits : digit_testcases) { if (exponent == 308 && digits >= 179769) break; // don't overflow! @@ -1419,20 +1415,17 @@ TEST_F(SimpleDtoaTest, ExhaustiveDoubleToSixDigits) { double before = nextafter(d, 0.0); double after = nextafter(d, 1.7976931348623157e308); char b1[32], b2[kSixDigitsToBufferSize]; - ABSL_RAW_LOG( - ERROR, "%s", - absl::StrCat( - "Mismatch #", i, " d=", d, " (", ToNineDigits(d), ")", - " sixdigits='", sixdigitsbuf, "'", " snprintf='", snprintfbuf, - "'", " Before.=", PerfectDtoa(before), " ", - (SixDigitsToBuffer(before, b2), b2), - " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", before), b1), - " Perfect=", PerfectDtoa(d), " ", (SixDigitsToBuffer(d, b2), b2), - " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", d), b1), - " After.=.", PerfectDtoa(after), " ", - (SixDigitsToBuffer(after, b2), b2), - " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", after), b1)) - .c_str()); + LOG(ERROR) << "Mismatch #" << i << " d=" << d << " (" << ToNineDigits(d) + << ") sixdigits='" << sixdigitsbuf << "' snprintf='" + << snprintfbuf << "' Before.=" << PerfectDtoa(before) << " " + << (SixDigitsToBuffer(before, b2), b2) << " vs snprintf=" + << (snprintf(b1, sizeof(b1), "%g", before), b1) + << " Perfect=" << PerfectDtoa(d) << " " + << (SixDigitsToBuffer(d, b2), b2) + << " vs snprintf=" << (snprintf(b1, sizeof(b1), "%g", d), b1) + << " After.=." << PerfectDtoa(after) << " " + << (SixDigitsToBuffer(after, b2), b2) << " vs snprintf=" + << (snprintf(b1, sizeof(b1), "%g", after), b1); } } } diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc index 114a2ff2..2e49c31b 100644 --- a/absl/strings/str_cat.cc +++ b/absl/strings/str_cat.cc @@ -30,55 +30,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN -AlphaNum::AlphaNum(Hex hex) { - static_assert(numbers_internal::kFastToBufferSize >= 32, - "This function only works when output buffer >= 32 bytes long"); - char* const end = &digits_[numbers_internal::kFastToBufferSize]; - auto real_width = - absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16); - if (real_width >= hex.width) { - piece_ = absl::string_view(end - real_width, real_width); - } else { - // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and - // max pad width can be up to 20. - std::memset(end - 32, hex.fill, 16); - // Patch up everything else up to the real_width. - std::memset(end - real_width - 16, hex.fill, 16); - piece_ = absl::string_view(end - hex.width, hex.width); - } -} - -AlphaNum::AlphaNum(Dec dec) { - assert(dec.width <= numbers_internal::kFastToBufferSize); - char* const end = &digits_[numbers_internal::kFastToBufferSize]; - char* const minfill = end - dec.width; - char* writer = end; - uint64_t value = dec.value; - bool neg = dec.neg; - while (value > 9) { - *--writer = '0' + (value % 10); - value /= 10; - } - *--writer = '0' + static_cast<char>(value); - if (neg) *--writer = '-'; - - ptrdiff_t fillers = writer - minfill; - if (fillers > 0) { - // Tricky: if the fill character is ' ', then it's <fill><+/-><digits> - // But...: if the fill character is '0', then it's <+/-><fill><digits> - bool add_sign_again = false; - if (neg && dec.fill == '0') { // If filling with '0', - ++writer; // ignore the sign we just added - add_sign_again = true; // and re-add the sign later. - } - writer -= fillers; - std::fill_n(writer, fillers, dec.fill); - if (add_sign_again) *--writer = '-'; - } - - piece_ = absl::string_view(writer, static_cast<size_t>(end - writer)); -} - // ---------------------------------------------------------------------- // StrCat() // This merges the given strings or integers, with no delimiter. This @@ -195,7 +146,13 @@ void AppendPieces(std::string* dest, void StrAppend(std::string* dest, const AlphaNum& a) { ASSERT_NO_OVERLAP(*dest, a); - dest->append(a.data(), a.size()); + std::string::size_type old_size = dest->size(); + strings_internal::STLStringResizeUninitializedAmortized(dest, + old_size + a.size()); + char* const begin = &(*dest)[0]; + char* out = begin + old_size; + out = Append(out, a); + assert(out == begin + dest->size()); } void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) { diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index 730b4d8c..d5f71ff0 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -87,13 +87,16 @@ #ifndef ABSL_STRINGS_STR_CAT_H_ #define ABSL_STRINGS_STR_CAT_H_ +#include <algorithm> #include <array> #include <cstdint> +#include <cstring> #include <string> #include <type_traits> #include <utility> #include <vector> +#include "absl/base/attributes.h" #include "absl/base/port.h" #include "absl/strings/internal/has_absl_stringify.h" #include "absl/strings/internal/stringify_sink.h" @@ -201,6 +204,27 @@ struct Hex { explicit Hex(Pointee* v, PadSpec spec = absl::kNoPad) : Hex(spec, reinterpret_cast<uintptr_t>(v)) {} + template <typename S> + friend void AbslStringify(S& sink, Hex hex) { + static_assert( + numbers_internal::kFastToBufferSize >= 32, + "This function only works when output buffer >= 32 bytes long"); + char buffer[numbers_internal::kFastToBufferSize]; + char* const end = &buffer[numbers_internal::kFastToBufferSize]; + auto real_width = + absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16); + if (real_width >= hex.width) { + sink.Append(absl::string_view(end - real_width, real_width)); + } else { + // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and + // max pad width can be up to 20. + std::memset(end - 32, hex.fill, 16); + // Patch up everything else up to the real_width. + std::memset(end - real_width - 16, hex.fill, 16); + sink.Append(absl::string_view(end - hex.width, hex.width)); + } + } + private: Hex(PadSpec spec, uint64_t v) : value(v), @@ -235,6 +259,38 @@ struct Dec { : spec - absl::kZeroPad2 + 2), fill(spec >= absl::kSpacePad2 ? ' ' : '0'), neg(v < 0) {} + + template <typename S> + friend void AbslStringify(S& sink, Dec dec) { + assert(dec.width <= numbers_internal::kFastToBufferSize); + char buffer[numbers_internal::kFastToBufferSize]; + char* const end = &buffer[numbers_internal::kFastToBufferSize]; + char* const minfill = end - dec.width; + char* writer = end; + uint64_t val = dec.value; + while (val > 9) { + *--writer = '0' + (val % 10); + val /= 10; + } + *--writer = '0' + static_cast<char>(val); + if (dec.neg) *--writer = '-'; + + ptrdiff_t fillers = writer - minfill; + if (fillers > 0) { + // Tricky: if the fill character is ' ', then it's <fill><+/-><digits> + // But...: if the fill character is '0', then it's <+/-><fill><digits> + bool add_sign_again = false; + if (dec.neg && dec.fill == '0') { // If filling with '0', + ++writer; // ignore the sign we just added + add_sign_again = true; // and re-add the sign later. + } + writer -= fillers; + std::fill_n(writer, fillers, dec.fill); + if (add_sign_again) *--writer = '-'; + } + + sink.Append(absl::string_view(writer, static_cast<size_t>(end - writer))); + } }; // ----------------------------------------------------------------------------- @@ -282,28 +338,30 @@ class AlphaNum { AlphaNum(double f) // NOLINT(runtime/explicit) : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {} - AlphaNum(Hex hex); // NOLINT(runtime/explicit) - AlphaNum(Dec dec); // NOLINT(runtime/explicit) - template <size_t size> AlphaNum( // NOLINT(runtime/explicit) - const strings_internal::AlphaNumBuffer<size>& buf) + const strings_internal::AlphaNumBuffer<size>& buf + ABSL_ATTRIBUTE_LIFETIME_BOUND) : piece_(&buf.data[0], buf.size) {} - AlphaNum(const char* c_str) // NOLINT(runtime/explicit) - : piece_(NullSafeStringView(c_str)) {} // NOLINT(runtime/explicit) - AlphaNum(absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit) + AlphaNum(const char* c_str // NOLINT(runtime/explicit) + ABSL_ATTRIBUTE_LIFETIME_BOUND) + : piece_(NullSafeStringView(c_str)) {} + AlphaNum(absl::string_view pc // NOLINT(runtime/explicit) + ABSL_ATTRIBUTE_LIFETIME_BOUND) + : piece_(pc) {} template <typename T, typename = typename std::enable_if< strings_internal::HasAbslStringify<T>::value>::type> - AlphaNum( // NOLINT(runtime/explicit) - const T& v, // NOLINT(runtime/explicit) - strings_internal::StringifySink&& sink = {}) // NOLINT(runtime/explicit) + AlphaNum( // NOLINT(runtime/explicit) + const T& v ABSL_ATTRIBUTE_LIFETIME_BOUND, + strings_internal::StringifySink&& sink ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) : piece_(strings_internal::ExtractStringification(sink, v)) {} template <typename Allocator> AlphaNum( // NOLINT(runtime/explicit) - const std::basic_string<char, std::char_traits<char>, Allocator>& str) + const std::basic_string<char, std::char_traits<char>, Allocator>& str + ABSL_ATTRIBUTE_LIFETIME_BOUND) : piece_(str) {} // Use string literals ":" instead of character literals ':'. @@ -316,14 +374,24 @@ class AlphaNum { const char* data() const { return piece_.data(); } absl::string_view Piece() const { return piece_; } - // Normal enums are already handled by the integer formatters. - // This overload matches only scoped enums. + // Match unscoped enums. Use integral promotion so that a `char`-backed + // enum becomes a wider integral type AlphaNum will accept. template <typename T, typename = typename std::enable_if< - std::is_enum<T>{} && !std::is_convertible<T, int>{} && + std::is_enum<T>{} && std::is_convertible<T, int>{} && !strings_internal::HasAbslStringify<T>::value>::type> AlphaNum(T e) // NOLINT(runtime/explicit) - : AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {} + : AlphaNum(+e) {} + + // This overload matches scoped enums. We must explicitly cast to the + // underlying type, but use integral promotion for the same reason as above. + template <typename T, + typename std::enable_if< + std::is_enum<T>{} && !std::is_convertible<T, int>{} && + !strings_internal::HasAbslStringify<T>::value, + char*>::type = nullptr> + AlphaNum(T e) // NOLINT(runtime/explicit) + : AlphaNum(+static_cast<typename std::underlying_type<T>::type>(e)) {} // vector<bool>::reference and const_reference require special help to // convert to `AlphaNum` because it requires two user defined conversions. diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h index 3536b70e..023e4350 100644 --- a/absl/strings/str_format.h +++ b/absl/strings/str_format.h @@ -36,10 +36,12 @@ // * `absl::StreamFormat()` to more efficiently write a format string to a // stream, such as`std::cout`. // * `absl::PrintF()`, `absl::FPrintF()` and `absl::SNPrintF()` as -// replacements for `std::printf()`, `std::fprintf()` and `std::snprintf()`. +// drop-in replacements for `std::printf()`, `std::fprintf()` and +// `std::snprintf()`. // -// Note: a version of `std::sprintf()` is not supported as it is -// generally unsafe due to buffer overflows. +// Note: An `absl::SPrintF()` drop-in replacement is not supported as it +// is generally unsafe due to buffer overflows. Use `absl::StrFormat` which +// returns the string as output instead of expecting a pre-allocated buffer. // // Additionally, you can provide a format string (and its associated arguments) // using one of the following abstractions: @@ -257,6 +259,7 @@ class FormatCountCapture { // * Characters: `char`, `signed char`, `unsigned char` // * Integers: `int`, `short`, `unsigned short`, `unsigned`, `long`, // `unsigned long`, `long long`, `unsigned long long` +// * Enums: printed as their underlying integral value // * Floating-point: `float`, `double`, `long double` // // However, in the `str_format` library, a format conversion specifies a broader diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index 5198fb33..20fd0289 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -638,6 +638,8 @@ TEST(StrFormat, BehavesAsDocumented) { EXPECT_EQ(StrFormat("%#o", 10), "012"); EXPECT_EQ(StrFormat("%#x", 15), "0xf"); EXPECT_EQ(StrFormat("%04d", 8), "0008"); + EXPECT_EQ(StrFormat("%#04x", 0), "0000"); + EXPECT_EQ(StrFormat("%#04x", 1), "0x01"); // Posix positional substitution. EXPECT_EQ(absl::StrFormat("%2$s, %3$s, %1$s!", "vici", "veni", "vidi"), "veni, vidi, vici!"); diff --git a/absl/strings/str_split.cc b/absl/strings/str_split.cc index e08c26b6..72ba7c02 100644 --- a/absl/strings/str_split.cc +++ b/absl/strings/str_split.cc @@ -60,19 +60,23 @@ absl::string_view GenericFind(absl::string_view text, // Finds using absl::string_view::find(), therefore the length of the found // delimiter is delimiter.length(). struct LiteralPolicy { - size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) { + static size_t Find(absl::string_view text, absl::string_view delimiter, + size_t pos) { return text.find(delimiter, pos); } - size_t Length(absl::string_view delimiter) { return delimiter.length(); } + static size_t Length(absl::string_view delimiter) { + return delimiter.length(); + } }; // Finds using absl::string_view::find_first_of(), therefore the length of the // found delimiter is 1. struct AnyOfPolicy { - size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) { + static size_t Find(absl::string_view text, absl::string_view delimiter, + size_t pos) { return text.find_first_of(delimiter, pos); } - size_t Length(absl::string_view /* delimiter */) { return 1; } + static size_t Length(absl::string_view /* delimiter */) { return 1; } }; } // namespace @@ -123,8 +127,7 @@ ByLength::ByLength(ptrdiff_t length) : length_(length) { ABSL_RAW_CHECK(length > 0, ""); } -absl::string_view ByLength::Find(absl::string_view text, - size_t pos) const { +absl::string_view ByLength::Find(absl::string_view text, size_t pos) const { pos = std::min(pos, text.size()); // truncate `pos` absl::string_view substr = text.substr(pos); // If the string is shorter than the chunk size we say we diff --git a/absl/strings/string_view.cc b/absl/strings/string_view.cc index e2261625..f20ff530 100644 --- a/absl/strings/string_view.cc +++ b/absl/strings/string_view.cc @@ -21,12 +21,35 @@ #include <cstring> #include <ostream> -#include "absl/strings/internal/memutil.h" - namespace absl { ABSL_NAMESPACE_BEGIN namespace { + +// This is significantly faster for case-sensitive matches with very +// few possible matches. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + // A static cast is used here to work around the fact that memchr returns + // a void* on Posix-compliant systems and const void* on Windows. + while ( + (match = static_cast<const char*>(memchr( + phaystack, pneedle[0], static_cast<size_t>(hayend - phaystack))))) { + if (memcmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + void WritePadding(std::ostream& o, size_t pad) { char fill_buf[32]; memset(fill_buf, o.fill(), sizeof(fill_buf)); @@ -84,8 +107,7 @@ string_view::size_type string_view::find(string_view s, if (empty() && pos == 0 && s.empty()) return 0; return npos; } - const char* result = - strings_internal::memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_); + const char* result = memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_); return result ? static_cast<size_type>(result - ptr_) : npos; } |