summary refs log tree commit diff
path: root/absl/strings
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings')
-rw-r--r-- absl/strings/BUILD.bazel 48
-rw-r--r-- absl/strings/CMakeLists.txt 54
-rw-r--r-- absl/strings/ascii.cc 60
-rw-r--r-- absl/strings/ascii_test.cc 9
-rw-r--r-- absl/strings/char_formatting_test.cc 169
-rw-r--r-- absl/strings/charconv.cc 25
-rw-r--r-- absl/strings/charconv.h 4
-rw-r--r-- absl/strings/cord.cc 3
-rw-r--r-- absl/strings/cord.h 37
-rw-r--r-- absl/strings/cord_analysis.cc 24
-rw-r--r-- absl/strings/cord_analysis.h 18
-rw-r--r-- absl/strings/cord_buffer.h 7
-rw-r--r-- absl/strings/cord_test.cc 114
-rw-r--r-- absl/strings/cord_test_helpers.h 2
-rw-r--r-- absl/strings/escaping.cc 42
-rw-r--r-- absl/strings/escaping.h 2
-rw-r--r-- absl/strings/escaping_test.cc 17
-rw-r--r-- absl/strings/internal/charconv_bigint.cc 6
-rw-r--r-- absl/strings/internal/charconv_bigint.h 6
-rw-r--r-- absl/strings/internal/charconv_parse_test.cc 10
-rw-r--r-- absl/strings/internal/cord_internal.cc 1
-rw-r--r-- absl/strings/internal/cord_internal.h 28
-rw-r--r-- absl/strings/internal/cord_rep_btree.cc 17
-rw-r--r-- absl/strings/internal/cord_rep_btree.h 8
-rw-r--r-- absl/strings/internal/cord_rep_btree_test.cc 12
-rw-r--r-- absl/strings/internal/cord_rep_consume.cc 8
-rw-r--r-- absl/strings/internal/cord_rep_consume.h 11
-rw-r--r-- absl/strings/internal/cord_rep_flat.h 8
-rw-r--r-- absl/strings/internal/cord_rep_ring.h 4
-rw-r--r-- absl/strings/internal/cordz_functions_test.cc 2
-rw-r--r-- absl/strings/internal/cordz_handle.cc 66
-rw-r--r-- absl/strings/internal/cordz_handle.h 35
-rw-r--r-- absl/strings/internal/cordz_info.cc 3
-rw-r--r-- absl/strings/internal/cordz_sample_token.h 10
-rw-r--r-- absl/strings/internal/damerau_levenshtein_distance_test.cc 2
-rw-r--r-- absl/strings/internal/escaping.cc 23
-rw-r--r-- absl/strings/internal/escaping.h 1
-rw-r--r-- absl/strings/internal/memutil.cc 79
-rw-r--r-- absl/strings/internal/memutil.h 116
-rw-r--r-- absl/strings/internal/memutil_benchmark.cc 195
-rw-r--r-- absl/strings/internal/memutil_test.cc 142
-rw-r--r-- absl/strings/internal/stl_type_traits.h 2
-rw-r--r-- absl/strings/internal/str_format/arg.cc 41
-rw-r--r-- absl/strings/internal/str_format/arg.h 16
-rw-r--r-- absl/strings/internal/str_format/bind.h 25
-rw-r--r-- absl/strings/internal/str_format/constexpr_parser.h 1
-rw-r--r-- absl/strings/internal/str_format/convert_test.cc 9
-rw-r--r-- absl/strings/internal/str_format/extension.h 2
-rw-r--r-- absl/strings/internal/str_format/float_conversion.cc 18
-rw-r--r-- absl/strings/internal/str_split_internal.h 62
-rw-r--r-- absl/strings/match.cc 87
-rw-r--r-- absl/strings/match.h 19
-rw-r--r-- absl/strings/match_test.cc 161
-rw-r--r-- absl/strings/numbers.cc 276
-rw-r--r-- absl/strings/numbers.h 7
-rw-r--r-- absl/strings/numbers_test.cc 45
-rw-r--r-- absl/strings/str_cat.cc 57
-rw-r--r-- absl/strings/str_cat.h 98
-rw-r--r-- absl/strings/str_format.h 9
-rw-r--r-- absl/strings/str_format_test.cc 2
-rw-r--r-- absl/strings/str_split.cc 15
-rw-r--r-- absl/strings/string_view.cc 30
62 files changed, 1362 insertions, 1048 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 53c57718..819bbe69 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -28,6 +28,20 @@ package(
licenses(["notice"])
cc_library(
+ name = "string_view",
+ srcs = ["string_view.cc"],
+ hdrs = ["string_view.h"],
+ copts = ABSL_DEFAULT_COPTS,
+ linkopts = ABSL_DEFAULT_LINKOPTS,
+ deps = [
+ "//absl/base",
+ "//absl/base:config",
+ "//absl/base:core_headers",
+ "//absl/base:throw_delegate",
+ ],
+)
+
+cc_library(
name = "strings",
srcs = [
"ascii.cc",
@@ -50,7 +64,6 @@ cc_library(
"str_cat.cc",
"str_replace.cc",
"str_split.cc",
- "string_view.cc",
"substitute.cc",
],
hdrs = [
@@ -72,8 +85,15 @@ cc_library(
],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
+ textual_hdrs = [
+ # string_view.h was once part of :strings, so string_view.h is
+ # re-exported for backwards compatibility.
+ # New code should directly depend on :string_view.
+ "string_view.h",
+ ],
deps = [
":internal",
+ ":string_view",
"//absl/base",
"//absl/base:config",
"//absl/base:core_headers",
@@ -263,6 +283,7 @@ cc_test(
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
+ ":string_view",
":strings",
"//absl/base:core_headers",
"//absl/base:raw_logging_internal",
@@ -277,7 +298,7 @@ cc_test(
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
- ":strings",
+ ":string_view",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:dynamic_annotations",
@@ -350,6 +371,7 @@ cc_test(
cc_test(
name = "cord_rep_btree_test",
size = "medium",
+ timeout = "long",
srcs = ["internal/cord_rep_btree_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
@@ -457,7 +479,6 @@ cc_library(
":cordz_update_scope",
":cordz_update_tracker",
":internal",
- ":str_format",
":strings",
"//absl/base",
"//absl/base:config",
@@ -514,6 +535,7 @@ cc_library(
"//absl/container:inlined_vector",
"//absl/debugging:stacktrace",
"//absl/synchronization",
+ "//absl/time",
"//absl/types:span",
],
)
@@ -773,10 +795,10 @@ cc_test(
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",
- "//absl/base:raw_logging_internal",
"//absl/container:fixed_array",
"//absl/hash",
"//absl/log",
+ "//absl/log:check",
"//absl/random",
"@com_google_googletest//:gtest_main",
],
@@ -1018,7 +1040,7 @@ cc_test(
":pow10_helper",
":strings",
"//absl/base:config",
- "//absl/base:raw_logging_internal",
+ "//absl/log",
"//absl/random",
"//absl/random:distributions",
"@com_google_googletest//:gtest_main",
@@ -1095,7 +1117,7 @@ cc_test(
deps = [
":strings",
"//absl/base:config",
- "//absl/base:raw_logging_internal",
+ "//absl/log:check",
"@com_google_googletest//:gtest_main",
],
)
@@ -1168,6 +1190,7 @@ cc_library(
":strings",
"//absl/base:config",
"//absl/base:core_headers",
+ "//absl/container:inlined_vector",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
"//absl/numeric:bits",
@@ -1252,6 +1275,7 @@ cc_test(
":strings",
"//absl/base:core_headers",
"//absl/base:raw_logging_internal",
+ "//absl/log",
"//absl/types:optional",
"@com_google_googletest//:gtest_main",
],
@@ -1317,3 +1341,15 @@ cc_binary(
"//absl/types:optional",
],
)
+
+# Checks how char-like types (char, signed/unsigned char and enums built on
+# them) are rendered by StrCat, StrFormat and Substitute.
+cc_test(
+    name = "char_formatting_test",
+    srcs = [
+        "char_formatting_test.cc",
+    ],
+    # Same warning flags as the CMake target of the same name and the other
+    # tests in this package.
+    copts = ABSL_TEST_COPTS,
+    visibility = ["//visibility:private"],
+    deps = [
+        ":str_format",
+        ":strings",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index a0f7cc54..1959dc91 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -16,6 +16,23 @@
absl_cc_library(
NAME
+ string_view
+ HDRS
+ string_view.h
+ SRCS
+ string_view.cc
+ COPTS
+ ${ABSL_DEFAULT_COPTS}
+ DEPS
+ absl::base
+ absl::config
+ absl::core_headers
+ absl::throw_delegate
+ PUBLIC
+)
+
+absl_cc_library(
+ NAME
strings
HDRS
"ascii.h"
@@ -30,7 +47,6 @@ absl_cc_library(
"str_join.h"
"str_replace.h"
"str_split.h"
- "string_view.h"
"strip.h"
"substitute.h"
SRCS
@@ -54,11 +70,11 @@ absl_cc_library(
"str_cat.cc"
"str_replace.cc"
"str_split.cc"
- "string_view.cc"
"substitute.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
+ absl::string_view
absl::strings_internal
absl::base
absl::bits
@@ -317,7 +333,7 @@ absl_cc_test(
absl::core_headers
absl::pow10_helper
absl::config
- absl::raw_logging_internal
+ absl::log
absl::random_random
absl::random_distributions
absl::strings_internal
@@ -372,9 +388,9 @@ absl_cc_test(
COPTS
${ABSL_TEST_COPTS}
DEPS
- absl::strings
+ absl::check
absl::config
- absl::raw_logging_internal
+ absl::strings
GTest::gmock_main
)
@@ -432,6 +448,7 @@ absl_cc_library(
absl::strings
absl::config
absl::core_headers
+ absl::inlined_vector
absl::numeric_representation
absl::type_traits
absl::utility
@@ -516,6 +533,7 @@ absl_cc_test(
absl::strings
absl::str_format_internal
absl::core_headers
+ absl::log
absl::raw_logging_internal
absl::int128
GTest::gmock_main
@@ -547,6 +565,20 @@ absl_cc_test(
GTest::gmock_main
)
+absl_cc_test(
+ NAME
+ char_formatting_test
+ SRCS
+ "char_formatting_test.cc"
+ COPTS
+ ${ABSL_TEST_COPTS}
+ DEPS
+ absl::base
+ absl::str_format
+ absl::strings
+ GTest::gmock_main
+)
+
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
@@ -745,6 +777,7 @@ absl_cc_library(
absl::raw_logging_internal
absl::stacktrace
absl::synchronization
+ absl::time
)
absl_cc_test(
@@ -959,19 +992,20 @@ absl_cc_test(
COPTS
${ABSL_TEST_COPTS}
DEPS
- absl::cord
- absl::str_format
- absl::strings
absl::base
+ absl::check
absl::config
+ absl::cord
absl::cord_test_helpers
absl::cordz_test_helpers
absl::core_headers
absl::endian
+ absl::fixed_array
absl::hash
+ absl::log
absl::random_random
- absl::raw_logging_internal
- absl::fixed_array
+ absl::str_format
+ absl::strings
GTest::gmock_main
)
diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc
index 868df2d1..16c96899 100644
--- a/absl/strings/ascii.cc
+++ b/absl/strings/ascii.cc
@@ -14,6 +14,10 @@
#include "absl/strings/ascii.h"
+#include <climits>
+#include <cstring>
+#include <string>
+
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace ascii_internal {
@@ -153,18 +157,62 @@ ABSL_DLL const char kToUpper[256] = {
};
// clang-format on
+// Converts the ASCII letters in [p, end) in place: to uppercase when
+// `ToUpper` is true, to lowercase otherwise. Every other byte is left
+// untouched.
+template <bool ToUpper>
+constexpr void AsciiStrCaseFold(char* p, char* end) {
+  // An ASCII letter and its other-case twin differ in exactly one bit, so
+  // xor-ing with that bit switches the case. Any letter pair yields the same
+  // bit; 'a'/'A' is merely the obvious choice.
+  constexpr unsigned char kCaseBit = 'a' ^ 'A';
+
+  // Bounds of the letter range that has to be converted.
+  constexpr char kFirst = ToUpper ? 'a' : 'A';
+  constexpr char kLast = ToUpper ? 'z' : 'Z';
+
+  while (p < end) {
+    const unsigned char c = static_cast<unsigned char>(*p);
+    // Bitwise & (not &&) keeps the range test branchless; the operands are
+    // cast to int to silence -Wbitwise-instead-of-logical.
+    const int needs_flip =
+        static_cast<int>(kFirst <= c) & static_cast<int>(c <= kLast);
+    *p = static_cast<char>(c ^ (needs_flip != 0 ? kCaseBit : 0));
+    ++p;
+  }
+}
+
+// Compile-time self-test for AsciiStrCaseFold: folds every possible char
+// value both ways and compares against a straightforward reference
+// conversion. Returns 0 when everything matches; otherwise the index of the
+// first mismatch (index 0 is reported as `num_chars` so that a failure never
+// looks like success).
+static constexpr size_t ValidateAsciiCasefold() {
+  constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN;
+  char lowered[num_chars] = {};
+  char uppered[num_chars] = {};
+  for (unsigned int i = 0; i < num_chars; ++i) {
+    const char ch = static_cast<char>(i);
+    lowered[i] = ch;
+    uppered[i] = ch;
+  }
+  AsciiStrCaseFold<false>(lowered, lowered + num_chars);
+  AsciiStrCaseFold<true>(uppered, uppered + num_chars);
+
+  for (size_t i = 0; i < num_chars; ++i) {
+    const char ch = static_cast<char>(i);
+    const bool is_lower = 'a' <= ch && ch <= 'z';
+    const bool is_upper = 'A' <= ch && ch <= 'Z';
+    const char want_upper =
+        is_lower ? static_cast<char>('A' + (ch - 'a')) : ch;
+    const char want_lower =
+        is_upper ? static_cast<char>('a' + (ch - 'A')) : ch;
+    if (uppered[i] != want_upper || lowered[i] != want_lower) {
+      return i > 0 ? i : num_chars;
+    }
+  }
+  return 0;
+}
+
+static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");
+
} // namespace ascii_internal
void AsciiStrToLower(std::string* s) {
- for (auto& ch : *s) {
- ch = absl::ascii_tolower(static_cast<unsigned char>(ch));
- }
+ char* p = &(*s)[0]; // Guaranteed to be valid for empty strings
+ return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size());
}
void AsciiStrToUpper(std::string* s) {
- for (auto& ch : *s) {
- ch = absl::ascii_toupper(static_cast<unsigned char>(ch));
- }
+ char* p = &(*s)[0]; // Guaranteed to be valid for empty strings
+ return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size());
}
void RemoveExtraAsciiWhitespace(std::string* str) {
diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc
index dfed114c..4ea262f1 100644
--- a/absl/strings/ascii_test.cc
+++ b/absl/strings/ascii_test.cc
@@ -14,6 +14,7 @@
#include "absl/strings/ascii.h"
+#include <algorithm>
#include <cctype>
#include <clocale>
#include <cstring>
@@ -189,14 +190,14 @@ TEST(AsciiStrTo, Lower) {
const std::string str("GHIJKL");
const std::string str2("MNOPQR");
const absl::string_view sp(str2);
- std::string mutable_str("STUVWX");
+ std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ");
EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf));
EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str));
EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp));
absl::AsciiStrToLower(&mutable_str);
- EXPECT_EQ("stuvwx", mutable_str);
+ EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str);
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
@@ -207,12 +208,12 @@ TEST(AsciiStrTo, Lower) {
TEST(AsciiStrTo, Upper) {
const char buf[] = "abcdef";
const std::string str("ghijkl");
- const std::string str2("mnopqr");
+ const std::string str2("_`?@[{amnopqrstuvwxyz");
const absl::string_view sp(str2);
EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf));
EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str));
- EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp));
+ EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp));
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
diff --git a/absl/strings/char_formatting_test.cc b/absl/strings/char_formatting_test.cc
new file mode 100644
index 00000000..1692da70
--- /dev/null
+++ b/absl/strings/char_formatting_test.cc
@@ -0,0 +1,169 @@
+// Copyright 2023 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstddef>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/substitute.h"
+
+namespace {
+
+TEST(CharFormatting, Char) {
+ const char v = 'A';
+
+ // Desired behavior: does not compile:
+ // EXPECT_EQ(absl::StrCat(v, "B"), "AB");
+ // EXPECT_EQ(absl::StrFormat("%vB", v), "AB");
+
+ // Legacy behavior: format as char:
+ EXPECT_EQ(absl::Substitute("$0B", v), "AB");
+}
+
+enum CharEnum : char {};
+TEST(CharFormatting, CharEnum) {
+ auto v = static_cast<CharEnum>('A');
+
+ // Desired behavior: format as decimal
+ EXPECT_EQ(absl::StrFormat("%vB", v), "65B");
+ EXPECT_EQ(absl::StrCat(v, "B"), "65B");
+
+ // Legacy behavior: format as character:
+
+ // Some older versions of gcc behave differently in this one case
+#if !defined(__GNUC__) || defined(__clang__)
+ EXPECT_EQ(absl::Substitute("$0B", v), "AB");
+#endif
+}
+
+enum class CharEnumClass: char {};
+TEST(CharFormatting, CharEnumClass) {
+ auto v = static_cast<CharEnumClass>('A');
+
+ // Desired behavior: format as decimal:
+ EXPECT_EQ(absl::StrFormat("%vB", v), "65B");
+ EXPECT_EQ(absl::StrCat(v, "B"), "65B");
+
+ // Legacy behavior: format as character:
+ EXPECT_EQ(absl::Substitute("$0B", v), "AB");
+}
+
+// `unsigned char` values are expected to be rendered as decimal numbers by
+// StrCat, Substitute and StrFormat.
+TEST(CharFormatting, UnsignedChar) {
+ const unsigned char v = 'A';
+
+ // Desired behavior: format as decimal:
+ EXPECT_EQ(absl::StrCat(v, "B"), "65B");
+ EXPECT_EQ(absl::Substitute("$0B", v), "65B");
+ EXPECT_EQ(absl::StrFormat("%vB", v), "65B");
+
+ // Signedness check
+ const unsigned char w = 255;
+ EXPECT_EQ(absl::StrCat(w, "B"), "255B");
+ EXPECT_EQ(absl::Substitute("$0B", w), "255B");
+ // NOTE(review): the StrFormat check below is disabled; the reason is not
+ // visible here -- TODO confirm. It must format `w` (255) to match "255B".
+ // EXPECT_EQ(absl::StrFormat("%vB", w), "255B");
+}
+
+TEST(CharFormatting, SignedChar) {
+ const signed char v = 'A';
+
+ // Desired behavior: format as decimal:
+ EXPECT_EQ(absl::StrCat(v, "B"), "65B");
+ EXPECT_EQ(absl::Substitute("$0B", v), "65B");
+ EXPECT_EQ(absl::StrFormat("%vB", v), "65B");
+
+ // Signedness check
+ const signed char w = -128;
+ EXPECT_EQ(absl::StrCat(w, "B"), "-128B");
+ EXPECT_EQ(absl::Substitute("$0B", w), "-128B");
+}
+
+enum UnsignedCharEnum : unsigned char {};
+TEST(CharFormatting, UnsignedCharEnum) {
+ auto v = static_cast<UnsignedCharEnum>('A');
+
+ // Desired behavior: format as decimal:
+ EXPECT_EQ(absl::StrCat(v, "B"), "65B");
+ EXPECT_EQ(absl::Substitute("$0B", v), "65B");
+ EXPECT_EQ(absl::StrFormat("%vB", v), "65B");
+
+ // Signedness check
+ auto w = static_cast<UnsignedCharEnum>(255);
+ EXPECT_EQ(absl::StrCat(w, "B"), "255B");
+ EXPECT_EQ(absl::Substitute("$0B", w), "255B");
+ EXPECT_EQ(absl::StrFormat("%vB", w), "255B");
+}
+
+enum SignedCharEnum : signed char {};
+TEST(CharFormatting, SignedCharEnum) {
+ auto v = static_cast<SignedCharEnum>('A');
+
+ // Desired behavior: format as decimal:
+ EXPECT_EQ(absl::StrCat(v, "B"), "65B");
+ EXPECT_EQ(absl::Substitute("$0B", v), "65B");
+ EXPECT_EQ(absl::StrFormat("%vB", v), "65B");
+
+ // Signedness check
+ auto w = static_cast<SignedCharEnum>(-128);
+ EXPECT_EQ(absl::StrCat(w, "B"), "-128B");
+ EXPECT_EQ(absl::Substitute("$0B", w), "-128B");
+ EXPECT_EQ(absl::StrFormat("%vB", w), "-128B");
+}
+
+enum class UnsignedCharEnumClass : unsigned char {};
+TEST(CharFormatting, UnsignedCharEnumClass) {
+ auto v = static_cast<UnsignedCharEnumClass>('A');
+
+ // Desired behavior: format as decimal:
+ EXPECT_EQ(absl::StrCat(v, "B"), "65B");
+ EXPECT_EQ(absl::Substitute("$0B", v), "65B");
+ EXPECT_EQ(absl::StrFormat("%vB", v), "65B");
+
+ // Signedness check
+ auto w = static_cast<UnsignedCharEnumClass>(255);
+ EXPECT_EQ(absl::StrCat(w, "B"), "255B");
+ EXPECT_EQ(absl::Substitute("$0B", w), "255B");
+ EXPECT_EQ(absl::StrFormat("%vB", w), "255B");
+}
+
+// Scoped enum backed by `signed char`. It has to be an `enum class`:
+// declared as a plain enum this test would only repeat SignedCharEnum and the
+// scoped signed-char case would go untested.
+enum class SignedCharEnumClass : signed char {};
+TEST(CharFormatting, SignedCharEnumClass) {
+  auto v = static_cast<SignedCharEnumClass>('A');
+
+  // Desired behavior: format as decimal:
+  EXPECT_EQ(absl::StrCat(v, "B"), "65B");
+  EXPECT_EQ(absl::Substitute("$0B", v), "65B");
+  EXPECT_EQ(absl::StrFormat("%vB", v), "65B");
+
+  // Signedness check: the most negative value must keep its sign.
+  auto w = static_cast<SignedCharEnumClass>(-128);
+  EXPECT_EQ(absl::StrCat(w, "B"), "-128B");
+  EXPECT_EQ(absl::Substitute("$0B", w), "-128B");
+  EXPECT_EQ(absl::StrFormat("%vB", w), "-128B");
+}
+
+// Only compiled when the standard library advertises std::byte through the
+// __cpp_lib_byte feature-test macro.
+#ifdef __cpp_lib_byte
+TEST(CharFormatting, StdByte) {
+ auto v = static_cast<std::byte>('A');
+ // Desired behavior: format as hexadecimal, in the style of 0xff
+ // (No APIs do this today.)
+
+ // Legacy behavior: format as decimal:
+ EXPECT_EQ(absl::StrCat(v, "B"), "65B");
+ EXPECT_EQ(absl::Substitute("$0B", v), "65B");
+ EXPECT_EQ(absl::StrFormat("%vB", v), "65B");
+}
+#endif // __cpp_lib_byte
+
+} // namespace
diff --git a/absl/strings/charconv.cc b/absl/strings/charconv.cc
index 69d420bc..778a1c75 100644
--- a/absl/strings/charconv.cc
+++ b/absl/strings/charconv.cc
@@ -21,6 +21,7 @@
#include <limits>
#include "absl/base/casts.h"
+#include "absl/base/config.h"
#include "absl/numeric/bits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/internal/charconv_bigint.h"
@@ -118,10 +119,17 @@ struct FloatTraits<double> {
static constexpr int kEiselLemireMaxExclusiveExp10 = 309;
static double MakeNan(const char* tagp) {
+#if ABSL_HAVE_BUILTIN(__builtin_nan)
+ // Use __builtin_nan() if available since it has a fix for
+ // https://bugs.llvm.org/show_bug.cgi?id=37778
+ // std::nan may use the glibc implementation.
+ return __builtin_nan(tagp);
+#else
// Support nan no matter which namespace it's in. Some platforms
// incorrectly don't put it in namespace std.
using namespace std; // NOLINT
return nan(tagp);
+#endif
}
// Builds a nonzero floating point number out of the provided parts.
@@ -184,10 +192,17 @@ struct FloatTraits<float> {
static constexpr int kEiselLemireMaxExclusiveExp10 = 39;
+ // Returns a quiet NaN whose payload is derived from the n-char-sequence
+ // `tagp`, as nanf() does.
static float MakeNan(const char* tagp) {
+#if ABSL_HAVE_BUILTIN(__builtin_nanf)
+ // Use __builtin_nanf() if available since it has a fix for
+ // https://bugs.llvm.org/show_bug.cgi?id=37778
+ // std::nanf may use the glibc implementation.
+ return __builtin_nanf(tagp);
+#else
// Support nanf no matter which namespace it's in. Some platforms
// incorrectly don't put it in namespace std.
using namespace std; // NOLINT
+ // The call has to stay unqualified: writing std::nanf would defeat the
+ // using-directive above on exactly the platforms it exists for. This also
+ // matches the unqualified nan() call in FloatTraits<double>::MakeNan.
return nanf(tagp);
+#endif
}
static float Make(mantissa_t mantissa, int exponent, bool sign) {
@@ -203,7 +218,8 @@ struct FloatTraits<float> {
if (mantissa > kMantissaMask) {
// Normal value.
// Adjust by 127 for the exponent representation bias, and an additional
- // 23 due to the implied decimal point in the IEEE mantissa represenation.
+ // 23 due to the implied decimal point in the IEEE mantissa
+ // representation.
flt += static_cast<uint32_t>(exponent + 127 + kTargetMantissaBits - 1)
<< 23;
mantissa &= kMantissaMask;
@@ -349,7 +365,8 @@ bool HandleEdgeCase(const strings_internal::ParsedFloat& input, bool negative,
// https://bugs.llvm.org/show_bug.cgi?id=37778
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86113
constexpr ptrdiff_t kNanBufferSize = 128;
-#if defined(__GNUC__) || (defined(__clang__) && __clang_major__ < 7)
+#if (defined(__GNUC__) && !defined(__clang__)) || \
+ (defined(__clang__) && __clang_major__ < 7)
volatile char n_char_sequence[kNanBufferSize];
#else
char n_char_sequence[kNanBufferSize];
@@ -462,7 +479,7 @@ uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact,
// the low bit of `value` is set.
//
// In inexact mode, the nonzero error means the actual value is greater
- // than the halfway point and we must alway round up.
+ // than the halfway point and we must always round up.
if ((value & 1) == 1 || !input_exact) {
++value;
}
diff --git a/absl/strings/charconv.h b/absl/strings/charconv.h
index 7c509812..111c7120 100644
--- a/absl/strings/charconv.h
+++ b/absl/strings/charconv.h
@@ -22,7 +22,7 @@
namespace absl {
ABSL_NAMESPACE_BEGIN
-// Workalike compatibilty version of std::chars_format from C++17.
+// Workalike compatibility version of std::chars_format from C++17.
//
// This is a bitfield enumerator which can be passed to absl::from_chars to
// configure the string-to-float conversion.
@@ -48,7 +48,7 @@ struct from_chars_result {
std::errc ec;
};
-// Workalike compatibilty version of std::from_chars from C++17. Currently
+// Workalike compatibility version of std::from_chars from C++17. Currently
// this only supports the `double` and `float` types.
//
// This interface incorporates the proposed resolutions for library issues
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc
index 1d33dd83..14976aef 100644
--- a/absl/strings/cord.cc
+++ b/absl/strings/cord.cc
@@ -48,7 +48,6 @@
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/str_cat.h"
-#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
@@ -795,7 +794,7 @@ int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
}
// This overload set computes comparison results from memcmp result. This
-// interface is used inside GenericCompare below. Differet implementations
+// interface is used inside GenericCompare below. Different implementations
// are specialized for int and bool. For int we clamp result to {-1, 0, 1}
// set. For bool we are just interested in "value == 0".
template <typename ResultType>
diff --git a/absl/strings/cord.h b/absl/strings/cord.h
index c4a0d5aa..457ccf06 100644
--- a/absl/strings/cord.h
+++ b/absl/strings/cord.h
@@ -110,9 +110,30 @@ enum class CordMemoryAccounting {
// Counts the *approximate* number of bytes held in full or in part by this
// Cord (which may not remain the same between invocations). Cords that share
// memory could each be "charged" independently for the same shared memory.
+ // See also comment on `kTotalMorePrecise` on internally shared memory.
kTotal,
// Counts the *approximate* number of bytes held in full or in part by this
+ // Cord for the distinct memory held by this cord. This option is similar
+ // to `kTotal`, except that if the cord has multiple references to the same
+ // memory, that memory is only counted once.
+ //
+ // For example:
+ // absl::Cord cord;
+ // cord.Append(some_other_cord);
+ // cord.Append(some_other_cord);
+ // // Counts `some_other_cord` twice:
+ // cord.EstimatedMemoryUsage(kTotal);
+ // // Counts `some_other_cord` once:
+ // cord.EstimatedMemoryUsage(kTotalMorePrecise);
+ //
+ // The `kTotalMorePrecise` number is more expensive to compute as it requires
+ // deduplicating all memory references. Applications should prefer to use
+ // `kFairShare` or `kTotal` unless they really need a more precise estimate
+ // on "how much memory is potentially held / kept alive by this cord?"
+ kTotalMorePrecise,
+
+ // Counts the *approximate* number of bytes held in full or in part by this
// Cord weighted by the sharing ratio of that data. For example, if some data
// edge is shared by 4 different Cords, then each cord is attributed 1/4th of
// the total memory usage as a 'fair share' of the total memory usage.
@@ -661,7 +682,7 @@ class Cord {
class CharRange {
public:
// Fulfill minimum c++ container requirements [container.requirements]
- // Theses (partial) container type definitions allow CharRange to be used
+ // These (partial) container type definitions allow CharRange to be used
// in various utilities expecting a subset of [container.requirements].
// For example, the below enables using `::testing::ElementsAre(...)`
using value_type = char;
@@ -1273,10 +1294,16 @@ inline size_t Cord::EstimatedMemoryUsage(
CordMemoryAccounting accounting_method) const {
size_t result = sizeof(Cord);
if (const absl::cord_internal::CordRep* rep = contents_.tree()) {
- if (accounting_method == CordMemoryAccounting::kFairShare) {
- result += cord_internal::GetEstimatedFairShareMemoryUsage(rep);
- } else {
- result += cord_internal::GetEstimatedMemoryUsage(rep);
+ switch (accounting_method) {
+ case CordMemoryAccounting::kFairShare:
+ result += cord_internal::GetEstimatedFairShareMemoryUsage(rep);
+ break;
+ case CordMemoryAccounting::kTotalMorePrecise:
+ result += cord_internal::GetMorePreciseMemoryUsage(rep);
+ break;
+ case CordMemoryAccounting::kTotal:
+ result += cord_internal::GetEstimatedMemoryUsage(rep);
+ break;
}
}
return result;
diff --git a/absl/strings/cord_analysis.cc b/absl/strings/cord_analysis.cc
index 73d3c4e6..e859b0db 100644
--- a/absl/strings/cord_analysis.cc
+++ b/absl/strings/cord_analysis.cc
@@ -16,6 +16,7 @@
#include <cstddef>
#include <cstdint>
+#include <unordered_set>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
@@ -37,7 +38,7 @@ namespace cord_internal {
namespace {
// Accounting mode for analyzing memory usage.
-enum class Mode { kTotal, kFairShare };
+enum class Mode { kFairShare, kTotal, kTotalMorePrecise };
// CordRepRef holds a `const CordRep*` reference in rep, and depending on mode,
// holds a 'fraction' representing a cumulative inverse refcount weight.
@@ -62,6 +63,23 @@ struct RawUsage {
void Add(size_t size, CordRepRef<mode>) { total += size; }
};
+// Specialization of RawUsage for Mode::kTotalMorePrecise. It remembers every
+// CordRep it has already counted, so a rep that the same Cord references more
+// than once contributes its size only once.
+template <>
+struct RawUsage<Mode::kTotalMorePrecise> {
+  // Sum of the sizes of all distinct reps seen so far.
+  size_t total = 0;
+  // Reps already included in `total`.
+  // TODO(b/289250880): Replace this with a flat_hash_set.
+  std::unordered_set<const CordRep*> counted;
+
+  // Adds `size` to `total` unless `repref.rep` has been counted before.
+  void Add(size_t size, CordRepRef<Mode::kTotalMorePrecise> repref) {
+    // insert() reports whether the pointer was new, so one hash lookup
+    // serves as both the membership test and the insertion.
+    if (counted.insert(repref.rep).second) {
+      total += size;
+    }
+  }
+};
+
// Returns n / refcount avoiding a div for the common refcount == 1.
template <typename refcount_t>
double MaybeDiv(double d, refcount_t refcount) {
@@ -183,6 +201,10 @@ size_t GetEstimatedFairShareMemoryUsage(const CordRep* rep) {
return GetEstimatedUsage<Mode::kFairShare>(rep);
}
+// Thin entry point: forwards `rep` to GetEstimatedUsage() instantiated for
+// Mode::kTotalMorePrecise and returns its result.
+size_t GetMorePreciseMemoryUsage(const CordRep* rep) {
+ return GetEstimatedUsage<Mode::kTotalMorePrecise>(rep);
+}
+
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
diff --git a/absl/strings/cord_analysis.h b/absl/strings/cord_analysis.h
index 7041ad1a..9b9527a5 100644
--- a/absl/strings/cord_analysis.h
+++ b/absl/strings/cord_analysis.h
@@ -31,6 +31,24 @@ namespace cord_internal {
size_t GetEstimatedMemoryUsage(const CordRep* rep);
// Returns the *approximate* number of bytes held in full or in part by this
+// Cord for the distinct memory held by this cord. This is similar to
+// `GetEstimatedMemoryUsage()`, except that if the cord has multiple references
+// to the same memory, that memory is only counted once.
+//
+// For example:
+// absl::Cord cord;
+// cord.Append(some_other_cord);
+// cord.Append(some_other_cord);
+// // Calls GetEstimatedMemoryUsage() and counts `some_other_cord` twice:
+// cord.EstimatedMemoryUsage(kTotal);
+// // Calls GetMorePreciseMemoryUsage() and counts `some_other_cord` once:
+// cord.EstimatedMemoryUsage(kTotalMorePrecise);
+//
+// This is more expensive than `GetEstimatedMemoryUsage()` as it requires
+// deduplicating all memory references.
+size_t GetMorePreciseMemoryUsage(const CordRep* rep);
+
+// Returns the *approximate* number of bytes held in full or in part by this
// CordRep weighted by the sharing ratio of that data. For example, if some data
// edge is shared by 4 different Cords, then each cord is attributed 1/4th of
// the total memory usage as a 'fair share' of the total memory usage.
diff --git a/absl/strings/cord_buffer.h b/absl/strings/cord_buffer.h
index 15494b31..bc0e4e45 100644
--- a/absl/strings/cord_buffer.h
+++ b/absl/strings/cord_buffer.h
@@ -160,7 +160,6 @@ class CordBuffer {
// for more information on buffer capacities and intended usage.
static CordBuffer CreateWithDefaultLimit(size_t capacity);
-
// CordBuffer::CreateWithCustomLimit()
//
// Creates a CordBuffer instance of the desired `capacity` rounded to an
@@ -336,7 +335,7 @@ class CordBuffer {
}
// Returns the available area of the long (non-SSO) representation
- absl::Span<char> long_available() {
+ absl::Span<char> long_available() const {
assert(!is_short());
const size_t length = long_rep.rep->length;
return absl::Span<char>(long_rep.rep->Data() + length,
@@ -460,9 +459,7 @@ inline constexpr size_t CordBuffer::MaximumPayload() {
}
inline constexpr size_t CordBuffer::MaximumPayload(size_t block_size) {
- // TODO(absl-team): Use std::min when C++11 support is dropped.
- return (kCustomLimit < block_size ? kCustomLimit : block_size) -
- cord_internal::kFlatOverhead;
+ return (std::min)(kCustomLimit, block_size) - cord_internal::kFlatOverhead;
}
inline CordBuffer CordBuffer::CreateWithDefaultLimit(size_t capacity) {
diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc
index 5603e94c..36e397ed 100644
--- a/absl/strings/cord_test.cc
+++ b/absl/strings/cord_test.cc
@@ -30,10 +30,11 @@
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
-#include "absl/base/internal/raw_logging.h"
#include "absl/base/macros.h"
#include "absl/container/fixed_array.h"
#include "absl/hash/hash.h"
+#include "absl/log/check.h"
+#include "absl/log/log.h"
#include "absl/random/random.h"
#include "absl/strings/cord_test_helpers.h"
#include "absl/strings/cordz_test_helpers.h"
@@ -58,6 +59,8 @@ using absl::cord_internal::CordRepSubstring;
using absl::cord_internal::CordzUpdateTracker;
using absl::cord_internal::kFlatOverhead;
using absl::cord_internal::kMaxFlatLength;
+using ::testing::ElementsAre;
+using ::testing::Le;
static std::string RandomLowercaseString(RandomEngine* rng);
static std::string RandomLowercaseString(RandomEngine* rng, size_t length);
@@ -208,9 +211,8 @@ class CordTestPeer {
}
static Cord MakeSubstring(Cord src, size_t offset, size_t length) {
- ABSL_RAW_CHECK(src.contents_.is_tree(), "Can not be inlined");
- ABSL_RAW_CHECK(src.ExpectedChecksum() == absl::nullopt,
- "Can not be hardened");
+ CHECK(src.contents_.is_tree()) << "Can not be inlined";
+ CHECK(!src.ExpectedChecksum().has_value()) << "Can not be hardened";
Cord cord;
auto* tree = cord_internal::SkipCrcNode(src.contents_.tree());
auto* rep = CordRepSubstring::Create(CordRep::Ref(tree), offset, length);
@@ -372,7 +374,7 @@ TEST_P(CordTest, GigabyteCordFromExternal) {
for (int i = 0; i < 1024; ++i) {
c.Append(from);
}
- ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size());
+ LOG(INFO) << "Made a Cord with " << c.size() << " bytes!";
// Note: on a 32-bit build, this comes out to 2,818,048,000 bytes.
// Note: on a 64-bit build, this comes out to 171,932,385,280 bytes.
}
@@ -618,7 +620,7 @@ TEST_P(CordTest, AppendEmptyBufferToTree) {
TEST_P(CordTest, AppendSmallBuffer) {
absl::Cord cord;
absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3);
- ASSERT_THAT(buffer.capacity(), ::testing::Le(15));
+ ASSERT_THAT(buffer.capacity(), Le(15));
memcpy(buffer.data(), "Abc", 3);
buffer.SetLength(3);
cord.Append(std::move(buffer));
@@ -632,7 +634,7 @@ TEST_P(CordTest, AppendSmallBuffer) {
EXPECT_EQ(buffer.length(), 0); // NOLINT
EXPECT_GT(buffer.capacity(), 0); // NOLINT
- EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("Abcdefgh"));
+ EXPECT_THAT(cord.Chunks(), ElementsAre("Abcdefgh"));
}
TEST_P(CordTest, AppendAndPrependBufferArePrecise) {
@@ -671,7 +673,7 @@ TEST_P(CordTest, AppendAndPrependBufferArePrecise) {
TEST_P(CordTest, PrependSmallBuffer) {
absl::Cord cord;
absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3);
- ASSERT_THAT(buffer.capacity(), ::testing::Le(15));
+ ASSERT_THAT(buffer.capacity(), Le(15));
memcpy(buffer.data(), "Abc", 3);
buffer.SetLength(3);
cord.Prepend(std::move(buffer));
@@ -685,7 +687,7 @@ TEST_P(CordTest, PrependSmallBuffer) {
EXPECT_EQ(buffer.length(), 0); // NOLINT
EXPECT_GT(buffer.capacity(), 0); // NOLINT
- EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("defghAbc"));
+ EXPECT_THAT(cord.Chunks(), ElementsAre("defghAbc"));
}
TEST_P(CordTest, AppendLargeBuffer) {
@@ -707,7 +709,7 @@ TEST_P(CordTest, AppendLargeBuffer) {
EXPECT_EQ(buffer.length(), 0); // NOLINT
EXPECT_GT(buffer.capacity(), 0); // NOLINT
- EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s1, s2));
+ EXPECT_THAT(cord.Chunks(), ElementsAre(s1, s2));
}
TEST_P(CordTest, PrependLargeBuffer) {
@@ -729,7 +731,7 @@ TEST_P(CordTest, PrependLargeBuffer) {
EXPECT_EQ(buffer.length(), 0); // NOLINT
EXPECT_GT(buffer.capacity(), 0); // NOLINT
- EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s2, s1));
+ EXPECT_THAT(cord.Chunks(), ElementsAre(s2, s1));
}
class CordAppendBufferTest : public testing::TestWithParam<bool> {
@@ -1245,15 +1247,15 @@ absl::Cord BigCord(size_t len, char v) {
// Splice block into cord.
absl::Cord SpliceCord(const absl::Cord& blob, int64_t offset,
const absl::Cord& block) {
- ABSL_RAW_CHECK(offset >= 0, "");
- ABSL_RAW_CHECK(offset + block.size() <= blob.size(), "");
+ CHECK_GE(offset, 0);
+ CHECK_LE(static_cast<size_t>(offset) + block.size(), blob.size());
absl::Cord result(blob);
result.RemoveSuffix(blob.size() - offset);
result.Append(block);
absl::Cord suffix(blob);
suffix.RemovePrefix(offset + block.size());
result.Append(suffix);
- ABSL_RAW_CHECK(blob.size() == result.size(), "");
+ CHECK_EQ(blob.size(), result.size());
return result;
}
@@ -1763,6 +1765,8 @@ TEST_P(CordTest, ExternalMemoryGet) {
// of empty and inlined cords, and flat nodes.
constexpr auto kFairShare = absl::CordMemoryAccounting::kFairShare;
+constexpr auto kTotalMorePrecise =
+ absl::CordMemoryAccounting::kTotalMorePrecise;
// Creates a cord of `n` `c` values, making sure no string stealing occurs.
absl::Cord MakeCord(size_t n, char c) {
@@ -1774,12 +1778,14 @@ TEST(CordTest, CordMemoryUsageEmpty) {
absl::Cord cord;
EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage());
EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kFairShare));
+ EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kTotalMorePrecise));
}
TEST(CordTest, CordMemoryUsageInlined) {
absl::Cord a("hello");
EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord));
EXPECT_EQ(a.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord));
+ EXPECT_EQ(a.EstimatedMemoryUsage(kTotalMorePrecise), sizeof(absl::Cord));
}
TEST(CordTest, CordMemoryUsageExternalMemory) {
@@ -1789,6 +1795,7 @@ TEST(CordTest, CordMemoryUsageExternalMemory) {
sizeof(absl::Cord) + 1000 + sizeof(CordRepExternal) + sizeof(intptr_t);
EXPECT_EQ(cord.EstimatedMemoryUsage(), expected);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), expected);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), expected);
}
TEST(CordTest, CordMemoryUsageFlat) {
@@ -1798,6 +1805,8 @@ TEST(CordTest, CordMemoryUsageFlat) {
EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + flat_size);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + flat_size);
}
TEST(CordTest, CordMemoryUsageSubStringSharedFlat) {
@@ -1807,6 +1816,8 @@ TEST(CordTest, CordMemoryUsageSubStringSharedFlat) {
absl::Cord cord = flat.Subcord(500, 1000);
EXPECT_EQ(cord.EstimatedMemoryUsage(),
sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size / 2);
}
@@ -1817,6 +1828,8 @@ TEST(CordTest, CordMemoryUsageFlatShared) {
const size_t flat_size =
absl::CordTestPeer::Tree(cord)->flat()->AllocatedSize();
EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + flat_size / 2);
}
@@ -1835,6 +1848,8 @@ TEST(CordTest, CordMemoryUsageFlatHardenedAndShared) {
absl::Cord cord2(cord);
EXPECT_EQ(cord2.EstimatedMemoryUsage(),
sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size);
+ EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size);
EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + (sizeof(CordRepCrc) + flat_size / 2) / 2);
}
@@ -1853,7 +1868,7 @@ TEST(CordTest, CordMemoryUsageBTree) {
// windows DLL, we may have ODR like effects on the flag, meaning the DLL
// code will run with the picked up default.
if (!absl::CordTestPeer::Tree(cord1)->IsBtree()) {
- ABSL_RAW_LOG(WARNING, "Cord library code not respecting btree flag");
+ LOG(WARNING) << "Cord library code not respecting btree flag";
return;
}
@@ -1861,6 +1876,8 @@ TEST(CordTest, CordMemoryUsageBTree) {
size_t rep1_shared_size = sizeof(CordRepBtree) + flats1_size / 2;
EXPECT_EQ(cord1.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep1_size);
+ EXPECT_EQ(cord1.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + rep1_size);
EXPECT_EQ(cord1.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + rep1_shared_size);
@@ -1875,6 +1892,8 @@ TEST(CordTest, CordMemoryUsageBTree) {
size_t rep2_size = sizeof(CordRepBtree) + flats2_size;
EXPECT_EQ(cord2.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep2_size);
+ EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + rep2_size);
EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + rep2_size);
@@ -1883,6 +1902,8 @@ TEST(CordTest, CordMemoryUsageBTree) {
EXPECT_EQ(cord.EstimatedMemoryUsage(),
sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_shared_size / 2 +
rep2_size);
@@ -1901,6 +1922,66 @@ TEST_P(CordTest, CordMemoryUsageInlineRep) {
EXPECT_EQ(c1.EstimatedMemoryUsage(), c2.EstimatedMemoryUsage());
}
+TEST_P(CordTest, CordMemoryUsageTotalMorePreciseMode) {
+ constexpr size_t kChunkSize = 2000;
+ std::string tmp_str(kChunkSize, 'x');
+ const absl::Cord flat(std::move(tmp_str));
+
+ // Construct `fragmented` with two references into the same
+ // underlying buffer shared with `flat`:
+ absl::Cord fragmented(flat);
+ fragmented.Append(flat);
+
+ // Memory usage of `flat`, minus the top-level Cord object:
+ const size_t flat_internal_usage =
+ flat.EstimatedMemoryUsage() - sizeof(absl::Cord);
+
+ // `fragmented` holds a Cord and a CordRepBtree. That tree points to two
+ // copies of flat's internals, which we expect to dedup:
+ EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) +
+ sizeof(CordRepBtree) +
+ flat_internal_usage);
+
+ // This is a case where kTotal produces an overestimate:
+ EXPECT_EQ(fragmented.EstimatedMemoryUsage(),
+ sizeof(absl::Cord) +
+ sizeof(CordRepBtree) +
+ 2 * flat_internal_usage);
+}
+
+TEST_P(CordTest, CordMemoryUsageTotalMorePreciseModeWithSubstring) {
+ constexpr size_t kChunkSize = 2000;
+ std::string tmp_str(kChunkSize, 'x');
+ const absl::Cord flat(std::move(tmp_str));
+
+ // Construct `fragmented` with two references into the same
+ // underlying buffer shared with `flat`.
+ //
+ // This time, each reference is through a Subcord():
+ absl::Cord fragmented;
+ fragmented.Append(flat.Subcord(1, kChunkSize - 2));
+ fragmented.Append(flat.Subcord(1, kChunkSize - 2));
+
+ // Memory usage of `flat`, minus the top-level Cord object:
+ const size_t flat_internal_usage =
+ flat.EstimatedMemoryUsage() - sizeof(absl::Cord);
+
+ // `fragmented` holds a Cord and a CordRepBtree. That tree points to two
+ // CordRepSubstrings, each pointing at flat's internals.
+ EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) +
+ sizeof(CordRepBtree) +
+ 2 * sizeof(CordRepSubstring) +
+ flat_internal_usage);
+
+ // This is a case where kTotal produces an overestimate:
+ EXPECT_EQ(fragmented.EstimatedMemoryUsage(),
+ sizeof(absl::Cord) +
+ sizeof(CordRepBtree) +
+ 2 * sizeof(CordRepSubstring) +
+ 2 * flat_internal_usage);
+}
} // namespace
// Regtest for 7510292 (fix a bug introduced by 7465150)
@@ -1938,8 +2019,7 @@ TEST_P(CordTest, DiabolicalGrowth) {
std::string value;
absl::CopyCordToString(cord, &value);
EXPECT_EQ(value, expected);
- ABSL_RAW_LOG(INFO, "Diabolical size allocated = %zu",
- cord.EstimatedMemoryUsage());
+ LOG(INFO) << "Diabolical size allocated = " << cord.EstimatedMemoryUsage();
}
// The following tests check support for >4GB cords in 64-bit binaries, and
diff --git a/absl/strings/cord_test_helpers.h b/absl/strings/cord_test_helpers.h
index 31a1dc89..ca52240a 100644
--- a/absl/strings/cord_test_helpers.h
+++ b/absl/strings/cord_test_helpers.h
@@ -51,7 +51,7 @@ enum class TestCordSize {
// existing inputs rather than copying contents of the input.
kMedium = cord_internal::kMaxFlatLength / 2 + 1,
- // A string value large enough to cause it to be stored in mutliple flats.
+ // A string value large enough to cause it to be stored in multiple flats.
kLarge = cord_internal::kMaxFlatLength * 4
};
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index 93966846..2827fbaa 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -443,6 +443,8 @@ void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) {
}
}
+// Reverses the mapping in Base64EscapeInternal; see that method's
+// documentation for details of the mapping.
bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
size_t szdest, const signed char* unbase64,
size_t* len) {
@@ -676,7 +678,10 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
return ok;
}
-// The arrays below were generated by the following code
+// The arrays below map base64-escaped characters back to their original values.
+// For the inverse case, see k(WebSafe)Base64Chars in the internal
+// escaping.cc.
+// These arrays were generated by the following inversion code:
// #include <sys/time.h>
// #include <stdlib.h>
// #include <string.h>
@@ -703,8 +708,8 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
// }
// }
//
-// where the value of "Base64[]" was replaced by one of the base-64 conversion
-// tables from the functions below.
+// where the value of "Base64[]" was replaced by one of k(WebSafe)Base64Chars
+// in the internal escaping.cc.
/* clang-format off */
constexpr signed char kUnBase64[] = {
-1, -1, -1, -1, -1, -1, -1, -1,
@@ -777,9 +782,6 @@ constexpr signed char kUnWebSafeBase64[] = {
};
/* clang-format on */
-constexpr char kWebSafeBase64Chars[] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
-
template <typename String>
bool Base64UnescapeInternal(const char* src, size_t slen, String* dest,
const signed char* unbase64) {
@@ -880,30 +882,6 @@ std::string Utf8SafeCHexEscape(absl::string_view src) {
return CEscapeInternal(src, true, true);
}
-// ----------------------------------------------------------------------
-// Base64Unescape() - base64 decoder
-// Base64Escape() - base64 encoder
-// WebSafeBase64Unescape() - Google's variation of base64 decoder
-// WebSafeBase64Escape() - Google's variation of base64 encoder
-//
-// Check out
-// https://datatracker.ietf.org/doc/html/rfc2045 for formal description, but
-// what we care about is that...
-// Take the encoded stuff in groups of 4 characters and turn each
-// character into a code 0 to 63 thus:
-// A-Z map to 0 to 25
-// a-z map to 26 to 51
-// 0-9 map to 52 to 61
-// +(- for WebSafe) maps to 62
-// /(_ for WebSafe) maps to 63
-// There will be four numbers, all less than 64 which can be represented
-// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
-// Arrange the 6 digit binary numbers into three bytes as such:
-// aaaaaabb bbbbcccc ccdddddd
-// Equals signs (one or two) are used at the end of the encoded block to
-// indicate that the text was not an integer multiple of three bytes long.
-// ----------------------------------------------------------------------
-
bool Base64Unescape(absl::string_view src, std::string* dest) {
return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
}
@@ -921,7 +899,7 @@ void Base64Escape(absl::string_view src, std::string* dest) {
void WebSafeBase64Escape(absl::string_view src, std::string* dest) {
strings_internal::Base64EscapeInternal(
reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
- false, kWebSafeBase64Chars);
+ false, strings_internal::kWebSafeBase64Chars);
}
std::string Base64Escape(absl::string_view src) {
@@ -936,7 +914,7 @@ std::string WebSafeBase64Escape(absl::string_view src) {
std::string dest;
strings_internal::Base64EscapeInternal(
reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
- false, kWebSafeBase64Chars);
+ false, strings_internal::kWebSafeBase64Chars);
return dest;
}
diff --git a/absl/strings/escaping.h b/absl/strings/escaping.h
index 7c082fef..bf2a5898 100644
--- a/absl/strings/escaping.h
+++ b/absl/strings/escaping.h
@@ -121,7 +121,7 @@ std::string Utf8SafeCHexEscape(absl::string_view src);
//
// Encodes a `src` string into a base64-encoded 'dest' string with padding
// characters. This function conforms with RFC 4648 section 4 (base64) and RFC
-// 2045. See also CalculateBase64EscapedLen().
+// 2045.
void Base64Escape(absl::string_view src, std::string* dest);
std::string Base64Escape(absl::string_view src);
diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc
index 44ffcba7..9f62c1ee 100644
--- a/absl/strings/escaping_test.cc
+++ b/absl/strings/escaping_test.cc
@@ -562,6 +562,7 @@ template <typename StringType>
void TestEscapeAndUnescape() {
// Check the short strings; this tests the math (and boundaries)
for (const auto& tc : base64_tests) {
+ // Test plain base64.
StringType encoded("this junk should be ignored");
absl::Base64Escape(tc.plaintext, &encoded);
EXPECT_EQ(encoded, tc.cyphertext);
@@ -571,22 +572,26 @@ void TestEscapeAndUnescape() {
EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
EXPECT_EQ(decoded, tc.plaintext);
- StringType websafe(tc.cyphertext);
- for (int c = 0; c < websafe.size(); ++c) {
- if ('+' == websafe[c]) websafe[c] = '-';
- if ('/' == websafe[c]) websafe[c] = '_';
+ StringType websafe_with_padding(tc.cyphertext);
+ for (unsigned int c = 0; c < websafe_with_padding.size(); ++c) {
+ if ('+' == websafe_with_padding[c]) websafe_with_padding[c] = '-';
+ if ('/' == websafe_with_padding[c]) websafe_with_padding[c] = '_';
+ // Intentionally keeping padding aka '='.
+ }
+
+ // Test plain websafe (aka without padding).
+ StringType websafe(websafe_with_padding);
+ for (unsigned int c = 0; c < websafe.size(); ++c) {
if ('=' == websafe[c]) {
websafe.resize(c);
break;
}
}
-
encoded = "this junk should be ignored";
absl::WebSafeBase64Escape(tc.plaintext, &encoded);
EXPECT_EQ(encoded, websafe);
EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe);
- // Let's try the string version of the decoder
decoded = "this junk should be ignored";
EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded));
EXPECT_EQ(decoded, tc.plaintext);
diff --git a/absl/strings/internal/charconv_bigint.cc b/absl/strings/internal/charconv_bigint.cc
index 282b639e..46b5289a 100644
--- a/absl/strings/internal/charconv_bigint.cc
+++ b/absl/strings/internal/charconv_bigint.cc
@@ -296,10 +296,8 @@ template <int max_words>
std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex);
if (first_pass) {
// just copy, rather than multiplying by 1
- std::copy(
- LargePowerOfFiveData(big_power),
- LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power),
- answer.words_);
+ std::copy_n(LargePowerOfFiveData(big_power),
+ LargePowerOfFiveSize(big_power), answer.words_);
answer.size_ = LargePowerOfFiveSize(big_power);
first_pass = false;
} else {
diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h
index 8f702976..5c0c375d 100644
--- a/absl/strings/internal/charconv_bigint.h
+++ b/absl/strings/internal/charconv_bigint.h
@@ -92,7 +92,7 @@ class BigUnsigned {
// numbers with this many decimal digits or fewer are representable by this
// type.
//
- // Analagous to std::numeric_limits<BigUnsigned>::digits10.
+ // Analogous to std::numeric_limits<BigUnsigned>::digits10.
static constexpr int Digits10() {
// 9975007/1035508 is very slightly less than log10(2**32).
return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
@@ -121,7 +121,7 @@ class BigUnsigned {
++size_;
}
}
- std::fill(words_, words_ + word_shift, 0u);
+ std::fill_n(words_, word_shift, 0u);
}
}
@@ -197,7 +197,7 @@ class BigUnsigned {
}
void SetToZero() {
- std::fill(words_, words_ + size_, 0u);
+ std::fill_n(words_, size_, 0u);
size_ = 0;
}
diff --git a/absl/strings/internal/charconv_parse_test.cc b/absl/strings/internal/charconv_parse_test.cc
index bc2d1118..2b7b0821 100644
--- a/absl/strings/internal/charconv_parse_test.cc
+++ b/absl/strings/internal/charconv_parse_test.cc
@@ -19,7 +19,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "absl/base/internal/raw_logging.h"
+#include "absl/log/check.h"
#include "absl/strings/str_cat.h"
using absl::chars_format;
@@ -56,14 +56,14 @@ void ExpectParsedFloat(std::string s, absl::chars_format format_flags,
begin_subrange = static_cast<int>(open_bracket_pos);
s.replace(open_bracket_pos, 1, "");
std::string::size_type close_bracket_pos = s.find(']');
- ABSL_RAW_CHECK(close_bracket_pos != absl::string_view::npos,
- "Test input contains [ without matching ]");
+ CHECK_NE(close_bracket_pos, absl::string_view::npos)
+ << "Test input contains [ without matching ]";
end_subrange = static_cast<int>(close_bracket_pos);
s.replace(close_bracket_pos, 1, "");
}
const std::string::size_type expected_characters_matched = s.find('$');
- ABSL_RAW_CHECK(expected_characters_matched != std::string::npos,
- "Input string must contain $");
+ CHECK_NE(expected_characters_matched, std::string::npos)
+ << "Input string must contain $";
s.replace(expected_characters_matched, 1, "");
ParsedFloat parsed =
diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc
index b6b06cfa..b7874385 100644
--- a/absl/strings/internal/cord_internal.cc
+++ b/absl/strings/internal/cord_internal.cc
@@ -33,7 +33,6 @@ ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled(
kCordEnableRingBufferDefault);
ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled(
kCordShallowSubcordsDefault);
-ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false);
void LogFatalNodeType(CordRep* rep) {
ABSL_INTERNAL_LOG(FATAL, absl::StrCat("Unexpected node type: ",
diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h
index e6f0d544..20dd008c 100644
--- a/absl/strings/internal/cord_internal.h
+++ b/absl/strings/internal/cord_internal.h
@@ -69,12 +69,6 @@ enum CordFeatureDefaults {
extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;
-// `cord_btree_exhaustive_validation` can be set to force exhaustive validation
-// in debug assertions, and code that calls `IsValid()` explicitly. By default,
-// assertions should be relatively cheap and AssertValid() can easily lead to
-// O(n^2) complexity as recursive / full tree validation is O(n).
-extern std::atomic<bool> cord_btree_exhaustive_validation;
-
inline void enable_cord_ring_buffer(bool enable) {
cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
}
@@ -163,20 +157,19 @@ class RefcountAndFlags {
// false will be visible to a thread that just observed this method returning
// false. Always returns false when the immortal bit is set.
inline bool Decrement() {
- int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask;
- assert(refcount > 0 || refcount & kImmortalFlag);
+ int32_t refcount = count_.load(std::memory_order_acquire);
+ assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
return refcount != kRefIncrement &&
(count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
- kRefcountMask) != kRefIncrement;
+ kHighRefcountMask) != 0;
}
// Same as Decrement but expect that refcount is greater than 1.
inline bool DecrementExpectHighRefcount() {
int32_t refcount =
- count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
- kRefcountMask;
- assert(refcount > 0 || refcount & kImmortalFlag);
- return refcount != kRefIncrement;
+ count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
+ assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
+ return (refcount & kHighRefcountMask) != 0;
}
// Returns the current reference count using acquire semantics.
@@ -220,6 +213,15 @@ class RefcountAndFlags {
// purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1
// if the immortal bit is set.)
kRefcountMask = ~kReservedFlag,
+
+ // Bitmask to use when checking if refcount is equal to 1 and not
+ // immortal when decrementing the refcount. This masks out kRefIncrement and
+ // all flags except kImmortalFlag. If the masked RefcountAndFlags is 0, we
+ // assume the refcount is equal to 1, since we know it's not immortal and
+ // not greater than 1. If the masked RefcountAndFlags is not 0, we can
+ // assume the refcount is not equal to 1 since either a higher bit in the
+ // refcount is set, or kImmortal is set.
+ kHighRefcountMask = kRefcountMask & ~kRefIncrement,
};
std::atomic<int32_t> count_;
diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc
index a86fdc0b..05bd0e20 100644
--- a/absl/strings/internal/cord_rep_btree.cc
+++ b/absl/strings/internal/cord_rep_btree.cc
@@ -14,6 +14,7 @@
#include "absl/strings/internal/cord_rep_btree.h"
+#include <atomic>
#include <cassert>
#include <cstdint>
#include <iostream>
@@ -49,9 +50,7 @@ using CopyResult = CordRepBtree::CopyResult;
constexpr auto kFront = CordRepBtree::kFront;
constexpr auto kBack = CordRepBtree::kBack;
-inline bool exhaustive_validation() {
- return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
-}
+ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false);
// Implementation of the various 'Dump' functions.
// Prints the entire tree structure or 'rep'. External callers should
@@ -362,6 +361,15 @@ struct StackOperations {
} // namespace
+void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation) {
+ cord_btree_exhaustive_validation.store(do_exaustive_validation,
+ std::memory_order_relaxed);
+}
+
+bool IsCordBtreeExhaustiveValidationEnabled() {
+ return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
+}
+
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream) {
stream << "===================================\n";
@@ -450,7 +458,8 @@ bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
child_length += edge->length;
}
NODE_CHECK_EQ(child_length, tree->length);
- if ((!shallow || exhaustive_validation()) && tree->height() > 0) {
+ if ((!shallow || IsCordBtreeExhaustiveValidationEnabled()) &&
+ tree->height() > 0) {
for (CordRep* edge : tree->Edges()) {
if (!IsValid(edge->btree(), shallow)) return false;
}
diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h
index 4209e512..be94b62e 100644
--- a/absl/strings/internal/cord_rep_btree.h
+++ b/absl/strings/internal/cord_rep_btree.h
@@ -32,6 +32,14 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
+// `SetCordBtreeExhaustiveValidation()` can be called to force exhaustive
+// validation in debug assertions, and in code that calls `IsValid()`
+// explicitly. By default, assertions should be relatively cheap, and
+// AssertValid() can easily lead to O(n^2) complexity as recursive / full tree
+// validation is O(n).
+void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation);
+bool IsCordBtreeExhaustiveValidationEnabled();
+
class CordRepBtreeNavigator;
// CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc
index 9d6ce484..840acf9f 100644
--- a/absl/strings/internal/cord_rep_btree_test.cc
+++ b/absl/strings/internal/cord_rep_btree_test.cc
@@ -507,7 +507,7 @@ TEST_P(CordRepBtreeTest, AppendToTreeTwoDeep) {
for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) {
// Ref top level tree based on param.
// Ref child node once every 16 iterations, and leaf node every 4
- // iterrations which which should not have an observable effect other than
+ // iterations which should not have an observable effect other than
// the node and/or the leaf below it being copied.
refs.RefIf(shared(), tree);
refs.RefIf(i % 16 == 0, tree->Edges().back());
@@ -568,7 +568,7 @@ TEST_P(CordRepBtreeTest, PrependToTreeTwoDeep) {
for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) {
// Ref top level tree based on param.
// Ref child node once every 16 iterations, and leaf node every 4
- // iterrations which which should not have an observable effect other than
+ // iterations which should not have an observable effect other than
// the node and/or the leaf below it being copied.
refs.RefIf(shared(), tree);
refs.RefIf(i % 16 == 0, tree->Edges().back());
@@ -1355,9 +1355,9 @@ TEST(CordRepBtreeTest, AssertValid) {
TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) {
// Restore exhaustive validation on any exit.
- const bool exhaustive_validation = cord_btree_exhaustive_validation.load();
+ const bool exhaustive_validation = IsCordBtreeExhaustiveValidationEnabled();
auto cleanup = absl::MakeCleanup([exhaustive_validation] {
- cord_btree_exhaustive_validation.store(exhaustive_validation);
+ SetCordBtreeExhaustiveValidation(exhaustive_validation);
});
// Create a tree of at least 2 levels, and mess with the original flat, which
@@ -1372,7 +1372,7 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) {
}
flat->length = 100;
- cord_btree_exhaustive_validation.store(false);
+ SetCordBtreeExhaustiveValidation(false);
EXPECT_FALSE(CordRepBtree::IsValid(tree));
EXPECT_TRUE(CordRepBtree::IsValid(tree, true));
EXPECT_FALSE(CordRepBtree::IsValid(tree, false));
@@ -1382,7 +1382,7 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) {
EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, false), ".*");
#endif
- cord_btree_exhaustive_validation.store(true);
+ SetCordBtreeExhaustiveValidation(true);
EXPECT_FALSE(CordRepBtree::IsValid(tree));
EXPECT_FALSE(CordRepBtree::IsValid(tree, true));
EXPECT_FALSE(CordRepBtree::IsValid(tree, false));
diff --git a/absl/strings/internal/cord_rep_consume.cc b/absl/strings/internal/cord_rep_consume.cc
index 20a55797..db7d4fef 100644
--- a/absl/strings/internal/cord_rep_consume.cc
+++ b/absl/strings/internal/cord_rep_consume.cc
@@ -42,7 +42,8 @@ CordRep* ClipSubstring(CordRepSubstring* substring) {
} // namespace
-void Consume(CordRep* rep, ConsumeFn consume_fn) {
+void Consume(CordRep* rep,
+ FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) {
size_t offset = 0;
size_t length = rep->length;
@@ -53,8 +54,9 @@ void Consume(CordRep* rep, ConsumeFn consume_fn) {
consume_fn(rep, offset, length);
}
-void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) {
- return Consume(rep, std::move(consume_fn));
+void ReverseConsume(CordRep* rep,
+ FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) {
+ return Consume(rep, consume_fn);
}
} // namespace cord_internal
diff --git a/absl/strings/internal/cord_rep_consume.h b/absl/strings/internal/cord_rep_consume.h
index d46fca2b..bece1874 100644
--- a/absl/strings/internal/cord_rep_consume.h
+++ b/absl/strings/internal/cord_rep_consume.h
@@ -24,11 +24,6 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
-// Functor for the Consume() and ReverseConsume() functions:
-// void ConsumeFunc(CordRep* rep, size_t offset, size_t length);
-// See the Consume() and ReverseConsume() function comments for documentation.
-using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>;
-
// Consume() and ReverseConsume() consume CONCAT based trees and invoke the
// provided functor with the contained nodes in the proper forward or reverse
// order, which is used to convert CONCAT trees into other tree or cord data.
@@ -40,8 +35,10 @@ using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>;
// violations, we can not 100% guarantee that all code respects 'new format'
// settings and flags, so we need to be able to parse old data on the fly until
// all old code is deprecated / no longer the default format.
-void Consume(CordRep* rep, ConsumeFn consume_fn);
-void ReverseConsume(CordRep* rep, ConsumeFn consume_fn);
+void Consume(CordRep* rep,
+ FunctionRef<void(CordRep*, size_t, size_t)> consume_fn);
+void ReverseConsume(CordRep* rep,
+ FunctionRef<void(CordRep*, size_t, size_t)> consume_fn);
} // namespace cord_internal
ABSL_NAMESPACE_END
diff --git a/absl/strings/internal/cord_rep_flat.h b/absl/strings/internal/cord_rep_flat.h
index e3e27fcd..27c4b21e 100644
--- a/absl/strings/internal/cord_rep_flat.h
+++ b/absl/strings/internal/cord_rep_flat.h
@@ -120,8 +120,16 @@ struct CordRepFlat : public CordRep {
// Round size up so it matches a size we can exactly express in a tag.
const size_t size = RoundUpForTag(len + kFlatOverhead);
void* const raw_rep = ::operator new(size);
+ // GCC 13 has a false-positive -Wstringop-overflow warning here.
+ #if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(13, 0)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wstringop-overflow"
+ #endif
CordRepFlat* rep = new (raw_rep) CordRepFlat();
rep->tag = AllocatedSizeToTag(size);
+ #if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(13, 0)
+ #pragma GCC diagnostic pop
+ #endif
return rep;
}
diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h
index 2000e21e..79a2fdb1 100644
--- a/absl/strings/internal/cord_rep_ring.h
+++ b/absl/strings/internal/cord_rep_ring.h
@@ -430,7 +430,7 @@ class CordRepRing : public CordRep {
// capacity to satisfy `extra` extra nodes, and unref the old `rep` instance.
//
// If a new CordRepRing can not be allocated, or the new capacity would exceed
- // the maxmimum capacity, then the input is consumed only, and an exception is
+ // the maximum capacity, then the input is consumed only, and an exception is
// thrown.
static CordRepRing* Mutable(CordRepRing* rep, size_t extra);
@@ -472,7 +472,7 @@ class CordRepRing : public CordRep {
// Increases the data offset for entry `index` by `n`.
void AddDataOffset(index_type index, size_t n);
- // Descreases the length for entry `index` by `n`.
+ // Decreases the length for entry `index` by `n`.
void SubLength(index_type index, size_t n);
index_type head_;
diff --git a/absl/strings/internal/cordz_functions_test.cc b/absl/strings/internal/cordz_functions_test.cc
index 350623c1..b70a685e 100644
--- a/absl/strings/internal/cordz_functions_test.cc
+++ b/absl/strings/internal/cordz_functions_test.cc
@@ -38,7 +38,7 @@ TEST(CordzFunctionsTest, SampleRate) {
}
// Cordz is disabled when we don't have thread_local. All calls to
-// should_profile will return false when cordz is diabled, so we might want to
+// should_profile will return false when cordz is disabled, so we might want to
// avoid those tests.
#ifdef ABSL_INTERNAL_CORDZ_ENABLED
diff --git a/absl/strings/internal/cordz_handle.cc b/absl/strings/internal/cordz_handle.cc
index a73fefed..a7061dbe 100644
--- a/absl/strings/internal/cordz_handle.cc
+++ b/absl/strings/internal/cordz_handle.cc
@@ -16,34 +16,60 @@
#include <atomic>
#include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK
-#include "absl/base/internal/spinlock.h"
+#include "absl/synchronization/mutex.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
-using ::absl::base_internal::SpinLockHolder;
+namespace {
-ABSL_CONST_INIT CordzHandle::Queue CordzHandle::global_queue_(absl::kConstInit);
+struct Queue {
+ Queue() = default;
+
+ absl::Mutex mutex;
+ std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr};
+
+ // Returns true if this delete queue is empty. This method does not acquire
+ // the lock, but does a 'load acquire' observation on the delete queue tail.
+ // It is used inside Delete() to check for the presence of a delete queue
+ // without holding the lock. The assumption is that the caller is in the
+ // state of 'being deleted', and can not be newly discovered by a concurrent
+ // 'being constructed' snapshot instance. Practically, this means that any
+ // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens
+ // before / after' semantics and atomic fences.
+ bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ return dq_tail.load(std::memory_order_acquire) == nullptr;
+ }
+};
+
+static Queue* GlobalQueue() {
+ static Queue* global_queue = new Queue;
+ return global_queue;
+}
+
+} // namespace
CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) {
+ Queue* global_queue = GlobalQueue();
if (is_snapshot) {
- SpinLockHolder lock(&queue_->mutex);
- CordzHandle* dq_tail = queue_->dq_tail.load(std::memory_order_acquire);
+ MutexLock lock(&global_queue->mutex);
+ CordzHandle* dq_tail =
+ global_queue->dq_tail.load(std::memory_order_acquire);
if (dq_tail != nullptr) {
dq_prev_ = dq_tail;
dq_tail->dq_next_ = this;
}
- queue_->dq_tail.store(this, std::memory_order_release);
+ global_queue->dq_tail.store(this, std::memory_order_release);
}
}
CordzHandle::~CordzHandle() {
- ODRCheck();
+ Queue* global_queue = GlobalQueue();
if (is_snapshot_) {
std::vector<CordzHandle*> to_delete;
{
- SpinLockHolder lock(&queue_->mutex);
+ MutexLock lock(&global_queue->mutex);
CordzHandle* next = dq_next_;
if (dq_prev_ == nullptr) {
// We were head of the queue, delete every CordzHandle until we reach
@@ -59,7 +85,7 @@ CordzHandle::~CordzHandle() {
if (next) {
next->dq_prev_ = dq_prev_;
} else {
- queue_->dq_tail.store(dq_prev_, std::memory_order_release);
+ global_queue->dq_tail.store(dq_prev_, std::memory_order_release);
}
}
for (CordzHandle* handle : to_delete) {
@@ -69,16 +95,15 @@ CordzHandle::~CordzHandle() {
}
bool CordzHandle::SafeToDelete() const {
- return is_snapshot_ || queue_->IsEmpty();
+ return is_snapshot_ || GlobalQueue()->IsEmpty();
}
void CordzHandle::Delete(CordzHandle* handle) {
assert(handle);
if (handle) {
- handle->ODRCheck();
- Queue* const queue = handle->queue_;
+ Queue* const queue = GlobalQueue();
if (!handle->SafeToDelete()) {
- SpinLockHolder lock(&queue->mutex);
+ MutexLock lock(&queue->mutex);
CordzHandle* dq_tail = queue->dq_tail.load(std::memory_order_acquire);
if (dq_tail != nullptr) {
handle->dq_prev_ = dq_tail;
@@ -93,8 +118,9 @@ void CordzHandle::Delete(CordzHandle* handle) {
std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() {
std::vector<const CordzHandle*> handles;
- SpinLockHolder lock(&global_queue_.mutex);
- CordzHandle* dq_tail = global_queue_.dq_tail.load(std::memory_order_acquire);
+ Queue* global_queue = GlobalQueue();
+ MutexLock lock(&global_queue->mutex);
+ CordzHandle* dq_tail = global_queue->dq_tail.load(std::memory_order_acquire);
for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) {
handles.push_back(p);
}
@@ -103,13 +129,13 @@ std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() {
bool CordzHandle::DiagnosticsHandleIsSafeToInspect(
const CordzHandle* handle) const {
- ODRCheck();
if (!is_snapshot_) return false;
if (handle == nullptr) return true;
if (handle->is_snapshot_) return false;
bool snapshot_found = false;
- SpinLockHolder lock(&queue_->mutex);
- for (const CordzHandle* p = queue_->dq_tail; p; p = p->dq_prev_) {
+ Queue* global_queue = GlobalQueue();
+ MutexLock lock(&global_queue->mutex);
+ for (const CordzHandle* p = global_queue->dq_tail; p; p = p->dq_prev_) {
if (p == handle) return !snapshot_found;
if (p == this) snapshot_found = true;
}
@@ -119,13 +145,13 @@ bool CordzHandle::DiagnosticsHandleIsSafeToInspect(
std::vector<const CordzHandle*>
CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() {
- ODRCheck();
std::vector<const CordzHandle*> handles;
if (!is_snapshot()) {
return handles;
}
- SpinLockHolder lock(&queue_->mutex);
+ Queue* global_queue = GlobalQueue();
+ MutexLock lock(&global_queue->mutex);
for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) {
if (!p->is_snapshot()) {
handles.push_back(p);
diff --git a/absl/strings/internal/cordz_handle.h b/absl/strings/internal/cordz_handle.h
index 3c800b43..08e3f0d3 100644
--- a/absl/strings/internal/cordz_handle.h
+++ b/absl/strings/internal/cordz_handle.h
@@ -20,8 +20,6 @@
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
-#include "absl/base/internal/spinlock.h"
-#include "absl/synchronization/mutex.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -34,7 +32,7 @@ namespace cord_internal {
// has gained visibility into a CordzInfo object, that CordzInfo object will not
// be deleted prematurely. This allows the profiler to inspect all CordzInfo
// objects that are alive without needing to hold a global lock.
-class CordzHandle {
+class ABSL_DLL CordzHandle {
public:
CordzHandle() : CordzHandle(false) {}
@@ -79,37 +77,6 @@ class CordzHandle {
virtual ~CordzHandle();
private:
- // Global queue data. CordzHandle stores a pointer to the global queue
- // instance to harden against ODR violations.
- struct Queue {
- constexpr explicit Queue(absl::ConstInitType)
- : mutex(absl::kConstInit,
- absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {}
-
- absl::base_internal::SpinLock mutex;
- std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr};
-
- // Returns true if this delete queue is empty. This method does not acquire
- // the lock, but does a 'load acquire' observation on the delete queue tail.
- // It is used inside Delete() to check for the presence of a delete queue
- // without holding the lock. The assumption is that the caller is in the
- // state of 'being deleted', and can not be newly discovered by a concurrent
- // 'being constructed' snapshot instance. Practically, this means that any
- // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens
- // before / after' semantics and atomic fences.
- bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS {
- return dq_tail.load(std::memory_order_acquire) == nullptr;
- }
- };
-
- void ODRCheck() const {
-#ifndef NDEBUG
- ABSL_RAW_CHECK(queue_ == &global_queue_, "ODR violation in Cord");
-#endif
- }
-
- ABSL_CONST_INIT static Queue global_queue_;
- Queue* const queue_ = &global_queue_;
const bool is_snapshot_;
// dq_prev_ and dq_next_ require the global queue mutex to be held.
diff --git a/absl/strings/internal/cordz_info.cc b/absl/strings/internal/cordz_info.cc
index 530f33be..515dfafb 100644
--- a/absl/strings/internal/cordz_info.cc
+++ b/absl/strings/internal/cordz_info.cc
@@ -26,6 +26,7 @@
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
#include "absl/types/span.h"
namespace absl {
@@ -53,7 +54,7 @@ namespace {
// The top level node is treated specially: we assume the current thread
// (typically called from the CordzHandler) to hold a reference purely to
// perform a safe analysis, and not being part of the application. So we
-// substract 1 from the reference count of the top node to compute the
+// subtract 1 from the reference count of the top node to compute the
// 'application fair share' excluding the reference of the current thread.
//
// An example of fair sharing, and why we multiply reference counts:
diff --git a/absl/strings/internal/cordz_sample_token.h b/absl/strings/internal/cordz_sample_token.h
index b58022c3..2a86bc3b 100644
--- a/absl/strings/internal/cordz_sample_token.h
+++ b/absl/strings/internal/cordz_sample_token.h
@@ -33,11 +33,11 @@ namespace cord_internal {
// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail
//
// This list tracks that CH1 and CH2 were created after ST1, so the thread
-// holding ST1 might have a referece to CH1, CH2, ST2, and CH3. However, ST2 was
-// created later, so the thread holding the ST2 token cannot have a reference to
-// ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will delete ST1,
-// CH1, and CH2. If instead ST2 is cleaned up first, that thread will only
-// delete ST2.
+// holding ST1 might have a reference to CH1, CH2, ST2, and CH3. However, ST2
+// was created later, so the thread holding the ST2 token cannot have a
+// reference to ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will
+// delete ST1, CH1, and CH2. If instead ST2 is cleaned up first, that thread
+// will only delete ST2.
//
// If ST1 is cleaned up first, the new list will be:
// ST2 <- CH3 <- global_delete_queue_tail
diff --git a/absl/strings/internal/damerau_levenshtein_distance_test.cc b/absl/strings/internal/damerau_levenshtein_distance_test.cc
index a342b7db..49dd105b 100644
--- a/absl/strings/internal/damerau_levenshtein_distance_test.cc
+++ b/absl/strings/internal/damerau_levenshtein_distance_test.cc
@@ -54,7 +54,7 @@ TEST(Distance, TestDistances) {
}
TEST(Distance, TestCutoff) {
- // Returing cutoff + 1 if the value is larger than cutoff or string longer
+ // Returning cutoff + 1 if the value is larger than cutoff or string longer
// than MAX_SIZE.
EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 3), uint8_t{3});
EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 2), uint8_t{3});
diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc
index 8bd0890d..56a4cbed 100644
--- a/absl/strings/internal/escaping.cc
+++ b/absl/strings/internal/escaping.cc
@@ -21,9 +21,17 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
+// The two strings below provide maps from normal 6-bit characters to their
+// base64-escaped equivalent.
+// For the inverse case, see kUn(WebSafe)Base64 in the external
+// escaping.cc.
ABSL_CONST_INIT const char kBase64Chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+ABSL_CONST_INIT const char kWebSafeBase64Chars[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
+
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
// Base64 encodes three bytes of input at a time. If the input is not
// divisible by three, we pad as appropriate.
@@ -62,6 +70,21 @@ size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
return len;
}
+// ----------------------------------------------------------------------
+// Take the input in groups of 4 characters and turn each
+// character into a code 0 to 63 thus:
+// A-Z map to 0 to 25
+// a-z map to 26 to 51
+// 0-9 map to 52 to 61
+// +(- for WebSafe) maps to 62
+// /(_ for WebSafe) maps to 63
+// There will be four numbers, all less than 64 which can be represented
+// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
+// Arrange the 6 digit binary numbers into three bytes as such:
+// aaaaaabb bbbbcccc ccdddddd
+// Equals signs (one or two) are used at the end of the encoded block to
+// indicate that the text was not an integer multiple of three bytes long.
+// ----------------------------------------------------------------------
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64,
bool do_padding) {
diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h
index b04033ff..2186f778 100644
--- a/absl/strings/internal/escaping.h
+++ b/absl/strings/internal/escaping.h
@@ -24,6 +24,7 @@ ABSL_NAMESPACE_BEGIN
namespace strings_internal {
ABSL_CONST_INIT extern const char kBase64Chars[];
+ABSL_CONST_INIT extern const char kWebSafeBase64Chars[];
// Calculates the length of a Base64 encoding (RFC 4648) of a string of length
// `input_len`, with or without padding per `do_padding`. Note that 'web-safe'
diff --git a/absl/strings/internal/memutil.cc b/absl/strings/internal/memutil.cc
index 44996a75..e2e7347c 100644
--- a/absl/strings/internal/memutil.cc
+++ b/absl/strings/internal/memutil.cc
@@ -16,6 +16,8 @@
#include <cstdlib>
+#include "absl/strings/ascii.h"
+
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
@@ -33,83 +35,6 @@ int memcasecmp(const char* s1, const char* s2, size_t len) {
return 0;
}
-char* memdup(const char* s, size_t slen) {
- void* copy;
- if ((copy = malloc(slen)) == nullptr) return nullptr;
- memcpy(copy, s, slen);
- return reinterpret_cast<char*>(copy);
-}
-
-char* memrchr(const char* s, int c, size_t slen) {
- for (const char* e = s + slen - 1; e >= s; e--) {
- if (*e == c) return const_cast<char*>(e);
- }
- return nullptr;
-}
-
-size_t memspn(const char* s, size_t slen, const char* accept) {
- const char* p = s;
- const char* spanp;
- char c, sc;
-
-cont:
- c = *p++;
- if (slen-- == 0)
- return static_cast<size_t>(p - 1 - s);
- for (spanp = accept; (sc = *spanp++) != '\0';)
- if (sc == c) goto cont;
- return static_cast<size_t>(p - 1 - s);
-}
-
-size_t memcspn(const char* s, size_t slen, const char* reject) {
- const char* p = s;
- const char* spanp;
- char c, sc;
-
- while (slen-- != 0) {
- c = *p++;
- for (spanp = reject; (sc = *spanp++) != '\0';)
- if (sc == c)
- return static_cast<size_t>(p - 1 - s);
- }
- return static_cast<size_t>(p - s);
-}
-
-char* mempbrk(const char* s, size_t slen, const char* accept) {
- const char* scanp;
- int sc;
-
- for (; slen; ++s, --slen) {
- for (scanp = accept; (sc = *scanp++) != '\0';)
- if (sc == *s) return const_cast<char*>(s);
- }
- return nullptr;
-}
-
-// This is significantly faster for case-sensitive matches with very
-// few possible matches. See unit test for benchmarks.
-const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
- size_t neelen) {
- if (0 == neelen) {
- return phaystack; // even if haylen is 0
- }
- if (haylen < neelen) return nullptr;
-
- const char* match;
- const char* hayend = phaystack + haylen - neelen + 1;
- // A static cast is used here to work around the fact that memchr returns
- // a void* on Posix-compliant systems and const void* on Windows.
- while (
- (match = static_cast<const char*>(memchr(
- phaystack, pneedle[0], static_cast<size_t>(hayend - phaystack))))) {
- if (memcmp(match, pneedle, neelen) == 0)
- return match;
- else
- phaystack = match + 1;
- }
- return nullptr;
-}
-
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
diff --git a/absl/strings/internal/memutil.h b/absl/strings/internal/memutil.h
index 9ad05358..b5911a01 100644
--- a/absl/strings/internal/memutil.h
+++ b/absl/strings/internal/memutil.h
@@ -14,51 +14,6 @@
// limitations under the License.
//
-// These routines provide mem versions of standard C string routines,
-// such as strpbrk. They function exactly the same as the str versions,
-// so if you wonder what they are, replace the word "mem" by
-// "str" and check out the man page. I could return void*, as the
-// strutil.h mem*() routines tend to do, but I return char* instead
-// since this is by far the most common way these functions are called.
-//
-// The difference between the mem and str versions is the mem version
-// takes a pointer and a length, rather than a '\0'-terminated string.
-// The memcase* routines defined here assume the locale is "C"
-// (they use absl::ascii_tolower instead of tolower).
-//
-// These routines are based on the BSD library.
-//
-// Here's a list of routines from string.h, and their mem analogues.
-// Functions in lowercase are defined in string.h; those in UPPERCASE
-// are defined here:
-//
-// strlen --
-// strcat strncat MEMCAT
-// strcpy strncpy memcpy
-// -- memccpy (very cool function, btw)
-// -- memmove
-// -- memset
-// strcmp strncmp memcmp
-// strcasecmp strncasecmp MEMCASECMP
-// strchr memchr
-// strcoll --
-// strxfrm --
-// strdup strndup MEMDUP
-// strrchr MEMRCHR
-// strspn MEMSPN
-// strcspn MEMCSPN
-// strpbrk MEMPBRK
-// strstr MEMSTR MEMMEM
-// (g)strcasestr MEMCASESTR MEMCASEMEM
-// strtok --
-// strprefix MEMPREFIX (strprefix is from strutil.h)
-// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h)
-// strsuffix MEMSUFFIX (strsuffix is from strutil.h)
-// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h)
-// -- MEMIS
-// -- MEMCASEIS
-// strcount MEMCOUNT (strcount is from strutil.h)
-
#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_
@@ -72,74 +27,11 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
-inline char* memcat(char* dest, size_t destlen, const char* src,
- size_t srclen) {
- return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen));
-}
-
+// Performs a byte-by-byte comparison of `len` bytes of the strings `s1` and
+// `s2`, ignoring the case of the characters. It returns an integer less than,
+// equal to, or greater than zero if `s1` is found, respectively, to be less
+// than, to match, or be greater than `s2`.
int memcasecmp(const char* s1, const char* s2, size_t len);
-char* memdup(const char* s, size_t slen);
-char* memrchr(const char* s, int c, size_t slen);
-size_t memspn(const char* s, size_t slen, const char* accept);
-size_t memcspn(const char* s, size_t slen, const char* reject);
-char* mempbrk(const char* s, size_t slen, const char* accept);
-
-// This is for internal use only. Don't call this directly
-template <bool case_sensitive>
-const char* int_memmatch(const char* haystack, size_t haylen,
- const char* needle, size_t neelen) {
- if (0 == neelen) {
- return haystack; // even if haylen is 0
- }
- const char* hayend = haystack + haylen;
- const char* needlestart = needle;
- const char* needleend = needlestart + neelen;
-
- for (; haystack < hayend; ++haystack) {
- char hay = case_sensitive
- ? *haystack
- : absl::ascii_tolower(static_cast<unsigned char>(*haystack));
- char nee = case_sensitive
- ? *needle
- : absl::ascii_tolower(static_cast<unsigned char>(*needle));
- if (hay == nee) {
- if (++needle == needleend) {
- return haystack + 1 - neelen;
- }
- } else if (needle != needlestart) {
- // must back up haystack in case a prefix matched (find "aab" in "aaab")
- haystack -= needle - needlestart; // for loop will advance one more
- needle = needlestart;
- }
- }
- return nullptr;
-}
-
-// These are the guys you can call directly
-inline const char* memstr(const char* phaystack, size_t haylen,
- const char* pneedle) {
- return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle));
-}
-
-inline const char* memcasestr(const char* phaystack, size_t haylen,
- const char* pneedle) {
- return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle));
-}
-
-inline const char* memmem(const char* phaystack, size_t haylen,
- const char* pneedle, size_t needlelen) {
- return int_memmatch<true>(phaystack, haylen, pneedle, needlelen);
-}
-
-inline const char* memcasemem(const char* phaystack, size_t haylen,
- const char* pneedle, size_t needlelen) {
- return int_memmatch<false>(phaystack, haylen, pneedle, needlelen);
-}
-
-// This is significantly faster for case-sensitive matches with very
-// few possible matches. See unit test for benchmarks.
-const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
- size_t neelen);
} // namespace strings_internal
ABSL_NAMESPACE_END
diff --git a/absl/strings/internal/memutil_benchmark.cc b/absl/strings/internal/memutil_benchmark.cc
index dc95c3e5..61e323a4 100644
--- a/absl/strings/internal/memutil_benchmark.cc
+++ b/absl/strings/internal/memutil_benchmark.cc
@@ -25,62 +25,6 @@
// - an easy search: 'b'
// - a medium search: 'ab'. That means every letter is a possible match.
// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haystack)
-// We benchmark case-sensitive and case-insensitive versions of
-// three memmem implementations:
-// - memmem() from memutil.h
-// - search() from STL
-// - memmatch(), a custom implementation using memchr and memcmp.
-// Here are sample results:
-//
-// Run on (12 X 3800 MHz CPU s)
-// CPU Caches:
-// L1 Data 32K (x6)
-// L1 Instruction 32K (x6)
-// L2 Unified 256K (x6)
-// L3 Unified 15360K (x1)
-// ----------------------------------------------------------------
-// Benchmark Time CPU Iterations
-// ----------------------------------------------------------------
-// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s
-// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s
-// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s
-// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s
-// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s
-// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s
-// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s
-// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s
-// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s
-// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s
-// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s
-// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s
-// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s
-// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s
-// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s
-// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s
-// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s
-// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s
-// BM_MemmemStartup 5 ns 5 ns 150821500
-// BM_SearchStartup 5 ns 5 ns 150644203
-// BM_MemmatchStartup 7 ns 7 ns 97068802
-//
-// Conclusions:
-//
-// The following recommendations are based on the sample results above. However,
-// we have found that the performance of STL search can vary significantly
-// depending on compiler and standard library implementation. We recommend you
-// run the benchmarks for yourself on relevant platforms.
-//
-// If you need case-insensitive, STL search is slightly better than memmem for
-// all cases.
-//
-// Case-sensitive is more subtle:
-// Custom memmatch is _very_ fast at scanning, so if you have very few possible
-// matches in your haystack, that's the way to go. Performance drops
-// significantly with more matches.
-//
-// STL search is slightly faster than memmem in the medium and pathological
-// benchmarks. However, the performance of memmem is currently more dependable
-// across platforms and build configurations.
namespace {
@@ -94,96 +38,10 @@ const char* MakeHaystack() {
}
const char* const kHaystack = MakeHaystack();
-void BM_Memmem(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(
- absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_Memmem);
-
-void BM_MemmemMedium(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(
- absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_MemmemMedium);
-
-void BM_MemmemPathological(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(absl::strings_internal::memmem(
- kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
- kHaystackSize - kHaystackSize / 2));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_MemmemPathological);
-
-void BM_Memcasemem(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(
- absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_Memcasemem);
-
-void BM_MemcasememMedium(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(
- absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_MemcasememMedium);
-
-void BM_MemcasememPathological(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(absl::strings_internal::memcasemem(
- kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
- kHaystackSize - kHaystackSize / 2));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_MemcasememPathological);
-
bool case_eq(const char a, const char b) {
return absl::ascii_tolower(a) == absl::ascii_tolower(b);
}
-void BM_Search(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
- kHaystack + kHaystackSize - 1,
- kHaystack + kHaystackSize));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_Search);
-
-void BM_SearchMedium(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
- kHaystack + kHaystackSize - 2,
- kHaystack + kHaystackSize));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_SearchMedium);
-
-void BM_SearchPathological(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
- kHaystack + kHaystackSize / 2,
- kHaystack + kHaystackSize));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_SearchPathological);
-
void BM_Searchcase(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
@@ -241,34 +99,6 @@ const char* memcasematch(const char* phaystack, size_t haylen,
return nullptr;
}
-void BM_Memmatch(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(
- absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_Memmatch);
-
-void BM_MemmatchMedium(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(
- absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_MemmatchMedium);
-
-void BM_MemmatchPathological(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(absl::strings_internal::memmatch(
- kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
- kHaystackSize - kHaystackSize / 2));
- }
- state.SetBytesProcessed(kHaystackSize64 * state.iterations());
-}
-BENCHMARK(BM_MemmatchPathological);
-
void BM_Memcasematch(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1));
@@ -295,29 +125,4 @@ void BM_MemcasematchPathological(benchmark::State& state) {
}
BENCHMARK(BM_MemcasematchPathological);
-void BM_MemmemStartup(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(absl::strings_internal::memmem(
- kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
- }
-}
-BENCHMARK(BM_MemmemStartup);
-
-void BM_SearchStartup(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(
- std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize,
- kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize));
- }
-}
-BENCHMARK(BM_SearchStartup);
-
-void BM_MemmatchStartup(benchmark::State& state) {
- for (auto _ : state) {
- benchmark::DoNotOptimize(absl::strings_internal::memmatch(
- kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
- }
-}
-BENCHMARK(BM_MemmatchStartup);
-
} // namespace
diff --git a/absl/strings/internal/memutil_test.cc b/absl/strings/internal/memutil_test.cc
index d8681ddf..277be2c4 100644
--- a/absl/strings/internal/memutil_test.cc
+++ b/absl/strings/internal/memutil_test.cc
@@ -19,42 +19,12 @@
#include <cstdlib>
#include "gtest/gtest.h"
-#include "absl/strings/ascii.h"
namespace {
-static char* memcasechr(const char* s, int c, size_t slen) {
- c = absl::ascii_tolower(c);
- for (; slen; ++s, --slen) {
- if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s);
- }
- return nullptr;
-}
-
-static const char* memcasematch(const char* phaystack, size_t haylen,
- const char* pneedle, size_t neelen) {
- if (0 == neelen) {
- return phaystack; // even if haylen is 0
- }
- if (haylen < neelen) return nullptr;
-
- const char* match;
- const char* hayend = phaystack + haylen - neelen + 1;
- while ((match = static_cast<char*>(
- memcasechr(phaystack, pneedle[0], hayend - phaystack)))) {
- if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0)
- return match;
- else
- phaystack = match + 1;
- }
- return nullptr;
-}
-
-TEST(MemUtilTest, AllTests) {
+TEST(MemUtil, memcasecmp) {
// check memutil functions
- char a[1000];
- absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1);
- absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1);
+ const char a[] = "hello there";
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there",
sizeof("hello there") - 1),
@@ -66,114 +36,6 @@ TEST(MemUtilTest, AllTests) {
sizeof("hello there") - 2),
0);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0);
-
- char* p = absl::strings_internal::memdup("hello", 5);
- free(p);
-
- p = absl::strings_internal::memrchr("hello there", 'e',
- sizeof("hello there") - 1);
- EXPECT_TRUE(p && p[-1] == 'r');
- p = absl::strings_internal::memrchr("hello there", 'e',
- sizeof("hello there") - 2);
- EXPECT_TRUE(p && p[-1] == 'h');
- p = absl::strings_internal::memrchr("hello there", 'u',
- sizeof("hello there") - 1);
- EXPECT_TRUE(p == nullptr);
-
- int len = absl::strings_internal::memspn("hello there",
- sizeof("hello there") - 1, "hole");
- EXPECT_EQ(len, sizeof("hello") - 1);
- len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
- "u");
- EXPECT_EQ(len, 0);
- len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
- "");
- EXPECT_EQ(len, 0);
- len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
- "trole h");
- EXPECT_EQ(len, sizeof("hello there") - 1);
- len = absl::strings_internal::memspn("hello there!",
- sizeof("hello there!") - 1, "trole h");
- EXPECT_EQ(len, sizeof("hello there") - 1);
- len = absl::strings_internal::memspn("hello there!",
- sizeof("hello there!") - 2, "trole h!");
- EXPECT_EQ(len, sizeof("hello there!") - 2);
-
- len = absl::strings_internal::memcspn("hello there",
- sizeof("hello there") - 1, "leho");
- EXPECT_EQ(len, 0);
- len = absl::strings_internal::memcspn("hello there",
- sizeof("hello there") - 1, "u");
- EXPECT_EQ(len, sizeof("hello there") - 1);
- len = absl::strings_internal::memcspn("hello there",
- sizeof("hello there") - 1, "");
- EXPECT_EQ(len, sizeof("hello there") - 1);
- len = absl::strings_internal::memcspn("hello there",
- sizeof("hello there") - 1, " ");
- EXPECT_EQ(len, 5);
-
- p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
- "leho");
- EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l');
- p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
- "nu");
- EXPECT_TRUE(p == nullptr);
- p = absl::strings_internal::mempbrk("hello there!",
- sizeof("hello there!") - 2, "!");
- EXPECT_TRUE(p == nullptr);
- p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
- " t ");
- EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't');
-
- {
- const char kHaystack[] = "0123456789";
- EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack);
- EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3),
- kHaystack);
- EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1),
- kHaystack);
- EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3),
- kHaystack + 7);
- EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1),
- kHaystack + 9);
- EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) ==
- nullptr);
- EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) ==
- nullptr);
- }
- {
- const char kHaystack[] = "aBcDeFgHiJ";
- EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0),
- kHaystack);
- EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3),
- kHaystack);
- EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1),
- kHaystack);
- EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3),
- kHaystack + 7);
- EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1),
- kHaystack + 9);
- EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) ==
- nullptr);
- EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) ==
- nullptr);
- }
- {
- const char kHaystack[] = "0123456789";
- EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack);
- EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3),
- kHaystack);
- EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1),
- kHaystack);
- EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3),
- kHaystack + 7);
- EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1),
- kHaystack + 9);
- EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) ==
- nullptr);
- EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) ==
- nullptr);
- }
}
} // namespace
diff --git a/absl/strings/internal/stl_type_traits.h b/absl/strings/internal/stl_type_traits.h
index 6035ca45..e50468b0 100644
--- a/absl/strings/internal/stl_type_traits.h
+++ b/absl/strings/internal/stl_type_traits.h
@@ -13,7 +13,7 @@
// limitations under the License.
//
-// Thie file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type
+// This file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type
// trait metafunction to assist in working with the _GLIBCXX_DEBUG debug
// wrappers of STL containers.
//
diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc
index 018dd052..c0a9a28e 100644
--- a/absl/strings/internal/str_format/arg.cc
+++ b/absl/strings/internal/str_format/arg.cc
@@ -106,7 +106,7 @@ class IntDigits {
char *p = storage_ + sizeof(storage_);
do {
p -= 2;
- numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p);
+ numbers_internal::PutTwoDigits(static_cast<uint32_t>(v % 100), p);
v /= 100;
} while (v);
if (p[0] == '0') {
@@ -278,24 +278,6 @@ bool ConvertIntImplInnerSlow(const IntDigits &as_digits,
return true;
}
-template <typename T,
- typename std::enable_if<(std::is_integral<T>::value &&
- std::is_signed<T>::value) ||
- std::is_same<T, int128>::value,
- int>::type = 0>
-constexpr auto ConvertV(T) {
- return FormatConversionCharInternal::d;
-}
-
-template <typename T,
- typename std::enable_if<(std::is_integral<T>::value &&
- std::is_unsigned<T>::value) ||
- std::is_same<T, uint128>::value,
- int>::type = 0>
-constexpr auto ConvertV(T) {
- return FormatConversionCharInternal::u;
-}
-
template <typename T>
bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
if (conv.conversion_char() == FormatConversionCharInternal::v) {
@@ -332,10 +314,6 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
using U = typename MakeUnsigned<T>::type;
IntDigits as_digits;
- if (conv.conversion_char() == FormatConversionCharInternal::v) {
- conv.set_conversion_char(ConvertV(T{}));
- }
-
// This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes
// it to complain about a switch/case type mismatch, even though both are
// FormatConverionChar. Likely this is because at this point
@@ -361,6 +339,7 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
case static_cast<uint8_t>(FormatConversionCharInternal::d):
case static_cast<uint8_t>(FormatConversionCharInternal::i):
+ case static_cast<uint8_t>(FormatConversionCharInternal::v):
as_digits.PrintAsDec(v);
break;
@@ -482,18 +461,18 @@ CharConvertResult FormatConvertImpl(char v, const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
-CharConvertResult FormatConvertImpl(signed char v,
- const FormatConversionSpecImpl conv,
- FormatSinkImpl *sink) {
+
+// ==================== Ints ====================
+IntegralConvertResult FormatConvertImpl(signed char v,
+ const FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
-CharConvertResult FormatConvertImpl(unsigned char v,
- const FormatConversionSpecImpl conv,
- FormatSinkImpl *sink) {
+IntegralConvertResult FormatConvertImpl(unsigned char v,
+ const FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
-
-// ==================== Ints ====================
IntegralConvertResult FormatConvertImpl(short v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h
index e4b16628..3ce30feb 100644
--- a/absl/strings/internal/str_format/arg.h
+++ b/absl/strings/internal/str_format/arg.h
@@ -279,14 +279,14 @@ FloatingConvertResult FormatConvertImpl(long double v,
// Chars.
CharConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
-CharConvertResult FormatConvertImpl(signed char v,
- FormatConversionSpecImpl conv,
- FormatSinkImpl* sink);
-CharConvertResult FormatConvertImpl(unsigned char v,
- FormatConversionSpecImpl conv,
- FormatSinkImpl* sink);
// Ints.
+IntegralConvertResult FormatConvertImpl(signed char v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+IntegralConvertResult FormatConvertImpl(unsigned char v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
@@ -441,7 +441,7 @@ class FormatArgImpl {
// For everything else:
// - Decay char* and char arrays into `const char*`
// - Decay any other pointer to `const void*`
- // - Decay all enums to their underlying type.
+ // - Decay all enums to the integral promotion of their underlying type.
// - Decay function pointers to void*.
template <typename T, typename = void>
struct DecayType {
@@ -461,7 +461,7 @@ class FormatArgImpl {
!str_format_internal::HasUserDefinedConvert<T>::value &&
!strings_internal::HasAbslStringify<T>::value &&
std::is_enum<T>::value>::type> {
- using type = typename std::underlying_type<T>::type;
+ using type = decltype(+typename std::underlying_type<T>::type());
};
public:
diff --git a/absl/strings/internal/str_format/bind.h b/absl/strings/internal/str_format/bind.h
index b73c5028..5e2a43d5 100644
--- a/absl/strings/internal/str_format/bind.h
+++ b/absl/strings/internal/str_format/bind.h
@@ -21,6 +21,7 @@
#include <string>
#include "absl/base/port.h"
+#include "absl/container/inlined_vector.h"
#include "absl/strings/internal/str_format/arg.h"
#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/parser.h"
@@ -177,17 +178,7 @@ class Streamable {
public:
Streamable(const UntypedFormatSpecImpl& format,
absl::Span<const FormatArgImpl> args)
- : format_(format) {
- if (args.size() <= ABSL_ARRAYSIZE(few_args_)) {
- for (size_t i = 0; i < args.size(); ++i) {
- few_args_[i] = args[i];
- }
- args_ = absl::MakeSpan(few_args_, args.size());
- } else {
- many_args_.assign(args.begin(), args.end());
- args_ = many_args_;
- }
- }
+ : format_(format), args_(args.begin(), args.end()) {}
std::ostream& Print(std::ostream& os) const;
@@ -197,12 +188,7 @@ class Streamable {
private:
const UntypedFormatSpecImpl& format_;
- absl::Span<const FormatArgImpl> args_;
- // if args_.size() is 4 or less:
- FormatArgImpl few_args_[4] = {FormatArgImpl(0), FormatArgImpl(0),
- FormatArgImpl(0), FormatArgImpl(0)};
- // if args_.size() is more than 4:
- std::vector<FormatArgImpl> many_args_;
+ absl::InlinedVector<FormatArgImpl, 4> args_;
};
// for testing
@@ -211,8 +197,7 @@ std::string Summarize(UntypedFormatSpecImpl format,
bool BindWithPack(const UnboundConversion* props,
absl::Span<const FormatArgImpl> pack, BoundConversion* bound);
-bool FormatUntyped(FormatRawSinkImpl raw_sink,
- UntypedFormatSpecImpl format,
+bool FormatUntyped(FormatRawSinkImpl raw_sink, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format,
@@ -231,7 +216,7 @@ int SnprintF(char* output, size_t size, UntypedFormatSpecImpl format,
template <typename T>
class StreamedWrapper {
public:
- explicit StreamedWrapper(const T& v) : v_(v) { }
+ explicit StreamedWrapper(const T& v) : v_(v) {}
private:
template <typename S>
diff --git a/absl/strings/internal/str_format/constexpr_parser.h b/absl/strings/internal/str_format/constexpr_parser.h
index 3dc1776b..b70a16e4 100644
--- a/absl/strings/internal/str_format/constexpr_parser.h
+++ b/absl/strings/internal/str_format/constexpr_parser.h
@@ -323,6 +323,7 @@ constexpr const char* ConsumeConversion(const char* pos, const char* const end,
if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
}
+#undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR
assert(CheckFastPathSetting(*conv));
(void)(&CheckFastPathSetting);
diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc
index 300612b7..16ff9879 100644
--- a/absl/strings/internal/str_format/convert_test.cc
+++ b/absl/strings/internal/str_format/convert_test.cc
@@ -26,6 +26,7 @@
#include "gtest/gtest.h"
#include "absl/base/attributes.h"
#include "absl/base/internal/raw_logging.h"
+#include "absl/log/log.h"
#include "absl/strings/internal/str_format/bind.h"
#include "absl/strings/match.h"
#include "absl/types/optional.h"
@@ -264,7 +265,7 @@ MATCHER_P(MatchesPointerString, ptr, "") {
}
void* parsed = nullptr;
if (sscanf(arg.c_str(), "%p", &parsed) != 1) {
- ABSL_RAW_LOG(FATAL, "Could not parse %s", arg.c_str());
+ LOG(FATAL) << "Could not parse " << arg;
}
return ptr == parsed;
}
@@ -1241,9 +1242,9 @@ TEST_F(FormatConvertTest, GlibcHasCorrectTraits) {
const NativePrintfTraits &native_traits = VerifyNativeImplementation();
// If one of the following tests break then it is either because the above PP
// macro guards failed to exclude a new platform (likely) or because something
- // has changed in the implemention of glibc sprintf float formatting behavior.
- // If the latter, then the code that computes these flags needs to be
- // revisited and/or possibly the StrFormat implementation.
+ // has changed in the implementation of glibc sprintf float formatting
+ // behavior. If the latter, then the code that computes these flags needs to
+ // be revisited and/or possibly the StrFormat implementation.
EXPECT_TRUE(native_traits.hex_float_has_glibc_rounding);
EXPECT_TRUE(native_traits.hex_float_prefers_denormal_repr);
EXPECT_TRUE(
diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h
index 603bd49d..8de42d2c 100644
--- a/absl/strings/internal/str_format/extension.h
+++ b/absl/strings/internal/str_format/extension.h
@@ -273,7 +273,7 @@ struct FormatConversionSpecImplFriend;
class FormatConversionSpecImpl {
public:
- // Width and precison are not specified, no flags are set.
+ // Width and precision are not specified, no flags are set.
bool is_basic() const { return flags_ == Flags::kBasic; }
bool has_left_flag() const { return FlagsContains(flags_, Flags::kLeft); }
bool has_show_pos_flag() const {
diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc
index 8e497852..8edf520d 100644
--- a/absl/strings/internal/str_format/float_conversion.cc
+++ b/absl/strings/internal/str_format/float_conversion.cc
@@ -711,12 +711,12 @@ bool IncrementNibble(size_t nibble_index, Int* n) {
constexpr size_t kShift = sizeof(Int) * 8 - 1;
constexpr size_t kNumNibbles = sizeof(Int) * 8 / 4;
Int before = *n >> kShift;
- // Here we essentially want to take the number 1 and move it into the requsted
- // nibble, then add it to *n to effectively increment the nibble. However,
- // ASan will complain if we try to shift the 1 beyond the limits of the Int,
- // i.e., if the nibble_index is out of range. So therefore we check for this
- // and if we are out of range we just add 0 which leaves *n unchanged, which
- // seems like the reasonable thing to do in that case.
+ // Here we essentially want to take the number 1 and move it into the
+ // requested nibble, then add it to *n to effectively increment the nibble.
+ // However, ASan will complain if we try to shift the 1 beyond the limits of
+ // the Int, i.e., if the nibble_index is out of range. So therefore we check
+ // for this and if we are out of range we just add 0 which leaves *n
+ // unchanged, which seems like the reasonable thing to do in that case.
*n += ((nibble_index >= kNumNibbles)
? 0
: (Int{1} << static_cast<int>(nibble_index * 4)));
@@ -937,7 +937,7 @@ void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp,
// =============== Exponent ==================
constexpr size_t kBufSizeForExpDecRepr =
- numbers_internal::kFastToBufferSize // requred for FastIntToBuffer
+ numbers_internal::kFastToBufferSize // required for FastIntToBuffer
+ 1 // 'p' or 'P'
+ 1; // '+' or '-'
char exp_buffer[kBufSizeForExpDecRepr];
@@ -1015,7 +1015,7 @@ struct Buffer {
--end;
}
- char &back() {
+ char &back() const {
assert(begin < end);
return end[-1];
}
@@ -1102,7 +1102,7 @@ void PrintExponent(int exp, char e, Buffer *out) {
template <typename Float, typename Int>
constexpr bool CanFitMantissa() {
return
-#if defined(__clang__) && !defined(__SSE3__)
+#if defined(__clang__) && (__clang_major__ < 9) && !defined(__SSE3__)
// Workaround for clang bug: https://bugs.llvm.org/show_bug.cgi?id=38289
// Casting from long double to uint64_t is miscompiled and drops bits.
(!std::is_same<Float, long double>::value ||
diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h
index 35edf3aa..081ad85a 100644
--- a/absl/strings/internal/str_split_internal.h
+++ b/absl/strings/internal/str_split_internal.h
@@ -235,6 +235,24 @@ struct SplitterIsConvertibleTo
HasMappedType<C>::value> {
};
+template <typename StringType, typename Container, typename = void>
+struct ShouldUseLifetimeBound : std::false_type {};
+
+template <typename StringType, typename Container>
+struct ShouldUseLifetimeBound<
+ StringType, Container,
+ std::enable_if_t<
+ std::is_same<StringType, std::string>::value &&
+ std::is_same<typename Container::value_type, absl::string_view>::value>>
+ : std::true_type {};
+
+template <typename StringType, typename First, typename Second>
+using ShouldUseLifetimeBoundForPair = std::integral_constant<
+ bool, std::is_same<StringType, std::string>::value &&
+ (std::is_same<First, absl::string_view>::value ||
+ std::is_same<Second, absl::string_view>::value)>;
+
+
// This class implements the range that is returned by absl::StrSplit(). This
// class has templated conversion operators that allow it to be implicitly
// converted to a variety of types that the caller may have specified on the
@@ -281,10 +299,24 @@ class Splitter {
// An implicit conversion operator that is restricted to only those containers
// that the splitter is convertible to.
- template <typename Container,
- typename = typename std::enable_if<
- SplitterIsConvertibleTo<Container>::value>::type>
- operator Container() const { // NOLINT(runtime/explicit)
+ template <
+ typename Container,
+ std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value &&
+ SplitterIsConvertibleTo<Container>::value,
+ std::nullptr_t> = nullptr>
+ // NOLINTNEXTLINE(google-explicit-constructor)
+ operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
+ return ConvertToContainer<Container, typename Container::value_type,
+ HasMappedType<Container>::value>()(*this);
+ }
+
+ template <
+ typename Container,
+ std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value &&
+ SplitterIsConvertibleTo<Container>::value,
+ std::nullptr_t> = nullptr>
+ // NOLINTNEXTLINE(google-explicit-constructor)
+ operator Container() const {
return ConvertToContainer<Container, typename Container::value_type,
HasMappedType<Container>::value>()(*this);
}
@@ -293,8 +325,27 @@ class Splitter {
// strings returned by the begin() iterator. Either/both of .first and .second
// will be constructed with empty strings if the iterator doesn't have a
// corresponding value.
+ template <typename First, typename Second,
+ std::enable_if_t<
+ ShouldUseLifetimeBoundForPair<StringType, First, Second>::value,
+ std::nullptr_t> = nullptr>
+ // NOLINTNEXTLINE(google-explicit-constructor)
+ operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
+ return ConvertToPair<First, Second>();
+ }
+
+ template <typename First, typename Second,
+ std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First,
+ Second>::value,
+ std::nullptr_t> = nullptr>
+ // NOLINTNEXTLINE(google-explicit-constructor)
+ operator std::pair<First, Second>() const {
+ return ConvertToPair<First, Second>();
+ }
+
+ private:
template <typename First, typename Second>
- operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
+ std::pair<First, Second> ConvertToPair() const {
absl::string_view first, second;
auto it = begin();
if (it != end()) {
@@ -306,7 +357,6 @@ class Splitter {
return {First(first), Second(second)};
}
- private:
// ConvertToContainer is a functor converting a Splitter to the requested
// Container of ValueType. It is specialized below to optimize splitting to
// certain combinations of Container and ValueType.
diff --git a/absl/strings/match.cc b/absl/strings/match.cc
index 2d672509..3b81b2c0 100644
--- a/absl/strings/match.cc
+++ b/absl/strings/match.cc
@@ -14,6 +14,12 @@
#include "absl/strings/match.h"
+#include <algorithm>
+#include <cstdint>
+
+#include "absl/base/internal/endian.h"
+#include "absl/numeric/bits.h"
+#include "absl/strings/ascii.h"
#include "absl/strings/internal/memutil.h"
namespace absl {
@@ -27,6 +33,27 @@ bool EqualsIgnoreCase(absl::string_view piece1,
// memcasecmp uses absl::ascii_tolower().
}
+bool StrContainsIgnoreCase(absl::string_view haystack,
+ absl::string_view needle) noexcept {
+ while (haystack.size() >= needle.size()) {
+ if (StartsWithIgnoreCase(haystack, needle)) return true;
+ haystack.remove_prefix(1);
+ }
+ return false;
+}
+
+bool StrContainsIgnoreCase(absl::string_view haystack,
+ char needle) noexcept {
+ char upper_needle = absl::ascii_toupper(static_cast<unsigned char>(needle));
+ char lower_needle = absl::ascii_tolower(static_cast<unsigned char>(needle));
+ if (upper_needle == lower_needle) {
+ return StrContains(haystack, needle);
+ } else {
+ const char both_cstr[3] = {lower_needle, upper_needle, '\0'};
+ return haystack.find_first_of(both_cstr) != absl::string_view::npos;
+ }
+}
+
bool StartsWithIgnoreCase(absl::string_view text,
absl::string_view prefix) noexcept {
return (text.size() >= prefix.size()) &&
@@ -39,5 +66,65 @@ bool EndsWithIgnoreCase(absl::string_view text,
EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix);
}
+absl::string_view FindLongestCommonPrefix(absl::string_view a,
+ absl::string_view b) {
+ const absl::string_view::size_type limit = std::min(a.size(), b.size());
+ const char* const pa = a.data();
+ const char* const pb = b.data();
+ absl::string_view::size_type count = (unsigned) 0;
+
+ if (ABSL_PREDICT_FALSE(limit < 8)) {
+ while (ABSL_PREDICT_TRUE(count + 2 <= limit)) {
+ uint16_t xor_bytes = absl::little_endian::Load16(pa + count) ^
+ absl::little_endian::Load16(pb + count);
+ if (ABSL_PREDICT_FALSE(xor_bytes != 0)) {
+ if (ABSL_PREDICT_TRUE((xor_bytes & 0xff) == 0)) ++count;
+ return absl::string_view(pa, count);
+ }
+ count += 2;
+ }
+ if (ABSL_PREDICT_TRUE(count != limit)) {
+ if (ABSL_PREDICT_TRUE(pa[count] == pb[count])) ++count;
+ }
+ return absl::string_view(pa, count);
+ }
+
+ do {
+ uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^
+ absl::little_endian::Load64(pb + count);
+ if (ABSL_PREDICT_FALSE(xor_bytes != 0)) {
+ count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3);
+ return absl::string_view(pa, count);
+ }
+ count += 8;
+ } while (ABSL_PREDICT_TRUE(count + 8 < limit));
+
+ count = limit - 8;
+ uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^
+ absl::little_endian::Load64(pb + count);
+ if (ABSL_PREDICT_TRUE(xor_bytes != 0)) {
+ count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3);
+ return absl::string_view(pa, count);
+ }
+ return absl::string_view(pa, limit);
+}
+
+absl::string_view FindLongestCommonSuffix(absl::string_view a,
+ absl::string_view b) {
+ const absl::string_view::size_type limit = std::min(a.size(), b.size());
+ if (limit == 0) return absl::string_view();
+
+ const char* pa = a.data() + a.size() - 1;
+ const char* pb = b.data() + b.size() - 1;
+ absl::string_view::size_type count = (unsigned) 0;
+ while (count < limit && *pa == *pb) {
+ --pa;
+ --pb;
+ ++count;
+ }
+
+ return absl::string_view(++pa, count);
+}
+
ABSL_NAMESPACE_END
} // namespace absl
diff --git a/absl/strings/match.h b/absl/strings/match.h
index 038cbb3f..1eeafbbf 100644
--- a/absl/strings/match.h
+++ b/absl/strings/match.h
@@ -72,6 +72,15 @@ inline bool EndsWith(absl::string_view text,
memcmp(text.data() + (text.size() - suffix.size()), suffix.data(),
suffix.size()) == 0);
}
+// StrContainsIgnoreCase()
+//
+// Returns whether a given ASCII string `haystack` contains the ASCII substring
+// `needle`, ignoring case in the comparison.
+bool StrContainsIgnoreCase(absl::string_view haystack,
+ absl::string_view needle) noexcept;
+
+bool StrContainsIgnoreCase(absl::string_view haystack,
+ char needle) noexcept;
// EqualsIgnoreCase()
//
@@ -94,6 +103,16 @@ bool StartsWithIgnoreCase(absl::string_view text,
bool EndsWithIgnoreCase(absl::string_view text,
absl::string_view suffix) noexcept;
+// Yields the longest prefix in common between both input strings.
+// Pointer-wise, the returned result is a subset of input "a".
+absl::string_view FindLongestCommonPrefix(absl::string_view a,
+ absl::string_view b);
+
+// Yields the longest suffix in common between both input strings.
+// Pointer-wise, the returned result is a subset of input "a".
+absl::string_view FindLongestCommonSuffix(absl::string_view a,
+ absl::string_view b);
+
ABSL_NAMESPACE_END
} // namespace absl
diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc
index 5841bc1b..71618f71 100644
--- a/absl/strings/match_test.cc
+++ b/absl/strings/match_test.cc
@@ -124,4 +124,165 @@ TEST(MatchTest, EndsWithIgnoreCase) {
EXPECT_FALSE(absl::EndsWithIgnoreCase("", "fo"));
}
+TEST(MatchTest, ContainsIgnoreCase) {
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("foo", "foo"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("FOO", "Foo"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("--FOO", "Foo"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("FOO--", "Foo"));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("BAR", "Foo"));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("BAR", "Foo"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("123456", "123456"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("123456", "234"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("", ""));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("abc", ""));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("", "a"));
+}
+
+TEST(MatchTest, ContainsCharIgnoreCase) {
+ absl::string_view a("AaBCdefg!");
+ absl::string_view b("AaBCd!");
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'a'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'A'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'b'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'B'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'e'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'E'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(a, 'h'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(a, 'H'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, '!'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(a, '?'));
+
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'a'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'A'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'b'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'B'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'e'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'E'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'h'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'H'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, '!'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, '?'));
+
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("", 'a'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("", 'A'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("", '0'));
+}
+
+TEST(MatchTest, FindLongestCommonPrefix) {
+ EXPECT_EQ(absl::FindLongestCommonPrefix("", ""), "");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("", "abc"), "");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abc", ""), "");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("ab", "abc"), "ab");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "ab"), "ab");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "abd"), "ab");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "abcd"), "abc");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abcd", "abcd"), "abcd");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abcd", "efgh"), "");
+
+ // "abcde" v. "abc" but in the middle of other data
+ EXPECT_EQ(absl::FindLongestCommonPrefix(
+ absl::string_view("1234 abcdef").substr(5, 5),
+ absl::string_view("5678 abcdef").substr(5, 3)),
+ "abc");
+}
+
+// Since the little-endian implementation involves a bit of if-else and various
+// return paths, the following tests aim to provide full test coverage of the
+// implementation.
+TEST(MatchTest, FindLongestCommonPrefixLoad16Mismatch) {
+ const std::string x1 = "abcdefgh";
+ const std::string x2 = "abcde_";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesNoLast) {
+ const std::string x1 = "abcdef";
+ const std::string x2 = "abcdef";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdef");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdef");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastCharMismatches) {
+ const std::string x1 = "abcdefg";
+ const std::string x2 = "abcdef_h";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdef");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdef");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastMatches) {
+ const std::string x1 = "abcde";
+ const std::string x2 = "abcdefgh";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize8Load64Mismatches) {
+ const std::string x1 = "abcdefghijk";
+ const std::string x2 = "abcde_g_";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize8Load64Matches) {
+ const std::string x1 = "abcdefgh";
+ const std::string x2 = "abcdefgh";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdefgh");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdefgh");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize15Load64Mismatches) {
+ const std::string x1 = "012345670123456";
+ const std::string x2 = "0123456701_34_6";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "0123456701");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "0123456701");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize15Load64Matches) {
+ const std::string x1 = "012345670123456";
+ const std::string x2 = "0123456701234567";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "012345670123456");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "012345670123456");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSizeFirstByteOfLast8BytesMismatch) {
+ const std::string x1 = "012345670123456701234567";
+ const std::string x2 = "0123456701234567_1234567";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "0123456701234567");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "0123456701234567");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLargeLastCharMismatches) {
+ const std::string x1(300, 'x');
+ std::string x2 = x1;
+ x2.back() = '#';
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), std::string(299, 'x'));
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), std::string(299, 'x'));
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLargeFullMatch) {
+ const std::string x1(300, 'x');
+ const std::string x2 = x1;
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), std::string(300, 'x'));
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), std::string(300, 'x'));
+}
+
+TEST(MatchTest, FindLongestCommonSuffix) {
+ EXPECT_EQ(absl::FindLongestCommonSuffix("", ""), "");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("", "abc"), "");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abc", ""), "");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("bc", "abc"), "bc");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abc", "bc"), "bc");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abc", "dbc"), "bc");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("bcd", "abcd"), "bcd");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abcd", "abcd"), "abcd");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abcd", "efgh"), "");
+
+ // "abcde" v. "cde" but in the middle of other data
+ EXPECT_EQ(absl::FindLongestCommonSuffix(
+ absl::string_view("1234 abcdef").substr(5, 5),
+ absl::string_view("5678 abcdef").substr(7, 3)),
+ "cde");
+}
+
} // namespace
diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc
index 2987158e..c43c6bcc 100644
--- a/absl/strings/numbers.cc
+++ b/absl/strings/numbers.cc
@@ -31,7 +31,9 @@
#include <utility>
#include "absl/base/attributes.h"
+#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
+#include "absl/base/optimization.h"
#include "absl/numeric/bits.h"
#include "absl/strings/ascii.h"
#include "absl/strings/charconv.h"
@@ -136,82 +138,132 @@ bool SimpleAtob(absl::string_view str, bool* out) {
namespace {
-// Used to optimize printing a decimal number's final digit.
-const char one_ASCII_final_digits[10][2] {
- {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0},
- {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0},
-};
+// Various routines to encode integers to strings.
+
+// We split data encodings into a group of 2 digits, 4 digits, 8 digits as
+// it's easier to combine powers of two into scalar arithmetic.
+
+// Previous implementation used a lookup table of 200 bytes for every 2 bytes
+// and it was memory bound, any L1 cache miss would result in a much slower
+// result. When benchmarking with a cache eviction rate of several percent,
+// this implementation proved to be better.
+
+// These constants represent '00', '0000' and '00000000' as ascii strings in
+// integers. We can add these numbers if we encode to bytes from 0 to 9, as
+// 'i' = '0' + i for 0 <= i <= 9.
+constexpr uint32_t kTwoZeroBytes = 0x0101 * '0';
+constexpr uint64_t kFourZeroBytes = 0x01010101 * '0';
+constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0';
+
+// * 103 / 1024 is a division by 10 for values from 0 to 99. It also works on
+// a packed value [k takes 2 bytes][m takes 2 bytes]: * 103 / 1024 plus a mask
+// yields [k / 10][m / 10]. This allows parallel division.
+constexpr uint64_t kDivisionBy10Mul = 103u;
+constexpr uint64_t kDivisionBy10Div = 1 << 10;
+
+// * 10486 / 1048576 is a division by 100 for values from 0 to 9999.
+constexpr uint64_t kDivisionBy100Mul = 10486u;
+constexpr uint64_t kDivisionBy100Div = 1 << 20;
+
+// Encode functions write the ASCII output of input `n` to `out_str`.
+inline char* EncodeHundred(uint32_t n, char* out_str) {
+ int num_digits = static_cast<int>(n - 10) >> 8;
+ uint32_t base = kTwoZeroBytes;
+ uint32_t div10 = (n * kDivisionBy10Mul) / kDivisionBy10Div;
+ uint32_t mod10 = n - 10u * div10;
+ base += div10 + (mod10 << 8);
+ base >>= num_digits & 8;
+ little_endian::Store16(out_str, static_cast<uint16_t>(base));
+ return out_str + 2 + num_digits;
+}
-} // namespace
+inline char* EncodeTenThousand(uint32_t n, char* out_str) {
+ // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive
+ // blocks. 123 -> [\0\1][\0\23]. We divide by 10 both blocks
+ // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in
+ // parallel". Then we combine both results to have both ASCII digits,
+ // strip trailing zeros, add ASCII '0000' and return.
+ uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div;
+ uint32_t mod100 = n - 100ull * div100;
+ uint32_t hundreds = (mod100 << 16) + div100;
+ uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
+ tens &= (0xFull << 16) | 0xFull;
+ tens += (hundreds - 10ull * tens) << 8;
+ ABSL_ASSUME(tens != 0);
+ // The result can contain trailing zero bits; we need to strip them up to
+ // the first significant byte of the final representation. For example, for
+ // n = 123, tens has the representation \0\1\2\3. We use countr_zero and
+ // `& -8` to round the bit count down to a multiple of 8, so that whole zero
+ // bytes are stripped, not all zero bits.
+ // 0 minus 8 to make MSVC happy.
+ uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(tens)) & (0 - 8ull);
+ tens += kFourZeroBytes;
+ tens >>= zeroes;
+ little_endian::Store32(out_str, tens);
+ return out_str + sizeof(tens) - zeroes / 8;
+}
-char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) {
- uint32_t digits;
- // The idea of this implementation is to trim the number of divides to as few
- // as possible, and also reducing memory stores and branches, by going in
- // steps of two digits at a time rather than one whenever possible.
- // The huge-number case is first, in the hopes that the compiler will output
- // that case in one branch-free block of code, and only output conditional
- // branches into it from below.
- if (i >= 1000000000) { // >= 1,000,000,000
- digits = i / 100000000; // 100,000,000
- i -= digits * 100000000;
- PutTwoDigits(digits, buffer);
- buffer += 2;
- lt100_000_000:
- digits = i / 1000000; // 1,000,000
- i -= digits * 1000000;
- PutTwoDigits(digits, buffer);
- buffer += 2;
- lt1_000_000:
- digits = i / 10000; // 10,000
- i -= digits * 10000;
- PutTwoDigits(digits, buffer);
- buffer += 2;
- lt10_000:
- digits = i / 100;
- i -= digits * 100;
- PutTwoDigits(digits, buffer);
- buffer += 2;
- lt100:
- digits = i;
- PutTwoDigits(digits, buffer);
- buffer += 2;
- *buffer = 0;
- return buffer;
- }
+// Prepare functions return an integer that should be written to out_str
+// (but possibly include trailing zeros).
+// For hi < 10000, lo < 10000 returns uint64_t as encoded in ASCII with
+// possibly trailing zeroes of the number hi * 10000 + lo.
+inline uint64_t PrepareTenThousands(uint64_t hi, uint64_t lo) {
+ uint64_t merged = hi | (lo << 32);
+ uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) &
+ ((0x7Full << 32) | 0x7Full);
+ uint64_t mod100 = merged - 100ull * div100;
+ uint64_t hundreds = (mod100 << 16) + div100;
+ uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
+ tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull;
+ tens += (hundreds - 10ull * tens) << 8;
+ return tens;
+}
- if (i < 100) {
- digits = i;
- if (i >= 10) goto lt100;
- memcpy(buffer, one_ASCII_final_digits[i], 2);
- return buffer + 1;
+inline char* EncodeFullU32(uint32_t n, char* out_str) {
+ if (n < 100'000'000) {
+ uint64_t bottom = PrepareTenThousands(n / 10000, n % 10000);
+ ABSL_ASSUME(bottom != 0);
+ // 0 minus 8 to make MSVC happy.
+ uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(bottom))
+ & (0 - 8ull);
+ uint64_t bottom_res = bottom + kEightZeroBytes;
+ bottom_res >>= zeroes;
+ little_endian::Store64(out_str, bottom_res);
+ return out_str + sizeof(bottom) - zeroes / 8;
}
- if (i < 10000) { // 10,000
- if (i >= 1000) goto lt10_000;
- digits = i / 100;
- i -= digits * 100;
- *buffer++ = '0' + static_cast<char>(digits);
- goto lt100;
- }
- if (i < 1000000) { // 1,000,000
- if (i >= 100000) goto lt1_000_000;
- digits = i / 10000; // 10,000
- i -= digits * 10000;
- *buffer++ = '0' + static_cast<char>(digits);
- goto lt10_000;
+ uint32_t top = n / 100'000'000;
+ n %= 100'000'000;
+ uint64_t bottom = PrepareTenThousands(n / 10000, n % 10000);
+ uint64_t bottom_res = bottom + kEightZeroBytes;
+ out_str = EncodeHundred(top, out_str);
+ little_endian::Store64(out_str, bottom_res);
+ return out_str + sizeof(bottom);
+}
+
+} // namespace
+
+void numbers_internal::PutTwoDigits(uint32_t i, char* buf) {
+ assert(i < 100);
+ uint32_t base = kTwoZeroBytes;
+ uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
+ uint32_t mod10 = i - 10u * div10;
+ base += div10 + (mod10 << 8);
+ little_endian::Store16(buf, static_cast<uint16_t>(base));
+}
+
+char* numbers_internal::FastIntToBuffer(uint32_t n, char* out_str) {
+ if (n < 100) {
+ out_str = EncodeHundred(n, out_str);
+ goto set_last_zero;
}
- if (i < 100000000) { // 100,000,000
- if (i >= 10000000) goto lt100_000_000;
- digits = i / 1000000; // 1,000,000
- i -= digits * 1000000;
- *buffer++ = '0' + static_cast<char>(digits);
- goto lt1_000_000;
+ if (n < 10000) {
+ out_str = EncodeTenThousand(n, out_str);
+ goto set_last_zero;
}
- // we already know that i < 1,000,000,000
- digits = i / 100000000; // 100,000,000
- i -= digits * 100000000;
- *buffer++ = '0' + static_cast<char>(digits);
- goto lt100_000_000;
+ out_str = EncodeFullU32(n, out_str);
+set_last_zero:
+ *out_str = '\0';
+ return out_str;
}
char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
@@ -219,7 +271,7 @@ char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
if (i < 0) {
*buffer++ = '-';
// We need to do the negation in modular (i.e., "unsigned")
- // arithmetic; MSVC++ apprently warns for plain "-u", so
+ // arithmetic; MSVC++ apparently warns for plain "-u", so
// we write the equivalent expression "0 - u" instead.
u = 0 - u;
}
@@ -230,41 +282,40 @@ char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) {
uint32_t u32 = static_cast<uint32_t>(i);
if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer);
- // Here we know i has at least 10 decimal digits.
- uint64_t top_1to11 = i / 1000000000;
- u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000);
- uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
+ // 10**9 < 2**32 <= i < 10**10, we can do 2+8
+ uint64_t div08 = i / 100'000'000ull;
+ uint64_t mod08 = i % 100'000'000ull;
+ uint64_t mod_result =
+ PrepareTenThousands(mod08 / 10000, mod08 % 10000) + kEightZeroBytes;
+ if (i < 10'000'000'000ull) {
+ buffer = EncodeHundred(static_cast<uint32_t>(div08), buffer);
+ little_endian::Store64(buffer, mod_result);
+ buffer += 8;
+ goto set_last_zero;
+ }
- if (top_1to11_32 == top_1to11) {
- buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer);
+ // i < 10**16, in this case 8+8
+ if (i < 10'000'000'000'000'000ull) {
+ buffer = EncodeFullU32(static_cast<uint32_t>(div08), buffer);
+ little_endian::Store64(buffer, mod_result);
+ buffer += 8;
+ goto set_last_zero;
} else {
- // top_1to11 has more than 32 bits too; print it in two steps.
- uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100);
- uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100);
- buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer);
- PutTwoDigits(mid_2, buffer);
- buffer += 2;
+ // 4 + 8 + 8
+ uint64_t div016 = i / 10'000'000'000'000'000ull;
+ buffer = EncodeTenThousand(static_cast<uint32_t>(div016), buffer);
+ uint64_t mid_result = div08 - div016 * 100'000'000ull;
+ mid_result = PrepareTenThousands(mid_result / 10000, mid_result % 10000) +
+ kEightZeroBytes;
+ little_endian::Store64(buffer, mid_result);
+ buffer += 8;
+ little_endian::Store64(buffer, mod_result);
+ buffer += 8;
+ goto set_last_zero;
}
-
- // We have only 9 digits now, again the maximum uint32_t can handle fully.
- uint32_t digits = u32 / 10000000; // 10,000,000
- u32 -= digits * 10000000;
- PutTwoDigits(digits, buffer);
- buffer += 2;
- digits = u32 / 100000; // 100,000
- u32 -= digits * 100000;
- PutTwoDigits(digits, buffer);
- buffer += 2;
- digits = u32 / 1000; // 1,000
- u32 -= digits * 1000;
- PutTwoDigits(digits, buffer);
- buffer += 2;
- digits = u32 / 10;
- u32 -= digits * 10;
- PutTwoDigits(digits, buffer);
- buffer += 2;
- memcpy(buffer, one_ASCII_final_digits[u32], 2);
- return buffer + 1;
+set_last_zero:
+ *buffer = '\0';
+ return buffer;
}
char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) {
@@ -1048,25 +1099,6 @@ ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
-ABSL_CONST_INIT ABSL_DLL const char two_ASCII_digits[100][2] = {
- {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'},
- {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'},
- {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'},
- {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'},
- {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
- {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'},
- {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
- {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'},
- {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'},
- {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
- {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'},
- {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'},
- {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'},
- {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
- {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
- {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'},
- {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};
-
bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
return safe_int_internal<int32_t>(text, value, base);
}
diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h
index 86c84ed3..d7630cef 100644
--- a/absl/strings/numbers.h
+++ b/absl/strings/numbers.h
@@ -125,8 +125,6 @@ namespace numbers_internal {
ABSL_DLL extern const char kHexChar[17]; // 0123456789abcdef
ABSL_DLL extern const char
kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011...
-ABSL_DLL extern const char
- two_ASCII_digits[100][2]; // 00, 01, 02, 03...
// Writes a two-character representation of 'i' to 'buf'. 'i' must be in the
// range 0 <= i < 100, and buf must have space for two characters. Example:
@@ -134,10 +132,7 @@ ABSL_DLL extern const char
// PutTwoDigits(42, buf);
// // buf[0] == '4'
// // buf[1] == '2'
-inline void PutTwoDigits(size_t i, char* buf) {
- assert(i < 100);
- memcpy(buf, two_ASCII_digits[i], 2);
-}
+void PutTwoDigits(uint32_t i, char* buf);
// safe_strto?() functions for implementing SimpleAtoi()
diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc
index b3c098d1..2864bda2 100644
--- a/absl/strings/numbers_test.cc
+++ b/absl/strings/numbers_test.cc
@@ -37,7 +37,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "absl/base/internal/raw_logging.h"
+#include "absl/log/log.h"
#include "absl/random/distributions.h"
#include "absl/random/random.h"
#include "absl/strings/internal/numbers_test_common.h"
@@ -1337,11 +1337,9 @@ TEST_F(SimpleDtoaTest, ExhaustiveDoubleToSixDigits) {
if (strcmp(sixdigitsbuf, snprintfbuf) != 0) {
mismatches.push_back(d);
if (mismatches.size() < 10) {
- ABSL_RAW_LOG(ERROR, "%s",
- absl::StrCat("Six-digit failure with double. ", "d=", d,
- "=", d, " sixdigits=", sixdigitsbuf,
- " printf(%g)=", snprintfbuf)
- .c_str());
+ LOG(ERROR) << "Six-digit failure with double. d=" << d
+ << " sixdigits=" << sixdigitsbuf
+ << " printf(%g)=" << snprintfbuf;
}
}
};
@@ -1389,12 +1387,10 @@ TEST_F(SimpleDtoaTest, ExhaustiveDoubleToSixDigits) {
if (kFloatNumCases >= 1e9) {
// The exhaustive test takes a very long time, so log progress.
char buf[kSixDigitsToBufferSize];
- ABSL_RAW_LOG(
- INFO, "%s",
- absl::StrCat("Exp ", exponent, " powten=", powten, "(", powten,
- ") (",
- std::string(buf, SixDigitsToBuffer(powten, buf)), ")")
- .c_str());
+ LOG(INFO) << "Exp " << exponent << " powten=" << powten << "(" << powten
+ << ") ("
+ << absl::string_view(buf, SixDigitsToBuffer(powten, buf))
+ << ")";
}
for (int digits : digit_testcases) {
if (exponent == 308 && digits >= 179769) break; // don't overflow!
@@ -1419,20 +1415,17 @@ TEST_F(SimpleDtoaTest, ExhaustiveDoubleToSixDigits) {
double before = nextafter(d, 0.0);
double after = nextafter(d, 1.7976931348623157e308);
char b1[32], b2[kSixDigitsToBufferSize];
- ABSL_RAW_LOG(
- ERROR, "%s",
- absl::StrCat(
- "Mismatch #", i, " d=", d, " (", ToNineDigits(d), ")",
- " sixdigits='", sixdigitsbuf, "'", " snprintf='", snprintfbuf,
- "'", " Before.=", PerfectDtoa(before), " ",
- (SixDigitsToBuffer(before, b2), b2),
- " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", before), b1),
- " Perfect=", PerfectDtoa(d), " ", (SixDigitsToBuffer(d, b2), b2),
- " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", d), b1),
- " After.=.", PerfectDtoa(after), " ",
- (SixDigitsToBuffer(after, b2), b2),
- " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", after), b1))
- .c_str());
+ LOG(ERROR) << "Mismatch #" << i << " d=" << d << " (" << ToNineDigits(d)
+ << ") sixdigits='" << sixdigitsbuf << "' snprintf='"
+ << snprintfbuf << "' Before.=" << PerfectDtoa(before) << " "
+ << (SixDigitsToBuffer(before, b2), b2) << " vs snprintf="
+ << (snprintf(b1, sizeof(b1), "%g", before), b1)
+ << " Perfect=" << PerfectDtoa(d) << " "
+ << (SixDigitsToBuffer(d, b2), b2)
+ << " vs snprintf=" << (snprintf(b1, sizeof(b1), "%g", d), b1)
+ << " After.=." << PerfectDtoa(after) << " "
+ << (SixDigitsToBuffer(after, b2), b2) << " vs snprintf="
+ << (snprintf(b1, sizeof(b1), "%g", after), b1);
}
}
}
diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc
index 114a2ff2..2e49c31b 100644
--- a/absl/strings/str_cat.cc
+++ b/absl/strings/str_cat.cc
@@ -30,55 +30,6 @@
namespace absl {
ABSL_NAMESPACE_BEGIN
-AlphaNum::AlphaNum(Hex hex) {
- static_assert(numbers_internal::kFastToBufferSize >= 32,
- "This function only works when output buffer >= 32 bytes long");
- char* const end = &digits_[numbers_internal::kFastToBufferSize];
- auto real_width =
- absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16);
- if (real_width >= hex.width) {
- piece_ = absl::string_view(end - real_width, real_width);
- } else {
- // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and
- // max pad width can be up to 20.
- std::memset(end - 32, hex.fill, 16);
- // Patch up everything else up to the real_width.
- std::memset(end - real_width - 16, hex.fill, 16);
- piece_ = absl::string_view(end - hex.width, hex.width);
- }
-}
-
-AlphaNum::AlphaNum(Dec dec) {
- assert(dec.width <= numbers_internal::kFastToBufferSize);
- char* const end = &digits_[numbers_internal::kFastToBufferSize];
- char* const minfill = end - dec.width;
- char* writer = end;
- uint64_t value = dec.value;
- bool neg = dec.neg;
- while (value > 9) {
- *--writer = '0' + (value % 10);
- value /= 10;
- }
- *--writer = '0' + static_cast<char>(value);
- if (neg) *--writer = '-';
-
- ptrdiff_t fillers = writer - minfill;
- if (fillers > 0) {
- // Tricky: if the fill character is ' ', then it's <fill><+/-><digits>
- // But...: if the fill character is '0', then it's <+/-><fill><digits>
- bool add_sign_again = false;
- if (neg && dec.fill == '0') { // If filling with '0',
- ++writer; // ignore the sign we just added
- add_sign_again = true; // and re-add the sign later.
- }
- writer -= fillers;
- std::fill_n(writer, fillers, dec.fill);
- if (add_sign_again) *--writer = '-';
- }
-
- piece_ = absl::string_view(writer, static_cast<size_t>(end - writer));
-}
-
// ----------------------------------------------------------------------
// StrCat()
// This merges the given strings or integers, with no delimiter. This
@@ -195,7 +146,13 @@ void AppendPieces(std::string* dest,
void StrAppend(std::string* dest, const AlphaNum& a) {
ASSERT_NO_OVERLAP(*dest, a);
- dest->append(a.data(), a.size());
+ std::string::size_type old_size = dest->size();
+ strings_internal::STLStringResizeUninitializedAmortized(dest,
+ old_size + a.size());
+ char* const begin = &(*dest)[0];
+ char* out = begin + old_size;
+ out = Append(out, a);
+ assert(out == begin + dest->size());
}
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) {
diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h
index 730b4d8c..d5f71ff0 100644
--- a/absl/strings/str_cat.h
+++ b/absl/strings/str_cat.h
@@ -87,13 +87,16 @@
#ifndef ABSL_STRINGS_STR_CAT_H_
#define ABSL_STRINGS_STR_CAT_H_
+#include <algorithm>
#include <array>
#include <cstdint>
+#include <cstring>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
+#include "absl/base/attributes.h"
#include "absl/base/port.h"
#include "absl/strings/internal/has_absl_stringify.h"
#include "absl/strings/internal/stringify_sink.h"
@@ -201,6 +204,27 @@ struct Hex {
explicit Hex(Pointee* v, PadSpec spec = absl::kNoPad)
: Hex(spec, reinterpret_cast<uintptr_t>(v)) {}
+ template <typename S>
+ friend void AbslStringify(S& sink, Hex hex) {
+ static_assert(
+ numbers_internal::kFastToBufferSize >= 32,
+ "This function only works when output buffer >= 32 bytes long");
+ char buffer[numbers_internal::kFastToBufferSize];
+ char* const end = &buffer[numbers_internal::kFastToBufferSize];
+ auto real_width =
+ absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16);
+ if (real_width >= hex.width) {
+ sink.Append(absl::string_view(end - real_width, real_width));
+ } else {
+ // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and
+ // max pad width can be up to 20.
+ std::memset(end - 32, hex.fill, 16);
+ // Patch up everything else up to the real_width.
+ std::memset(end - real_width - 16, hex.fill, 16);
+ sink.Append(absl::string_view(end - hex.width, hex.width));
+ }
+ }
+
private:
Hex(PadSpec spec, uint64_t v)
: value(v),
@@ -235,6 +259,38 @@ struct Dec {
: spec - absl::kZeroPad2 + 2),
fill(spec >= absl::kSpacePad2 ? ' ' : '0'),
neg(v < 0) {}
+
+ template <typename S>
+ friend void AbslStringify(S& sink, Dec dec) {
+ assert(dec.width <= numbers_internal::kFastToBufferSize);
+ char buffer[numbers_internal::kFastToBufferSize];
+ char* const end = &buffer[numbers_internal::kFastToBufferSize];
+ char* const minfill = end - dec.width;
+ char* writer = end;
+ uint64_t val = dec.value;
+ while (val > 9) {
+ *--writer = '0' + (val % 10);
+ val /= 10;
+ }
+ *--writer = '0' + static_cast<char>(val);
+ if (dec.neg) *--writer = '-';
+
+ ptrdiff_t fillers = writer - minfill;
+ if (fillers > 0) {
+ // Tricky: if the fill character is ' ', then it's <fill><+/-><digits>
+ // But...: if the fill character is '0', then it's <+/-><fill><digits>
+ bool add_sign_again = false;
+ if (dec.neg && dec.fill == '0') { // If filling with '0',
+ ++writer; // ignore the sign we just added
+ add_sign_again = true; // and re-add the sign later.
+ }
+ writer -= fillers;
+ std::fill_n(writer, fillers, dec.fill);
+ if (add_sign_again) *--writer = '-';
+ }
+
+ sink.Append(absl::string_view(writer, static_cast<size_t>(end - writer)));
+ }
};
// -----------------------------------------------------------------------------
@@ -282,28 +338,30 @@ class AlphaNum {
AlphaNum(double f) // NOLINT(runtime/explicit)
: piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
- AlphaNum(Hex hex); // NOLINT(runtime/explicit)
- AlphaNum(Dec dec); // NOLINT(runtime/explicit)
-
template <size_t size>
AlphaNum( // NOLINT(runtime/explicit)
- const strings_internal::AlphaNumBuffer<size>& buf)
+ const strings_internal::AlphaNumBuffer<size>& buf
+ ABSL_ATTRIBUTE_LIFETIME_BOUND)
: piece_(&buf.data[0], buf.size) {}
- AlphaNum(const char* c_str) // NOLINT(runtime/explicit)
- : piece_(NullSafeStringView(c_str)) {} // NOLINT(runtime/explicit)
- AlphaNum(absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit)
+ AlphaNum(const char* c_str // NOLINT(runtime/explicit)
+ ABSL_ATTRIBUTE_LIFETIME_BOUND)
+ : piece_(NullSafeStringView(c_str)) {}
+ AlphaNum(absl::string_view pc // NOLINT(runtime/explicit)
+ ABSL_ATTRIBUTE_LIFETIME_BOUND)
+ : piece_(pc) {}
template <typename T, typename = typename std::enable_if<
strings_internal::HasAbslStringify<T>::value>::type>
- AlphaNum( // NOLINT(runtime/explicit)
- const T& v, // NOLINT(runtime/explicit)
- strings_internal::StringifySink&& sink = {}) // NOLINT(runtime/explicit)
+ AlphaNum( // NOLINT(runtime/explicit)
+ const T& v ABSL_ATTRIBUTE_LIFETIME_BOUND,
+ strings_internal::StringifySink&& sink ABSL_ATTRIBUTE_LIFETIME_BOUND = {})
: piece_(strings_internal::ExtractStringification(sink, v)) {}
template <typename Allocator>
AlphaNum( // NOLINT(runtime/explicit)
- const std::basic_string<char, std::char_traits<char>, Allocator>& str)
+ const std::basic_string<char, std::char_traits<char>, Allocator>& str
+ ABSL_ATTRIBUTE_LIFETIME_BOUND)
: piece_(str) {}
// Use string literals ":" instead of character literals ':'.
@@ -316,14 +374,24 @@ class AlphaNum {
const char* data() const { return piece_.data(); }
absl::string_view Piece() const { return piece_; }
- // Normal enums are already handled by the integer formatters.
- // This overload matches only scoped enums.
+ // Match unscoped enums. Use integral promotion so that a `char`-backed
+ // enum becomes a wider integral type AlphaNum will accept.
template <typename T,
typename = typename std::enable_if<
- std::is_enum<T>{} && !std::is_convertible<T, int>{} &&
+ std::is_enum<T>{} && std::is_convertible<T, int>{} &&
!strings_internal::HasAbslStringify<T>::value>::type>
AlphaNum(T e) // NOLINT(runtime/explicit)
- : AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {}
+ : AlphaNum(+e) {}
+
+ // This overload matches scoped enums. We must explicitly cast to the
+ // underlying type, but use integral promotion for the same reason as above.
+ template <typename T,
+ typename std::enable_if<
+ std::is_enum<T>{} && !std::is_convertible<T, int>{} &&
+ !strings_internal::HasAbslStringify<T>::value,
+ char*>::type = nullptr>
+ AlphaNum(T e) // NOLINT(runtime/explicit)
+ : AlphaNum(+static_cast<typename std::underlying_type<T>::type>(e)) {}
// vector<bool>::reference and const_reference require special help to
// convert to `AlphaNum` because it requires two user defined conversions.
diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h
index 3536b70e..023e4350 100644
--- a/absl/strings/str_format.h
+++ b/absl/strings/str_format.h
@@ -36,10 +36,12 @@
// * `absl::StreamFormat()` to more efficiently write a format string to a
// stream, such as`std::cout`.
// * `absl::PrintF()`, `absl::FPrintF()` and `absl::SNPrintF()` as
-// replacements for `std::printf()`, `std::fprintf()` and `std::snprintf()`.
+// drop-in replacements for `std::printf()`, `std::fprintf()` and
+// `std::snprintf()`.
//
-// Note: a version of `std::sprintf()` is not supported as it is
-// generally unsafe due to buffer overflows.
+// Note: An `absl::SPrintF()` drop-in replacement is not supported as it
+// is generally unsafe due to buffer overflows. Use `absl::StrFormat` which
+// returns the string as output instead of expecting a pre-allocated buffer.
//
// Additionally, you can provide a format string (and its associated arguments)
// using one of the following abstractions:
@@ -257,6 +259,7 @@ class FormatCountCapture {
// * Characters: `char`, `signed char`, `unsigned char`
// * Integers: `int`, `short`, `unsigned short`, `unsigned`, `long`,
// `unsigned long`, `long long`, `unsigned long long`
+// * Enums: printed as their underlying integral value
// * Floating-point: `float`, `double`, `long double`
//
// However, in the `str_format` library, a format conversion specifies a broader
diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc
index 5198fb33..20fd0289 100644
--- a/absl/strings/str_format_test.cc
+++ b/absl/strings/str_format_test.cc
@@ -638,6 +638,8 @@ TEST(StrFormat, BehavesAsDocumented) {
EXPECT_EQ(StrFormat("%#o", 10), "012");
EXPECT_EQ(StrFormat("%#x", 15), "0xf");
EXPECT_EQ(StrFormat("%04d", 8), "0008");
+ EXPECT_EQ(StrFormat("%#04x", 0), "0000");
+ EXPECT_EQ(StrFormat("%#04x", 1), "0x01");
// Posix positional substitution.
EXPECT_EQ(absl::StrFormat("%2$s, %3$s, %1$s!", "vici", "veni", "vidi"),
"veni, vidi, vici!");
diff --git a/absl/strings/str_split.cc b/absl/strings/str_split.cc
index e08c26b6..72ba7c02 100644
--- a/absl/strings/str_split.cc
+++ b/absl/strings/str_split.cc
@@ -60,19 +60,23 @@ absl::string_view GenericFind(absl::string_view text,
// Finds using absl::string_view::find(), therefore the length of the found
// delimiter is delimiter.length().
struct LiteralPolicy {
- size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
+ static size_t Find(absl::string_view text, absl::string_view delimiter,
+ size_t pos) {
return text.find(delimiter, pos);
}
- size_t Length(absl::string_view delimiter) { return delimiter.length(); }
+ static size_t Length(absl::string_view delimiter) {
+ return delimiter.length();
+ }
};
// Finds using absl::string_view::find_first_of(), therefore the length of the
// found delimiter is 1.
struct AnyOfPolicy {
- size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
+ static size_t Find(absl::string_view text, absl::string_view delimiter,
+ size_t pos) {
return text.find_first_of(delimiter, pos);
}
- size_t Length(absl::string_view /* delimiter */) { return 1; }
+ static size_t Length(absl::string_view /* delimiter */) { return 1; }
};
} // namespace
@@ -123,8 +127,7 @@ ByLength::ByLength(ptrdiff_t length) : length_(length) {
ABSL_RAW_CHECK(length > 0, "");
}
-absl::string_view ByLength::Find(absl::string_view text,
- size_t pos) const {
+absl::string_view ByLength::Find(absl::string_view text, size_t pos) const {
pos = std::min(pos, text.size()); // truncate `pos`
absl::string_view substr = text.substr(pos);
// If the string is shorter than the chunk size we say we
diff --git a/absl/strings/string_view.cc b/absl/strings/string_view.cc
index e2261625..f20ff530 100644
--- a/absl/strings/string_view.cc
+++ b/absl/strings/string_view.cc
@@ -21,12 +21,35 @@
#include <cstring>
#include <ostream>
-#include "absl/strings/internal/memutil.h"
-
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
+
+// This is significantly faster for case-sensitive matches with very
+// few possible matches.
+const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
+ size_t neelen) {
+ if (0 == neelen) {
+ return phaystack; // even if haylen is 0
+ }
+ if (haylen < neelen) return nullptr;
+
+ const char* match;
+ const char* hayend = phaystack + haylen - neelen + 1;
+ // A static cast is used here to work around the fact that memchr returns
+ // a void* on Posix-compliant systems and const void* on Windows.
+ while (
+ (match = static_cast<const char*>(memchr(
+ phaystack, pneedle[0], static_cast<size_t>(hayend - phaystack))))) {
+ if (memcmp(match, pneedle, neelen) == 0)
+ return match;
+ else
+ phaystack = match + 1;
+ }
+ return nullptr;
+}
+
void WritePadding(std::ostream& o, size_t pad) {
char fill_buf[32];
memset(fill_buf, o.fill(), sizeof(fill_buf));
@@ -84,8 +107,7 @@ string_view::size_type string_view::find(string_view s,
if (empty() && pos == 0 && s.empty()) return 0;
return npos;
}
- const char* result =
- strings_internal::memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_);
+ const char* result = memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_);
return result ? static_cast<size_type>(result - ptr_) : npos;
}