summaryrefslogtreecommitdiff
path: root/absl/strings
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/BUILD.bazel29
-rw-r--r--absl/strings/CMakeLists.txt25
-rw-r--r--absl/strings/ascii.cc60
-rw-r--r--absl/strings/ascii_test.cc9
-rw-r--r--absl/strings/charconv.cc30
-rw-r--r--absl/strings/charconv.h4
-rw-r--r--absl/strings/cord.cc92
-rw-r--r--absl/strings/cord.h102
-rw-r--r--absl/strings/cord_buffer.h7
-rw-r--r--absl/strings/cord_test.cc240
-rw-r--r--absl/strings/cord_test_helpers.h2
-rw-r--r--absl/strings/escaping.cc46
-rw-r--r--absl/strings/escaping.h43
-rw-r--r--absl/strings/escaping_test.cc59
-rw-r--r--absl/strings/internal/char_map.h6
-rw-r--r--absl/strings/internal/charconv_bigint.cc6
-rw-r--r--absl/strings/internal/charconv_bigint.h6
-rw-r--r--absl/strings/internal/cord_internal.cc1
-rw-r--r--absl/strings/internal/cord_internal.h398
-rw-r--r--absl/strings/internal/cord_rep_btree.cc22
-rw-r--r--absl/strings/internal/cord_rep_btree.h43
-rw-r--r--absl/strings/internal/cord_rep_btree_test.cc12
-rw-r--r--absl/strings/internal/cord_rep_consume.cc8
-rw-r--r--absl/strings/internal/cord_rep_consume.h11
-rw-r--r--absl/strings/internal/cord_rep_crc.cc16
-rw-r--r--absl/strings/internal/cord_rep_crc.h9
-rw-r--r--absl/strings/internal/cord_rep_crc_test.cc45
-rw-r--r--absl/strings/internal/cord_rep_ring.h4
-rw-r--r--absl/strings/internal/cordz_functions.h16
-rw-r--r--absl/strings/internal/cordz_functions_test.cc2
-rw-r--r--absl/strings/internal/cordz_handle.cc66
-rw-r--r--absl/strings/internal/cordz_handle.h35
-rw-r--r--absl/strings/internal/cordz_info.cc3
-rw-r--r--absl/strings/internal/cordz_info_statistics_test.cc4
-rw-r--r--absl/strings/internal/cordz_sample_token.h10
-rw-r--r--absl/strings/internal/damerau_levenshtein_distance.cc93
-rw-r--r--absl/strings/internal/damerau_levenshtein_distance.h34
-rw-r--r--absl/strings/internal/damerau_levenshtein_distance_test.cc99
-rw-r--r--absl/strings/internal/escaping.cc45
-rw-r--r--absl/strings/internal/escaping.h17
-rw-r--r--absl/strings/internal/has_absl_stringify.h55
-rw-r--r--absl/strings/internal/stl_type_traits.h2
-rw-r--r--absl/strings/internal/str_format/arg.cc92
-rw-r--r--absl/strings/internal/str_format/arg.h98
-rw-r--r--absl/strings/internal/str_format/checker.h356
-rw-r--r--absl/strings/internal/str_format/checker_test.cc12
-rw-r--r--absl/strings/internal/str_format/constexpr_parser.h351
-rw-r--r--absl/strings/internal/str_format/convert_test.cc6
-rw-r--r--absl/strings/internal/str_format/extension.h2
-rw-r--r--absl/strings/internal/str_format/float_conversion.cc18
-rw-r--r--absl/strings/internal/str_format/parser.cc213
-rw-r--r--absl/strings/internal/str_format/parser.h103
-rw-r--r--absl/strings/internal/str_format/parser_test.cc4
-rw-r--r--absl/strings/internal/stringify_sink.cc28
-rw-r--r--absl/strings/internal/stringify_sink.h57
-rw-r--r--absl/strings/match.cc22
-rw-r--r--absl/strings/match.h9
-rw-r--r--absl/strings/match_test.cc44
-rw-r--r--absl/strings/numbers.cc2
-rw-r--r--absl/strings/str_cat.cc90
-rw-r--r--absl/strings/str_cat.h162
-rw-r--r--absl/strings/str_cat_test.cc14
-rw-r--r--absl/strings/str_format.h80
-rw-r--r--absl/strings/str_format_test.cc66
-rw-r--r--absl/strings/str_split.cc15
-rw-r--r--absl/strings/str_split_test.cc2
-rw-r--r--absl/strings/substitute.h34
-rw-r--r--absl/strings/substitute_test.cc37
68 files changed, 2445 insertions, 1288 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 5b12c010..e48a9a0a 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -37,11 +37,14 @@ cc_library(
"internal/charconv_bigint.h",
"internal/charconv_parse.cc",
"internal/charconv_parse.h",
+ "internal/damerau_levenshtein_distance.cc",
"internal/memutil.cc",
"internal/memutil.h",
"internal/stl_type_traits.h",
"internal/str_join_internal.h",
"internal/str_split_internal.h",
+ "internal/stringify_sink.cc",
+ "internal/stringify_sink.h",
"match.cc",
"numbers.cc",
"str_cat.cc",
@@ -54,6 +57,8 @@ cc_library(
"ascii.h",
"charconv.h",
"escaping.h",
+ "internal/damerau_levenshtein_distance.h",
+ "internal/has_absl_stringify.h",
"internal/string_constant.h",
"match.h",
"numbers.h",
@@ -179,6 +184,19 @@ cc_test(
)
cc_test(
+ name = "damerau_levenshtein_distance_test",
+ size = "small",
+ srcs = [
+ "internal/damerau_levenshtein_distance_test.cc",
+ ],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ "//absl/strings",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
name = "memutil_benchmark",
srcs = [
"internal/memutil.h",
@@ -304,8 +322,10 @@ cc_library(
"//absl/base:raw_logging_internal",
"//absl/base:throw_delegate",
"//absl/container:compressed_tuple",
+ "//absl/container:container_memory",
"//absl/container:inlined_vector",
"//absl/container:layout",
+ "//absl/crc:crc_cord_state",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
"//absl/types:span",
@@ -330,6 +350,7 @@ cc_test(
cc_test(
name = "cord_rep_btree_test",
size = "medium",
+ timeout = "long",
srcs = ["internal/cord_rep_btree_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
@@ -387,6 +408,7 @@ cc_test(
":cord_internal",
":cord_rep_test_util",
"//absl/base:config",
+ "//absl/crc:crc_cord_state",
"@com_google_googletest//:gtest_main",
],
)
@@ -436,7 +458,6 @@ cc_library(
":cordz_update_scope",
":cordz_update_tracker",
":internal",
- ":str_format",
":strings",
"//absl/base",
"//absl/base:config",
@@ -445,6 +466,7 @@ cc_library(
"//absl/base:raw_logging_internal",
"//absl/container:fixed_array",
"//absl/container:inlined_vector",
+ "//absl/crc:crc_cord_state",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
"//absl/numeric:bits",
@@ -492,6 +514,7 @@ cc_library(
"//absl/container:inlined_vector",
"//absl/debugging:stacktrace",
"//absl/synchronization",
+ "//absl/time",
"//absl/types:span",
],
)
@@ -641,6 +664,7 @@ cc_test(
":cordz_update_scope",
":cordz_update_tracker",
"//absl/base:config",
+ "//absl/crc:crc_cord_state",
"//absl/synchronization",
"//absl/synchronization:thread_pool",
"@com_google_googletest//:gtest_main",
@@ -770,8 +794,8 @@ cc_test(
"no_test_android_arm64",
"no_test_android_x86",
"no_test_ios_x86_64",
+ "no_test_lexan",
"no_test_loonix",
- "no_test_msvc_x64",
],
visibility = ["//visibility:private"],
deps = [
@@ -1132,6 +1156,7 @@ cc_library(
"internal/str_format/arg.h",
"internal/str_format/bind.h",
"internal/str_format/checker.h",
+ "internal/str_format/constexpr_parser.h",
"internal/str_format/extension.h",
"internal/str_format/float_conversion.h",
"internal/str_format/output.h",
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index 01f86184..d2928bd7 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -21,7 +21,9 @@ absl_cc_library(
"ascii.h"
"charconv.h"
"escaping.h"
+ "internal/damerau_levenshtein_distance.h"
"internal/string_constant.h"
+ "internal/has_absl_stringify.h"
"match.h"
"numbers.h"
"str_cat.h"
@@ -39,8 +41,11 @@ absl_cc_library(
"internal/charconv_bigint.h"
"internal/charconv_parse.cc"
"internal/charconv_parse.h"
+ "internal/damerau_levenshtein_distance.cc"
"internal/memutil.cc"
"internal/memutil.h"
+ "internal/stringify_sink.h"
+ "internal/stringify_sink.cc"
"internal/stl_type_traits.h"
"internal/str_join_internal.h"
"internal/str_split_internal.h"
@@ -134,6 +139,19 @@ absl_cc_test(
absl_cc_test(
NAME
+ damerau_levenshtein_distance_test
+ SRCS
+ "internal/damerau_levenshtein_distance_test.cc"
+ COPTS
+ ${ABSL_TEST_COPTS}
+ DEPS
+ absl::strings
+ absl::base
+ GTest::gmock_main
+)
+
+absl_cc_test(
+ NAME
memutil_test
SRCS
"internal/memutil.h"
@@ -395,6 +413,7 @@ absl_cc_library(
"internal/str_format/arg.h"
"internal/str_format/bind.h"
"internal/str_format/checker.h"
+ "internal/str_format/constexpr_parser.h"
"internal/str_format/extension.h"
"internal/str_format/float_conversion.h"
"internal/str_format/output.h"
@@ -585,7 +604,9 @@ absl_cc_library(
absl::base_internal
absl::compressed_tuple
absl::config
+ absl::container_memory
absl::core_headers
+ absl::crc_cord_state
absl::endian
absl::inlined_vector
absl::layout
@@ -724,6 +745,7 @@ absl_cc_library(
absl::raw_logging_internal
absl::stacktrace
absl::synchronization
+ absl::time
)
absl_cc_test(
@@ -764,6 +786,7 @@ absl_cc_test(
absl::cordz_statistics
absl::cordz_update_scope
absl::cordz_update_tracker
+ absl::crc_cord_state
absl::thread_pool
GTest::gmock_main
)
@@ -863,6 +886,7 @@ absl_cc_library(
absl::cordz_update_scope
absl::cordz_update_tracker
absl::core_headers
+ absl::crc_cord_state
absl::endian
absl::fixed_array
absl::function_ref
@@ -1035,6 +1059,7 @@ absl_cc_test(
absl::config
absl::cord_internal
absl::cord_rep_test_util
+ absl::crc_cord_state
GTest::gmock_main
)
diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc
index 868df2d1..16c96899 100644
--- a/absl/strings/ascii.cc
+++ b/absl/strings/ascii.cc
@@ -14,6 +14,10 @@
#include "absl/strings/ascii.h"
+#include <climits>
+#include <cstring>
+#include <string>
+
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace ascii_internal {
@@ -153,18 +157,62 @@ ABSL_DLL const char kToUpper[256] = {
};
// clang-format on
+template <bool ToUpper>
+constexpr void AsciiStrCaseFold(char* p, char* end) {
+ // The upper- and lowercase versions of ASCII characters differ by only 1 bit.
+ // When we need to flip the case, we can xor with this bit to achieve the
+ // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We
+ // could have chosen 'z' and 'Z', or any other pair of characters as they all
+ // have the same single bit difference.
+ constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';
+
+ constexpr char ch_a = ToUpper ? 'a' : 'A';
+ constexpr char ch_z = ToUpper ? 'z' : 'Z';
+ for (; p < end; ++p) {
+ unsigned char v = static_cast<unsigned char>(*p);
+ // We use & instead of && to ensure this always stays branchless
+ // We use static_cast<int> to suppress -Wbitwise-instead-of-logical
+ bool is_in_range = static_cast<bool>(static_cast<int>(ch_a <= v) &
+ static_cast<int>(v <= ch_z));
+ v ^= is_in_range ? kAsciiCaseBitFlip : 0;
+ *p = static_cast<char>(v);
+ }
+}
+
+static constexpr size_t ValidateAsciiCasefold() {
+ constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN;
+ size_t incorrect_index = 0;
+ char lowered[num_chars] = {};
+ char uppered[num_chars] = {};
+ for (unsigned int i = 0; i < num_chars; ++i) {
+ uppered[i] = lowered[i] = static_cast<char>(i);
+ }
+ AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]);
+ AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]);
+ for (size_t i = 0; i < num_chars; ++i) {
+ const char ch = static_cast<char>(i),
+ ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch),
+ ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch);
+ if (uppered[i] != ch_upper || lowered[i] != ch_lower) {
+ incorrect_index = i > 0 ? i : num_chars;
+ break;
+ }
+ }
+ return incorrect_index;
+}
+
+static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");
+
} // namespace ascii_internal
void AsciiStrToLower(std::string* s) {
- for (auto& ch : *s) {
- ch = absl::ascii_tolower(static_cast<unsigned char>(ch));
- }
+ char* p = &(*s)[0]; // Guaranteed to be valid for empty strings
+ return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size());
}
void AsciiStrToUpper(std::string* s) {
- for (auto& ch : *s) {
- ch = absl::ascii_toupper(static_cast<unsigned char>(ch));
- }
+ char* p = &(*s)[0]; // Guaranteed to be valid for empty strings
+ return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size());
}
void RemoveExtraAsciiWhitespace(std::string* str) {
diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc
index dfed114c..4ea262f1 100644
--- a/absl/strings/ascii_test.cc
+++ b/absl/strings/ascii_test.cc
@@ -14,6 +14,7 @@
#include "absl/strings/ascii.h"
+#include <algorithm>
#include <cctype>
#include <clocale>
#include <cstring>
@@ -189,14 +190,14 @@ TEST(AsciiStrTo, Lower) {
const std::string str("GHIJKL");
const std::string str2("MNOPQR");
const absl::string_view sp(str2);
- std::string mutable_str("STUVWX");
+ std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ");
EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf));
EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str));
EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp));
absl::AsciiStrToLower(&mutable_str);
- EXPECT_EQ("stuvwx", mutable_str);
+ EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str);
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
@@ -207,12 +208,12 @@ TEST(AsciiStrTo, Lower) {
TEST(AsciiStrTo, Upper) {
const char buf[] = "abcdef";
const std::string str("ghijkl");
- const std::string str2("mnopqr");
+ const std::string str2("_`?@[{amnopqrstuvwxyz");
const absl::string_view sp(str2);
EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf));
EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str));
- EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp));
+ EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp));
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
diff --git a/absl/strings/charconv.cc b/absl/strings/charconv.cc
index 25ac4499..778a1c75 100644
--- a/absl/strings/charconv.cc
+++ b/absl/strings/charconv.cc
@@ -21,6 +21,7 @@
#include <limits>
#include "absl/base/casts.h"
+#include "absl/base/config.h"
#include "absl/numeric/bits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/internal/charconv_bigint.h"
@@ -118,10 +119,17 @@ struct FloatTraits<double> {
static constexpr int kEiselLemireMaxExclusiveExp10 = 309;
static double MakeNan(const char* tagp) {
+#if ABSL_HAVE_BUILTIN(__builtin_nan)
+ // Use __builtin_nan() if available since it has a fix for
+ // https://bugs.llvm.org/show_bug.cgi?id=37778
+ // std::nan may use the glibc implementation.
+ return __builtin_nan(tagp);
+#else
// Support nan no matter which namespace it's in. Some platforms
// incorrectly don't put it in namespace std.
using namespace std; // NOLINT
return nan(tagp);
+#endif
}
// Builds a nonzero floating point number out of the provided parts.
@@ -184,10 +192,17 @@ struct FloatTraits<float> {
static constexpr int kEiselLemireMaxExclusiveExp10 = 39;
static float MakeNan(const char* tagp) {
+#if ABSL_HAVE_BUILTIN(__builtin_nanf)
+ // Use __builtin_nanf() if available since it has a fix for
+ // https://bugs.llvm.org/show_bug.cgi?id=37778
+ // std::nanf may use the glibc implementation.
+ return __builtin_nanf(tagp);
+#else
// Support nanf no matter which namespace it's in. Some platforms
// incorrectly don't put it in namespace std.
using namespace std; // NOLINT
- return nanf(tagp);
+ return std::nanf(tagp);
+#endif
}
static float Make(mantissa_t mantissa, int exponent, bool sign) {
@@ -203,7 +218,8 @@ struct FloatTraits<float> {
if (mantissa > kMantissaMask) {
// Normal value.
// Adjust by 127 for the exponent representation bias, and an additional
- // 23 due to the implied decimal point in the IEEE mantissa represenation.
+ // 23 due to the implied decimal point in the IEEE mantissa
+ // representation.
flt += static_cast<uint32_t>(exponent + 127 + kTargetMantissaBits - 1)
<< 23;
mantissa &= kMantissaMask;
@@ -298,7 +314,9 @@ struct CalculatedFloat {
// minus the number of leading zero bits.)
int BitWidth(uint128 value) {
if (Uint128High64(value) == 0) {
- return bit_width(Uint128Low64(value));
+ // This static_cast is only needed when using a std::bit_width()
+ // implementation that does not have the fix for LWG 3656 applied.
+ return static_cast<int>(bit_width(Uint128Low64(value)));
}
return 128 - countl_zero(Uint128High64(value));
}
@@ -461,7 +479,7 @@ uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact,
// the low bit of `value` is set.
//
// In inexact mode, the nonzero error means the actual value is greater
- // than the halfway point and we must alway round up.
+ // than the halfway point and we must always round up.
if ((value & 1) == 1 || !input_exact) {
++value;
}
@@ -581,7 +599,9 @@ CalculatedFloat CalculateFromParsedHexadecimal(
const strings_internal::ParsedFloat& parsed_hex) {
uint64_t mantissa = parsed_hex.mantissa;
int exponent = parsed_hex.exponent;
- int mantissa_width = bit_width(mantissa);
+ // This static_cast is only needed when using a std::bit_width()
+ // implementation that does not have the fix for LWG 3656 applied.
+ int mantissa_width = static_cast<int>(bit_width(mantissa));
const int shift = NormalizedShiftSize<FloatType>(mantissa_width, exponent);
bool result_exact;
exponent += shift;
diff --git a/absl/strings/charconv.h b/absl/strings/charconv.h
index 7c509812..111c7120 100644
--- a/absl/strings/charconv.h
+++ b/absl/strings/charconv.h
@@ -22,7 +22,7 @@
namespace absl {
ABSL_NAMESPACE_BEGIN
-// Workalike compatibilty version of std::chars_format from C++17.
+// Workalike compatibility version of std::chars_format from C++17.
//
// This is an bitfield enumerator which can be passed to absl::from_chars to
// configure the string-to-float conversion.
@@ -48,7 +48,7 @@ struct from_chars_result {
std::errc ec;
};
-// Workalike compatibilty version of std::from_chars from C++17. Currently
+// Workalike compatibility version of std::from_chars from C++17. Currently
// this only supports the `double` and `float` types.
//
// This interface incorporates the proposed resolutions for library issues
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc
index 66f45fef..14976aef 100644
--- a/absl/strings/cord.cc
+++ b/absl/strings/cord.cc
@@ -35,6 +35,7 @@
#include "absl/base/port.h"
#include "absl/container/fixed_array.h"
#include "absl/container/inlined_vector.h"
+#include "absl/crc/internal/crc_cord_state.h"
#include "absl/strings/cord_buffer.h"
#include "absl/strings/escaping.h"
#include "absl/strings/internal/cord_data_edge.h"
@@ -47,7 +48,6 @@
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/str_cat.h"
-#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
@@ -167,9 +167,7 @@ constexpr unsigned char Cord::InlineRep::kMaxInline;
inline void Cord::InlineRep::set_data(const char* data, size_t n) {
static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");
-
- cord_internal::SmallMemmove<true>(data_.as_chars(), data, n);
- set_inline_size(n);
+ data_.set_inline_data(data, n);
}
inline char* Cord::InlineRep::set_data(size_t n) {
@@ -420,6 +418,7 @@ Cord& Cord::operator=(absl::string_view src) {
// we keep it here to make diffs easier.
void Cord::InlineRep::AppendArray(absl::string_view src,
MethodIdentifier method) {
+ MaybeRemoveEmptyCrcNode();
if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined.
size_t appended = 0;
@@ -437,8 +436,8 @@ void Cord::InlineRep::AppendArray(absl::string_view src,
size_t inline_length = inline_size();
if (src.size() <= kMaxInline - inline_length) {
// Append new data to embedded array
- memcpy(data_.as_chars() + inline_length, src.data(), src.size());
set_inline_size(inline_length + src.size());
+ memcpy(data_.as_chars() + inline_length, src.data(), src.size());
return;
}
@@ -479,6 +478,10 @@ inline CordRep* Cord::TakeRep() && {
template <typename C>
inline void Cord::AppendImpl(C&& src) {
auto constexpr method = CordzUpdateTracker::kAppendCord;
+
+ contents_.MaybeRemoveEmptyCrcNode();
+ if (src.empty()) return;
+
if (empty()) {
// Since destination is empty, we can avoid allocating a node,
if (src.contents_.is_tree()) {
@@ -591,6 +594,9 @@ void Cord::Append(T&& src) {
template void Cord::Append(std::string&& src);
void Cord::Prepend(const Cord& src) {
+ contents_.MaybeRemoveEmptyCrcNode();
+ if (src.empty()) return;
+
CordRep* src_tree = src.contents_.tree();
if (src_tree != nullptr) {
CordRep::Ref(src_tree);
@@ -605,15 +611,17 @@ void Cord::Prepend(const Cord& src) {
}
void Cord::PrependArray(absl::string_view src, MethodIdentifier method) {
+ contents_.MaybeRemoveEmptyCrcNode();
if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined.
+
if (!contents_.is_tree()) {
size_t cur_size = contents_.inline_size();
if (cur_size + src.size() <= InlineRep::kMaxInline) {
// Use embedded storage.
InlineData data;
+ data.set_inline_size(cur_size + src.size());
memcpy(data.as_chars(), src.data(), src.size());
memcpy(data.as_chars() + src.size(), contents_.data(), cur_size);
- data.set_inline_size(cur_size + src.size());
contents_.data_ = data;
return;
}
@@ -627,8 +635,8 @@ void Cord::AppendPrecise(absl::string_view src, MethodIdentifier method) {
assert(src.size() <= cord_internal::kMaxFlatLength);
if (contents_.remaining_inline_capacity() >= src.size()) {
const size_t inline_length = contents_.inline_size();
- memcpy(contents_.data_.as_chars() + inline_length, src.data(), src.size());
contents_.set_inline_size(inline_length + src.size());
+ memcpy(contents_.data_.as_chars() + inline_length, src.data(), src.size());
} else {
contents_.AppendTree(CordRepFlat::Create(src), method);
}
@@ -640,9 +648,9 @@ void Cord::PrependPrecise(absl::string_view src, MethodIdentifier method) {
if (contents_.remaining_inline_capacity() >= src.size()) {
const size_t cur_size = contents_.inline_size();
InlineData data;
+ data.set_inline_size(cur_size + src.size());
memcpy(data.as_chars(), src.data(), src.size());
memcpy(data.as_chars() + src.size(), contents_.data(), cur_size);
- data.set_inline_size(cur_size + src.size());
contents_.data_ = data;
} else {
contents_.PrependTree(CordRepFlat::Create(src), method);
@@ -665,6 +673,7 @@ void Cord::RemovePrefix(size_t n) {
ABSL_INTERNAL_CHECK(n <= size(),
absl::StrCat("Requested prefix size ", n,
" exceeds Cord's size ", size()));
+ contents_.MaybeRemoveEmptyCrcNode();
CordRep* tree = contents_.tree();
if (tree == nullptr) {
contents_.remove_prefix(n);
@@ -695,6 +704,7 @@ void Cord::RemoveSuffix(size_t n) {
ABSL_INTERNAL_CHECK(n <= size(),
absl::StrCat("Requested suffix size ", n,
" exceeds Cord's size ", size()));
+ contents_.MaybeRemoveEmptyCrcNode();
CordRep* tree = contents_.tree();
if (tree == nullptr) {
contents_.reduce_size(n);
@@ -733,6 +743,7 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const {
}
if (new_size <= InlineRep::kMaxInline) {
+ sub_cord.contents_.set_inline_size(new_size);
char* dest = sub_cord.contents_.data_.as_chars();
Cord::ChunkIterator it = chunk_begin();
it.AdvanceBytes(pos);
@@ -744,7 +755,6 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const {
++it;
}
cord_internal::SmallMemmove(dest, it->data(), remaining_size);
- sub_cord.contents_.set_inline_size(new_size);
return sub_cord;
}
@@ -784,7 +794,7 @@ int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
}
// This overload set computes comparison results from memcmp result. This
-// interface is used inside GenericCompare below. Differet implementations
+// interface is used inside GenericCompare below. Different implementations
// are specialized for int and bool. For int we clamp result to {-1, 0, 1}
// set. For bool we just interested in "value == 0".
template <typename ResultType>
@@ -842,26 +852,44 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
return absl::string_view(node->external()->base + offset, length);
}
-void Cord::SetExpectedChecksum(uint32_t crc) {
+void Cord::SetCrcCordState(crc_internal::CrcCordState state) {
auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum;
- if (empty()) return;
-
- if (!contents_.is_tree()) {
+ if (empty()) {
+ contents_.MaybeRemoveEmptyCrcNode();
+ CordRep* rep = CordRepCrc::New(nullptr, std::move(state));
+ contents_.EmplaceTree(rep, method);
+ } else if (!contents_.is_tree()) {
CordRep* rep = contents_.MakeFlatWithExtraCapacity(0);
- rep = CordRepCrc::New(rep, crc);
+ rep = CordRepCrc::New(rep, std::move(state));
contents_.EmplaceTree(rep, method);
} else {
const CordzUpdateScope scope(contents_.data_.cordz_info(), method);
- CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), crc);
+ CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), std::move(state));
contents_.SetTree(rep, scope);
}
}
+void Cord::SetExpectedChecksum(uint32_t crc) {
+ // Construct a CrcCordState with a single chunk.
+ crc_internal::CrcCordState state;
+ state.mutable_rep()->prefix_crc.push_back(
+ crc_internal::CrcCordState::PrefixCrc(size(), absl::crc32c_t{crc}));
+ SetCrcCordState(std::move(state));
+}
+
+const crc_internal::CrcCordState* Cord::MaybeGetCrcCordState() const {
+ if (!contents_.is_tree() || !contents_.tree()->IsCrc()) {
+ return nullptr;
+ }
+ return &contents_.tree()->crc()->crc_cord_state;
+}
+
absl::optional<uint32_t> Cord::ExpectedChecksum() const {
if (!contents_.is_tree() || !contents_.tree()->IsCrc()) {
return absl::nullopt;
}
- return contents_.tree()->crc()->crc;
+ return static_cast<uint32_t>(
+ contents_.tree()->crc()->crc_cord_state.Checksum());
}
inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
@@ -929,6 +957,7 @@ inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
}
inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
+ if (c.empty()) return {};
return c.contents_.FindFlatStartPiece();
}
inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
@@ -1166,6 +1195,10 @@ absl::string_view Cord::FlattenSlowPath() {
/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
assert(rep != nullptr);
+ if (rep->length == 0) {
+ *fragment = absl::string_view();
+ return true;
+ }
rep = cord_internal::SkipCrcNode(rep);
if (rep->IsFlat()) {
*fragment = absl::string_view(rep->flat()->Data(), rep->length);
@@ -1197,6 +1230,7 @@ absl::string_view Cord::FlattenSlowPath() {
absl::cord_internal::CordRep* rep,
absl::FunctionRef<void(absl::string_view)> callback) {
assert(rep != nullptr);
+ if (rep->length == 0) return;
rep = cord_internal::SkipCrcNode(rep);
if (rep->IsBtree()) {
@@ -1230,8 +1264,12 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
if (include_data) *os << static_cast<void*>(rep);
*os << "]";
*os << " " << std::setw(indent) << "";
- if (rep->IsCrc()) {
- *os << "CRC crc=" << rep->crc()->crc << "\n";
+ bool leaf = false;
+ if (rep == nullptr) {
+ *os << "NULL\n";
+ leaf = true;
+ } else if (rep->IsCrc()) {
+ *os << "CRC crc=" << rep->crc()->crc_cord_state.Checksum() << "\n";
indent += kIndentStep;
rep = rep->crc()->child;
} else if (rep->IsSubstring()) {
@@ -1239,6 +1277,7 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
indent += kIndentStep;
rep = rep->substring()->child;
} else { // Leaf or ring
+ leaf = true;
if (rep->IsExternal()) {
*os << "EXTERNAL [";
if (include_data)
@@ -1252,6 +1291,8 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
} else {
CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os);
}
+ }
+ if (leaf) {
if (stack.empty()) break;
rep = stack.back();
stack.pop_back();
@@ -1297,11 +1338,14 @@ static bool VerifyNode(CordRep* root, CordRep* start_node,
node->substring()->child->length,
ReportError(root, node));
} else if (node->IsCrc()) {
- ABSL_INTERNAL_CHECK(node->crc()->child != nullptr,
- ReportError(root, node));
- ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length,
- ReportError(root, node));
- worklist.push_back(node->crc()->child);
+ ABSL_INTERNAL_CHECK(
+ node->crc()->child != nullptr || node->crc()->length == 0,
+ ReportError(root, node));
+ if (node->crc()->child != nullptr) {
+ ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length,
+ ReportError(root, node));
+ worklist.push_back(node->crc()->child);
+ }
}
} while (!worklist.empty());
return true;
diff --git a/absl/strings/cord.h b/absl/strings/cord.h
index 88e1c85d..f5a2da97 100644
--- a/absl/strings/cord.h
+++ b/absl/strings/cord.h
@@ -76,6 +76,7 @@
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/container/inlined_vector.h"
+#include "absl/crc/internal/crc_cord_state.h"
#include "absl/functional/function_ref.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/cord_analysis.h"
@@ -660,7 +661,7 @@ class Cord {
class CharRange {
public:
// Fulfill minimum c++ container requirements [container.requirements]
- // Theses (partial) container type definitions allow CharRange to be used
+ // These (partial) container type definitions allow CharRange to be used
// in various utilities expecting a subset of [container.requirements].
// For example, the below enables using `::testing::ElementsAre(...)`
using value_type = char;
@@ -814,7 +815,7 @@ class Cord {
InlineRep& operator=(const InlineRep& src);
InlineRep& operator=(InlineRep&& src) noexcept;
- explicit constexpr InlineRep(cord_internal::InlineData data);
+ explicit constexpr InlineRep(absl::string_view sv, CordRep* rep);
void Swap(InlineRep* rhs);
bool empty() const;
@@ -873,15 +874,14 @@ class Cord {
void PrependTreeToTree(CordRep* tree, MethodIdentifier method);
void PrependTree(CordRep* tree, MethodIdentifier method);
- bool IsSame(const InlineRep& other) const {
- return memcmp(&data_, &other.data_, sizeof(data_)) == 0;
- }
+ bool IsSame(const InlineRep& other) const { return data_ == other.data_; }
+
void CopyTo(std::string* dst) const {
// memcpy is much faster when operating on a known size. On most supported
// platforms, the small string optimization is large enough that resizing
// to 15 bytes does not cause a memory allocation.
absl::strings_internal::STLStringResizeUninitialized(dst, kMaxInline);
- memcpy(&(*dst)[0], data_.as_chars(), kMaxInline);
+ data_.copy_max_inline_to(&(*dst)[0]);
// erase is faster than resize because the logic for memory allocation is
// not needed.
dst->erase(inline_size());
@@ -926,6 +926,13 @@ class Cord {
void set_inline_size(size_t size) { data_.set_inline_size(size); }
size_t inline_size() const { return data_.inline_size(); }
+ // Empty cords that carry a checksum have a CordRepCrc node with a null
+ // child node. The code can avoid lots of special cases where it would
+ // otherwise transition from tree to inline storage if we just remove the
+ // CordRepCrc node before mutations. Must never be called inside a
+ // CordzUpdateScope since it untracks the cordz info.
+ void MaybeRemoveEmptyCrcNode();
+
cord_internal::InlineData data_;
};
InlineRep contents_;
@@ -995,6 +1002,10 @@ class Cord {
});
return H::combine(combiner.finalize(std::move(hash_state)), size());
}
+
+ friend class CrcCord;
+ void SetCrcCordState(crc_internal::CrcCordState state);
+ const crc_internal::CrcCordState* MaybeGetCrcCordState() const;
};
ABSL_NAMESPACE_END
@@ -1011,46 +1022,6 @@ extern std::ostream& operator<<(std::ostream& out, const Cord& cord);
namespace cord_internal {
-// Fast implementation of memmove for up to 15 bytes. This implementation is
-// safe for overlapping regions. If nullify_tail is true, the destination is
-// padded with '\0' up to 15 bytes.
-template <bool nullify_tail = false>
-inline void SmallMemmove(char* dst, const char* src, size_t n) {
- if (n >= 8) {
- assert(n <= 15);
- uint64_t buf1;
- uint64_t buf2;
- memcpy(&buf1, src, 8);
- memcpy(&buf2, src + n - 8, 8);
- if (nullify_tail) {
- memset(dst + 7, 0, 8);
- }
- memcpy(dst, &buf1, 8);
- memcpy(dst + n - 8, &buf2, 8);
- } else if (n >= 4) {
- uint32_t buf1;
- uint32_t buf2;
- memcpy(&buf1, src, 4);
- memcpy(&buf2, src + n - 4, 4);
- if (nullify_tail) {
- memset(dst + 4, 0, 4);
- memset(dst + 7, 0, 8);
- }
- memcpy(dst, &buf1, 4);
- memcpy(dst + n - 4, &buf2, 4);
- } else {
- if (n != 0) {
- dst[0] = src[0];
- dst[n / 2] = src[n / 2];
- dst[n - 1] = src[n - 1];
- }
- if (nullify_tail) {
- memset(dst + 7, 0, 8);
- memset(dst + n, 0, 8);
- }
- }
-}
-
// Does non-template-specific `CordRepExternal` initialization.
// Requires `data` to be non-empty.
void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep);
@@ -1094,8 +1065,8 @@ Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) {
return cord;
}
-constexpr Cord::InlineRep::InlineRep(cord_internal::InlineData data)
- : data_(data) {}
+constexpr Cord::InlineRep::InlineRep(absl::string_view sv, CordRep* rep)
+ : data_(sv, rep) {}
inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src)
: data_(InlineData::kDefaultInit) {
@@ -1174,7 +1145,7 @@ inline cord_internal::CordRepFlat* Cord::InlineRep::MakeFlatWithExtraCapacity(
size_t len = data_.inline_size();
auto* result = CordRepFlat::New(len + extra);
result->length = len;
- memcpy(result->Data(), data_.as_chars(), InlineRep::kMaxInline);
+ data_.copy_max_inline_to(result->Data());
return result;
}
@@ -1236,6 +1207,18 @@ inline void Cord::InlineRep::CopyToArray(char* dst) const {
cord_internal::SmallMemmove(dst, data_.as_chars(), n);
}
+inline void Cord::InlineRep::MaybeRemoveEmptyCrcNode() {
+ CordRep* rep = tree();
+ if (rep == nullptr || ABSL_PREDICT_TRUE(rep->length > 0)) {
+ return;
+ }
+ assert(rep->IsCrc());
+ assert(rep->crc()->child == nullptr);
+ CordzInfo::MaybeUntrackCord(cordz_info());
+ CordRep::Unref(rep);
+ ResetToEmpty();
+}
+
constexpr inline Cord::Cord() noexcept {}
inline Cord::Cord(absl::string_view src)
@@ -1243,13 +1226,12 @@ inline Cord::Cord(absl::string_view src)
template <typename T>
constexpr Cord::Cord(strings_internal::StringConstant<T>)
- : contents_(strings_internal::StringConstant<T>::value.size() <=
+ : contents_(strings_internal::StringConstant<T>::value,
+ strings_internal::StringConstant<T>::value.size() <=
cord_internal::kMaxInline
- ? cord_internal::InlineData(
- strings_internal::StringConstant<T>::value)
- : cord_internal::InlineData(
- &cord_internal::ConstInitExternalStorage<
- strings_internal::StringConstant<T>>::value)) {}
+ ? nullptr
+ : &cord_internal::ConstInitExternalStorage<
+ strings_internal::StringConstant<T>>::value) {}
inline Cord& Cord::operator=(const Cord& x) {
contents_ = x.contents_;
@@ -1285,7 +1267,7 @@ inline size_t Cord::size() const {
return contents_.size();
}
-inline bool Cord::empty() const { return contents_.empty(); }
+inline bool Cord::empty() const { return size() == 0; }
inline size_t Cord::EstimatedMemoryUsage(
CordMemoryAccounting accounting_method) const {
@@ -1411,7 +1393,11 @@ inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) {
inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) {
if (CordRep* tree = cord->contents_.tree()) {
bytes_remaining_ = tree->length;
- InitTree(tree);
+ if (ABSL_PREDICT_TRUE(bytes_remaining_ != 0)) {
+ InitTree(tree);
+ } else {
+ current_chunk_ = {};
+ }
} else {
bytes_remaining_ = cord->contents_.inline_size();
current_chunk_ = {cord->contents_.data(), bytes_remaining_};
@@ -1580,7 +1566,7 @@ inline void Cord::ForEachChunk(
if (rep == nullptr) {
callback(absl::string_view(contents_.data(), contents_.size()));
} else {
- return ForEachChunkAux(rep, callback);
+ ForEachChunkAux(rep, callback);
}
}
diff --git a/absl/strings/cord_buffer.h b/absl/strings/cord_buffer.h
index 15494b31..bc0e4e45 100644
--- a/absl/strings/cord_buffer.h
+++ b/absl/strings/cord_buffer.h
@@ -160,7 +160,6 @@ class CordBuffer {
// for more information on buffer capacities and intended usage.
static CordBuffer CreateWithDefaultLimit(size_t capacity);
-
// CordBuffer::CreateWithCustomLimit()
//
// Creates a CordBuffer instance of the desired `capacity` rounded to an
@@ -336,7 +335,7 @@ class CordBuffer {
}
// Returns the available area of the internal SSO data
- absl::Span<char> long_available() {
+ absl::Span<char> long_available() const {
assert(!is_short());
const size_t length = long_rep.rep->length;
return absl::Span<char>(long_rep.rep->Data() + length,
@@ -460,9 +459,7 @@ inline constexpr size_t CordBuffer::MaximumPayload() {
}
inline constexpr size_t CordBuffer::MaximumPayload(size_t block_size) {
- // TODO(absl-team): Use std::min when C++11 support is dropped.
- return (kCustomLimit < block_size ? kCustomLimit : block_size) -
- cord_internal::kFlatOverhead;
+ return (std::min)(kCustomLimit, block_size) - cord_internal::kFlatOverhead;
}
inline CordBuffer CordBuffer::CreateWithDefaultLimit(size_t capacity) {
diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc
index 1fc4be6e..3fe3967f 100644
--- a/absl/strings/cord_test.cc
+++ b/absl/strings/cord_test.cc
@@ -58,6 +58,8 @@ using absl::cord_internal::CordRepSubstring;
using absl::cord_internal::CordzUpdateTracker;
using absl::cord_internal::kFlatOverhead;
using absl::cord_internal::kMaxFlatLength;
+using ::testing::ElementsAre;
+using ::testing::Le;
static std::string RandomLowercaseString(RandomEngine* rng);
static std::string RandomLowercaseString(RandomEngine* rng, size_t length);
@@ -618,7 +620,7 @@ TEST_P(CordTest, AppendEmptyBufferToTree) {
TEST_P(CordTest, AppendSmallBuffer) {
absl::Cord cord;
absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3);
- ASSERT_THAT(buffer.capacity(), ::testing::Le(15));
+ ASSERT_THAT(buffer.capacity(), Le(15));
memcpy(buffer.data(), "Abc", 3);
buffer.SetLength(3);
cord.Append(std::move(buffer));
@@ -632,7 +634,7 @@ TEST_P(CordTest, AppendSmallBuffer) {
EXPECT_EQ(buffer.length(), 0); // NOLINT
EXPECT_GT(buffer.capacity(), 0); // NOLINT
- EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("Abcdefgh"));
+ EXPECT_THAT(cord.Chunks(), ElementsAre("Abcdefgh"));
}
TEST_P(CordTest, AppendAndPrependBufferArePrecise) {
@@ -671,7 +673,7 @@ TEST_P(CordTest, AppendAndPrependBufferArePrecise) {
TEST_P(CordTest, PrependSmallBuffer) {
absl::Cord cord;
absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3);
- ASSERT_THAT(buffer.capacity(), ::testing::Le(15));
+ ASSERT_THAT(buffer.capacity(), Le(15));
memcpy(buffer.data(), "Abc", 3);
buffer.SetLength(3);
cord.Prepend(std::move(buffer));
@@ -685,7 +687,7 @@ TEST_P(CordTest, PrependSmallBuffer) {
EXPECT_EQ(buffer.length(), 0); // NOLINT
EXPECT_GT(buffer.capacity(), 0); // NOLINT
- EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("defghAbc"));
+ EXPECT_THAT(cord.Chunks(), ElementsAre("defghAbc"));
}
TEST_P(CordTest, AppendLargeBuffer) {
@@ -707,7 +709,7 @@ TEST_P(CordTest, AppendLargeBuffer) {
EXPECT_EQ(buffer.length(), 0); // NOLINT
EXPECT_GT(buffer.capacity(), 0); // NOLINT
- EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s1, s2));
+ EXPECT_THAT(cord.Chunks(), ElementsAre(s1, s2));
}
TEST_P(CordTest, PrependLargeBuffer) {
@@ -729,7 +731,7 @@ TEST_P(CordTest, PrependLargeBuffer) {
EXPECT_EQ(buffer.length(), 0); // NOLINT
EXPECT_GT(buffer.capacity(), 0); // NOLINT
- EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s2, s1));
+ EXPECT_THAT(cord.Chunks(), ElementsAre(s2, s1));
}
class CordAppendBufferTest : public testing::TestWithParam<bool> {
@@ -1988,6 +1990,12 @@ TEST_P(CordTest, HugeCord) {
// Tests that Append() works ok when handed a self reference
TEST_P(CordTest, AppendSelf) {
+ // Test the empty case.
+ absl::Cord empty;
+ MaybeHarden(empty);
+ empty.Append(empty);
+ ASSERT_EQ(empty, "");
+
// We run the test until data is ~16K
// This guarantees it covers small, medium and large data.
std::string control_data = "Abc";
@@ -2712,7 +2720,7 @@ class CordMutator {
// clang-format off
// This array is constant-initialized in conformant compilers.
-CordMutator cord_mutators[] ={
+CordMutator cord_mutators[] = {
{"clear", [](absl::Cord& c) { c.Clear(); }},
{"overwrite", [](absl::Cord& c) { c = "overwritten"; }},
{
@@ -2742,6 +2750,25 @@ CordMutator cord_mutators[] ={
[](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); }
},
{
+ "append empty string",
+ [](absl::Cord& c) { c.Append(""); },
+ [](absl::Cord& c) { }
+ },
+ {
+ "append empty cord",
+ [](absl::Cord& c) { c.Append(absl::Cord()); },
+ [](absl::Cord& c) { }
+ },
+ {
+ "append empty checksummed cord",
+ [](absl::Cord& c) {
+ absl::Cord to_append;
+ to_append.SetExpectedChecksum(999);
+ c.Append(to_append);
+ },
+ [](absl::Cord& c) { }
+ },
+ {
"prepend string",
[](absl::Cord& c) { c.Prepend("9876543210"); },
[](absl::Cord& c) { c.RemovePrefix(10); }
@@ -2763,12 +2790,33 @@ CordMutator cord_mutators[] ={
[](absl::Cord& c) { c.RemovePrefix(10); }
},
{
+ "prepend empty string",
+ [](absl::Cord& c) { c.Prepend(""); },
+ [](absl::Cord& c) { }
+ },
+ {
+ "prepend empty cord",
+ [](absl::Cord& c) { c.Prepend(absl::Cord()); },
+ [](absl::Cord& c) { }
+ },
+ {
+ "prepend empty checksummed cord",
+ [](absl::Cord& c) {
+ absl::Cord to_prepend;
+ to_prepend.SetExpectedChecksum(999);
+ c.Prepend(to_prepend);
+ },
+ [](absl::Cord& c) { }
+ },
+ {
"prepend self",
[](absl::Cord& c) { c.Prepend(c); },
[](absl::Cord& c) { c.RemovePrefix(c.size() / 2); }
},
- {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(2); }},
- {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(2); }},
+ {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); }},
+ {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); }},
+ {"remove 0-prefix", [](absl::Cord& c) { c.RemovePrefix(0); }},
+ {"remove 0-suffix", [](absl::Cord& c) { c.RemoveSuffix(0); }},
{"subcord", [](absl::Cord& c) { c = c.Subcord(1, c.size() - 2); }},
{
"swap inline",
@@ -2810,6 +2858,12 @@ TEST_P(CordTest, ExpectedChecksum) {
EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345);
EXPECT_EQ(c1, base_value);
+ // Test that setting an expected checksum again doesn't crash or leak
+ // memory.
+ c1.SetExpectedChecksum(12345);
+ EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345);
+ EXPECT_EQ(c1, base_value);
+
// CRC persists through copies, assignments, and moves:
absl::Cord c1_copy_construct = c1;
EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345);
@@ -2834,6 +2888,13 @@ TEST_P(CordTest, ExpectedChecksum) {
c2.SetExpectedChecksum(24680);
mutator.Mutate(c2);
+
+ if (c1 == c2) {
+ // Not a mutation (for example, appending the empty string).
+ // Whether the checksum is removed is not defined.
+ continue;
+ }
+
EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt);
if (mutator.CanUndo()) {
@@ -2903,3 +2964,164 @@ TEST_P(CordTest, ExpectedChecksum) {
}
}
}
+
+// Test the special cases encountered with an empty checksummed cord.
+TEST_P(CordTest, ChecksummedEmptyCord) {
+ absl::Cord c1;
+ EXPECT_FALSE(c1.ExpectedChecksum().has_value());
+
+ // Setting an expected checksum works.
+ c1.SetExpectedChecksum(12345);
+ EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345);
+ EXPECT_EQ(c1, "");
+ EXPECT_TRUE(c1.empty());
+
+ // Test that setting an expected checksum again doesn't crash or leak memory.
+ c1.SetExpectedChecksum(12345);
+ EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345);
+ EXPECT_EQ(c1, "");
+ EXPECT_TRUE(c1.empty());
+
+ // CRC persists through copies, assignments, and moves:
+ absl::Cord c1_copy_construct = c1;
+ EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345);
+
+ absl::Cord c1_copy_assign;
+ c1_copy_assign = c1;
+ EXPECT_EQ(c1_copy_assign.ExpectedChecksum().value_or(0), 12345);
+
+ absl::Cord c1_move(std::move(c1_copy_assign));
+ EXPECT_EQ(c1_move.ExpectedChecksum().value_or(0), 12345);
+
+ EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345);
+
+ // A CRC Cord compares equal to its non-CRC value.
+ EXPECT_EQ(c1, absl::Cord());
+
+ for (const CordMutator& mutator : cord_mutators) {
+ SCOPED_TRACE(mutator.Name());
+
+ // Exercise mutating an empty checksummed cord to catch crashes and exercise
+ // memory sanitizers.
+ absl::Cord c2;
+ c2.SetExpectedChecksum(24680);
+ mutator.Mutate(c2);
+
+ if (c2.empty()) {
+ // Not a mutation
+ continue;
+ }
+ EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt);
+
+ if (mutator.CanUndo()) {
+ mutator.Undo(c2);
+ }
+ }
+
+ absl::Cord c3;
+ c3.SetExpectedChecksum(999);
+ const absl::Cord& cc3 = c3;
+
+ // Test that all cord reading operations function in the face of an
+ // expected checksum.
+ EXPECT_TRUE(cc3.StartsWith(""));
+ EXPECT_TRUE(cc3.EndsWith(""));
+ EXPECT_TRUE(cc3.empty());
+ EXPECT_EQ(cc3, "");
+ EXPECT_EQ(cc3, absl::Cord());
+ EXPECT_EQ(cc3.size(), 0);
+ EXPECT_EQ(cc3.Compare(absl::Cord()), 0);
+ EXPECT_EQ(cc3.Compare(c1), 0);
+ EXPECT_EQ(cc3.Compare(cc3), 0);
+ EXPECT_EQ(cc3.Compare(""), 0);
+ EXPECT_EQ(cc3.Compare("wxyz"), -1);
+ EXPECT_EQ(cc3.Compare(absl::Cord("wxyz")), -1);
+ EXPECT_EQ(absl::Cord("wxyz").Compare(cc3), 1);
+ EXPECT_EQ(std::string(cc3), "");
+
+ std::string dest;
+ absl::CopyCordToString(cc3, &dest);
+ EXPECT_EQ(dest, "");
+
+ for (absl::string_view chunk : cc3.Chunks()) { // NOLINT(unreachable loop)
+ static_cast<void>(chunk);
+ GTEST_FAIL() << "no chunks expected";
+ }
+ EXPECT_TRUE(cc3.chunk_begin() == cc3.chunk_end());
+
+ for (char ch : cc3.Chars()) { // NOLINT(unreachable loop)
+ static_cast<void>(ch);
+ GTEST_FAIL() << "no chars expected";
+ }
+ EXPECT_TRUE(cc3.char_begin() == cc3.char_end());
+
+ EXPECT_EQ(cc3.TryFlat(), "");
+ EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::Cord()));
+ EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::string_view()));
+}
+
+#if defined(GTEST_HAS_DEATH_TEST) && defined(ABSL_INTERNAL_CORD_HAVE_SANITIZER)
+
+// Returns an expected poison / uninitialized death message expression.
+const char* MASanDeathExpr() {
+ return "(use-after-poison|use-of-uninitialized-value)";
+}
+
+TEST(CordSanitizerTest, SanitizesEmptyCord) {
+ absl::Cord cord;
+ const char* data = cord.Flatten().data();
+ EXPECT_DEATH(EXPECT_EQ(data[0], 0), MASanDeathExpr());
+}
+
+TEST(CordSanitizerTest, SanitizesSmallCord) {
+ absl::Cord cord("Hello");
+ const char* data = cord.Flatten().data();
+ EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
+}
+
+TEST(CordSanitizerTest, SanitizesCordOnSetSSOValue) {
+ absl::Cord cord("String that is too big to be an SSO value");
+ cord = "Hello";
+ const char* data = cord.Flatten().data();
+ EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
+}
+
+TEST(CordSanitizerTest, SanitizesCordOnCopyCtor) {
+ absl::Cord src("hello");
+ absl::Cord dst(src);
+ const char* data = dst.Flatten().data();
+ EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
+}
+
+TEST(CordSanitizerTest, SanitizesCordOnMoveCtor) {
+ absl::Cord src("hello");
+ absl::Cord dst(std::move(src));
+ const char* data = dst.Flatten().data();
+ EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
+}
+
+TEST(CordSanitizerTest, SanitizesCordOnAssign) {
+ absl::Cord src("hello");
+ absl::Cord dst;
+ dst = src;
+ const char* data = dst.Flatten().data();
+ EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
+}
+
+TEST(CordSanitizerTest, SanitizesCordOnMoveAssign) {
+ absl::Cord src("hello");
+ absl::Cord dst;
+ dst = std::move(src);
+ const char* data = dst.Flatten().data();
+ EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
+}
+
+TEST(CordSanitizerTest, SanitizesCordOnSsoAssign) {
+ absl::Cord src("hello");
+ absl::Cord dst("String that is too big to be an SSO value");
+ dst = src;
+ const char* data = dst.Flatten().data();
+ EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
+}
+
+#endif // GTEST_HAS_DEATH_TEST && ABSL_INTERNAL_CORD_HAVE_SANITIZER
diff --git a/absl/strings/cord_test_helpers.h b/absl/strings/cord_test_helpers.h
index 31a1dc89..ca52240a 100644
--- a/absl/strings/cord_test_helpers.h
+++ b/absl/strings/cord_test_helpers.h
@@ -51,7 +51,7 @@ enum class TestCordSize {
// existing inputs rather than copying contents of the input.
kMedium = cord_internal::kMaxFlatLength / 2 + 1,
- // A string value large enough to cause it to be stored in mutliple flats.
+ // A string value large enough to cause it to be stored in multiple flats.
kLarge = cord_internal::kMaxFlatLength * 4
};
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index 7d97944e..2827fbaa 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -443,6 +443,8 @@ void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) {
}
}
+// Reverses the mapping in Base64EscapeInternal; see that method's
+// documentation for details of the mapping.
bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
size_t szdest, const signed char* unbase64,
size_t* len) {
@@ -676,7 +678,10 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
return ok;
}
-// The arrays below were generated by the following code
+// The arrays below map base64-escaped characters back to their original values.
+// For the inverse case, see k(WebSafe)Base64Chars in the internal
+// escaping.cc.
+// These arrays were generated by the following inversion code:
// #include <sys/time.h>
// #include <stdlib.h>
// #include <string.h>
@@ -703,8 +708,8 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
// }
// }
//
-// where the value of "Base64[]" was replaced by one of the base-64 conversion
-// tables from the functions below.
+// where the value of "Base64[]" was replaced by one of k(WebSafe)Base64Chars
+// in the internal escaping.cc.
/* clang-format off */
constexpr signed char kUnBase64[] = {
-1, -1, -1, -1, -1, -1, -1, -1,
@@ -777,16 +782,11 @@ constexpr signed char kUnWebSafeBase64[] = {
};
/* clang-format on */
-constexpr char kWebSafeBase64Chars[] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
-
template <typename String>
bool Base64UnescapeInternal(const char* src, size_t slen, String* dest,
const signed char* unbase64) {
// Determine the size of the output string. Base64 encodes every 3 bytes into
- // 4 characters. any leftover chars are added directly for good measure.
- // This is documented in the base64 RFC:
- // https://datatracker.ietf.org/doc/html/rfc3548
+ // 4 characters. Any leftover chars are added directly for good measure.
const size_t dest_len = 3 * (slen / 4) + (slen % 4);
strings_internal::STLStringResizeUninitialized(dest, dest_len);
@@ -882,30 +882,6 @@ std::string Utf8SafeCHexEscape(absl::string_view src) {
return CEscapeInternal(src, true, true);
}
-// ----------------------------------------------------------------------
-// Base64Unescape() - base64 decoder
-// Base64Escape() - base64 encoder
-// WebSafeBase64Unescape() - Google's variation of base64 decoder
-// WebSafeBase64Escape() - Google's variation of base64 encoder
-//
-// Check out
-// https://datatracker.ietf.org/doc/html/rfc2045 for formal description, but
-// what we care about is that...
-// Take the encoded stuff in groups of 4 characters and turn each
-// character into a code 0 to 63 thus:
-// A-Z map to 0 to 25
-// a-z map to 26 to 51
-// 0-9 map to 52 to 61
-// +(- for WebSafe) maps to 62
-// /(_ for WebSafe) maps to 63
-// There will be four numbers, all less than 64 which can be represented
-// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
-// Arrange the 6 digit binary numbers into three bytes as such:
-// aaaaaabb bbbbcccc ccdddddd
-// Equals signs (one or two) are used at the end of the encoded block to
-// indicate that the text was not an integer multiple of three bytes long.
-// ----------------------------------------------------------------------
-
bool Base64Unescape(absl::string_view src, std::string* dest) {
return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
}
@@ -923,7 +899,7 @@ void Base64Escape(absl::string_view src, std::string* dest) {
void WebSafeBase64Escape(absl::string_view src, std::string* dest) {
strings_internal::Base64EscapeInternal(
reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
- false, kWebSafeBase64Chars);
+ false, strings_internal::kWebSafeBase64Chars);
}
std::string Base64Escape(absl::string_view src) {
@@ -938,7 +914,7 @@ std::string WebSafeBase64Escape(absl::string_view src) {
std::string dest;
strings_internal::Base64EscapeInternal(
reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
- false, kWebSafeBase64Chars);
+ false, strings_internal::kWebSafeBase64Chars);
return dest;
}
diff --git a/absl/strings/escaping.h b/absl/strings/escaping.h
index f5ca26c5..bf2a5898 100644
--- a/absl/strings/escaping.h
+++ b/absl/strings/escaping.h
@@ -117,35 +117,40 @@ std::string Utf8SafeCEscape(absl::string_view src);
// conversion.
std::string Utf8SafeCHexEscape(absl::string_view src);
-// Base64Unescape()
-//
-// Converts a `src` string encoded in Base64 to its binary equivalent, writing
-// it to a `dest` buffer, returning `true` on success. If `src` contains invalid
-// characters, `dest` is cleared and returns `false`.
-bool Base64Unescape(absl::string_view src, std::string* dest);
-
-// WebSafeBase64Unescape()
-//
-// Converts a `src` string encoded in Base64 to its binary equivalent, writing
-// it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'.
-// If `src` contains invalid characters, `dest` is cleared and returns `false`.
-bool WebSafeBase64Unescape(absl::string_view src, std::string* dest);
-
// Base64Escape()
//
-// Encodes a `src` string into a base64-encoded string, with padding characters.
-// This function conforms with RFC 4648 section 4 (base64).
+// Encodes a `src` string into a base64-encoded 'dest' string with padding
+// characters. This function conforms with RFC 4648 section 4 (base64) and RFC
+// 2045.
void Base64Escape(absl::string_view src, std::string* dest);
std::string Base64Escape(absl::string_view src);
// WebSafeBase64Escape()
//
-// Encodes a `src` string into a base64-like string, using '-' instead of '+'
-// and '_' instead of '/', and without padding. This function conforms with RFC
-// 4648 section 5 (base64url).
+// Encodes a `src` string into a base64 string, like Base64Escape() does, but
+// outputs '-' instead of '+' and '_' instead of '/', and does not pad 'dest'.
+// This function conforms with RFC 4648 section 5 (base64url).
void WebSafeBase64Escape(absl::string_view src, std::string* dest);
std::string WebSafeBase64Escape(absl::string_view src);
+// Base64Unescape()
+//
+// Converts a `src` string encoded in Base64 (RFC 4648 section 4) to its binary
+// equivalent, writing it to a `dest` buffer, returning `true` on success. If
+// `src` contains invalid characters, `dest` is cleared and `false` is returned.
+// If padding is included (note that `Base64Escape()` does produce it), it must
+// be correct. In the padding, '=' and '.' are treated identically.
+bool Base64Unescape(absl::string_view src, std::string* dest);
+
+// WebSafeBase64Unescape()
+//
+// Converts a `src` string encoded in "web safe" Base64 (RFC 4648 section 5) to
+// its binary equivalent, writing it to a `dest` buffer and returning `true` on
+// success. If `src` contains invalid characters, `dest` is cleared and `false`
+// is returned. If padding is included (`WebSafeBase64Escape()` does not produce
+// it), it must be correct; in the padding, '=' and '.' are treated identically.
+bool WebSafeBase64Unescape(absl::string_view src, std::string* dest);
+
// HexStringToBytes()
//
// Converts an ASCII hex string into bytes, returning binary data of length
diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc
index 45671a0e..9f62c1ee 100644
--- a/absl/strings/escaping_test.cc
+++ b/absl/strings/escaping_test.cc
@@ -562,6 +562,7 @@ template <typename StringType>
void TestEscapeAndUnescape() {
// Check the short strings; this tests the math (and boundaries)
for (const auto& tc : base64_tests) {
+ // Test plain base64.
StringType encoded("this junk should be ignored");
absl::Base64Escape(tc.plaintext, &encoded);
EXPECT_EQ(encoded, tc.cyphertext);
@@ -571,22 +572,26 @@ void TestEscapeAndUnescape() {
EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
EXPECT_EQ(decoded, tc.plaintext);
- StringType websafe(tc.cyphertext);
- for (int c = 0; c < websafe.size(); ++c) {
- if ('+' == websafe[c]) websafe[c] = '-';
- if ('/' == websafe[c]) websafe[c] = '_';
+ StringType websafe_with_padding(tc.cyphertext);
+ for (unsigned int c = 0; c < websafe_with_padding.size(); ++c) {
+ if ('+' == websafe_with_padding[c]) websafe_with_padding[c] = '-';
+ if ('/' == websafe_with_padding[c]) websafe_with_padding[c] = '_';
+ // Intentionally keeping padding aka '='.
+ }
+
+ // Test plain websafe (aka without padding).
+ StringType websafe(websafe_with_padding);
+ for (unsigned int c = 0; c < websafe.size(); ++c) {
if ('=' == websafe[c]) {
websafe.resize(c);
break;
}
}
-
encoded = "this junk should be ignored";
absl::WebSafeBase64Escape(tc.plaintext, &encoded);
EXPECT_EQ(encoded, websafe);
EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe);
- // Let's try the string version of the decoder
decoded = "this junk should be ignored";
EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded));
EXPECT_EQ(decoded, tc.plaintext);
@@ -617,6 +622,48 @@ TEST(Base64, EscapeAndUnescape) {
TestEscapeAndUnescape<std::string>();
}
+TEST(Base64, Padding) {
+ // Padding is optional.
+ // '.' is an acceptable padding character, just like '='.
+ std::initializer_list<absl::string_view> good_padding = {
+ "YQ",
+ "YQ==",
+ "YQ=.",
+ "YQ.=",
+ "YQ..",
+ };
+ for (absl::string_view b64 : good_padding) {
+ std::string decoded;
+ EXPECT_TRUE(absl::Base64Unescape(b64, &decoded));
+ EXPECT_EQ(decoded, "a");
+ std::string websafe_decoded;
+ EXPECT_TRUE(absl::WebSafeBase64Unescape(b64, &websafe_decoded));
+ EXPECT_EQ(websafe_decoded, "a");
+ }
+ std::initializer_list<absl::string_view> bad_padding = {
+ "YQ=",
+ "YQ.",
+ "YQ===",
+ "YQ==.",
+ "YQ=.=",
+ "YQ=..",
+ "YQ.==",
+ "YQ.=.",
+ "YQ..=",
+ "YQ...",
+ "YQ====",
+ "YQ....",
+ "YQ=====",
+ "YQ.....",
+ };
+ for (absl::string_view b64 : bad_padding) {
+ std::string decoded;
+ EXPECT_FALSE(absl::Base64Unescape(b64, &decoded));
+ std::string websafe_decoded;
+ EXPECT_FALSE(absl::WebSafeBase64Unescape(b64, &websafe_decoded));
+ }
+}
+
TEST(Base64, DISABLED_HugeData) {
const size_t kSize = size_t(3) * 1000 * 1000 * 1000;
static_assert(kSize % 3 == 0, "kSize must be divisible by 3");
diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h
index 5aabc1fc..70a90343 100644
--- a/absl/strings/internal/char_map.h
+++ b/absl/strings/internal/char_map.h
@@ -73,10 +73,10 @@ class Charmap {
}
// Containing all the chars in the C-string 's'.
- // Note that this is expensively recursive because of the C++11 constexpr
- // formulation. Use only in constexpr initializers.
static constexpr Charmap FromString(const char* s) {
- return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1));
+ Charmap ret;
+ while (*s) ret = ret | Char(*s++);
+ return ret;
}
// Containing all the chars in the closed interval [lo,hi].
diff --git a/absl/strings/internal/charconv_bigint.cc b/absl/strings/internal/charconv_bigint.cc
index 282b639e..46b5289a 100644
--- a/absl/strings/internal/charconv_bigint.cc
+++ b/absl/strings/internal/charconv_bigint.cc
@@ -296,10 +296,8 @@ template <int max_words>
std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex);
if (first_pass) {
// just copy, rather than multiplying by 1
- std::copy(
- LargePowerOfFiveData(big_power),
- LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power),
- answer.words_);
+ std::copy_n(LargePowerOfFiveData(big_power),
+ LargePowerOfFiveSize(big_power), answer.words_);
answer.size_ = LargePowerOfFiveSize(big_power);
first_pass = false;
} else {
diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h
index 8f702976..5c0c375d 100644
--- a/absl/strings/internal/charconv_bigint.h
+++ b/absl/strings/internal/charconv_bigint.h
@@ -92,7 +92,7 @@ class BigUnsigned {
// numbers with this many decimal digits or fewer are representable by this
// type.
//
- // Analagous to std::numeric_limits<BigUnsigned>::digits10.
+ // Analogous to std::numeric_limits<BigUnsigned>::digits10.
static constexpr int Digits10() {
// 9975007/1035508 is very slightly less than log10(2**32).
return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
@@ -121,7 +121,7 @@ class BigUnsigned {
++size_;
}
}
- std::fill(words_, words_ + word_shift, 0u);
+ std::fill_n(words_, word_shift, 0u);
}
}
@@ -197,7 +197,7 @@ class BigUnsigned {
}
void SetToZero() {
- std::fill(words_, words_ + size_, 0u);
+ std::fill_n(words_, size_, 0u);
size_ = 0;
}
diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc
index b6b06cfa..b7874385 100644
--- a/absl/strings/internal/cord_internal.cc
+++ b/absl/strings/internal/cord_internal.cc
@@ -33,7 +33,6 @@ ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled(
kCordEnableRingBufferDefault);
ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled(
kCordShallowSubcordsDefault);
-ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false);
void LogFatalNodeType(CordRep* rep) {
ABSL_INTERNAL_LOG(FATAL, absl::StrCat("Unexpected node type: ",
diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h
index f32fd416..8d9836ba 100644
--- a/absl/strings/internal/cord_internal.h
+++ b/absl/strings/internal/cord_internal.h
@@ -27,9 +27,20 @@
#include "absl/base/internal/invoke.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
+#include "absl/container/internal/container_memory.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
+// We can only add poisoning if we can detect consteval executions.
+#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
+ (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
+ defined(ABSL_HAVE_MEMORY_SANITIZER))
+#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
+#endif
+
+#define ABSL_CORD_INTERNAL_NO_SANITIZE \
+ ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY
+
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
@@ -58,12 +69,6 @@ enum CordFeatureDefaults {
extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;
-// `cord_btree_exhaustive_validation` can be set to force exhaustive validation
-// in debug assertions, and code that calls `IsValid()` explicitly. By default,
-// assertions should be relatively cheap and AssertValid() can easily lead to
-// O(n^2) complexity as recursive / full tree validation is O(n).
-extern std::atomic<bool> cord_btree_exhaustive_validation;
-
inline void enable_cord_ring_buffer(bool enable) {
cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
}
@@ -91,6 +96,46 @@ enum Constants {
// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);
+// Fast implementation of memmove for up to 15 bytes. This implementation is
+// safe for overlapping regions. If nullify_tail is true, the destination is
+// padded with '\0' up to 15 bytes.
+template <bool nullify_tail = false>
+inline void SmallMemmove(char* dst, const char* src, size_t n) {
+ if (n >= 8) {
+ assert(n <= 15);
+ uint64_t buf1;
+ uint64_t buf2;
+ memcpy(&buf1, src, 8);
+ memcpy(&buf2, src + n - 8, 8);
+ if (nullify_tail) {
+ memset(dst + 7, 0, 8);
+ }
+ memcpy(dst, &buf1, 8);
+ memcpy(dst + n - 8, &buf2, 8);
+ } else if (n >= 4) {
+ uint32_t buf1;
+ uint32_t buf2;
+ memcpy(&buf1, src, 4);
+ memcpy(&buf2, src + n - 4, 4);
+ if (nullify_tail) {
+ memset(dst + 4, 0, 4);
+ memset(dst + 7, 0, 8);
+ }
+ memcpy(dst, &buf1, 4);
+ memcpy(dst + n - 4, &buf2, 4);
+ } else {
+ if (n != 0) {
+ dst[0] = src[0];
+ dst[n / 2] = src[n / 2];
+ dst[n - 1] = src[n - 1];
+ }
+ if (nullify_tail) {
+ memset(dst + 7, 0, 8);
+ memset(dst + n, 0, 8);
+ }
+ }
+}
+
// Compact class for tracking the reference count and state flags for CordRep
// instances. Data is stored in an atomic int32_t for compactness and speed.
class RefcountAndFlags {
@@ -225,7 +270,11 @@ struct CordRep {
: length(l), refcount(immortal), tag(EXTERNAL), storage{} {}
// The following three fields have to be less than 32 bytes since
- // that is the smallest supported flat node size.
+ // that is the smallest supported flat node size. Some code optimizations rely
+ // on the specific layout of these fields. Notably: the non-trivial field
+ // `refcount` being preceded by `length`, and being followed by POD data
+ // members only.
+ // # LINT.IfChange
size_t length;
RefcountAndFlags refcount;
// If tag < FLAT, it represents CordRepKind and indicates the type of node.
@@ -241,6 +290,7 @@ struct CordRep {
// allocate room for these in the derived class, as not all compilers reuse
// padding space from the base class (clang and gcc do, MSVC does not, etc)
uint8_t storage[3];
+ // # LINT.ThenChange(cord_rep_btree.h:copy_raw)
// Returns true if this instance's tag matches the requested type.
constexpr bool IsRing() const { return tag == RING; }
@@ -423,25 +473,25 @@ constexpr char GetOrNull(absl::string_view data, size_t pos) {
return pos < data.size() ? data[pos] : '\0';
}
-// We store cordz_info as 64 bit pointer value in big endian format. This
-// guarantees that the least significant byte of cordz_info matches the last
-// byte of the inline data representation in as_chars_, which holds the inlined
+// We store cordz_info as 64 bit pointer value in little endian format. This
+// guarantees that the least significant byte of cordz_info matches the first
+// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;
// Assert that the `cordz_info` pointer value perfectly overlaps the first half
-// of `as_chars_` and can hold a pointer value.
+// of `data` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");
-// BigEndianByte() creates a big endian representation of 'value', i.e.: a big
-// endian value where the last byte in the host's representation holds 'value`,
-// with all other bytes being 0.
-static constexpr cordz_info_t BigEndianByte(unsigned char value) {
+// LittleEndianByte() creates a little endian representation of 'value', i.e.:
+// a little endian value where the first byte in the host's representation
+// holds 'value`, with all other bytes being 0.
+static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
- return value;
-#else
return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
+#else
+ return value;
#endif
}
@@ -450,38 +500,80 @@ class InlineData {
// DefaultInitType forces the use of the default initialization constructor.
enum DefaultInitType { kDefaultInit };
- // kNullCordzInfo holds the big endian representation of intptr_t(1)
+ // kNullCordzInfo holds the little endian representation of intptr_t(1)
// This is the 'null' / initial value of 'cordz_info'. The null value
// is specifically little endian 1 as with 64-bit pointers, the first
// byte of cordz_info overlaps with the first byte holding the tag.
- static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1);
-
- constexpr InlineData() : as_chars_{0} {}
- explicit InlineData(DefaultInitType) {}
- explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {}
- explicit constexpr InlineData(absl::string_view chars)
- : as_chars_{
- GetOrNull(chars, 0), GetOrNull(chars, 1),
- GetOrNull(chars, 2), GetOrNull(chars, 3),
- GetOrNull(chars, 4), GetOrNull(chars, 5),
- GetOrNull(chars, 6), GetOrNull(chars, 7),
- GetOrNull(chars, 8), GetOrNull(chars, 9),
- GetOrNull(chars, 10), GetOrNull(chars, 11),
- GetOrNull(chars, 12), GetOrNull(chars, 13),
- GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {}
+ static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);
+
+ // kTagOffset contains the offset of the control byte / tag. This constant is
+ // intended mostly for debugging purposes: do not remove this constant as it
+ // is actively inspected and used by gdb pretty printing code.
+ static constexpr size_t kTagOffset = 0;
+
+ // Implement `~InlineData()` conditionally: we only need this destructor to
+ // unpoison poisoned instances under *SAN, and it will only compile correctly
+ // if the current compiler supports `absl::is_constant_evaluated()`.
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+ ~InlineData() noexcept { unpoison(); }
+#endif
+
+ constexpr InlineData() noexcept { poison_this(); }
+
+ explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
+ poison_this();
+ }
+
+ explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
+ ABSL_ASSERT(rep != nullptr);
+ }
+
+ // Explicit constexpr constructor to create a constexpr InlineData
+ // value. Creates an inlined SSO value if `rep` is null, otherwise
+ // creates a tree instance value.
+ constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
+ : rep_(rep ? Rep(rep) : Rep(sv)) {
+ poison();
+ }
+
+ constexpr InlineData(const InlineData& rhs) noexcept;
+ InlineData& operator=(const InlineData& rhs) noexcept;
+
+ friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+ const Rep l = lhs.rep_.SanitizerSafeCopy();
+ const Rep r = rhs.rep_.SanitizerSafeCopy();
+ return memcmp(&l, &r, sizeof(l)) == 0;
+#else
+ return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
+#endif
+ }
+ friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
+ return !operator==(lhs, rhs);
+ }
+
+ // Poisons the unused inlined SSO data if the current instance
+ // is inlined, else un-poisons the entire instance.
+ constexpr void poison();
+
+ // Un-poisons this instance.
+ constexpr void unpoison();
+
+ // Poisons the current instance. This is used on default initialization.
+ constexpr void poison_this();
// Returns true if the current instance is empty.
// The 'empty value' is an inlined data value of zero length.
- bool is_empty() const { return tag() == 0; }
+ bool is_empty() const { return rep_.tag() == 0; }
// Returns true if the current instance holds a tree value.
- bool is_tree() const { return (tag() & 1) != 0; }
+ bool is_tree() const { return (rep_.tag() & 1) != 0; }
// Returns true if the current instance holds a cordz_info value.
// Requires the current instance to hold a tree value.
bool is_profiled() const {
assert(is_tree());
- return as_tree_.cordz_info != kNullCordzInfo;
+ return rep_.cordz_info() != kNullCordzInfo;
}
// Returns true if either of the provided instances hold a cordz_info value.
@@ -490,7 +582,7 @@ class InlineData {
static bool is_either_profiled(const InlineData& data1,
const InlineData& data2) {
assert(data1.is_tree() && data2.is_tree());
- return (data1.as_tree_.cordz_info | data2.as_tree_.cordz_info) !=
+ return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
kNullCordzInfo;
}
@@ -499,8 +591,8 @@ class InlineData {
// Requires the current instance to hold a tree value.
CordzInfo* cordz_info() const {
assert(is_tree());
- intptr_t info = static_cast<intptr_t>(
- absl::big_endian::ToHost64(static_cast<uint64_t>(as_tree_.cordz_info)));
+ intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
+ static_cast<uint64_t>(rep_.cordz_info())));
assert(info & 1);
return reinterpret_cast<CordzInfo*>(info - 1);
}
@@ -511,21 +603,21 @@ class InlineData {
void set_cordz_info(CordzInfo* cordz_info) {
assert(is_tree());
uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
- as_tree_.cordz_info =
- static_cast<cordz_info_t>(absl::big_endian::FromHost64(info));
+ rep_.set_cordz_info(
+ static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
}
// Resets the current cordz_info to null / empty.
void clear_cordz_info() {
assert(is_tree());
- as_tree_.cordz_info = kNullCordzInfo;
+ rep_.set_cordz_info(kNullCordzInfo);
}
// Returns a read only pointer to the character data inside this instance.
// Requires the current instance to hold inline data.
const char* as_chars() const {
assert(!is_tree());
- return as_chars_;
+ return rep_.as_chars();
}
// Returns a mutable pointer to the character data inside this instance.
@@ -543,20 +635,33 @@ class InlineData {
//
// It's an error to read from the returned pointer without a preceding write
// if the current instance does not hold inline data, i.e.: is_tree() == true.
- char* as_chars() { return as_chars_; }
+ char* as_chars() { return rep_.as_chars(); }
// Returns the tree value of this value.
// Requires the current instance to hold a tree value.
CordRep* as_tree() const {
assert(is_tree());
- return as_tree_.rep;
+ return rep_.tree();
+ }
+
+ void set_inline_data(const char* data, size_t n) {
+ ABSL_ASSERT(n <= kMaxInline);
+ unpoison();
+ rep_.set_tag(static_cast<int8_t>(n << 1));
+ SmallMemmove<true>(rep_.as_chars(), data, n);
+ poison();
+ }
+
+ void copy_max_inline_to(char* dst) const {
+ assert(!is_tree());
+ memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
}
// Initialize this instance to holding the tree value `rep`,
// initializing the cordz_info to null, i.e.: 'not profiled'.
void make_tree(CordRep* rep) {
- as_tree_.rep = rep;
- as_tree_.cordz_info = kNullCordzInfo;
+ unpoison();
+ rep_.make_tree(rep);
}
// Set the tree value of this instance to 'rep`.
@@ -564,22 +669,20 @@ class InlineData {
// Does not affect the value of cordz_info.
void set_tree(CordRep* rep) {
assert(is_tree());
- as_tree_.rep = rep;
+ rep_.set_tree(rep);
}
// Returns the size of the inlined character data inside this instance.
// Requires the current instance to hold inline data.
- size_t inline_size() const {
- assert(!is_tree());
- return static_cast<size_t>(tag()) >> 1;
- }
+ size_t inline_size() const { return rep_.inline_size(); }
// Sets the size of the inlined character data inside this instance.
// Requires `size` to be <= kMaxInline.
// See the documentation on 'as_chars()' for more information and examples.
void set_inline_size(size_t size) {
- ABSL_ASSERT(size <= kMaxInline);
- tag() = static_cast<char>(size << 1);
+ unpoison();
+ rep_.set_inline_size(size);
+ poison();
}
// Compares 'this' inlined data with rhs. The comparison is a straightforward
@@ -589,15 +692,115 @@ class InlineData {
// 0 the InlineData instances are equal
// 1 'this' InlineData instance larger
int Compare(const InlineData& rhs) const {
+ return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
+ }
+
+ private:
+ struct Rep {
+ // See cordz_info_t for forced alignment and size of `cordz_info` details.
+ struct AsTree {
+ explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
+ : rep(tree) {}
+ cordz_info_t cordz_info = kNullCordzInfo;
+ absl::cord_internal::CordRep* rep;
+ };
+
+ explicit Rep(DefaultInitType) {}
+ constexpr Rep() : data{0} {}
+ constexpr Rep(const Rep&) = default;
+ constexpr Rep& operator=(const Rep&) = default;
+
+ explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}
+
+ explicit constexpr Rep(absl::string_view chars)
+ : data{static_cast<char>((chars.size() << 1)),
+ GetOrNull(chars, 0),
+ GetOrNull(chars, 1),
+ GetOrNull(chars, 2),
+ GetOrNull(chars, 3),
+ GetOrNull(chars, 4),
+ GetOrNull(chars, 5),
+ GetOrNull(chars, 6),
+ GetOrNull(chars, 7),
+ GetOrNull(chars, 8),
+ GetOrNull(chars, 9),
+ GetOrNull(chars, 10),
+ GetOrNull(chars, 11),
+ GetOrNull(chars, 12),
+ GetOrNull(chars, 13),
+ GetOrNull(chars, 14)} {}
+
+ // Disable sanitizer as we must always be able to read `tag`.
+ ABSL_CORD_INTERNAL_NO_SANITIZE
+ int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
+ void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }
+
+ char* as_chars() { return data + 1; }
+ const char* as_chars() const { return data + 1; }
+
+ bool is_tree() const { return (tag() & 1) != 0; }
+
+ size_t inline_size() const {
+ ABSL_ASSERT(!is_tree());
+ return static_cast<size_t>(tag()) >> 1;
+ }
+
+ void set_inline_size(size_t size) {
+ ABSL_ASSERT(size <= kMaxInline);
+ set_tag(static_cast<int8_t>(size << 1));
+ }
+
+ CordRep* tree() const { return as_tree.rep; }
+ void set_tree(CordRep* rhs) { as_tree.rep = rhs; }
+
+ cordz_info_t cordz_info() const { return as_tree.cordz_info; }
+ void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }
+
+ void make_tree(CordRep* tree) {
+ as_tree.rep = tree;
+ as_tree.cordz_info = kNullCordzInfo;
+ }
+
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+ constexpr Rep SanitizerSafeCopy() const {
+ if (!absl::is_constant_evaluated()) {
+ Rep res;
+ if (is_tree()) {
+ res = *this;
+ } else {
+ res.set_tag(tag());
+ memcpy(res.as_chars(), as_chars(), inline_size());
+ }
+ return res;
+ } else {
+ return *this;
+ }
+ }
+#else
+ constexpr const Rep& SanitizerSafeCopy() const { return *this; }
+#endif
+
+ // If the data has length <= kMaxInline, we store it in `data`, and
+ // store the size in the first char of `data` shifted left + 1.
+ // Else we store it in a tree and store a pointer to that tree in
+ // `as_tree.rep` with a tagged pointer to make `tag() & 1` non zero.
+ union {
+ char data[kMaxInline + 1];
+ AsTree as_tree;
+ };
+ };
+
+ // Private implementation of `Compare()`
+ static inline int Compare(const Rep& lhs, const Rep& rhs) {
uint64_t x, y;
- memcpy(&x, as_chars(), sizeof(x));
+ memcpy(&x, lhs.as_chars(), sizeof(x));
memcpy(&y, rhs.as_chars(), sizeof(y));
if (x == y) {
- memcpy(&x, as_chars() + 7, sizeof(x));
+ memcpy(&x, lhs.as_chars() + 7, sizeof(x));
memcpy(&y, rhs.as_chars() + 7, sizeof(y));
if (x == y) {
- if (inline_size() == rhs.inline_size()) return 0;
- return inline_size() < rhs.inline_size() ? -1 : 1;
+ if (lhs.inline_size() == rhs.inline_size()) return 0;
+ return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
}
}
x = absl::big_endian::FromHost64(x);
@@ -605,36 +808,63 @@ class InlineData {
return x < y ? -1 : 1;
}
- private:
- // See cordz_info_t for forced alignment and size of `cordz_info` details.
- struct AsTree {
- explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
- : rep(tree), cordz_info(kNullCordzInfo) {}
- // This union uses up extra space so that whether rep is 32 or 64 bits,
- // cordz_info will still start at the eighth byte, and the last
- // byte of cordz_info will still be the last byte of InlineData.
- union {
- absl::cord_internal::CordRep* rep;
- cordz_info_t unused_aligner;
- };
- cordz_info_t cordz_info;
- };
-
- char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; }
- char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; }
-
- // If the data has length <= kMaxInline, we store it in `as_chars_`, and
- // store the size in the last char of `as_chars_` shifted left + 1.
- // Else we store it in a tree and store a pointer to that tree in
- // `as_tree_.rep` and store a tag in `tagged_size`.
- union {
- char as_chars_[kMaxInline + 1];
- AsTree as_tree_;
- };
+ Rep rep_;
};
static_assert(sizeof(InlineData) == kMaxInline + 1, "");
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+// Sanitizer-build copy constructor: initializes `rep_` from
+// `rhs.rep_.SanitizerSafeCopy()` instead of copying `rhs.rep_` directly, then
+// calls `poison()` on the newly constructed instance.
+constexpr InlineData::InlineData(const InlineData& rhs) noexcept
+ : rep_(rhs.rep_.SanitizerSafeCopy()) {
+ poison();
+}
+
+// Sanitizer-build copy assignment: un-poisons this instance before writing,
+// assigns `rhs.rep_.SanitizerSafeCopy()` to `rep_`, then calls `poison()` for
+// the newly stored value. Returns `*this`.
+inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
+ unpoison();
+ rep_ = rhs.rep_.SanitizerSafeCopy();
+ poison();
+ return *this;
+}
+
+// Poisons the entire object through `SanitizerPoisonObject(this)`. Does
+// nothing when evaluated as part of a constant expression.
+constexpr void InlineData::poison_this() {
+ if (!absl::is_constant_evaluated()) {
+ container_internal::SanitizerPoisonObject(this);
+ }
+}
+
+// Un-poisons the entire object through `SanitizerUnpoisonObject(this)`. Does
+// nothing when evaluated as part of a constant expression.
+constexpr void InlineData::unpoison() {
+ if (!absl::is_constant_evaluated()) {
+ container_internal::SanitizerUnpoisonObject(this);
+ }
+}
+
+// Applies sanitizer poisoning according to the current value (no-op during
+// constant evaluation):
+//  - tree value: the whole object is un-poisoned;
+//  - inline value with 0 < size < kMaxInline: the `kMaxInline - size` bytes
+//    following the inline characters are poisoned;
+//  - inline value with size == kMaxInline: nothing is changed;
+//  - inline value with size == 0: the whole object is poisoned.
+// This function only adds poison in the inline cases; it does not un-poison
+// the bytes that are in use.
+constexpr void InlineData::poison() {
+ if (!absl::is_constant_evaluated()) {
+ if (is_tree()) {
+ container_internal::SanitizerUnpoisonObject(this);
+ } else if (const size_t size = inline_size()) {
+ if (size < kMaxInline) {
+ const char* end = rep_.as_chars() + size;
+ container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
+ }
+ } else {
+ container_internal::SanitizerPoisonObject(this);
+ }
+ }
+}
+
+#else // ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+constexpr InlineData::InlineData(const InlineData&) noexcept = default;
+inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;
+
+constexpr void InlineData::poison_this() {}
+constexpr void InlineData::unpoison() {}
+constexpr void InlineData::poison() {}
+
+#endif // ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
inline CordRepSubstring* CordRep::substring() {
assert(IsSubstring());
return static_cast<CordRepSubstring*>(this);
diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc
index 7ce36128..05bd0e20 100644
--- a/absl/strings/internal/cord_rep_btree.cc
+++ b/absl/strings/internal/cord_rep_btree.cc
@@ -14,6 +14,7 @@
#include "absl/strings/internal/cord_rep_btree.h"
+#include <atomic>
#include <cassert>
#include <cstdint>
#include <iostream>
@@ -23,6 +24,7 @@
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
+#include "absl/base/optimization.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_consume.h"
@@ -48,9 +50,7 @@ using CopyResult = CordRepBtree::CopyResult;
constexpr auto kFront = CordRepBtree::kFront;
constexpr auto kBack = CordRepBtree::kBack;
-inline bool exhaustive_validation() {
- return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
-}
+ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false);
// Implementation of the various 'Dump' functions.
// Prints the entire tree structure or 'rep'. External callers should
@@ -286,7 +286,7 @@ struct StackOperations {
case CordRepBtree::kSelf:
return result.tree;
}
- ABSL_INTERNAL_UNREACHABLE;
+ ABSL_UNREACHABLE();
return result.tree;
}
@@ -361,6 +361,15 @@ struct StackOperations {
} // namespace
+// Stores the given value into the `cord_btree_exhaustive_validation` atomic
+// flag using relaxed memory ordering.
+// NOTE(review): the parameter name is spelled `do_exaustive_validation` (sic),
+// matching the declaration in cord_rep_btree.h.
+void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation) {
+ cord_btree_exhaustive_validation.store(do_exaustive_validation,
+ std::memory_order_relaxed);
+}
+
+// Returns the current value of the `cord_btree_exhaustive_validation` atomic
+// flag, loaded with relaxed memory ordering.
+bool IsCordBtreeExhaustiveValidationEnabled() {
+ return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
+}
+
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream) {
stream << "===================================\n";
@@ -449,7 +458,8 @@ bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
child_length += edge->length;
}
NODE_CHECK_EQ(child_length, tree->length);
- if ((!shallow || exhaustive_validation()) && tree->height() > 0) {
+ if ((!shallow || IsCordBtreeExhaustiveValidationEnabled()) &&
+ tree->height() > 0) {
for (CordRep* edge : tree->Edges()) {
if (!IsValid(edge->btree(), shallow)) return false;
}
@@ -502,7 +512,7 @@ OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
// open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
// We conveniently cover both case using a constexpr `shift` being 0 or 1
// as `end :== back + 1`.
- result = {CopyRaw(), kCopied};
+ result = {CopyRaw(length), kCopied};
constexpr int shift = edge_type == kFront ? 1 : 0;
for (CordRep* r : Edges(begin() + shift, back() + shift)) {
CordRep::Ref(r);
diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h
index eed5609e..be94b62e 100644
--- a/absl/strings/internal/cord_rep_btree.h
+++ b/absl/strings/internal/cord_rep_btree.h
@@ -32,6 +32,14 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
+// `SetCordBtreeExhaustiveValidation()` can be called to force exhaustive
+// validation in debug assertions, and in code that calls `IsValid()`
+// explicitly. By default, assertions should be relatively cheap:
+// AssertValid() can easily lead to O(n^2) complexity as recursive / full tree
+// validation is O(n).
+void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation);
+bool IsCordBtreeExhaustiveValidationEnabled();
+
class CordRepBtreeNavigator;
// CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
@@ -446,9 +454,9 @@ class CordRepBtree : public CordRep {
template <EdgeType edge_type>
static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
- // Creates a raw copy of this Btree node, copying all properties, but
- // without adding any references to existing edges.
- CordRepBtree* CopyRaw() const;
+ // Creates a raw copy of this Btree node with the specified length, copying
+ // all properties, but without adding any references to existing edges.
+ CordRepBtree* CopyRaw(size_t new_length) const;
// Creates a full copy of this Btree node, adding a reference on all edges.
CordRepBtree* Copy() const;
@@ -666,15 +674,28 @@ inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) {
}
}
-inline CordRepBtree* CordRepBtree::CopyRaw() const {
- auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree)));
- memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree));
- new (&tree->refcount) RefcountAndFlags;
+inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const {
+ CordRepBtree* tree = new CordRepBtree;
+
+ // `length` and `refcount` are the first members of `CordRepBtree`.
+ // We initialize `length` using the given length, have `refcount` be set to
+ // ref = 1 through its default constructor, and copy all data beyond
+ // 'refcount' which starts with `tag` using a single memcpy: all contents
+ // except `refcount` is trivially copyable, and the compiler does not
+ // efficiently coalesce member-wise copy of these members.
+ // See https://gcc.godbolt.org/z/qY8zsca6z
+ // # LINT.IfChange(copy_raw)
+ tree->length = new_length;
+ uint8_t* dst = &tree->tag;
+ const uint8_t* src = &tag;
+ const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this);
+ memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset));
return tree;
+ // # LINT.ThenChange()
}
inline CordRepBtree* CordRepBtree::Copy() const {
- CordRepBtree* tree = CopyRaw();
+ CordRepBtree* tree = CopyRaw(length);
for (CordRep* rep : Edges()) CordRep::Ref(rep);
return tree;
}
@@ -683,8 +704,7 @@ inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
size_t new_length) const {
assert(begin >= this->begin());
assert(begin <= this->end());
- CordRepBtree* tree = CopyRaw();
- tree->length = new_length;
+ CordRepBtree* tree = CopyRaw(new_length);
tree->set_begin(begin);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;
@@ -694,8 +714,7 @@ inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
size_t new_length) const {
assert(end <= capacity());
assert(end >= this->begin());
- CordRepBtree* tree = CopyRaw();
- tree->length = new_length;
+ CordRepBtree* tree = CopyRaw(new_length);
tree->set_end(end);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;
diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc
index 9d6ce484..840acf9f 100644
--- a/absl/strings/internal/cord_rep_btree_test.cc
+++ b/absl/strings/internal/cord_rep_btree_test.cc
@@ -507,7 +507,7 @@ TEST_P(CordRepBtreeTest, AppendToTreeTwoDeep) {
for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) {
// Ref top level tree based on param.
// Ref child node once every 16 iterations, and leaf node every 4
- // iterrations which which should not have an observable effect other than
+ // iterations which should not have an observable effect other than
// the node and/or the leaf below it being copied.
refs.RefIf(shared(), tree);
refs.RefIf(i % 16 == 0, tree->Edges().back());
@@ -568,7 +568,7 @@ TEST_P(CordRepBtreeTest, PrependToTreeTwoDeep) {
for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) {
// Ref top level tree based on param.
// Ref child node once every 16 iterations, and leaf node every 4
- // iterrations which which should not have an observable effect other than
+ // iterations which should not have an observable effect other than
// the node and/or the leaf below it being copied.
refs.RefIf(shared(), tree);
refs.RefIf(i % 16 == 0, tree->Edges().back());
@@ -1355,9 +1355,9 @@ TEST(CordRepBtreeTest, AssertValid) {
TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) {
// Restore exhaustive validation on any exit.
- const bool exhaustive_validation = cord_btree_exhaustive_validation.load();
+ const bool exhaustive_validation = IsCordBtreeExhaustiveValidationEnabled();
auto cleanup = absl::MakeCleanup([exhaustive_validation] {
- cord_btree_exhaustive_validation.store(exhaustive_validation);
+ SetCordBtreeExhaustiveValidation(exhaustive_validation);
});
// Create a tree of at least 2 levels, and mess with the original flat, which
@@ -1372,7 +1372,7 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) {
}
flat->length = 100;
- cord_btree_exhaustive_validation.store(false);
+ SetCordBtreeExhaustiveValidation(false);
EXPECT_FALSE(CordRepBtree::IsValid(tree));
EXPECT_TRUE(CordRepBtree::IsValid(tree, true));
EXPECT_FALSE(CordRepBtree::IsValid(tree, false));
@@ -1382,7 +1382,7 @@ TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) {
EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, false), ".*");
#endif
- cord_btree_exhaustive_validation.store(true);
+ SetCordBtreeExhaustiveValidation(true);
EXPECT_FALSE(CordRepBtree::IsValid(tree));
EXPECT_FALSE(CordRepBtree::IsValid(tree, true));
EXPECT_FALSE(CordRepBtree::IsValid(tree, false));
diff --git a/absl/strings/internal/cord_rep_consume.cc b/absl/strings/internal/cord_rep_consume.cc
index 20a55797..db7d4fef 100644
--- a/absl/strings/internal/cord_rep_consume.cc
+++ b/absl/strings/internal/cord_rep_consume.cc
@@ -42,7 +42,8 @@ CordRep* ClipSubstring(CordRepSubstring* substring) {
} // namespace
-void Consume(CordRep* rep, ConsumeFn consume_fn) {
+void Consume(CordRep* rep,
+ FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) {
size_t offset = 0;
size_t length = rep->length;
@@ -53,8 +54,9 @@ void Consume(CordRep* rep, ConsumeFn consume_fn) {
consume_fn(rep, offset, length);
}
-void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) {
- return Consume(rep, std::move(consume_fn));
+void ReverseConsume(CordRep* rep,
+ FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) {
+ return Consume(rep, consume_fn);
}
} // namespace cord_internal
diff --git a/absl/strings/internal/cord_rep_consume.h b/absl/strings/internal/cord_rep_consume.h
index d46fca2b..bece1874 100644
--- a/absl/strings/internal/cord_rep_consume.h
+++ b/absl/strings/internal/cord_rep_consume.h
@@ -24,11 +24,6 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
-// Functor for the Consume() and ReverseConsume() functions:
-// void ConsumeFunc(CordRep* rep, size_t offset, size_t length);
-// See the Consume() and ReverseConsume() function comments for documentation.
-using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>;
-
// Consume() and ReverseConsume() consume CONCAT based trees and invoke the
// provided functor with the contained nodes in the proper forward or reverse
// order, which is used to convert CONCAT trees into other tree or cord data.
@@ -40,8 +35,10 @@ using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>;
// violations, we can not 100% guarantee that all code respects 'new format'
// settings and flags, so we need to be able to parse old data on the fly until
// all old code is deprecated / no longer the default format.
-void Consume(CordRep* rep, ConsumeFn consume_fn);
-void ReverseConsume(CordRep* rep, ConsumeFn consume_fn);
+void Consume(CordRep* rep,
+ FunctionRef<void(CordRep*, size_t, size_t)> consume_fn);
+void ReverseConsume(CordRep* rep,
+ FunctionRef<void(CordRep*, size_t, size_t)> consume_fn);
} // namespace cord_internal
ABSL_NAMESPACE_END
diff --git a/absl/strings/internal/cord_rep_crc.cc b/absl/strings/internal/cord_rep_crc.cc
index ee140354..dbe54cc4 100644
--- a/absl/strings/internal/cord_rep_crc.cc
+++ b/absl/strings/internal/cord_rep_crc.cc
@@ -16,6 +16,7 @@
#include <cassert>
#include <cstdint>
+#include <utility>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_internal.h"
@@ -24,11 +25,10 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
-CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) {
- assert(child != nullptr);
- if (child->IsCrc()) {
+CordRepCrc* CordRepCrc::New(CordRep* child, crc_internal::CrcCordState state) {
+ if (child != nullptr && child->IsCrc()) {
if (child->refcount.IsOne()) {
- child->crc()->crc = crc;
+ child->crc()->crc_cord_state = std::move(state);
return child->crc();
}
CordRep* old = child;
@@ -37,15 +37,17 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) {
CordRep::Unref(old);
}
auto* new_cordrep = new CordRepCrc;
- new_cordrep->length = child->length;
+ new_cordrep->length = child != nullptr ? child->length : 0;
new_cordrep->tag = cord_internal::CRC;
new_cordrep->child = child;
- new_cordrep->crc = crc;
+ new_cordrep->crc_cord_state = std::move(state);
return new_cordrep;
}
void CordRepCrc::Destroy(CordRepCrc* node) {
- CordRep::Unref(node->child);
+ if (node->child != nullptr) {
+ CordRep::Unref(node->child);
+ }
delete node;
}
diff --git a/absl/strings/internal/cord_rep_crc.h b/absl/strings/internal/cord_rep_crc.h
index 5294b0d1..379d7a60 100644
--- a/absl/strings/internal/cord_rep_crc.h
+++ b/absl/strings/internal/cord_rep_crc.h
@@ -20,6 +20,7 @@
#include "absl/base/config.h"
#include "absl/base/optimization.h"
+#include "absl/crc/internal/crc_cord_state.h"
#include "absl/strings/internal/cord_internal.h"
namespace absl {
@@ -34,14 +35,14 @@ namespace cord_internal {
// the contained checksum is the user's responsibility.
struct CordRepCrc : public CordRep {
CordRep* child;
- uint32_t crc;
+ absl::crc_internal::CrcCordState crc_cord_state;
// Consumes `child` and returns a CordRepCrc prefixed tree containing `child`.
// If the specified `child` is itself a CordRepCrc node, then this method
- // either replaces the existing node, or directly updates the crc value in it
+ // either replaces the existing node, or directly updates the crc state in it
// depending on the node being shared or not, i.e.: refcount.IsOne().
- // `child` must not be null. Never returns null.
- static CordRepCrc* New(CordRep* child, uint32_t crc);
+ // `child` must only be null if the Cord is empty. Never returns null.
+ static CordRepCrc* New(CordRep* child, crc_internal::CrcCordState state);
// Destroys (deletes) the provided node. `node` must not be null.
static void Destroy(CordRepCrc* node);
diff --git a/absl/strings/internal/cord_rep_crc_test.cc b/absl/strings/internal/cord_rep_crc_test.cc
index d73ea7b3..3d27c33c 100644
--- a/absl/strings/internal/cord_rep_crc_test.cc
+++ b/absl/strings/internal/cord_rep_crc_test.cc
@@ -17,6 +17,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
+#include "absl/crc/internal/crc_cord_state.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_test_util.h"
@@ -27,47 +28,51 @@ namespace {
using ::absl::cordrep_testing::MakeFlat;
using ::testing::Eq;
+using ::testing::IsNull;
using ::testing::Ne;
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
-TEST(CordRepCrc, NewWithNullPtr) {
- EXPECT_DEATH(CordRepCrc::New(nullptr, 0), "");
-}
-
TEST(CordRepCrc, RemoveCrcWithNullptr) {
EXPECT_DEATH(RemoveCrcNode(nullptr), "");
}
#endif // !NDEBUG && GTEST_HAS_DEATH_TEST
+// Test helper: returns a CrcCordState whose rep holds a single `PrefixCrc`
+// entry constructed from (42, crc32c_t{crc}). The tests in this file expect
+// `Checksum()` on the result to equal `crc32c_t{crc}`.
+// NOTE(review): 42 is presumably an arbitrary prefix length; confirm against
+// the CrcCordState::PrefixCrc definition.
+absl::crc_internal::CrcCordState MakeCrcCordState(uint32_t crc) {
+ crc_internal::CrcCordState state;
+ state.mutable_rep()->prefix_crc.push_back(
+ crc_internal::CrcCordState::PrefixCrc(42, crc32c_t{crc}));
+ return state;
+}
+
TEST(CordRepCrc, NewDestroy) {
CordRep* rep = cordrep_testing::MakeFlat("Hello world");
- CordRepCrc* crc = CordRepCrc::New(rep, 12345);
+ CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345));
EXPECT_TRUE(crc->refcount.IsOne());
EXPECT_THAT(crc->child, Eq(rep));
- EXPECT_THAT(crc->crc, Eq(12345u));
+ EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u}));
EXPECT_TRUE(rep->refcount.IsOne());
CordRepCrc::Destroy(crc);
}
TEST(CordRepCrc, NewExistingCrcNotShared) {
CordRep* rep = cordrep_testing::MakeFlat("Hello world");
- CordRepCrc* crc = CordRepCrc::New(rep, 12345);
- CordRepCrc* new_crc = CordRepCrc::New(crc, 54321);
+ CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345));
+ CordRepCrc* new_crc = CordRepCrc::New(crc, MakeCrcCordState(54321));
EXPECT_THAT(new_crc, Eq(crc));
EXPECT_TRUE(new_crc->refcount.IsOne());
EXPECT_THAT(new_crc->child, Eq(rep));
- EXPECT_THAT(new_crc->crc, Eq(54321u));
+ EXPECT_THAT(new_crc->crc_cord_state.Checksum(), Eq(crc32c_t{54321u}));
EXPECT_TRUE(rep->refcount.IsOne());
CordRepCrc::Destroy(new_crc);
}
TEST(CordRepCrc, NewExistingCrcShared) {
CordRep* rep = cordrep_testing::MakeFlat("Hello world");
- CordRepCrc* crc = CordRepCrc::New(rep, 12345);
+ CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345));
CordRep::Ref(crc);
- CordRepCrc* new_crc = CordRepCrc::New(crc, 54321);
+ CordRepCrc* new_crc = CordRepCrc::New(crc, MakeCrcCordState(54321));
EXPECT_THAT(new_crc, Ne(crc));
EXPECT_TRUE(new_crc->refcount.IsOne());
@@ -75,13 +80,23 @@ TEST(CordRepCrc, NewExistingCrcShared) {
EXPECT_FALSE(rep->refcount.IsOne());
EXPECT_THAT(crc->child, Eq(rep));
EXPECT_THAT(new_crc->child, Eq(rep));
- EXPECT_THAT(crc->crc, Eq(12345u));
- EXPECT_THAT(new_crc->crc, Eq(54321u));
+ EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u}));
+ EXPECT_THAT(new_crc->crc_cord_state.Checksum(), Eq(crc32c_t{54321u}));
CordRep::Unref(crc);
CordRep::Unref(new_crc);
}
+TEST(CordRepCrc, NewEmpty) {
+ CordRepCrc* crc = CordRepCrc::New(nullptr, MakeCrcCordState(12345));
+ EXPECT_TRUE(crc->refcount.IsOne());
+ EXPECT_THAT(crc->child, IsNull());
+ EXPECT_THAT(crc->length, Eq(0u));
+ EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u}));
+ EXPECT_TRUE(crc->refcount.IsOne());
+ CordRepCrc::Destroy(crc);
+}
+
TEST(CordRepCrc, RemoveCrcNotCrc) {
CordRep* rep = cordrep_testing::MakeFlat("Hello world");
CordRep* nocrc = RemoveCrcNode(rep);
@@ -91,7 +106,7 @@ TEST(CordRepCrc, RemoveCrcNotCrc) {
TEST(CordRepCrc, RemoveCrcNotShared) {
CordRep* rep = cordrep_testing::MakeFlat("Hello world");
- CordRepCrc* crc = CordRepCrc::New(rep, 12345);
+ CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345));
CordRep* nocrc = RemoveCrcNode(crc);
EXPECT_THAT(nocrc, Eq(rep));
EXPECT_TRUE(rep->refcount.IsOne());
@@ -100,7 +115,7 @@ TEST(CordRepCrc, RemoveCrcNotShared) {
TEST(CordRepCrc, RemoveCrcShared) {
CordRep* rep = cordrep_testing::MakeFlat("Hello world");
- CordRepCrc* crc = CordRepCrc::New(rep, 12345);
+ CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345));
CordRep::Ref(crc);
CordRep* nocrc = RemoveCrcNode(crc);
EXPECT_THAT(nocrc, Eq(rep));
diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h
index 2000e21e..79a2fdb1 100644
--- a/absl/strings/internal/cord_rep_ring.h
+++ b/absl/strings/internal/cord_rep_ring.h
@@ -430,7 +430,7 @@ class CordRepRing : public CordRep {
// capacity to satisfy `extra` extra nodes, and unref the old `rep` instance.
//
// If a new CordRepRing can not be allocated, or the new capacity would exceed
- // the maxmimum capacity, then the input is consumed only, and an exception is
+ // the maximum capacity, then the input is consumed only, and an exception is
// thrown.
static CordRepRing* Mutable(CordRepRing* rep, size_t extra);
@@ -472,7 +472,7 @@ class CordRepRing : public CordRep {
// Increases the data offset for entry `index` by `n`.
void AddDataOffset(index_type index, size_t n);
- // Descreases the length for entry `index` by `n`.
+ // Decreases the length for entry `index` by `n`.
void SubLength(index_type index, size_t n);
index_type head_;
diff --git a/absl/strings/internal/cordz_functions.h b/absl/strings/internal/cordz_functions.h
index 93f46ec6..ed108bf1 100644
--- a/absl/strings/internal/cordz_functions.h
+++ b/absl/strings/internal/cordz_functions.h
@@ -32,18 +32,10 @@ int32_t get_cordz_mean_interval();
// Sets the sample rate with the average interval between samples.
void set_cordz_mean_interval(int32_t mean_interval);
-// Enable cordz unless any of the following applies:
-// - no thread local support
-// - MSVC build
-// - Android build
-// - Apple build
-// - DLL build
-// Hashtablez is turned off completely in opensource builds.
-// MSVC's static atomics are dynamically initialized in debug mode, which breaks
-// sampling.
-#if defined(ABSL_HAVE_THREAD_LOCAL) && !defined(_MSC_VER) && \
- !defined(ABSL_BUILD_DLL) && !defined(ABSL_CONSUME_DLL) && \
- !defined(__ANDROID__) && !defined(__APPLE__)
+// Cordz is only enabled on Linux with thread_local support.
+#if defined(ABSL_INTERNAL_CORDZ_ENABLED)
+#error ABSL_INTERNAL_CORDZ_ENABLED cannot be set directly
+#elif defined(__linux__) && defined(ABSL_HAVE_THREAD_LOCAL)
#define ABSL_INTERNAL_CORDZ_ENABLED 1
#endif
diff --git a/absl/strings/internal/cordz_functions_test.cc b/absl/strings/internal/cordz_functions_test.cc
index 350623c1..b70a685e 100644
--- a/absl/strings/internal/cordz_functions_test.cc
+++ b/absl/strings/internal/cordz_functions_test.cc
@@ -38,7 +38,7 @@ TEST(CordzFunctionsTest, SampleRate) {
}
// Cordz is disabled when we don't have thread_local. All calls to
-// should_profile will return false when cordz is diabled, so we might want to
+// should_profile will return false when cordz is disabled, so we might want to
// avoid those tests.
#ifdef ABSL_INTERNAL_CORDZ_ENABLED
diff --git a/absl/strings/internal/cordz_handle.cc b/absl/strings/internal/cordz_handle.cc
index a73fefed..a7061dbe 100644
--- a/absl/strings/internal/cordz_handle.cc
+++ b/absl/strings/internal/cordz_handle.cc
@@ -16,34 +16,60 @@
#include <atomic>
#include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK
-#include "absl/base/internal/spinlock.h"
+#include "absl/synchronization/mutex.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
-using ::absl::base_internal::SpinLockHolder;
+namespace {
-ABSL_CONST_INIT CordzHandle::Queue CordzHandle::global_queue_(absl::kConstInit);
+struct Queue {
+ Queue() = default;
+
+ absl::Mutex mutex;
+ std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr};
+
+ // Returns true if this delete queue is empty. This method does not acquire
+ // the lock, but does a 'load acquire' observation on the delete queue tail.
+ // It is used inside Delete() to check for the presence of a delete queue
+ // without holding the lock. The assumption is that the caller is in the
+ // state of 'being deleted', and can not be newly discovered by a concurrent
+ // 'being constructed' snapshot instance. Practically, this means that any
+ // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens
+ // before / after' semantics and atomic fences.
+ bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ return dq_tail.load(std::memory_order_acquire) == nullptr;
+ }
+};
+
+static Queue* GlobalQueue() {
+ static Queue* global_queue = new Queue;
+ return global_queue;
+}
+
+} // namespace
CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) {
+ Queue* global_queue = GlobalQueue();
if (is_snapshot) {
- SpinLockHolder lock(&queue_->mutex);
- CordzHandle* dq_tail = queue_->dq_tail.load(std::memory_order_acquire);
+ MutexLock lock(&global_queue->mutex);
+ CordzHandle* dq_tail =
+ global_queue->dq_tail.load(std::memory_order_acquire);
if (dq_tail != nullptr) {
dq_prev_ = dq_tail;
dq_tail->dq_next_ = this;
}
- queue_->dq_tail.store(this, std::memory_order_release);
+ global_queue->dq_tail.store(this, std::memory_order_release);
}
}
CordzHandle::~CordzHandle() {
- ODRCheck();
+ Queue* global_queue = GlobalQueue();
if (is_snapshot_) {
std::vector<CordzHandle*> to_delete;
{
- SpinLockHolder lock(&queue_->mutex);
+ MutexLock lock(&global_queue->mutex);
CordzHandle* next = dq_next_;
if (dq_prev_ == nullptr) {
// We were head of the queue, delete every CordzHandle until we reach
@@ -59,7 +85,7 @@ CordzHandle::~CordzHandle() {
if (next) {
next->dq_prev_ = dq_prev_;
} else {
- queue_->dq_tail.store(dq_prev_, std::memory_order_release);
+ global_queue->dq_tail.store(dq_prev_, std::memory_order_release);
}
}
for (CordzHandle* handle : to_delete) {
@@ -69,16 +95,15 @@ CordzHandle::~CordzHandle() {
}
bool CordzHandle::SafeToDelete() const {
- return is_snapshot_ || queue_->IsEmpty();
+ return is_snapshot_ || GlobalQueue()->IsEmpty();
}
void CordzHandle::Delete(CordzHandle* handle) {
assert(handle);
if (handle) {
- handle->ODRCheck();
- Queue* const queue = handle->queue_;
+ Queue* const queue = GlobalQueue();
if (!handle->SafeToDelete()) {
- SpinLockHolder lock(&queue->mutex);
+ MutexLock lock(&queue->mutex);
CordzHandle* dq_tail = queue->dq_tail.load(std::memory_order_acquire);
if (dq_tail != nullptr) {
handle->dq_prev_ = dq_tail;
@@ -93,8 +118,9 @@ void CordzHandle::Delete(CordzHandle* handle) {
std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() {
std::vector<const CordzHandle*> handles;
- SpinLockHolder lock(&global_queue_.mutex);
- CordzHandle* dq_tail = global_queue_.dq_tail.load(std::memory_order_acquire);
+ Queue* global_queue = GlobalQueue();
+ MutexLock lock(&global_queue->mutex);
+ CordzHandle* dq_tail = global_queue->dq_tail.load(std::memory_order_acquire);
for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) {
handles.push_back(p);
}
@@ -103,13 +129,13 @@ std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() {
bool CordzHandle::DiagnosticsHandleIsSafeToInspect(
const CordzHandle* handle) const {
- ODRCheck();
if (!is_snapshot_) return false;
if (handle == nullptr) return true;
if (handle->is_snapshot_) return false;
bool snapshot_found = false;
- SpinLockHolder lock(&queue_->mutex);
- for (const CordzHandle* p = queue_->dq_tail; p; p = p->dq_prev_) {
+ Queue* global_queue = GlobalQueue();
+ MutexLock lock(&global_queue->mutex);
+ for (const CordzHandle* p = global_queue->dq_tail; p; p = p->dq_prev_) {
if (p == handle) return !snapshot_found;
if (p == this) snapshot_found = true;
}
@@ -119,13 +145,13 @@ bool CordzHandle::DiagnosticsHandleIsSafeToInspect(
std::vector<const CordzHandle*>
CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() {
- ODRCheck();
std::vector<const CordzHandle*> handles;
if (!is_snapshot()) {
return handles;
}
- SpinLockHolder lock(&queue_->mutex);
+ Queue* global_queue = GlobalQueue();
+ MutexLock lock(&global_queue->mutex);
for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) {
if (!p->is_snapshot()) {
handles.push_back(p);
diff --git a/absl/strings/internal/cordz_handle.h b/absl/strings/internal/cordz_handle.h
index 3c800b43..08e3f0d3 100644
--- a/absl/strings/internal/cordz_handle.h
+++ b/absl/strings/internal/cordz_handle.h
@@ -20,8 +20,6 @@
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
-#include "absl/base/internal/spinlock.h"
-#include "absl/synchronization/mutex.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -34,7 +32,7 @@ namespace cord_internal {
// has gained visibility into a CordzInfo object, that CordzInfo object will not
// be deleted prematurely. This allows the profiler to inspect all CordzInfo
// objects that are alive without needing to hold a global lock.
-class CordzHandle {
+class ABSL_DLL CordzHandle {
public:
CordzHandle() : CordzHandle(false) {}
@@ -79,37 +77,6 @@ class CordzHandle {
virtual ~CordzHandle();
private:
- // Global queue data. CordzHandle stores a pointer to the global queue
- // instance to harden against ODR violations.
- struct Queue {
- constexpr explicit Queue(absl::ConstInitType)
- : mutex(absl::kConstInit,
- absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {}
-
- absl::base_internal::SpinLock mutex;
- std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr};
-
- // Returns true if this delete queue is empty. This method does not acquire
- // the lock, but does a 'load acquire' observation on the delete queue tail.
- // It is used inside Delete() to check for the presence of a delete queue
- // without holding the lock. The assumption is that the caller is in the
- // state of 'being deleted', and can not be newly discovered by a concurrent
- // 'being constructed' snapshot instance. Practically, this means that any
- // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens
- // before / after' semantics and atomic fences.
- bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS {
- return dq_tail.load(std::memory_order_acquire) == nullptr;
- }
- };
-
- void ODRCheck() const {
-#ifndef NDEBUG
- ABSL_RAW_CHECK(queue_ == &global_queue_, "ODR violation in Cord");
-#endif
- }
-
- ABSL_CONST_INIT static Queue global_queue_;
- Queue* const queue_ = &global_queue_;
const bool is_snapshot_;
// dq_prev_ and dq_next_ require the global queue mutex to be held.
diff --git a/absl/strings/internal/cordz_info.cc b/absl/strings/internal/cordz_info.cc
index 530f33be..515dfafb 100644
--- a/absl/strings/internal/cordz_info.cc
+++ b/absl/strings/internal/cordz_info.cc
@@ -26,6 +26,7 @@
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
#include "absl/types/span.h"
namespace absl {
@@ -53,7 +54,7 @@ namespace {
// The top level node is treated specially: we assume the current thread
// (typically called from the CordzHandler) to hold a reference purely to
// perform a safe analysis, and not being part of the application. So we
-// substract 1 from the reference count of the top node to compute the
+// subtract 1 from the reference count of the top node to compute the
// 'application fair share' excluding the reference of the current thread.
//
// An example of fair sharing, and why we multiply reference counts:
diff --git a/absl/strings/internal/cordz_info_statistics_test.cc b/absl/strings/internal/cordz_info_statistics_test.cc
index 6d6feb52..53d2f2ea 100644
--- a/absl/strings/internal/cordz_info_statistics_test.cc
+++ b/absl/strings/internal/cordz_info_statistics_test.cc
@@ -19,6 +19,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
+#include "absl/crc/internal/crc_cord_state.h"
#include "absl/strings/cord.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
@@ -451,7 +452,8 @@ TEST(CordzInfoStatisticsTest, BtreeNodeShared) {
TEST(CordzInfoStatisticsTest, Crc) {
RefHelper ref;
auto* left = Flat(1000);
- auto* crc = ref.NeedsUnref(CordRepCrc::New(left, 12345));
+ auto* crc =
+ ref.NeedsUnref(CordRepCrc::New(left, crc_internal::CrcCordState()));
CordzStatistics expected;
expected.size = left->length;
diff --git a/absl/strings/internal/cordz_sample_token.h b/absl/strings/internal/cordz_sample_token.h
index b58022c3..2a86bc3b 100644
--- a/absl/strings/internal/cordz_sample_token.h
+++ b/absl/strings/internal/cordz_sample_token.h
@@ -33,11 +33,11 @@ namespace cord_internal {
// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail
//
// This list tracks that CH1 and CH2 were created after ST1, so the thread
-// holding ST1 might have a referece to CH1, CH2, ST2, and CH3. However, ST2 was
-// created later, so the thread holding the ST2 token cannot have a reference to
-// ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will delete ST1,
-// CH1, and CH2. If instead ST2 is cleaned up first, that thread will only
-// delete ST2.
+// holding ST1 might have a reference to CH1, CH2, ST2, and CH3. However, ST2
+// was created later, so the thread holding the ST2 token cannot have a
+// reference to ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will
+// delete ST1, CH1, and CH2. If instead ST2 is cleaned up first, that thread
+// will only delete ST2.
//
// If ST1 is cleaned up first, the new list will be:
// ST2 <- CH3 <- global_delete_queue_tail
diff --git a/absl/strings/internal/damerau_levenshtein_distance.cc b/absl/strings/internal/damerau_levenshtein_distance.cc
new file mode 100644
index 00000000..a084568f
--- /dev/null
+++ b/absl/strings/internal/damerau_levenshtein_distance.cc
@@ -0,0 +1,93 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/damerau_levenshtein_distance.h"
+
+#include <algorithm>
+#include <array>
+#include <numeric>
+
+#include "absl/strings/string_view.h"
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace strings_internal {
+// Calculate DamerauLevenshtein (adjacent transpositions) distance
+// between two strings,
+// https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance. The
+// algorithm follows the condition that no substring is edited more than once.
+// While this can result in a larger distance, it's a) a much simpler algorithm
+// and b) more realistic for the case that typographic mistakes should be
+// detected.
+// When the distance is larger than cutoff, or one of the strings has more
+// than MAX_SIZE=100 characters, the code returns min(MAX_SIZE, cutoff) + 1.
+uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1,
+ absl::string_view s2, uint8_t cutoff) {
+ const uint8_t MAX_SIZE = 100;
+ const uint8_t _cutoff = std::min(MAX_SIZE, cutoff);
+ const uint8_t cutoff_plus_1 = static_cast<uint8_t>(_cutoff + 1);
+
+ if (s1.size() > s2.size()) std::swap(s1, s2);
+ if (s1.size() + _cutoff < s2.size() || s2.size() > MAX_SIZE)
+ return cutoff_plus_1;
+
+ if (s1.empty())
+ return static_cast<uint8_t>(s2.size());
+
+ // Lower diagonal bound: y = x - lower_diag
+ const uint8_t lower_diag =
+ _cutoff - static_cast<uint8_t>(s2.size() - s1.size());
+ // Upper diagonal bound: y = x + upper_diag
+ const uint8_t upper_diag = _cutoff;
+
+ // d[i][j] is the number of edits required to convert s1[0, i] to s2[0, j]
+ std::array<std::array<uint8_t, MAX_SIZE + 2>, MAX_SIZE + 2> d;
+ std::iota(d[0].begin(), d[0].begin() + upper_diag + 1, 0);
+ d[0][cutoff_plus_1] = cutoff_plus_1;
+ for (size_t i = 1; i <= s1.size(); ++i) {
+ // Deduce begin of relevant window.
+ size_t j_begin = 1;
+ if (i > lower_diag) {
+ j_begin = i - lower_diag;
+ d[i][j_begin - 1] = cutoff_plus_1;
+ } else {
+ d[i][0] = static_cast<uint8_t>(i);
+ }
+
+ // Deduce end of relevant window.
+ size_t j_end = i + upper_diag;
+ if (j_end > s2.size()) {
+ j_end = s2.size();
+ } else {
+ d[i][j_end + 1] = cutoff_plus_1;
+ }
+
+ for (size_t j = j_begin; j <= j_end; ++j) {
+ const uint8_t deletion_distance = d[i - 1][j] + 1;
+ const uint8_t insertion_distance = d[i][j - 1] + 1;
+ const uint8_t mismatched_tail_cost = s1[i - 1] == s2[j - 1] ? 0 : 1;
+ const uint8_t mismatch_distance = d[i - 1][j - 1] + mismatched_tail_cost;
+ uint8_t transposition_distance = _cutoff + 1;
+ if (i > 1 && j > 1 && s1[i - 1] == s2[j - 2] && s1[i - 2] == s2[j - 1])
+ transposition_distance = d[i - 2][j - 2] + 1;
+ d[i][j] = std::min({cutoff_plus_1, deletion_distance, insertion_distance,
+ mismatch_distance, transposition_distance});
+ }
+ }
+ return d[s1.size()][s2.size()];
+}
+
+} // namespace strings_internal
+
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/strings/internal/damerau_levenshtein_distance.h b/absl/strings/internal/damerau_levenshtein_distance.h
new file mode 100644
index 00000000..7a4bd644
--- /dev/null
+++ b/absl/strings/internal/damerau_levenshtein_distance.h
@@ -0,0 +1,34 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_DAMERAU_LEVENSHTEIN_DISTANCE_H_
+#define ABSL_STRINGS_INTERNAL_DAMERAU_LEVENSHTEIN_DISTANCE_H_
+
+#include <cstdint>
+
+#include "absl/strings/string_view.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace strings_internal {
+// Calculates the Damerau-Levenshtein distance between two strings. Returns
+// min(cutoff, 100) + 1 if it exceeds cutoff or either string has >100 chars.
+uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1,
+ absl::string_view s2, uint8_t cutoff);
+
+} // namespace strings_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_DAMERAU_LEVENSHTEIN_DISTANCE_H_
diff --git a/absl/strings/internal/damerau_levenshtein_distance_test.cc b/absl/strings/internal/damerau_levenshtein_distance_test.cc
new file mode 100644
index 00000000..49dd105b
--- /dev/null
+++ b/absl/strings/internal/damerau_levenshtein_distance_test.cc
@@ -0,0 +1,99 @@
+// Copyright 2022 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/damerau_levenshtein_distance.h"
+
+#include <cstdint>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+using absl::strings_internal::CappedDamerauLevenshteinDistance;
+
+TEST(Distance, TestDistances) {
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 6), uint8_t{0});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "b", 6), uint8_t{1});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("ca", "abc", 6), uint8_t{3});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "ad", 6), uint8_t{2});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "cadb", 6), uint8_t{4});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "bdac", 6), uint8_t{4});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 0), uint8_t{0});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("", "", 0), uint8_t{0});
+ // combinations for 3-character strings:
+ // 1, 2, 3 removals, insertions or replacements and transpositions
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", "abc", 6), uint8_t{0});
+ for (auto res :
+ {"", "ca", "efg", "ea", "ce", "ceb", "eca", "cae", "cea", "bea"}) {
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{3});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{3});
+ }
+ for (auto res :
+ {"a", "b", "c", "ba", "cb", "bca", "cab", "cba", "ace",
+ "efc", "ebf", "aef", "ae", "be", "eb", "ec", "ecb", "bec",
+ "bce", "cbe", "ace", "eac", "aeb", "bae", "eab", "eba"}) {
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{2});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{2});
+ }
+ for (auto res : {"ab", "ac", "bc", "acb", "bac", "ebc", "aec", "abe"}) {
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{1});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{1});
+ }
+}
+
+TEST(Distance, TestCutoff) {
+ // Returning cutoff + 1 if the value is larger than cutoff or string longer
+ // than MAX_SIZE.
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 3), uint8_t{3});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 2), uint8_t{3});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 1), uint8_t{2});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abcdefg", "a", 2), uint8_t{3});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "abcde", 2), uint8_t{3});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(102, 'a'),
+ std::string(102, 'a'), 105),
+ uint8_t{101});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
+ std::string(100, 'a'), 100),
+ uint8_t{0});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
+ std::string(100, 'b'), 100),
+ uint8_t{100});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
+ std::string(99, 'a'), 2),
+ uint8_t{1});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
+ std::string(101, 'a'), 2),
+ uint8_t{3});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
+ std::string(101, 'a'), 2),
+ uint8_t{3});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX + 1, 'a'),
+ std::string(UINT8_MAX + 1, 'b'),
+ UINT8_MAX),
+ uint8_t{101});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'),
+ std::string(UINT8_MAX - 1, 'b'),
+ UINT8_MAX),
+ uint8_t{101});
+ EXPECT_THAT(
+ CappedDamerauLevenshteinDistance(std::string(UINT8_MAX, 'a'),
+ std::string(UINT8_MAX, 'b'), UINT8_MAX),
+ uint8_t{101});
+ EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'),
+ std::string(UINT8_MAX - 1, 'a'),
+ UINT8_MAX),
+ uint8_t{101});
+}
+} // namespace
diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc
index cfea0961..56a4cbed 100644
--- a/absl/strings/internal/escaping.cc
+++ b/absl/strings/internal/escaping.cc
@@ -21,26 +21,26 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
+// The two strings below provide maps from normal 6-bit characters to their
+// base64-escaped equivalent.
+// For the inverse case, see kUn(WebSafe)Base64 in the external
+// escaping.cc.
ABSL_CONST_INIT const char kBase64Chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+ABSL_CONST_INIT const char kWebSafeBase64Chars[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
+
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
// Base64 encodes three bytes of input at a time. If the input is not
// divisible by three, we pad as appropriate.
//
- // (from https://tools.ietf.org/html/rfc3548)
- // Special processing is performed if fewer than 24 bits are available
- // at the end of the data being encoded. A full encoding quantum is
- // always completed at the end of a quantity. When fewer than 24 input
- // bits are available in an input group, zero bits are added (on the
- // right) to form an integral number of 6-bit groups. Padding at the
- // end of the data is performed using the '=' character. Since all base
- // 64 input is an integral number of octets, only the following cases
- // can arise:
-
// Base64 encodes each three bytes of input into four bytes of output.
size_t len = (input_len / 3) * 4;
+ // Since all base 64 input is an integral number of octets, only the following
+ // cases can arise:
if (input_len % 3 == 0) {
// (from https://tools.ietf.org/html/rfc3548)
// (1) the final quantum of encoding input is an integral multiple of 24
@@ -70,6 +70,21 @@ size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
return len;
}
+// ----------------------------------------------------------------------
+// Take the input in groups of 4 characters and turn each
+// character into a code 0 to 63 thus:
+// A-Z map to 0 to 25
+// a-z map to 26 to 51
+// 0-9 map to 52 to 61
+// +(- for WebSafe) maps to 62
+// /(_ for WebSafe) maps to 63
+// There will be four numbers, all less than 64 which can be represented
+// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
+// Arrange the 6 digit binary numbers into three bytes as such:
+// aaaaaabb bbbbcccc ccdddddd
+// Equals signs (one or two) are used at the end of the encoded block to
+// indicate that the text was not an integer multiple of three bytes long.
+// ----------------------------------------------------------------------
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64,
bool do_padding) {
@@ -83,6 +98,16 @@ size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
char* const limit_dest = dest + szdest;
const unsigned char* const limit_src = src + szsrc;
+ // (from https://tools.ietf.org/html/rfc3548)
+ // Special processing is performed if fewer than 24 bits are available
+ // at the end of the data being encoded. A full encoding quantum is
+ // always completed at the end of a quantity. When fewer than 24 input
+ // bits are available in an input group, zero bits are added (on the
+ // right) to form an integral number of 6-bit groups.
+ //
+ // If do_padding is true, padding at the end of the data is performed. This
+ // output padding uses the '=' character.
+
// Three bytes of data encodes to four characters of cyphertext.
// So we can pump through three-byte chunks atomically.
if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3.
diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h
index 6a9ce602..2186f778 100644
--- a/absl/strings/internal/escaping.h
+++ b/absl/strings/internal/escaping.h
@@ -24,20 +24,19 @@ ABSL_NAMESPACE_BEGIN
namespace strings_internal {
ABSL_CONST_INIT extern const char kBase64Chars[];
+ABSL_CONST_INIT extern const char kWebSafeBase64Chars[];
-// Calculates how long a string will be when it is base64 encoded given its
-// length and whether or not the result should be padded.
+// Calculates the length of a Base64 encoding (RFC 4648) of a string of length
+// `input_len`, with or without padding per `do_padding`. Note that 'web-safe'
+// encoding (section 5 of the RFC) does not change this length.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding);
-// Base64-encodes `src` using the alphabet provided in `base64` and writes the
-// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars
-// until its length is a multiple of 3. Returns the length of `dest`.
+// Base64-encodes `src` using the alphabet provided in `base64` (which
+// determines whether to do web-safe encoding or not) and writes the result to
+// `dest`. If `do_padding` is true, `dest` is padded with '=' chars until its
+// length is a multiple of 4. Returns the length of `dest`.
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64, bool do_padding);
-
-// Base64-encodes `src` using the alphabet provided in `base64` and writes the
-// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars
-// until its length is a multiple of 3.
template <typename String>
void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
bool do_padding, const char* base64_chars) {
diff --git a/absl/strings/internal/has_absl_stringify.h b/absl/strings/internal/has_absl_stringify.h
new file mode 100644
index 00000000..55a08508
--- /dev/null
+++ b/absl/strings/internal/has_absl_stringify.h
@@ -0,0 +1,55 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_
+#define ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+
+namespace strings_internal {
+
+// This is an empty class not intended to be used. It exists so that
+// `HasAbslStringify` can reference a universal class rather than needing to be
+// copied for each new sink.
+class UnimplementedSink {
+ public:
+ void Append(size_t count, char ch);
+
+ void Append(string_view v);
+
+ // Support `absl::Format(&sink, format, args...)`.
+ friend void AbslFormatFlush(UnimplementedSink* sink, absl::string_view v);
+};
+
+template <typename T, typename = void>
+struct HasAbslStringify : std::false_type {};
+
+template <typename T>
+struct HasAbslStringify<
+ T, std::enable_if_t<std::is_void<decltype(AbslStringify(
+ std::declval<strings_internal::UnimplementedSink&>(),
+ std::declval<const T&>()))>::value>> : std::true_type {};
+
+} // namespace strings_internal
+
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_
diff --git a/absl/strings/internal/stl_type_traits.h b/absl/strings/internal/stl_type_traits.h
index 6035ca45..e50468b0 100644
--- a/absl/strings/internal/stl_type_traits.h
+++ b/absl/strings/internal/stl_type_traits.h
@@ -13,7 +13,7 @@
// limitations under the License.
//
-// Thie file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type
+// The file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type
// trait metafunction to assist in working with the _GLIBCXX_DEBUG debug
// wrappers of STL containers.
//
diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc
index 967fe9ca..018dd052 100644
--- a/absl/strings/internal/str_format/arg.cc
+++ b/absl/strings/internal/str_format/arg.cc
@@ -297,6 +297,37 @@ constexpr auto ConvertV(T) {
}
template <typename T>
+bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
+ if (conv.conversion_char() == FormatConversionCharInternal::v) {
+ conv.set_conversion_char(FormatConversionCharInternal::g);
+ }
+
+ return FormatConversionCharIsFloat(conv.conversion_char()) &&
+ ConvertFloatImpl(v, conv, sink);
+}
+
+inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink) {
+ if (conv.is_basic()) {
+ sink->Append(v);
+ return true;
+ }
+ return sink->PutPaddedString(v, conv.width(), conv.precision(),
+ conv.has_left_flag());
+}
+
+} // namespace
+
+bool ConvertBoolArg(bool v, FormatSinkImpl *sink) {
+ if (v) {
+ sink->Append("true");
+ } else {
+ sink->Append("false");
+ }
+ return true;
+}
+
+template <typename T>
bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
using U = typename MakeUnsigned<T>::type;
IntDigits as_digits;
@@ -354,36 +385,37 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
return ConvertIntImplInnerSlow(as_digits, conv, sink);
}
-template <typename T>
-bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
- if (conv.conversion_char() == FormatConversionCharInternal::v) {
- conv.set_conversion_char(FormatConversionCharInternal::g);
- }
-
- return FormatConversionCharIsFloat(conv.conversion_char()) &&
- ConvertFloatImpl(v, conv, sink);
-}
-
-inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv,
- FormatSinkImpl *sink) {
- if (conv.is_basic()) {
- sink->Append(v);
- return true;
- }
- return sink->PutPaddedString(v, conv.width(), conv.precision(),
- conv.has_left_flag());
-}
-
-} // namespace
-
-bool ConvertBoolArg(bool v, FormatSinkImpl *sink) {
- if (v) {
- sink->Append("true");
- } else {
- sink->Append("false");
- }
- return true;
-}
+template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<signed char>(signed char v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<unsigned char>(unsigned char v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<short>(short v, // NOLINT
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<unsigned short>(unsigned short v, // NOLINT
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<unsigned int>(unsigned int v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<long>(long v, // NOLINT
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<long long>(long long v, // NOLINT
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
+template bool ConvertIntArg<unsigned long long>(unsigned long long v, // NOLINT
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
// ==================== Strings ====================
StringConvertResult FormatConvertImpl(const std::string &v,
diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h
index b3e4ff15..e4b16628 100644
--- a/absl/strings/internal/str_format/arg.h
+++ b/absl/strings/internal/str_format/arg.h
@@ -18,6 +18,7 @@
#include <string.h>
#include <wchar.h>
+#include <algorithm>
#include <cstdio>
#include <iomanip>
#include <limits>
@@ -25,10 +26,12 @@
#include <sstream>
#include <string>
#include <type_traits>
+#include <utility>
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
+#include "absl/strings/internal/has_absl_stringify.h"
#include "absl/strings/internal/str_format/extension.h"
#include "absl/strings/string_view.h"
@@ -50,6 +53,19 @@ struct ArgConvertResult {
bool value;
};
+using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
+ FormatConversionCharSetInternal::c,
+ FormatConversionCharSetInternal::kNumeric,
+ FormatConversionCharSetInternal::kStar,
+ FormatConversionCharSetInternal::v)>;
+using FloatingConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
+ FormatConversionCharSetInternal::kFloating,
+ FormatConversionCharSetInternal::v)>;
+using CharConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
+ FormatConversionCharSetInternal::c,
+ FormatConversionCharSetInternal::kNumeric,
+ FormatConversionCharSetInternal::kStar)>;
+
template <typename T, typename = void>
struct HasUserDefinedConvert : std::false_type {};
@@ -67,6 +83,44 @@ void AbslFormatConvert();
void AbslStringify();
template <typename T>
+bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
+
+// Forward declarations of internal `ConvertIntArg` function template
+// instantiations are here to avoid including the template body in the headers
+// and instantiating it in large numbers of translation units. Explicit
+// instantiations can be found in "absl/strings/internal/str_format/arg.cc"
+extern template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+extern template bool ConvertIntArg<signed char>(signed char v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+extern template bool ConvertIntArg<unsigned char>(unsigned char v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+extern template bool ConvertIntArg<short>(short v, // NOLINT
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+extern template bool ConvertIntArg<unsigned short>( // NOLINT
+ unsigned short v, FormatConversionSpecImpl conv, // NOLINT
+ FormatSinkImpl* sink);
+extern template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+extern template bool ConvertIntArg<unsigned int>(unsigned int v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+extern template bool ConvertIntArg<long>( // NOLINT
+ long v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); // NOLINT
+extern template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+extern template bool ConvertIntArg<long long>(long long v, // NOLINT
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+extern template bool ConvertIntArg<unsigned long long>( // NOLINT
+ unsigned long long v, FormatConversionSpecImpl conv, // NOLINT
+ FormatSinkImpl* sink);
+
+template <typename T>
auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink)
-> decltype(AbslFormatConvert(v,
@@ -82,10 +136,30 @@ auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv,
}
template <typename T>
+auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink)
+ -> std::enable_if_t<std::is_enum<T>::value &&
+ std::is_void<decltype(AbslStringify(
+ std::declval<FormatSink&>(), v))>::value,
+ IntegralConvertResult> {
+ if (conv.conversion_char() == FormatConversionCharInternal::v) {
+ using FormatSinkT =
+ absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>;
+ auto fs = sink->Wrap<FormatSinkT>();
+ AbslStringify(fs, v);
+ return {true};
+ } else {
+ return {ConvertIntArg(
+ static_cast<typename std::underlying_type<T>::type>(v), conv, sink)};
+ }
+}
+
+template <typename T>
auto FormatConvertImpl(const T& v, FormatConversionSpecImpl,
FormatSinkImpl* sink)
- -> std::enable_if_t<std::is_void<decltype(AbslStringify(
- std::declval<FormatSink&>(), v))>::value,
+ -> std::enable_if_t<!std::is_enum<T>::value &&
+ std::is_void<decltype(AbslStringify(
+ std::declval<FormatSink&>(), v))>::value,
ArgConvertResult<FormatConversionCharSetInternal::v>> {
using FormatSinkT =
absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>;
@@ -191,19 +265,6 @@ StringConvertResult FormatConvertImpl(const AbslCord& value,
return {true};
}
-using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
- FormatConversionCharSetInternal::c,
- FormatConversionCharSetInternal::kNumeric,
- FormatConversionCharSetInternal::kStar,
- FormatConversionCharSetInternal::v)>;
-using FloatingConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
- FormatConversionCharSetInternal::kFloating,
- FormatConversionCharSetInternal::v)>;
-using CharConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
- FormatConversionCharSetInternal::c,
- FormatConversionCharSetInternal::kNumeric,
- FormatConversionCharSetInternal::kStar)>;
-
bool ConvertBoolArg(bool v, FormatSinkImpl* sink);
// Floats.
@@ -271,7 +332,8 @@ IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv,
// FormatArgImpl will use the underlying Convert functions instead.
template <typename T>
typename std::enable_if<std::is_enum<T>::value &&
- !HasUserDefinedConvert<T>::value,
+ !HasUserDefinedConvert<T>::value &&
+ !strings_internal::HasAbslStringify<T>::value,
IntegralConvertResult>::type
FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
@@ -384,7 +446,8 @@ class FormatArgImpl {
template <typename T, typename = void>
struct DecayType {
static constexpr bool kHasUserDefined =
- str_format_internal::HasUserDefinedConvert<T>::value;
+ str_format_internal::HasUserDefinedConvert<T>::value ||
+ strings_internal::HasAbslStringify<T>::value;
using type = typename std::conditional<
!kHasUserDefined && std::is_convertible<T, const char*>::value,
const char*,
@@ -396,6 +459,7 @@ class FormatArgImpl {
struct DecayType<T,
typename std::enable_if<
!str_format_internal::HasUserDefinedConvert<T>::value &&
+ !strings_internal::HasAbslStringify<T>::value &&
std::is_enum<T>::value>::type> {
using type = typename std::underlying_type<T>::type;
};
diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h
index aeb9d48d..eab6ab9d 100644
--- a/absl/strings/internal/str_format/checker.h
+++ b/absl/strings/internal/str_format/checker.h
@@ -15,8 +15,11 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
+#include <algorithm>
+
#include "absl/base/attributes.h"
#include "absl/strings/internal/str_format/arg.h"
+#include "absl/strings/internal/str_format/constexpr_parser.h"
#include "absl/strings/internal/str_format/extension.h"
// Compile time check support for entry points.
@@ -36,333 +39,56 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
-constexpr bool AllOf() { return true; }
-
-template <typename... T>
-constexpr bool AllOf(bool b, T... t) {
- return b && AllOf(t...);
-}
-
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
-constexpr bool ContainsChar(const char* chars, char c) {
- return *chars == c || (*chars && ContainsChar(chars + 1, c));
-}
-
-// A constexpr compatible list of Convs.
-struct ConvList {
- const FormatConversionCharSet* array;
- int count;
-
- // We do the bound check here to avoid having to do it on the callers.
- // Returning an empty FormatConversionCharSet has the same effect as
- // short circuiting because it will never match any conversion.
- constexpr FormatConversionCharSet operator[](int i) const {
- return i < count ? array[i] : FormatConversionCharSet{};
- }
-
- constexpr ConvList without_front() const {
- return count != 0 ? ConvList{array + 1, count - 1} : *this;
- }
-};
-
-template <size_t count>
-struct ConvListT {
- // Make sure the array has size > 0.
- FormatConversionCharSet list[count ? count : 1];
-};
-
-constexpr char GetChar(string_view str, size_t index) {
- return index < str.size() ? str[index] : char{};
-}
-
-constexpr string_view ConsumeFront(string_view str, size_t len = 1) {
- return len <= str.size() ? string_view(str.data() + len, str.size() - len)
- : string_view();
-}
-
-constexpr string_view ConsumeAnyOf(string_view format, const char* chars) {
- while (ContainsChar(chars, GetChar(format, 0))) {
- format = ConsumeFront(format);
- }
- return format;
-}
-
-constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; }
-
-// Helper class for the ParseDigits function.
-// It encapsulates the two return values we need there.
-struct Integer {
- string_view format;
- int value;
-
- // If the next character is a '$', consume it.
- // Otherwise, make `this` an invalid positional argument.
- constexpr Integer ConsumePositionalDollar() const {
- if (GetChar(format, 0) == '$') {
- return Integer{ConsumeFront(format), value};
- } else {
- return Integer{format, 0};
- }
- }
-};
-
-constexpr Integer ParseDigits(string_view format) {
- int value = 0;
- while (IsDigit(GetChar(format, 0))) {
- value = 10 * value + GetChar(format, 0) - '0';
- format = ConsumeFront(format);
- }
-
- return Integer{format, value};
-}
-
-// Parse digits for a positional argument.
-// The parsing also consumes the '$'.
-constexpr Integer ParsePositional(string_view format) {
- return ParseDigits(format).ConsumePositionalDollar();
-}
-
-// Parses a single conversion specifier.
-// See ConvParser::Run() for post conditions.
-class ConvParser {
- constexpr ConvParser SetFormat(string_view format) const {
- return ConvParser(format, args_, error_, arg_position_, is_positional_);
- }
-
- constexpr ConvParser SetArgs(ConvList args) const {
- return ConvParser(format_, args, error_, arg_position_, is_positional_);
- }
-
- constexpr ConvParser SetError(bool error) const {
- return ConvParser(format_, args_, error_ || error, arg_position_,
- is_positional_);
- }
-
- constexpr ConvParser SetArgPosition(int arg_position) const {
- return ConvParser(format_, args_, error_, arg_position, is_positional_);
- }
-
- // Consumes the next arg and verifies that it matches `conv`.
- // `error_` is set if there is no next arg or if it doesn't match `conv`.
- constexpr ConvParser ConsumeNextArg(char conv) const {
- return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv));
- }
-
- // Verify that positional argument `i.value` matches `conv`.
- // `error_` is set if `i.value` is not a valid argument or if it doesn't
- // match.
- constexpr ConvParser VerifyPositional(Integer i, char conv) const {
- return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv));
- }
-
- // Parse the position of the arg and store it in `arg_position_`.
- constexpr ConvParser ParseArgPosition(Integer arg) const {
- return SetFormat(arg.format).SetArgPosition(arg.value);
- }
-
- // Consume the flags.
- constexpr ConvParser ParseFlags() const {
- return SetFormat(ConsumeAnyOf(format_, "-+ #0"));
- }
-
- // Consume the width.
- // If it is '*', we verify that it matches `args_`. `error_` is set if it
- // doesn't match.
- constexpr ConvParser ParseWidth() const {
- char first_char = GetChar(format_, 0);
-
- if (IsDigit(first_char)) {
- return SetFormat(ParseDigits(format_).format);
- } else if (first_char == '*') {
- if (is_positional_) {
- return VerifyPositional(ParsePositional(ConsumeFront(format_)), '*');
- } else {
- return SetFormat(ConsumeFront(format_)).ConsumeNextArg('*');
- }
- } else {
- return *this;
+template <FormatConversionCharSet... C>
+constexpr bool ValidFormatImpl(string_view format) {
+ int next_arg = 0;
+ const char* p = format.data();
+ const char* const end = p + format.size();
+ constexpr FormatConversionCharSet
+ kAllowedConvs[(std::max)(sizeof...(C), size_t{1})] = {C...};
+ bool used[(std::max)(sizeof...(C), size_t{1})]{};
+ constexpr int kNumArgs = sizeof...(C);
+ while (p != end) {
+ while (p != end && *p != '%') ++p;
+ if (p == end) {
+ break;
}
- }
-
- // Consume the precision.
- // If it is '*', we verify that it matches `args_`. `error_` is set if it
- // doesn't match.
- constexpr ConvParser ParsePrecision() const {
- if (GetChar(format_, 0) != '.') {
- return *this;
- } else if (GetChar(format_, 1) == '*') {
- if (is_positional_) {
- return VerifyPositional(ParsePositional(ConsumeFront(format_, 2)), '*');
- } else {
- return SetFormat(ConsumeFront(format_, 2)).ConsumeNextArg('*');
- }
- } else {
- return SetFormat(ParseDigits(ConsumeFront(format_)).format);
+ if (p + 1 >= end) return false;
+ if (p[1] == '%') {
+ // %%
+ p += 2;
+ continue;
}
- }
-
- // Consume the length characters.
- constexpr ConvParser ParseLength() const {
- return SetFormat(ConsumeAnyOf(format_, "lLhjztq"));
- }
-
- // Consume the conversion character and verify that it matches `args_`.
- // `error_` is set if it doesn't match.
- constexpr ConvParser ParseConversion() const {
- char first_char = GetChar(format_, 0);
- if (first_char == 'v' && *(format_.data() - 1) != '%') {
- return SetError(true);
+ UnboundConversion conv(absl::kConstInit);
+ p = ConsumeUnboundConversion(p + 1, end, &conv, &next_arg);
+ if (p == nullptr) return false;
+ if (conv.arg_position <= 0 || conv.arg_position > kNumArgs) {
+ return false;
}
-
- if (is_positional_) {
- return VerifyPositional({ConsumeFront(format_), arg_position_},
- first_char);
- } else {
- return ConsumeNextArg(first_char).SetFormat(ConsumeFront(format_));
+ if (!Contains(kAllowedConvs[conv.arg_position - 1], conv.conv)) {
+ return false;
}
- }
-
- constexpr ConvParser(string_view format, ConvList args, bool error,
- int arg_position, bool is_positional)
- : format_(format),
- args_(args),
- error_(error),
- arg_position_(arg_position),
- is_positional_(is_positional) {}
-
- public:
- constexpr ConvParser(string_view format, ConvList args, bool is_positional)
- : format_(format),
- args_(args),
- error_(false),
- arg_position_(0),
- is_positional_(is_positional) {}
-
- // Consume the whole conversion specifier.
- // `format()` will be set to the character after the conversion character.
- // `error()` will be set if any of the arguments do not match.
- constexpr ConvParser Run() const {
- ConvParser parser = *this;
-
- if (is_positional_) {
- parser = ParseArgPosition(ParsePositional(format_));
- }
-
- return parser.ParseFlags()
- .ParseWidth()
- .ParsePrecision()
- .ParseLength()
- .ParseConversion();
- }
-
- constexpr string_view format() const { return format_; }
- constexpr ConvList args() const { return args_; }
- constexpr bool error() const { return error_; }
- constexpr bool is_positional() const { return is_positional_; }
-
- private:
- string_view format_;
- // Current list of arguments. If we are not in positional mode we will consume
- // from the front.
- ConvList args_;
- bool error_;
- // Holds the argument position of the conversion character, if we are in
- // positional mode. Otherwise, it is unspecified.
- int arg_position_;
- // Whether we are in positional mode.
- // It changes the behavior of '*' and where to find the converted argument.
- bool is_positional_;
-};
-
-// Parses a whole format expression.
-// See FormatParser::Run().
-class FormatParser {
- static constexpr bool FoundPercent(string_view format) {
- return format.empty() ||
- (GetChar(format, 0) == '%' && GetChar(format, 1) != '%');
- }
-
- // We use an inner function to increase the recursion limit.
- // The inner function consumes up to `limit` characters on every run.
- // This increases the limit from 512 to ~512*limit.
- static constexpr string_view ConsumeNonPercentInner(string_view format) {
- int limit = 20;
- while (!FoundPercent(format) && limit != 0) {
- size_t len = 0;
-
- if (GetChar(format, 0) == '%' && GetChar(format, 1) == '%') {
- len = 2;
- } else {
- len = 1;
+ used[conv.arg_position - 1] = true;
+ for (auto extra : {conv.width, conv.precision}) {
+ if (extra.is_from_arg()) {
+ int pos = extra.get_from_arg();
+ if (pos <= 0 || pos > kNumArgs) return false;
+ used[pos - 1] = true;
+ if (!Contains(kAllowedConvs[pos - 1], '*')) {
+ return false;
+ }
}
-
- format = ConsumeFront(format, len);
- --limit;
}
-
- return format;
}
-
- // Consume characters until the next conversion spec %.
- // It skips %%.
- static constexpr string_view ConsumeNonPercent(string_view format) {
- while (!FoundPercent(format)) {
- format = ConsumeNonPercentInner(format);
+ if (sizeof...(C) != 0) {
+ for (bool b : used) {
+ if (!b) return false;
}
-
- return format;
- }
-
- static constexpr bool IsPositional(string_view format) {
- while (IsDigit(GetChar(format, 0))) {
- format = ConsumeFront(format);
- }
-
- return GetChar(format, 0) == '$';
}
-
- constexpr bool RunImpl(bool is_positional) const {
- // In non-positional mode we require all arguments to be consumed.
- // In positional mode just reaching the end of the format without errors is
- // enough.
- return (format_.empty() && (is_positional || args_.count == 0)) ||
- (!format_.empty() &&
- ValidateArg(
- ConvParser(ConsumeFront(format_), args_, is_positional).Run()));
- }
-
- constexpr bool ValidateArg(ConvParser conv) const {
- return !conv.error() && FormatParser(conv.format(), conv.args())
- .RunImpl(conv.is_positional());
- }
-
- public:
- constexpr FormatParser(string_view format, ConvList args)
- : format_(ConsumeNonPercent(format)), args_(args) {}
-
- // Runs the parser for `format` and `args`.
- // It verifies that the format is valid and that all conversion specifiers
- // match the arguments passed.
- // In non-positional mode it also verfies that all arguments are consumed.
- constexpr bool Run() const {
- return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_)));
- }
-
- private:
- string_view format_;
- // Current list of arguments.
- // If we are not in positional mode we will consume from the front and will
- // have to be empty in the end.
- ConvList args_;
-};
-
-template <FormatConversionCharSet... C>
-constexpr bool ValidFormatImpl(string_view format) {
- return FormatParser(format,
- {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)})
- .Run();
+ return true;
}
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
diff --git a/absl/strings/internal/str_format/checker_test.cc b/absl/strings/internal/str_format/checker_test.cc
index 680517f7..a86bed38 100644
--- a/absl/strings/internal/str_format/checker_test.cc
+++ b/absl/strings/internal/str_format/checker_test.cc
@@ -93,6 +93,7 @@ TEST(StrFormatChecker, ValidFormat) {
ValidFormat<void (*)(), volatile int*>("%p %p"), //
ValidFormat<string_view, const char*, double, void*>(
"string_view=%s const char*=%s double=%f void*=%p)"),
+ ValidFormat<int>("%v"), //
ValidFormat<int>("%% %1$d"), //
ValidFormat<int>("%1$ld"), //
@@ -109,7 +110,9 @@ TEST(StrFormatChecker, ValidFormat) {
ValidFormat<int, double>("%2$.*1$f"), //
ValidFormat<void*, string_view, const char*, double>(
"string_view=%2$s const char*=%3$s double=%4$f void*=%1$p "
- "repeat=%3$s)")};
+ "repeat=%3$s)"),
+ ValidFormat<std::string>("%1$v"),
+ };
for (Case c : trues) {
EXPECT_TRUE(c.result) << c.format;
@@ -130,6 +133,8 @@ TEST(StrFormatChecker, ValidFormat) {
ValidFormat<int>("%*d"), //
ValidFormat<std::string>("%p"), //
ValidFormat<int (*)(int)>("%d"), //
+ ValidFormat<int>("%1v"), //
+ ValidFormat<int>("%.1v"), //
ValidFormat<>("%3$d"), //
ValidFormat<>("%1$r"), //
@@ -138,13 +143,14 @@ TEST(StrFormatChecker, ValidFormat) {
ValidFormat<int>("%1$*2$1d"), //
ValidFormat<int>("%1$1-d"), //
ValidFormat<std::string, int>("%2$*1$s"), //
- ValidFormat<std::string>("%1$p"),
+ ValidFormat<std::string>("%1$p"), //
+ ValidFormat<int>("%1$*2$v"), //
ValidFormat<int, int>("%d %2$d"), //
};
for (Case c : falses) {
- EXPECT_FALSE(c.result) << c.format;
+ EXPECT_FALSE(c.result) << "format<" << c.format << ">";
}
}
diff --git a/absl/strings/internal/str_format/constexpr_parser.h b/absl/strings/internal/str_format/constexpr_parser.h
new file mode 100644
index 00000000..3dc1776b
--- /dev/null
+++ b/absl/strings/internal/str_format/constexpr_parser.h
@@ -0,0 +1,351 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
+#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
+
+#include <cassert>
+#include <cstdint>
+#include <limits>
+
+#include "absl/base/const_init.h"
+#include "absl/strings/internal/str_format/extension.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace str_format_internal {
+
+enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };
+
+// The analyzed properties of a single specified conversion.
+struct UnboundConversion {
+ // This is a user defined default constructor on purpose to skip the
+ // initialization of parts of the object that are not necessary.
+ UnboundConversion() {} // NOLINT
+
+ // This constructor is provided for the static checker. We don't want to do
+ // the unnecessary initialization in the normal case.
+ explicit constexpr UnboundConversion(absl::ConstInitType)
+ : arg_position{}, width{}, precision{} {}
+
+ class InputValue {
+ public:
+ constexpr void set_value(int value) {
+ assert(value >= 0);
+ value_ = value;
+ }
+ constexpr int value() const { return value_; }
+
+ // Marks the value as "from arg". aka the '*' format.
+ // Requires `value >= 1`.
+ // When set, is_from_arg() returns true and get_from_arg() returns the
+ // original value.
+ // `value()`'s return value is unspecified in this state.
+ constexpr void set_from_arg(int value) {
+ assert(value > 0);
+ value_ = -value - 1;
+ }
+ constexpr bool is_from_arg() const { return value_ < -1; }
+ constexpr int get_from_arg() const {
+ assert(is_from_arg());
+ return -value_ - 1;
+ }
+
+ private:
+ int value_ = -1;
+ };
+
+ // No need to initialize. It will always be set in the parser.
+ int arg_position;
+
+ InputValue width;
+ InputValue precision;
+
+ Flags flags = Flags::kBasic;
+ LengthMod length_mod = LengthMod::none;
+ FormatConversionChar conv = FormatConversionCharInternal::kNone;
+};
+
+// Helper tag class for the table below.
+// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
+// conversions.
+class ConvTag {
+ public:
+ constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT
+ : tag_(static_cast<uint8_t>(conversion_char)) {}
+ constexpr ConvTag(LengthMod length_mod) // NOLINT
+ : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
+ constexpr ConvTag(Flags flags) // NOLINT
+ : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
+ constexpr ConvTag() : tag_(0xFF) {}
+
+ constexpr bool is_conv() const { return (tag_ & 0x80) == 0; }
+ constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; }
+ constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
+
+ constexpr FormatConversionChar as_conv() const {
+ assert(is_conv());
+ assert(!is_length());
+ assert(!is_flags());
+ return static_cast<FormatConversionChar>(tag_);
+ }
+ constexpr LengthMod as_length() const {
+ assert(!is_conv());
+ assert(is_length());
+ assert(!is_flags());
+ return static_cast<LengthMod>(tag_ & 0x3F);
+ }
+ constexpr Flags as_flags() const {
+ assert(!is_conv());
+ assert(!is_length());
+ assert(is_flags());
+ return static_cast<Flags>(tag_ & 0x1F);
+ }
+
+ private:
+ uint8_t tag_;
+};
+
+struct ConvTagHolder {
+ using CC = FormatConversionCharInternal;
+ using LM = LengthMod;
+
+ // Abbreviations to fit in the table below.
+ static constexpr auto kFSign = Flags::kSignCol;
+ static constexpr auto kFAlt = Flags::kAlt;
+ static constexpr auto kFPos = Flags::kShowPos;
+ static constexpr auto kFLeft = Flags::kLeft;
+ static constexpr auto kFZero = Flags::kZero;
+
+ static constexpr ConvTag value[256] = {
+ {}, {}, {}, {}, {}, {}, {}, {}, // 00-07
+ {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
+ {}, {}, {}, {}, {}, {}, {}, {}, // 10-17
+ {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
+ kFSign, {}, {}, kFAlt, {}, {}, {}, {}, // !"#$%&'
+ {}, {}, {}, kFPos, {}, kFLeft, {}, {}, // ()*+,-./
+ kFZero, {}, {}, {}, {}, {}, {}, {}, // 01234567
+ {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>?
+ {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
+ {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
+ {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
+ CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
+ {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
+ LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
+ CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw
+ CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}~ DEL
+ {}, {}, {}, {}, {}, {}, {}, {}, // 80-87
+ {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
+ {}, {}, {}, {}, {}, {}, {}, {}, // 90-97
+ {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
+ {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
+ {}, {}, {}, {}, {}, {}, {}, {}, // a8-af
+ {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
+ {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
+ {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
+ {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
+ {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
+ {}, {}, {}, {}, {}, {}, {}, {}, // d8-df
+ {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
+ {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
+ {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
+ {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
+ };
+};
+
+// Keep a single table for all the conversion chars and length modifiers.
+constexpr ConvTag GetTagForChar(char c) {
+ return ConvTagHolder::value[static_cast<unsigned char>(c)];
+}
+
+constexpr bool CheckFastPathSetting(const UnboundConversion& conv) {
+ bool width_precision_needed =
+ conv.width.value() >= 0 || conv.precision.value() >= 0;
+ if (width_precision_needed && conv.flags == Flags::kBasic) {
+#if defined(__clang__)
+ // Some compilers complain about this in constexpr even when not executed,
+ // so only enable the error dump in clang.
+ fprintf(stderr,
+ "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
+ "width=%d precision=%d\n",
+ conv.flags == Flags::kBasic ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
+ conv.precision.value());
+#endif // defined(__clang__)
+ return false;
+ }
+ return true;
+}
+
+constexpr int ParseDigits(char& c, const char*& pos, const char* const end) {
+ int digits = c - '0';
+ // We do not want to overflow `digits` so we consume at most digits10
+ // digits. If there are more digits the parsing will fail later on when the
+ // digit doesn't match the expected characters.
+ int num_digits = std::numeric_limits<int>::digits10;
+ for (;;) {
+ if (ABSL_PREDICT_FALSE(pos == end)) break;
+ c = *pos++;
+ if ('0' > c || c > '9') break;
+ --num_digits;
+ if (ABSL_PREDICT_FALSE(!num_digits)) break;
+ digits = 10 * digits + c - '0';
+ }
+ return digits;
+}
+
+template <bool is_positional>
+constexpr const char* ConsumeConversion(const char* pos, const char* const end,
+ UnboundConversion* conv,
+ int* next_arg) {
+ const char* const original_pos = pos;
+ char c = 0;
+ // Read the next char into `c` and update `pos`. Makes the enclosing function
+ // return nullptr if there are no more chars to read.
+#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
+ do { \
+ if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
+ c = *pos++; \
+ } while (0)
+
+ if (is_positional) {
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
+ conv->arg_position = ParseDigits(c, pos, end);
+ assert(conv->arg_position > 0);
+ if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
+ }
+
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+
+ // We should start with the basic flag on.
+ assert(conv->flags == Flags::kBasic);
+
+ // Any non alpha character makes this conversion not basic.
+ // This includes flags (-+ #0), width (1-9, *) or precision (.).
+ // All conversion characters and length modifiers are alpha characters.
+ if (c < 'A') {
+ while (c <= '0') {
+ auto tag = GetTagForChar(c);
+ if (tag.is_flags()) {
+ conv->flags = conv->flags | tag.as_flags();
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else {
+ break;
+ }
+ }
+
+ if (c <= '9') {
+ if (c >= '0') {
+ int maybe_width = ParseDigits(c, pos, end);
+ if (!is_positional && c == '$') {
+ if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
+ // Positional conversion.
+ *next_arg = -1;
+ return ConsumeConversion<true>(original_pos, end, conv, next_arg);
+ }
+ conv->flags = conv->flags | Flags::kNonBasic;
+ conv->width.set_value(maybe_width);
+ } else if (c == '*') {
+ conv->flags = conv->flags | Flags::kNonBasic;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if (is_positional) {
+ if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
+ conv->width.set_from_arg(ParseDigits(c, pos, end));
+ if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else {
+ conv->width.set_from_arg(++*next_arg);
+ }
+ }
+ }
+
+ if (c == '.') {
+ conv->flags = conv->flags | Flags::kNonBasic;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if ('0' <= c && c <= '9') {
+ conv->precision.set_value(ParseDigits(c, pos, end));
+ } else if (c == '*') {
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if (is_positional) {
+ if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
+ conv->precision.set_from_arg(ParseDigits(c, pos, end));
+ if (c != '$') return nullptr;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else {
+ conv->precision.set_from_arg(++*next_arg);
+ }
+ } else {
+ conv->precision.set_value(0);
+ }
+ }
+ }
+
+ auto tag = GetTagForChar(c);
+
+ if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) {
+ return nullptr;
+ }
+
+ if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
+ if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
+
+ // It is a length modifier.
+ using str_format_internal::LengthMod;
+ LengthMod length_mod = tag.as_length();
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if (c == 'h' && length_mod == LengthMod::h) {
+ conv->length_mod = LengthMod::hh;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else if (c == 'l' && length_mod == LengthMod::l) {
+ conv->length_mod = LengthMod::ll;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else {
+ conv->length_mod = length_mod;
+ }
+ tag = GetTagForChar(c);
+
+ if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
+ if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
+ }
+
+ assert(CheckFastPathSetting(*conv));
+ (void)(&CheckFastPathSetting);
+
+ conv->conv = tag.as_conv();
+ if (!is_positional) conv->arg_position = ++*next_arg;
+ return pos;
+}
+
+// Consume conversion spec prefix (not including '%') of [p, end) if valid.
+// Examples of valid specs would be e.g.: "s", "d", "-12.6f".
+// If valid, it returns the first character following the conversion spec,
+// and the spec part is broken down and returned in 'conv'.
+// If invalid, returns nullptr.
+constexpr const char* ConsumeUnboundConversion(const char* p, const char* end,
+ UnboundConversion* conv,
+ int* next_arg) {
+ if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
+ return ConsumeConversion<false>(p, end, conv, next_arg);
+}
+
+} // namespace str_format_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc
index 300612b7..8b5a27ed 100644
--- a/absl/strings/internal/str_format/convert_test.cc
+++ b/absl/strings/internal/str_format/convert_test.cc
@@ -1241,9 +1241,9 @@ TEST_F(FormatConvertTest, GlibcHasCorrectTraits) {
const NativePrintfTraits &native_traits = VerifyNativeImplementation();
// If one of the following tests break then it is either because the above PP
// macro guards failed to exclude a new platform (likely) or because something
- // has changed in the implemention of glibc sprintf float formatting behavior.
- // If the latter, then the code that computes these flags needs to be
- // revisited and/or possibly the StrFormat implementation.
+ // has changed in the implementation of glibc sprintf float formatting
+ // behavior. If the latter, then the code that computes these flags needs to
+ // be revisited and/or possibly the StrFormat implementation.
EXPECT_TRUE(native_traits.hex_float_has_glibc_rounding);
EXPECT_TRUE(native_traits.hex_float_prefers_denormal_repr);
EXPECT_TRUE(
diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h
index 603bd49d..8de42d2c 100644
--- a/absl/strings/internal/str_format/extension.h
+++ b/absl/strings/internal/str_format/extension.h
@@ -273,7 +273,7 @@ struct FormatConversionSpecImplFriend;
class FormatConversionSpecImpl {
public:
- // Width and precison are not specified, no flags are set.
+ // Width and precision are not specified, no flags are set.
bool is_basic() const { return flags_ == Flags::kBasic; }
bool has_left_flag() const { return FlagsContains(flags_, Flags::kLeft); }
bool has_show_pos_flag() const {
diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc
index 8e497852..8edf520d 100644
--- a/absl/strings/internal/str_format/float_conversion.cc
+++ b/absl/strings/internal/str_format/float_conversion.cc
@@ -711,12 +711,12 @@ bool IncrementNibble(size_t nibble_index, Int* n) {
constexpr size_t kShift = sizeof(Int) * 8 - 1;
constexpr size_t kNumNibbles = sizeof(Int) * 8 / 4;
Int before = *n >> kShift;
- // Here we essentially want to take the number 1 and move it into the requsted
- // nibble, then add it to *n to effectively increment the nibble. However,
- // ASan will complain if we try to shift the 1 beyond the limits of the Int,
- // i.e., if the nibble_index is out of range. So therefore we check for this
- // and if we are out of range we just add 0 which leaves *n unchanged, which
- // seems like the reasonable thing to do in that case.
+ // Here we essentially want to take the number 1 and move it into the
+ // requested nibble, then add it to *n to effectively increment the nibble.
+ // However, ASan will complain if we try to shift the 1 beyond the limits of
+ // the Int, i.e., if the nibble_index is out of range. So therefore we check
+ // for this and if we are out of range we just add 0 which leaves *n
+ // unchanged, which seems like the reasonable thing to do in that case.
*n += ((nibble_index >= kNumNibbles)
? 0
: (Int{1} << static_cast<int>(nibble_index * 4)));
@@ -937,7 +937,7 @@ void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp,
// =============== Exponent ==================
constexpr size_t kBufSizeForExpDecRepr =
- numbers_internal::kFastToBufferSize // requred for FastIntToBuffer
+ numbers_internal::kFastToBufferSize // required for FastIntToBuffer
+ 1 // 'p' or 'P'
+ 1; // '+' or '-'
char exp_buffer[kBufSizeForExpDecRepr];
@@ -1015,7 +1015,7 @@ struct Buffer {
--end;
}
- char &back() {
+ char &back() const {
assert(begin < end);
return end[-1];
}
@@ -1102,7 +1102,7 @@ void PrintExponent(int exp, char e, Buffer *out) {
template <typename Float, typename Int>
constexpr bool CanFitMantissa() {
return
-#if defined(__clang__) && !defined(__SSE3__)
+#if defined(__clang__) && (__clang_major__ < 9) && !defined(__SSE3__)
// Workaround for clang bug: https://bugs.llvm.org/show_bug.cgi?id=38289
// Casting from long double to uint64_t is miscompiled and drops bits.
(!std::is_same<Float, long double>::value ||
diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc
index f9bb6615..5aaab698 100644
--- a/absl/strings/internal/str_format/parser.cc
+++ b/absl/strings/internal/str_format/parser.cc
@@ -31,211 +31,14 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
-using CC = FormatConversionCharInternal;
-using LM = LengthMod;
+// Define the array for non-constexpr uses.
+constexpr ConvTag ConvTagHolder::value[256];
-// Abbreviations to fit in the table below.
-constexpr auto f_sign = Flags::kSignCol;
-constexpr auto f_alt = Flags::kAlt;
-constexpr auto f_pos = Flags::kShowPos;
-constexpr auto f_left = Flags::kLeft;
-constexpr auto f_zero = Flags::kZero;
-
-ABSL_CONST_INIT const ConvTag kTags[256] = {
- {}, {}, {}, {}, {}, {}, {}, {}, // 00-07
- {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
- {}, {}, {}, {}, {}, {}, {}, {}, // 10-17
- {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
- f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&'
- {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./
- f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567
- {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>?
- {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
- {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
- {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
- CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
- {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
- LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
- CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw
- CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}!
- {}, {}, {}, {}, {}, {}, {}, {}, // 80-87
- {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
- {}, {}, {}, {}, {}, {}, {}, {}, // 90-97
- {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
- {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
- {}, {}, {}, {}, {}, {}, {}, {}, // a8-af
- {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
- {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
- {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
- {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
- {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
- {}, {}, {}, {}, {}, {}, {}, {}, // d8-df
- {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
- {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
- {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
- {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
-};
-
-namespace {
-
-bool CheckFastPathSetting(const UnboundConversion& conv) {
- bool width_precision_needed =
- conv.width.value() >= 0 || conv.precision.value() >= 0;
- if (width_precision_needed && conv.flags == Flags::kBasic) {
- fprintf(stderr,
- "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
- "width=%d precision=%d\n",
- conv.flags == Flags::kBasic ? 1 : 0,
- FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
- FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
- FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
- FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
- FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
- conv.precision.value());
- return false;
- }
- return true;
-}
-
-template <bool is_positional>
-const char *ConsumeConversion(const char *pos, const char *const end,
- UnboundConversion *conv, int *next_arg) {
- const char* const original_pos = pos;
- char c;
- // Read the next char into `c` and update `pos`. Returns false if there are
- // no more chars to read.
-#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
- do { \
- if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
- c = *pos++; \
- } while (0)
-
- const auto parse_digits = [&] {
- int digits = c - '0';
- // We do not want to overflow `digits` so we consume at most digits10
- // digits. If there are more digits the parsing will fail later on when the
- // digit doesn't match the expected characters.
- int num_digits = std::numeric_limits<int>::digits10;
- for (;;) {
- if (ABSL_PREDICT_FALSE(pos == end)) break;
- c = *pos++;
- if (!std::isdigit(c)) break;
- --num_digits;
- if (ABSL_PREDICT_FALSE(!num_digits)) break;
- digits = 10 * digits + c - '0';
- }
- return digits;
- };
-
- if (is_positional) {
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
- conv->arg_position = parse_digits();
- assert(conv->arg_position > 0);
- if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
- }
-
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
-
- // We should start with the basic flag on.
- assert(conv->flags == Flags::kBasic);
-
- // Any non alpha character makes this conversion not basic.
- // This includes flags (-+ #0), width (1-9, *) or precision (.).
- // All conversion characters and length modifiers are alpha characters.
- if (c < 'A') {
- while (c <= '0') {
- auto tag = GetTagForChar(c);
- if (tag.is_flags()) {
- conv->flags = conv->flags | tag.as_flags();
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else {
- break;
- }
- }
-
- if (c <= '9') {
- if (c >= '0') {
- int maybe_width = parse_digits();
- if (!is_positional && c == '$') {
- if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
- // Positional conversion.
- *next_arg = -1;
- return ConsumeConversion<true>(original_pos, end, conv, next_arg);
- }
- conv->flags = conv->flags | Flags::kNonBasic;
- conv->width.set_value(maybe_width);
- } else if (c == '*') {
- conv->flags = conv->flags | Flags::kNonBasic;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (is_positional) {
- if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
- conv->width.set_from_arg(parse_digits());
- if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else {
- conv->width.set_from_arg(++*next_arg);
- }
- }
- }
-
- if (c == '.') {
- conv->flags = conv->flags | Flags::kNonBasic;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (std::isdigit(c)) {
- conv->precision.set_value(parse_digits());
- } else if (c == '*') {
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (is_positional) {
- if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
- conv->precision.set_from_arg(parse_digits());
- if (c != '$') return nullptr;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else {
- conv->precision.set_from_arg(++*next_arg);
- }
- } else {
- conv->precision.set_value(0);
- }
- }
- }
-
- auto tag = GetTagForChar(c);
-
- if (*(pos - 1) == 'v' && *(pos - 2) != '%') {
- return nullptr;
- }
-
- if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
- if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
-
- // It is a length modifier.
- using str_format_internal::LengthMod;
- LengthMod length_mod = tag.as_length();
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (c == 'h' && length_mod == LengthMod::h) {
- conv->length_mod = LengthMod::hh;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else if (c == 'l' && length_mod == LengthMod::l) {
- conv->length_mod = LengthMod::ll;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else {
- conv->length_mod = length_mod;
- }
- tag = GetTagForChar(c);
- if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
- }
-
- assert(CheckFastPathSetting(*conv));
- (void)(&CheckFastPathSetting);
-
- conv->conv = tag.as_conv();
- if (!is_positional) conv->arg_position = ++*next_arg;
- return pos;
+ABSL_ATTRIBUTE_NOINLINE const char* ConsumeUnboundConversionNoInline(
+ const char* p, const char* end, UnboundConversion* conv, int* next_arg) {
+ return ConsumeUnboundConversion(p, end, conv, next_arg);
}
-} // namespace
-
std::string LengthModToString(LengthMod v) {
switch (v) {
case LengthMod::h:
@@ -262,12 +65,6 @@ std::string LengthModToString(LengthMod v) {
return "";
}
-const char *ConsumeUnboundConversion(const char *p, const char *end,
- UnboundConversion *conv, int *next_arg) {
- if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
- return ConsumeConversion<false>(p, end, conv, next_arg);
-}
-
struct ParsedFormatBase::ParsedFormatConsumer {
explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
: parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h
index a81bac83..35b6d49c 100644
--- a/absl/strings/internal/str_format/parser.h
+++ b/absl/strings/internal/str_format/parser.h
@@ -29,111 +29,18 @@
#include <vector>
#include "absl/strings/internal/str_format/checker.h"
+#include "absl/strings/internal/str_format/constexpr_parser.h"
#include "absl/strings/internal/str_format/extension.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
-enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };
-
std::string LengthModToString(LengthMod v);
-// The analyzed properties of a single specified conversion.
-struct UnboundConversion {
- UnboundConversion() {}
-
- class InputValue {
- public:
- void set_value(int value) {
- assert(value >= 0);
- value_ = value;
- }
- int value() const { return value_; }
-
- // Marks the value as "from arg". aka the '*' format.
- // Requires `value >= 1`.
- // When set, is_from_arg() return true and get_from_arg() returns the
- // original value.
- // `value()`'s return value is unspecfied in this state.
- void set_from_arg(int value) {
- assert(value > 0);
- value_ = -value - 1;
- }
- bool is_from_arg() const { return value_ < -1; }
- int get_from_arg() const {
- assert(is_from_arg());
- return -value_ - 1;
- }
-
- private:
- int value_ = -1;
- };
-
- // No need to initialize. It will always be set in the parser.
- int arg_position;
-
- InputValue width;
- InputValue precision;
-
- Flags flags = Flags::kBasic;
- LengthMod length_mod = LengthMod::none;
- FormatConversionChar conv = FormatConversionCharInternal::kNone;
-};
-
-// Consume conversion spec prefix (not including '%') of [p, end) if valid.
-// Examples of valid specs would be e.g.: "s", "d", "-12.6f".
-// If valid, it returns the first character following the conversion spec,
-// and the spec part is broken down and returned in 'conv'.
-// If invalid, returns nullptr.
-const char* ConsumeUnboundConversion(const char* p, const char* end,
- UnboundConversion* conv, int* next_arg);
-
-// Helper tag class for the table below.
-// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
-// conversions.
-class ConvTag {
- public:
- constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT
- : tag_(static_cast<uint8_t>(conversion_char)) {}
- constexpr ConvTag(LengthMod length_mod) // NOLINT
- : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
- constexpr ConvTag(Flags flags) // NOLINT
- : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
- constexpr ConvTag() : tag_(0xFF) {}
-
- bool is_conv() const { return (tag_ & 0x80) == 0; }
- bool is_length() const { return (tag_ & 0xC0) == 0x80; }
- bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
-
- FormatConversionChar as_conv() const {
- assert(is_conv());
- assert(!is_length());
- assert(!is_flags());
- return static_cast<FormatConversionChar>(tag_);
- }
- LengthMod as_length() const {
- assert(!is_conv());
- assert(is_length());
- assert(!is_flags());
- return static_cast<LengthMod>(tag_ & 0x3F);
- }
- Flags as_flags() const {
- assert(!is_conv());
- assert(!is_length());
- assert(is_flags());
- return static_cast<Flags>(tag_ & 0x1F);
- }
-
- private:
- uint8_t tag_;
-};
-
-extern const ConvTag kTags[256];
-// Keep a single table for all the conversion chars and length modifiers.
-inline ConvTag GetTagForChar(char c) {
- return kTags[static_cast<unsigned char>(c)];
-}
+const char* ConsumeUnboundConversionNoInline(const char* p, const char* end,
+ UnboundConversion* conv,
+ int* next_arg);
// Parse the format string provided in 'src' and pass the identified items into
// 'consumer'.
@@ -187,7 +94,7 @@ bool ParseFormatString(string_view src, Consumer consumer) {
}
} else if (percent[1] != '%') {
UnboundConversion conv;
- p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
+ p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg);
if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
conv, string_view(percent + 1,
diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc
index fe0d2963..021f6a87 100644
--- a/absl/strings/internal/str_format/parser_test.cc
+++ b/absl/strings/internal/str_format/parser_test.cc
@@ -110,10 +110,14 @@ TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) {
{__LINE__, "ba", "", "ba"}, // 'b' is invalid
{__LINE__, "l", "", "l" }, // just length mod isn't okay
{__LINE__, "d", "d", "" }, // basic
+ {__LINE__, "v", "v", "" }, // basic
{__LINE__, "d ", "d", " " }, // leave suffix
{__LINE__, "dd", "d", "d" }, // don't be greedy
{__LINE__, "d9", "d", "9" }, // leave non-space suffix
{__LINE__, "dzz", "d", "zz"}, // length mod as suffix
+ {__LINE__, "3v", "", "3v"}, // 'v' cannot have modifiers
+ {__LINE__, "hv", "", "hv"}, // 'v' cannot have modifiers
+ {__LINE__, "1$v", "1$v", ""}, // 'v' can use posix syntax
{__LINE__, "1$*2$d", "1$*2$d", "" }, // arg indexing and * allowed.
{__LINE__, "0-14.3hhd", "0-14.3hhd", ""}, // precision, width
{__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""}, // flags
diff --git a/absl/strings/internal/stringify_sink.cc b/absl/strings/internal/stringify_sink.cc
new file mode 100644
index 00000000..7c6995ab
--- /dev/null
+++ b/absl/strings/internal/stringify_sink.cc
@@ -0,0 +1,28 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/stringify_sink.h"
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace strings_internal {
+
+void StringifySink::Append(size_t count, char ch) { buffer_.append(count, ch); }
+
+void StringifySink::Append(string_view v) {
+ buffer_.append(v.data(), v.size());
+}
+
+} // namespace strings_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/strings/internal/stringify_sink.h b/absl/strings/internal/stringify_sink.h
new file mode 100644
index 00000000..fc3747bb
--- /dev/null
+++ b/absl/strings/internal/stringify_sink.h
@@ -0,0 +1,57 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_STRINGIFY_SINK_H_
+#define ABSL_STRINGS_INTERNAL_STRINGIFY_SINK_H_
+
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+
+namespace strings_internal {
+class StringifySink {
+ public:
+ void Append(size_t count, char ch);
+
+ void Append(string_view v);
+
+ // Support `absl::Format(&sink, format, args...)`.
+ friend void AbslFormatFlush(StringifySink* sink, absl::string_view v) {
+ sink->Append(v);
+ }
+
+ private:
+ template <typename T>
+ friend string_view ExtractStringification(StringifySink& sink, const T& v);
+
+ std::string buffer_;
+};
+
+template <typename T>
+string_view ExtractStringification(StringifySink& sink, const T& v) {
+ AbslStringify(sink, v);
+ return sink.buffer_;
+}
+
+} // namespace strings_internal
+
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_STRINGIFY_SINK_H_
diff --git a/absl/strings/match.cc b/absl/strings/match.cc
index 2d672509..b65cbc67 100644
--- a/absl/strings/match.cc
+++ b/absl/strings/match.cc
@@ -13,6 +13,7 @@
// limitations under the License.
#include "absl/strings/match.h"
+#include "absl/strings/ascii.h"
#include "absl/strings/internal/memutil.h"
@@ -27,6 +28,27 @@ bool EqualsIgnoreCase(absl::string_view piece1,
// memcasecmp uses absl::ascii_tolower().
}
+bool StrContainsIgnoreCase(absl::string_view haystack,
+ absl::string_view needle) noexcept {
+ while (haystack.size() >= needle.size()) {
+ if (StartsWithIgnoreCase(haystack, needle)) return true;
+ haystack.remove_prefix(1);
+ }
+ return false;
+}
+
+bool StrContainsIgnoreCase(absl::string_view haystack,
+ char needle) noexcept {
+ char upper_needle = absl::ascii_toupper(static_cast<unsigned char>(needle));
+ char lower_needle = absl::ascii_tolower(static_cast<unsigned char>(needle));
+ if (upper_needle == lower_needle) {
+ return StrContains(haystack, needle);
+ } else {
+ const char both_cstr[3] = {lower_needle, upper_needle, '\0'};
+ return haystack.find_first_of(both_cstr) != absl::string_view::npos;
+ }
+}
+
bool StartsWithIgnoreCase(absl::string_view text,
absl::string_view prefix) noexcept {
return (text.size() >= prefix.size()) &&
diff --git a/absl/strings/match.h b/absl/strings/match.h
index 038cbb3f..1dc0beaf 100644
--- a/absl/strings/match.h
+++ b/absl/strings/match.h
@@ -72,6 +72,15 @@ inline bool EndsWith(absl::string_view text,
memcmp(text.data() + (text.size() - suffix.size()), suffix.data(),
suffix.size()) == 0);
}
+// StrContainsIgnoreCase()
+//
+// Returns whether a given ASCII string `haystack` contains the ASCII substring
+// `needle`, ignoring case in the comparison.
+bool StrContainsIgnoreCase(absl::string_view haystack,
+ absl::string_view needle) noexcept;
+
+bool StrContainsIgnoreCase(absl::string_view haystack,
+ char needle) noexcept;
// EqualsIgnoreCase()
//
diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc
index 5841bc1b..f063b4ea 100644
--- a/absl/strings/match_test.cc
+++ b/absl/strings/match_test.cc
@@ -124,4 +124,48 @@ TEST(MatchTest, EndsWithIgnoreCase) {
EXPECT_FALSE(absl::EndsWithIgnoreCase("", "fo"));
}
+TEST(MatchTest, ContainsIgnoreCase) {
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("foo", "foo"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("FOO", "Foo"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("--FOO", "Foo"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("FOO--", "Foo"));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("BAR", "Foo"));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("BAR", "Foo"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("123456", "123456"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("123456", "234"));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("", ""));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase("abc", ""));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("", "a"));
+}
+
+TEST(MatchTest, ContainsCharIgnoreCase) {
+ absl::string_view a("AaBCdefg!");
+ absl::string_view b("AaBCd!");
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'a'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'A'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'b'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'B'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'e'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, 'E'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(a, 'h'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(a, 'H'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(a, '!'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(a, '?'));
+
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'a'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'A'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'b'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, 'B'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'e'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'E'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'h'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, 'H'));
+ EXPECT_TRUE(absl::StrContainsIgnoreCase(b, '!'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase(b, '?'));
+
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("", 'a'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("", 'A'));
+ EXPECT_FALSE(absl::StrContainsIgnoreCase("", '0'));
+}
+
} // namespace
diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc
index 2987158e..c2b861ae 100644
--- a/absl/strings/numbers.cc
+++ b/absl/strings/numbers.cc
@@ -219,7 +219,7 @@ char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
if (i < 0) {
*buffer++ = '-';
// We need to do the negation in modular (i.e., "unsigned")
- // arithmetic; MSVC++ apprently warns for plain "-u", so
+ // arithmetic; MSVC++ apparently warns for plain "-u", so
// we write the equivalent expression "0 - u" instead.
u = 0 - u;
}
diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc
index 6981347a..6c198f85 100644
--- a/absl/strings/str_cat.cc
+++ b/absl/strings/str_cat.cc
@@ -30,88 +30,6 @@
namespace absl {
ABSL_NAMESPACE_BEGIN
-namespace strings_internal {
-void StringifySink::Append(size_t count, char ch) { buffer_.append(count, ch); }
-
-void StringifySink::Append(string_view v) {
- buffer_.append(v.data(), v.size());
-}
-
-bool StringifySink::PutPaddedString(string_view v, int width, int precision,
- bool left) {
- size_t space_remaining = 0;
-
- if (width >= 0) space_remaining = static_cast<size_t>(width);
-
- size_t n = v.size();
-
- if (precision >= 0) n = (std::min)(n, static_cast<size_t>(precision));
-
- string_view shown(v.data(), n);
-
- if (shown.size() < space_remaining) {
- space_remaining = space_remaining - shown.size();
- } else {
- space_remaining = 0;
- }
-
- if (!left) Append(space_remaining, ' ');
- Append(shown);
- if (left) Append(space_remaining, ' ');
- return true;
-}
-
-} // namespace strings_internal
-
-AlphaNum::AlphaNum(Hex hex) {
- static_assert(numbers_internal::kFastToBufferSize >= 32,
- "This function only works when output buffer >= 32 bytes long");
- char* const end = &digits_[numbers_internal::kFastToBufferSize];
- auto real_width =
- absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16);
- if (real_width >= hex.width) {
- piece_ = absl::string_view(end - real_width, real_width);
- } else {
- // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and
- // max pad width can be up to 20.
- std::memset(end - 32, hex.fill, 16);
- // Patch up everything else up to the real_width.
- std::memset(end - real_width - 16, hex.fill, 16);
- piece_ = absl::string_view(end - hex.width, hex.width);
- }
-}
-
-AlphaNum::AlphaNum(Dec dec) {
- assert(dec.width <= numbers_internal::kFastToBufferSize);
- char* const end = &digits_[numbers_internal::kFastToBufferSize];
- char* const minfill = end - dec.width;
- char* writer = end;
- uint64_t value = dec.value;
- bool neg = dec.neg;
- while (value > 9) {
- *--writer = '0' + (value % 10);
- value /= 10;
- }
- *--writer = '0' + static_cast<char>(value);
- if (neg) *--writer = '-';
-
- ptrdiff_t fillers = writer - minfill;
- if (fillers > 0) {
- // Tricky: if the fill character is ' ', then it's <fill><+/-><digits>
- // But...: if the fill character is '0', then it's <+/-><fill><digits>
- bool add_sign_again = false;
- if (neg && dec.fill == '0') { // If filling with '0',
- ++writer; // ignore the sign we just added
- add_sign_again = true; // and re-add the sign later.
- }
- writer -= fillers;
- std::fill_n(writer, fillers, dec.fill);
- if (add_sign_again) *--writer = '-';
- }
-
- piece_ = absl::string_view(writer, static_cast<size_t>(end - writer));
-}
-
// ----------------------------------------------------------------------
// StrCat()
// This merges the given strings or integers, with no delimiter. This
@@ -177,12 +95,12 @@ namespace strings_internal {
std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
std::string result;
size_t total_size = 0;
- for (const absl::string_view& piece : pieces) total_size += piece.size();
+ for (absl::string_view piece : pieces) total_size += piece.size();
strings_internal::STLStringResizeUninitialized(&result, total_size);
char* const begin = &result[0];
char* out = begin;
- for (const absl::string_view& piece : pieces) {
+ for (absl::string_view piece : pieces) {
const size_t this_size = piece.size();
if (this_size != 0) {
memcpy(out, piece.data(), this_size);
@@ -206,7 +124,7 @@ void AppendPieces(std::string* dest,
std::initializer_list<absl::string_view> pieces) {
size_t old_size = dest->size();
size_t total_size = old_size;
- for (const absl::string_view& piece : pieces) {
+ for (absl::string_view piece : pieces) {
ASSERT_NO_OVERLAP(*dest, piece);
total_size += piece.size();
}
@@ -214,7 +132,7 @@ void AppendPieces(std::string* dest,
char* const begin = &(*dest)[0];
char* out = begin + old_size;
- for (const absl::string_view& piece : pieces) {
+ for (absl::string_view piece : pieces) {
const size_t this_size = piece.size();
if (this_size != 0) {
memcpy(out, piece.data(), this_size);
diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h
index 6ee88f14..fcd48c4e 100644
--- a/absl/strings/str_cat.h
+++ b/absl/strings/str_cat.h
@@ -48,19 +48,58 @@
// `StrCat()` or `StrAppend()`. You may specify a minimum hex field width using
// a `PadSpec` enum.
//
+// User-defined types can be formatted with the `AbslStringify()` customization
+// point. The API relies on detecting an overload in the user-defined type's
+// namespace of a free (non-member) `AbslStringify()` function as a definition
+// (typically declared as a friend and implemented in-line)
+// with the following signature:
+//
+// class MyClass { ... };
+//
+// template <typename Sink>
+// void AbslStringify(Sink& sink, const MyClass& value);
+//
+// An `AbslStringify()` overload for a type should only be declared in the same
+// file and namespace as said type.
+//
+// Note that `AbslStringify()` also supports use with `absl::StrFormat()` and
+// `absl::Substitute()`.
+//
+// Example:
+//
+// struct Point {
+// // To add formatting support to `Point`, we simply need to add a free
+// // (non-member) function `AbslStringify()`. This method specifies how
+// // Point should be printed when absl::StrCat() is called on it. You can add
+// // such a free function using a friend declaration within the body of the
+// // class. The sink parameter is a templated type to avoid requiring
+// // dependencies.
+// template <typename Sink> friend void AbslStringify(Sink&
+// sink, const Point& p) {
+// absl::Format(&sink, "(%v, %v)", p.x, p.y);
+// }
+//
+// int x;
+// int y;
+// };
// -----------------------------------------------------------------------------
#ifndef ABSL_STRINGS_STR_CAT_H_
#define ABSL_STRINGS_STR_CAT_H_
+#include <algorithm>
#include <array>
#include <cstdint>
+#include <cstring>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
+#include "absl/base/attributes.h"
#include "absl/base/port.h"
+#include "absl/strings/internal/has_absl_stringify.h"
+#include "absl/strings/internal/stringify_sink.h"
#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
@@ -77,32 +116,6 @@ struct AlphaNumBuffer {
size_t size;
};
-class StringifySink {
- public:
- void Append(size_t count, char ch);
-
- void Append(string_view v);
-
- bool PutPaddedString(string_view v, int width, int precision, bool left);
-
- // Support `absl::Format(&sink, format, args...)`.
- friend void AbslFormatFlush(StringifySink* sink, absl::string_view v) {
- sink->Append(v);
- }
-
- template <typename T>
- friend string_view ExtractStringification(StringifySink& sink, const T& v);
-
- private:
- std::string buffer_;
-};
-
-template <typename T>
-string_view ExtractStringification(StringifySink& sink, const T& v) {
- AbslStringify(sink, v);
- return sink.buffer_;
-}
-
} // namespace strings_internal
// Enum that specifies the number of significant digits to return in a `Hex` or
@@ -191,6 +204,27 @@ struct Hex {
explicit Hex(Pointee* v, PadSpec spec = absl::kNoPad)
: Hex(spec, reinterpret_cast<uintptr_t>(v)) {}
+ template <typename S>
+ friend void AbslStringify(S& sink, Hex hex) {
+ static_assert(
+ numbers_internal::kFastToBufferSize >= 32,
+ "This function only works when output buffer >= 32 bytes long");
+ char buffer[numbers_internal::kFastToBufferSize];
+ char* const end = &buffer[numbers_internal::kFastToBufferSize];
+ auto real_width =
+ absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16);
+ if (real_width >= hex.width) {
+ sink.Append(absl::string_view(end - real_width, real_width));
+ } else {
+ // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and
+ // max pad width can be up to 20.
+ std::memset(end - 32, hex.fill, 16);
+ // Patch up everything else up to the real_width.
+ std::memset(end - real_width - 16, hex.fill, 16);
+ sink.Append(absl::string_view(end - hex.width, hex.width));
+ }
+ }
+
private:
Hex(PadSpec spec, uint64_t v)
: value(v),
@@ -225,6 +259,38 @@ struct Dec {
: spec - absl::kZeroPad2 + 2),
fill(spec >= absl::kSpacePad2 ? ' ' : '0'),
neg(v < 0) {}
+
+ template <typename S>
+ friend void AbslStringify(S& sink, Dec dec) {
+ assert(dec.width <= numbers_internal::kFastToBufferSize);
+ char buffer[numbers_internal::kFastToBufferSize];
+ char* const end = &buffer[numbers_internal::kFastToBufferSize];
+ char* const minfill = end - dec.width;
+ char* writer = end;
+ uint64_t val = dec.value;
+ while (val > 9) {
+ *--writer = '0' + (val % 10);
+ val /= 10;
+ }
+ *--writer = '0' + static_cast<char>(val);
+ if (dec.neg) *--writer = '-';
+
+ ptrdiff_t fillers = writer - minfill;
+ if (fillers > 0) {
+ // Tricky: if the fill character is ' ', then it's <fill><+/-><digits>
+ // But...: if the fill character is '0', then it's <+/-><fill><digits>
+ bool add_sign_again = false;
+ if (dec.neg && dec.fill == '0') { // If filling with '0',
+ ++writer; // ignore the sign we just added
+ add_sign_again = true; // and re-add the sign later.
+ }
+ writer -= fillers;
+ std::fill_n(writer, fillers, dec.fill);
+ if (add_sign_again) *--writer = '-';
+ }
+
+ sink.Append(absl::string_view(writer, static_cast<size_t>(end - writer)));
+ }
};
// -----------------------------------------------------------------------------
@@ -232,17 +298,10 @@ struct Dec {
// -----------------------------------------------------------------------------
//
// The `AlphaNum` class acts as the main parameter type for `StrCat()` and
-// `StrAppend()`, providing efficient conversion of numeric, boolean, and
-// hexadecimal values (through the `Hex` type) into strings.
-
-template <typename T, typename = void>
-struct HasAbslStringify : std::false_type {};
-
-template <typename T>
-struct HasAbslStringify<T, std::enable_if_t<std::is_void<decltype(AbslStringify(
- std::declval<strings_internal::StringifySink&>(),
- std::declval<const T&>()))>::value>>
- : std::true_type {};
+// `StrAppend()`, providing efficient conversion of numeric, boolean, decimal,
+// and hexadecimal values (through the `Dec` and `Hex` types) into strings.
+// `AlphaNum` should only be used as a function parameter. Do not instantiate
+// `AlphaNum` directly as a stack variable.
class AlphaNum {
public:
@@ -279,28 +338,30 @@ class AlphaNum {
AlphaNum(double f) // NOLINT(runtime/explicit)
: piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
- AlphaNum(Hex hex); // NOLINT(runtime/explicit)
- AlphaNum(Dec dec); // NOLINT(runtime/explicit)
-
template <size_t size>
AlphaNum( // NOLINT(runtime/explicit)
- const strings_internal::AlphaNumBuffer<size>& buf)
+ const strings_internal::AlphaNumBuffer<size>& buf
+ ABSL_ATTRIBUTE_LIFETIME_BOUND)
: piece_(&buf.data[0], buf.size) {}
- AlphaNum(const char* c_str) // NOLINT(runtime/explicit)
- : piece_(NullSafeStringView(c_str)) {} // NOLINT(runtime/explicit)
- AlphaNum(absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit)
+ AlphaNum(const char* c_str // NOLINT(runtime/explicit)
+ ABSL_ATTRIBUTE_LIFETIME_BOUND)
+ : piece_(NullSafeStringView(c_str)) {}
+ AlphaNum(absl::string_view pc // NOLINT(runtime/explicit)
+ ABSL_ATTRIBUTE_LIFETIME_BOUND)
+ : piece_(pc) {}
template <typename T, typename = typename std::enable_if<
- HasAbslStringify<T>::value>::type>
- AlphaNum( // NOLINT(runtime/explicit)
- const T& v, // NOLINT(runtime/explicit)
- strings_internal::StringifySink&& sink = {}) // NOLINT(runtime/explicit)
+ strings_internal::HasAbslStringify<T>::value>::type>
+ AlphaNum( // NOLINT(runtime/explicit)
+ const T& v ABSL_ATTRIBUTE_LIFETIME_BOUND,
+ strings_internal::StringifySink&& sink ABSL_ATTRIBUTE_LIFETIME_BOUND = {})
: piece_(strings_internal::ExtractStringification(sink, v)) {}
template <typename Allocator>
AlphaNum( // NOLINT(runtime/explicit)
- const std::basic_string<char, std::char_traits<char>, Allocator>& str)
+ const std::basic_string<char, std::char_traits<char>, Allocator>& str
+ ABSL_ATTRIBUTE_LIFETIME_BOUND)
: piece_(str) {}
// Use string literals ":" instead of character literals ':'.
@@ -317,7 +378,8 @@ class AlphaNum {
// This overload matches only scoped enums.
template <typename T,
typename = typename std::enable_if<
- std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type>
+ std::is_enum<T>{} && !std::is_convertible<T, int>{} &&
+ !strings_internal::HasAbslStringify<T>::value>::type>
AlphaNum(T e) // NOLINT(runtime/explicit)
: AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {}
diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc
index 1b3b7ece..2d74245e 100644
--- a/absl/strings/str_cat_test.cc
+++ b/absl/strings/str_cat_test.cc
@@ -443,7 +443,7 @@ TEST(StrCat, AvoidsMemcpyWithNullptr) {
EXPECT_EQ(result, "12345");
}
-#ifdef GTEST_HAS_DEATH_TEST
+#if GTEST_HAS_DEATH_TEST
TEST(StrAppend, Death) {
std::string s = "self";
// on linux it's "assertion", on mac it's "Assertion",
@@ -650,4 +650,16 @@ TEST(StrCat, AbslStringifyExampleUsingFormat) {
EXPECT_EQ(absl::StrCat("a ", p, " z"), "a (10, 20) z");
}
+enum class EnumWithStringify { Many = 0, Choices = 1 };
+
+template <typename Sink>
+void AbslStringify(Sink& sink, EnumWithStringify e) {
+ absl::Format(&sink, "%s", e == EnumWithStringify::Many ? "Many" : "Choices");
+}
+
+TEST(StrCat, AbslStringifyWithEnum) {
+ const auto e = EnumWithStringify::Choices;
+ EXPECT_EQ(absl::StrCat(e), "Choices");
+}
+
} // namespace
diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h
index ffbcb9af..fc4bf39e 100644
--- a/absl/strings/str_format.h
+++ b/absl/strings/str_format.h
@@ -36,10 +36,12 @@
// * `absl::StreamFormat()` to more efficiently write a format string to a
// stream, such as`std::cout`.
// * `absl::PrintF()`, `absl::FPrintF()` and `absl::SNPrintF()` as
-// replacements for `std::printf()`, `std::fprintf()` and `std::snprintf()`.
+// drop-in replacements for `std::printf()`, `std::fprintf()` and
+// `std::snprintf()`.
//
-// Note: a version of `std::sprintf()` is not supported as it is
-// generally unsafe due to buffer overflows.
+// Note: An `absl::SPrintF()` drop-in replacement is not supported as it
+// is generally unsafe due to buffer overflows. Use `absl::StrFormat` which
+// returns the string as output instead of expecting a pre-allocated buffer.
//
// Additionally, you can provide a format string (and its associated arguments)
// using one of the following abstractions:
@@ -191,9 +193,9 @@ class FormatCountCapture {
// absl::StrFormat(formatString, "TheVillage", 6);
//
// A format string generally follows the POSIX syntax as used within the POSIX
-// `printf` specification.
+// `printf` specification. (Exceptions are noted below.)
//
-// (See http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html.)
+// (See http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html)
//
// In specific, the `FormatSpec` supports the following type specifiers:
// * `c` for characters
@@ -211,6 +213,10 @@ class FormatCountCapture {
// * `n` for the special case of writing out the number of characters
// written to this point. The resulting value must be captured within an
// `absl::FormatCountCapture` type.
+// * `v` for values using the default format for a deduced type. These deduced
+// types include many of the primitive types denoted here as well as
+// user-defined types containing the proper extensions. (See below for more
+// information.)
//
// Implementation-defined behavior:
// * A null pointer provided to "%s" or "%p" is output as "(nil)".
@@ -239,6 +245,15 @@ class FormatCountCapture {
// "%s%d%n", "hello", 123, absl::FormatCountCapture(&n));
// EXPECT_EQ(8, n);
//
+// NOTE: the `v` specifier (for "value") is a type specifier not present in the
+// POSIX specification. %v will format values according to their deduced type.
+// `v` uses `d` for signed integer values, `u` for unsigned integer values, `g`
+// for floating point values, and formats boolean values as "true"/"false"
+// (instead of 1 or 0 for booleans formatted using d). `const char*` is not
+// supported; please use `std::string` and `string_view`. `char` is also not
+// supported due to ambiguity of the type. This specifier does not support
+// modifiers.
+//
// The `FormatSpec` intrinsically supports all of these fundamental C++ types:
//
// * Characters: `char`, `signed char`, `unsigned char`
@@ -570,6 +585,41 @@ ABSL_MUST_USE_RESULT inline bool FormatUntyped(
// StrFormat Extensions
//------------------------------------------------------------------------------
//
+// AbslStringify()
+//
+// A simpler customization API for formatting user-defined types using
+// absl::StrFormat(). The API relies on detecting an overload in the
+// user-defined type's namespace of a free (non-member) `AbslStringify()`
+// function as a friend definition with the following signature:
+//
+// template <typename Sink>
+// void AbslStringify(Sink& sink, const X& value);
+//
+// An `AbslStringify()` overload for a type should only be declared in the same
+// file and namespace as said type.
+//
+// Note that unlike with AbslFormatConvert(), AbslStringify() does not allow
+// customization of allowed conversion characters. AbslStringify() uses `%v` as
+// the underlying conversion specifier. Additionally, AbslStringify() supports
+// use with absl::StrCat while AbslFormatConvert() does not.
+//
+// Example:
+//
+// struct Point {
+// // To add formatting support to `Point`, we simply need to add a free
+// // (non-member) function `AbslStringify()`. This method prints in the
+// // requested format using the underlying `%v` specifier. You can add such a
+// // free function using a friend declaration within the body of the class.
+// // The sink parameter is a templated type to avoid requiring dependencies.
+// template <typename Sink>
+// friend void AbslStringify(Sink& sink, const Point& p) {
+// absl::Format(&sink, "(%v, %v)", p.x, p.y);
+// }
+//
+// int x;
+// int y;
+// };
+//
// AbslFormatConvert()
//
// The StrFormat library provides a customization API for formatting
@@ -616,9 +666,9 @@ ABSL_MUST_USE_RESULT inline bool FormatUntyped(
// AbslFormatConvert(const Point& p, const absl::FormatConversionSpec& spec,
// absl::FormatSink* s) {
// if (spec.conversion_char() == absl::FormatConversionChar::s) {
-// s->Append(absl::StrCat("x=", p.x, " y=", p.y));
+// absl::Format(s, "x=%v y=%v", p.x, p.y);
// } else {
-// s->Append(absl::StrCat(p.x, ",", p.y));
+// absl::Format(s, "%v,%v", p.x, p.y);
// }
// return {true};
// }
@@ -772,17 +822,25 @@ enum class FormatConversionCharSet : uint64_t {
// FormatSink
//
-// An abstraction to which conversions write their string data.
+// A format sink is a generic abstraction to which conversions may write their
+// formatted string data. `AbslFormatConvert()` uses this sink to write its
+// formatted string.
//
class FormatSink {
public:
- // Appends `count` copies of `ch`.
+ // FormatSink::Append()
+ //
+ // Appends `count` copies of `ch` to the format sink.
void Append(size_t count, char ch) { sink_->Append(count, ch); }
+ // Overload of FormatSink::Append() for appending the characters of a string
+ // view to a format sink.
void Append(string_view v) { sink_->Append(v); }
- // Appends the first `precision` bytes of `v`. If this is less than
- // `width`, spaces will be appended first (if `left` is false), or
+ // FormatSink::PutPaddedString()
+ //
+ // Appends `precision` number of bytes of `v` to the format sink. If this is
+ // less than `width`, spaces will be appended first (if `left` is false), or
// after (if `left` is true) to ensure the total amount appended is
// at least `width`.
bool PutPaddedString(string_view v, int width, int precision, bool left) {
diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc
index 62ed262d..5198fb33 100644
--- a/absl/strings/str_format_test.cc
+++ b/absl/strings/str_format_test.cc
@@ -143,13 +143,20 @@ TEST_F(FormatEntryPointTest, AppendFormatFailWithV) {
}
TEST_F(FormatEntryPointTest, ManyArgs) {
- EXPECT_EQ("24", StrFormat("%24$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
- 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
- EXPECT_EQ("60", StrFormat("%60$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
- 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
- 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
- 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
- 53, 54, 55, 56, 57, 58, 59, 60));
+ EXPECT_EQ(
+ "60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 "
+ "36 35 34 33 32 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 "
+ "12 11 10 9 8 7 6 5 4 3 2 1",
+ StrFormat("%60$d %59$d %58$d %57$d %56$d %55$d %54$d %53$d %52$d %51$d "
+ "%50$d %49$d %48$d %47$d %46$d %45$d %44$d %43$d %42$d %41$d "
+ "%40$d %39$d %38$d %37$d %36$d %35$d %34$d %33$d %32$d %31$d "
+ "%30$d %29$d %28$d %27$d %26$d %25$d %24$d %23$d %22$d %21$d "
+ "%20$d %19$d %18$d %17$d %16$d %15$d %14$d %13$d %12$d %11$d "
+ "%10$d %9$d %8$d %7$d %6$d %5$d %4$d %3$d %2$d %1$d",
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+ 51, 52, 53, 54, 55, 56, 57, 58, 59, 60));
}
TEST_F(FormatEntryPointTest, Preparsed) {
@@ -1135,6 +1142,51 @@ TEST_F(FormatExtensionTest, AbslStringifyExampleUsingFormat) {
EXPECT_EQ(absl::StrFormat("a %v z", p), "a (10, 20) z");
}
+enum class EnumClassWithStringify { Many = 0, Choices = 1 };
+
+template <typename Sink>
+void AbslStringify(Sink& sink, EnumClassWithStringify e) {
+ absl::Format(&sink, "%s",
+ e == EnumClassWithStringify::Many ? "Many" : "Choices");
+}
+
+enum EnumWithStringify { Many, Choices };
+
+template <typename Sink>
+void AbslStringify(Sink& sink, EnumWithStringify e) {
+ absl::Format(&sink, "%s", e == EnumWithStringify::Many ? "Many" : "Choices");
+}
+
+TEST_F(FormatExtensionTest, AbslStringifyWithEnumWithV) {
+ const auto e_class = EnumClassWithStringify::Choices;
+ EXPECT_EQ(absl::StrFormat("My choice is %v", e_class),
+ "My choice is Choices");
+
+ const auto e = EnumWithStringify::Choices;
+ EXPECT_EQ(absl::StrFormat("My choice is %v", e), "My choice is Choices");
+}
+
+TEST_F(FormatExtensionTest, AbslStringifyEnumWithD) {
+ const auto e_class = EnumClassWithStringify::Many;
+ EXPECT_EQ(absl::StrFormat("My choice is %d", e_class), "My choice is 0");
+
+ const auto e = EnumWithStringify::Choices;
+ EXPECT_EQ(absl::StrFormat("My choice is %d", e), "My choice is 1");
+}
+
+enum class EnumWithLargerValue { x = 32 };
+
+template <typename Sink>
+void AbslStringify(Sink& sink, EnumWithLargerValue e) {
+ absl::Format(&sink, "%s", "Many");
+}
+
+TEST_F(FormatExtensionTest, AbslStringifyEnumOtherSpecifiers) {
+ const auto e = EnumWithLargerValue::x;
+ EXPECT_EQ(absl::StrFormat("My choice is %g", e), "My choice is 32");
+ EXPECT_EQ(absl::StrFormat("My choice is %x", e), "My choice is 20");
+}
+
} // namespace
// Some codegen thunks that we can use to easily dump the generated assembly for
diff --git a/absl/strings/str_split.cc b/absl/strings/str_split.cc
index e08c26b6..72ba7c02 100644
--- a/absl/strings/str_split.cc
+++ b/absl/strings/str_split.cc
@@ -60,19 +60,23 @@ absl::string_view GenericFind(absl::string_view text,
// Finds using absl::string_view::find(), therefore the length of the found
// delimiter is delimiter.length().
struct LiteralPolicy {
- size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
+ static size_t Find(absl::string_view text, absl::string_view delimiter,
+ size_t pos) {
return text.find(delimiter, pos);
}
- size_t Length(absl::string_view delimiter) { return delimiter.length(); }
+ static size_t Length(absl::string_view delimiter) {
+ return delimiter.length();
+ }
};
// Finds using absl::string_view::find_first_of(), therefore the length of the
// found delimiter is 1.
struct AnyOfPolicy {
- size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
+ static size_t Find(absl::string_view text, absl::string_view delimiter,
+ size_t pos) {
return text.find_first_of(delimiter, pos);
}
- size_t Length(absl::string_view /* delimiter */) { return 1; }
+ static size_t Length(absl::string_view /* delimiter */) { return 1; }
};
} // namespace
@@ -123,8 +127,7 @@ ByLength::ByLength(ptrdiff_t length) : length_(length) {
ABSL_RAW_CHECK(length > 0, "");
}
-absl::string_view ByLength::Find(absl::string_view text,
- size_t pos) const {
+absl::string_view ByLength::Find(absl::string_view text, size_t pos) const {
pos = std::min(pos, text.size()); // truncate `pos`
absl::string_view substr = text.substr(pos);
// If the string is shorter than the chunk size we say we
diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc
index 1b4427b8..04a64a42 100644
--- a/absl/strings/str_split_test.cc
+++ b/absl/strings/str_split_test.cc
@@ -369,7 +369,7 @@ TEST(SplitIterator, EqualityAsEndCondition) {
TEST(Splitter, RangeIterators) {
auto splitter = absl::StrSplit("a,b,c", ',');
std::vector<absl::string_view> output;
- for (const absl::string_view& p : splitter) {
+ for (absl::string_view p : splitter) {
output.push_back(p);
}
EXPECT_THAT(output, ElementsAre("a", "b", "c"));
diff --git a/absl/strings/substitute.h b/absl/strings/substitute.h
index 692fd03c..d6a5a690 100644
--- a/absl/strings/substitute.h
+++ b/absl/strings/substitute.h
@@ -55,6 +55,8 @@
// * bool (Printed as "true" or "false")
// * pointer types other than char* (Printed as "0x<lower case hex string>",
// except that null is printed as "NULL")
+// * user-defined types via the `AbslStringify()` customization point. See the
+// documentation for `absl::StrCat` for an explanation on how to use this.
//
// If an invalid format string is provided, Substitute returns an empty string
// and SubstituteAndAppend does not change the provided output string.
@@ -79,6 +81,7 @@
#include "absl/base/port.h"
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
+#include "absl/strings/internal/stringify_sink.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
@@ -102,14 +105,14 @@ class Arg {
// Overloads for string-y things
//
// Explicitly overload `const char*` so the compiler doesn't cast to `bool`.
- Arg(const char* value) // NOLINT(runtime/explicit)
+ Arg(const char* value) // NOLINT(google-explicit-constructor)
: piece_(absl::NullSafeStringView(value)) {}
template <typename Allocator>
Arg( // NOLINT
const std::basic_string<char, std::char_traits<char>, Allocator>&
value) noexcept
: piece_(value) {}
- Arg(absl::string_view value) // NOLINT(runtime/explicit)
+ Arg(absl::string_view value) // NOLINT(google-explicit-constructor)
: piece_(value) {}
// Overloads for primitives
@@ -119,7 +122,7 @@ class Arg {
// probably using them as 8-bit integers and would probably prefer an integer
// representation. However, we can't really know, so we make the caller decide
// what to do.
- Arg(char value) // NOLINT(runtime/explicit)
+ Arg(char value) // NOLINT(google-explicit-constructor)
: piece_(scratch_, 1) {
scratch_[0] = value;
}
@@ -133,12 +136,12 @@ class Arg {
static_cast<size_t>(
numbers_internal::FastIntToBuffer(value, scratch_) -
scratch_)) {}
- Arg(int value) // NOLINT(runtime/explicit)
+ Arg(int value) // NOLINT(google-explicit-constructor)
: piece_(scratch_,
static_cast<size_t>(
numbers_internal::FastIntToBuffer(value, scratch_) -
scratch_)) {}
- Arg(unsigned int value) // NOLINT(runtime/explicit)
+ Arg(unsigned int value) // NOLINT(google-explicit-constructor)
: piece_(scratch_,
static_cast<size_t>(
numbers_internal::FastIntToBuffer(value, scratch_) -
@@ -163,17 +166,23 @@ class Arg {
static_cast<size_t>(
numbers_internal::FastIntToBuffer(value, scratch_) -
scratch_)) {}
- Arg(float value) // NOLINT(runtime/explicit)
+ Arg(float value) // NOLINT(google-explicit-constructor)
: piece_(scratch_, numbers_internal::SixDigitsToBuffer(value, scratch_)) {
}
- Arg(double value) // NOLINT(runtime/explicit)
+ Arg(double value) // NOLINT(google-explicit-constructor)
: piece_(scratch_, numbers_internal::SixDigitsToBuffer(value, scratch_)) {
}
- Arg(bool value) // NOLINT(runtime/explicit)
+ Arg(bool value) // NOLINT(google-explicit-constructor)
: piece_(value ? "true" : "false") {}
- Arg(Hex hex); // NOLINT(runtime/explicit)
- Arg(Dec dec); // NOLINT(runtime/explicit)
+ template <typename T, typename = typename std::enable_if<
+ strings_internal::HasAbslStringify<T>::value>::type>
+ Arg( // NOLINT(google-explicit-constructor)
+ const T& v, strings_internal::StringifySink&& sink = {})
+ : piece_(strings_internal::ExtractStringification(sink, v)) {}
+
+ Arg(Hex hex); // NOLINT(google-explicit-constructor)
+ Arg(Dec dec); // NOLINT(google-explicit-constructor)
// vector<bool>::reference and const_reference require special help to convert
// to `Arg` because it requires two user defined conversions.
@@ -188,13 +197,14 @@ class Arg {
// `void*` values, with the exception of `char*`, are printed as
// "0x<hex value>". However, in the case of `nullptr`, "NULL" is printed.
- Arg(const void* value); // NOLINT(runtime/explicit)
+ Arg(const void* value); // NOLINT(google-explicit-constructor)
// Normal enums are already handled by the integer formatters.
// This overload matches only scoped enums.
template <typename T,
typename = typename std::enable_if<
- std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type>
+ std::is_enum<T>{} && !std::is_convertible<T, int>{} &&
+ !strings_internal::HasAbslStringify<T>::value>::type>
Arg(T value) // NOLINT(google-explicit-constructor)
: Arg(static_cast<typename std::underlying_type<T>::type>(value)) {}
diff --git a/absl/strings/substitute_test.cc b/absl/strings/substitute_test.cc
index 9e6b9403..ecf78d6b 100644
--- a/absl/strings/substitute_test.cc
+++ b/absl/strings/substitute_test.cc
@@ -22,6 +22,16 @@
namespace {
+struct MyStruct {
+ template <typename Sink>
+ friend void AbslStringify(Sink& sink, const MyStruct& s) {
+ sink.Append("MyStruct{.value = ");
+ sink.Append(absl::StrCat(s.value));
+ sink.Append("}");
+ }
+ int value;
+};
+
TEST(SubstituteTest, Substitute) {
// Basic.
EXPECT_EQ("Hello, world!", absl::Substitute("$0, $1!", "Hello", "world"));
@@ -70,7 +80,7 @@ TEST(SubstituteTest, Substitute) {
// Volatile Pointer.
// Like C++ streamed I/O, such pointers implicitly become bool
volatile int vol = 237;
- volatile int *volatile volptr = &vol;
+ volatile int* volatile volptr = &vol;
str = absl::Substitute("$0", volptr);
EXPECT_EQ("true", str);
@@ -128,6 +138,11 @@ TEST(SubstituteTest, Substitute) {
const char* null_cstring = nullptr;
EXPECT_EQ("Text: ''", absl::Substitute("Text: '$0'", null_cstring));
+
+ MyStruct s1 = MyStruct{17};
+ MyStruct s2 = MyStruct{1043};
+ EXPECT_EQ("MyStruct{.value = 17}, MyStruct{.value = 1043}",
+ absl::Substitute("$0, $1", s1, s2));
}
TEST(SubstituteTest, SubstituteAndAppend) {
@@ -171,6 +186,12 @@ TEST(SubstituteTest, SubstituteAndAppend) {
absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b",
"c", "d", "e", "f", "g", "h", "i", "j");
EXPECT_EQ("a b c d e f g h i j", str);
+
+ str.clear();
+ MyStruct s1 = MyStruct{17};
+ MyStruct s2 = MyStruct{1043};
+ absl::SubstituteAndAppend(&str, "$0, $1", s1, s2);
+ EXPECT_EQ("MyStruct{.value = 17}, MyStruct{.value = 1043}", str);
}
TEST(SubstituteTest, VectorBoolRef) {
@@ -232,7 +253,19 @@ TEST(SubstituteTest, Enums) {
ScopedEnumUInt16::kEnum1));
}
-#ifdef GTEST_HAS_DEATH_TEST
+enum class EnumWithStringify { Many = 0, Choices = 1 };
+
+template <typename Sink>
+void AbslStringify(Sink& sink, EnumWithStringify e) {
+ sink.Append(e == EnumWithStringify::Many ? "Many" : "Choices");
+}
+
+TEST(SubstituteTest, AbslStringifyWithEnum) {
+ const auto e = EnumWithStringify::Choices;
+ EXPECT_EQ(absl::Substitute("$0", e), "Choices");
+}
+
+#if GTEST_HAS_DEATH_TEST
TEST(SubstituteDeathTest, SubstituteDeath) {
EXPECT_DEBUG_DEATH(