summary | refs | log | tree | commit | diff
path: root/absl/strings
diff options
context:
space:
mode:
author: Abseil Team <absl-team@google.com> 2023-12-05 14:15:15 -0800
committer: Copybara-Service <copybara-worker@google.com> 2023-12-05 14:15:57 -0800
commit: 5dc2cc1a6a992e09d9cf930cdce7984640e2b7e0 (patch)
tree: a02601dddbd8a5ccbe63fdc5fee787d37cf103ed /absl/strings
parent: 3e6ecec7d3c9c504c9951b34230b22527758e0cd (diff)
Adds support for wchar_t/wchar_t*/std::wstring{_view} arguments to StrFormat().

This converts to UTF-8 regardless of locale.

PiperOrigin-RevId: 588186076
Change-Id: I2c9598279b413d460e13ad65da2ba421c0b40b83
Diffstat (limited to 'absl/strings')
-rw-r--r-- absl/strings/BUILD.bazel 12
-rw-r--r-- absl/strings/CMakeLists.txt 17
-rw-r--r-- absl/strings/internal/str_format/arg.cc 153
-rw-r--r-- absl/strings/internal/str_format/arg.h 80
-rw-r--r-- absl/strings/internal/str_format/arg_test.cc 34
-rw-r--r-- absl/strings/internal/str_format/bind.cc 18
-rw-r--r-- absl/strings/internal/str_format/bind.h 11
-rw-r--r-- absl/strings/internal/str_format/constexpr_parser.h 11
-rw-r--r-- absl/strings/internal/str_format/convert_test.cc 328
-rw-r--r-- absl/strings/internal/str_format/extension.h 14
-rw-r--r-- absl/strings/internal/str_format/parser.h 9
-rw-r--r-- absl/strings/internal/str_format/parser_test.cc 10
-rw-r--r-- absl/strings/str_format.h 14
-rw-r--r-- absl/strings/str_format_test.cc 4
14 files changed, 608 insertions, 107 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index a3ef3ae8..d8883bf2 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -1247,6 +1247,10 @@ cc_library(
linkopts = ABSL_DEFAULT_LINKOPTS,
deps = [
":str_format_internal",
+ ":string_view",
+ "//absl/base:config",
+ "//absl/base:core_headers",
+ "//absl/types:span",
],
)
@@ -1277,6 +1281,7 @@ cc_library(
":strings",
"//absl/base:config",
"//absl/base:core_headers",
+ "//absl/container:fixed_array",
"//absl/container:inlined_vector",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
@@ -1330,6 +1335,7 @@ cc_test(
deps = [
":str_format",
":str_format_internal",
+ "//absl/base:config",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
@@ -1366,12 +1372,16 @@ cc_test(
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
+ ":str_format",
":str_format_internal",
":strings",
+ "//absl/base:config",
"//absl/base:core_headers",
"//absl/base:raw_logging_internal",
"//absl/log",
+ "//absl/numeric:int128",
"//absl/types:optional",
+ "//absl/types:span",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
@@ -1397,6 +1407,8 @@ cc_test(
visibility = ["//visibility:private"],
deps = [
":str_format_internal",
+ ":string_view",
+ "//absl/base:config",
"//absl/base:core_headers",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index b129096d..6ec9e0eb 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -470,7 +470,11 @@ absl_cc_library(
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
+ absl::config
+ absl::core_headers
+ absl::span
absl::str_format_internal
+ absl::string_view
PUBLIC
)
@@ -501,6 +505,7 @@ absl_cc_library(
absl::strings
absl::config
absl::core_headers
+ absl::fixed_array
absl::inlined_vector
absl::numeric_representation
absl::type_traits
@@ -548,6 +553,7 @@ absl_cc_test(
COPTS
${ABSL_TEST_COPTS}
DEPS
+ absl::config
absl::str_format
absl::str_format_internal
GTest::gmock_main
@@ -585,12 +591,15 @@ absl_cc_test(
COPTS
${ABSL_TEST_COPTS}
DEPS
- absl::strings
- absl::str_format_internal
+ absl::config
absl::core_headers
+ absl::int128
absl::log
absl::raw_logging_internal
- absl::int128
+ absl::span
+ absl::str_format
+ absl::str_format_internal
+ absl::strings
GTest::gmock_main
)
@@ -616,6 +625,8 @@ absl_cc_test(
${ABSL_TEST_COPTS}
DEPS
absl::str_format_internal
+ absl::string_view
+ absl::config
absl::core_headers
GTest::gmock_main
)
diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc
index c0a9a28e..eeb21081 100644
--- a/absl/strings/internal/str_format/arg.cc
+++ b/absl/strings/internal/str_format/arg.cc
@@ -18,15 +18,28 @@
//
#include "absl/strings/internal/str_format/arg.h"
+#include <algorithm>
#include <cassert>
-#include <cerrno>
+#include <cstddef>
+#include <cstdint>
#include <cstdlib>
+#include <cstring>
+#include <cwchar>
#include <string>
#include <type_traits>
-#include "absl/base/port.h"
+#include "absl/base/config.h"
+#include "absl/base/optimization.h"
+#include "absl/container/fixed_array.h"
+#include "absl/numeric/int128.h"
+#include "absl/strings/internal/str_format/extension.h"
#include "absl/strings/internal/str_format/float_conversion.h"
#include "absl/strings/numbers.h"
+#include "absl/strings/string_view.h"
+
+#if defined(ABSL_HAVE_STD_STRING_VIEW)
+#include <string_view>
+#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -298,6 +311,83 @@ inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv,
conv.has_left_flag());
}
+struct ShiftState {
+ bool saw_high_surrogate = false;
+ uint8_t bits = 0;
+};
+
+// Converts `v` from UTF-16 or UTF-32 to UTF-8 and writes to `buf`. `buf` is
+// assumed to have enough space for the output. `s` is used to carry state
+// between successive calls with a UTF-16 surrogate pair. Returns the number of
+// chars written, or `static_cast<size_t>(-1)` on failure.
+//
+// This is basically std::wcrtomb(), but always outputting UTF-8 instead of
+// respecting the current locale.
+inline size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s) {
+ const auto v = static_cast<uint32_t>(wc);
+ if (v < 0x80) {
+ *buf = static_cast<char>(v);
+ return 1;
+ } else if (v < 0x800) {
+ *buf++ = static_cast<char>(0xc0 | (v >> 6));
+ *buf = static_cast<char>(0x80 | (v & 0x3f));
+ return 2;
+ } else if (v < 0xd800 || (v - 0xe000) < 0x2000) {
+ *buf++ = static_cast<char>(0xe0 | (v >> 12));
+ *buf++ = static_cast<char>(0x80 | ((v >> 6) & 0x3f));
+ *buf = static_cast<char>(0x80 | (v & 0x3f));
+ return 3;
+ } else if ((v - 0x10000) < 0x100000) {
+ *buf++ = static_cast<char>(0xf0 | (v >> 18));
+ *buf++ = static_cast<char>(0x80 | ((v >> 12) & 0x3f));
+ *buf++ = static_cast<char>(0x80 | ((v >> 6) & 0x3f));
+ *buf = static_cast<char>(0x80 | (v & 0x3f));
+ return 4;
+ } else if (v < 0xdc00) {
+ s.saw_high_surrogate = true;
+ s.bits = static_cast<uint8_t>(v & 0x3);
+ const uint8_t high_bits = ((v >> 6) & 0xf) + 1;
+ *buf++ = static_cast<char>(0xf0 | (high_bits >> 2));
+ *buf =
+ static_cast<char>(0x80 | static_cast<uint8_t>((high_bits & 0x3) << 4) |
+ static_cast<uint8_t>((v >> 2) & 0xf));
+ return 2;
+ } else if (v < 0xe000 && s.saw_high_surrogate) {
+ *buf++ = static_cast<char>(0x80 | static_cast<uint8_t>(s.bits << 4) |
+ static_cast<uint8_t>((v >> 6) & 0xf));
+ *buf = static_cast<char>(0x80 | (v & 0x3f));
+ s.saw_high_surrogate = false;
+ s.bits = 0;
+ return 2;
+ } else {
+ return static_cast<size_t>(-1);
+ }
+}
+
+inline bool ConvertStringArg(const wchar_t *v,
+ size_t len,
+ const FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink) {
+ FixedArray<char> mb(len * 4);
+ ShiftState s;
+ size_t chars_written = 0;
+ for (size_t i = 0; i < len; ++i) {
+ const size_t chars = WideToUtf8(v[i], &mb[chars_written], s);
+ if (chars == static_cast<size_t>(-1)) { return false; }
+ chars_written += chars;
+ }
+ return ConvertStringArg(string_view(mb.data(), chars_written), conv, sink);
+}
+
+bool ConvertWCharTImpl(wchar_t v, const FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink) {
+ char mb[4];
+ ShiftState s;
+ const size_t chars_written = WideToUtf8(v, mb, s);
+ return chars_written != static_cast<size_t>(-1) && !s.saw_high_surrogate &&
+ ConvertStringArg(string_view(mb, chars_written), conv, sink);
+}
+
} // namespace
bool ConvertBoolArg(bool v, FormatSinkImpl *sink) {
@@ -316,11 +406,14 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
// This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes
// it to complain about a switch/case type mismatch, even though both are
- // FormatConverionChar. Likely this is because at this point
+ // FormatConversionChar. Likely this is because at this point
// FormatConversionChar is declared, but not defined.
switch (static_cast<uint8_t>(conv.conversion_char())) {
case static_cast<uint8_t>(FormatConversionCharInternal::c):
- return ConvertCharImpl(static_cast<char>(v), conv, sink);
+ return (std::is_same<T, wchar_t>::value ||
+ (conv.length_mod() == LengthMod::l))
+ ? ConvertWCharTImpl(static_cast<wchar_t>(v), conv, sink)
+ : ConvertCharImpl(static_cast<char>(v), conv, sink);
case static_cast<uint8_t>(FormatConversionCharInternal::o):
as_digits.PrintAsOct(static_cast<U>(v));
@@ -372,6 +465,8 @@ template bool ConvertIntArg<signed char>(signed char v,
template bool ConvertIntArg<unsigned char>(unsigned char v,
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
+template bool ConvertIntArg<wchar_t>(wchar_t v, FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink);
template bool ConvertIntArg<short>(short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
@@ -403,16 +498,29 @@ StringConvertResult FormatConvertImpl(const std::string &v,
return {ConvertStringArg(v, conv, sink)};
}
+StringConvertResult FormatConvertImpl(const std::wstring &v,
+ const FormatConversionSpecImpl conv,
+ FormatSinkImpl *sink) {
+ return {ConvertStringArg(v.data(), v.size(), conv, sink)};
+}
+
StringConvertResult FormatConvertImpl(string_view v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertStringArg(v, conv, sink)};
}
-ArgConvertResult<FormatConversionCharSetUnion(
- FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)>
-FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv,
- FormatSinkImpl *sink) {
+#if defined(ABSL_HAVE_STD_STRING_VIEW)
+StringConvertResult FormatConvertImpl(std::wstring_view v,
+ const FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink) {
+ return {ConvertStringArg(v.data(), v.size(), conv, sink)};
+}
+#endif
+
+StringPtrConvertResult FormatConvertImpl(const char* v,
+ const FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink) {
if (conv.conversion_char() == FormatConversionCharInternal::p)
return {FormatConvertImpl(VoidPtr(v), conv, sink).value};
size_t len;
@@ -427,6 +535,30 @@ FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv,
return {ConvertStringArg(string_view(v, len), conv, sink)};
}
+StringPtrConvertResult FormatConvertImpl(const wchar_t* v,
+ const FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink) {
+ if (conv.conversion_char() == FormatConversionCharInternal::p) {
+ return {FormatConvertImpl(VoidPtr(v), conv, sink).value};
+ }
+ size_t len;
+ if (v == nullptr) {
+ len = 0;
+ } else if (conv.precision() < 0) {
+ len = std::wcslen(v);
+ } else {
+ // If precision is set, we look for the NUL-terminator on the valid range.
+ len = static_cast<size_t>(std::find(v, v + conv.precision(), L'\0') - v);
+ }
+ return {ConvertStringArg(v, len, conv, sink)};
+}
+
+StringPtrConvertResult FormatConvertImpl(std::nullptr_t,
+ const FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink) {
+ return FormatConvertImpl(static_cast<const char*>(nullptr), conv, sink);
+}
+
// ==================== Raw pointers ====================
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
@@ -461,6 +593,11 @@ CharConvertResult FormatConvertImpl(char v, const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
+CharConvertResult FormatConvertImpl(wchar_t v,
+ const FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink) {
+ return {ConvertIntArg(v, conv, sink)};
+}
// ==================== Ints ====================
IntegralConvertResult FormatConvertImpl(signed char v,
diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h
index 20483aff..309161d5 100644
--- a/absl/strings/internal/str_format/arg.h
+++ b/absl/strings/internal/str_format/arg.h
@@ -19,8 +19,9 @@
#include <wchar.h>
#include <algorithm>
+#include <cstddef>
+#include <cstdint>
#include <cstdio>
-#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>
@@ -28,13 +29,18 @@
#include <type_traits>
#include <utility>
-#include "absl/base/port.h"
+#include "absl/base/config.h"
+#include "absl/base/optimization.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/has_absl_stringify.h"
#include "absl/strings/internal/str_format/extension.h"
#include "absl/strings/string_view.h"
+#if defined(ABSL_HAVE_STD_STRING_VIEW)
+#include <string_view>
+#endif
+
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -97,6 +103,9 @@ extern template bool ConvertIntArg<signed char>(signed char v,
extern template bool ConvertIntArg<unsigned char>(unsigned char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
+extern template bool ConvertIntArg<wchar_t>(wchar_t v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
extern template bool ConvertIntArg<short>(short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
@@ -203,30 +212,49 @@ constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) {
return C;
}
-using StringConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
- FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::v)>;
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// Strings.
+using StringConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
+ FormatConversionCharSetInternal::s,
+ FormatConversionCharSetInternal::v)>;
StringConvertResult FormatConvertImpl(const std::string& v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
+StringConvertResult FormatConvertImpl(const std::wstring& v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
StringConvertResult FormatConvertImpl(string_view v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
-#if defined(ABSL_HAVE_STD_STRING_VIEW) && !defined(ABSL_USES_STD_STRING_VIEW)
+#if defined(ABSL_HAVE_STD_STRING_VIEW)
+StringConvertResult FormatConvertImpl(std::wstring_view v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+#if !defined(ABSL_USES_STD_STRING_VIEW)
inline StringConvertResult FormatConvertImpl(std::string_view v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
return FormatConvertImpl(absl::string_view(v.data(), v.size()), conv, sink);
}
-#endif // ABSL_HAVE_STD_STRING_VIEW && !ABSL_USES_STD_STRING_VIEW
-
-ArgConvertResult<FormatConversionCharSetUnion(
- FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)>
-FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv,
- FormatSinkImpl* sink);
+#endif // !ABSL_USES_STD_STRING_VIEW
+#endif // ABSL_HAVE_STD_STRING_VIEW
+
+using StringPtrConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
+ FormatConversionCharSetInternal::s,
+ FormatConversionCharSetInternal::p)>;
+StringPtrConvertResult FormatConvertImpl(const char* v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+StringPtrConvertResult FormatConvertImpl(const wchar_t* v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
+// This overload is needed to disambiguate, since `nullptr` could match either
+// of the other overloads equally well.
+StringPtrConvertResult FormatConvertImpl(std::nullptr_t,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
template <class AbslCord, typename std::enable_if<std::is_same<
AbslCord, absl::Cord>::value>::type* = nullptr>
@@ -280,6 +308,9 @@ FloatingConvertResult FormatConvertImpl(long double v,
// Chars.
CharConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
+CharConvertResult FormatConvertImpl(wchar_t v,
+ FormatConversionSpecImpl conv,
+ FormatSinkImpl* sink);
// Ints.
IntegralConvertResult FormatConvertImpl(signed char v,
@@ -441,6 +472,7 @@ class FormatArgImpl {
// Anything with a user-defined Convert will get its own vtable.
// For everything else:
// - Decay char* and char arrays into `const char*`
+ // - Decay wchar_t* and wchar_t arrays into `const wchar_t*`
// - Decay any other pointer to `const void*`
// - Decay all enums to the integral promotion of their underlying type.
// - Decay function pointers to void*.
@@ -452,9 +484,13 @@ class FormatArgImpl {
using type = typename std::conditional<
!kHasUserDefined && std::is_convertible<T, const char*>::value,
const char*,
- typename std::conditional<!kHasUserDefined &&
- std::is_convertible<T, VoidPtr>::value,
- VoidPtr, const T&>::type>::type;
+ typename std::conditional<
+ !kHasUserDefined && std::is_convertible<T, const wchar_t*>::value,
+ const wchar_t*,
+ typename std::conditional<
+ !kHasUserDefined && std::is_convertible<T, VoidPtr>::value,
+ VoidPtr,
+ const T&>::type>::type>::type;
};
template <typename T>
struct DecayType<
@@ -585,7 +621,7 @@ class FormatArgImpl {
E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \
void*)
-#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \
+#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_NO_WSTRING_VIEW_(...) \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(bool, __VA_ARGS__); \
@@ -611,7 +647,19 @@ class FormatArgImpl {
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long double, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const char*, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::string, __VA_ARGS__); \
- ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__)
+ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__); \
+ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const wchar_t*, __VA_ARGS__); \
+ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::wstring, __VA_ARGS__)
+
+#if defined(ABSL_HAVE_STD_STRING_VIEW)
+#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \
+ ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_NO_WSTRING_VIEW_( \
+ __VA_ARGS__); \
+ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::wstring_view, __VA_ARGS__)
+#else
+#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \
+ ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_NO_WSTRING_VIEW_(__VA_ARGS__)
+#endif
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern);
diff --git a/absl/strings/internal/str_format/arg_test.cc b/absl/strings/internal/str_format/arg_test.cc
index 1261937c..f663d7c5 100644
--- a/absl/strings/internal/str_format/arg_test.cc
+++ b/absl/strings/internal/str_format/arg_test.cc
@@ -14,9 +14,10 @@
#include "absl/strings/internal/str_format/arg.h"
-#include <ostream>
+#include <limits>
#include <string>
#include "gtest/gtest.h"
+#include "absl/base/config.h"
#include "absl/strings/str_format.h"
namespace absl {
@@ -93,6 +94,21 @@ TEST_F(FormatArgImplTest, CharArraysDecayToCharPtr) {
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyArray)));
}
+extern const wchar_t kMyWCharTArray[];
+
+TEST_F(FormatArgImplTest, WCharTArraysDecayToWCharTPtr) {
+ const wchar_t* a = L"";
+ EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
+ FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(L"")));
+ EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
+ FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(L"A")));
+ EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
+ FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(L"ABC")));
+ EXPECT_EQ(
+ FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
+ FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyWCharTArray)));
+}
+
TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) {
auto expected = FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<void *>(nullptr)));
@@ -124,6 +140,22 @@ TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) {
}
const char kMyArray[] = "ABCDE";
+TEST_F(FormatArgImplTest, WorksWithWCharTArraysOfUnknownSize) {
+ std::string s;
+ FormatSinkImpl sink(&s);
+ FormatConversionSpecImpl conv;
+ FormatConversionSpecImplFriend::SetConversionChar(
+ FormatConversionCharInternal::s, &conv);
+ FormatConversionSpecImplFriend::SetFlags(Flags(), &conv);
+ FormatConversionSpecImplFriend::SetWidth(-1, &conv);
+ FormatConversionSpecImplFriend::SetPrecision(-1, &conv);
+ EXPECT_TRUE(
+ FormatArgImplFriend::Convert(FormatArgImpl(kMyWCharTArray), conv, &sink));
+ sink.Flush();
+ EXPECT_EQ("ABCDE", s);
+}
+const wchar_t kMyWCharTArray[] = L"ABCDE";
+
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
diff --git a/absl/strings/internal/str_format/bind.cc b/absl/strings/internal/str_format/bind.cc
index 77a42223..87e23b56 100644
--- a/absl/strings/internal/str_format/bind.cc
+++ b/absl/strings/internal/str_format/bind.cc
@@ -14,10 +14,24 @@
#include "absl/strings/internal/str_format/bind.h"
+#include <algorithm>
+#include <cassert>
#include <cerrno>
+#include <cstddef>
+#include <cstdio>
+#include <ios>
#include <limits>
+#include <ostream>
#include <sstream>
#include <string>
+#include "absl/base/config.h"
+#include "absl/base/optimization.h"
+#include "absl/strings/internal/str_format/arg.h"
+#include "absl/strings/internal/str_format/constexpr_parser.h"
+#include "absl/strings/internal/str_format/extension.h"
+#include "absl/strings/internal/str_format/output.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -90,6 +104,8 @@ inline bool ArgContext::Bind(const UnboundConversion* unbound,
} else {
FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound);
}
+
+ FormatConversionSpecImplFriend::SetLengthMod(unbound->length_mod, bound);
} else {
FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound);
FormatConversionSpecImplFriend::SetWidth(-1, bound);
@@ -215,7 +231,7 @@ std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl format,
return *out;
}
-std::string FormatPack(const UntypedFormatSpecImpl format,
+std::string FormatPack(UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
std::string out;
if (ABSL_PREDICT_FALSE(!FormatUntyped(&out, format, args))) {
diff --git a/absl/strings/internal/str_format/bind.h b/absl/strings/internal/str_format/bind.h
index 5e2a43d5..120bc355 100644
--- a/absl/strings/internal/str_format/bind.h
+++ b/absl/strings/internal/str_format/bind.h
@@ -15,16 +15,19 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_
-#include <array>
+#include <cassert>
#include <cstdio>
-#include <sstream>
+#include <ostream>
#include <string>
-#include "absl/base/port.h"
+#include "absl/base/config.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/internal/str_format/arg.h"
#include "absl/strings/internal/str_format/checker.h"
+#include "absl/strings/internal/str_format/constexpr_parser.h"
+#include "absl/strings/internal/str_format/extension.h"
#include "absl/strings/internal/str_format/parser.h"
+#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "absl/utility/utility.h"
@@ -203,7 +206,7 @@ bool FormatUntyped(FormatRawSinkImpl raw_sink, UntypedFormatSpecImpl format,
std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
-std::string FormatPack(const UntypedFormatSpecImpl format,
+std::string FormatPack(UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
int FprintF(std::FILE* output, UntypedFormatSpecImpl format,
diff --git a/absl/strings/internal/str_format/constexpr_parser.h b/absl/strings/internal/str_format/constexpr_parser.h
index b70a16e4..8f593870 100644
--- a/absl/strings/internal/str_format/constexpr_parser.h
+++ b/absl/strings/internal/str_format/constexpr_parser.h
@@ -17,17 +17,18 @@
#include <cassert>
#include <cstdint>
+#include <cstdio>
#include <limits>
+#include "absl/base/config.h"
#include "absl/base/const_init.h"
+#include "absl/base/optimization.h"
#include "absl/strings/internal/str_format/extension.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
-enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };
-
// The analyzed properties of a single specified conversion.
struct UnboundConversion {
// This is a user defined default constructor on purpose to skip the
@@ -306,7 +307,6 @@ constexpr const char* ConsumeConversion(const char* pos, const char* const end,
if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
// It is a length modifier.
- using str_format_internal::LengthMod;
LengthMod length_mod = tag.as_length();
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (c == 'h' && length_mod == LengthMod::h) {
@@ -322,6 +322,11 @@ constexpr const char* ConsumeConversion(const char* pos, const char* const end,
if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
+
+ // `wchar_t` args are marked non-basic so `Bind()` will copy the length mod.
+ if (conv->length_mod == LengthMod::l && c == 'c') {
+ conv->flags = conv->flags | Flags::kNonBasic;
+ }
}
#undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR
diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc
index d14ecb24..7f222778 100644
--- a/absl/strings/internal/str_format/convert_test.cc
+++ b/absl/strings/internal/str_format/convert_test.cc
@@ -12,25 +12,43 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include <errno.h>
+#include <assert.h>
+#include <locale.h>
#include <stdarg.h>
#include <stdio.h>
#include <algorithm>
-#include <cctype>
+#include <climits>
#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <cwctype>
#include <limits>
+#include <set>
+#include <sstream>
#include <string>
#include <thread> // NOLINT
+#include <type_traits>
+#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/attributes.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/log/log.h"
+#include "absl/numeric/int128.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/internal/str_format/arg.h"
#include "absl/strings/internal/str_format/bind.h"
#include "absl/strings/match.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
+#include "absl/types/span.h"
+
+#if defined(ABSL_HAVE_STD_STRING_VIEW)
+#include <string_view>
+#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -49,36 +67,103 @@ size_t ArraySize(T (&)[N]) {
return N;
}
-std::string LengthModFor(float) { return ""; }
-std::string LengthModFor(double) { return ""; }
-std::string LengthModFor(long double) { return "L"; }
-std::string LengthModFor(char) { return "hh"; }
-std::string LengthModFor(signed char) { return "hh"; }
-std::string LengthModFor(unsigned char) { return "hh"; }
-std::string LengthModFor(short) { return "h"; } // NOLINT
-std::string LengthModFor(unsigned short) { return "h"; } // NOLINT
-std::string LengthModFor(int) { return ""; }
-std::string LengthModFor(unsigned) { return ""; }
-std::string LengthModFor(long) { return "l"; } // NOLINT
-std::string LengthModFor(unsigned long) { return "l"; } // NOLINT
-std::string LengthModFor(long long) { return "ll"; } // NOLINT
-std::string LengthModFor(unsigned long long) { return "ll"; } // NOLINT
+template <typename T>
+struct AlwaysFalse : std::false_type {};
+
+template <typename T>
+std::string LengthModFor() {
+ static_assert(AlwaysFalse<T>::value, "Unsupported type");
+ return "";
+}
+template <>
+std::string LengthModFor<char>() {
+ return "hh";
+}
+template <>
+std::string LengthModFor<signed char>() {
+ return "hh";
+}
+template <>
+std::string LengthModFor<unsigned char>() {
+ return "hh";
+}
+template <>
+std::string LengthModFor<short>() { // NOLINT
+ return "h";
+}
+template <>
+std::string LengthModFor<unsigned short>() { // NOLINT
+ return "h";
+}
+template <>
+std::string LengthModFor<int>() {
+ return "";
+}
+template <>
+std::string LengthModFor<unsigned>() {
+ return "";
+}
+template <>
+std::string LengthModFor<long>() { // NOLINT
+ return "l";
+}
+template <>
+std::string LengthModFor<unsigned long>() { // NOLINT
+ return "l";
+}
+template <>
+std::string LengthModFor<long long>() { // NOLINT
+ return "ll";
+}
+template <>
+std::string LengthModFor<unsigned long long>() { // NOLINT
+ return "ll";
+}
+
+// An integral type of the same rank and signedness as `wchar_t`, that isn't
+// `wchar_t`.
+using IntegralTypeForWCharT =
+ std::conditional_t<std::is_signed<wchar_t>::value,
+ // Some STLs are broken and return `wchar_t` from
+ // `std::make_[un]signed_t<wchar_t>` when the signedness
+ // matches. Work around by round-tripping through the
+ // opposite signedness.
+ std::make_signed_t<std::make_unsigned_t<wchar_t>>,
+ std::make_unsigned_t<std::make_signed_t<wchar_t>>>;
+
+// Given an integral type `T`, returns a type of the same rank and signedness
+// that is guaranteed to not be `wchar_t`.
+template <typename T>
+using MatchingIntegralType = std::conditional_t<std::is_same<T, wchar_t>::value,
+ IntegralTypeForWCharT, T>;
std::string EscCharImpl(int v) {
- if (std::isprint(static_cast<unsigned char>(v))) {
- return std::string(1, static_cast<char>(v));
- }
char buf[64];
- int n = snprintf(buf, sizeof(buf), "\\%#.2x",
- static_cast<unsigned>(v & 0xff));
- assert(n > 0 && n < sizeof(buf));
- return std::string(buf, n);
+ int n = absl::ascii_isprint(static_cast<unsigned char>(v))
+ ? snprintf(buf, sizeof(buf), "'%c'", v)
+ : snprintf(buf, sizeof(buf), "'\\x%.*x'", CHAR_BIT / 4,
+ static_cast<unsigned>(
+ static_cast<std::make_unsigned_t<char>>(v)));
+ assert(n > 0 && static_cast<size_t>(n) < sizeof(buf));
+ return std::string(buf, static_cast<size_t>(n));
}
std::string Esc(char v) { return EscCharImpl(v); }
std::string Esc(signed char v) { return EscCharImpl(v); }
std::string Esc(unsigned char v) { return EscCharImpl(v); }
+std::string Esc(wchar_t v) {
+ char buf[64];
+ int n = std::iswprint(static_cast<wint_t>(v))
+ ? snprintf(buf, sizeof(buf), "L'%lc'", static_cast<wint_t>(v))
+ : snprintf(buf, sizeof(buf), "L'\\x%.*llx'",
+ static_cast<int>(sizeof(wchar_t) * CHAR_BIT / 4),
+ static_cast<unsigned long long>(
+ static_cast<std::make_unsigned_t<wchar_t>>(v)));
+ assert(n > 0 && static_cast<size_t>(n) < sizeof(buf));
+ return std::string(buf, static_cast<size_t>(n));
+}
+
template <typename T>
std::string Esc(const T &v) {
std::ostringstream oss;
@@ -101,7 +186,7 @@ void StrAppendV(std::string *dst, const char *format, va_list ap) {
if (result < kSpaceLength) {
if (result >= 0) {
// Normal case -- everything fit.
- dst->append(space, result);
+ dst->append(space, static_cast<size_t>(result));
return;
}
if (result < 0) {
@@ -112,7 +197,7 @@ void StrAppendV(std::string *dst, const char *format, va_list ap) {
// Increase the buffer size to the size requested by vsnprintf,
// plus one for the closing \0.
- int length = result + 1;
+ size_t length = static_cast<size_t>(result) + 1;
char *buf = new char[length];
// Restore the va_list before we use it again
@@ -120,9 +205,9 @@ void StrAppendV(std::string *dst, const char *format, va_list ap) {
result = vsnprintf(buf, length, format, backup_ap);
va_end(backup_ap);
- if (result >= 0 && result < length) {
+ if (result >= 0 && static_cast<size_t>(result) < length) {
// It fit
- dst->append(buf, result);
+ dst->append(buf, static_cast<size_t>(result));
}
delete[] buf;
}
@@ -231,11 +316,15 @@ void TestStringConvert(const T& str) {
TEST_F(FormatConvertTest, BasicString) {
TestStringConvert("hello"); // As char array.
+ TestStringConvert(L"hello");
TestStringConvert(static_cast<const char*>("hello"));
+ TestStringConvert(static_cast<const wchar_t*>(L"hello"));
TestStringConvert(std::string("hello"));
+ TestStringConvert(std::wstring(L"hello"));
TestStringConvert(string_view("hello"));
#if defined(ABSL_HAVE_STD_STRING_VIEW)
TestStringConvert(std::string_view("hello"));
+ TestStringConvert(std::wstring_view(L"hello"));
#endif // ABSL_HAVE_STD_STRING_VIEW
}
@@ -243,6 +332,10 @@ TEST_F(FormatConvertTest, NullString) {
const char* p = nullptr;
UntypedFormatSpecImpl format("%s");
EXPECT_EQ("", FormatPack(format, {FormatArgImpl(p)}));
+
+ const wchar_t* wp = nullptr;
+ UntypedFormatSpecImpl wformat("%ls");
+ EXPECT_EQ("", FormatPack(wformat, {FormatArgImpl(wp)}));
}
TEST_F(FormatConvertTest, StringPrecision) {
@@ -252,10 +345,19 @@ TEST_F(FormatConvertTest, StringPrecision) {
UntypedFormatSpecImpl format("%.1s");
EXPECT_EQ("a", FormatPack(format, {FormatArgImpl(p)}));
+ wchar_t wc = L'a';
+ const wchar_t* wp = &wc;
+ UntypedFormatSpecImpl wformat("%.1ls");
+ EXPECT_EQ("a", FormatPack(wformat, {FormatArgImpl(wp)}));
+
// We cap at the NUL-terminator.
p = "ABC";
UntypedFormatSpecImpl format2("%.10s");
EXPECT_EQ("ABC", FormatPack(format2, {FormatArgImpl(p)}));
+
+ wp = L"ABC";
+ UntypedFormatSpecImpl wformat2("%.10ls");
+ EXPECT_EQ("ABC", FormatPack(wformat2, {FormatArgImpl(wp)}));
}
// Pointer formatting is implementation defined. This checks that the argument
@@ -278,16 +380,25 @@ TEST_F(FormatConvertTest, Pointer) {
char *mcp = &c;
const char *cp = "hi";
const char *cnil = nullptr;
+ wchar_t wc = L'h';
+ wchar_t *mwcp = &wc;
+ const wchar_t *wcp = L"hi";
+ const wchar_t *wcnil = nullptr;
const int *inil = nullptr;
using VoidF = void (*)();
VoidF fp = [] {}, fnil = nullptr;
volatile char vc;
volatile char *vcp = &vc;
volatile char *vcnil = nullptr;
+ volatile wchar_t vwc;
+ volatile wchar_t *vwcp = &vwc;
+ volatile wchar_t *vwcnil = nullptr;
const FormatArgImpl args_array[] = {
- FormatArgImpl(xp), FormatArgImpl(cp), FormatArgImpl(inil),
- FormatArgImpl(cnil), FormatArgImpl(mcp), FormatArgImpl(fp),
- FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vcnil),
+ FormatArgImpl(xp), FormatArgImpl(cp), FormatArgImpl(wcp),
+ FormatArgImpl(inil), FormatArgImpl(cnil), FormatArgImpl(wcnil),
+ FormatArgImpl(mcp), FormatArgImpl(mwcp), FormatArgImpl(fp),
+ FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vwcp),
+ FormatArgImpl(vcnil), FormatArgImpl(vwcnil),
};
auto args = absl::MakeConstSpan(args_array);
@@ -313,30 +424,49 @@ TEST_F(FormatConvertTest, Pointer) {
EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-30.20p"), args),
MatchesPointerString(&x));
+ // const int*
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%1$p"), args),
+ MatchesPointerString(xp));
// const char*
EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%2$p"), args),
MatchesPointerString(cp));
- // null const int*
+ // const wchar_t*
EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%3$p"), args),
+ MatchesPointerString(wcp));
+ // null const int*
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%4$p"), args),
MatchesPointerString(nullptr));
// null const char*
- EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%4$p"), args),
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%5$p"), args),
+ MatchesPointerString(nullptr));
+ // null const wchar_t*
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%6$p"), args),
MatchesPointerString(nullptr));
// nonconst char*
- EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%5$p"), args),
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%7$p"), args),
MatchesPointerString(mcp));
-
- // function pointers
- EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%6$p"), args),
- MatchesPointerString(reinterpret_cast<const void*>(fp)));
+ // nonconst wchar_t*
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%8$p"), args),
+ MatchesPointerString(mwcp));
+ // function pointer
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%9$p"), args),
+ MatchesPointerString(reinterpret_cast<const void *>(fp)));
+ // null function pointer
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%10$p"), args),
+ MatchesPointerString(nullptr));
+ // volatile char*
EXPECT_THAT(
- FormatPack(UntypedFormatSpecImpl("%8$p"), args),
+ FormatPack(UntypedFormatSpecImpl("%11$p"), args),
MatchesPointerString(reinterpret_cast<volatile const void *>(vcp)));
-
- // null function pointers
- EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%7$p"), args),
+ // volatile wchar_t*
+ EXPECT_THAT(
+ FormatPack(UntypedFormatSpecImpl("%12$p"), args),
+ MatchesPointerString(reinterpret_cast<volatile const void *>(vwcp)));
+ // null volatile char*
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%13$p"), args),
MatchesPointerString(nullptr));
- EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%9$p"), args),
+ // null volatile wchar_t*
+ EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%14$p"), args),
MatchesPointerString(nullptr));
}
@@ -436,12 +566,15 @@ TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) {
// as printf can't do that conversion properly. For those
// cases, we do expect agreement with printf with a "%u"
// and the unsigned equivalent of 'val'.
- UnsignedT uval = val;
- old_fmt += LengthModFor(uval);
+ UnsignedT uval =
+ static_cast<std::remove_volatile_t<UnsignedT>>(val);
+ old_fmt += LengthModFor<
+ MatchingIntegralType<std::remove_cv_t<decltype(uval)>>>();
old_fmt += "u";
old_result = StrPrint(old_fmt.c_str(), uval);
} else {
- old_fmt += LengthModFor(val);
+ old_fmt += LengthModFor<
+ MatchingIntegralType<std::remove_cv_t<decltype(val)>>>();
old_fmt += conv_char;
old_result = StrPrint(old_fmt.c_str(), val);
}
@@ -459,6 +592,47 @@ TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) {
}
}
+template <typename T>
+absl::optional<std::string> StrPrintChar(T c) {
+ return StrPrint("%c", static_cast<int>(c));
+}
+template <>
+absl::optional<std::string> StrPrintChar(wchar_t c) {
+ // musl libc has a bug where ("%lc", 0) writes no characters, and Android
+ // doesn't support forcing UTF-8 via setlocale(). Hardcode the expected
+ // answers for ASCII inputs to maximize test coverage on these platforms.
+ if (static_cast<std::make_unsigned_t<wchar_t>>(c) < 0x80) {
+ return std::string(1, static_cast<char>(c));
+ }
+
+ // Force a UTF-8 locale to match the expected `StrFormat()` behavior.
+ // It's important to copy the string returned by `setlocale()` into
+ // `old_locale` here, because its contents are not guaranteed to be valid
+ // after the next `setlocale()` call.
+ std::string old_locale = setlocale(LC_CTYPE, nullptr);
+ if (!setlocale(LC_CTYPE, "en_US.UTF-8")) {
+ return absl::nullopt;
+ }
+ const std::string output = StrPrint("%lc", static_cast<wint_t>(c));
+ setlocale(LC_CTYPE, old_locale.c_str());
+ return output;
+}
+
+template <typename T>
+typename std::remove_volatile<T>::type GetMaxForConversion() {
+ return static_cast<typename std::remove_volatile<T>::type>(
+ std::numeric_limits<int>::max());
+}
+
+template <>
+wchar_t GetMaxForConversion<wchar_t>() {
+ // Don't return values that aren't legal Unicode. For wchar_t conversions in a
+ // UTF-8 locale, conversion behavior for such values is unspecified, and we
+ // don't care about matching it.
+ return (sizeof(wchar_t) * CHAR_BIT <= 16) ? wchar_t{0xffff}
+ : static_cast<wchar_t>(0x10ffff);
+}
+
TYPED_TEST_P(TypedFormatConvertTest, Char) {
// Pass a bunch of values of type TypeParam to both FormatPack and libc's
// vsnprintf("%c", ...) (wrapped in StrPrint) to make sure we get the same
@@ -475,28 +649,50 @@ TYPED_TEST_P(TypedFormatConvertTest, Char) {
// std::numeric_limits::max(), too, but vsnprintf("%c", ...) can't handle
// anything larger than an int. Add in the most extreme values we can without
// exceeding that range.
+ // Special case: Formatting a wchar_t should behave like vsnprintf("%lc").
+ // Technically vsnprintf can accept a wint_t in this case, but since we must
+ // pass a wchar_t to FormatPack, the largest type we can use here is wchar_t.
+ using ArgType =
+ std::conditional_t<std::is_same<T, wchar_t>::value, wchar_t, int>;
static const T kMin =
- static_cast<remove_volatile_t>(std::numeric_limits<int>::min());
- static const T kMax =
- static_cast<remove_volatile_t>(std::numeric_limits<int>::max());
- vals.insert(vals.end(), {kMin + 1, kMin, kMax - 1, kMax});
+ static_cast<remove_volatile_t>(std::numeric_limits<ArgType>::min());
+ static const T kMax = GetMaxForConversion<T>();
+ vals.insert(vals.end(), {static_cast<remove_volatile_t>(kMin + 1), kMin,
+ static_cast<remove_volatile_t>(kMax - 1), kMax});
+ static const auto kMaxWCharT =
+ static_cast<remove_volatile_t>(GetMaxForConversion<wchar_t>());
for (const T c : vals) {
+ SCOPED_TRACE(Esc(c));
const FormatArgImpl args[] = {FormatArgImpl(c)};
UntypedFormatSpecImpl format("%c");
- EXPECT_EQ(StrPrint("%c", static_cast<int>(c)),
- FormatPack(format, absl::MakeSpan(args)));
+ absl::optional<std::string> result = StrPrintChar(c);
+ if (result.has_value()) {
+ EXPECT_EQ(result.value(), FormatPack(format, absl::MakeSpan(args)));
+ }
+
+ // Also test that if the format specifier is "%lc", the argument is treated
+ // as if it's a `wchar_t`.
+ const T wc =
+ std::max(remove_volatile_t{0},
+ std::min(static_cast<remove_volatile_t>(c), kMaxWCharT));
+ SCOPED_TRACE(Esc(wc));
+ const FormatArgImpl wide_args[] = {FormatArgImpl(wc)};
+ UntypedFormatSpecImpl wide_format("%lc");
+ result = StrPrintChar(static_cast<wchar_t>(wc));
+ if (result.has_value()) {
+ EXPECT_EQ(result.value(),
+ FormatPack(wide_format, absl::MakeSpan(wide_args)));
+ }
}
}
REGISTER_TYPED_TEST_SUITE_P(TypedFormatConvertTest, AllIntsWithFlags, Char);
-typedef ::testing::Types<
- int, unsigned, volatile int,
- short, unsigned short,
- long, unsigned long,
- long long, unsigned long long,
- signed char, unsigned char, char>
+typedef ::testing::Types<int, unsigned, volatile int, short, // NOLINT
+ unsigned short, long, unsigned long, // NOLINT
+ long long, unsigned long long, // NOLINT
+ signed char, unsigned char, char, wchar_t>
AllIntTypes;
INSTANTIATE_TYPED_TEST_SUITE_P(TypedFormatConvertTestWithAllIntTypes,
TypedFormatConvertTest, AllIntTypes);
@@ -511,6 +707,22 @@ TEST_F(FormatConvertTest, VectorBool) {
FormatArgImpl(cv[0]), FormatArgImpl(cv[1])})));
}
+TEST_F(FormatConvertTest, UnicodeWideString) {
+ // StrFormat() should be able to convert wide strings containing Unicode
+ // characters (to UTF-8).
+ const FormatArgImpl args[] = {FormatArgImpl(L"\u47e3 \U00011112")};
+ // `u8""` forces UTF-8 encoding; MSVC will default to e.g. CP1252 (and warn)
+ // without it. However, the resulting character type differs between pre-C++20
+ // (`char`) and C++20 (`char8_t`). So deduce the right character type for all
+ // C++ versions, init it with UTF-8, then `memcpy()` to get the result as a
+ // `char*`.
+ using ConstChar8T = std::remove_reference_t<decltype(*u8"a")>;
+ ConstChar8T kOutputUtf8[] = u8"\u47e3 \U00011112";
+ char output[sizeof kOutputUtf8];
+ std::memcpy(output, kOutputUtf8, sizeof kOutputUtf8);
+ EXPECT_EQ(output,
+ FormatPack(UntypedFormatSpecImpl("%ls"), absl::MakeSpan(args)));
+}
TEST_F(FormatConvertTest, Int128) {
absl::int128 positive = static_cast<absl::int128>(0x1234567890abcdef) * 1979;
@@ -1068,7 +1280,7 @@ TEST_F(FormatConvertTest, LongDoubleRoundA) {
// We don't actually store the results. This is just to exercise the rest of the
// machinery.
struct NullSink {
- friend void AbslFormatFlush(NullSink *sink, string_view str) {}
+ friend void AbslFormatFlush(NullSink *, string_view) {}
};
template <typename... T>
diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h
index 8de42d2c..173284c6 100644
--- a/absl/strings/internal/str_format/extension.h
+++ b/absl/strings/internal/str_format/extension.h
@@ -16,16 +16,14 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_
-#include <limits.h>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <ostream>
+#include <string>
#include "absl/base/config.h"
-#include "absl/base/port.h"
-#include "absl/meta/type_traits.h"
#include "absl/strings/internal/str_format/output.h"
#include "absl/strings/string_view.h"
@@ -34,6 +32,7 @@ ABSL_NAMESPACE_BEGIN
enum class FormatConversionChar : uint8_t;
enum class FormatConversionCharSet : uint64_t;
+enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };
namespace str_format_internal {
@@ -139,7 +138,8 @@ enum class Flags : uint8_t {
kAlt = 1 << 3,
kZero = 1 << 4,
// This is not a real flag. It just exists to turn off kBasic when no other
- // flags are set. This is for when width/precision are specified.
+ // flags are set. This is for when width/precision are specified, or a length
+ // modifier affects the behavior ("%lc").
kNonBasic = 1 << 5,
};
@@ -285,6 +285,8 @@ class FormatConversionSpecImpl {
bool has_alt_flag() const { return FlagsContains(flags_, Flags::kAlt); }
bool has_zero_flag() const { return FlagsContains(flags_, Flags::kZero); }
+ LengthMod length_mod() const { return length_mod_; }
+
FormatConversionChar conversion_char() const {
// Keep this field first in the struct. It generates better code when
// accessing it when ConversionSpec is passed by value in registers.
@@ -310,6 +312,7 @@ class FormatConversionSpecImpl {
friend struct str_format_internal::FormatConversionSpecImplFriend;
FormatConversionChar conv_ = FormatConversionCharInternal::kNone;
Flags flags_;
+ LengthMod length_mod_ = LengthMod::none;
int width_;
int precision_;
};
@@ -318,6 +321,9 @@ struct FormatConversionSpecImplFriend final {
static void SetFlags(Flags f, FormatConversionSpecImpl* conv) {
conv->flags_ = f;
}
+ static void SetLengthMod(LengthMod l, FormatConversionSpecImpl* conv) {
+ conv->length_mod_ = l;
+ }
static void SetConversionChar(FormatConversionChar c,
FormatConversionSpecImpl* conv) {
conv->conv_ = c;
diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h
index 35b6d49c..b1d6d5fd 100644
--- a/absl/strings/internal/str_format/parser.h
+++ b/absl/strings/internal/str_format/parser.h
@@ -15,22 +15,23 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
-#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <cassert>
-#include <cstdint>
+#include <cstring>
#include <initializer_list>
-#include <iosfwd>
-#include <iterator>
#include <memory>
#include <string>
+#include <utility>
#include <vector>
+#include "absl/base/config.h"
+#include "absl/base/optimization.h"
#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/constexpr_parser.h"
#include "absl/strings/internal/str_format/extension.h"
+#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc
index 021f6a87..e2225c60 100644
--- a/absl/strings/internal/str_format/parser_test.cc
+++ b/absl/strings/internal/str_format/parser_test.cc
@@ -15,10 +15,18 @@
#include "absl/strings/internal/str_format/parser.h"
#include <string.h>
+#include <algorithm>
+#include <initializer_list>
+#include <string>
+#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "absl/base/config.h"
#include "absl/base/macros.h"
+#include "absl/strings/internal/str_format/constexpr_parser.h"
+#include "absl/strings/internal/str_format/extension.h"
+#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -303,7 +311,7 @@ TEST_F(ConsumeUnboundConversionTest, BasicFlag) {
}
// Flag is off
- for (const char* fmt : {"3d", ".llx", "-G", "1$#X"}) {
+ for (const char* fmt : {"3d", ".llx", "-G", "1$#X", "lc"}) {
SCOPED_TRACE(fmt);
EXPECT_TRUE(Run(fmt));
EXPECT_NE(o.flags, Flags::kBasic);
diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h
index 21ee179e..c9b350fd 100644
--- a/absl/strings/str_format.h
+++ b/absl/strings/str_format.h
@@ -72,14 +72,20 @@
#ifndef ABSL_STRINGS_STR_FORMAT_H_
#define ABSL_STRINGS_STR_FORMAT_H_
+#include <cstdint>
#include <cstdio>
#include <string>
+#include <type_traits>
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
#include "absl/strings/internal/str_format/arg.h" // IWYU pragma: export
#include "absl/strings/internal/str_format/bind.h" // IWYU pragma: export
#include "absl/strings/internal/str_format/checker.h" // IWYU pragma: export
#include "absl/strings/internal/str_format/extension.h" // IWYU pragma: export
#include "absl/strings/internal/str_format/parser.h" // IWYU pragma: export
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -256,7 +262,7 @@ class FormatCountCapture {
//
// The `FormatSpec` intrinsically supports all of these fundamental C++ types:
//
-// * Characters: `char`, `signed char`, `unsigned char`
+// * Characters: `char`, `signed char`, `unsigned char`, `wchar_t`
// * Integers: `int`, `short`, `unsigned short`, `unsigned`, `long`,
// `unsigned long`, `long long`, `unsigned long long`
// * Enums: printed as their underlying integral value
@@ -264,9 +270,9 @@ class FormatCountCapture {
//
// However, in the `str_format` library, a format conversion specifies a broader
// C++ conceptual category instead of an exact type. For example, `%s` binds to
-// any string-like argument, so `std::string`, `absl::string_view`, and
-// `const char*` are all accepted. Likewise, `%d` accepts any integer-like
-// argument, etc.
+// any string-like argument, so `std::string`, `std::wstring`,
+// `absl::string_view`, `const char*`, and `const wchar_t*` are all accepted.
+// Likewise, `%d` accepts any integer-like argument, etc.
template <typename... Args>
using FormatSpec = str_format_internal::FormatSpecTemplate<
diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc
index 195ef3fe..3c52be1e 100644
--- a/absl/strings/str_format_test.cc
+++ b/absl/strings/str_format_test.cc
@@ -634,6 +634,10 @@ TEST(StrFormat, BehavesAsDocumented) {
const int& something = *reinterpret_cast<const int*>(ptr_value);
EXPECT_EQ(StrFormat("%p", &something), StrFormat("0x%x", ptr_value));
+ // The exact output of formatting a null pointer is not documented, but
+ // the call should at least compile.
+ (void)StrFormat("%p", nullptr);
+
// Output widths are supported, with optional flags.
EXPECT_EQ(StrFormat("%3d", 1), " 1");
EXPECT_EQ(StrFormat("%3d", 123456), "123456");