diff options
author | Abseil Team <absl-team@google.com> | 2023-12-05 14:15:15 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2023-12-05 14:15:57 -0800 |
commit | 5dc2cc1a6a992e09d9cf930cdce7984640e2b7e0 (patch) | |
tree | a02601dddbd8a5ccbe63fdc5fee787d37cf103ed /absl | |
parent | 3e6ecec7d3c9c504c9951b34230b22527758e0cd (diff) |
Adds support for wchar_t/wchar_t*/std::wstring{_view} arguments to StrFormat().
This converts to UTF-8 regardless of locale.
PiperOrigin-RevId: 588186076
Change-Id: I2c9598279b413d460e13ad65da2ba421c0b40b83
Diffstat (limited to 'absl')
-rw-r--r-- | absl/strings/BUILD.bazel | 12 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 17 | ||||
-rw-r--r-- | absl/strings/internal/str_format/arg.cc | 153 | ||||
-rw-r--r-- | absl/strings/internal/str_format/arg.h | 80 | ||||
-rw-r--r-- | absl/strings/internal/str_format/arg_test.cc | 34 | ||||
-rw-r--r-- | absl/strings/internal/str_format/bind.cc | 18 | ||||
-rw-r--r-- | absl/strings/internal/str_format/bind.h | 11 | ||||
-rw-r--r-- | absl/strings/internal/str_format/constexpr_parser.h | 11 | ||||
-rw-r--r-- | absl/strings/internal/str_format/convert_test.cc | 328 | ||||
-rw-r--r-- | absl/strings/internal/str_format/extension.h | 14 | ||||
-rw-r--r-- | absl/strings/internal/str_format/parser.h | 9 | ||||
-rw-r--r-- | absl/strings/internal/str_format/parser_test.cc | 10 | ||||
-rw-r--r-- | absl/strings/str_format.h | 14 | ||||
-rw-r--r-- | absl/strings/str_format_test.cc | 4 |
14 files changed, 608 insertions, 107 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index a3ef3ae8..d8883bf2 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -1247,6 +1247,10 @@ cc_library( linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ ":str_format_internal", + ":string_view", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/types:span", ], ) @@ -1277,6 +1281,7 @@ cc_library( ":strings", "//absl/base:config", "//absl/base:core_headers", + "//absl/container:fixed_array", "//absl/container:inlined_vector", "//absl/functional:function_ref", "//absl/meta:type_traits", @@ -1330,6 +1335,7 @@ cc_test( deps = [ ":str_format", ":str_format_internal", + "//absl/base:config", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", ], @@ -1366,12 +1372,16 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":str_format", ":str_format_internal", ":strings", + "//absl/base:config", "//absl/base:core_headers", "//absl/base:raw_logging_internal", "//absl/log", + "//absl/numeric:int128", "//absl/types:optional", + "//absl/types:span", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", ], @@ -1397,6 +1407,8 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":str_format_internal", + ":string_view", + "//absl/base:config", "//absl/base:core_headers", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index b129096d..6ec9e0eb 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -470,7 +470,11 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::config + absl::core_headers + absl::span absl::str_format_internal + absl::string_view PUBLIC ) @@ -501,6 +505,7 @@ absl_cc_library( absl::strings absl::config absl::core_headers + absl::fixed_array absl::inlined_vector absl::numeric_representation absl::type_traits @@ -548,6 +553,7 @@ absl_cc_test( COPTS ${ABSL_TEST_COPTS} DEPS + absl::config absl::str_format absl::str_format_internal GTest::gmock_main @@ -585,12 +591,15 @@ absl_cc_test( COPTS ${ABSL_TEST_COPTS} DEPS - absl::strings - absl::str_format_internal + absl::config absl::core_headers + absl::int128 absl::log absl::raw_logging_internal - absl::int128 + absl::span + absl::str_format + absl::str_format_internal + absl::strings GTest::gmock_main ) @@ -616,6 +625,8 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format_internal + absl::string_view + absl::config absl::core_headers GTest::gmock_main ) diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index c0a9a28e..eeb21081 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -18,15 +18,28 @@ // #include "absl/strings/internal/str_format/arg.h" +#include <algorithm> #include <cassert> -#include <cerrno> +#include <cstddef> +#include <cstdint> #include <cstdlib> +#include <cstring> +#include <cwchar> #include <string> #include <type_traits> -#include "absl/base/port.h" +#include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/container/fixed_array.h" +#include "absl/numeric/int128.h" +#include "absl/strings/internal/str_format/extension.h" #include "absl/strings/internal/str_format/float_conversion.h" #include "absl/strings/numbers.h" +#include "absl/strings/string_view.h" + +#if defined(ABSL_HAVE_STD_STRING_VIEW) +#include <string_view> +#endif namespace absl { ABSL_NAMESPACE_BEGIN @@ -298,6 +311,83 @@ inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, conv.has_left_flag()); } +struct ShiftState { + bool saw_high_surrogate = false; + uint8_t bits = 0; +}; + +// Converts `v` from UTF-16 or UTF-32 to UTF-8 and writes to `buf`. `buf` is +// assumed to have enough space for the output. `s` is used to carry state +// between successive calls with a UTF-16 surrogate pair. Returns the number of +// chars written, or `static_cast<size_t>(-1)` on failure. +// +// This is basically std::wcrtomb(), but always outputting UTF-8 instead of +// respecting the current locale. +inline size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s) { + const auto v = static_cast<uint32_t>(wc); + if (v < 0x80) { + *buf = static_cast<char>(v); + return 1; + } else if (v < 0x800) { + *buf++ = static_cast<char>(0xc0 | (v >> 6)); + *buf = static_cast<char>(0x80 | (v & 0x3f)); + return 2; + } else if (v < 0xd800 || (v - 0xe000) < 0x2000) { + *buf++ = static_cast<char>(0xe0 | (v >> 12)); + *buf++ = static_cast<char>(0x80 | ((v >> 6) & 0x3f)); + *buf = static_cast<char>(0x80 | (v & 0x3f)); + return 3; + } else if ((v - 0x10000) < 0x100000) { + *buf++ = static_cast<char>(0xf0 | (v >> 18)); + *buf++ = static_cast<char>(0x80 | ((v >> 12) & 0x3f)); + *buf++ = static_cast<char>(0x80 | ((v >> 6) & 0x3f)); + *buf = static_cast<char>(0x80 | (v & 0x3f)); + return 4; + } else if (v < 0xdc00) { + s.saw_high_surrogate = true; + s.bits = static_cast<uint8_t>(v & 0x3); + const uint8_t high_bits = ((v >> 6) & 0xf) + 1; + *buf++ = static_cast<char>(0xf0 | (high_bits >> 2)); + *buf = + static_cast<char>(0x80 | static_cast<uint8_t>((high_bits & 0x3) << 4) | + static_cast<uint8_t>((v >> 2) & 0xf)); + return 2; + } else if (v < 0xe000 && s.saw_high_surrogate) { + *buf++ = static_cast<char>(0x80 | static_cast<uint8_t>(s.bits << 4) | + static_cast<uint8_t>((v >> 6) & 0xf)); + *buf = static_cast<char>(0x80 | (v & 0x3f)); + s.saw_high_surrogate = false; + s.bits = 0; + return 2; + } else { + return static_cast<size_t>(-1); + } +} + +inline bool ConvertStringArg(const wchar_t *v, + size_t len, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + FixedArray<char> mb(len * 4); + ShiftState s; + size_t chars_written = 0; + for (size_t i = 0; i < len; ++i) { + const size_t chars = WideToUtf8(v[i], &mb[chars_written], s); + if (chars == static_cast<size_t>(-1)) { return false; } + chars_written += chars; + } + return ConvertStringArg(string_view(mb.data(), chars_written), conv, sink); +} + +bool ConvertWCharTImpl(wchar_t v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + char mb[4]; + ShiftState s; + const size_t chars_written = WideToUtf8(v, mb, s); + return chars_written != static_cast<size_t>(-1) && !s.saw_high_surrogate && + ConvertStringArg(string_view(mb, chars_written), conv, sink); +} + } // namespace bool ConvertBoolArg(bool v, FormatSinkImpl *sink) { @@ -316,11 +406,14 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { // This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes // it to complain about a switch/case type mismatch, even though both are - // FormatConverionChar. Likely this is because at this point + // FormatConversionChar. Likely this is because at this point // FormatConversionChar is declared, but not defined. switch (static_cast<uint8_t>(conv.conversion_char())) { case static_cast<uint8_t>(FormatConversionCharInternal::c): - return ConvertCharImpl(static_cast<char>(v), conv, sink); + return (std::is_same<T, wchar_t>::value || + (conv.length_mod() == LengthMod::l)) + ? ConvertWCharTImpl(static_cast<wchar_t>(v), conv, sink) + : ConvertCharImpl(static_cast<char>(v), conv, sink); case static_cast<uint8_t>(FormatConversionCharInternal::o): as_digits.PrintAsOct(static_cast<U>(v)); @@ -372,6 +465,8 @@ template bool ConvertIntArg<signed char>(signed char v, template bool ConvertIntArg<unsigned char>(unsigned char v, FormatConversionSpecImpl conv, FormatSinkImpl *sink); +template bool ConvertIntArg<wchar_t>(wchar_t v, FormatConversionSpecImpl conv, + FormatSinkImpl *sink); template bool ConvertIntArg<short>(short v, // NOLINT FormatConversionSpecImpl conv, FormatSinkImpl *sink); @@ -403,16 +498,29 @@ StringConvertResult FormatConvertImpl(const std::string &v, return {ConvertStringArg(v, conv, sink)}; } +StringConvertResult FormatConvertImpl(const std::wstring &v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertStringArg(v.data(), v.size(), conv, sink)}; +} + StringConvertResult FormatConvertImpl(string_view v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertStringArg(v, conv, sink)}; } -ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> -FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv, - FormatSinkImpl *sink) { +#if defined(ABSL_HAVE_STD_STRING_VIEW) +StringConvertResult FormatConvertImpl(std::wstring_view v, + const FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return {ConvertStringArg(v.data(), v.size(), conv, sink)}; +} +#endif + +StringPtrConvertResult FormatConvertImpl(const char* v, + const FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { if (conv.conversion_char() == FormatConversionCharInternal::p) return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; size_t len; @@ -427,6 +535,30 @@ FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv, return {ConvertStringArg(string_view(v, len), conv, sink)}; } +StringPtrConvertResult FormatConvertImpl(const wchar_t* v, + const FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + if (conv.conversion_char() == FormatConversionCharInternal::p) { + return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; + } + size_t len; + if (v == nullptr) { + len = 0; + } else if (conv.precision() < 0) { + len = std::wcslen(v); + } else { + // If precision is set, we look for the NUL-terminator on the valid range. + len = static_cast<size_t>(std::find(v, v + conv.precision(), L'\0') - v); + } + return {ConvertStringArg(v, len, conv, sink)}; +} + +StringPtrConvertResult FormatConvertImpl(std::nullptr_t, + const FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatConvertImpl(static_cast<const char*>(nullptr), conv, sink); +} + // ==================== Raw pointers ==================== ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { @@ -461,6 +593,11 @@ CharConvertResult FormatConvertImpl(char v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } +CharConvertResult FormatConvertImpl(wchar_t v, + const FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return {ConvertIntArg(v, conv, sink)}; +} // ==================== Ints ==================== IntegralConvertResult FormatConvertImpl(signed char v, diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index 20483aff..309161d5 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -19,8 +19,9 @@ #include <wchar.h> #include <algorithm> +#include <cstddef> +#include <cstdint> #include <cstdio> -#include <iomanip> #include <limits> #include <memory> #include <sstream> @@ -28,13 +29,18 @@ #include <type_traits> #include <utility> -#include "absl/base/port.h" +#include "absl/base/config.h" +#include "absl/base/optimization.h" #include "absl/meta/type_traits.h" #include "absl/numeric/int128.h" #include "absl/strings/has_absl_stringify.h" #include "absl/strings/internal/str_format/extension.h" #include "absl/strings/string_view.h" +#if defined(ABSL_HAVE_STD_STRING_VIEW) +#include <string_view> +#endif + namespace absl { ABSL_NAMESPACE_BEGIN @@ -97,6 +103,9 @@ extern template bool ConvertIntArg<signed char>(signed char v, extern template bool ConvertIntArg<unsigned char>(unsigned char v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); +extern template bool ConvertIntArg<wchar_t>(wchar_t v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); extern template bool ConvertIntArg<short>(short v, // NOLINT FormatConversionSpecImpl conv, FormatSinkImpl* sink); @@ -203,30 +212,49 @@ constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) { return C; } -using StringConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::v)>; ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Strings. +using StringConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, + FormatConversionCharSetInternal::v)>; StringConvertResult FormatConvertImpl(const std::string& v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); +StringConvertResult FormatConvertImpl(const std::wstring& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); StringConvertResult FormatConvertImpl(string_view v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -#if defined(ABSL_HAVE_STD_STRING_VIEW) && !defined(ABSL_USES_STD_STRING_VIEW) +#if defined(ABSL_HAVE_STD_STRING_VIEW) +StringConvertResult FormatConvertImpl(std::wstring_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +#if !defined(ABSL_USES_STD_STRING_VIEW) inline StringConvertResult FormatConvertImpl(std::string_view v, FormatConversionSpecImpl conv, FormatSinkImpl* sink) { return FormatConvertImpl(absl::string_view(v.data(), v.size()), conv, sink); } -#endif // ABSL_HAVE_STD_STRING_VIEW && !ABSL_USES_STD_STRING_VIEW - -ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> -FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv, - FormatSinkImpl* sink); +#endif // !ABSL_USES_STD_STRING_VIEW +#endif // ABSL_HAVE_STD_STRING_VIEW + +using StringPtrConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, + FormatConversionCharSetInternal::p)>; +StringPtrConvertResult FormatConvertImpl(const char* v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +StringPtrConvertResult FormatConvertImpl(const wchar_t* v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +// This overload is needed to disambiguate, since `nullptr` could match either +// of the other overloads equally well. +StringPtrConvertResult FormatConvertImpl(std::nullptr_t, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); template <class AbslCord, typename std::enable_if<std::is_same< AbslCord, absl::Cord>::value>::type* = nullptr> @@ -280,6 +308,9 @@ FloatingConvertResult FormatConvertImpl(long double v, // Chars. CharConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); +CharConvertResult FormatConvertImpl(wchar_t v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); // Ints. IntegralConvertResult FormatConvertImpl(signed char v, @@ -441,6 +472,7 @@ class FormatArgImpl { // Anything with a user-defined Convert will get its own vtable. // For everything else: // - Decay char* and char arrays into `const char*` + // - Decay wchar_t* and wchar_t arrays into `const wchar_t*` // - Decay any other pointer to `const void*` // - Decay all enums to the integral promotion of their underlying type. // - Decay function pointers to void*. @@ -452,9 +484,13 @@ class FormatArgImpl { using type = typename std::conditional< !kHasUserDefined && std::is_convertible<T, const char*>::value, const char*, - typename std::conditional<!kHasUserDefined && - std::is_convertible<T, VoidPtr>::value, - VoidPtr, const T&>::type>::type; + typename std::conditional< + !kHasUserDefined && std::is_convertible<T, const wchar_t*>::value, + const wchar_t*, + typename std::conditional< + !kHasUserDefined && std::is_convertible<T, VoidPtr>::value, + VoidPtr, + const T&>::type>::type>::type; }; template <typename T> struct DecayType< @@ -585,7 +621,7 @@ class FormatArgImpl { E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \ void*) -#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \ +#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_NO_WSTRING_VIEW_(...) \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \ __VA_ARGS__); \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(bool, __VA_ARGS__); \ @@ -611,7 +647,19 @@ class FormatArgImpl { ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long double, __VA_ARGS__); \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const char*, __VA_ARGS__); \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::string, __VA_ARGS__); \ - ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__) + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const wchar_t*, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::wstring, __VA_ARGS__) + +#if defined(ABSL_HAVE_STD_STRING_VIEW) +#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \ + ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_NO_WSTRING_VIEW_( \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::wstring_view, __VA_ARGS__) +#else +#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \ + ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_NO_WSTRING_VIEW_(__VA_ARGS__) +#endif ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern); diff --git a/absl/strings/internal/str_format/arg_test.cc b/absl/strings/internal/str_format/arg_test.cc index 1261937c..f663d7c5 100644 --- a/absl/strings/internal/str_format/arg_test.cc +++ b/absl/strings/internal/str_format/arg_test.cc @@ -14,9 +14,10 @@ #include "absl/strings/internal/str_format/arg.h" -#include <ostream> +#include <limits> #include <string> #include "gtest/gtest.h" +#include "absl/base/config.h" #include "absl/strings/str_format.h" namespace absl { @@ -93,6 +94,21 @@ TEST_F(FormatArgImplTest, CharArraysDecayToCharPtr) { FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyArray))); } +extern const wchar_t kMyWCharTArray[]; + +TEST_F(FormatArgImplTest, WCharTArraysDecayToWCharTPtr) { + const wchar_t* a = L""; + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(L""))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(L"A"))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(L"ABC"))); + EXPECT_EQ( + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyWCharTArray))); +} + TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) { auto expected = FormatArgImplFriend::GetVTablePtrForTest( FormatArgImpl(static_cast<void *>(nullptr))); @@ -124,6 +140,22 @@ TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { } const char kMyArray[] = "ABCDE"; +TEST_F(FormatArgImplTest, WorksWithWCharTArraysOfUnknownSize) { + std::string s; + FormatSinkImpl sink(&s); + FormatConversionSpecImpl conv; + FormatConversionSpecImplFriend::SetConversionChar( + FormatConversionCharInternal::s, &conv); + FormatConversionSpecImplFriend::SetFlags(Flags(), &conv); + FormatConversionSpecImplFriend::SetWidth(-1, &conv); + FormatConversionSpecImplFriend::SetPrecision(-1, &conv); + EXPECT_TRUE( + FormatArgImplFriend::Convert(FormatArgImpl(kMyWCharTArray), conv, &sink)); + sink.Flush(); + EXPECT_EQ("ABCDE", s); +} +const wchar_t kMyWCharTArray[] = L"ABCDE"; + } // namespace } // namespace str_format_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/str_format/bind.cc b/absl/strings/internal/str_format/bind.cc index 77a42223..87e23b56 100644 --- a/absl/strings/internal/str_format/bind.cc +++ b/absl/strings/internal/str_format/bind.cc @@ -14,10 +14,24 @@ #include "absl/strings/internal/str_format/bind.h" +#include <algorithm> +#include <cassert> #include <cerrno> +#include <cstddef> +#include <cstdio> +#include <ios> #include <limits> +#include <ostream> #include <sstream> #include <string> +#include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" +#include "absl/strings/internal/str_format/extension.h" +#include "absl/strings/internal/str_format/output.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -90,6 +104,8 @@ inline bool ArgContext::Bind(const UnboundConversion* unbound, } else { FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); } + + FormatConversionSpecImplFriend::SetLengthMod(unbound->length_mod, bound); } else { FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); FormatConversionSpecImplFriend::SetWidth(-1, bound); @@ -215,7 +231,7 @@ std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl format, return *out; } -std::string FormatPack(const UntypedFormatSpecImpl format, +std::string FormatPack(UntypedFormatSpecImpl format, absl::Span<const FormatArgImpl> args) { std::string out; if (ABSL_PREDICT_FALSE(!FormatUntyped(&out, format, args))) { diff --git a/absl/strings/internal/str_format/bind.h b/absl/strings/internal/str_format/bind.h index 5e2a43d5..120bc355 100644 --- a/absl/strings/internal/str_format/bind.h +++ b/absl/strings/internal/str_format/bind.h @@ -15,16 +15,19 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ -#include <array> +#include <cassert> #include <cstdio> -#include <sstream> +#include <ostream> #include <string> -#include "absl/base/port.h" +#include "absl/base/config.h" #include "absl/container/inlined_vector.h" #include "absl/strings/internal/str_format/arg.h" #include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" +#include "absl/strings/internal/str_format/extension.h" #include "absl/strings/internal/str_format/parser.h" +#include "absl/strings/string_view.h" #include "absl/types/span.h" #include "absl/utility/utility.h" @@ -203,7 +206,7 @@ bool FormatUntyped(FormatRawSinkImpl raw_sink, UntypedFormatSpecImpl format, std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format, absl::Span<const FormatArgImpl> args); -std::string FormatPack(const UntypedFormatSpecImpl format, +std::string FormatPack(UntypedFormatSpecImpl format, absl::Span<const FormatArgImpl> args); int FprintF(std::FILE* output, UntypedFormatSpecImpl format, diff --git a/absl/strings/internal/str_format/constexpr_parser.h b/absl/strings/internal/str_format/constexpr_parser.h index b70a16e4..8f593870 100644 --- a/absl/strings/internal/str_format/constexpr_parser.h +++ b/absl/strings/internal/str_format/constexpr_parser.h @@ -17,17 +17,18 @@ #include <cassert> #include <cstdint> +#include <cstdio> #include <limits> +#include "absl/base/config.h" #include "absl/base/const_init.h" +#include "absl/base/optimization.h" #include "absl/strings/internal/str_format/extension.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; - // The analyzed properties of a single specified conversion. struct UnboundConversion { // This is a user defined default constructor on purpose to skip the @@ -306,7 +307,6 @@ constexpr const char* ConsumeConversion(const char* pos, const char* const end, if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; // It is a length modifier. - using str_format_internal::LengthMod; LengthMod length_mod = tag.as_length(); ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (c == 'h' && length_mod == LengthMod::h) { @@ -322,6 +322,11 @@ constexpr const char* ConsumeConversion(const char* pos, const char* const end, if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr; if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; + + // `wchar_t` args are marked non-basic so `Bind()` will copy the length mod. + if (conv->length_mod == LengthMod::l && c == 'c') { + conv->flags = conv->flags | Flags::kNonBasic; + } } #undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc index d14ecb24..7f222778 100644 --- a/absl/strings/internal/str_format/convert_test.cc +++ b/absl/strings/internal/str_format/convert_test.cc @@ -12,25 +12,43 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include <errno.h> +#include <assert.h> +#include <locale.h> #include <stdarg.h> #include <stdio.h> #include <algorithm> -#include <cctype> +#include <climits> #include <cmath> +#include <cstdlib> +#include <cstring> +#include <cwctype> #include <limits> +#include <set> +#include <sstream> #include <string> #include <thread> // NOLINT +#include <type_traits> +#include <vector> #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/attributes.h" #include "absl/base/internal/raw_logging.h" #include "absl/log/log.h" +#include "absl/numeric/int128.h" +#include "absl/strings/ascii.h" +#include "absl/strings/internal/str_format/arg.h" #include "absl/strings/internal/str_format/bind.h" #include "absl/strings/match.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" #include "absl/types/optional.h" +#include "absl/types/span.h" + +#if defined(ABSL_HAVE_STD_STRING_VIEW) +#include <string_view> +#endif namespace absl { ABSL_NAMESPACE_BEGIN @@ -49,36 +67,103 @@ size_t ArraySize(T (&)[N]) { return N; } -std::string LengthModFor(float) { return ""; } -std::string LengthModFor(double) { return ""; } -std::string LengthModFor(long double) { return "L"; } -std::string LengthModFor(char) { return "hh"; } -std::string LengthModFor(signed char) { return "hh"; } -std::string LengthModFor(unsigned char) { return "hh"; } -std::string LengthModFor(short) { return "h"; } // NOLINT -std::string LengthModFor(unsigned short) { return "h"; } // NOLINT -std::string LengthModFor(int) { return ""; } -std::string LengthModFor(unsigned) { return ""; } -std::string LengthModFor(long) { return "l"; } // NOLINT -std::string LengthModFor(unsigned long) { return "l"; } // NOLINT -std::string LengthModFor(long long) { return "ll"; } // NOLINT -std::string LengthModFor(unsigned long long) { return "ll"; } // NOLINT +template <typename T> +struct AlwaysFalse : std::false_type {}; + +template <typename T> +std::string LengthModFor() { + static_assert(AlwaysFalse<T>::value, "Unsupported type"); + return ""; +} +template <> +std::string LengthModFor<char>() { + return "hh"; +} +template <> +std::string LengthModFor<signed char>() { + return "hh"; +} +template <> +std::string LengthModFor<unsigned char>() { + return "hh"; +} +template <> +std::string LengthModFor<short>() { // NOLINT + return "h"; +} +template <> +std::string LengthModFor<unsigned short>() { // NOLINT + return "h"; +} +template <> +std::string LengthModFor<int>() { + return ""; +} +template <> +std::string LengthModFor<unsigned>() { + return ""; +} +template <> +std::string LengthModFor<long>() { // NOLINT + return "l"; +} +template <> +std::string LengthModFor<unsigned long>() { // NOLINT + return "l"; +} +template <> +std::string LengthModFor<long long>() { // NOLINT + return "ll"; +} +template <> +std::string LengthModFor<unsigned long long>() { // NOLINT + return "ll"; +} + +// An integral type of the same rank and signedness as `wchar_t`, that isn't +// `wchar_t`. +using IntegralTypeForWCharT = + std::conditional_t<std::is_signed<wchar_t>::value, + // Some STLs are broken and return `wchar_t` from + // `std::make_[un]signed_t<wchar_t>` when the signedness + // matches. Work around by round-tripping through the + // opposite signedness. + std::make_signed_t<std::make_unsigned_t<wchar_t>>, + std::make_unsigned_t<std::make_signed_t<wchar_t>>>; + +// Given an integral type `T`, returns a type of the same rank and signedness +// that is guaranteed to not be `wchar_t`. +template <typename T> +using MatchingIntegralType = std::conditional_t<std::is_same<T, wchar_t>::value, + IntegralTypeForWCharT, T>; std::string EscCharImpl(int v) { - if (std::isprint(static_cast<unsigned char>(v))) { - return std::string(1, static_cast<char>(v)); - } char buf[64]; - int n = snprintf(buf, sizeof(buf), "\\%#.2x", - static_cast<unsigned>(v & 0xff)); - assert(n > 0 && n < sizeof(buf)); - return std::string(buf, n); + int n = absl::ascii_isprint(static_cast<unsigned char>(v)) + ? snprintf(buf, sizeof(buf), "'%c'", v) + : snprintf(buf, sizeof(buf), "'\\x%.*x'", CHAR_BIT / 4, + static_cast<unsigned>( + static_cast<std::make_unsigned_t<char>>(v))); + assert(n > 0 && static_cast<size_t>(n) < sizeof(buf)); + return std::string(buf, static_cast<size_t>(n)); } std::string Esc(char v) { return EscCharImpl(v); } std::string Esc(signed char v) { return EscCharImpl(v); } std::string Esc(unsigned char v) { return EscCharImpl(v); } +std::string Esc(wchar_t v) { + char buf[64]; + int n = std::iswprint(static_cast<wint_t>(v)) + ? snprintf(buf, sizeof(buf), "L'%lc'", static_cast<wint_t>(v)) + : snprintf(buf, sizeof(buf), "L'\\x%.*llx'", + static_cast<int>(sizeof(wchar_t) * CHAR_BIT / 4), + static_cast<unsigned long long>( + static_cast<std::make_unsigned_t<wchar_t>>(v))); + assert(n > 0 && static_cast<size_t>(n) < sizeof(buf)); + return std::string(buf, static_cast<size_t>(n)); +} + template <typename T> std::string Esc(const T &v) { std::ostringstream oss; @@ -101,7 +186,7 @@ void StrAppendV(std::string *dst, const char *format, va_list ap) { if (result < kSpaceLength) { if (result >= 0) { // Normal case -- everything fit. - dst->append(space, result); + dst->append(space, static_cast<size_t>(result)); return; } if (result < 0) { @@ -112,7 +197,7 @@ void StrAppendV(std::string *dst, const char *format, va_list ap) { // Increase the buffer size to the size requested by vsnprintf, // plus one for the closing \0. - int length = result + 1; + size_t length = static_cast<size_t>(result) + 1; char *buf = new char[length]; // Restore the va_list before we use it again @@ -120,9 +205,9 @@ void StrAppendV(std::string *dst, const char *format, va_list ap) { result = vsnprintf(buf, length, format, backup_ap); va_end(backup_ap); - if (result >= 0 && result < length) { + if (result >= 0 && static_cast<size_t>(result) < length) { // It fit - dst->append(buf, result); + dst->append(buf, static_cast<size_t>(result)); } delete[] buf; } @@ -231,11 +316,15 @@ void TestStringConvert(const T& str) { TEST_F(FormatConvertTest, BasicString) { TestStringConvert("hello"); // As char array. + TestStringConvert(L"hello"); TestStringConvert(static_cast<const char*>("hello")); + TestStringConvert(static_cast<const wchar_t*>(L"hello")); TestStringConvert(std::string("hello")); + TestStringConvert(std::wstring(L"hello")); TestStringConvert(string_view("hello")); #if defined(ABSL_HAVE_STD_STRING_VIEW) TestStringConvert(std::string_view("hello")); + TestStringConvert(std::wstring_view(L"hello")); #endif // ABSL_HAVE_STD_STRING_VIEW } @@ -243,6 +332,10 @@ TEST_F(FormatConvertTest, NullString) { const char* p = nullptr; UntypedFormatSpecImpl format("%s"); EXPECT_EQ("", FormatPack(format, {FormatArgImpl(p)})); + + const wchar_t* wp = nullptr; + UntypedFormatSpecImpl wformat("%ls"); + EXPECT_EQ("", FormatPack(wformat, {FormatArgImpl(wp)})); } TEST_F(FormatConvertTest, StringPrecision) { @@ -252,10 +345,19 @@ TEST_F(FormatConvertTest, StringPrecision) { UntypedFormatSpecImpl format("%.1s"); EXPECT_EQ("a", FormatPack(format, {FormatArgImpl(p)})); + wchar_t wc = L'a'; + const wchar_t* wp = &wc; + UntypedFormatSpecImpl wformat("%.1ls"); + EXPECT_EQ("a", FormatPack(wformat, {FormatArgImpl(wp)})); + // We cap at the NUL-terminator. p = "ABC"; UntypedFormatSpecImpl format2("%.10s"); EXPECT_EQ("ABC", FormatPack(format2, {FormatArgImpl(p)})); + + wp = L"ABC"; + UntypedFormatSpecImpl wformat2("%.10ls"); + EXPECT_EQ("ABC", FormatPack(wformat2, {FormatArgImpl(wp)})); } // Pointer formatting is implementation defined. This checks that the argument @@ -278,16 +380,25 @@ TEST_F(FormatConvertTest, Pointer) { char *mcp = &c; const char *cp = "hi"; const char *cnil = nullptr; + wchar_t wc = L'h'; + wchar_t *mwcp = &wc; + const wchar_t *wcp = L"hi"; + const wchar_t *wcnil = nullptr; const int *inil = nullptr; using VoidF = void (*)(); VoidF fp = [] {}, fnil = nullptr; volatile char vc; volatile char *vcp = &vc; volatile char *vcnil = nullptr; + volatile wchar_t vwc; + volatile wchar_t *vwcp = &vwc; + volatile wchar_t *vwcnil = nullptr; const FormatArgImpl args_array[] = { - FormatArgImpl(xp), FormatArgImpl(cp), FormatArgImpl(inil), - FormatArgImpl(cnil), FormatArgImpl(mcp), FormatArgImpl(fp), - FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vcnil), + FormatArgImpl(xp), FormatArgImpl(cp), FormatArgImpl(wcp), + FormatArgImpl(inil), FormatArgImpl(cnil), FormatArgImpl(wcnil), + FormatArgImpl(mcp), FormatArgImpl(mwcp), FormatArgImpl(fp), + FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vwcp), + FormatArgImpl(vcnil), FormatArgImpl(vwcnil), }; auto args = absl::MakeConstSpan(args_array); @@ -313,30 +424,49 @@ TEST_F(FormatConvertTest, Pointer) { EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-30.20p"), args), MatchesPointerString(&x)); + // const int* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%1$p"), args), + MatchesPointerString(xp)); // const char* EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%2$p"), args), MatchesPointerString(cp)); - // null const int* + // const wchar_t* EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%3$p"), args), + MatchesPointerString(wcp)); + // null const int* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%4$p"), args), MatchesPointerString(nullptr)); // null const char* - EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%4$p"), args), + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%5$p"), args), + MatchesPointerString(nullptr)); + // null const wchar_t* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%6$p"), args), MatchesPointerString(nullptr)); // nonconst char* - EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%5$p"), args), + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%7$p"), args), MatchesPointerString(mcp)); - - // function pointers - EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%6$p"), args), - MatchesPointerString(reinterpret_cast<const void*>(fp))); + // nonconst wchar_t* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%8$p"), args), + MatchesPointerString(mwcp)); + // function pointer + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%9$p"), args), + MatchesPointerString(reinterpret_cast<const void *>(fp))); + // null function pointer + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%10$p"), args), + MatchesPointerString(nullptr)); + // volatile char* EXPECT_THAT( - FormatPack(UntypedFormatSpecImpl("%8$p"), args), + FormatPack(UntypedFormatSpecImpl("%11$p"), args), MatchesPointerString(reinterpret_cast<volatile const void *>(vcp))); - - // null function pointers - EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%7$p"), args), + // volatile wchar_t* + EXPECT_THAT( + FormatPack(UntypedFormatSpecImpl("%12$p"), args), + MatchesPointerString(reinterpret_cast<volatile const void *>(vwcp))); + // null volatile char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%13$p"), args), MatchesPointerString(nullptr)); - EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%9$p"), args), + // null volatile wchar_t* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%14$p"), args), MatchesPointerString(nullptr)); } @@ -436,12 +566,15 @@ TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) { // as printf can't do that conversion properly. For those // cases, we do expect agreement with printf with a "%u" // and the unsigned equivalent of 'val'. - UnsignedT uval = val; - old_fmt += LengthModFor(uval); + UnsignedT uval = + static_cast<std::remove_volatile_t<UnsignedT>>(val); + old_fmt += LengthModFor< + MatchingIntegralType<std::remove_cv_t<decltype(uval)>>>(); old_fmt += "u"; old_result = StrPrint(old_fmt.c_str(), uval); } else { - old_fmt += LengthModFor(val); + old_fmt += LengthModFor< + MatchingIntegralType<std::remove_cv_t<decltype(val)>>>(); old_fmt += conv_char; old_result = StrPrint(old_fmt.c_str(), val); } @@ -459,6 +592,47 @@ TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) { } } +template <typename T> +absl::optional<std::string> StrPrintChar(T c) { + return StrPrint("%c", static_cast<int>(c)); +} +template <> +absl::optional<std::string> StrPrintChar(wchar_t c) { + // musl libc has a bug where ("%lc", 0) writes no characters, and Android + // doesn't support forcing UTF-8 via setlocale(). Hardcode the expected + // answers for ASCII inputs to maximize test coverage on these platforms. + if (static_cast<std::make_unsigned_t<wchar_t>>(c) < 0x80) { + return std::string(1, static_cast<char>(c)); + } + + // Force a UTF-8 locale to match the expected `StrFormat()` behavior. + // It's important to copy the string returned by `old_locale` here, because + // its contents are not guaranteed to be valid after the next `setlocale()` + // call. + std::string old_locale = setlocale(LC_CTYPE, nullptr); + if (!setlocale(LC_CTYPE, "en_US.UTF-8")) { + return absl::nullopt; + } + const std::string output = StrPrint("%lc", static_cast<wint_t>(c)); + setlocale(LC_CTYPE, old_locale.c_str()); + return output; +} + +template <typename T> +typename std::remove_volatile<T>::type GetMaxForConversion() { + return static_cast<typename std::remove_volatile<T>::type>( + std::numeric_limits<int>::max()); +} + +template <> +wchar_t GetMaxForConversion<wchar_t>() { + // Don't return values that aren't legal Unicode. For wchar_t conversions in a + // UTF-8 locale, conversion behavior for such values is unspecified, and we + // don't care about matching it. + return (sizeof(wchar_t) * CHAR_BIT <= 16) ? wchar_t{0xffff} + : static_cast<wchar_t>(0x10ffff); +} + TYPED_TEST_P(TypedFormatConvertTest, Char) { // Pass a bunch of values of type TypeParam to both FormatPack and libc's // vsnprintf("%c", ...) (wrapped in StrPrint) to make sure we get the same @@ -475,28 +649,50 @@ TYPED_TEST_P(TypedFormatConvertTest, Char) { // std::numeric_limits::max(), too, but vsnprintf("%c", ...) can't handle // anything larger than an int. Add in the most extreme values we can without // exceeding that range. + // Special case: Formatting a wchar_t should behave like vsnprintf("%lc"). + // Technically vsnprintf can accept a wint_t in this case, but since we must + // pass a wchar_t to FormatPack, the largest type we can use here is wchar_t. + using ArgType = + std::conditional_t<std::is_same<T, wchar_t>::value, wchar_t, int>; static const T kMin = - static_cast<remove_volatile_t>(std::numeric_limits<int>::min()); - static const T kMax = - static_cast<remove_volatile_t>(std::numeric_limits<int>::max()); - vals.insert(vals.end(), {kMin + 1, kMin, kMax - 1, kMax}); + static_cast<remove_volatile_t>(std::numeric_limits<ArgType>::min()); + static const T kMax = GetMaxForConversion<T>(); + vals.insert(vals.end(), {static_cast<remove_volatile_t>(kMin + 1), kMin, + static_cast<remove_volatile_t>(kMax - 1), kMax}); + static const auto kMaxWCharT = + static_cast<remove_volatile_t>(GetMaxForConversion<wchar_t>()); for (const T c : vals) { + SCOPED_TRACE(Esc(c)); const FormatArgImpl args[] = {FormatArgImpl(c)}; UntypedFormatSpecImpl format("%c"); - EXPECT_EQ(StrPrint("%c", static_cast<int>(c)), - FormatPack(format, absl::MakeSpan(args))); + absl::optional<std::string> result = StrPrintChar(c); + if (result.has_value()) { + EXPECT_EQ(result.value(), FormatPack(format, absl::MakeSpan(args))); + } + + // Also test that if the format specifier is "%lc", the argument is treated + // as if it's a `wchar_t`. + const T wc = + std::max(remove_volatile_t{0}, + std::min(static_cast<remove_volatile_t>(c), kMaxWCharT)); + SCOPED_TRACE(Esc(wc)); + const FormatArgImpl wide_args[] = {FormatArgImpl(wc)}; + UntypedFormatSpecImpl wide_format("%lc"); + result = StrPrintChar(static_cast<wchar_t>(wc)); + if (result.has_value()) { + EXPECT_EQ(result.value(), + FormatPack(wide_format, absl::MakeSpan(wide_args))); + } } } REGISTER_TYPED_TEST_SUITE_P(TypedFormatConvertTest, AllIntsWithFlags, Char); -typedef ::testing::Types< - int, unsigned, volatile int, - short, unsigned short, - long, unsigned long, - long long, unsigned long long, - signed char, unsigned char, char> +typedef ::testing::Types<int, unsigned, volatile int, short, // NOLINT + unsigned short, long, unsigned long, // NOLINT + long long, unsigned long long, // NOLINT + signed char, unsigned char, char, wchar_t> AllIntTypes; INSTANTIATE_TYPED_TEST_SUITE_P(TypedFormatConvertTestWithAllIntTypes, TypedFormatConvertTest, AllIntTypes); @@ -511,6 +707,22 @@ TEST_F(FormatConvertTest, VectorBool) { FormatArgImpl(cv[0]), FormatArgImpl(cv[1])}))); } +TEST_F(FormatConvertTest, UnicodeWideString) { + // StrFormat() should be able to convert wide strings containing Unicode + // characters (to UTF-8). + const FormatArgImpl args[] = {FormatArgImpl(L"\u47e3 \U00011112")}; + // `u8""` forces UTF-8 encoding; MSVC will default to e.g. CP1252 (and warn) + // without it. However, the resulting character type differs between pre-C++20 + // (`char`) and C++20 (`char8_t`). So deduce the right character type for all + // C++ versions, init it with UTF-8, then `memcpy()` to get the result as a + // `char*`. + using ConstChar8T = std::remove_reference_t<decltype(*u8"a")>; + ConstChar8T kOutputUtf8[] = u8"\u47e3 \U00011112"; + char output[sizeof kOutputUtf8]; + std::memcpy(output, kOutputUtf8, sizeof kOutputUtf8); + EXPECT_EQ(output, + FormatPack(UntypedFormatSpecImpl("%ls"), absl::MakeSpan(args))); +} TEST_F(FormatConvertTest, Int128) { absl::int128 positive = static_cast<absl::int128>(0x1234567890abcdef) * 1979; @@ -1068,7 +1280,7 @@ TEST_F(FormatConvertTest, LongDoubleRoundA) { // We don't actually store the results. This is just to exercise the rest of the // machinery. struct NullSink { - friend void AbslFormatFlush(NullSink *sink, string_view str) {} + friend void AbslFormatFlush(NullSink *, string_view) {} }; template <typename... T> diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h index 8de42d2c..173284c6 100644 --- a/absl/strings/internal/str_format/extension.h +++ b/absl/strings/internal/str_format/extension.h @@ -16,16 +16,14 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ -#include <limits.h> #include <cstddef> #include <cstdint> #include <cstring> #include <ostream> +#include <string> #include "absl/base/config.h" -#include "absl/base/port.h" -#include "absl/meta/type_traits.h" #include "absl/strings/internal/str_format/output.h" #include "absl/strings/string_view.h" @@ -34,6 +32,7 @@ ABSL_NAMESPACE_BEGIN enum class FormatConversionChar : uint8_t; enum class FormatConversionCharSet : uint64_t; +enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; namespace str_format_internal { @@ -139,7 +138,8 @@ enum class Flags : uint8_t { kAlt = 1 << 3, kZero = 1 << 4, // This is not a real flag. It just exists to turn off kBasic when no other - // flags are set. This is for when width/precision are specified. + // flags are set. This is for when width/precision are specified, or a length + // modifier affects the behavior ("%lc"). kNonBasic = 1 << 5, }; @@ -285,6 +285,8 @@ class FormatConversionSpecImpl { bool has_alt_flag() const { return FlagsContains(flags_, Flags::kAlt); } bool has_zero_flag() const { return FlagsContains(flags_, Flags::kZero); } + LengthMod length_mod() const { return length_mod_; } + FormatConversionChar conversion_char() const { // Keep this field first in the struct . It generates better code when // accessing it when ConversionSpec is passed by value in registers. @@ -310,6 +312,7 @@ class FormatConversionSpecImpl { friend struct str_format_internal::FormatConversionSpecImplFriend; FormatConversionChar conv_ = FormatConversionCharInternal::kNone; Flags flags_; + LengthMod length_mod_ = LengthMod::none; int width_; int precision_; }; @@ -318,6 +321,9 @@ struct FormatConversionSpecImplFriend final { static void SetFlags(Flags f, FormatConversionSpecImpl* conv) { conv->flags_ = f; } + static void SetLengthMod(LengthMod l, FormatConversionSpecImpl* conv) { + conv->length_mod_ = l; + } static void SetConversionChar(FormatConversionChar c, FormatConversionSpecImpl* conv) { conv->conv_ = c; diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index 35b6d49c..b1d6d5fd 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -15,22 +15,23 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ -#include <limits.h> #include <stddef.h> #include <stdlib.h> #include <cassert> -#include <cstdint> +#include <cstring> #include <initializer_list> -#include <iosfwd> -#include <iterator> #include <memory> #include <string> +#include <utility> #include <vector> +#include "absl/base/config.h" +#include "absl/base/optimization.h" #include "absl/strings/internal/str_format/checker.h" #include "absl/strings/internal/str_format/constexpr_parser.h" #include "absl/strings/internal/str_format/extension.h" +#include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index 021f6a87..e2225c60 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -15,10 +15,18 @@ #include "absl/strings/internal/str_format/parser.h" #include <string.h> +#include <algorithm> +#include <initializer_list> +#include <string> +#include <utility> #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/base/config.h" #include "absl/base/macros.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" +#include "absl/strings/internal/str_format/extension.h" +#include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -303,7 +311,7 @@ TEST_F(ConsumeUnboundConversionTest, BasicFlag) { } // Flag is off - for (const char* fmt : {"3d", ".llx", "-G", "1$#X"}) { + for (const char* fmt : {"3d", ".llx", "-G", "1$#X", "lc"}) { SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt)); EXPECT_NE(o.flags, Flags::kBasic); diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h index 21ee179e..c9b350fd 100644 --- a/absl/strings/str_format.h +++ b/absl/strings/str_format.h @@ -72,14 +72,20 @@ #ifndef ABSL_STRINGS_STR_FORMAT_H_ #define ABSL_STRINGS_STR_FORMAT_H_ +#include <cstdint> #include <cstdio> #include <string> +#include <type_traits> +#include "absl/base/attributes.h" +#include "absl/base/config.h" #include "absl/strings/internal/str_format/arg.h" // IWYU pragma: export #include "absl/strings/internal/str_format/bind.h" // IWYU pragma: export #include "absl/strings/internal/str_format/checker.h" // IWYU pragma: export #include "absl/strings/internal/str_format/extension.h" // IWYU pragma: export #include "absl/strings/internal/str_format/parser.h" // IWYU pragma: export +#include "absl/strings/string_view.h" +#include "absl/types/span.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -256,7 +262,7 @@ class FormatCountCapture { // // The `FormatSpec` intrinsically supports all of these fundamental C++ types: // -// * Characters: `char`, `signed char`, `unsigned char` +// * Characters: `char`, `signed char`, `unsigned char`, `wchar_t` // * Integers: `int`, `short`, `unsigned short`, `unsigned`, `long`, // `unsigned long`, `long long`, `unsigned long long` // * Enums: printed as their underlying integral value @@ -264,9 +270,9 @@ class FormatCountCapture { // // However, in the `str_format` library, a format conversion specifies a broader // C++ conceptual category instead of an exact type. For example, `%s` binds to -// any string-like argument, so `std::string`, `absl::string_view`, and -// `const char*` are all accepted. Likewise, `%d` accepts any integer-like -// argument, etc. +// any string-like argument, so `std::string`, `std::wstring`, +// `absl::string_view`, `const char*`, and `const wchar_t*` are all accepted. +// Likewise, `%d` accepts any integer-like argument, etc. template <typename... Args> using FormatSpec = str_format_internal::FormatSpecTemplate< diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index 195ef3fe..3c52be1e 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -634,6 +634,10 @@ TEST(StrFormat, BehavesAsDocumented) { const int& something = *reinterpret_cast<const int*>(ptr_value); EXPECT_EQ(StrFormat("%p", &something), StrFormat("0x%x", ptr_value)); + // The output of formatting a null pointer is not documented as being a + // specific thing, but the attempt should at least compile. + (void)StrFormat("%p", nullptr); + // Output widths are supported, with optional flags. EXPECT_EQ(StrFormat("%3d", 1), " 1"); EXPECT_EQ(StrFormat("%3d", 123456), "123456"); |