summaryrefslogtreecommitdiff
path: root/absl/strings/internal/str_format
diff options
context:
space:
mode:
authorGravatar Samuel Benzaquen <sbenza@google.com>2022-11-29 08:57:48 -0800
committerGravatar Copybara-Service <copybara-worker@google.com>2022-11-29 08:58:23 -0800
commit82196f059f213c50738142a799bb166b2971950d (patch)
tree5dbfdb957de4369303797515ad34d3099aeeece7 /absl/strings/internal/str_format
parent13708db87b1ab69f4f2b3214f3f51e986546f282 (diff)
Convert the full parser into constexpr now that Abseil requires C++14, and use
this parser for the static checker. This fixes some outstanding bugs where the static checker differed from the dynamic one. Also, fix `%v` to be accepted with POSIX syntax. Tested: Presubmit TGP OCL:487237262:BASE:490275393:1669141454896:92dd62e3 PiperOrigin-RevId: 491650577 Change-Id: Id138c108187428b3aea46f8887495f1da12c91b2
Diffstat (limited to 'absl/strings/internal/str_format')
-rw-r--r--absl/strings/internal/str_format/checker.h356
-rw-r--r--absl/strings/internal/str_format/checker_test.cc12
-rw-r--r--absl/strings/internal/str_format/constexpr_parser.h351
-rw-r--r--absl/strings/internal/str_format/parser.cc213
-rw-r--r--absl/strings/internal/str_format/parser.h103
-rw-r--r--absl/strings/internal/str_format/parser_test.cc1
6 files changed, 412 insertions, 624 deletions
diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h
index aeb9d48d..eab6ab9d 100644
--- a/absl/strings/internal/str_format/checker.h
+++ b/absl/strings/internal/str_format/checker.h
@@ -15,8 +15,11 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
+#include <algorithm>
+
#include "absl/base/attributes.h"
#include "absl/strings/internal/str_format/arg.h"
+#include "absl/strings/internal/str_format/constexpr_parser.h"
#include "absl/strings/internal/str_format/extension.h"
// Compile time check support for entry points.
@@ -36,333 +39,56 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
-constexpr bool AllOf() { return true; }
-
-template <typename... T>
-constexpr bool AllOf(bool b, T... t) {
- return b && AllOf(t...);
-}
-
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
-constexpr bool ContainsChar(const char* chars, char c) {
- return *chars == c || (*chars && ContainsChar(chars + 1, c));
-}
-
-// A constexpr compatible list of Convs.
-struct ConvList {
- const FormatConversionCharSet* array;
- int count;
-
- // We do the bound check here to avoid having to do it on the callers.
- // Returning an empty FormatConversionCharSet has the same effect as
- // short circuiting because it will never match any conversion.
- constexpr FormatConversionCharSet operator[](int i) const {
- return i < count ? array[i] : FormatConversionCharSet{};
- }
-
- constexpr ConvList without_front() const {
- return count != 0 ? ConvList{array + 1, count - 1} : *this;
- }
-};
-
-template <size_t count>
-struct ConvListT {
- // Make sure the array has size > 0.
- FormatConversionCharSet list[count ? count : 1];
-};
-
-constexpr char GetChar(string_view str, size_t index) {
- return index < str.size() ? str[index] : char{};
-}
-
-constexpr string_view ConsumeFront(string_view str, size_t len = 1) {
- return len <= str.size() ? string_view(str.data() + len, str.size() - len)
- : string_view();
-}
-
-constexpr string_view ConsumeAnyOf(string_view format, const char* chars) {
- while (ContainsChar(chars, GetChar(format, 0))) {
- format = ConsumeFront(format);
- }
- return format;
-}
-
-constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; }
-
-// Helper class for the ParseDigits function.
-// It encapsulates the two return values we need there.
-struct Integer {
- string_view format;
- int value;
-
- // If the next character is a '$', consume it.
- // Otherwise, make `this` an invalid positional argument.
- constexpr Integer ConsumePositionalDollar() const {
- if (GetChar(format, 0) == '$') {
- return Integer{ConsumeFront(format), value};
- } else {
- return Integer{format, 0};
- }
- }
-};
-
-constexpr Integer ParseDigits(string_view format) {
- int value = 0;
- while (IsDigit(GetChar(format, 0))) {
- value = 10 * value + GetChar(format, 0) - '0';
- format = ConsumeFront(format);
- }
-
- return Integer{format, value};
-}
-
-// Parse digits for a positional argument.
-// The parsing also consumes the '$'.
-constexpr Integer ParsePositional(string_view format) {
- return ParseDigits(format).ConsumePositionalDollar();
-}
-
-// Parses a single conversion specifier.
-// See ConvParser::Run() for post conditions.
-class ConvParser {
- constexpr ConvParser SetFormat(string_view format) const {
- return ConvParser(format, args_, error_, arg_position_, is_positional_);
- }
-
- constexpr ConvParser SetArgs(ConvList args) const {
- return ConvParser(format_, args, error_, arg_position_, is_positional_);
- }
-
- constexpr ConvParser SetError(bool error) const {
- return ConvParser(format_, args_, error_ || error, arg_position_,
- is_positional_);
- }
-
- constexpr ConvParser SetArgPosition(int arg_position) const {
- return ConvParser(format_, args_, error_, arg_position, is_positional_);
- }
-
- // Consumes the next arg and verifies that it matches `conv`.
- // `error_` is set if there is no next arg or if it doesn't match `conv`.
- constexpr ConvParser ConsumeNextArg(char conv) const {
- return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv));
- }
-
- // Verify that positional argument `i.value` matches `conv`.
- // `error_` is set if `i.value` is not a valid argument or if it doesn't
- // match.
- constexpr ConvParser VerifyPositional(Integer i, char conv) const {
- return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv));
- }
-
- // Parse the position of the arg and store it in `arg_position_`.
- constexpr ConvParser ParseArgPosition(Integer arg) const {
- return SetFormat(arg.format).SetArgPosition(arg.value);
- }
-
- // Consume the flags.
- constexpr ConvParser ParseFlags() const {
- return SetFormat(ConsumeAnyOf(format_, "-+ #0"));
- }
-
- // Consume the width.
- // If it is '*', we verify that it matches `args_`. `error_` is set if it
- // doesn't match.
- constexpr ConvParser ParseWidth() const {
- char first_char = GetChar(format_, 0);
-
- if (IsDigit(first_char)) {
- return SetFormat(ParseDigits(format_).format);
- } else if (first_char == '*') {
- if (is_positional_) {
- return VerifyPositional(ParsePositional(ConsumeFront(format_)), '*');
- } else {
- return SetFormat(ConsumeFront(format_)).ConsumeNextArg('*');
- }
- } else {
- return *this;
+template <FormatConversionCharSet... C>
+constexpr bool ValidFormatImpl(string_view format) {
+ int next_arg = 0;
+ const char* p = format.data();
+ const char* const end = p + format.size();
+ constexpr FormatConversionCharSet
+ kAllowedConvs[(std::max)(sizeof...(C), size_t{1})] = {C...};
+ bool used[(std::max)(sizeof...(C), size_t{1})]{};
+ constexpr int kNumArgs = sizeof...(C);
+ while (p != end) {
+ while (p != end && *p != '%') ++p;
+ if (p == end) {
+ break;
}
- }
-
- // Consume the precision.
- // If it is '*', we verify that it matches `args_`. `error_` is set if it
- // doesn't match.
- constexpr ConvParser ParsePrecision() const {
- if (GetChar(format_, 0) != '.') {
- return *this;
- } else if (GetChar(format_, 1) == '*') {
- if (is_positional_) {
- return VerifyPositional(ParsePositional(ConsumeFront(format_, 2)), '*');
- } else {
- return SetFormat(ConsumeFront(format_, 2)).ConsumeNextArg('*');
- }
- } else {
- return SetFormat(ParseDigits(ConsumeFront(format_)).format);
+ if (p + 1 >= end) return false;
+ if (p[1] == '%') {
+ // %%
+ p += 2;
+ continue;
}
- }
-
- // Consume the length characters.
- constexpr ConvParser ParseLength() const {
- return SetFormat(ConsumeAnyOf(format_, "lLhjztq"));
- }
-
- // Consume the conversion character and verify that it matches `args_`.
- // `error_` is set if it doesn't match.
- constexpr ConvParser ParseConversion() const {
- char first_char = GetChar(format_, 0);
- if (first_char == 'v' && *(format_.data() - 1) != '%') {
- return SetError(true);
+ UnboundConversion conv(absl::kConstInit);
+ p = ConsumeUnboundConversion(p + 1, end, &conv, &next_arg);
+ if (p == nullptr) return false;
+ if (conv.arg_position <= 0 || conv.arg_position > kNumArgs) {
+ return false;
}
-
- if (is_positional_) {
- return VerifyPositional({ConsumeFront(format_), arg_position_},
- first_char);
- } else {
- return ConsumeNextArg(first_char).SetFormat(ConsumeFront(format_));
+ if (!Contains(kAllowedConvs[conv.arg_position - 1], conv.conv)) {
+ return false;
}
- }
-
- constexpr ConvParser(string_view format, ConvList args, bool error,
- int arg_position, bool is_positional)
- : format_(format),
- args_(args),
- error_(error),
- arg_position_(arg_position),
- is_positional_(is_positional) {}
-
- public:
- constexpr ConvParser(string_view format, ConvList args, bool is_positional)
- : format_(format),
- args_(args),
- error_(false),
- arg_position_(0),
- is_positional_(is_positional) {}
-
- // Consume the whole conversion specifier.
- // `format()` will be set to the character after the conversion character.
- // `error()` will be set if any of the arguments do not match.
- constexpr ConvParser Run() const {
- ConvParser parser = *this;
-
- if (is_positional_) {
- parser = ParseArgPosition(ParsePositional(format_));
- }
-
- return parser.ParseFlags()
- .ParseWidth()
- .ParsePrecision()
- .ParseLength()
- .ParseConversion();
- }
-
- constexpr string_view format() const { return format_; }
- constexpr ConvList args() const { return args_; }
- constexpr bool error() const { return error_; }
- constexpr bool is_positional() const { return is_positional_; }
-
- private:
- string_view format_;
- // Current list of arguments. If we are not in positional mode we will consume
- // from the front.
- ConvList args_;
- bool error_;
- // Holds the argument position of the conversion character, if we are in
- // positional mode. Otherwise, it is unspecified.
- int arg_position_;
- // Whether we are in positional mode.
- // It changes the behavior of '*' and where to find the converted argument.
- bool is_positional_;
-};
-
-// Parses a whole format expression.
-// See FormatParser::Run().
-class FormatParser {
- static constexpr bool FoundPercent(string_view format) {
- return format.empty() ||
- (GetChar(format, 0) == '%' && GetChar(format, 1) != '%');
- }
-
- // We use an inner function to increase the recursion limit.
- // The inner function consumes up to `limit` characters on every run.
- // This increases the limit from 512 to ~512*limit.
- static constexpr string_view ConsumeNonPercentInner(string_view format) {
- int limit = 20;
- while (!FoundPercent(format) && limit != 0) {
- size_t len = 0;
-
- if (GetChar(format, 0) == '%' && GetChar(format, 1) == '%') {
- len = 2;
- } else {
- len = 1;
+ used[conv.arg_position - 1] = true;
+ for (auto extra : {conv.width, conv.precision}) {
+ if (extra.is_from_arg()) {
+ int pos = extra.get_from_arg();
+ if (pos <= 0 || pos > kNumArgs) return false;
+ used[pos - 1] = true;
+ if (!Contains(kAllowedConvs[pos - 1], '*')) {
+ return false;
+ }
}
-
- format = ConsumeFront(format, len);
- --limit;
}
-
- return format;
}
-
- // Consume characters until the next conversion spec %.
- // It skips %%.
- static constexpr string_view ConsumeNonPercent(string_view format) {
- while (!FoundPercent(format)) {
- format = ConsumeNonPercentInner(format);
+ if (sizeof...(C) != 0) {
+ for (bool b : used) {
+ if (!b) return false;
}
-
- return format;
- }
-
- static constexpr bool IsPositional(string_view format) {
- while (IsDigit(GetChar(format, 0))) {
- format = ConsumeFront(format);
- }
-
- return GetChar(format, 0) == '$';
}
-
- constexpr bool RunImpl(bool is_positional) const {
- // In non-positional mode we require all arguments to be consumed.
- // In positional mode just reaching the end of the format without errors is
- // enough.
- return (format_.empty() && (is_positional || args_.count == 0)) ||
- (!format_.empty() &&
- ValidateArg(
- ConvParser(ConsumeFront(format_), args_, is_positional).Run()));
- }
-
- constexpr bool ValidateArg(ConvParser conv) const {
- return !conv.error() && FormatParser(conv.format(), conv.args())
- .RunImpl(conv.is_positional());
- }
-
- public:
- constexpr FormatParser(string_view format, ConvList args)
- : format_(ConsumeNonPercent(format)), args_(args) {}
-
- // Runs the parser for `format` and `args`.
- // It verifies that the format is valid and that all conversion specifiers
- // match the arguments passed.
- // In non-positional mode it also verfies that all arguments are consumed.
- constexpr bool Run() const {
- return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_)));
- }
-
- private:
- string_view format_;
- // Current list of arguments.
- // If we are not in positional mode we will consume from the front and will
- // have to be empty in the end.
- ConvList args_;
-};
-
-template <FormatConversionCharSet... C>
-constexpr bool ValidFormatImpl(string_view format) {
- return FormatParser(format,
- {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)})
- .Run();
+ return true;
}
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
diff --git a/absl/strings/internal/str_format/checker_test.cc b/absl/strings/internal/str_format/checker_test.cc
index 680517f7..a86bed38 100644
--- a/absl/strings/internal/str_format/checker_test.cc
+++ b/absl/strings/internal/str_format/checker_test.cc
@@ -93,6 +93,7 @@ TEST(StrFormatChecker, ValidFormat) {
ValidFormat<void (*)(), volatile int*>("%p %p"), //
ValidFormat<string_view, const char*, double, void*>(
"string_view=%s const char*=%s double=%f void*=%p)"),
+ ValidFormat<int>("%v"), //
ValidFormat<int>("%% %1$d"), //
ValidFormat<int>("%1$ld"), //
@@ -109,7 +110,9 @@ TEST(StrFormatChecker, ValidFormat) {
ValidFormat<int, double>("%2$.*1$f"), //
ValidFormat<void*, string_view, const char*, double>(
"string_view=%2$s const char*=%3$s double=%4$f void*=%1$p "
- "repeat=%3$s)")};
+ "repeat=%3$s)"),
+ ValidFormat<std::string>("%1$v"),
+ };
for (Case c : trues) {
EXPECT_TRUE(c.result) << c.format;
@@ -130,6 +133,8 @@ TEST(StrFormatChecker, ValidFormat) {
ValidFormat<int>("%*d"), //
ValidFormat<std::string>("%p"), //
ValidFormat<int (*)(int)>("%d"), //
+ ValidFormat<int>("%1v"), //
+ ValidFormat<int>("%.1v"), //
ValidFormat<>("%3$d"), //
ValidFormat<>("%1$r"), //
@@ -138,13 +143,14 @@ TEST(StrFormatChecker, ValidFormat) {
ValidFormat<int>("%1$*2$1d"), //
ValidFormat<int>("%1$1-d"), //
ValidFormat<std::string, int>("%2$*1$s"), //
- ValidFormat<std::string>("%1$p"),
+ ValidFormat<std::string>("%1$p"), //
+ ValidFormat<int>("%1$*2$v"), //
ValidFormat<int, int>("%d %2$d"), //
};
for (Case c : falses) {
- EXPECT_FALSE(c.result) << c.format;
+ EXPECT_FALSE(c.result) << "format<" << c.format << ">";
}
}
diff --git a/absl/strings/internal/str_format/constexpr_parser.h b/absl/strings/internal/str_format/constexpr_parser.h
new file mode 100644
index 00000000..3dc1776b
--- /dev/null
+++ b/absl/strings/internal/str_format/constexpr_parser.h
@@ -0,0 +1,351 @@
+// Copyright 2022 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
+#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
+
+#include <cassert>
+#include <cstdint>
+#include <limits>
+
+#include "absl/base/const_init.h"
+#include "absl/strings/internal/str_format/extension.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace str_format_internal {
+
+enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };
+
+// The analyzed properties of a single specified conversion.
+struct UnboundConversion {
+ // This is a user defined default constructor on purpose to skip the
+ // initialization of parts of the object that are not necessary.
+ UnboundConversion() {} // NOLINT
+
+ // This constructor is provided for the static checker. We don't want to do
+ // the unnecessary initialization in the normal case.
+ explicit constexpr UnboundConversion(absl::ConstInitType)
+ : arg_position{}, width{}, precision{} {}
+
+ class InputValue {
+ public:
+ constexpr void set_value(int value) {
+ assert(value >= 0);
+ value_ = value;
+ }
+ constexpr int value() const { return value_; }
+
+ // Marks the value as "from arg". aka the '*' format.
+ // Requires `value >= 1`.
+ // When set, is_from_arg() return true and get_from_arg() returns the
+ // original value.
+ // `value()`'s return value is unspecified in this state.
+ constexpr void set_from_arg(int value) {
+ assert(value > 0);
+ value_ = -value - 1;
+ }
+ constexpr bool is_from_arg() const { return value_ < -1; }
+ constexpr int get_from_arg() const {
+ assert(is_from_arg());
+ return -value_ - 1;
+ }
+
+ private:
+ int value_ = -1;
+ };
+
+ // No need to initialize. It will always be set in the parser.
+ int arg_position;
+
+ InputValue width;
+ InputValue precision;
+
+ Flags flags = Flags::kBasic;
+ LengthMod length_mod = LengthMod::none;
+ FormatConversionChar conv = FormatConversionCharInternal::kNone;
+};
+
+// Helper tag class for the table below.
+// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
+// conversions.
+class ConvTag {
+ public:
+ constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT
+ : tag_(static_cast<uint8_t>(conversion_char)) {}
+ constexpr ConvTag(LengthMod length_mod) // NOLINT
+ : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
+ constexpr ConvTag(Flags flags) // NOLINT
+ : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
+ constexpr ConvTag() : tag_(0xFF) {}
+
+ constexpr bool is_conv() const { return (tag_ & 0x80) == 0; }
+ constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; }
+ constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
+
+ constexpr FormatConversionChar as_conv() const {
+ assert(is_conv());
+ assert(!is_length());
+ assert(!is_flags());
+ return static_cast<FormatConversionChar>(tag_);
+ }
+ constexpr LengthMod as_length() const {
+ assert(!is_conv());
+ assert(is_length());
+ assert(!is_flags());
+ return static_cast<LengthMod>(tag_ & 0x3F);
+ }
+ constexpr Flags as_flags() const {
+ assert(!is_conv());
+ assert(!is_length());
+ assert(is_flags());
+ return static_cast<Flags>(tag_ & 0x1F);
+ }
+
+ private:
+ uint8_t tag_;
+};
+
+struct ConvTagHolder {
+ using CC = FormatConversionCharInternal;
+ using LM = LengthMod;
+
+ // Abbreviations to fit in the table below.
+ static constexpr auto kFSign = Flags::kSignCol;
+ static constexpr auto kFAlt = Flags::kAlt;
+ static constexpr auto kFPos = Flags::kShowPos;
+ static constexpr auto kFLeft = Flags::kLeft;
+ static constexpr auto kFZero = Flags::kZero;
+
+ static constexpr ConvTag value[256] = {
+ {}, {}, {}, {}, {}, {}, {}, {}, // 00-07
+ {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
+ {}, {}, {}, {}, {}, {}, {}, {}, // 10-17
+ {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
+ kFSign, {}, {}, kFAlt, {}, {}, {}, {}, // !"#$%&'
+ {}, {}, {}, kFPos, {}, kFLeft, {}, {}, // ()*+,-./
+ kFZero, {}, {}, {}, {}, {}, {}, {}, // 01234567
+ {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>?
+ {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
+ {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
+ {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
+ CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
+ {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
+ LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
+ CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw
+ CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}!
+ {}, {}, {}, {}, {}, {}, {}, {}, // 80-87
+ {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
+ {}, {}, {}, {}, {}, {}, {}, {}, // 90-97
+ {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
+ {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
+ {}, {}, {}, {}, {}, {}, {}, {}, // a8-af
+ {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
+ {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
+ {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
+ {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
+ {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
+ {}, {}, {}, {}, {}, {}, {}, {}, // d8-df
+ {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
+ {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
+ {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
+ {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
+ };
+};
+
+// Keep a single table for all the conversion chars and length modifiers.
+constexpr ConvTag GetTagForChar(char c) {
+ return ConvTagHolder::value[static_cast<unsigned char>(c)];
+}
+
+constexpr bool CheckFastPathSetting(const UnboundConversion& conv) {
+ bool width_precision_needed =
+ conv.width.value() >= 0 || conv.precision.value() >= 0;
+ if (width_precision_needed && conv.flags == Flags::kBasic) {
+#if defined(__clang__)
+ // Some compilers complain about this in constexpr even when not executed,
+ // so only enable the error dump in clang.
+ fprintf(stderr,
+ "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
+ "width=%d precision=%d\n",
+ conv.flags == Flags::kBasic ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
+ FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
+ conv.precision.value());
+#endif // defined(__clang__)
+ return false;
+ }
+ return true;
+}
+
+constexpr int ParseDigits(char& c, const char*& pos, const char* const end) {
+ int digits = c - '0';
+ // We do not want to overflow `digits` so we consume at most digits10
+ // digits. If there are more digits the parsing will fail later on when the
+ // digit doesn't match the expected characters.
+ int num_digits = std::numeric_limits<int>::digits10;
+ for (;;) {
+ if (ABSL_PREDICT_FALSE(pos == end)) break;
+ c = *pos++;
+ if ('0' > c || c > '9') break;
+ --num_digits;
+ if (ABSL_PREDICT_FALSE(!num_digits)) break;
+ digits = 10 * digits + c - '0';
+ }
+ return digits;
+}
+
+template <bool is_positional>
+constexpr const char* ConsumeConversion(const char* pos, const char* const end,
+ UnboundConversion* conv,
+ int* next_arg) {
+ const char* const original_pos = pos;
+ char c = 0;
+ // Read the next char into `c` and update `pos`. Returns false if there are
+ // no more chars to read.
+#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
+ do { \
+ if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
+ c = *pos++; \
+ } while (0)
+
+ if (is_positional) {
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
+ conv->arg_position = ParseDigits(c, pos, end);
+ assert(conv->arg_position > 0);
+ if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
+ }
+
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+
+ // We should start with the basic flag on.
+ assert(conv->flags == Flags::kBasic);
+
+ // Any non alpha character makes this conversion not basic.
+ // This includes flags (-+ #0), width (1-9, *) or precision (.).
+ // All conversion characters and length modifiers are alpha characters.
+ if (c < 'A') {
+ while (c <= '0') {
+ auto tag = GetTagForChar(c);
+ if (tag.is_flags()) {
+ conv->flags = conv->flags | tag.as_flags();
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else {
+ break;
+ }
+ }
+
+ if (c <= '9') {
+ if (c >= '0') {
+ int maybe_width = ParseDigits(c, pos, end);
+ if (!is_positional && c == '$') {
+ if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
+ // Positional conversion.
+ *next_arg = -1;
+ return ConsumeConversion<true>(original_pos, end, conv, next_arg);
+ }
+ conv->flags = conv->flags | Flags::kNonBasic;
+ conv->width.set_value(maybe_width);
+ } else if (c == '*') {
+ conv->flags = conv->flags | Flags::kNonBasic;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if (is_positional) {
+ if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
+ conv->width.set_from_arg(ParseDigits(c, pos, end));
+ if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else {
+ conv->width.set_from_arg(++*next_arg);
+ }
+ }
+ }
+
+ if (c == '.') {
+ conv->flags = conv->flags | Flags::kNonBasic;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if ('0' <= c && c <= '9') {
+ conv->precision.set_value(ParseDigits(c, pos, end));
+ } else if (c == '*') {
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if (is_positional) {
+ if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
+ conv->precision.set_from_arg(ParseDigits(c, pos, end));
+ if (c != '$') return nullptr;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else {
+ conv->precision.set_from_arg(++*next_arg);
+ }
+ } else {
+ conv->precision.set_value(0);
+ }
+ }
+ }
+
+ auto tag = GetTagForChar(c);
+
+ if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) {
+ return nullptr;
+ }
+
+ if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
+ if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
+
+ // It is a length modifier.
+ using str_format_internal::LengthMod;
+ LengthMod length_mod = tag.as_length();
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ if (c == 'h' && length_mod == LengthMod::h) {
+ conv->length_mod = LengthMod::hh;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else if (c == 'l' && length_mod == LengthMod::l) {
+ conv->length_mod = LengthMod::ll;
+ ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
+ } else {
+ conv->length_mod = length_mod;
+ }
+ tag = GetTagForChar(c);
+
+ if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
+ if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
+ }
+
+ assert(CheckFastPathSetting(*conv));
+ (void)(&CheckFastPathSetting);
+
+ conv->conv = tag.as_conv();
+ if (!is_positional) conv->arg_position = ++*next_arg;
+ return pos;
+}
+
+// Consume conversion spec prefix (not including '%') of [p, end) if valid.
+// Examples of valid specs would be e.g.: "s", "d", "-12.6f".
+// If valid, it returns the first character following the conversion spec,
+// and the spec part is broken down and returned in 'conv'.
+// If invalid, returns nullptr.
+constexpr const char* ConsumeUnboundConversion(const char* p, const char* end,
+ UnboundConversion* conv,
+ int* next_arg) {
+ if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
+ return ConsumeConversion<false>(p, end, conv, next_arg);
+}
+
+} // namespace str_format_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc
index 13731ee2..5aaab698 100644
--- a/absl/strings/internal/str_format/parser.cc
+++ b/absl/strings/internal/str_format/parser.cc
@@ -31,211 +31,14 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
-using CC = FormatConversionCharInternal;
-using LM = LengthMod;
+// Define the array for non-constexpr uses.
+constexpr ConvTag ConvTagHolder::value[256];
-// Abbreviations to fit in the table below.
-constexpr auto f_sign = Flags::kSignCol;
-constexpr auto f_alt = Flags::kAlt;
-constexpr auto f_pos = Flags::kShowPos;
-constexpr auto f_left = Flags::kLeft;
-constexpr auto f_zero = Flags::kZero;
-
-ABSL_CONST_INIT const ConvTag kTags[256] = {
- {}, {}, {}, {}, {}, {}, {}, {}, // 00-07
- {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
- {}, {}, {}, {}, {}, {}, {}, {}, // 10-17
- {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
- f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&'
- {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./
- f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567
- {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>?
- {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
- {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
- {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
- CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
- {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
- LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
- CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw
- CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}!
- {}, {}, {}, {}, {}, {}, {}, {}, // 80-87
- {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
- {}, {}, {}, {}, {}, {}, {}, {}, // 90-97
- {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
- {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
- {}, {}, {}, {}, {}, {}, {}, {}, // a8-af
- {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
- {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
- {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
- {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
- {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
- {}, {}, {}, {}, {}, {}, {}, {}, // d8-df
- {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
- {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
- {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
- {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
-};
-
-namespace {
-
-bool CheckFastPathSetting(const UnboundConversion& conv) {
- bool width_precision_needed =
- conv.width.value() >= 0 || conv.precision.value() >= 0;
- if (width_precision_needed && conv.flags == Flags::kBasic) {
- fprintf(stderr,
- "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
- "width=%d precision=%d\n",
- conv.flags == Flags::kBasic ? 1 : 0,
- FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
- FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
- FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
- FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
- FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
- conv.precision.value());
- return false;
- }
- return true;
-}
-
-template <bool is_positional>
-const char *ConsumeConversion(const char *pos, const char *const end,
- UnboundConversion *conv, int *next_arg) {
- const char* const original_pos = pos;
- char c;
- // Read the next char into `c` and update `pos`. Returns false if there are
- // no more chars to read.
-#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
- do { \
- if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
- c = *pos++; \
- } while (0)
-
- const auto parse_digits = [&] {
- int digits = c - '0';
- // We do not want to overflow `digits` so we consume at most digits10
- // digits. If there are more digits the parsing will fail later on when the
- // digit doesn't match the expected characters.
- int num_digits = std::numeric_limits<int>::digits10;
- for (;;) {
- if (ABSL_PREDICT_FALSE(pos == end)) break;
- c = *pos++;
- if (!std::isdigit(c)) break;
- --num_digits;
- if (ABSL_PREDICT_FALSE(!num_digits)) break;
- digits = 10 * digits + c - '0';
- }
- return digits;
- };
-
- if (is_positional) {
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
- conv->arg_position = parse_digits();
- assert(conv->arg_position > 0);
- if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
- }
-
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
-
- // We should start with the basic flag on.
- assert(conv->flags == Flags::kBasic);
-
- // Any non alpha character makes this conversion not basic.
- // This includes flags (-+ #0), width (1-9, *) or precision (.).
- // All conversion characters and length modifiers are alpha characters.
- if (c < 'A') {
- while (c <= '0') {
- auto tag = GetTagForChar(c);
- if (tag.is_flags()) {
- conv->flags = conv->flags | tag.as_flags();
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else {
- break;
- }
- }
-
- if (c <= '9') {
- if (c >= '0') {
- int maybe_width = parse_digits();
- if (!is_positional && c == '$') {
- if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
- // Positional conversion.
- *next_arg = -1;
- return ConsumeConversion<true>(original_pos, end, conv, next_arg);
- }
- conv->flags = conv->flags | Flags::kNonBasic;
- conv->width.set_value(maybe_width);
- } else if (c == '*') {
- conv->flags = conv->flags | Flags::kNonBasic;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (is_positional) {
- if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
- conv->width.set_from_arg(parse_digits());
- if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else {
- conv->width.set_from_arg(++*next_arg);
- }
- }
- }
-
- if (c == '.') {
- conv->flags = conv->flags | Flags::kNonBasic;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (std::isdigit(c)) {
- conv->precision.set_value(parse_digits());
- } else if (c == '*') {
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (is_positional) {
- if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
- conv->precision.set_from_arg(parse_digits());
- if (c != '$') return nullptr;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else {
- conv->precision.set_from_arg(++*next_arg);
- }
- } else {
- conv->precision.set_value(0);
- }
- }
- }
-
- auto tag = GetTagForChar(c);
-
- if (ABSL_PREDICT_FALSE(c == 'v' && (pos - original_pos) != 1)) return nullptr;
-
- if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
- if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
-
- // It is a length modifier.
- using str_format_internal::LengthMod;
- LengthMod length_mod = tag.as_length();
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- if (c == 'h' && length_mod == LengthMod::h) {
- conv->length_mod = LengthMod::hh;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else if (c == 'l' && length_mod == LengthMod::l) {
- conv->length_mod = LengthMod::ll;
- ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
- } else {
- conv->length_mod = length_mod;
- }
- tag = GetTagForChar(c);
-
- if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
- if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
- }
-
- assert(CheckFastPathSetting(*conv));
- (void)(&CheckFastPathSetting);
-
- conv->conv = tag.as_conv();
- if (!is_positional) conv->arg_position = ++*next_arg;
- return pos;
+ABSL_ATTRIBUTE_NOINLINE const char* ConsumeUnboundConversionNoInline(
+ const char* p, const char* end, UnboundConversion* conv, int* next_arg) {
+ return ConsumeUnboundConversion(p, end, conv, next_arg);
}
-} // namespace
-
std::string LengthModToString(LengthMod v) {
switch (v) {
case LengthMod::h:
@@ -262,12 +65,6 @@ std::string LengthModToString(LengthMod v) {
return "";
}
-const char *ConsumeUnboundConversion(const char *p, const char *end,
- UnboundConversion *conv, int *next_arg) {
- if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
- return ConsumeConversion<false>(p, end, conv, next_arg);
-}
-
struct ParsedFormatBase::ParsedFormatConsumer {
explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
: parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h
index a81bac83..35b6d49c 100644
--- a/absl/strings/internal/str_format/parser.h
+++ b/absl/strings/internal/str_format/parser.h
@@ -29,111 +29,18 @@
#include <vector>
#include "absl/strings/internal/str_format/checker.h"
+#include "absl/strings/internal/str_format/constexpr_parser.h"
#include "absl/strings/internal/str_format/extension.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
-enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };
-
std::string LengthModToString(LengthMod v);
-// The analyzed properties of a single specified conversion.
-struct UnboundConversion {
- UnboundConversion() {}
-
- class InputValue {
- public:
- void set_value(int value) {
- assert(value >= 0);
- value_ = value;
- }
- int value() const { return value_; }
-
- // Marks the value as "from arg". aka the '*' format.
- // Requires `value >= 1`.
- // When set, is_from_arg() return true and get_from_arg() returns the
- // original value.
- // `value()`'s return value is unspecfied in this state.
- void set_from_arg(int value) {
- assert(value > 0);
- value_ = -value - 1;
- }
- bool is_from_arg() const { return value_ < -1; }
- int get_from_arg() const {
- assert(is_from_arg());
- return -value_ - 1;
- }
-
- private:
- int value_ = -1;
- };
-
- // No need to initialize. It will always be set in the parser.
- int arg_position;
-
- InputValue width;
- InputValue precision;
-
- Flags flags = Flags::kBasic;
- LengthMod length_mod = LengthMod::none;
- FormatConversionChar conv = FormatConversionCharInternal::kNone;
-};
-
-// Consume conversion spec prefix (not including '%') of [p, end) if valid.
-// Examples of valid specs would be e.g.: "s", "d", "-12.6f".
-// If valid, it returns the first character following the conversion spec,
-// and the spec part is broken down and returned in 'conv'.
-// If invalid, returns nullptr.
-const char* ConsumeUnboundConversion(const char* p, const char* end,
- UnboundConversion* conv, int* next_arg);
-
-// Helper tag class for the table below.
-// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
-// conversions.
-class ConvTag {
- public:
- constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT
- : tag_(static_cast<uint8_t>(conversion_char)) {}
- constexpr ConvTag(LengthMod length_mod) // NOLINT
- : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
- constexpr ConvTag(Flags flags) // NOLINT
- : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
- constexpr ConvTag() : tag_(0xFF) {}
-
- bool is_conv() const { return (tag_ & 0x80) == 0; }
- bool is_length() const { return (tag_ & 0xC0) == 0x80; }
- bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
-
- FormatConversionChar as_conv() const {
- assert(is_conv());
- assert(!is_length());
- assert(!is_flags());
- return static_cast<FormatConversionChar>(tag_);
- }
- LengthMod as_length() const {
- assert(!is_conv());
- assert(is_length());
- assert(!is_flags());
- return static_cast<LengthMod>(tag_ & 0x3F);
- }
- Flags as_flags() const {
- assert(!is_conv());
- assert(!is_length());
- assert(is_flags());
- return static_cast<Flags>(tag_ & 0x1F);
- }
-
- private:
- uint8_t tag_;
-};
-
-extern const ConvTag kTags[256];
-// Keep a single table for all the conversion chars and length modifiers.
-inline ConvTag GetTagForChar(char c) {
- return kTags[static_cast<unsigned char>(c)];
-}
+const char* ConsumeUnboundConversionNoInline(const char* p, const char* end,
+ UnboundConversion* conv,
+ int* next_arg);
// Parse the format string provided in 'src' and pass the identified items into
// 'consumer'.
@@ -187,7 +94,7 @@ bool ParseFormatString(string_view src, Consumer consumer) {
}
} else if (percent[1] != '%') {
UnboundConversion conv;
- p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
+ p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg);
if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
conv, string_view(percent + 1,
diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc
index c3e825fe..021f6a87 100644
--- a/absl/strings/internal/str_format/parser_test.cc
+++ b/absl/strings/internal/str_format/parser_test.cc
@@ -117,6 +117,7 @@ TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) {
{__LINE__, "dzz", "d", "zz"}, // length mod as suffix
{__LINE__, "3v", "", "3v"}, // 'v' cannot have modifiers
{__LINE__, "hv", "", "hv"}, // 'v' cannot have modifiers
+ {__LINE__, "1$v", "1$v", ""}, // 'v' can have use posix syntax
{__LINE__, "1$*2$d", "1$*2$d", "" }, // arg indexing and * allowed.
{__LINE__, "0-14.3hhd", "0-14.3hhd", ""}, // precision, width
{__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""}, // flags