From d902eb869bcfacc1bad14933ed9af4bed006d481 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Tue, 16 Apr 2019 12:11:35 -0700 Subject: Export of internal Abseil changes. -- babbb6421068af3831870fd5995444437ace6769 by Derek Mauro : Rollback of: Make raw_hash_set_test less flaky. Split the timing loop into chunks so that we are less suceptible to antogantistic processes. PiperOrigin-RevId: 243854490 -- a2711f17a712f6d09799bf32363d67526737b486 by CJ Johnson : Relocates IsAtLeastForwardIterator to internal/inlined_vector.h PiperOrigin-RevId: 243846090 -- 6c14cdbeb9a61022c27f8957654f930d8abf2fc1 by Matt Kulukundis : Make raw_hash_set_test less flaky. Split the timing loop into chunks so that we are less suceptible to antogantistic processes. PiperOrigin-RevId: 243824289 -- ee6072a6b6e0ac653622524ceb09db3b9e870f96 by Samuel Benzaquen : Improve format parser performance. Replace the main switch with a lookup in the existing tag table. Improve the ABI of ConsumeUnboundConversion a little. PiperOrigin-RevId: 243824112 -- 24b9e6476dfa4be8d644359eab8ac6816492f187 by Abseil Team : Fix DR numbers: 3800 ? 3080, 3801 ? 3081. PiperOrigin-RevId: 243804213 -- 0660404074707e197684f07cc0bffe4a9c35cd2f by Abseil Team : Internal change. PiperOrigin-RevId: 243757359 -- ba0f5bb9b8584d75c4ffc44ff3cb8c691796ffc6 by Xiaoyi Zhang : Consolidate ABSL_INTERNAL_UNALIGNED_* implementation into memcpy. The compiler should be good enough to optimize these operations. See https://github.com/abseil/abseil-cpp/issues/269 for background. PiperOrigin-RevId: 243323941 -- 00853a8756548df7217513c562d604b4ee5c6ab9 by Eric Fiselier : Reexport memory.h from optional.h for compatibility between libc++ and libstdc++. PiperOrigin-RevId: 243313425 GitOrigin-RevId: babbb6421068af3831870fd5995444437ace6769 Change-Id: Ic53c127ad857a431ad60c98b27cc585fed50a3e3 --- absl/base/internal/unaligned_access.h | 165 ------------------------ absl/container/inlined_vector.h | 25 ++-- absl/container/internal/inlined_vector.h | 9 +- absl/hash/BUILD.bazel | 2 +- absl/strings/charconv.h | 4 +- absl/strings/internal/str_format/bind_test.cc | 9 +- absl/strings/internal/str_format/parser.cc | 143 ++++++++++---------- absl/strings/internal/str_format/parser.h | 128 +++++++++++------- absl/strings/internal/str_format/parser_test.cc | 25 ++-- absl/strings/numbers.cc | 4 +- 10 files changed, 186 insertions(+), 328 deletions(-) diff --git a/absl/base/internal/unaligned_access.h b/absl/base/internal/unaligned_access.h index 2d66737..2cf7c1d 100644 --- a/absl/base/internal/unaligned_access.h +++ b/absl/base/internal/unaligned_access.h @@ -25,15 +25,6 @@ // unaligned APIs // Portable handling of unaligned loads, stores, and copies. -// On some platforms, like ARM, the copy functions can be more efficient -// then a load and a store. -// -// It is possible to implement all of these these using constant-length memcpy -// calls, which is portable and will usually be inlined into simple loads and -// stores if the architecture supports it. However, such inlining usually -// happens in a pass that's quite late in compilation, which means the resulting -// loads and stores cannot participate in many other optimizations, leading to -// overall worse code. // The unaligned API is C++ only. The declarations use C++ features // (namespaces, inline) which are absent or incompatible in C. @@ -108,164 +99,8 @@ inline void UnalignedStore64(void *p, uint64_t v) { #define ABSL_INTERNAL_UNALIGNED_STORE64(_p, _val) \ (absl::base_internal::UnalignedStore64(_p, _val)) -#elif defined(UNDEFINED_BEHAVIOR_SANITIZER) - -namespace absl { -namespace base_internal { - -inline uint16_t UnalignedLoad16(const void *p) { - uint16_t t; - memcpy(&t, p, sizeof t); - return t; -} - -inline uint32_t UnalignedLoad32(const void *p) { - uint32_t t; - memcpy(&t, p, sizeof t); - return t; -} - -inline uint64_t UnalignedLoad64(const void *p) { - uint64_t t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UnalignedStore16(void *p, uint16_t v) { memcpy(p, &v, sizeof v); } - -inline void UnalignedStore32(void *p, uint32_t v) { memcpy(p, &v, sizeof v); } - -inline void UnalignedStore64(void *p, uint64_t v) { memcpy(p, &v, sizeof v); } - -} // namespace base_internal -} // namespace absl - -#define ABSL_INTERNAL_UNALIGNED_LOAD16(_p) \ - (absl::base_internal::UnalignedLoad16(_p)) -#define ABSL_INTERNAL_UNALIGNED_LOAD32(_p) \ - (absl::base_internal::UnalignedLoad32(_p)) -#define ABSL_INTERNAL_UNALIGNED_LOAD64(_p) \ - (absl::base_internal::UnalignedLoad64(_p)) - -#define ABSL_INTERNAL_UNALIGNED_STORE16(_p, _val) \ - (absl::base_internal::UnalignedStore16(_p, _val)) -#define ABSL_INTERNAL_UNALIGNED_STORE32(_p, _val) \ - (absl::base_internal::UnalignedStore32(_p, _val)) -#define ABSL_INTERNAL_UNALIGNED_STORE64(_p, _val) \ - (absl::base_internal::UnalignedStore64(_p, _val)) - -#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ - defined(_M_IX86) || defined(__ppc__) || defined(__PPC__) || \ - defined(__ppc64__) || defined(__PPC64__) - -// x86 and x86-64 can perform unaligned loads/stores directly; -// modern PowerPC hardware can also do unaligned integer loads and stores; -// but note: the FPU still sends unaligned loads and stores to a trap handler! - -#define ABSL_INTERNAL_UNALIGNED_LOAD16(_p) \ - (*reinterpret_cast(_p)) -#define ABSL_INTERNAL_UNALIGNED_LOAD32(_p) \ - (*reinterpret_cast(_p)) -#define ABSL_INTERNAL_UNALIGNED_LOAD64(_p) \ - (*reinterpret_cast(_p)) - -#define ABSL_INTERNAL_UNALIGNED_STORE16(_p, _val) \ - (*reinterpret_cast(_p) = (_val)) -#define ABSL_INTERNAL_UNALIGNED_STORE32(_p, _val) \ - (*reinterpret_cast(_p) = (_val)) -#define ABSL_INTERNAL_UNALIGNED_STORE64(_p, _val) \ - (*reinterpret_cast(_p) = (_val)) - -#elif defined(__arm__) && \ - !defined(__ARM_ARCH_5__) && \ - !defined(__ARM_ARCH_5T__) && \ - !defined(__ARM_ARCH_5TE__) && \ - !defined(__ARM_ARCH_5TEJ__) && \ - !defined(__ARM_ARCH_6__) && \ - !defined(__ARM_ARCH_6J__) && \ - !defined(__ARM_ARCH_6K__) && \ - !defined(__ARM_ARCH_6Z__) && \ - !defined(__ARM_ARCH_6ZK__) && \ - !defined(__ARM_ARCH_6T2__) - - -// ARMv7 and newer support native unaligned accesses, but only of 16-bit -// and 32-bit values (not 64-bit); older versions either raise a fatal signal, -// do an unaligned read and rotate the words around a bit, or do the reads very -// slowly (trip through kernel mode). There's no simple #define that says just -// "ARMv7 or higher", so we have to filter away all ARMv5 and ARMv6 -// sub-architectures. Newer gcc (>= 4.6) set an __ARM_FEATURE_ALIGNED #define, -// so in time, maybe we can move on to that. -// -// This is a mess, but there's not much we can do about it. -// -// To further complicate matters, only LDR instructions (single reads) are -// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we -// explicitly tell the compiler that these accesses can be unaligned, it can and -// will combine accesses. On armcc, the way to signal this is done by accessing -// through the type (uint32_t __packed *), but GCC has no such attribute -// (it ignores __attribute__((packed)) on individual variables). However, -// we can tell it that a _struct_ is unaligned, which has the same effect, -// so we do that. - -namespace absl { -namespace base_internal { - -struct Unaligned16Struct { - uint16_t value; - uint8_t dummy; // To make the size non-power-of-two. -} ABSL_ATTRIBUTE_PACKED; - -struct Unaligned32Struct { - uint32_t value; - uint8_t dummy; // To make the size non-power-of-two. -} ABSL_ATTRIBUTE_PACKED; - -} // namespace base_internal -} // namespace absl - -#define ABSL_INTERNAL_UNALIGNED_LOAD16(_p) \ - ((reinterpret_cast(_p)) \ - ->value) -#define ABSL_INTERNAL_UNALIGNED_LOAD32(_p) \ - ((reinterpret_cast(_p)) \ - ->value) - -#define ABSL_INTERNAL_UNALIGNED_STORE16(_p, _val) \ - ((reinterpret_cast< ::absl::base_internal::Unaligned16Struct *>(_p)) \ - ->value = (_val)) -#define ABSL_INTERNAL_UNALIGNED_STORE32(_p, _val) \ - ((reinterpret_cast< ::absl::base_internal::Unaligned32Struct *>(_p)) \ - ->value = (_val)) - -namespace absl { -namespace base_internal { - -inline uint64_t UnalignedLoad64(const void *p) { - uint64_t t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UnalignedStore64(void *p, uint64_t v) { memcpy(p, &v, sizeof v); } - -} // namespace base_internal -} // namespace absl - -#define ABSL_INTERNAL_UNALIGNED_LOAD64(_p) \ - (absl::base_internal::UnalignedLoad64(_p)) -#define ABSL_INTERNAL_UNALIGNED_STORE64(_p, _val) \ - (absl::base_internal::UnalignedStore64(_p, _val)) - #else -// ABSL_INTERNAL_NEED_ALIGNED_LOADS is defined when the underlying platform -// doesn't support unaligned access. -#define ABSL_INTERNAL_NEED_ALIGNED_LOADS - -// These functions are provided for architectures that don't support -// unaligned loads and stores. - namespace absl { namespace base_internal { diff --git a/absl/container/inlined_vector.h b/absl/container/inlined_vector.h index 1ab4b2f..978d503 100644 --- a/absl/container/inlined_vector.h +++ b/absl/container/inlined_vector.h @@ -73,17 +73,12 @@ class InlinedVector { using AllocatorTraits = typename Storage::AllocatorTraits; template - using IsAtLeastForwardIterator = std::is_convertible< - typename std::iterator_traits::iterator_category, - std::forward_iterator_tag>; + using EnableIfAtLeastForwardIterator = absl::enable_if_t< + inlined_vector_internal::IsAtLeastForwardIterator::value>; template - using EnableIfAtLeastForwardIterator = - absl::enable_if_t::value>; - - template - using DisableIfAtLeastForwardIterator = - absl::enable_if_t::value>; + using DisableIfAtLeastForwardIterator = absl::enable_if_t< + !inlined_vector_internal::IsAtLeastForwardIterator::value>; using rvalue_reference = typename Storage::rvalue_reference; @@ -1060,7 +1055,9 @@ class InlinedVector { template void AssignForwardRange(ForwardIt first, ForwardIt last) { - static_assert(IsAtLeastForwardIterator::value, ""); + static_assert(absl::inlined_vector_internal::IsAtLeastForwardIterator< + ForwardIt>::value, + ""); auto length = std::distance(first, last); @@ -1084,7 +1081,9 @@ class InlinedVector { template void AppendForwardRange(ForwardIt first, ForwardIt last) { - static_assert(IsAtLeastForwardIterator::value, ""); + static_assert(absl::inlined_vector_internal::IsAtLeastForwardIterator< + ForwardIt>::value, + ""); auto length = std::distance(first, last); reserve(size() + length); @@ -1113,7 +1112,9 @@ class InlinedVector { template iterator InsertWithForwardRange(const_iterator position, ForwardIt first, ForwardIt last) { - static_assert(IsAtLeastForwardIterator::value, ""); + static_assert(absl::inlined_vector_internal::IsAtLeastForwardIterator< + ForwardIt>::value, + ""); assert(position >= begin() && position <= end()); if (ABSL_PREDICT_FALSE(first == last)) diff --git a/absl/container/internal/inlined_vector.h b/absl/container/internal/inlined_vector.h index 168d5b4..b8b4f4c 100644 --- a/absl/container/internal/inlined_vector.h +++ b/absl/container/internal/inlined_vector.h @@ -26,6 +26,11 @@ namespace absl { namespace inlined_vector_internal { +template +using IsAtLeastForwardIterator = std::is_convertible< + typename std::iterator_traits::iterator_category, + std::forward_iterator_tag>; + template class Storage; @@ -89,9 +94,7 @@ class Storage> { void AddSize(size_type count) { GetSizeAndIsAllocated() += count << 1; } - void SetAllocatedData(pointer data) { - data_.allocated.allocated_data = data; - } + void SetAllocatedData(pointer data) { data_.allocated.allocated_data = data; } void SetAllocatedCapacity(size_type capacity) { data_.allocated.allocated_capacity = capacity; diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index 8c2daf7..e1e6eae 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -35,10 +35,10 @@ cc_library( copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ - ":city", "//absl/base:core_headers", "//absl/base:endian", "//absl/container:fixed_array", + "//absl/hash:city", "//absl/meta:type_traits", "//absl/numeric:int128", "//absl/strings", diff --git a/absl/strings/charconv.h b/absl/strings/charconv.h index 59f74bf..3f5891b 100644 --- a/absl/strings/charconv.h +++ b/absl/strings/charconv.h @@ -49,9 +49,9 @@ struct from_chars_result { // this only supports the `double` and `float` types. // // This interface incorporates the proposed resolutions for library issues -// DR 3800 and DR 3801. If these are adopted with different wording, +// DR 3080 and DR 3081. If these are adopted with different wording, // Abseil's behavior will change to match the standard. (The behavior most -// likely to change is for DR 3801, which says what `value` will be set to in +// likely to change is for DR 3081, which says what `value` will be set to in // the case of overflow and underflow. Code that wants to avoid possible // breaking changes in this area should not depend on `value` when the returned // from_chars_result indicates a range error.) diff --git a/absl/strings/internal/str_format/bind_test.cc b/absl/strings/internal/str_format/bind_test.cc index ba6470e..2574801 100644 --- a/absl/strings/internal/str_format/bind_test.cc +++ b/absl/strings/internal/str_format/bind_test.cc @@ -9,16 +9,11 @@ namespace absl { namespace str_format_internal { namespace { -template -size_t ArraySize(T (&)[N]) { - return N; -} - class FormatBindTest : public ::testing::Test { public: bool Extract(const char *s, UnboundConversion *props, int *next) const { - absl::string_view src = s; - return ConsumeUnboundConversion(&src, props, next) && src.empty(); + return ConsumeUnboundConversion(s, s + strlen(s), props, next) == + s + strlen(s); } }; diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index 10487f2..9ef5615 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -15,6 +15,44 @@ namespace absl { namespace str_format_internal { + +using CC = ConversionChar::Id; +using LM = LengthMod::Id; +ABSL_CONST_INIT const ConvTag kTags[256] = { + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + {}, {}, {}, {}, {}, {}, {}, {}, // 20-27 + {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f + {}, {}, {}, {}, {}, {}, {}, {}, // 30-37 + {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f + {}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, CC::S, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! + {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff +}; + namespace { bool CheckFastPathSetting(const UnboundConversion& conv) { @@ -36,60 +74,17 @@ bool CheckFastPathSetting(const UnboundConversion& conv) { return should_be_basic == conv.flags.basic; } -// Keep a single table for all the conversion chars and length modifiers. -// We invert the length modifiers to make them negative so that we can easily -// test for them. -// Everything else is `none`, which is a negative constant. -using CC = ConversionChar::Id; -using LM = LengthMod::Id; -static constexpr std::int8_t none = -128; -static constexpr std::int8_t kIds[] = { - none, none, none, none, none, none, none, none, // 00-07 - none, none, none, none, none, none, none, none, // 08-0f - none, none, none, none, none, none, none, none, // 10-17 - none, none, none, none, none, none, none, none, // 18-1f - none, none, none, none, none, none, none, none, // 20-27 - none, none, none, none, none, none, none, none, // 28-2f - none, none, none, none, none, none, none, none, // 30-37 - none, none, none, none, none, none, none, none, // 38-3f - none, CC::A, none, CC::C, none, CC::E, CC::F, CC::G, // @ABCDEFG - none, none, none, none, ~LM::L, none, none, none, // HIJKLMNO - none, none, none, CC::S, none, none, none, none, // PQRSTUVW - CC::X, none, none, none, none, none, none, none, // XYZ[\]^_ - none, CC::a, none, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg - ~LM::h, CC::i, ~LM::j, none, ~LM::l, none, CC::n, CC::o, // hijklmno - CC::p, ~LM::q, none, CC::s, ~LM::t, CC::u, none, none, // pqrstuvw - CC::x, none, ~LM::z, none, none, none, none, none, // xyz{|}~! - none, none, none, none, none, none, none, none, // 80-87 - none, none, none, none, none, none, none, none, // 88-8f - none, none, none, none, none, none, none, none, // 90-97 - none, none, none, none, none, none, none, none, // 98-9f - none, none, none, none, none, none, none, none, // a0-a7 - none, none, none, none, none, none, none, none, // a8-af - none, none, none, none, none, none, none, none, // b0-b7 - none, none, none, none, none, none, none, none, // b8-bf - none, none, none, none, none, none, none, none, // c0-c7 - none, none, none, none, none, none, none, none, // c8-cf - none, none, none, none, none, none, none, none, // d0-d7 - none, none, none, none, none, none, none, none, // d8-df - none, none, none, none, none, none, none, none, // e0-e7 - none, none, none, none, none, none, none, none, // e8-ef - none, none, none, none, none, none, none, none, // f0-f7 - none, none, none, none, none, none, none, none, // f8-ff -}; - template -bool ConsumeConversion(string_view *src, UnboundConversion *conv, - int *next_arg) { - const char *pos = src->data(); - const char *const end = pos + src->size(); +const char *ConsumeConversion(const char *pos, const char *const end, + UnboundConversion *conv, int *next_arg) { + const char* const original_pos = pos; char c; // Read the next char into `c` and update `pos`. Returns false if there are // no more chars to read. -#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ - do { \ - if (ABSL_PREDICT_FALSE(pos == end)) return false; \ - c = *pos++; \ +#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ + do { \ + if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ + c = *pos++; \ } while (0) const auto parse_digits = [&] { @@ -111,10 +106,10 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv, if (is_positional) { ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false; + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; conv->arg_position = parse_digits(); assert(conv->arg_position > 0); - if (ABSL_PREDICT_FALSE(c != '$')) return false; + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; } ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); @@ -129,10 +124,9 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv, conv->flags.basic = false; for (; c <= '0';) { - // FIXME: We might be able to speed this up reusing the kIds lookup table - // from above. - // It might require changing Flags to be a plain integer where we can |= a - // value. + // FIXME: We might be able to speed this up reusing the lookup table from + // above. It might require changing Flags to be a plain integer where we + // can |= a value. switch (c) { case '-': conv->flags.left = true; @@ -160,20 +154,20 @@ flags_done: if (c >= '0') { int maybe_width = parse_digits(); if (!is_positional && c == '$') { - if (ABSL_PREDICT_FALSE(*next_arg != 0)) return false; + if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; // Positional conversion. *next_arg = -1; conv->flags = Flags(); conv->flags.basic = true; - return ConsumeConversion(src, conv, next_arg); + return ConsumeConversion(original_pos, end, conv, next_arg); } conv->width.set_value(maybe_width); } else if (c == '*') { ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false; + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; conv->width.set_from_arg(parse_digits()); - if (ABSL_PREDICT_FALSE(c != '$')) return false; + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); } else { conv->width.set_from_arg(++*next_arg); @@ -188,9 +182,9 @@ flags_done: } else if (c == '*') { ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false; + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; conv->precision.set_from_arg(parse_digits()); - if (c != '$') return false; + if (c != '$') return nullptr; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); } else { conv->precision.set_from_arg(++*next_arg); @@ -201,14 +195,14 @@ flags_done: } } - std::int8_t id = kIds[static_cast(c)]; + auto tag = GetTagForChar(c); - if (id < 0) { - if (ABSL_PREDICT_FALSE(id == none)) return false; + if (ABSL_PREDICT_FALSE(!tag.is_conv())) { + if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; // It is a length modifier. using str_format_internal::LengthMod; - LengthMod length_mod = LengthMod::FromId(static_cast(~id)); + LengthMod length_mod = tag.as_length(); ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (c == 'h' && length_mod.id() == LengthMod::h) { conv->length_mod = LengthMod::FromId(LengthMod::hh); @@ -219,25 +213,24 @@ flags_done: } else { conv->length_mod = length_mod; } - id = kIds[static_cast(c)]; - if (ABSL_PREDICT_FALSE(id < 0)) return false; + tag = GetTagForChar(c); + if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; } assert(CheckFastPathSetting(*conv)); (void)(&CheckFastPathSetting); - conv->conv = ConversionChar::FromId(static_cast(id)); + conv->conv = tag.as_conv(); if (!is_positional) conv->arg_position = ++*next_arg; - *src = string_view(pos, end - pos); - return true; + return pos; } } // namespace -bool ConsumeUnboundConversion(string_view *src, UnboundConversion *conv, - int *next_arg) { - if (*next_arg < 0) return ConsumeConversion(src, conv, next_arg); - return ConsumeConversion(src, conv, next_arg); +const char *ConsumeUnboundConversion(const char *p, const char *end, + UnboundConversion *conv, int *next_arg) { + if (*next_arg < 0) return ConsumeConversion(p, end, conv, next_arg); + return ConsumeConversion(p, end, conv, next_arg); } struct ParsedFormatBase::ParsedFormatConsumer { diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index 1022f06..b7614a0 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -63,17 +63,45 @@ struct UnboundConversion { ConversionChar conv; }; -// Consume conversion spec prefix (not including '%') of '*src' if valid. +// Consume conversion spec prefix (not including '%') of [p, end) if valid. // Examples of valid specs would be e.g.: "s", "d", "-12.6f". -// If valid, the front of src is advanced such that src becomes the -// part following the conversion spec, and the spec part is broken down and -// returned in 'conv'. -// If invalid, returns false and leaves 'src' unmodified. -// For example: -// Given "d9", returns "d", and leaves src="9", -// Given "!", returns "" and leaves src="!". -bool ConsumeUnboundConversion(string_view* src, UnboundConversion* conv, - int* next_arg); +// If valid, it returns the first character following the conversion spec, +// and the spec part is broken down and returned in 'conv'. +// If invalid, returns nullptr. +const char* ConsumeUnboundConversion(const char* p, const char* end, + UnboundConversion* conv, int* next_arg); + +// Helper tag class for the table below. +// It allows fast `char -> ConversionChar/LengthMod` checking and conversions. +class ConvTag { + public: + constexpr ConvTag(ConversionChar::Id id) : tag_(id) {} // NOLINT + // We invert the length modifiers to make them negative so that we can easily + // test for them. + constexpr ConvTag(LengthMod::Id id) : tag_(~id) {} // NOLINT + // Everything else is -128, which is negative to make is_conv() simpler. + constexpr ConvTag() : tag_(-128) {} + + bool is_conv() const { return tag_ >= 0; } + bool is_length() const { return tag_ < 0 && tag_ != -128; } + ConversionChar as_conv() const { + assert(is_conv()); + return ConversionChar::FromId(static_cast(tag_)); + } + LengthMod as_length() const { + assert(is_length()); + return LengthMod::FromId(static_cast(~tag_)); + } + + private: + std::int8_t tag_; +}; + +extern const ConvTag kTags[256]; +// Keep a single table for all the conversion chars and length modifiers. +inline ConvTag GetTagForChar(char c) { + return kTags[static_cast(c)]; +} // Parse the format string provided in 'src' and pass the identified items into // 'consumer'. @@ -88,51 +116,53 @@ bool ConsumeUnboundConversion(string_view* src, UnboundConversion* conv, template bool ParseFormatString(string_view src, Consumer consumer) { int next_arg = 0; - while (!src.empty()) { - const char* percent = - static_cast(memchr(src.data(), '%', src.size())); + const char* p = src.data(); + const char* const end = p + src.size(); + while (p != end) { + const char* percent = static_cast(memchr(p, '%', end - p)); if (!percent) { // We found the last substring. - return consumer.Append(src); + return consumer.Append(string_view(p, end - p)); } // We found a percent, so push the text run then process the percent. - size_t percent_loc = percent - src.data(); - if (!consumer.Append(string_view(src.data(), percent_loc))) return false; - if (percent + 1 >= src.data() + src.size()) return false; - - UnboundConversion conv; - - switch (percent[1]) { - case '%': - if (!consumer.Append("%")) return false; - src.remove_prefix(percent_loc + 2); - continue; - -#define PARSER_CASE(ch) \ - case #ch[0]: \ - src.remove_prefix(percent_loc + 2); \ - conv.conv = ConversionChar::FromId(ConversionChar::ch); \ - conv.arg_position = ++next_arg; \ - break; - ABSL_CONVERSION_CHARS_EXPAND_(PARSER_CASE, ); -#undef PARSER_CASE - - default: - src.remove_prefix(percent_loc + 1); - if (!ConsumeUnboundConversion(&src, &conv, &next_arg)) return false; - break; - } - if (next_arg == 0) { - // This indicates an error in the format std::string. - // The only way to get next_arg == 0 is to have a positional argument - // first which sets next_arg to -1 and then a non-positional argument - // which does ++next_arg. - // Checking here seems to be the cheapeast place to do it. + if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) { return false; } - if (!consumer.ConvertOne( - conv, string_view(percent + 1, src.data() - (percent + 1)))) { - return false; + if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false; + + auto tag = GetTagForChar(percent[1]); + if (tag.is_conv()) { + if (ABSL_PREDICT_FALSE(next_arg < 0)) { + // This indicates an error in the format std::string. + // The only way to get `next_arg < 0` here is to have a positional + // argument first which sets next_arg to -1 and then a non-positional + // argument. + return false; + } + p = percent + 2; + + // Keep this case separate from the one below. + // ConvertOne is more efficient when the compiler can see that the `basic` + // flag is set. + UnboundConversion conv; + conv.conv = tag.as_conv(); + conv.arg_position = ++next_arg; + if (ABSL_PREDICT_FALSE( + !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) { + return false; + } + } else if (percent[1] != '%') { + UnboundConversion conv; + p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg); + if (ABSL_PREDICT_FALSE(p == nullptr)) return false; + if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( + conv, string_view(percent + 1, p - (percent + 1))))) { + return false; + } + } else { + if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false; + p = percent + 2; + continue; } } return true; diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index ff70575..6d35609 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -1,6 +1,8 @@ #include "absl/strings/internal/str_format/parser.h" #include + +#include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/macros.h" @@ -9,6 +11,8 @@ namespace str_format_internal { namespace { +using testing::Pair; + TEST(LengthModTest, Names) { struct Expectation { int line; @@ -63,20 +67,21 @@ TEST(ConversionCharTest, Names) { class ConsumeUnboundConversionTest : public ::testing::Test { public: - typedef UnboundConversion Props; - string_view Consume(string_view* src) { + std::pair Consume(string_view src) { int next = 0; - const char* prev_begin = src->data(); o = UnboundConversion(); // refresh - ConsumeUnboundConversion(src, &o, &next); - return {prev_begin, static_cast(src->data() - prev_begin)}; + const char* p = ConsumeUnboundConversion( + src.data(), src.data() + src.size(), &o, &next); + if (!p) return {{}, src}; + return {string_view(src.data(), p - src.data()), + string_view(p, src.data() + src.size() - p)}; } bool Run(const char *fmt, bool force_positional = false) { - string_view src = fmt; int next = force_positional ? -1 : 0; o = UnboundConversion(); // refresh - return ConsumeUnboundConversion(&src, &o, &next) && src.empty(); + return ConsumeUnboundConversion(fmt, fmt + strlen(fmt), &o, &next) == + fmt + strlen(fmt); } UnboundConversion o; }; @@ -104,11 +109,7 @@ TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) { }; for (const auto& e : kExpect) { SCOPED_TRACE(e.line); - string_view src = e.src; - EXPECT_EQ(e.src, src); - string_view out = Consume(&src); - EXPECT_EQ(e.out, out); - EXPECT_EQ(e.src_post, src); + EXPECT_THAT(Consume(e.src), Pair(e.out, e.src_post)); } } diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 558c339..38d1486 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -54,7 +54,7 @@ bool SimpleAtof(absl::string_view str, float* out) { // not all non-whitespace characters consumed return false; } - // from_chars() with DR 3801's current wording will return max() on + // from_chars() with DR 3081's current wording will return max() on // overflow. SimpleAtof returns infinity instead. if (result.ec == std::errc::result_out_of_range) { if (*out > 1.0) { @@ -80,7 +80,7 @@ bool SimpleAtod(absl::string_view str, double* out) { // not all non-whitespace characters consumed return false; } - // from_chars() with DR 3801's current wording will return max() on + // from_chars() with DR 3081's current wording will return max() on // overflow. SimpleAtod returns infinity instead. if (result.ec == std::errc::result_out_of_range) { if (*out > 1.0) { -- cgit v1.2.3