summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2024-01-04 13:14:50 -0800
committerGravatar Copybara-Service <copybara-worker@google.com>2024-01-04 13:15:44 -0800
commitd5a2cec006d14c6801ddeb768bf2574a1cf4fa7f (patch)
tree3dc1ffb00d164ddda4c22b8abfc42594db1fa2da
parentccf0c7730db976d2b7814e74f6f01d044ae6f853 (diff)
Optimize integer-to-string conversions
The updated code is designed to: - Be branch-predictor-friendly - Be cache-friendly - Minimize the lengths of critical paths - Minimize slow operations (particularly multiplications) - Minimize binary/codegen bloat The most notable performance trick here is perhaps the precomputation & caching of the number of digits, so that we can reuse/exploit it when writing the output. This precomputation of the exact length enables 2 further performance benefits: - It makes `StrCat` and `StrAppend` zero-copy when only integers are passed, by avoiding intermediate `AlphaNum` entirely in those cases. If needed in the future, we can probably also make many other mixtures of non-integer types zero-copy as well. - It avoids over-reservation of the string buffer, allowing for more strings to fit inside SSO, which will likely have further performance benefits. There is also a side benefit of preventing `FastIntToBuffer` from writing beyond the end of the buffer, which has caused buffer overflows in the past. The new code continues to use & extend some of the existing core tricks (such as the division-by-100 trick), as those are already efficient. PiperOrigin-RevId: 595785531 Change-Id: Id6920e7e038fec10b2c45f213de75dc7e2cbddd1
-rw-r--r--absl/base/macros.h12
-rw-r--r--absl/strings/numbers.cc440
-rw-r--r--absl/strings/numbers.h163
-rw-r--r--absl/strings/numbers_test.cc7
-rw-r--r--absl/strings/str_cat.cc146
-rw-r--r--absl/strings/str_cat.h157
-rw-r--r--absl/strings/str_cat_test.cc46
7 files changed, 759 insertions, 212 deletions
diff --git a/absl/base/macros.h b/absl/base/macros.h
index cd7cf498..f33cd192 100644
--- a/absl/base/macros.h
+++ b/absl/base/macros.h
@@ -138,16 +138,4 @@ ABSL_NAMESPACE_END
#define ABSL_INTERNAL_RETHROW do {} while (false)
#endif // ABSL_HAVE_EXCEPTIONS
-// Requires the compiler to prove that the size of the given object is at least
-// the expected amount.
-#if ABSL_HAVE_ATTRIBUTE(diagnose_if) && ABSL_HAVE_BUILTIN(__builtin_object_size)
-#define ABSL_INTERNAL_NEED_MIN_SIZE(Obj, N) \
- __attribute__((diagnose_if(__builtin_object_size(Obj, 0) < N, \
- "object size provably too small " \
- "(this would corrupt memory)", \
- "error")))
-#else
-#define ABSL_INTERNAL_NEED_MIN_SIZE(Obj, N)
-#endif
-
#endif // ABSL_BASE_MACROS_H_
diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc
index b57d9e82..882c3a8b 100644
--- a/absl/strings/numbers.cc
+++ b/absl/strings/numbers.cc
@@ -20,7 +20,9 @@
#include <algorithm>
#include <cassert>
#include <cfloat> // for DBL_DIG and FLT_DIG
+#include <climits>
#include <cmath> // for HUGE_VAL
+#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
@@ -28,6 +30,7 @@
#include <iterator>
#include <limits>
#include <system_error> // NOLINT(build/c++11)
+#include <type_traits>
#include <utility>
#include "absl/base/attributes.h"
@@ -156,28 +159,71 @@ constexpr uint32_t kTwoZeroBytes = 0x0101 * '0';
constexpr uint64_t kFourZeroBytes = 0x01010101 * '0';
constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0';
-// * 103 / 1024 is a division by 10 for values from 0 to 99. It's also a
-// division of a structure [k takes 2 bytes][m takes 2 bytes], then * 103 / 1024
-// will be [k / 10][m / 10]. It allows parallel division.
-constexpr uint64_t kDivisionBy10Mul = 103u;
+template <typename T>
+constexpr T Pow(T base, uint32_t n) {
+ // Exponentiation by squaring
+ return static_cast<T>((n > 1 ? Pow(base * base, n >> 1) : static_cast<T>(1)) *
+ ((n & 1) ? base : static_cast<T>(1)));
+}
+
+// Given n, calculates C where the following holds for all 0 <= x < Pow(100, n):
+// x / Pow(10, n) == x * C / Pow(2, n * 10)
+// In other words, it allows us to divide by a power of 10 via a single
+// multiplication and bit shifts, assuming the input will be smaller than the
+// square of that power of 10.
+template <typename T>
+constexpr T ComputePowerOf100DivisionCoefficient(uint32_t n) {
+ if (n > 4) {
+ // This doesn't work for large powers of 100, due to overflow
+ abort();
+ }
+ T denom = 16 - 1;
+ T num = (denom + 1) - 10;
+ T gcd = 3; // Greatest common divisor of numerator and denominator
+ denom = Pow(denom / gcd, n);
+ num = Pow(num / gcd, 9 * n);
+ T quotient = num / denom;
+ if (num % denom >= denom / 2) {
+ // Round up, since the remainder is more than half the denominator
+ ++quotient;
+ }
+ return quotient;
+}
+
+// * kDivisionBy10Mul / kDivisionBy10Div is a division by 10 for values from 0
+// to 99. It's also a division of a structure [k takes 2 bytes][m takes 2
+// bytes], then * kDivisionBy10Mul / kDivisionBy10Div will be [k / 10][m / 10].
+// It allows parallel division.
+constexpr uint64_t kDivisionBy10Mul =
+ ComputePowerOf100DivisionCoefficient<uint64_t>(1);
+static_assert(kDivisionBy10Mul == 103,
+ "division coefficient for 10 is incorrect");
constexpr uint64_t kDivisionBy10Div = 1 << 10;
-// * 10486 / 1048576 is a division by 100 for values from 0 to 9999.
-constexpr uint64_t kDivisionBy100Mul = 10486u;
+// * kDivisionBy100Mul / kDivisionBy100Div is a division by 100 for values from
+// 0 to 9999.
+constexpr uint64_t kDivisionBy100Mul =
+ ComputePowerOf100DivisionCoefficient<uint64_t>(2);
+static_assert(kDivisionBy100Mul == 10486,
+ "division coefficient for 100 is incorrect");
constexpr uint64_t kDivisionBy100Div = 1 << 20;
-// Encode functions write the ASCII output of input `n` to `out_str`.
-inline char* EncodeHundred(uint32_t n, absl::Nonnull<char*> out_str) {
- int num_digits = static_cast<int>(n - 10) >> 8;
- uint32_t div10 = (n * kDivisionBy10Mul) / kDivisionBy10Div;
- uint32_t mod10 = n - 10u * div10;
- uint32_t base = kTwoZeroBytes + div10 + (mod10 << 8);
- base >>= num_digits & 8;
- little_endian::Store16(out_str, static_cast<uint16_t>(base));
- return out_str + 2 + num_digits;
+static_assert(ComputePowerOf100DivisionCoefficient<uint64_t>(3) == 1073742,
+ "division coefficient for 1000 is incorrect");
+
+// Same as `PrepareEightDigits`, but produces 2 digits for integers < 100.
+inline uint32_t PrepareTwoDigitsImpl(uint32_t i, bool reversed) {
+ assert(i < 100);
+ uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
+ uint32_t mod10 = i - 10u * div10;
+ return (div10 << (reversed ? 8 : 0)) + (mod10 << (reversed ? 0 : 8));
+}
+inline uint32_t PrepareTwoDigits(uint32_t i) {
+ return PrepareTwoDigitsImpl(i, false);
}
-inline char* EncodeTenThousand(uint32_t n, absl::Nonnull<char*> out_str) {
+// Same as `PrepareEightDigits`, but produces 4 digits for integers < 10000.
+inline uint32_t PrepareFourDigitsImpl(uint32_t n, bool reversed) {
// We split lower 2 digits and upper 2 digits of n into 2 byte consecutive
// blocks. 123 -> [\0\1][\0\23]. We divide by 10 both blocks
// (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in
@@ -185,22 +231,19 @@ inline char* EncodeTenThousand(uint32_t n, absl::Nonnull<char*> out_str) {
// strip trailing zeros, add ASCII '0000' and return.
uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div;
uint32_t mod100 = n - 100ull * div100;
- uint32_t hundreds = (mod100 << 16) + div100;
+ uint32_t hundreds =
+ (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0));
uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
tens &= (0xFull << 16) | 0xFull;
- tens += (hundreds - 10ull * tens) << 8;
- ABSL_ASSUME(tens != 0);
- // The result can contain trailing zero bits, we need to strip them to a first
- // significant byte in a final representation. For example, for n = 123, we
- // have tens to have representation \0\1\2\3. We do `& -8` to round
- // to a multiple to 8 to strip zero bytes, not all zero bits.
- // countr_zero to help.
- // 0 minus 8 to make MSVC happy.
- uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(tens)) & (0 - 8u);
- tens += kFourZeroBytes;
- tens >>= zeroes;
- little_endian::Store32(out_str, tens);
- return out_str + sizeof(tens) - zeroes / 8;
+ tens = (tens << (reversed ? 8 : 0)) +
+ static_cast<uint32_t>((hundreds - 10ull * tens) << (reversed ? 0 : 8));
+ return tens;
+}
+inline uint32_t PrepareFourDigits(uint32_t n) {
+ return PrepareFourDigitsImpl(n, false);
+}
+inline uint32_t PrepareFourDigitsReversed(uint32_t n) {
+ return PrepareFourDigitsImpl(n, true);
}
// Helper function to produce an ASCII representation of `i`.
@@ -216,126 +259,309 @@ inline char* EncodeTenThousand(uint32_t n, absl::Nonnull<char*> out_str) {
// // Note two leading zeros:
// EXPECT_EQ(absl::string_view(ascii, 8), "00102030");
//
+// If `Reversed` is set to true, the result becomes reversed to "03020100".
+//
// Pre-condition: `i` must be less than 100000000.
-inline uint64_t PrepareEightDigits(uint32_t i) {
+inline uint64_t PrepareEightDigitsImpl(uint32_t i, bool reversed) {
ABSL_ASSUME(i < 10000'0000);
// Prepare 2 blocks of 4 digits "in parallel".
uint32_t hi = i / 10000;
uint32_t lo = i % 10000;
- uint64_t merged = hi | (uint64_t{lo} << 32);
+ uint64_t merged = (uint64_t{hi} << (reversed ? 32 : 0)) |
+ (uint64_t{lo} << (reversed ? 0 : 32));
uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) &
((0x7Full << 32) | 0x7Full);
uint64_t mod100 = merged - 100ull * div100;
- uint64_t hundreds = (mod100 << 16) + div100;
+ uint64_t hundreds =
+ (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0));
uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull;
- tens += (hundreds - 10ull * tens) << 8;
+ tens = (tens << (reversed ? 8 : 0)) +
+ ((hundreds - 10ull * tens) << (reversed ? 0 : 8));
return tens;
}
+inline uint64_t PrepareEightDigits(uint32_t i) {
+ return PrepareEightDigitsImpl(i, false);
+}
+inline uint64_t PrepareEightDigitsReversed(uint32_t i) {
+ return PrepareEightDigitsImpl(i, true);
+}
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE absl::Nonnull<char*> EncodeFullU32(
- uint32_t n, absl::Nonnull<char*> out_str) {
- if (n < 10) {
- *out_str = static_cast<char>('0' + n);
- return out_str + 1;
+template <typename T, typename BackwardIt>
+class FastUIntToStringConverter {
+ static_assert(
+ std::is_same<T, decltype(+std::declval<T>())>::value,
+ "to avoid code bloat, only instantiate this for int and larger types");
+ static_assert(std::is_unsigned<T>::value,
+ "this class is only for unsigned types");
+
+ public:
+ // Outputs the given number backward (like with std::copy_backward),
+ // starting from the end of the string.
+ // The number of digits in the number must have been already measured and
+ // passed *exactly*, otherwise the behavior is undefined.
+ // (This is an optimization, as calculating the number of digits again would
+ // slow down the hot path.)
+ // Returns an iterator to the start of the suffix that was appended.
+ static BackwardIt FastIntToBufferBackward(T v, BackwardIt end) {
+ // THIS IS A HOT FUNCTION with a very deliberate structure to exploit branch
+ // prediction and shorten the critical path for smaller numbers.
+ // Do not move around the if/else blocks or attempt to simplify it
+ // without benchmarking any changes.
+
+ if (v < 10) {
+ goto AT_LEAST_1 /* NOTE: mandatory for the 0 case */;
+ }
+ if (v < 1000) {
+ goto AT_LEAST_10;
+ }
+ if (v < 10000000) {
+ goto AT_LEAST_1000;
+ }
+
+ if (v >= 100000000 / 10) {
+ if (v >= 10000000000000000 / 10) {
+ DoFastIntToBufferBackward<8>(v, end);
+ }
+ DoFastIntToBufferBackward<8>(v, end);
+ }
+
+ if (v >= 10000 / 10) {
+ AT_LEAST_1000:
+ DoFastIntToBufferBackward<4>(v, end);
+ }
+
+ if (v >= 100 / 10) {
+ AT_LEAST_10:
+ DoFastIntToBufferBackward<2>(v, end);
+ }
+
+ if (v >= 10 / 10) {
+ AT_LEAST_1:
+ end = DoFastIntToBufferBackward(v, end, std::integral_constant<int, 1>());
+ }
+ return end;
}
- if (n < 100'000'000) {
- uint64_t bottom = PrepareEightDigits(n);
- ABSL_ASSUME(bottom != 0);
- // 0 minus 8 to make MSVC happy.
- uint32_t zeroes =
- static_cast<uint32_t>(absl::countr_zero(bottom)) & (0 - 8u);
- little_endian::Store64(out_str, (bottom + kEightZeroBytes) >> zeroes);
- return out_str + sizeof(bottom) - zeroes / 8;
+
+ private:
+ // Only assume pointers are contiguous for now. String and vector iterators
+ // could be special-cased as well, but there's no need for them here.
+ // With C++20 we can probably switch to std::contiguous_iterator_tag.
+ static constexpr bool kIsContiguousIterator =
+ std::is_pointer<BackwardIt>::value;
+
+ template <int Exponent>
+ static void DoFastIntToBufferBackward(T& v, BackwardIt& end) {
+ constexpr T kModulus = Pow<T>(10, Exponent);
+ T remainder = static_cast<T>(v % kModulus);
+ v = static_cast<T>(v / kModulus);
+ end = DoFastIntToBufferBackward(remainder, end,
+ std::integral_constant<int, Exponent>());
}
- uint32_t div08 = n / 100'000'000;
- uint32_t mod08 = n % 100'000'000;
- uint64_t bottom = PrepareEightDigits(mod08) + kEightZeroBytes;
- out_str = EncodeHundred(div08, out_str);
- little_endian::Store64(out_str, bottom);
- return out_str + sizeof(bottom);
-}
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE char* EncodeFullU64(uint64_t i,
- char* buffer) {
- if (i <= std::numeric_limits<uint32_t>::max()) {
- return EncodeFullU32(static_cast<uint32_t>(i), buffer);
+ static BackwardIt DoFastIntToBufferBackward(const T&, BackwardIt end,
+ std::integral_constant<int, 0>) {
+ return end;
}
- uint32_t mod08;
- if (i < 1'0000'0000'0000'0000ull) {
- uint32_t div08 = static_cast<uint32_t>(i / 100'000'000ull);
- mod08 = static_cast<uint32_t>(i % 100'000'000ull);
- buffer = EncodeFullU32(div08, buffer);
- } else {
- uint64_t div08 = i / 100'000'000ull;
- mod08 = static_cast<uint32_t>(i % 100'000'000ull);
- uint32_t div016 = static_cast<uint32_t>(div08 / 100'000'000ull);
- uint32_t div08mod08 = static_cast<uint32_t>(div08 % 100'000'000ull);
- uint64_t mid_result = PrepareEightDigits(div08mod08) + kEightZeroBytes;
- buffer = EncodeTenThousand(div016, buffer);
- little_endian::Store64(buffer, mid_result);
- buffer += sizeof(mid_result);
+
+ static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
+ std::integral_constant<int, 1>) {
+ *--end = static_cast<char>('0' + v);
+ return DoFastIntToBufferBackward(v, end, std::integral_constant<int, 0>());
+ }
+
+ static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
+ std::integral_constant<int, 4>) {
+ if (kIsContiguousIterator) {
+ const uint32_t digits =
+ PrepareFourDigits(static_cast<uint32_t>(v)) + kFourZeroBytes;
+ end -= sizeof(digits);
+ little_endian::Store32(&*end, digits);
+ } else {
+ uint32_t digits =
+ PrepareFourDigitsReversed(static_cast<uint32_t>(v)) + kFourZeroBytes;
+ for (size_t i = 0; i < sizeof(digits); ++i) {
+ *--end = static_cast<char>(digits);
+ digits >>= CHAR_BIT;
+ }
+ }
+ return end;
}
- uint64_t mod_result = PrepareEightDigits(mod08) + kEightZeroBytes;
- little_endian::Store64(buffer, mod_result);
- return buffer + sizeof(mod_result);
+
+ static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
+ std::integral_constant<int, 8>) {
+ if (kIsContiguousIterator) {
+ const uint64_t digits =
+ PrepareEightDigits(static_cast<uint32_t>(v)) + kEightZeroBytes;
+ end -= sizeof(digits);
+ little_endian::Store64(&*end, digits);
+ } else {
+ uint64_t digits = PrepareEightDigitsReversed(static_cast<uint32_t>(v)) +
+ kEightZeroBytes;
+ for (size_t i = 0; i < sizeof(digits); ++i) {
+ *--end = static_cast<char>(digits);
+ digits >>= CHAR_BIT;
+ }
+ }
+ return end;
+ }
+
+ template <int Digits>
+ static BackwardIt DoFastIntToBufferBackward(
+ T v, BackwardIt end, std::integral_constant<int, Digits>) {
+ constexpr int kLogModulus = Digits - Digits / 2;
+ constexpr T kModulus = Pow(static_cast<T>(10), kLogModulus);
+ bool is_safe_to_use_division_trick = Digits <= 8;
+ T quotient, remainder;
+ if (is_safe_to_use_division_trick) {
+ constexpr uint64_t kCoefficient =
+ ComputePowerOf100DivisionCoefficient<uint64_t>(kLogModulus);
+ quotient = (v * kCoefficient) >> (10 * kLogModulus);
+ remainder = v - quotient * kModulus;
+ } else {
+ quotient = v / kModulus;
+ remainder = v % kModulus;
+ }
+ end = DoFastIntToBufferBackward(remainder, end,
+ std::integral_constant<int, kLogModulus>());
+ return DoFastIntToBufferBackward(
+ quotient, end, std::integral_constant<int, Digits - kLogModulus>());
+ }
+};
+
+// Returns an iterator to the start of the suffix that was appended
+template <typename T, typename BackwardIt>
+std::enable_if_t<std::is_unsigned<T>::value, BackwardIt>
+DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) {
+ using PromotedT = std::decay_t<decltype(+v)>;
+ using Converter = FastUIntToStringConverter<PromotedT, BackwardIt>;
+ (void)digits;
+ return Converter().FastIntToBufferBackward(v, end);
+}
+
+template <typename T, typename BackwardIt>
+std::enable_if_t<std::is_signed<T>::value, BackwardIt>
+DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) {
+ if (absl::numbers_internal::IsNegative(v)) {
+ // Store the minus sign *before* we produce the number itself, not after.
+ // This gets us a tail call.
+ end[-static_cast<ptrdiff_t>(digits) - 1] = '-';
+ }
+ return DoFastIntToBufferBackward(
+ absl::numbers_internal::UnsignedAbsoluteValue(v), end, digits);
+}
+
+template <class T>
+std::enable_if_t<std::is_integral<T>::value, int>
+GetNumDigitsOrNegativeIfNegativeImpl(T v) {
+ const auto /* either bool or std::false_type */ is_negative =
+ absl::numbers_internal::IsNegative(v);
+ const int digits = static_cast<int>(absl::numbers_internal::Base10Digits(
+ absl::numbers_internal::UnsignedAbsoluteValue(v)));
+ return is_negative ? ~digits : digits;
}
} // namespace
void numbers_internal::PutTwoDigits(uint32_t i, absl::Nonnull<char*> buf) {
- assert(i < 100);
- uint32_t base = kTwoZeroBytes;
- uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
- uint32_t mod10 = i - 10u * div10;
- base += div10 + (mod10 << 8);
- little_endian::Store16(buf, static_cast<uint16_t>(base));
+ little_endian::Store16(
+ buf, static_cast<uint16_t>(PrepareTwoDigits(i) + kTwoZeroBytes));
}
absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
- uint32_t n, absl::Nonnull<char*> out_str) {
- out_str = EncodeFullU32(n, out_str);
- *out_str = '\0';
- return out_str;
+ uint32_t i, absl::Nonnull<char*> buffer) {
+ const uint32_t digits = absl::numbers_internal::Base10Digits(i);
+ buffer += digits;
+ *buffer = '\0'; // We're going backward, so store this first
+ FastIntToBufferBackward(i, buffer, digits);
+ return buffer;
}
absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
int32_t i, absl::Nonnull<char*> buffer) {
- uint32_t u = static_cast<uint32_t>(i);
- if (i < 0) {
- *buffer++ = '-';
- // We need to do the negation in modular (i.e., "unsigned")
- // arithmetic; MSVC++ apparently warns for plain "-u", so
- // we write the equivalent expression "0 - u" instead.
- u = 0 - u;
- }
- buffer = EncodeFullU32(u, buffer);
- *buffer = '\0';
+ buffer += static_cast<int>(i < 0);
+ uint32_t digits = absl::numbers_internal::Base10Digits(
+ absl::numbers_internal::UnsignedAbsoluteValue(i));
+ buffer += digits;
+ *buffer = '\0'; // We're going backward, so store this first
+ FastIntToBufferBackward(i, buffer, digits);
return buffer;
}
absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
uint64_t i, absl::Nonnull<char*> buffer) {
- buffer = EncodeFullU64(i, buffer);
- *buffer = '\0';
+ uint32_t digits = absl::numbers_internal::Base10Digits(i);
+ buffer += digits;
+ *buffer = '\0'; // We're going backward, so store this first
+ FastIntToBufferBackward(i, buffer, digits);
return buffer;
}
absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
int64_t i, absl::Nonnull<char*> buffer) {
- uint64_t u = static_cast<uint64_t>(i);
- if (i < 0) {
- *buffer++ = '-';
- // We need to do the negation in modular (i.e., "unsigned")
- // arithmetic; MSVC++ apparently warns for plain "-u", so
- // we write the equivalent expression "0 - u" instead.
- u = 0 - u;
- }
- buffer = EncodeFullU64(u, buffer);
- *buffer = '\0';
+ buffer += static_cast<int>(i < 0);
+ uint32_t digits = absl::numbers_internal::Base10Digits(
+ absl::numbers_internal::UnsignedAbsoluteValue(i));
+ buffer += digits;
+ *buffer = '\0'; // We're going backward, so store this first
+ FastIntToBufferBackward(i, buffer, digits);
return buffer;
}
+absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
+ uint32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
+ return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
+}
+
+absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
+ int32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
+ return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
+}
+
+absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
+ uint64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
+ return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
+}
+
+absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
+ int64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
+ return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
+}
+
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(signed char v) {
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned char v) {
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(short v) { // NOLINT
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(
+ unsigned short v) { // NOLINT
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(int v) {
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned int v) {
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(long v) { // NOLINT
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(
+ unsigned long v) { // NOLINT
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(long long v) { // NOLINT
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+int numbers_internal::GetNumDigitsOrNegativeIfNegative(
+ unsigned long long v) { // NOLINT
+ return GetNumDigitsOrNegativeIfNegativeImpl(v);
+}
+
// Given a 128-bit number expressed as a pair of uint64_t, high half first,
// return that number multiplied by the given 32-bit value. If the result is
// too large to fit in a 128-bit number, divide it by 2 until it fits.
diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h
index d2a89b98..ad4e66b6 100644
--- a/absl/strings/numbers.h
+++ b/absl/strings/numbers.h
@@ -32,6 +32,7 @@
#endif
#include <cstddef>
+#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
@@ -39,10 +40,12 @@
#include <string>
#include <type_traits>
+#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/macros.h"
#include "absl/base/nullability.h"
+#include "absl/base/optimization.h"
#include "absl/base/port.h"
#include "absl/numeric/bits.h"
#include "absl/numeric/int128.h"
@@ -158,6 +161,96 @@ bool safe_strtou128_base(absl::string_view text,
static const int kFastToBufferSize = 32;
static const int kSixDigitsToBufferSize = 16;
+template <class T>
+std::enable_if_t<!std::is_unsigned<T>::value, bool> IsNegative(const T& v) {
+ return v < T();
+}
+
+template <class T>
+std::enable_if_t<std::is_unsigned<T>::value, std::false_type> IsNegative(
+ const T&) {
+ // The integer is unsigned, so return a compile-time constant.
+ // This can help the optimizer avoid having to prove bool to be false later.
+ return std::false_type();
+}
+
+template <class T>
+std::enable_if_t<std::is_unsigned<std::decay_t<T>>::value, T&&>
+UnsignedAbsoluteValue(T&& v ABSL_ATTRIBUTE_LIFETIME_BOUND) {
+ // The value is unsigned; just return the original.
+ return std::forward<T>(v);
+}
+
+template <class T>
+ABSL_ATTRIBUTE_CONST_FUNCTION
+ std::enable_if_t<!std::is_unsigned<T>::value, std::make_unsigned_t<T>>
+ UnsignedAbsoluteValue(T v) {
+ using U = std::make_unsigned_t<T>;
+ return IsNegative(v) ? U() - static_cast<U>(v) : static_cast<U>(v);
+}
+
+// Returns the number of base-10 digits in the given number.
+// Note that this strictly counts digits. It does not count the sign.
+// The `initial_digits` parameter is the starting point, which is normally equal
+// to 1 because the number of digits in 0 is 1 (a special case).
+// However, callers may e.g. wish to change it to 2 to account for the sign.
+template <typename T>
+std::enable_if_t<std::is_unsigned<T>::value, uint32_t> Base10Digits(
+ T v, const uint32_t initial_digits = 1) {
+ uint32_t r = initial_digits;
+ // If code size becomes an issue, the 'if' stage can be removed for a minor
+ // performance loss.
+ for (;;) {
+ if (ABSL_PREDICT_TRUE(v < 10 * 10)) {
+ r += (v >= 10);
+ break;
+ }
+ if (ABSL_PREDICT_TRUE(v < 1000 * 10)) {
+ r += (v >= 1000) + 2;
+ break;
+ }
+ if (ABSL_PREDICT_TRUE(v < 100000 * 10)) {
+ r += (v >= 100000) + 4;
+ break;
+ }
+ r += 6;
+ v = static_cast<T>(v / 1000000);
+ }
+ return r;
+}
+
+template <typename T>
+std::enable_if_t<std::is_signed<T>::value, uint32_t> Base10Digits(
+ T v, uint32_t r = 1) {
+ // Branchlessly add 1 to account for a minus sign.
+ r += static_cast<uint32_t>(IsNegative(v));
+ return Base10Digits(UnsignedAbsoluteValue(v), r);
+}
+
+// These functions return the number of base-10 digits, but multiplied by -1 if
+// the input itself is negative. This is handy and efficient for later usage,
+// since the bitwise complement of the result becomes equal to the number of
+// characters required.
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
+ signed char v);
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
+ unsigned char v);
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
+ short v); // NOLINT
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
+ unsigned short v); // NOLINT
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(int v);
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
+ unsigned int v);
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
+ long v); // NOLINT
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
+ unsigned long v); // NOLINT
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
+ long long v); // NOLINT
+ABSL_ATTRIBUTE_CONST_FUNCTION int GetNumDigitsOrNegativeIfNegative(
+ unsigned long long v); // NOLINT
+
// Helper function for fast formatting of floating-point values.
// The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six
// significant digits are returned, trailing zeros are removed, and numbers
@@ -166,24 +259,18 @@ static const int kSixDigitsToBufferSize = 16;
// Required buffer size is `kSixDigitsToBufferSize`.
size_t SixDigitsToBuffer(double d, absl::Nonnull<char*> buffer);
-// WARNING: These functions may write more characters than necessary, because
-// they are intended for speed. All functions take an output buffer
+// All of these functions take an output buffer
// as an argument and return a pointer to the last byte they wrote, which is the
// terminating '\0'. At most `kFastToBufferSize` bytes are written.
-absl::Nonnull<char*> FastIntToBuffer(int32_t i, absl::Nonnull<char*> buffer)
- ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
-absl::Nonnull<char*> FastIntToBuffer(uint32_t i, absl::Nonnull<char*> buffer)
- ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
-absl::Nonnull<char*> FastIntToBuffer(int64_t i, absl::Nonnull<char*> buffer)
- ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
-absl::Nonnull<char*> FastIntToBuffer(uint64_t i, absl::Nonnull<char*> buffer)
- ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize);
+absl::Nonnull<char*> FastIntToBuffer(int32_t i, absl::Nonnull<char*> buffer);
+absl::Nonnull<char*> FastIntToBuffer(uint32_t i, absl::Nonnull<char*> buffer);
+absl::Nonnull<char*> FastIntToBuffer(int64_t i, absl::Nonnull<char*> buffer);
+absl::Nonnull<char*> FastIntToBuffer(uint64_t i, absl::Nonnull<char*> buffer);
// For enums and integer types that are not an exact match for the types above,
// use templates to call the appropriate one of the four overloads above.
template <typename int_type>
-absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer)
- ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize) {
+absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer) {
static_assert(sizeof(i) <= 64 / 8,
"FastIntToBuffer works only with 64-bit-or-less integers.");
// TODO(jorg): This signed-ness check is used because it works correctly
@@ -207,6 +294,58 @@ absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer)
}
}
+// These functions do NOT add any null-terminator.
+// They return a pointer to the beginning of the written string.
+// The digit counts provided must *exactly* match the number of base-10 digits
+// in the number, or the behavior is undefined.
+// (i.e. do NOT count the minus sign, or over- or under-count the digits.)
+absl::Nonnull<char*> FastIntToBufferBackward(int32_t i,
+ absl::Nonnull<char*> buffer_end,
+ uint32_t exact_digit_count);
+absl::Nonnull<char*> FastIntToBufferBackward(uint32_t i,
+ absl::Nonnull<char*> buffer_end,
+ uint32_t exact_digit_count);
+absl::Nonnull<char*> FastIntToBufferBackward(int64_t i,
+ absl::Nonnull<char*> buffer_end,
+ uint32_t exact_digit_count);
+absl::Nonnull<char*> FastIntToBufferBackward(uint64_t i,
+ absl::Nonnull<char*> buffer_end,
+ uint32_t exact_digit_count);
+
+// For enums and integer types that are not an exact match for the types above,
+// use templates to call the appropriate one of the four overloads above.
+template <typename int_type>
+absl::Nonnull<char*> FastIntToBufferBackward(int_type i,
+ absl::Nonnull<char*> buffer_end,
+ uint32_t exact_digit_count) {
+ static_assert(
+ sizeof(i) <= 64 / 8,
+ "FastIntToBufferBackward works only with 64-bit-or-less integers.");
+ // This signed-ness check is used because it works correctly
+ // with enums, and it also serves to check that int_type is not a pointer.
+ // If one day something like std::is_signed<enum E> works, switch to it.
+ // These conditions are constexpr bools to suppress MSVC warning C4127.
+ constexpr bool kIsSigned = static_cast<int_type>(1) - 2 < 0;
+ constexpr bool kUse64Bit = sizeof(i) > 32 / 8;
+ if (kIsSigned) {
+ if (kUse64Bit) {
+ return FastIntToBufferBackward(static_cast<int64_t>(i), buffer_end,
+ exact_digit_count);
+ } else {
+ return FastIntToBufferBackward(static_cast<int32_t>(i), buffer_end,
+ exact_digit_count);
+ }
+ } else {
+ if (kUse64Bit) {
+ return FastIntToBufferBackward(static_cast<uint64_t>(i), buffer_end,
+ exact_digit_count);
+ } else {
+ return FastIntToBufferBackward(static_cast<uint32_t>(i), buffer_end,
+ exact_digit_count);
+ }
+ }
+}
+
// Implementation of SimpleAtoi, generalized to support arbitrary base (used
// with base different from 10 elsewhere in Abseil implementation).
template <typename int_type>
diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc
index 75c2dcf2..1ceff70f 100644
--- a/absl/strings/numbers_test.cc
+++ b/absl/strings/numbers_test.cc
@@ -231,10 +231,15 @@ TEST(Numbers, TestFastPrints) {
CheckInt32(INT_MIN);
CheckInt32(INT_MAX);
CheckInt64(LONG_MIN);
+ CheckInt64(uint64_t{10000000});
+ CheckInt64(uint64_t{100000000});
CheckInt64(uint64_t{1000000000});
CheckInt64(uint64_t{9999999999});
CheckInt64(uint64_t{100000000000000});
CheckInt64(uint64_t{999999999999999});
+ CheckInt64(uint64_t{1000000000000000});
+ CheckInt64(uint64_t{10000000000000000});
+ CheckInt64(uint64_t{100000000000000000});
CheckInt64(uint64_t{1000000000000000000});
CheckInt64(uint64_t{1199999999999999999});
CheckInt64(int64_t{-700000000000000000});
@@ -246,6 +251,8 @@ TEST(Numbers, TestFastPrints) {
CheckUInt64(uint64_t{999999999999999});
CheckUInt64(uint64_t{1000000000000000000});
CheckUInt64(uint64_t{1199999999999999999});
+ CheckUInt64(uint64_t{10000000000000000000u});
+ CheckUInt64(uint64_t{10200300040000500006u});
CheckUInt64(std::numeric_limits<uint64_t>::max());
for (int i = 0; i < 10000; i++) {
diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc
index e7f7052a..098ab183 100644
--- a/absl/strings/str_cat.cc
+++ b/absl/strings/str_cat.cc
@@ -21,10 +21,12 @@
#include <cstring>
#include <initializer_list>
#include <string>
+#include <type_traits>
#include "absl/base/config.h"
#include "absl/base/nullability.h"
#include "absl/strings/internal/resize_uninitialized.h"
+#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
namespace absl {
@@ -41,8 +43,7 @@ ABSL_NAMESPACE_BEGIN
namespace {
// Append is merely a version of memcpy that returns the address of the byte
// after the area just overwritten.
-inline absl::Nonnull<char*> Append(absl::Nonnull<char*> out,
- const AlphaNum& x) {
+absl::Nonnull<char*> Append(absl::Nonnull<char*> out, const AlphaNum& x) {
// memcpy is allowed to overwrite arbitrary memory, so doing this after the
// call would force an extra fetch of x.size().
char* after = out + x.size();
@@ -52,11 +53,6 @@ inline absl::Nonnull<char*> Append(absl::Nonnull<char*> out,
return after;
}
-inline void STLStringAppendUninitializedAmortized(std::string* dest,
- size_t to_append) {
- strings_internal::AppendUninitializedTraits<std::string>::Append(dest,
- to_append);
-}
} // namespace
std::string StrCat(const AlphaNum& a, const AlphaNum& b) {
@@ -102,6 +98,130 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c,
namespace strings_internal {
// Do not call directly - these are not part of the public API.
+void STLStringAppendUninitializedAmortized(std::string* dest,
+ size_t to_append) {
+ strings_internal::AppendUninitializedTraits<std::string>::Append(dest,
+ to_append);
+}
+
+template <typename Integer>
+std::enable_if_t<std::is_integral<Integer>::value, std::string> IntegerToString(
+ Integer i) {
+ std::string str;
+ const auto /* either bool or std::false_type */ is_negative =
+ absl::numbers_internal::IsNegative(i);
+ const uint32_t digits = absl::numbers_internal::Base10Digits(
+ absl::numbers_internal::UnsignedAbsoluteValue(i));
+ absl::strings_internal::STLStringResizeUninitialized(
+ &str, digits + static_cast<uint32_t>(is_negative));
+ absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits);
+ return str;
+}
+
+template <>
+std::string IntegerToString(long i) { // NOLINT
+ if (sizeof(i) <= sizeof(int)) {
+ return IntegerToString(static_cast<int>(i));
+ } else {
+ return IntegerToString(static_cast<long long>(i)); // NOLINT
+ }
+}
+
+template <>
+std::string IntegerToString(unsigned long i) { // NOLINT
+ if (sizeof(i) <= sizeof(unsigned int)) {
+ return IntegerToString(static_cast<unsigned int>(i));
+ } else {
+ return IntegerToString(static_cast<unsigned long long>(i)); // NOLINT
+ }
+}
+
+template <typename Float>
+std::enable_if_t<std::is_floating_point<Float>::value, std::string>
+FloatToString(Float f) {
+ std::string result;
+ strings_internal::STLStringResizeUninitialized(
+ &result, numbers_internal::kSixDigitsToBufferSize);
+ char* start = &result[0];
+ result.erase(numbers_internal::SixDigitsToBuffer(f, start));
+ return result;
+}
+
+std::string SingleArgStrCat(int x) { return IntegerToString(x); }
+std::string SingleArgStrCat(unsigned int x) { return IntegerToString(x); }
+// NOLINTNEXTLINE
+std::string SingleArgStrCat(long x) { return IntegerToString(x); }
+// NOLINTNEXTLINE
+std::string SingleArgStrCat(unsigned long x) { return IntegerToString(x); }
+// NOLINTNEXTLINE
+std::string SingleArgStrCat(long long x) { return IntegerToString(x); }
+// NOLINTNEXTLINE
+std::string SingleArgStrCat(unsigned long long x) { return IntegerToString(x); }
+std::string SingleArgStrCat(float x) { return FloatToString(x); }
+std::string SingleArgStrCat(double x) { return FloatToString(x); }
+
+template <class Integer>
+std::enable_if_t<std::is_integral<Integer>::value, void> AppendIntegerToString(
+ std::string& str, Integer i) {
+ const auto /* either bool or std::false_type */ is_negative =
+ absl::numbers_internal::IsNegative(i);
+ const uint32_t digits = absl::numbers_internal::Base10Digits(
+ absl::numbers_internal::UnsignedAbsoluteValue(i));
+ absl::strings_internal::STLStringAppendUninitializedAmortized(
+ &str, digits + static_cast<uint32_t>(is_negative));
+ absl::numbers_internal::FastIntToBufferBackward(i, &str[str.size()], digits);
+}
+
+template <>
+void AppendIntegerToString(std::string& str, long i) { // NOLINT
+ if (sizeof(i) <= sizeof(int)) {
+ return AppendIntegerToString(str, static_cast<int>(i));
+ } else {
+ return AppendIntegerToString(str, static_cast<long long>(i)); // NOLINT
+ }
+}
+
+template <>
+void AppendIntegerToString(std::string& str,
+ unsigned long i) { // NOLINT
+ if (sizeof(i) <= sizeof(unsigned int)) {
+ return AppendIntegerToString(str, static_cast<unsigned int>(i));
+ } else {
+ return AppendIntegerToString(str,
+ static_cast<unsigned long long>(i)); // NOLINT
+ }
+}
+
+// `SingleArgStrAppend` overloads are defined here for the same reasons as with
+// `SingleArgStrCat` above.
+void SingleArgStrAppend(std::string& str, int x) {
+ return AppendIntegerToString(str, x);
+}
+
+void SingleArgStrAppend(std::string& str, unsigned int x) {
+ return AppendIntegerToString(str, x);
+}
+
+// NOLINTNEXTLINE
+void SingleArgStrAppend(std::string& str, long x) {
+ return AppendIntegerToString(str, x);
+}
+
+// NOLINTNEXTLINE
+void SingleArgStrAppend(std::string& str, unsigned long x) {
+ return AppendIntegerToString(str, x);
+}
+
+// NOLINTNEXTLINE
+void SingleArgStrAppend(std::string& str, long long x) {
+ return AppendIntegerToString(str, x);
+}
+
+// NOLINTNEXTLINE
+void SingleArgStrAppend(std::string& str, unsigned long long x) {
+ return AppendIntegerToString(str, x);
+}
+
std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
std::string result;
size_t total_size = 0;
@@ -138,7 +258,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest,
ASSERT_NO_OVERLAP(*dest, piece);
to_append += piece.size();
}
- STLStringAppendUninitializedAmortized(dest, to_append);
+ strings_internal::STLStringAppendUninitializedAmortized(dest, to_append);
char* const begin = &(*dest)[0];
char* out = begin + old_size;
@@ -157,7 +277,7 @@ void AppendPieces(absl::Nonnull<std::string*> dest,
void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a) {
ASSERT_NO_OVERLAP(*dest, a);
std::string::size_type old_size = dest->size();
- STLStringAppendUninitializedAmortized(dest, a.size());
+ strings_internal::STLStringAppendUninitializedAmortized(dest, a.size());
char* const begin = &(*dest)[0];
char* out = begin + old_size;
out = Append(out, a);
@@ -169,7 +289,8 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
ASSERT_NO_OVERLAP(*dest, a);
ASSERT_NO_OVERLAP(*dest, b);
std::string::size_type old_size = dest->size();
- STLStringAppendUninitializedAmortized(dest, a.size() + b.size());
+ strings_internal::STLStringAppendUninitializedAmortized(dest,
+ a.size() + b.size());
char* const begin = &(*dest)[0];
char* out = begin + old_size;
out = Append(out, a);
@@ -183,7 +304,8 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
ASSERT_NO_OVERLAP(*dest, b);
ASSERT_NO_OVERLAP(*dest, c);
std::string::size_type old_size = dest->size();
- STLStringAppendUninitializedAmortized(dest, a.size() + b.size() + c.size());
+ strings_internal::STLStringAppendUninitializedAmortized(
+ dest, a.size() + b.size() + c.size());
char* const begin = &(*dest)[0];
char* out = begin + old_size;
out = Append(out, a);
@@ -199,7 +321,7 @@ void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
ASSERT_NO_OVERLAP(*dest, c);
ASSERT_NO_OVERLAP(*dest, d);
std::string::size_type old_size = dest->size();
- STLStringAppendUninitializedAmortized(
+ strings_internal::STLStringAppendUninitializedAmortized(
dest, a.size() + b.size() + c.size() + d.size());
char* const begin = &(*dest)[0];
char* out = begin + old_size;
diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h
index 68637ce8..ea2c4dca 100644
--- a/absl/strings/str_cat.h
+++ b/absl/strings/str_cat.h
@@ -93,7 +93,6 @@
#include <cstddef>
#include <cstdint>
#include <cstring>
-#include <limits>
#include <string>
#include <type_traits>
#include <utility>
@@ -259,10 +258,9 @@ struct Dec {
typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr)
: value(v >= 0 ? static_cast<uint64_t>(v)
: uint64_t{0} - static_cast<uint64_t>(v)),
- width(spec == absl::kNoPad
- ? 1
- : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
- : spec - absl::kZeroPad2 + 2),
+ width(spec == absl::kNoPad ? 1
+ : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
+ : spec - absl::kZeroPad2 + 2),
fill(spec >= absl::kSpacePad2 ? ' ' : '0'),
neg(v < 0) {}
@@ -450,77 +448,36 @@ std::string CatPieces(std::initializer_list<absl::string_view> pieces);
void AppendPieces(absl::Nonnull<std::string*> dest,
std::initializer_list<absl::string_view> pieces);
-template <typename Integer>
-std::string IntegerToString(Integer i) {
- // Any integer (signed/unsigned) up to 64 bits can be formatted into a buffer
- // with 22 bytes (including NULL at the end).
- constexpr size_t kMaxDigits10 = 22;
- std::string result;
- strings_internal::STLStringResizeUninitialized(&result, kMaxDigits10);
- char* start = &result[0];
- // note: this can be optimized to not write last zero.
- char* end = numbers_internal::FastIntToBuffer(i, start);
- auto size = static_cast<size_t>(end - start);
- assert((size < result.size()) &&
- "StrCat(Integer) does not fit into kMaxDigits10");
- result.erase(size);
- return result;
-}
-template <typename Float>
-std::string FloatToString(Float f) {
- std::string result;
- strings_internal::STLStringResizeUninitialized(
- &result, numbers_internal::kSixDigitsToBufferSize);
- char* start = &result[0];
- result.erase(numbers_internal::SixDigitsToBuffer(f, start));
- return result;
-}
+void STLStringAppendUninitializedAmortized(std::string* dest, size_t to_append);
// `SingleArgStrCat` overloads take built-in `int`, `long` and `long long` types
// (signed / unsigned) to avoid ambiguity on the call side. If we used int32_t
// and int64_t, then at least one of the three (`int` / `long` / `long long`)
// would have been ambiguous when passed to `SingleArgStrCat`.
-inline std::string SingleArgStrCat(int x) { return IntegerToString(x); }
-inline std::string SingleArgStrCat(unsigned int x) {
- return IntegerToString(x);
-}
-// NOLINTNEXTLINE
-inline std::string SingleArgStrCat(long x) { return IntegerToString(x); }
-// NOLINTNEXTLINE
-inline std::string SingleArgStrCat(unsigned long x) {
- return IntegerToString(x);
-}
-// NOLINTNEXTLINE
-inline std::string SingleArgStrCat(long long x) { return IntegerToString(x); }
-// NOLINTNEXTLINE
-inline std::string SingleArgStrCat(unsigned long long x) {
- return IntegerToString(x);
-}
-inline std::string SingleArgStrCat(float x) { return FloatToString(x); }
-inline std::string SingleArgStrCat(double x) { return FloatToString(x); }
-
-// As of September 2023, the SingleArgStrCat() optimization is only enabled for
-// libc++. The reasons for this are:
-// 1) The SSO size for libc++ is 23, while libstdc++ and MSSTL have an SSO size
-// of 15. Since IntegerToString unconditionally resizes the string to 22 bytes,
-// this causes both libstdc++ and MSSTL to allocate.
-// 2) strings_internal::STLStringResizeUninitialized() only has an
-// implementation that avoids initialization when using libc++. This isn't as
-// relevant as (1), and the cost should be benchmarked if (1) ever changes on
-// libstc++ or MSSTL.
-#ifdef _LIBCPP_VERSION
-#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE true
-#else
-#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE false
-#endif
-
-template <typename T, typename = std::enable_if_t<
- ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE &&
- std::is_arithmetic<T>{} && !std::is_same<T, char>{}>>
+std::string SingleArgStrCat(int x);
+std::string SingleArgStrCat(unsigned int x);
+std::string SingleArgStrCat(long x); // NOLINT
+std::string SingleArgStrCat(unsigned long x); // NOLINT
+std::string SingleArgStrCat(long long x); // NOLINT
+std::string SingleArgStrCat(unsigned long long x); // NOLINT
+std::string SingleArgStrCat(float x);
+std::string SingleArgStrCat(double x);
+
+// `SingleArgStrAppend` overloads are defined here for the same reasons as with
+// `SingleArgStrCat` above.
+void SingleArgStrAppend(std::string& str, int x);
+void SingleArgStrAppend(std::string& str, unsigned int x);
+void SingleArgStrAppend(std::string& str, long x); // NOLINT
+void SingleArgStrAppend(std::string& str, unsigned long x); // NOLINT
+void SingleArgStrAppend(std::string& str, long long x); // NOLINT
+void SingleArgStrAppend(std::string& str, unsigned long long x); // NOLINT
+
+template <typename T,
+ typename = std::enable_if_t<std::is_arithmetic<T>::value &&
+ !std::is_same<T, char>::value &&
+ !std::is_same<T, bool>::value>>
using EnableIfFastCase = T;
-#undef ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE
-
} // namespace strings_internal
ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); }
@@ -596,6 +553,68 @@ inline void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
static_cast<const AlphaNum&>(args).Piece()...});
}
+template <class String, class T>
+std::enable_if_t<
+ std::is_integral<absl::strings_internal::EnableIfFastCase<T>>::value, void>
+StrAppend(absl::Nonnull<String*> result, T i) {
+ return absl::strings_internal::SingleArgStrAppend(*result, i);
+}
+
+// This overload is only selected if all the parameters are numbers that can be
+// handled quickly.
+// Later we can look into how we can extend this to more general argument
+// mixtures without bloating codegen too much, or copying unnecessarily.
+template <typename String, typename... T>
+std::enable_if_t<
+ (sizeof...(T) > 1),
+ std::common_type_t<std::conditional_t<
+ true, void, absl::strings_internal::EnableIfFastCase<T>>...>>
+StrAppend(absl::Nonnull<String*> str, T... args) {
+ // Do not add unnecessary variables, logic, or even "free" lambdas here.
+ // They can add overhead for the compiler and/or at run time.
+ // Furthermore, assume this function will be inlined.
+ // This function is carefully tailored to be able to be largely optimized away
+ // so that it becomes near-equivalent to the caller handling each argument
+ // individually while minimizing register pressure, so that the compiler
+ // can inline it with minimal overhead.
+
+ // First, calculate the total length, so we can perform just a single resize.
+ // Save all the lengths for later.
+ size_t total_length = 0;
+ const ptrdiff_t lengths[] = {
+ absl::numbers_internal::GetNumDigitsOrNegativeIfNegative(args)...};
+ for (const ptrdiff_t possibly_negative_length : lengths) {
+ // Lengths are negative for negative numbers. Keep them for later use, but
+ // take their absolute values for calculating total lengths;
+ total_length += possibly_negative_length < 0
+ ? static_cast<size_t>(-possibly_negative_length)
+ : static_cast<size_t>(possibly_negative_length);
+ }
+
+ // Now reserve space for all the arguments.
+ const size_t old_size = str->size();
+ absl::strings_internal::STLStringAppendUninitializedAmortized(str,
+ total_length);
+
+ // Finally, output each argument one-by-one, from left to right.
+ size_t i = 0; // The current argument we're processing
+ ptrdiff_t n; // The length of the current argument
+ typename String::pointer pos = &(*str)[old_size];
+ using SomeTrivialEmptyType = std::false_type;
+ // Ugly code due to the lack of C++14 fold expression makes us.
+ const SomeTrivialEmptyType dummy1;
+ for (const SomeTrivialEmptyType& dummy2 :
+ {(/* Comma expressions are poor man's C++17 fold expression for C++14 */
+ (void)(n = lengths[i]),
+ (void)(n < 0 ? (void)(*pos++ = '-'), (n = ~n) : 0),
+ (void)absl::numbers_internal::FastIntToBufferBackward(
+ absl::numbers_internal::UnsignedAbsoluteValue(std::move(args)),
+ pos += n, static_cast<uint32_t>(n)),
+ (void)++i, dummy1)...}) {
+ (void)dummy2; // Remove & migrate to fold expressions in C++17
+ }
+}
+
// Helper function for the future StrCat default floating-point format, %.6g
// This is fast.
inline strings_internal::AlphaNumBuffer<
diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc
index 66eddf0d..b30a86fe 100644
--- a/absl/strings/str_cat_test.cc
+++ b/absl/strings/str_cat_test.cc
@@ -39,6 +39,24 @@
namespace {
+template <typename Integer>
+void VerifyInteger(Integer value) {
+ const std::string expected = std::to_string(value);
+
+ EXPECT_EQ(absl::StrCat(value), expected);
+
+ const char* short_prefix = "x";
+ const char* long_prefix = "2;k.msabxiuow2[09i;o3k21-93-9=29]";
+
+ std::string short_str = short_prefix;
+ absl::StrAppend(&short_str, value);
+ EXPECT_EQ(short_str, short_prefix + expected);
+
+ std::string long_str = long_prefix;
+ absl::StrAppend(&long_str, value);
+ EXPECT_EQ(long_str, long_prefix + expected);
+}
+
// Test absl::StrCat of ints and longs of various sizes and signdedness.
TEST(StrCat, Ints) {
const short s = -1; // NOLINT(runtime/int)
@@ -68,6 +86,34 @@ TEST(StrCat, Ints) {
EXPECT_EQ(answer, "-9-12");
answer = absl::StrCat(uintptr, 0);
EXPECT_EQ(answer, "130");
+
+ for (const uint32_t base : {2u, 10u}) {
+ for (const int extra_shift : {0, 12}) {
+ for (uint64_t i = 0; i < (1 << 8); ++i) {
+ uint64_t j = i;
+ while (true) {
+ uint64_t v = j ^ (extra_shift != 0 ? (j << extra_shift) * base : 0);
+ VerifyInteger(static_cast<bool>(v));
+ VerifyInteger(static_cast<wchar_t>(v));
+ VerifyInteger(static_cast<signed char>(v));
+ VerifyInteger(static_cast<unsigned char>(v));
+ VerifyInteger(static_cast<short>(v)); // NOLINT
+ VerifyInteger(static_cast<unsigned short>(v)); // NOLINT
+ VerifyInteger(static_cast<int>(v)); // NOLINT
+ VerifyInteger(static_cast<unsigned int>(v)); // NOLINT
+ VerifyInteger(static_cast<long>(v)); // NOLINT
+ VerifyInteger(static_cast<unsigned long>(v)); // NOLINT
+ VerifyInteger(static_cast<long long>(v)); // NOLINT
+ VerifyInteger(static_cast<unsigned long long>(v)); // NOLINT
+ const uint64_t next = j == 0 ? 1 : j * base;
+ if (next <= j) {
+ break;
+ }
+ j = next;
+ }
+ }
+ }
+ }
}
TEST(StrCat, Enums) {