Optimize integer-to-string conversions

The updated code is designed to: - Be branch-predictor-friendly - Be cache-friendly - Minimize the lengths of critical paths - Minimize slow operations (particularly multiplications) - Minimize binary/codegen bloat The most notable performance trick here is perhaps the precomputation & caching of the number of digits, so that we can reuse/exploit it when writing the output. This precomputation of the exact length enables 2 further performance benefits: - It makes `StrCat` and `StrAppend` zero-copy when only integers are passed, by avoiding intermediate `AlphaNum` entirely in those cases. If needed in the future, we can probably also make many other mixtures of non-integer types zero-copy as well. - It avoids over-reservation of the string buffer, allowing for more strings to fit inside SSO, which will likely have further performance benefits. There is also a side benefit of preventing `FastIntToBuffer` from writing beyond the end of the buffer, which has caused buffer overflows in the past. The new code continues to use & extend some of the existing core tricks (such as the division-by-100 trick), as those are already efficient. PiperOrigin-RevId: 595785531 Change-Id: Id6920e7e038fec10b2c45f213de75dc7e2cbddd1
author: Abseil Team <absl-team@google.com> 2024-01-04 13:14:50 -0800
committer: Copybara-Service <copybara-worker@google.com> 2024-01-04 13:15:44 -0800
commit: d5a2cec006d14c6801ddeb768bf2574a1cf4fa7f (patch)
tree: 3dc1ffb00d164ddda4c22b8abfc42594db1fa2da /absl/strings/str_cat.h
parent: ccf0c7730db976d2b7814e74f6f01d044ae6f853 (diff)
1 files changed, 88 insertions, 69 deletions
diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h
index 68637ce8..ea2c4dca 100644
--- a/absl/strings/str_cat.h
+++ b/absl/strings/str_cat.h
@@ -93,7 +93,6 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <limits>
 #include <string>
 #include <type_traits>
 #include <utility>
@@ -259,10 +258,9 @@ struct Dec {
                typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr)
       : value(v >= 0 ? static_cast<uint64_t>(v)
                      : uint64_t{0} - static_cast<uint64_t>(v)),
-        width(spec == absl::kNoPad
-                  ? 1
-                  : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
-                                             : spec - absl::kZeroPad2 + 2),
+        width(spec == absl::kNoPad       ? 1
+              : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
+                                         : spec - absl::kZeroPad2 + 2),
         fill(spec >= absl::kSpacePad2 ? ' ' : '0'),
         neg(v < 0) {}
 
@@ -450,77 +448,36 @@ std::string CatPieces(std::initializer_list<absl::string_view> pieces);
 void AppendPieces(absl::Nonnull<std::string*> dest,
                   std::initializer_list<absl::string_view> pieces);
 
-template <typename Integer>
-std::string IntegerToString(Integer i) {
-  // Any integer (signed/unsigned) up to 64 bits can be formatted into a buffer
-  // with 22 bytes (including NULL at the end).
-  constexpr size_t kMaxDigits10 = 22;
-  std::string result;
-  strings_internal::STLStringResizeUninitialized(&result, kMaxDigits10);
-  char* start = &result[0];
-  // note: this can be optimized to not write last zero.
-  char* end = numbers_internal::FastIntToBuffer(i, start);
-  auto size = static_cast<size_t>(end - start);
-  assert((size < result.size()) &&
-         "StrCat(Integer) does not fit into kMaxDigits10");
-  result.erase(size);
-  return result;
-}
-template <typename Float>
-std::string FloatToString(Float f) {
-  std::string result;
-  strings_internal::STLStringResizeUninitialized(
-      &result, numbers_internal::kSixDigitsToBufferSize);
-  char* start = &result[0];
-  result.erase(numbers_internal::SixDigitsToBuffer(f, start));
-  return result;
-}
+void STLStringAppendUninitializedAmortized(std::string* dest, size_t to_append);
 
 // `SingleArgStrCat` overloads take built-in `int`, `long` and `long long` types
 // (signed / unsigned) to avoid ambiguity on the call side. If we used int32_t
 // and int64_t, then at least one of the three (`int` / `long` / `long long`)
 // would have been ambiguous when passed to `SingleArgStrCat`.
-inline std::string SingleArgStrCat(int x) { return IntegerToString(x); }
-inline std::string SingleArgStrCat(unsigned int x) {
-  return IntegerToString(x);
-}
-// NOLINTNEXTLINE
-inline std::string SingleArgStrCat(long x) { return IntegerToString(x); }
-// NOLINTNEXTLINE
-inline std::string SingleArgStrCat(unsigned long x) {
-  return IntegerToString(x);
-}
-// NOLINTNEXTLINE
-inline std::string SingleArgStrCat(long long x) { return IntegerToString(x); }
-// NOLINTNEXTLINE
-inline std::string SingleArgStrCat(unsigned long long x) {
-  return IntegerToString(x);
-}
-inline std::string SingleArgStrCat(float x) { return FloatToString(x); }
-inline std::string SingleArgStrCat(double x) { return FloatToString(x); }
-
-// As of September 2023, the SingleArgStrCat() optimization is only enabled for
-// libc++. The reasons for this are:
-// 1) The SSO size for libc++ is 23, while libstdc++ and MSSTL have an SSO size
-// of 15. Since IntegerToString unconditionally resizes the string to 22 bytes,
-// this causes both libstdc++ and MSSTL to allocate.
-// 2) strings_internal::STLStringResizeUninitialized() only has an
-// implementation that avoids initialization when using libc++. This isn't as
-// relevant as (1), and the cost should be benchmarked if (1) ever changes on
-// libstc++ or MSSTL.
-#ifdef _LIBCPP_VERSION
-#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE true
-#else
-#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE false
-#endif
-
-template <typename T, typename = std::enable_if_t<
-                          ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE &&
-                          std::is_arithmetic<T>{} && !std::is_same<T, char>{}>>
+std::string SingleArgStrCat(int x);
+std::string SingleArgStrCat(unsigned int x);
+std::string SingleArgStrCat(long x);                // NOLINT
+std::string SingleArgStrCat(unsigned long x);       // NOLINT
+std::string SingleArgStrCat(long long x);           // NOLINT
+std::string SingleArgStrCat(unsigned long long x);  // NOLINT
+std::string SingleArgStrCat(float x);
+std::string SingleArgStrCat(double x);
+
+// `SingleArgStrAppend` overloads are defined here for the same reasons as with
+// `SingleArgStrCat` above.
+void SingleArgStrAppend(std::string& str, int x);
+void SingleArgStrAppend(std::string& str, unsigned int x);
+void SingleArgStrAppend(std::string& str, long x);                // NOLINT
+void SingleArgStrAppend(std::string& str, unsigned long x);       // NOLINT
+void SingleArgStrAppend(std::string& str, long long x);           // NOLINT
+void SingleArgStrAppend(std::string& str, unsigned long long x);  // NOLINT
+
+template <typename T,
+          typename = std::enable_if_t<std::is_arithmetic<T>::value &&
+                                      !std::is_same<T, char>::value &&
+                                      !std::is_same<T, bool>::value>>
 using EnableIfFastCase = T;
 
-#undef ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE
-
 }  // namespace strings_internal
 
 ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); }
@@ -596,6 +553,68 @@ inline void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a,
              static_cast<const AlphaNum&>(args).Piece()...});
 }
 
+template <class String, class T>
+std::enable_if_t<
+    std::is_integral<absl::strings_internal::EnableIfFastCase<T>>::value, void>
+StrAppend(absl::Nonnull<String*> result, T i) {
+  return absl::strings_internal::SingleArgStrAppend(*result, i);
+}
+
+// This overload is only selected if all the parameters are numbers that can be
+// handled quickly.
+// Later we can look into how we can extend this to more general argument
+// mixtures without bloating codegen too much, or copying unnecessarily.
+template <typename String, typename... T>
+std::enable_if_t<
+    (sizeof...(T) > 1),
+    std::common_type_t<std::conditional_t<
+        true, void, absl::strings_internal::EnableIfFastCase<T>>...>>
+StrAppend(absl::Nonnull<String*> str, T... args) {
+  // Do not add unnecessary variables, logic, or even "free" lambdas here.
+  // They can add overhead for the compiler and/or at run time.
+  // Furthermore, assume this function will be inlined.
+  // This function is carefully tailored to be able to be largely optimized away
+  // so that it becomes near-equivalent to the caller handling each argument
+  // individually while minimizing register pressure, so that the compiler
+  // can inline it with minimal overhead.
+
+  // First, calculate the total length, so we can perform just a single resize.
+  // Save all the lengths for later.
+  size_t total_length = 0;
+  const ptrdiff_t lengths[] = {
+      absl::numbers_internal::GetNumDigitsOrNegativeIfNegative(args)...};
+  for (const ptrdiff_t possibly_negative_length : lengths) {
+    // Lengths are negative for negative numbers. Keep them for later use, but
+    // take their absolute values for calculating total lengths;
+    total_length += possibly_negative_length < 0
+                        ? static_cast<size_t>(-possibly_negative_length)
+                        : static_cast<size_t>(possibly_negative_length);
+  }
+
+  // Now reserve space for all the arguments.
+  const size_t old_size = str->size();
+  absl::strings_internal::STLStringAppendUninitializedAmortized(str,
+                                                                total_length);
+
+  // Finally, output each argument one-by-one, from left to right.
+  size_t i = 0;  // The current argument we're processing
+  ptrdiff_t n;   // The length of the current argument
+  typename String::pointer pos = &(*str)[old_size];
+  using SomeTrivialEmptyType = std::false_type;
+  // Ugly code due to the lack of C++14 fold expression makes us.
+  const SomeTrivialEmptyType dummy1;
+  for (const SomeTrivialEmptyType& dummy2 :
+       {(/* Comma expressions are poor man's C++17 fold expression for C++14 */
+         (void)(n = lengths[i]),
+         (void)(n < 0 ? (void)(*pos++ = '-'), (n = ~n) : 0),
+         (void)absl::numbers_internal::FastIntToBufferBackward(
+             absl::numbers_internal::UnsignedAbsoluteValue(std::move(args)),
+             pos += n, static_cast<uint32_t>(n)),
+         (void)++i, dummy1)...}) {
+    (void)dummy2;  // Remove & migrate to fold expressions in C++17
+  }
+}
+
 // Helper function for the future StrCat default floating-point format, %.6g
 // This is fast.
 inline strings_internal::AlphaNumBuffer<
author	Abseil Team <absl-team@google.com>	2024-01-04 13:14:50 -0800
committer	Copybara-Service <copybara-worker@google.com>	2024-01-04 13:15:44 -0800
commit	d5a2cec006d14c6801ddeb768bf2574a1cf4fa7f (patch)
tree	3dc1ffb00d164ddda4c22b8abfc42594db1fa2da /absl/strings/str_cat.h
parent	ccf0c7730db976d2b7814e74f6f01d044ae6f853 (diff)