diff options
author | Abseil Team <absl-team@google.com> | 2024-01-04 13:14:50 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-01-04 13:15:44 -0800 |
commit | d5a2cec006d14c6801ddeb768bf2574a1cf4fa7f (patch) | |
tree | 3dc1ffb00d164ddda4c22b8abfc42594db1fa2da /absl/strings/str_cat.h | |
parent | ccf0c7730db976d2b7814e74f6f01d044ae6f853 (diff) |
Optimize integer-to-string conversions
The updated code is designed to:
- Be branch-predictor-friendly
- Be cache-friendly
- Minimize the lengths of critical paths
- Minimize slow operations (particularly multiplications)
- Minimize binary/codegen bloat
The most notable performance trick here is perhaps the precomputation & caching of the number of digits, so that we can reuse/exploit it when writing the output.
This precomputation of the exact length enables 2 further performance benefits:
- It makes `StrCat` and `StrAppend` zero-copy when only integers are passed, by avoiding intermediate `AlphaNum` entirely in those cases. If needed in the future, we can probably also make many other mixtures of non-integer types zero-copy as well.
- It avoids over-reservation of the string buffer, allowing for more strings to fit inside SSO, which will likely have further performance benefits.
There is also a side benefit of preventing `FastIntToBuffer` from writing beyond the end of the buffer, which has caused buffer overflows in the past.
The new code continues to use & extend some of the existing core tricks (such as the division-by-100 trick), as those are already efficient.
PiperOrigin-RevId: 595785531
Change-Id: Id6920e7e038fec10b2c45f213de75dc7e2cbddd1
Diffstat (limited to 'absl/strings/str_cat.h')
-rw-r--r-- | absl/strings/str_cat.h | 157 |
1 files changed, 88 insertions, 69 deletions
diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index 68637ce8..ea2c4dca 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -93,7 +93,6 @@ #include <cstddef> #include <cstdint> #include <cstring> -#include <limits> #include <string> #include <type_traits> #include <utility> @@ -259,10 +258,9 @@ struct Dec { typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr) : value(v >= 0 ? static_cast<uint64_t>(v) : uint64_t{0} - static_cast<uint64_t>(v)), - width(spec == absl::kNoPad - ? 1 - : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2 - : spec - absl::kZeroPad2 + 2), + width(spec == absl::kNoPad ? 1 + : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2 + : spec - absl::kZeroPad2 + 2), fill(spec >= absl::kSpacePad2 ? ' ' : '0'), neg(v < 0) {} @@ -450,77 +448,36 @@ std::string CatPieces(std::initializer_list<absl::string_view> pieces); void AppendPieces(absl::Nonnull<std::string*> dest, std::initializer_list<absl::string_view> pieces); -template <typename Integer> -std::string IntegerToString(Integer i) { - // Any integer (signed/unsigned) up to 64 bits can be formatted into a buffer - // with 22 bytes (including NULL at the end). - constexpr size_t kMaxDigits10 = 22; - std::string result; - strings_internal::STLStringResizeUninitialized(&result, kMaxDigits10); - char* start = &result[0]; - // note: this can be optimized to not write last zero. - char* end = numbers_internal::FastIntToBuffer(i, start); - auto size = static_cast<size_t>(end - start); - assert((size < result.size()) && - "StrCat(Integer) does not fit into kMaxDigits10"); - result.erase(size); - return result; -} -template <typename Float> -std::string FloatToString(Float f) { - std::string result; - strings_internal::STLStringResizeUninitialized( - &result, numbers_internal::kSixDigitsToBufferSize); - char* start = &result[0]; - result.erase(numbers_internal::SixDigitsToBuffer(f, start)); - return result; -} +void STLStringAppendUninitializedAmortized(std::string* dest, size_t to_append); // `SingleArgStrCat` overloads take built-in `int`, `long` and `long long` types // (signed / unsigned) to avoid ambiguity on the call side. If we used int32_t // and int64_t, then at least one of the three (`int` / `long` / `long long`) // would have been ambiguous when passed to `SingleArgStrCat`. -inline std::string SingleArgStrCat(int x) { return IntegerToString(x); } -inline std::string SingleArgStrCat(unsigned int x) { - return IntegerToString(x); -} -// NOLINTNEXTLINE -inline std::string SingleArgStrCat(long x) { return IntegerToString(x); } -// NOLINTNEXTLINE -inline std::string SingleArgStrCat(unsigned long x) { - return IntegerToString(x); -} -// NOLINTNEXTLINE -inline std::string SingleArgStrCat(long long x) { return IntegerToString(x); } -// NOLINTNEXTLINE -inline std::string SingleArgStrCat(unsigned long long x) { - return IntegerToString(x); -} -inline std::string SingleArgStrCat(float x) { return FloatToString(x); } -inline std::string SingleArgStrCat(double x) { return FloatToString(x); } - -// As of September 2023, the SingleArgStrCat() optimization is only enabled for -// libc++. The reasons for this are: -// 1) The SSO size for libc++ is 23, while libstdc++ and MSSTL have an SSO size -// of 15. Since IntegerToString unconditionally resizes the string to 22 bytes, -// this causes both libstdc++ and MSSTL to allocate. -// 2) strings_internal::STLStringResizeUninitialized() only has an -// implementation that avoids initialization when using libc++. This isn't as -// relevant as (1), and the cost should be benchmarked if (1) ever changes on -// libstc++ or MSSTL. -#ifdef _LIBCPP_VERSION -#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE true -#else -#define ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE false -#endif - -template <typename T, typename = std::enable_if_t< - ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE && - std::is_arithmetic<T>{} && !std::is_same<T, char>{}>> +std::string SingleArgStrCat(int x); +std::string SingleArgStrCat(unsigned int x); +std::string SingleArgStrCat(long x); // NOLINT +std::string SingleArgStrCat(unsigned long x); // NOLINT +std::string SingleArgStrCat(long long x); // NOLINT +std::string SingleArgStrCat(unsigned long long x); // NOLINT +std::string SingleArgStrCat(float x); +std::string SingleArgStrCat(double x); + +// `SingleArgStrAppend` overloads are defined here for the same reasons as with +// `SingleArgStrCat` above. +void SingleArgStrAppend(std::string& str, int x); +void SingleArgStrAppend(std::string& str, unsigned int x); +void SingleArgStrAppend(std::string& str, long x); // NOLINT +void SingleArgStrAppend(std::string& str, unsigned long x); // NOLINT +void SingleArgStrAppend(std::string& str, long long x); // NOLINT +void SingleArgStrAppend(std::string& str, unsigned long long x); // NOLINT + +template <typename T, + typename = std::enable_if_t<std::is_arithmetic<T>::value && + !std::is_same<T, char>::value && + !std::is_same<T, bool>::value>> using EnableIfFastCase = T; -#undef ABSL_INTERNAL_STRCAT_ENABLE_FAST_CASE - } // namespace strings_internal ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); } @@ -596,6 +553,68 @@ inline void StrAppend(absl::Nonnull<std::string*> dest, const AlphaNum& a, static_cast<const AlphaNum&>(args).Piece()...}); } +template <class String, class T> +std::enable_if_t< + std::is_integral<absl::strings_internal::EnableIfFastCase<T>>::value, void> +StrAppend(absl::Nonnull<String*> result, T i) { + return absl::strings_internal::SingleArgStrAppend(*result, i); +} + +// This overload is only selected if all the parameters are numbers that can be +// handled quickly. +// Later we can look into how we can extend this to more general argument +// mixtures without bloating codegen too much, or copying unnecessarily. +template <typename String, typename... T> +std::enable_if_t< + (sizeof...(T) > 1), + std::common_type_t<std::conditional_t< + true, void, absl::strings_internal::EnableIfFastCase<T>>...>> +StrAppend(absl::Nonnull<String*> str, T... args) { + // Do not add unnecessary variables, logic, or even "free" lambdas here. + // They can add overhead for the compiler and/or at run time. + // Furthermore, assume this function will be inlined. + // This function is carefully tailored to be able to be largely optimized away + // so that it becomes near-equivalent to the caller handling each argument + // individually while minimizing register pressure, so that the compiler + // can inline it with minimal overhead. + + // First, calculate the total length, so we can perform just a single resize. + // Save all the lengths for later. + size_t total_length = 0; + const ptrdiff_t lengths[] = { + absl::numbers_internal::GetNumDigitsOrNegativeIfNegative(args)...}; + for (const ptrdiff_t possibly_negative_length : lengths) { + // Lengths are negative for negative numbers. Keep them for later use, but + // take their absolute values for calculating total lengths; + total_length += possibly_negative_length < 0 + ? static_cast<size_t>(-possibly_negative_length) + : static_cast<size_t>(possibly_negative_length); + } + + // Now reserve space for all the arguments. + const size_t old_size = str->size(); + absl::strings_internal::STLStringAppendUninitializedAmortized(str, + total_length); + + // Finally, output each argument one-by-one, from left to right. + size_t i = 0; // The current argument we're processing + ptrdiff_t n; // The length of the current argument + typename String::pointer pos = &(*str)[old_size]; + using SomeTrivialEmptyType = std::false_type; + // Ugly code due to the lack of C++14 fold expression makes us. + const SomeTrivialEmptyType dummy1; + for (const SomeTrivialEmptyType& dummy2 : + {(/* Comma expressions are poor man's C++17 fold expression for C++14 */ + (void)(n = lengths[i]), + (void)(n < 0 ? (void)(*pos++ = '-'), (n = ~n) : 0), + (void)absl::numbers_internal::FastIntToBufferBackward( + absl::numbers_internal::UnsignedAbsoluteValue(std::move(args)), + pos += n, static_cast<uint32_t>(n)), + (void)++i, dummy1)...}) { + (void)dummy2; // Remove & migrate to fold expressions in C++17 + } +} + // Helper function for the future StrCat default floating-point format, %.6g // This is fast. inline strings_internal::AlphaNumBuffer< |