Merge new upstream LTS 20230802.0

author: Benjamin Barenblat <bbaren@google.com> 2023-09-07 13:16:09 -0400
committer: Benjamin Barenblat <bbaren@google.com> 2023-09-07 13:16:09 -0400
commit: 6fdbff8bbce2a1debdc060df381f39e3dcfb65af (patch)
tree: 71f1ef38477a65d5cce472fc042c90087c2bb351 /absl/strings/numbers.cc
parent: 8d4a80fe37176b1170d7dce0772dea9584ec3e32 (diff)
parent: 29bf8085f3bf17b84d30e34b3d7ff8248fda404e (diff)
1 files changed, 154 insertions, 122 deletions
diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc
index 2987158e..c43c6bcc 100644
--- a/absl/strings/numbers.cc
+++ b/absl/strings/numbers.cc
@@ -31,7 +31,9 @@
 #include <utility>
 
 #include "absl/base/attributes.h"
+#include "absl/base/internal/endian.h"
 #include "absl/base/internal/raw_logging.h"
+#include "absl/base/optimization.h"
 #include "absl/numeric/bits.h"
 #include "absl/strings/ascii.h"
 #include "absl/strings/charconv.h"
@@ -136,82 +138,132 @@ bool SimpleAtob(absl::string_view str, bool* out) {
 
 namespace {
 
-// Used to optimize printing a decimal number's final digit.
-const char one_ASCII_final_digits[10][2] {
-  {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0},
-  {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0},
-};
+// Various routines to encode integers to strings.
+
+// We split data encodings into a group of 2 digits, 4 digits, 8 digits as
+// it's easier to combine powers of two into scalar arithmetic.
+
+// Previous implementation used a lookup table of 200 bytes for every 2 bytes
+// and it was memory bound, any L1 cache miss would result in a much slower
+// result. When benchmarking with a cache eviction rate of several percent,
+// this implementation proved to be better.
+
+// These constants represent '00', '0000' and '00000000' as ascii strings in
+// integers. We can add these numbers if we encode to bytes from 0 to 9. as
+// 'i' = '0' + i for 0 <= i <= 9.
+constexpr uint32_t kTwoZeroBytes = 0x0101 * '0';
+constexpr uint64_t kFourZeroBytes = 0x01010101 * '0';
+constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0';
+
+// * 103 / 1024 is a division by 10 for values from 0 to 99. It's also a
+// division of a structure [k takes 2 bytes][m takes 2 bytes], then * 103 / 1024
+// will be [k / 10][m / 10]. It allows parallel division.
+constexpr uint64_t kDivisionBy10Mul = 103u;
+constexpr uint64_t kDivisionBy10Div = 1 << 10;
+
+// * 10486 / 1048576 is a division by 100 for values from 0 to 9999.
+constexpr uint64_t kDivisionBy100Mul = 10486u;
+constexpr uint64_t kDivisionBy100Div = 1 << 20;
+
+// Encode functions write the ASCII output of input `n` to `out_str`.
+inline char* EncodeHundred(uint32_t n, char* out_str) {
+  int num_digits = static_cast<int>(n - 10) >> 8;
+  uint32_t base = kTwoZeroBytes;
+  uint32_t div10 = (n * kDivisionBy10Mul) / kDivisionBy10Div;
+  uint32_t mod10 = n - 10u * div10;
+  base += div10 + (mod10 << 8);
+  base >>= num_digits & 8;
+  little_endian::Store16(out_str, static_cast<uint16_t>(base));
+  return out_str + 2 + num_digits;
+}
 
-}  // namespace
+inline char* EncodeTenThousand(uint32_t n, char* out_str) {
+  // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive
+  // blocks. 123 ->  [\0\1][\0\23]. We divide by 10 both blocks
+  // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in
+  // parallel". Then we combine both results to have both ASCII digits,
+  // strip trailing zeros, add ASCII '0000' and return.
+  uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div;
+  uint32_t mod100 = n - 100ull * div100;
+  uint32_t hundreds = (mod100 << 16) + div100;
+  uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
+  tens &= (0xFull << 16) | 0xFull;
+  tens += (hundreds - 10ull * tens) << 8;
+  ABSL_ASSUME(tens != 0);
+  // The result can contain trailing zero bits, we need to strip them to a first
+  // significant byte in a final representation. For example, for n = 123, we
+  // have tens to have representation \0\1\2\3. We do `& -8` to round
+  // to a multiple to 8 to strip zero bytes, not all zero bits.
+  // countr_zero to help.
+  // 0 minus 8 to make MSVC happy.
+  uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(tens)) & (0 - 8ull);
+  tens += kFourZeroBytes;
+  tens >>= zeroes;
+  little_endian::Store32(out_str, tens);
+  return out_str + sizeof(tens) - zeroes / 8;
+}
 
-char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) {
-  uint32_t digits;
-  // The idea of this implementation is to trim the number of divides to as few
-  // as possible, and also reducing memory stores and branches, by going in
-  // steps of two digits at a time rather than one whenever possible.
-  // The huge-number case is first, in the hopes that the compiler will output
-  // that case in one branch-free block of code, and only output conditional
-  // branches into it from below.
-  if (i >= 1000000000) {     // >= 1,000,000,000
-    digits = i / 100000000;  //      100,000,000
-    i -= digits * 100000000;
-    PutTwoDigits(digits, buffer);
-    buffer += 2;
-  lt100_000_000:
-    digits = i / 1000000;  // 1,000,000
-    i -= digits * 1000000;
-    PutTwoDigits(digits, buffer);
-    buffer += 2;
-  lt1_000_000:
-    digits = i / 10000;  // 10,000
-    i -= digits * 10000;
-    PutTwoDigits(digits, buffer);
-    buffer += 2;
-  lt10_000:
-    digits = i / 100;
-    i -= digits * 100;
-    PutTwoDigits(digits, buffer);
-    buffer += 2;
- lt100:
-    digits = i;
-    PutTwoDigits(digits, buffer);
-    buffer += 2;
-    *buffer = 0;
-    return buffer;
-  }
+// Prepare functions return an integer that should be written to out_str
+// (but possibly include trailing zeros).
+// For hi < 10000, lo < 10000 returns uint64_t as encoded in ASCII with
+// possibly trailing zeroes of the number hi * 10000 + lo.
+inline uint64_t PrepareTenThousands(uint64_t hi, uint64_t lo) {
+  uint64_t merged = hi | (lo << 32);
+  uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) &
+                    ((0x7Full << 32) | 0x7Full);
+  uint64_t mod100 = merged - 100ull * div100;
+  uint64_t hundreds = (mod100 << 16) + div100;
+  uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
+  tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull;
+  tens += (hundreds - 10ull * tens) << 8;
+  return tens;
+}
 
-  if (i < 100) {
-    digits = i;
-    if (i >= 10) goto lt100;
-    memcpy(buffer, one_ASCII_final_digits[i], 2);
-    return buffer + 1;
+inline char* EncodeFullU32(uint32_t n, char* out_str) {
+  if (n < 100'000'000) {
+    uint64_t bottom = PrepareTenThousands(n / 10000, n % 10000);
+    ABSL_ASSUME(bottom != 0);
+    // 0 minus 8 to make MSVC happy.
+    uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(bottom))
+        & (0 - 8ull);
+    uint64_t bottom_res = bottom + kEightZeroBytes;
+    bottom_res >>= zeroes;
+    little_endian::Store64(out_str, bottom_res);
+    return out_str + sizeof(bottom) - zeroes / 8;
   }
-  if (i < 10000) {  //    10,000
-    if (i >= 1000) goto lt10_000;
-    digits = i / 100;
-    i -= digits * 100;
-    *buffer++ = '0' + static_cast<char>(digits);
-    goto lt100;
-  }
-  if (i < 1000000) {  //    1,000,000
-    if (i >= 100000) goto lt1_000_000;
-    digits = i / 10000;  //    10,000
-    i -= digits * 10000;
-    *buffer++ = '0' + static_cast<char>(digits);
-    goto lt10_000;
+  uint32_t top = n / 100'000'000;
+  n %= 100'000'000;
+  uint64_t bottom = PrepareTenThousands(n / 10000, n % 10000);
+  uint64_t bottom_res = bottom + kEightZeroBytes;
+  out_str = EncodeHundred(top, out_str);
+  little_endian::Store64(out_str, bottom_res);
+  return out_str + sizeof(bottom);
+}
+
+}  // namespace
+
+void numbers_internal::PutTwoDigits(uint32_t i, char* buf) {
+  assert(i < 100);
+  uint32_t base = kTwoZeroBytes;
+  uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
+  uint32_t mod10 = i - 10u * div10;
+  base += div10 + (mod10 << 8);
+  little_endian::Store16(buf, static_cast<uint16_t>(base));
+}
+
+char* numbers_internal::FastIntToBuffer(uint32_t n, char* out_str) {
+  if (n < 100) {
+    out_str = EncodeHundred(n, out_str);
+    goto set_last_zero;
   }
-  if (i < 100000000) {  //    100,000,000
-    if (i >= 10000000) goto lt100_000_000;
-    digits = i / 1000000;  //   1,000,000
-    i -= digits * 1000000;
-    *buffer++ = '0' + static_cast<char>(digits);
-    goto lt1_000_000;
+  if (n < 10000) {
+    out_str = EncodeTenThousand(n, out_str);
+    goto set_last_zero;
   }
-  // we already know that i < 1,000,000,000
-  digits = i / 100000000;  //   100,000,000
-  i -= digits * 100000000;
-  *buffer++ = '0' + static_cast<char>(digits);
-  goto lt100_000_000;
+  out_str = EncodeFullU32(n, out_str);
+set_last_zero:
+  *out_str = '\0';
+  return out_str;
 }
 
 char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
@@ -219,7 +271,7 @@ char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
   if (i < 0) {
     *buffer++ = '-';
     // We need to do the negation in modular (i.e., "unsigned")
-    // arithmetic; MSVC++ apprently warns for plain "-u", so
+    // arithmetic; MSVC++ apparently warns for plain "-u", so
     // we write the equivalent expression "0 - u" instead.
     u = 0 - u;
   }
@@ -230,41 +282,40 @@ char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) {
   uint32_t u32 = static_cast<uint32_t>(i);
   if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer);
 
-  // Here we know i has at least 10 decimal digits.
-  uint64_t top_1to11 = i / 1000000000;
-  u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000);
-  uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
+  // 10**9 < 2**32 <= i < 10**10, we can do 2+8
+  uint64_t div08 = i / 100'000'000ull;
+  uint64_t mod08 = i % 100'000'000ull;
+  uint64_t mod_result =
+      PrepareTenThousands(mod08 / 10000, mod08 % 10000) + kEightZeroBytes;
+  if (i < 10'000'000'000ull) {
+    buffer = EncodeHundred(static_cast<uint32_t>(div08), buffer);
+    little_endian::Store64(buffer, mod_result);
+    buffer += 8;
+    goto set_last_zero;
+  }
 
-  if (top_1to11_32 == top_1to11) {
-    buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer);
+  // i < 10**16, in this case 8+8
+  if (i < 10'000'000'000'000'000ull) {
+    buffer = EncodeFullU32(static_cast<uint32_t>(div08), buffer);
+    little_endian::Store64(buffer, mod_result);
+    buffer += 8;
+    goto set_last_zero;
   } else {
-    // top_1to11 has more than 32 bits too; print it in two steps.
-    uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100);
-    uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100);
-    buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer);
-    PutTwoDigits(mid_2, buffer);
-    buffer += 2;
+    // 4 + 8 + 8
+    uint64_t div016 = i / 10'000'000'000'000'000ull;
+    buffer = EncodeTenThousand(static_cast<uint32_t>(div016), buffer);
+    uint64_t mid_result = div08 - div016 * 100'000'000ull;
+    mid_result = PrepareTenThousands(mid_result / 10000, mid_result % 10000) +
+                 kEightZeroBytes;
+    little_endian::Store64(buffer, mid_result);
+    buffer += 8;
+    little_endian::Store64(buffer, mod_result);
+    buffer += 8;
+    goto set_last_zero;
   }
-
-  // We have only 9 digits now, again the maximum uint32_t can handle fully.
-  uint32_t digits = u32 / 10000000;  // 10,000,000
-  u32 -= digits * 10000000;
-  PutTwoDigits(digits, buffer);
-  buffer += 2;
-  digits = u32 / 100000;  // 100,000
-  u32 -= digits * 100000;
-  PutTwoDigits(digits, buffer);
-  buffer += 2;
-  digits = u32 / 1000;  // 1,000
-  u32 -= digits * 1000;
-  PutTwoDigits(digits, buffer);
-  buffer += 2;
-  digits = u32 / 10;
-  u32 -= digits * 10;
-  PutTwoDigits(digits, buffer);
-  buffer += 2;
-  memcpy(buffer, one_ASCII_final_digits[u32], 2);
-  return buffer + 1;
+set_last_zero:
+  *buffer = '\0';
+  return buffer;
 }
 
 char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) {
@@ -1048,25 +1099,6 @@ ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
     "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
     "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
 
-ABSL_CONST_INIT ABSL_DLL const char two_ASCII_digits[100][2] = {
-    {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'},
-    {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'},
-    {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'},
-    {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'},
-    {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
-    {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'},
-    {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
-    {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'},
-    {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'},
-    {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
-    {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'},
-    {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'},
-    {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'},
-    {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
-    {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
-    {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'},
-    {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};
-
 bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
   return safe_int_internal<int32_t>(text, value, base);
 }
author	Benjamin Barenblat <bbaren@google.com>	2023-09-07 13:16:09 -0400
committer	Benjamin Barenblat <bbaren@google.com>	2023-09-07 13:16:09 -0400
commit	6fdbff8bbce2a1debdc060df381f39e3dcfb65af (patch)
tree	71f1ef38477a65d5cce472fc042c90087c2bb351 /absl/strings/numbers.cc
parent	8d4a80fe37176b1170d7dce0772dea9584ec3e32 (diff)
parent	29bf8085f3bf17b84d30e34b3d7ff8248fda404e (diff)