summary refs log tree commit diff
path: root/absl/strings/ascii.cc
diff options
context:
space:
mode:
author: Abseil Team <absl-team@google.com> 2024-02-20 15:55:18 -0800
committer: Copybara-Service <copybara-worker@google.com> 2024-02-20 15:56:24 -0800
commit: f576ea0ed7eaa1e9f2a3cf82160af8ef7c906bb7 (patch)
tree: f2af0eece496495c50c575556df743d7b7a1897e /absl/strings/ascii.cc
parent: c28f689cd0c05fd73c9eacda7f3ceb193093304d (diff)
Performance improvement for absl::AsciiStrToUpper() and absl::AsciiStrToLower()
PiperOrigin-RevId: 608770171 Change-Id: Icca54086037e42826c272f04374aeb33d060ace5
Diffstat (limited to 'absl/strings/ascii.cc')
-rw-r--r-- absl/strings/ascii.cc 71
1 files changed, 63 insertions, 8 deletions
diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc
index 8f778a45..5460b2c6 100644
--- a/absl/strings/ascii.cc
+++ b/absl/strings/ascii.cc
@@ -15,8 +15,10 @@
#include "absl/strings/ascii.h"
#include <climits>
+#include <cstdint>
#include <cstring>
#include <string>
+#include <type_traits>
#include "absl/base/config.h"
#include "absl/base/nullability.h"
@@ -160,6 +162,19 @@ ABSL_DLL const char kToUpper[256] = {
};
// clang-format on
+template <class T>  // T must be an unsigned integer type no wider than 64 bits (checked by the static_assert below).
+static constexpr T BroadcastByte(unsigned char value) {  // Returns a T in which every byte holds `value`.
+ static_assert(std::is_integral<T>::value && sizeof(T) <= sizeof(uint64_t) &&
+ std::is_unsigned<T>::value,
+ "only unsigned integers up to 64-bit allowed");
+ T result = value;  // `value` occupies only the lowest byte at this point.
+ constexpr size_t result_bit_width = sizeof(result) * CHAR_BIT;  // ANDing a shift count with (width - 1) turns any count that is a multiple of the width into 0, so steps T is too narrow for become no-ops (relies on the width being a power of two).
+ result |= result << ((CHAR_BIT << 0) & (result_bit_width - 1));  // Copy byte 0 into byte 1 (shift by CHAR_BIT).
+ result |= result << ((CHAR_BIT << 1) & (result_bit_width - 1));  // Copy bytes 0-1 into bytes 2-3 (shift by 2 * CHAR_BIT).
+ result |= result << ((CHAR_BIT << 2) & (result_bit_width - 1));  // Copy bytes 0-3 into bytes 4-7 (shift by 4 * CHAR_BIT).
+ return result;
+}
+
// Returns whether `c` is in the a-z/A-Z range (w.r.t. `ToUpper`).
// Implemented by:
// 1. Pushing the a-z/A-Z range to [SCHAR_MIN, SCHAR_MIN + 26).
@@ -175,8 +190,46 @@ constexpr bool AsciiInAZRange(unsigned char c) {
}
template <bool ToUpper>
-constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p,
- absl::Nonnull<char*> end) {
+static constexpr char* PartialAsciiStrCaseFold(absl::Nonnull<char*> p,  // Case-folds [p, end) in whole vec_t-sized chunks: a-z -> A-Z when ToUpper is true, A-Z -> a-z otherwise.
+ absl::Nonnull<char*> end) {  // Returns the first position not processed; fewer than sizeof(vec_t) bytes remain between it and `end`.
+ using vec_t = size_t;  // One size_t is treated as a packed vector of sizeof(size_t) bytes.
+ const size_t n = static_cast<size_t>(end - p);  // Number of bytes in the input range.
+
+ // SWAR (SIMD within a register) algorithm: http://0x80.pl/notesen/2016-01-06-swar-swap-case.html
+ constexpr char ch_a = ToUpper ? 'a' : 'A', ch_z = ToUpper ? 'z' : 'Z';  // Inclusive bounds of the letter range whose case is flipped.
+ char* const swar_end = p + (n / sizeof(vec_t)) * sizeof(vec_t);  // End of the last complete vec_t-sized chunk.
+ while (p < swar_end) {
+ vec_t v = vec_t();
+
+ // Load sizeof(vec_t) bytes into v (like memcpy, but usable in constexpr); p[i] goes to byte i counted from the least significant end.
+ for (size_t i = 0; i < sizeof(vec_t); ++i) {
+ v |= static_cast<vec_t>(static_cast<unsigned char>(p[i]))
+ << (i * CHAR_BIT);
+ }
+
+ constexpr unsigned int msb = 1u << (CHAR_BIT - 1);  // Highest bit of a byte (0x80 when CHAR_BIT == 8).
+ const vec_t v_msb = v & BroadcastByte<vec_t>(msb);  // Per byte: only the high bit, which is set for non-ASCII bytes.
+ const vec_t v_nonascii_mask = (v_msb << 1) - (v_msb >> (CHAR_BIT - 1));  // Per byte: all ones where the high bit was set, zero elsewhere.
+ const vec_t v_nonascii = v & v_nonascii_mask;  // Non-ASCII bytes, set aside so they pass through unchanged.
+ const vec_t v_ascii = v & ~v_nonascii_mask;  // ASCII bytes only; non-ASCII lanes are zeroed so the additions below cannot carry into a neighbouring byte.
+ const vec_t a = v_ascii + BroadcastByte<vec_t>(msb - ch_a - 0),  // High bit of each byte is set iff that byte >= ch_a.
+ z = v_ascii + BroadcastByte<vec_t>(msb - ch_z - 1);  // High bit of each byte is set iff that byte > ch_z.
+ v = v_nonascii | (v_ascii ^ ((a ^ z) & BroadcastByte<vec_t>(msb)) >> 2);  // (a ^ z) & msb marks bytes within [ch_a, ch_z]; `>>` binds tighter than `^`, so the mark is shifted down to the case bit (0x20 when CHAR_BIT == 8) before being XORed in.
+
+ // Store v back to p byte by byte (like memcpy, but usable in constexpr), mirroring the load above.
+ for (size_t i = 0; i < sizeof(vec_t); ++i) {
+ p[i] = static_cast<char>(v >> (i * CHAR_BIT));
+ }
+
+ p += sizeof(v);  // Advance to the next chunk.
+ }
+
+ return p;  // Equal to swar_end once the loop has finished.
+}
+
+template <bool ToUpper>
+static constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p,
+ absl::Nonnull<char*> end) {
// The upper- and lowercase versions of ASCII characters differ by only 1 bit.
// When we need to flip the case, we can xor with this bit to achieve the
// desired result. Note that the choice of 'a' and 'A' here is arbitrary. We
@@ -184,15 +237,17 @@ constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p,
// have the same single bit difference.
constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';
-#ifdef __clang__
-// Temporary workaround until the mentioned bug is fixed.
-// NOLINTNEXTLINE(whitespace/line_length)
-#pragma clang loop vectorize(enable)
-#endif
- for (; p < end; ++p) {
+ using vec_t = size_t;
+ // TODO(b/316380338): When FDO becomes able to vectorize these,
+ // revert this manual optimization and just leave the naive loop.
+ if (static_cast<size_t>(end - p) >= sizeof(vec_t)) {
+ p = ascii_internal::PartialAsciiStrCaseFold<ToUpper>(p, end);
+ }
+ while (p < end) {
unsigned char v = static_cast<unsigned char>(*p);
v ^= AsciiInAZRange<ToUpper>(v) ? kAsciiCaseBitFlip : 0;
*p = static_cast<char>(v);
+ ++p;
}
}