summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2024-03-20 13:43:01 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2024-03-20 13:43:57 -0700
commite4c00cc67a48ef85336d3f66c6a28e32510d8fe6 (patch)
treeb20530a94c4d1a5b10819905e4a8035a54aa2c27
parent85166c912d2a8cfb4eafb78b66a37facf15ae2e5 (diff)
Performance improvement for absl::AsciiStrToUpper() and absl::AsciiStrToLower()
PiperOrigin-RevId: 617613544 Change-Id: I526b5bc087edf54046c77795dddf5412478ac6a8
-rw-r--r--absl/strings/ascii.cc103
-rw-r--r--absl/strings/ascii_test.cc4
2 files changed, 36 insertions, 71 deletions
diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc
index 5460b2c6..20a696a1 100644
--- a/absl/strings/ascii.cc
+++ b/absl/strings/ascii.cc
@@ -15,13 +15,14 @@
#include "absl/strings/ascii.h"
#include <climits>
-#include <cstdint>
+#include <cstddef>
#include <cstring>
#include <string>
-#include <type_traits>
+#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/nullability.h"
+#include "absl/base/optimization.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -162,19 +163,6 @@ ABSL_DLL const char kToUpper[256] = {
};
// clang-format on
-template <class T>
-static constexpr T BroadcastByte(unsigned char value) {
- static_assert(std::is_integral<T>::value && sizeof(T) <= sizeof(uint64_t) &&
- std::is_unsigned<T>::value,
- "only unsigned integers up to 64-bit allowed");
- T result = value;
- constexpr size_t result_bit_width = sizeof(result) * CHAR_BIT;
- result |= result << ((CHAR_BIT << 0) & (result_bit_width - 1));
- result |= result << ((CHAR_BIT << 1) & (result_bit_width - 1));
- result |= result << ((CHAR_BIT << 2) & (result_bit_width - 1));
- return result;
-}
-
// Returns whether `c` is in the a-z/A-Z range (w.r.t. `ToUpper`).
// Implemented by:
// 1. Pushing the a-z/A-Z range to [SCHAR_MIN, SCHAR_MIN + 26).
@@ -189,47 +177,10 @@ constexpr bool AsciiInAZRange(unsigned char c) {
return static_cast<signed char>(u) < threshold;
}
+// Force-inline so the compiler won't merge the short and long implementations.
template <bool ToUpper>
-static constexpr char* PartialAsciiStrCaseFold(absl::Nonnull<char*> p,
- absl::Nonnull<char*> end) {
- using vec_t = size_t;
- const size_t n = static_cast<size_t>(end - p);
-
- // SWAR algorithm: http://0x80.pl/notesen/2016-01-06-swar-swap-case.html
- constexpr char ch_a = ToUpper ? 'a' : 'A', ch_z = ToUpper ? 'z' : 'Z';
- char* const swar_end = p + (n / sizeof(vec_t)) * sizeof(vec_t);
- while (p < swar_end) {
- vec_t v = vec_t();
-
- // memcpy the vector, but constexpr
- for (size_t i = 0; i < sizeof(vec_t); ++i) {
- v |= static_cast<vec_t>(static_cast<unsigned char>(p[i]))
- << (i * CHAR_BIT);
- }
-
- constexpr unsigned int msb = 1u << (CHAR_BIT - 1);
- const vec_t v_msb = v & BroadcastByte<vec_t>(msb);
- const vec_t v_nonascii_mask = (v_msb << 1) - (v_msb >> (CHAR_BIT - 1));
- const vec_t v_nonascii = v & v_nonascii_mask;
- const vec_t v_ascii = v & ~v_nonascii_mask;
- const vec_t a = v_ascii + BroadcastByte<vec_t>(msb - ch_a - 0),
- z = v_ascii + BroadcastByte<vec_t>(msb - ch_z - 1);
- v = v_nonascii | (v_ascii ^ ((a ^ z) & BroadcastByte<vec_t>(msb)) >> 2);
-
- // memcpy the vector, but constexpr
- for (size_t i = 0; i < sizeof(vec_t); ++i) {
- p[i] = static_cast<char>(v >> (i * CHAR_BIT));
- }
-
- p += sizeof(v);
- }
-
- return p;
-}
-
-template <bool ToUpper>
-static constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p,
- absl::Nonnull<char*> end) {
+ABSL_ATTRIBUTE_ALWAYS_INLINE inline constexpr void AsciiStrCaseFoldImpl(
+ absl::Nonnull<char*> p, size_t size) {
// The upper- and lowercase versions of ASCII characters differ by only 1 bit.
// When we need to flip the case, we can xor with this bit to achieve the
// desired result. Note that the choice of 'a' and 'A' here is arbitrary. We
@@ -237,20 +188,32 @@ static constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p,
// have the same single bit difference.
constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';
- using vec_t = size_t;
- // TODO(b/316380338): When FDO becomes able to vectorize these,
- // revert this manual optimization and just leave the naive loop.
- if (static_cast<size_t>(end - p) >= sizeof(vec_t)) {
- p = ascii_internal::PartialAsciiStrCaseFold<ToUpper>(p, end);
- }
- while (p < end) {
- unsigned char v = static_cast<unsigned char>(*p);
+ for (size_t i = 0; i < size; ++i) {
+ unsigned char v = static_cast<unsigned char>(p[i]);
v ^= AsciiInAZRange<ToUpper>(v) ? kAsciiCaseBitFlip : 0;
- *p = static_cast<char>(v);
- ++p;
+ p[i] = static_cast<char>(v);
}
}
+// The string size threshold for starting using the long string version.
+constexpr size_t kCaseFoldThreshold = 16;
+
+// No-inline so the compiler won't merge the short and long implementations.
+template <bool ToUpper>
+ABSL_ATTRIBUTE_NOINLINE constexpr void AsciiStrCaseFoldLong(
+ absl::Nonnull<char*> p, size_t size) {
+ ABSL_ASSUME(size >= kCaseFoldThreshold);
+ AsciiStrCaseFoldImpl<ToUpper>(p, size);
+}
+
+// Splitting to short and long strings to allow vectorization decisions
+// to be made separately in the long and short cases.
+template <bool ToUpper>
+constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p, size_t size) {
+ size < kCaseFoldThreshold ? AsciiStrCaseFoldImpl<ToUpper>(p, size)
+ : AsciiStrCaseFoldLong<ToUpper>(p, size);
+}
+
static constexpr size_t ValidateAsciiCasefold() {
constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN;
size_t incorrect_index = 0;
@@ -259,8 +222,8 @@ static constexpr size_t ValidateAsciiCasefold() {
for (unsigned int i = 0; i < num_chars; ++i) {
uppered[i] = lowered[i] = static_cast<char>(i);
}
- AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]);
- AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]);
+ AsciiStrCaseFold<false>(&lowered[0], num_chars);
+ AsciiStrCaseFold<true>(&uppered[0], num_chars);
for (size_t i = 0; i < num_chars; ++i) {
const char ch = static_cast<char>(i),
ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch),
@@ -278,13 +241,11 @@ static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");
} // namespace ascii_internal
void AsciiStrToLower(absl::Nonnull<std::string*> s) {
- char* p = &(*s)[0]; // Guaranteed to be valid for empty strings
- return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size());
+ return ascii_internal::AsciiStrCaseFold<false>(&(*s)[0], s->size());
}
void AsciiStrToUpper(absl::Nonnull<std::string*> s) {
- char* p = &(*s)[0]; // Guaranteed to be valid for empty strings
- return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size());
+ return ascii_internal::AsciiStrCaseFold<true>(&(*s)[0], s->size());
}
void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str) {
diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc
index 117140c1..8885bb15 100644
--- a/absl/strings/ascii_test.cc
+++ b/absl/strings/ascii_test.cc
@@ -190,11 +190,13 @@ TEST(AsciiStrTo, Lower) {
const std::string str("GHIJKL");
const std::string str2("MNOPQR");
const absl::string_view sp(str2);
+ const std::string long_str("ABCDEFGHIJKLMNOPQRSTUVWXYZ1!a");
std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ");
EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf));
EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str));
EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp));
+ EXPECT_EQ("abcdefghijklmnopqrstuvwxyz1!a", absl::AsciiStrToLower(long_str));
absl::AsciiStrToLower(&mutable_str);
EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str);
@@ -210,10 +212,12 @@ TEST(AsciiStrTo, Upper) {
const std::string str("ghijkl");
const std::string str2("_`?@[{amnopqrstuvwxyz");
const absl::string_view sp(str2);
+ const std::string long_str("abcdefghijklmnopqrstuvwxyz1!A");
EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf));
EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str));
EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp));
+ EXPECT_EQ("ABCDEFGHIJKLMNOPQRSTUVWXYZ1!A", absl::AsciiStrToUpper(long_str));
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),