diff options
author | Abseil Team <absl-team@google.com> | 2023-03-13 11:44:25 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2023-03-13 11:45:16 -0700 |
commit | ac8afe6c78d4bde66c8bcf2f3dd9fefea7d4fac8 (patch) | |
tree | c40ac600b01f758c3e35d20eab38a0503b5b9dbd | |
parent | 34e29aae4fe9d296d57268809dfb78a34e705233 (diff) |
Performance improvement for absl::AsciiStrToUpper() and absl::AsciiStrToLower()
PiperOrigin-RevId: 516275043
Change-Id: I906ef0d96dddf12e3738490bd26cb05753ec008c
-rw-r--r-- | absl/strings/ascii.cc | 60 | ||||
-rw-r--r-- | absl/strings/ascii_test.cc | 9 |
2 files changed, 59 insertions, 10 deletions
diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc index 868df2d1..16c96899 100644 --- a/absl/strings/ascii.cc +++ b/absl/strings/ascii.cc @@ -14,6 +14,10 @@ #include "absl/strings/ascii.h" +#include <climits> +#include <cstring> +#include <string> + namespace absl { ABSL_NAMESPACE_BEGIN namespace ascii_internal { @@ -153,18 +157,62 @@ ABSL_DLL const char kToUpper[256] = { }; // clang-format on +template <bool ToUpper> +constexpr void AsciiStrCaseFold(char* p, char* end) { + // The upper- and lowercase versions of ASCII characters differ by only 1 bit. + // When we need to flip the case, we can xor with this bit to achieve the + // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We + // could have chosen 'z' and 'Z', or any other pair of characters as they all + // have the same single bit difference. + constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A'; + + constexpr char ch_a = ToUpper ? 'a' : 'A'; + constexpr char ch_z = ToUpper ? 'z' : 'Z'; + for (; p < end; ++p) { + unsigned char v = static_cast<unsigned char>(*p); + // We use & instead of && to ensure this always stays branchless + // We use static_cast<int> to suppress -Wbitwise-instead-of-logical + bool is_in_range = static_cast<bool>(static_cast<int>(ch_a <= v) & + static_cast<int>(v <= ch_z)); + v ^= is_in_range ? kAsciiCaseBitFlip : 0; + *p = static_cast<char>(v); + } +} + +static constexpr size_t ValidateAsciiCasefold() { + constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN; + size_t incorrect_index = 0; + char lowered[num_chars] = {}; + char uppered[num_chars] = {}; + for (unsigned int i = 0; i < num_chars; ++i) { + uppered[i] = lowered[i] = static_cast<char>(i); + } + AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]); + AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]); + for (size_t i = 0; i < num_chars; ++i) { + const char ch = static_cast<char>(i), + ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch), + ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch); + if (uppered[i] != ch_upper || lowered[i] != ch_lower) { + incorrect_index = i > 0 ? i : num_chars; + break; + } + } + return incorrect_index; +} + +static_assert(ValidateAsciiCasefold() == 0, "error in case conversion"); + } // namespace ascii_internal void AsciiStrToLower(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_tolower(static_cast<unsigned char>(ch)); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size()); } void AsciiStrToUpper(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_toupper(static_cast<unsigned char>(ch)); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size()); } void RemoveExtraAsciiWhitespace(std::string* str) { diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc index dfed114c..4ea262f1 100644 --- a/absl/strings/ascii_test.cc +++ b/absl/strings/ascii_test.cc @@ -14,6 +14,7 @@ #include "absl/strings/ascii.h" +#include <algorithm> #include <cctype> #include <clocale> #include <cstring> @@ -189,14 +190,14 @@ TEST(AsciiStrTo, Lower) { const std::string str("GHIJKL"); const std::string str2("MNOPQR"); const absl::string_view sp(str2); - std::string mutable_str("STUVWX"); + std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ"); EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); absl::AsciiStrToLower(&mutable_str); - EXPECT_EQ("stuvwx", mutable_str); + EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str); char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), @@ -207,12 +208,12 @@ TEST(AsciiStrTo, Lower) { TEST(AsciiStrTo, Upper) { const char buf[] = "abcdef"; const std::string str("ghijkl"); - const std::string str2("mnopqr"); + const std::string str2("_`?@[{amnopqrstuvwxyz"); const absl::string_view sp(str2); EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf)); EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str)); - EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp)); + EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp)); char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), |