summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2023-03-13 11:44:25 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2023-03-13 11:45:16 -0700
commitac8afe6c78d4bde66c8bcf2f3dd9fefea7d4fac8 (patch)
treec40ac600b01f758c3e35d20eab38a0503b5b9dbd
parent34e29aae4fe9d296d57268809dfb78a34e705233 (diff)
Performance improvement for absl::AsciiStrToUpper() and absl::AsciiStrToLower()
PiperOrigin-RevId: 516275043 Change-Id: I906ef0d96dddf12e3738490bd26cb05753ec008c
-rw-r--r--absl/strings/ascii.cc60
-rw-r--r--absl/strings/ascii_test.cc9
2 files changed, 59 insertions, 10 deletions
diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc
index 868df2d1..16c96899 100644
--- a/absl/strings/ascii.cc
+++ b/absl/strings/ascii.cc
@@ -14,6 +14,10 @@
#include "absl/strings/ascii.h"
+#include <climits>
+#include <cstring>
+#include <string>
+
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace ascii_internal {
@@ -153,18 +157,62 @@ ABSL_DLL const char kToUpper[256] = {
};
// clang-format on
+template <bool ToUpper>
+constexpr void AsciiStrCaseFold(char* p, char* end) {
+ // The upper- and lowercase versions of ASCII characters differ by only 1 bit.
+ // When we need to flip the case, we can xor with this bit to achieve the
+ // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We
+ // could have chosen 'z' and 'Z', or any other pair of characters as they all
+ // have the same single bit difference.
+ constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';
+
+ constexpr char ch_a = ToUpper ? 'a' : 'A';
+ constexpr char ch_z = ToUpper ? 'z' : 'Z';
+ for (; p < end; ++p) {
+ unsigned char v = static_cast<unsigned char>(*p);
+ // We use & instead of && to ensure this always stays branchless
+ // We use static_cast<int> to suppress -Wbitwise-instead-of-logical
+ bool is_in_range = static_cast<bool>(static_cast<int>(ch_a <= v) &
+ static_cast<int>(v <= ch_z));
+ v ^= is_in_range ? kAsciiCaseBitFlip : 0;
+ *p = static_cast<char>(v);
+ }
+}
+
+static constexpr size_t ValidateAsciiCasefold() {
+ constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN;
+ size_t incorrect_index = 0;
+ char lowered[num_chars] = {};
+ char uppered[num_chars] = {};
+ for (unsigned int i = 0; i < num_chars; ++i) {
+ uppered[i] = lowered[i] = static_cast<char>(i);
+ }
+ AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]);
+ AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]);
+ for (size_t i = 0; i < num_chars; ++i) {
+ const char ch = static_cast<char>(i),
+ ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch),
+ ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch);
+ if (uppered[i] != ch_upper || lowered[i] != ch_lower) {
+ incorrect_index = i > 0 ? i : num_chars;
+ break;
+ }
+ }
+ return incorrect_index;
+}
+
+static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");
+
} // namespace ascii_internal
void AsciiStrToLower(std::string* s) {
- for (auto& ch : *s) {
- ch = absl::ascii_tolower(static_cast<unsigned char>(ch));
- }
+ char* p = &(*s)[0]; // Guaranteed to be valid for empty strings
+ return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size());
}
void AsciiStrToUpper(std::string* s) {
- for (auto& ch : *s) {
- ch = absl::ascii_toupper(static_cast<unsigned char>(ch));
- }
+ char* p = &(*s)[0]; // Guaranteed to be valid for empty strings
+ return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size());
}
void RemoveExtraAsciiWhitespace(std::string* str) {
diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc
index dfed114c..4ea262f1 100644
--- a/absl/strings/ascii_test.cc
+++ b/absl/strings/ascii_test.cc
@@ -14,6 +14,7 @@
#include "absl/strings/ascii.h"
+#include <algorithm>
#include <cctype>
#include <clocale>
#include <cstring>
@@ -189,14 +190,14 @@ TEST(AsciiStrTo, Lower) {
const std::string str("GHIJKL");
const std::string str2("MNOPQR");
const absl::string_view sp(str2);
- std::string mutable_str("STUVWX");
+ std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ");
EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf));
EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str));
EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp));
absl::AsciiStrToLower(&mutable_str);
- EXPECT_EQ("stuvwx", mutable_str);
+ EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str);
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
@@ -207,12 +208,12 @@ TEST(AsciiStrTo, Lower) {
TEST(AsciiStrTo, Upper) {
const char buf[] = "abcdef";
const std::string str("ghijkl");
- const std::string str2("mnopqr");
+ const std::string str2("_`?@[{amnopqrstuvwxyz");
const absl::string_view sp(str2);
EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf));
EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str));
- EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp));
+ EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp));
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),