summary | refs | log | tree | commit | diff
path: root/absl/strings
diff options
context:
space:
mode:
author: Abseil Team <absl-team@google.com> 2023-08-23 07:15:42 -0700
committer: Copybara-Service <copybara-worker@google.com> 2023-08-23 07:16:31 -0700
commit: 91b861c544afd153fe800fc2bea4736a0da37533 (patch)
tree: c65d0d322bdadd9fe1f9bad07ba3973549a941b3 /absl/strings
parent: 7aef7808d6dbe46ab95b37e6c67d1350c1da016b (diff)
Add absl::CharSet.
PiperOrigin-RevId: 559415517 Change-Id: I5bbc744bf00be2fd15ec7544b725d699e0d982fb
Diffstat (limited to 'absl/strings')
-rw-r--r-- absl/strings/BUILD.bazel 67
-rw-r--r-- absl/strings/CMakeLists.txt 21
-rw-r--r-- absl/strings/charset.h 164
-rw-r--r-- absl/strings/charset_benchmark.cc (renamed from absl/strings/internal/char_map_benchmark.cc) 24
-rw-r--r-- absl/strings/charset_test.cc 181
-rw-r--r-- absl/strings/escaping.cc 2
-rw-r--r-- absl/strings/internal/char_map.h 158
-rw-r--r-- absl/strings/internal/char_map_test.cc 172
8 files changed, 418 insertions, 371 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index e3b8af48..a858d0b2 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -92,6 +92,7 @@ cc_library(
"string_view.h",
],
deps = [
+ ":charset",
":internal",
":string_view",
"//absl/base",
@@ -115,7 +116,6 @@ cc_library(
"internal/utf8.cc",
],
hdrs = [
- "internal/char_map.h",
"internal/escaping.h",
"internal/ostringstream.h",
"internal/resize_uninitialized.h",
@@ -307,6 +307,50 @@ cc_test(
],
)
+cc_test(
+ name = "charset_benchmark",
+ size = "small",
+ srcs = [
+ "charset_benchmark.cc",
+ ],
+ copts = ABSL_TEST_COPTS,
+ tags = [
+ "benchmark",
+ ],
+ visibility = ["//visibility:private"],
+ deps = [
+ ":charset",
+ "//absl/log:check",
+ "@com_github_google_benchmark//:benchmark_main",
+ ],
+)
+
+cc_library(
+ name = "charset",
+ hdrs = [
+ "charset.h",
+ ],
+ copts = ABSL_DEFAULT_COPTS,
+ linkopts = ABSL_DEFAULT_LINKOPTS,
+ deps = [
+ ":string_view",
+ "//absl/base:core_headers",
+ ],
+)
+
+cc_test(
+ name = "charset_test",
+ size = "small",
+ srcs = ["charset_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ visibility = ["//visibility:private"],
+ deps = [
+ ":charset",
+ ":strings",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
cc_library(
name = "cord_internal",
srcs = [
@@ -1087,27 +1131,6 @@ cc_test(
)
cc_test(
- name = "char_map_test",
- srcs = ["internal/char_map_test.cc"],
- copts = ABSL_TEST_COPTS,
- deps = [
- ":internal",
- "@com_google_googletest//:gtest_main",
- ],
-)
-
-cc_test(
- name = "char_map_benchmark",
- srcs = ["internal/char_map_benchmark.cc"],
- copts = ABSL_TEST_COPTS,
- tags = ["benchmark"],
- deps = [
- ":internal",
- "@com_github_google_benchmark//:benchmark_main",
- ],
-)
-
-cc_test(
name = "charconv_test",
srcs = ["charconv_test.cc"],
copts = ABSL_TEST_COPTS,
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index 0e588674..27e7ce4f 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -78,6 +78,7 @@ absl_cc_library(
absl::strings_internal
absl::base
absl::bits
+ absl::charset
absl::config
absl::core_headers
absl::endian
@@ -89,12 +90,24 @@ absl_cc_library(
PUBLIC
)
+absl_cc_library(
+ NAME
+ charset
+ HDRS
+ charset.h
+ COPTS
+ ${ABSL_DEFAULT_COPTS}
+ DEPS
+ absl::core_headers
+ absl::string_view
+ PUBLIC
+)
+
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
strings_internal
HDRS
- "internal/char_map.h"
"internal/escaping.cc"
"internal/escaping.h"
"internal/ostringstream.h"
@@ -357,13 +370,13 @@ absl_cc_test(
absl_cc_test(
NAME
- char_map_test
+ charset_test
SRCS
- "internal/char_map_test.cc"
+ "charset_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
- absl::strings_internal
+ absl::strings
GTest::gmock_main
)
diff --git a/absl/strings/charset.h b/absl/strings/charset.h
new file mode 100644
index 00000000..ff4e81a4
--- /dev/null
+++ b/absl/strings/charset.h
@@ -0,0 +1,164 @@
+// Copyright 2022 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: charset.h
+// -----------------------------------------------------------------------------
+//
+// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
+// characters.
+//
+// Instances can be initialized as constexpr constants. For example:
+//
+// constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
+// constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
+// constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
+//
+// Multiple instances can be combined, and the result is still a constexpr
+// expression. For example:
+//
+// constexpr absl::CharSet kLettersAndNumbers =
+// absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
+//
+// Several pre-defined character classes are available that mirror the methods
+// from <cctype>. For example:
+//
+// constexpr absl::CharSet kLettersAndWhitespace =
+// absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
+//
+// To check membership, use the `contains()` method, e.g.:
+//
+// absl::CharSet hex_letters("abcdef");
+// hex_letters.contains('a'); // true
+// hex_letters.contains('g'); // false
+
+#ifndef ABSL_STRINGS_CHARSET_H_
+#define ABSL_STRINGS_CHARSET_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+
+class CharSet {
+ public:
+ constexpr CharSet() : m_() {}
+
+  // Adds every char of `str` to the set, including any embedded NUL bytes.
+ constexpr explicit CharSet(absl::string_view str) : m_() {
+ for (char c : str) {
+ SetChar(static_cast<unsigned char>(c));
+ }
+ }
+
+ constexpr bool contains(char c) const {
+ return ((m_[static_cast<unsigned char>(c) / 64] >>
+ (static_cast<unsigned char>(c) % 64)) &
+ 0x1) == 0x1;
+ }
+
+ constexpr bool empty() const {
+ for (uint64_t c : m_) {
+ if (c != 0) return false;
+ }
+ return true;
+ }
+
+  // Returns a set containing only the single specified char.
+ static constexpr CharSet Char(char x) {
+ return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
+ CharMaskForWord(x, 2), CharMaskForWord(x, 3));
+ }
+
+  // Returns a set containing all the chars in the closed interval [lo, hi].
+ static constexpr CharSet Range(char lo, char hi) {
+ return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
+ RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
+ }
+
+ friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) {
+ return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
+ a.m_[3] & b.m_[3]);
+ }
+
+ friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) {
+ return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
+ a.m_[3] | b.m_[3]);
+ }
+
+ friend constexpr CharSet operator~(const CharSet& a) {
+ return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
+ }
+
+  // Predefined sets that mirror the char-classifying predicates in <cctype>.
+ static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); }
+ static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); }
+ static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); }
+ static constexpr CharSet AsciiAlphabet() {
+ return AsciiLowercase() | AsciiUppercase();
+ }
+ static constexpr CharSet AsciiAlphanumerics() {
+ return AsciiDigits() | AsciiAlphabet();
+ }
+ static constexpr CharSet AsciiHexDigits() {
+ return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f');
+ }
+ static constexpr CharSet AsciiPrintable() {
+ return CharSet::Range(0x20, 0x7e);
+ }
+ static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); }
+ static constexpr CharSet AsciiPunctuation() {
+ return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics();
+ }
+
+ private:
+ constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
+ : m_{b0, b1, b2, b3} {}
+
+ static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
+ return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &
+ ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
+ }
+
+  // Returns the bits for the given 64-bit word of the char range [0, upper).
+ static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
+ uint64_t word) {
+ return (upper <= 64 * word) ? 0
+ : (upper >= 64 * (word + 1))
+ ? ~static_cast<uint64_t>(0)
+ : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
+ }
+
+ static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
+ return (static_cast<unsigned char>(x) / 64 == word)
+ ? (static_cast<uint64_t>(1)
+ << (static_cast<unsigned char>(x) % 64))
+ : 0;
+ }
+
+ constexpr void SetChar(unsigned char c) {
+ m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
+ }
+
+ uint64_t m_[4];
+};
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_CHARSET_H_
diff --git a/absl/strings/internal/char_map_benchmark.cc b/absl/strings/charset_benchmark.cc
index 5cef967b..bf7ae560 100644
--- a/absl/strings/internal/char_map_benchmark.cc
+++ b/absl/strings/charset_benchmark.cc
@@ -1,4 +1,4 @@
-// Copyright 2017 The Abseil Authors.
+// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,30 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "absl/strings/internal/char_map.h"
-
#include <cstdint>
#include "benchmark/benchmark.h"
+#include "absl/log/check.h"
+#include "absl/strings/charset.h"
namespace {
-absl::strings_internal::Charmap MakeBenchmarkMap() {
- absl::strings_internal::Charmap m;
+absl::CharSet MakeBenchmarkMap() {
+ absl::CharSet m;
uint32_t x[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc};
for (uint32_t& t : x) t *= static_cast<uint32_t>(0x11111111UL);
for (uint32_t i = 0; i < 256; ++i) {
- if ((x[i / 32] >> (i % 32)) & 1)
- m = m | absl::strings_internal::Charmap::Char(i);
+ if ((x[i / 32] >> (i % 32)) & 1) m = m | absl::CharSet::Char(i);
}
return m;
}
// Micro-benchmark for Charmap::contains.
-void BM_Contains(benchmark::State& state) {
+static void BM_Contains(benchmark::State& state) {
// Loop-body replicated 10 times to increase time per iteration.
// Argument continuously changed to avoid generating common subexpressions.
- const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap();
+ // Final CHECK used to discourage unwanted optimization.
+ const absl::CharSet benchmark_map = MakeBenchmarkMap();
unsigned char c = 0;
int ops = 0;
for (auto _ : state) {
@@ -50,12 +50,8 @@ void BM_Contains(benchmark::State& state) {
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
}
- benchmark::DoNotOptimize(ops);
+ CHECK_NE(ops, -1);
}
BENCHMARK(BM_Contains);
-// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith;
-// their running time is data-dependent and it is not worth characterizing
-// "typical" data.
-
} // namespace
diff --git a/absl/strings/charset_test.cc b/absl/strings/charset_test.cc
new file mode 100644
index 00000000..fff943ae
--- /dev/null
+++ b/absl/strings/charset_test.cc
@@ -0,0 +1,181 @@
+// Copyright 2020 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/charset.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/string_view.h"
+
+namespace {
+
+constexpr absl::CharSet everything_map = ~absl::CharSet();
+constexpr absl::CharSet nothing_map = absl::CharSet();
+
+TEST(Charmap, AllTests) {
+ const absl::CharSet also_nothing_map("");
+ EXPECT_TRUE(everything_map.contains('\0'));
+ EXPECT_FALSE(nothing_map.contains('\0'));
+ EXPECT_FALSE(also_nothing_map.contains('\0'));
+ for (unsigned char ch = 1; ch != 0; ++ch) {
+ SCOPED_TRACE(ch);
+ EXPECT_TRUE(everything_map.contains(ch));
+ EXPECT_FALSE(nothing_map.contains(ch));
+ EXPECT_FALSE(also_nothing_map.contains(ch));
+ }
+
+ const absl::CharSet symbols(absl::string_view("&@#@^!@?", 5));
+ EXPECT_TRUE(symbols.contains('&'));
+ EXPECT_TRUE(symbols.contains('@'));
+ EXPECT_TRUE(symbols.contains('#'));
+ EXPECT_TRUE(symbols.contains('^'));
+ EXPECT_FALSE(symbols.contains('!'));
+ EXPECT_FALSE(symbols.contains('?'));
+ int cnt = 0;
+ for (unsigned char ch = 1; ch != 0; ++ch) cnt += symbols.contains(ch);
+ EXPECT_EQ(cnt, 4);
+
+ const absl::CharSet lets(absl::string_view("^abcde", 3));
+ const absl::CharSet lets2(absl::string_view("fghij\0klmnop", 10));
+ const absl::CharSet lets3("fghij\0klmnop");
+ EXPECT_TRUE(lets2.contains('k'));
+ EXPECT_FALSE(lets3.contains('k'));
+
+ EXPECT_FALSE((symbols & lets).empty());
+ EXPECT_TRUE((lets2 & lets).empty());
+ EXPECT_FALSE((lets & symbols).empty());
+ EXPECT_TRUE((lets & lets2).empty());
+
+ EXPECT_TRUE(nothing_map.empty());
+ EXPECT_FALSE(lets.empty());
+}
+
+std::string Members(const absl::CharSet& m) {
+ std::string r;
+ for (size_t i = 0; i < 256; ++i)
+ if (m.contains(i)) r.push_back(i);
+ return r;
+}
+
+std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
+ // Don't depend on lo<hi. Just increment until lo==hi.
+ std::string s;
+ while (true) {
+ s.push_back(lo);
+ if (lo == hi) break;
+ ++lo;
+ }
+ return s;
+}
+
+TEST(Charmap, Constexpr) {
+ constexpr absl::CharSet kEmpty = absl::CharSet();
+ EXPECT_EQ(Members(kEmpty), "");
+ constexpr absl::CharSet kA = absl::CharSet::Char('A');
+ EXPECT_EQ(Members(kA), "A");
+ constexpr absl::CharSet kAZ = absl::CharSet::Range('A', 'Z');
+ EXPECT_EQ(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+ constexpr absl::CharSet kIdentifier =
+ absl::CharSet::Range('0', '9') | absl::CharSet::Range('A', 'Z') |
+ absl::CharSet::Range('a', 'z') | absl::CharSet::Char('_');
+ EXPECT_EQ(Members(kIdentifier),
+ "0123456789"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "_"
+ "abcdefghijklmnopqrstuvwxyz");
+ constexpr absl::CharSet kAll = ~absl::CharSet();
+ for (size_t i = 0; i < 256; ++i) {
+ SCOPED_TRACE(i);
+ EXPECT_TRUE(kAll.contains(i));
+ }
+ constexpr absl::CharSet kHello = absl::CharSet("Hello, world!");
+ EXPECT_EQ(Members(kHello), " !,Hdelorw");
+
+  // Negation and intersection are usable in constant expressions.
+ constexpr absl::CharSet kABC =
+ absl::CharSet::Range('A', 'Z') & ~absl::CharSet::Range('D', 'Z');
+ EXPECT_EQ(Members(kABC), "ABC");
+
+  // contains() is usable in constant expressions.
+ constexpr bool kContainsA = absl::CharSet("abc").contains('a');
+ EXPECT_TRUE(kContainsA);
+ constexpr bool kContainsD = absl::CharSet("abc").contains('d');
+ EXPECT_FALSE(kContainsD);
+
+  // empty() is usable in constant expressions.
+ constexpr bool kEmptyIsEmpty = absl::CharSet().empty();
+ EXPECT_TRUE(kEmptyIsEmpty);
+ constexpr bool kNotEmptyIsEmpty = absl::CharSet("abc").empty();
+ EXPECT_FALSE(kNotEmptyIsEmpty);
+}
+
+TEST(Charmap, Range) {
+ // Exhaustive testing takes too long, so test some of the boundaries that
+ // are perhaps going to cause trouble.
+ std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15,
+ 16, 17, 30, 31, 32, 33, 63, 64, 65,
+ 127, 128, 129, 223, 224, 225, 254, 255};
+ for (auto lo = poi.begin(); lo != poi.end(); ++lo) {
+ SCOPED_TRACE(*lo);
+ for (auto hi = lo; hi != poi.end(); ++hi) {
+ SCOPED_TRACE(*hi);
+ EXPECT_EQ(Members(absl::CharSet::Range(*lo, *hi)),
+ ClosedRangeString(*lo, *hi));
+ }
+ }
+}
+
+TEST(Charmap, NullByteWithStringView) {
+ char characters[5] = {'a', 'b', '\0', 'd', 'x'};
+ absl::string_view view(characters, 5);
+ absl::CharSet tester(view);
+ EXPECT_TRUE(tester.contains('a'));
+ EXPECT_TRUE(tester.contains('b'));
+ EXPECT_TRUE(tester.contains('\0'));
+ EXPECT_TRUE(tester.contains('d'));
+ EXPECT_TRUE(tester.contains('x'));
+ EXPECT_FALSE(tester.contains('c'));
+}
+
+TEST(CharmapCtype, Match) {
+ for (int c = 0; c < 256; ++c) {
+ SCOPED_TRACE(c);
+ SCOPED_TRACE(static_cast<char>(c));
+ EXPECT_EQ(absl::ascii_isupper(c),
+ absl::CharSet::AsciiUppercase().contains(c));
+ EXPECT_EQ(absl::ascii_islower(c),
+ absl::CharSet::AsciiLowercase().contains(c));
+ EXPECT_EQ(absl::ascii_isdigit(c), absl::CharSet::AsciiDigits().contains(c));
+ EXPECT_EQ(absl::ascii_isalpha(c),
+ absl::CharSet::AsciiAlphabet().contains(c));
+ EXPECT_EQ(absl::ascii_isalnum(c),
+ absl::CharSet::AsciiAlphanumerics().contains(c));
+ EXPECT_EQ(absl::ascii_isxdigit(c),
+ absl::CharSet::AsciiHexDigits().contains(c));
+ EXPECT_EQ(absl::ascii_isprint(c),
+ absl::CharSet::AsciiPrintable().contains(c));
+ EXPECT_EQ(absl::ascii_isspace(c),
+ absl::CharSet::AsciiWhitespace().contains(c));
+ EXPECT_EQ(absl::ascii_ispunct(c),
+ absl::CharSet::AsciiPunctuation().contains(c));
+ }
+}
+
+} // namespace
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index 5bf02236..1c0eac42 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -26,7 +26,7 @@
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/strings/ascii.h"
-#include "absl/strings/internal/char_map.h"
+#include "absl/strings/charset.h"
#include "absl/strings/internal/escaping.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/internal/utf8.h"
diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h
deleted file mode 100644
index 70a90343..00000000
--- a/absl/strings/internal/char_map.h
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2017 The Abseil Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Character Map Class
-//
-// A fast, bit-vector map for 8-bit unsigned characters.
-// This class is useful for non-character purposes as well.
-
-#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
-#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
-
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-
-#include "absl/base/macros.h"
-#include "absl/base/port.h"
-
-namespace absl {
-ABSL_NAMESPACE_BEGIN
-namespace strings_internal {
-
-class Charmap {
- public:
- constexpr Charmap() : m_() {}
-
- // Initializes with a given char*. Note that NUL is not treated as
- // a terminator, but rather a char to be flicked.
- Charmap(const char* str, int len) : m_() {
- while (len--) SetChar(*str++);
- }
-
- // Initializes with a given char*. NUL is treated as a terminator
- // and will not be in the charmap.
- explicit Charmap(const char* str) : m_() {
- while (*str) SetChar(*str++);
- }
-
- constexpr bool contains(unsigned char c) const {
- return (m_[c / 64] >> (c % 64)) & 0x1;
- }
-
- // Returns true if and only if a character exists in both maps.
- bool IntersectsWith(const Charmap& c) const {
- for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) {
- if ((m_[i] & c.m_[i]) != 0) return true;
- }
- return false;
- }
-
- bool IsZero() const {
- for (uint64_t c : m_) {
- if (c != 0) return false;
- }
- return true;
- }
-
- // Containing only a single specified char.
- static constexpr Charmap Char(char x) {
- return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
- CharMaskForWord(x, 2), CharMaskForWord(x, 3));
- }
-
- // Containing all the chars in the C-string 's'.
- static constexpr Charmap FromString(const char* s) {
- Charmap ret;
- while (*s) ret = ret | Char(*s++);
- return ret;
- }
-
- // Containing all the chars in the closed interval [lo,hi].
- static constexpr Charmap Range(char lo, char hi) {
- return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
- RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
- }
-
- friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) {
- return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
- a.m_[3] & b.m_[3]);
- }
-
- friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) {
- return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
- a.m_[3] | b.m_[3]);
- }
-
- friend constexpr Charmap operator~(const Charmap& a) {
- return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
- }
-
- private:
- constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
- : m_{b0, b1, b2, b3} {}
-
- static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
- return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &
- ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
- }
-
- // All the chars in the specified word of the range [0, upper).
- static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
- uint64_t word) {
- return (upper <= 64 * word)
- ? 0
- : (upper >= 64 * (word + 1))
- ? ~static_cast<uint64_t>(0)
- : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
- }
-
- static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
- const auto unsigned_x = static_cast<unsigned char>(x);
- return (unsigned_x / 64 == word)
- ? (static_cast<uint64_t>(1) << (unsigned_x % 64))
- : 0;
- }
-
- void SetChar(char c) {
- const auto unsigned_c = static_cast<unsigned char>(c);
- m_[unsigned_c / 64] |= static_cast<uint64_t>(1) << (unsigned_c % 64);
- }
-
- uint64_t m_[4];
-};
-
-// Mirror the char-classifying predicates in <cctype>
-constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); }
-constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); }
-constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); }
-constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); }
-constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); }
-constexpr Charmap XDigitCharmap() {
- return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f');
-}
-constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); }
-constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); }
-constexpr Charmap CntrlCharmap() {
- return Charmap::Range(0, 0x7f) & ~PrintCharmap();
-}
-constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); }
-constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); }
-constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); }
-
-} // namespace strings_internal
-ABSL_NAMESPACE_END
-} // namespace absl
-
-#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
diff --git a/absl/strings/internal/char_map_test.cc b/absl/strings/internal/char_map_test.cc
deleted file mode 100644
index d3306241..00000000
--- a/absl/strings/internal/char_map_test.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2017 The Abseil Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "absl/strings/internal/char_map.h"
-
-#include <cctype>
-#include <string>
-#include <vector>
-
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-
-namespace {
-
-constexpr absl::strings_internal::Charmap everything_map =
- ~absl::strings_internal::Charmap();
-constexpr absl::strings_internal::Charmap nothing_map{};
-
-TEST(Charmap, AllTests) {
- const absl::strings_internal::Charmap also_nothing_map("", 0);
- ASSERT_TRUE(everything_map.contains('\0'));
- ASSERT_TRUE(!nothing_map.contains('\0'));
- ASSERT_TRUE(!also_nothing_map.contains('\0'));
- for (unsigned char ch = 1; ch != 0; ++ch) {
- ASSERT_TRUE(everything_map.contains(ch));
- ASSERT_TRUE(!nothing_map.contains(ch));
- ASSERT_TRUE(!also_nothing_map.contains(ch));
- }
-
- const absl::strings_internal::Charmap symbols("&@#@^!@?", 5);
- ASSERT_TRUE(symbols.contains('&'));
- ASSERT_TRUE(symbols.contains('@'));
- ASSERT_TRUE(symbols.contains('#'));
- ASSERT_TRUE(symbols.contains('^'));
- ASSERT_TRUE(!symbols.contains('!'));
- ASSERT_TRUE(!symbols.contains('?'));
- int cnt = 0;
- for (unsigned char ch = 1; ch != 0; ++ch)
- cnt += symbols.contains(ch);
- ASSERT_EQ(cnt, 4);
-
- const absl::strings_internal::Charmap lets("^abcde", 3);
- const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10);
- const absl::strings_internal::Charmap lets3("fghij\0klmnop");
- ASSERT_TRUE(lets2.contains('k'));
- ASSERT_TRUE(!lets3.contains('k'));
-
- ASSERT_TRUE(symbols.IntersectsWith(lets));
- ASSERT_TRUE(!lets2.IntersectsWith(lets));
- ASSERT_TRUE(lets.IntersectsWith(symbols));
- ASSERT_TRUE(!lets.IntersectsWith(lets2));
-
- ASSERT_TRUE(nothing_map.IsZero());
- ASSERT_TRUE(!lets.IsZero());
-}
-
-namespace {
-std::string Members(const absl::strings_internal::Charmap& m) {
- std::string r;
- for (size_t i = 0; i < 256; ++i)
- if (m.contains(i)) r.push_back(i);
- return r;
-}
-
-std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
- // Don't depend on lo<hi. Just increment until lo==hi.
- std::string s;
- while (true) {
- s.push_back(lo);
- if (lo == hi) break;
- ++lo;
- }
- return s;
-}
-
-} // namespace
-
-TEST(Charmap, Constexpr) {
- constexpr absl::strings_internal::Charmap kEmpty = nothing_map;
- EXPECT_THAT(Members(kEmpty), "");
- constexpr absl::strings_internal::Charmap kA =
- absl::strings_internal::Charmap::Char('A');
- EXPECT_THAT(Members(kA), "A");
- constexpr absl::strings_internal::Charmap kAZ =
- absl::strings_internal::Charmap::Range('A', 'Z');
- EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
- constexpr absl::strings_internal::Charmap kIdentifier =
- absl::strings_internal::Charmap::Range('0', '9') |
- absl::strings_internal::Charmap::Range('A', 'Z') |
- absl::strings_internal::Charmap::Range('a', 'z') |
- absl::strings_internal::Charmap::Char('_');
- EXPECT_THAT(Members(kIdentifier),
- "0123456789"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "_"
- "abcdefghijklmnopqrstuvwxyz");
- constexpr absl::strings_internal::Charmap kAll = everything_map;
- for (size_t i = 0; i < 256; ++i) {
- EXPECT_TRUE(kAll.contains(i)) << i;
- }
- constexpr absl::strings_internal::Charmap kHello =
- absl::strings_internal::Charmap::FromString("Hello, world!");
- EXPECT_THAT(Members(kHello), " !,Hdelorw");
-
- // test negation and intersection
- constexpr absl::strings_internal::Charmap kABC =
- absl::strings_internal::Charmap::Range('A', 'Z') &
- ~absl::strings_internal::Charmap::Range('D', 'Z');
- EXPECT_THAT(Members(kABC), "ABC");
-}
-
-TEST(Charmap, Range) {
- // Exhaustive testing takes too long, so test some of the boundaries that
- // are perhaps going to cause trouble.
- std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15,
- 16, 17, 30, 31, 32, 33, 63, 64, 65,
- 127, 128, 129, 223, 224, 225, 254, 255};
- for (auto lo = poi.begin(); lo != poi.end(); ++lo) {
- SCOPED_TRACE(*lo);
- for (auto hi = lo; hi != poi.end(); ++hi) {
- SCOPED_TRACE(*hi);
- EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)),
- ClosedRangeString(*lo, *hi));
- }
- }
-}
-
-bool AsBool(int x) { return static_cast<bool>(x); }
-
-TEST(CharmapCtype, Match) {
- for (int c = 0; c < 256; ++c) {
- SCOPED_TRACE(c);
- SCOPED_TRACE(static_cast<char>(c));
- EXPECT_EQ(AsBool(std::isupper(c)),
- absl::strings_internal::UpperCharmap().contains(c));
- EXPECT_EQ(AsBool(std::islower(c)),
- absl::strings_internal::LowerCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isdigit(c)),
- absl::strings_internal::DigitCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isalpha(c)),
- absl::strings_internal::AlphaCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isalnum(c)),
- absl::strings_internal::AlnumCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isxdigit(c)),
- absl::strings_internal::XDigitCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isprint(c)),
- absl::strings_internal::PrintCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isspace(c)),
- absl::strings_internal::SpaceCharmap().contains(c));
- EXPECT_EQ(AsBool(std::iscntrl(c)),
- absl::strings_internal::CntrlCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isblank(c)),
- absl::strings_internal::BlankCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isgraph(c)),
- absl::strings_internal::GraphCharmap().contains(c));
- EXPECT_EQ(AsBool(std::ispunct(c)),
- absl::strings_internal::PunctCharmap().contains(c));
- }
-}
-
-} // namespace