diff options
author | Abseil Team <absl-team@google.com> | 2023-08-23 07:15:42 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2023-08-23 07:16:31 -0700 |
commit | 91b861c544afd153fe800fc2bea4736a0da37533 (patch) | |
tree | c65d0d322bdadd9fe1f9bad07ba3973549a941b3 | |
parent | 7aef7808d6dbe46ab95b37e6c67d1350c1da016b (diff) |
Add absl::CharSet.
PiperOrigin-RevId: 559415517
Change-Id: I5bbc744bf00be2fd15ec7544b725d699e0d982fb
-rw-r--r-- | CMake/AbseilDll.cmake | 2 | ||||
-rw-r--r-- | absl/strings/BUILD.bazel | 67 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 21 | ||||
-rw-r--r-- | absl/strings/charset.h | 164 | ||||
-rw-r--r-- | absl/strings/charset_benchmark.cc (renamed from absl/strings/internal/char_map_benchmark.cc) | 24 | ||||
-rw-r--r-- | absl/strings/charset_test.cc | 181 | ||||
-rw-r--r-- | absl/strings/escaping.cc | 2 | ||||
-rw-r--r-- | absl/strings/internal/char_map.h | 158 | ||||
-rw-r--r-- | absl/strings/internal/char_map_test.cc | 172 |
9 files changed, 419 insertions, 372 deletions
diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index a5a512bd..1a20a84f 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake @@ -263,6 +263,7 @@ set(ABSL_INTERNAL_DLL_FILES "strings/ascii.h" "strings/charconv.cc" "strings/charconv.h" + "strings/charset.h" "strings/cord.cc" "strings/cord.h" "strings/cord_analysis.cc" @@ -327,7 +328,6 @@ set(ABSL_INTERNAL_DLL_FILES "strings/strip.h" "strings/substitute.cc" "strings/substitute.h" - "strings/internal/char_map.h" "strings/internal/escaping.h" "strings/internal/escaping.cc" "strings/internal/memutil.cc" diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index e3b8af48..a858d0b2 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -92,6 +92,7 @@ cc_library( "string_view.h", ], deps = [ + ":charset", ":internal", ":string_view", "//absl/base", @@ -115,7 +116,6 @@ cc_library( "internal/utf8.cc", ], hdrs = [ - "internal/char_map.h", "internal/escaping.h", "internal/ostringstream.h", "internal/resize_uninitialized.h", @@ -307,6 +307,50 @@ cc_test( ], ) +cc_test( + name = "charset_benchmark", + size = "small", + srcs = [ + "charset_benchmark.cc", + ], + copts = ABSL_TEST_COPTS, + tags = [ + "benchmark", + ], + visibility = ["//visibility:private"], + deps = [ + ":charset", + "//absl/log:check", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_library( + name = "charset", + hdrs = [ + "charset.h", + ], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + deps = [ + ":string_view", + "//absl/base:core_headers", + ], +) + +cc_test( + name = "charset_test", + size = "small", + srcs = ["charset_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":charset", + ":strings", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "cord_internal", srcs = [ @@ -1087,27 +1131,6 @@ cc_test( ) cc_test( - name = "char_map_test", - srcs = ["internal/char_map_test.cc"], - copts = ABSL_TEST_COPTS, - deps = [ - ":internal", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "char_map_benchmark", - srcs = ["internal/char_map_benchmark.cc"], - copts = ABSL_TEST_COPTS, - tags = ["benchmark"], - deps = [ - ":internal", - "@com_github_google_benchmark//:benchmark_main", - ], -) - -cc_test( name = "charconv_test", srcs = ["charconv_test.cc"], copts = ABSL_TEST_COPTS, diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 0e588674..27e7ce4f 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -78,6 +78,7 @@ absl_cc_library( absl::strings_internal absl::base absl::bits + absl::charset absl::config absl::core_headers absl::endian @@ -89,12 +90,24 @@ absl_cc_library( PUBLIC ) +absl_cc_library( + NAME + charset + HDRS + charset.h + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::core_headers + absl::string_view + PUBLIC +) + # Internal-only target, do not depend on directly. absl_cc_library( NAME strings_internal HDRS - "internal/char_map.h" "internal/escaping.cc" "internal/escaping.h" "internal/ostringstream.h" @@ -357,13 +370,13 @@ absl_cc_test( absl_cc_test( NAME - char_map_test + charset_test SRCS - "internal/char_map_test.cc" + "charset_test.cc" COPTS ${ABSL_TEST_COPTS} DEPS - absl::strings_internal + absl::strings GTest::gmock_main ) diff --git a/absl/strings/charset.h b/absl/strings/charset.h new file mode 100644 index 00000000..ff4e81a4 --- /dev/null +++ b/absl/strings/charset.h @@ -0,0 +1,164 @@ +// Copyright 2022 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: charset.h +// ----------------------------------------------------------------------------- +// +// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned +// characters. +// +// Instances can be initialized as constexpr constants. For example: +// +// constexpr absl::CharSet kJustX = absl::CharSet::Char('x'); +// constexpr absl::CharSet kMySymbols = absl::CharSet("$@!"); +// constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z'); +// +// Multiple instances can be combined that still forms a constexpr expression. +// For example: +// +// constexpr absl::CharSet kLettersAndNumbers = +// absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9'); +// +// Several pre-defined character classes are available that mirror the methods +// from <cctype>. For example: +// +// constexpr absl::CharSet kLettersAndWhitespace = +// absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace(); +// +// To check membership, use the .contains method, e.g. +// +// absl::CharSet hex_letters("abcdef"); +// hex_letters.contains('a'); // true +// hex_letters.contains('g'); // false + +#ifndef ABSL_STRINGS_CHARSET_H_ +#define ABSL_STRINGS_CHARSET_H_ + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/strings/string_view.h" + +namespace absl { + +class CharSet { + public: + constexpr CharSet() : m_() {} + + // Initializes with a given string_view. + constexpr explicit CharSet(absl::string_view str) : m_() { + for (char c : str) { + SetChar(static_cast<unsigned char>(c)); + } + } + + constexpr bool contains(char c) const { + return ((m_[static_cast<unsigned char>(c) / 64] >> + (static_cast<unsigned char>(c) % 64)) & + 0x1) == 0x1; + } + + constexpr bool empty() const { + for (uint64_t c : m_) { + if (c != 0) return false; + } + return true; + } + + // Containing only a single specified char. + static constexpr CharSet Char(char x) { + return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1), + CharMaskForWord(x, 2), CharMaskForWord(x, 3)); + } + + // Containing all the chars in the closed interval [lo,hi]. + static constexpr CharSet Range(char lo, char hi) { + return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), + RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); + } + + friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) { + return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], + a.m_[3] & b.m_[3]); + } + + friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) { + return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], + a.m_[3] | b.m_[3]); + } + + friend constexpr CharSet operator~(const CharSet& a) { + return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); + } + + // Mirrors the char-classifying predicates in <cctype>. + static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); } + static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); } + static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); } + static constexpr CharSet AsciiAlphabet() { + return AsciiLowercase() | AsciiUppercase(); + } + static constexpr CharSet AsciiAlphanumerics() { + return AsciiDigits() | AsciiAlphabet(); + } + static constexpr CharSet AsciiHexDigits() { + return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f'); + } + static constexpr CharSet AsciiPrintable() { + return CharSet::Range(0x20, 0x7e); + } + static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); } + static constexpr CharSet AsciiPunctuation() { + return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics(); + } + + private: + constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) + : m_{b0, b1, b2, b3} {} + + static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) { + return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) & + ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word); + } + + // All the chars in the specified word of the range [0, upper). + static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, + uint64_t word) { + return (upper <= 64 * word) ? 0 + : (upper >= 64 * (word + 1)) + ? ~static_cast<uint64_t>(0) + : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); + } + + static constexpr uint64_t CharMaskForWord(char x, uint64_t word) { + return (static_cast<unsigned char>(x) / 64 == word) + ? (static_cast<uint64_t>(1) + << (static_cast<unsigned char>(x) % 64)) + : 0; + } + + constexpr void SetChar(unsigned char c) { + m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); + } + + uint64_t m_[4]; +}; + +} // namespace absl + +#endif // ABSL_STRINGS_CHARSET_H_ diff --git a/absl/strings/internal/char_map_benchmark.cc b/absl/strings/charset_benchmark.cc index 5cef967b..bf7ae560 100644 --- a/absl/strings/internal/char_map_benchmark.cc +++ b/absl/strings/charset_benchmark.cc @@ -1,4 +1,4 @@ -// Copyright 2017 The Abseil Authors. +// Copyright 2020 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,30 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "absl/strings/internal/char_map.h" - #include <cstdint> #include "benchmark/benchmark.h" +#include "absl/log/check.h" +#include "absl/strings/charset.h" namespace { -absl::strings_internal::Charmap MakeBenchmarkMap() { - absl::strings_internal::Charmap m; +absl::CharSet MakeBenchmarkMap() { + absl::CharSet m; uint32_t x[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc}; for (uint32_t& t : x) t *= static_cast<uint32_t>(0x11111111UL); for (uint32_t i = 0; i < 256; ++i) { - if ((x[i / 32] >> (i % 32)) & 1) - m = m | absl::strings_internal::Charmap::Char(i); + if ((x[i / 32] >> (i % 32)) & 1) m = m | absl::CharSet::Char(i); } return m; } // Micro-benchmark for Charmap::contains. -void BM_Contains(benchmark::State& state) { +static void BM_Contains(benchmark::State& state) { // Loop-body replicated 10 times to increase time per iteration. // Argument continuously changed to avoid generating common subexpressions. - const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap(); + // Final CHECK used to discourage unwanted optimization. + const absl::CharSet benchmark_map = MakeBenchmarkMap(); unsigned char c = 0; int ops = 0; for (auto _ : state) { @@ -50,12 +50,8 @@ void BM_Contains(benchmark::State& state) { ops += benchmark_map.contains(c++); ops += benchmark_map.contains(c++); } - benchmark::DoNotOptimize(ops); + CHECK_NE(ops, -1); } BENCHMARK(BM_Contains); -// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith; -// their running time is data-dependent and it is not worth characterizing -// "typical" data. - } // namespace diff --git a/absl/strings/charset_test.cc b/absl/strings/charset_test.cc new file mode 100644 index 00000000..fff943ae --- /dev/null +++ b/absl/strings/charset_test.cc @@ -0,0 +1,181 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/charset.h" + +#include <stdio.h> +#include <stdlib.h> + +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" +#include "absl/strings/string_view.h" + +namespace { + +constexpr absl::CharSet everything_map = ~absl::CharSet(); +constexpr absl::CharSet nothing_map = absl::CharSet(); + +TEST(Charmap, AllTests) { + const absl::CharSet also_nothing_map(""); + EXPECT_TRUE(everything_map.contains('\0')); + EXPECT_FALSE(nothing_map.contains('\0')); + EXPECT_FALSE(also_nothing_map.contains('\0')); + for (unsigned char ch = 1; ch != 0; ++ch) { + SCOPED_TRACE(ch); + EXPECT_TRUE(everything_map.contains(ch)); + EXPECT_FALSE(nothing_map.contains(ch)); + EXPECT_FALSE(also_nothing_map.contains(ch)); + } + + const absl::CharSet symbols(absl::string_view("&@#@^!@?", 5)); + EXPECT_TRUE(symbols.contains('&')); + EXPECT_TRUE(symbols.contains('@')); + EXPECT_TRUE(symbols.contains('#')); + EXPECT_TRUE(symbols.contains('^')); + EXPECT_FALSE(symbols.contains('!')); + EXPECT_FALSE(symbols.contains('?')); + int cnt = 0; + for (unsigned char ch = 1; ch != 0; ++ch) cnt += symbols.contains(ch); + EXPECT_EQ(cnt, 4); + + const absl::CharSet lets(absl::string_view("^abcde", 3)); + const absl::CharSet lets2(absl::string_view("fghij\0klmnop", 10)); + const absl::CharSet lets3("fghij\0klmnop"); + EXPECT_TRUE(lets2.contains('k')); + EXPECT_FALSE(lets3.contains('k')); + + EXPECT_FALSE((symbols & lets).empty()); + EXPECT_TRUE((lets2 & lets).empty()); + EXPECT_FALSE((lets & symbols).empty()); + EXPECT_TRUE((lets & lets2).empty()); + + EXPECT_TRUE(nothing_map.empty()); + EXPECT_FALSE(lets.empty()); +} + +std::string Members(const absl::CharSet& m) { + std::string r; + for (size_t i = 0; i < 256; ++i) + if (m.contains(i)) r.push_back(i); + return r; +} + +std::string ClosedRangeString(unsigned char lo, unsigned char hi) { + // Don't depend on lo<hi. Just increment until lo==hi. + std::string s; + while (true) { + s.push_back(lo); + if (lo == hi) break; + ++lo; + } + return s; +} + +TEST(Charmap, Constexpr) { + constexpr absl::CharSet kEmpty = absl::CharSet(); + EXPECT_EQ(Members(kEmpty), ""); + constexpr absl::CharSet kA = absl::CharSet::Char('A'); + EXPECT_EQ(Members(kA), "A"); + constexpr absl::CharSet kAZ = absl::CharSet::Range('A', 'Z'); + EXPECT_EQ(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + constexpr absl::CharSet kIdentifier = + absl::CharSet::Range('0', '9') | absl::CharSet::Range('A', 'Z') | + absl::CharSet::Range('a', 'z') | absl::CharSet::Char('_'); + EXPECT_EQ(Members(kIdentifier), + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "_" + "abcdefghijklmnopqrstuvwxyz"); + constexpr absl::CharSet kAll = ~absl::CharSet(); + for (size_t i = 0; i < 256; ++i) { + SCOPED_TRACE(i); + EXPECT_TRUE(kAll.contains(i)); + } + constexpr absl::CharSet kHello = absl::CharSet("Hello, world!"); + EXPECT_EQ(Members(kHello), " !,Hdelorw"); + + // test negation and intersection + constexpr absl::CharSet kABC = + absl::CharSet::Range('A', 'Z') & ~absl::CharSet::Range('D', 'Z'); + EXPECT_EQ(Members(kABC), "ABC"); + + // contains + constexpr bool kContainsA = absl::CharSet("abc").contains('a'); + EXPECT_TRUE(kContainsA); + constexpr bool kContainsD = absl::CharSet("abc").contains('d'); + EXPECT_FALSE(kContainsD); + + // empty + constexpr bool kEmptyIsEmpty = absl::CharSet().empty(); + EXPECT_TRUE(kEmptyIsEmpty); + constexpr bool kNotEmptyIsEmpty = absl::CharSet("abc").empty(); + EXPECT_FALSE(kNotEmptyIsEmpty); +} + +TEST(Charmap, Range) { + // Exhaustive testing takes too long, so test some of the boundaries that + // are perhaps going to cause trouble. + std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15, + 16, 17, 30, 31, 32, 33, 63, 64, 65, + 127, 128, 129, 223, 224, 225, 254, 255}; + for (auto lo = poi.begin(); lo != poi.end(); ++lo) { + SCOPED_TRACE(*lo); + for (auto hi = lo; hi != poi.end(); ++hi) { + SCOPED_TRACE(*hi); + EXPECT_EQ(Members(absl::CharSet::Range(*lo, *hi)), + ClosedRangeString(*lo, *hi)); + } + } +} + +TEST(Charmap, NullByteWithStringView) { + char characters[5] = {'a', 'b', '\0', 'd', 'x'}; + absl::string_view view(characters, 5); + absl::CharSet tester(view); + EXPECT_TRUE(tester.contains('a')); + EXPECT_TRUE(tester.contains('b')); + EXPECT_TRUE(tester.contains('\0')); + EXPECT_TRUE(tester.contains('d')); + EXPECT_TRUE(tester.contains('x')); + EXPECT_FALSE(tester.contains('c')); +} + +TEST(CharmapCtype, Match) { + for (int c = 0; c < 256; ++c) { + SCOPED_TRACE(c); + SCOPED_TRACE(static_cast<char>(c)); + EXPECT_EQ(absl::ascii_isupper(c), + absl::CharSet::AsciiUppercase().contains(c)); + EXPECT_EQ(absl::ascii_islower(c), + absl::CharSet::AsciiLowercase().contains(c)); + EXPECT_EQ(absl::ascii_isdigit(c), absl::CharSet::AsciiDigits().contains(c)); + EXPECT_EQ(absl::ascii_isalpha(c), + absl::CharSet::AsciiAlphabet().contains(c)); + EXPECT_EQ(absl::ascii_isalnum(c), + absl::CharSet::AsciiAlphanumerics().contains(c)); + EXPECT_EQ(absl::ascii_isxdigit(c), + absl::CharSet::AsciiHexDigits().contains(c)); + EXPECT_EQ(absl::ascii_isprint(c), + absl::CharSet::AsciiPrintable().contains(c)); + EXPECT_EQ(absl::ascii_isspace(c), + absl::CharSet::AsciiWhitespace().contains(c)); + EXPECT_EQ(absl::ascii_ispunct(c), + absl::CharSet::AsciiPunctuation().contains(c)); + } +} + +} // namespace diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 5bf02236..1c0eac42 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -26,7 +26,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/unaligned_access.h" #include "absl/strings/ascii.h" -#include "absl/strings/internal/char_map.h" +#include "absl/strings/charset.h" #include "absl/strings/internal/escaping.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/internal/utf8.h" diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h deleted file mode 100644 index 70a90343..00000000 --- a/absl/strings/internal/char_map.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Character Map Class -// -// A fast, bit-vector map for 8-bit unsigned characters. -// This class is useful for non-character purposes as well. - -#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ -#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ - -#include <cstddef> -#include <cstdint> -#include <cstring> - -#include "absl/base/macros.h" -#include "absl/base/port.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace strings_internal { - -class Charmap { - public: - constexpr Charmap() : m_() {} - - // Initializes with a given char*. Note that NUL is not treated as - // a terminator, but rather a char to be flicked. - Charmap(const char* str, int len) : m_() { - while (len--) SetChar(*str++); - } - - // Initializes with a given char*. NUL is treated as a terminator - // and will not be in the charmap. - explicit Charmap(const char* str) : m_() { - while (*str) SetChar(*str++); - } - - constexpr bool contains(unsigned char c) const { - return (m_[c / 64] >> (c % 64)) & 0x1; - } - - // Returns true if and only if a character exists in both maps. - bool IntersectsWith(const Charmap& c) const { - for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) { - if ((m_[i] & c.m_[i]) != 0) return true; - } - return false; - } - - bool IsZero() const { - for (uint64_t c : m_) { - if (c != 0) return false; - } - return true; - } - - // Containing only a single specified char. - static constexpr Charmap Char(char x) { - return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1), - CharMaskForWord(x, 2), CharMaskForWord(x, 3)); - } - - // Containing all the chars in the C-string 's'. - static constexpr Charmap FromString(const char* s) { - Charmap ret; - while (*s) ret = ret | Char(*s++); - return ret; - } - - // Containing all the chars in the closed interval [lo,hi]. - static constexpr Charmap Range(char lo, char hi) { - return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), - RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); - } - - friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) { - return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], - a.m_[3] & b.m_[3]); - } - - friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) { - return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], - a.m_[3] | b.m_[3]); - } - - friend constexpr Charmap operator~(const Charmap& a) { - return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); - } - - private: - constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) - : m_{b0, b1, b2, b3} {} - - static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) { - return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) & - ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word); - } - - // All the chars in the specified word of the range [0, upper). - static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, - uint64_t word) { - return (upper <= 64 * word) - ? 0 - : (upper >= 64 * (word + 1)) - ? ~static_cast<uint64_t>(0) - : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); - } - - static constexpr uint64_t CharMaskForWord(char x, uint64_t word) { - const auto unsigned_x = static_cast<unsigned char>(x); - return (unsigned_x / 64 == word) - ? (static_cast<uint64_t>(1) << (unsigned_x % 64)) - : 0; - } - - void SetChar(char c) { - const auto unsigned_c = static_cast<unsigned char>(c); - m_[unsigned_c / 64] |= static_cast<uint64_t>(1) << (unsigned_c % 64); - } - - uint64_t m_[4]; -}; - -// Mirror the char-classifying predicates in <cctype> -constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); } -constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); } -constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); } -constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); } -constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); } -constexpr Charmap XDigitCharmap() { - return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f'); -} -constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); } -constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); } -constexpr Charmap CntrlCharmap() { - return Charmap::Range(0, 0x7f) & ~PrintCharmap(); -} -constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); } -constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); } -constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); } - -} // namespace strings_internal -ABSL_NAMESPACE_END -} // namespace absl - -#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ diff --git a/absl/strings/internal/char_map_test.cc b/absl/strings/internal/char_map_test.cc deleted file mode 100644 index d3306241..00000000 --- a/absl/strings/internal/char_map_test.cc +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/strings/internal/char_map.h" - -#include <cctype> -#include <string> -#include <vector> - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -namespace { - -constexpr absl::strings_internal::Charmap everything_map = - ~absl::strings_internal::Charmap(); -constexpr absl::strings_internal::Charmap nothing_map{}; - -TEST(Charmap, AllTests) { - const absl::strings_internal::Charmap also_nothing_map("", 0); - ASSERT_TRUE(everything_map.contains('\0')); - ASSERT_TRUE(!nothing_map.contains('\0')); - ASSERT_TRUE(!also_nothing_map.contains('\0')); - for (unsigned char ch = 1; ch != 0; ++ch) { - ASSERT_TRUE(everything_map.contains(ch)); - ASSERT_TRUE(!nothing_map.contains(ch)); - ASSERT_TRUE(!also_nothing_map.contains(ch)); - } - - const absl::strings_internal::Charmap symbols("&@#@^!@?", 5); - ASSERT_TRUE(symbols.contains('&')); - ASSERT_TRUE(symbols.contains('@')); - ASSERT_TRUE(symbols.contains('#')); - ASSERT_TRUE(symbols.contains('^')); - ASSERT_TRUE(!symbols.contains('!')); - ASSERT_TRUE(!symbols.contains('?')); - int cnt = 0; - for (unsigned char ch = 1; ch != 0; ++ch) - cnt += symbols.contains(ch); - ASSERT_EQ(cnt, 4); - - const absl::strings_internal::Charmap lets("^abcde", 3); - const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10); - const absl::strings_internal::Charmap lets3("fghij\0klmnop"); - ASSERT_TRUE(lets2.contains('k')); - ASSERT_TRUE(!lets3.contains('k')); - - ASSERT_TRUE(symbols.IntersectsWith(lets)); - ASSERT_TRUE(!lets2.IntersectsWith(lets)); - ASSERT_TRUE(lets.IntersectsWith(symbols)); - ASSERT_TRUE(!lets.IntersectsWith(lets2)); - - ASSERT_TRUE(nothing_map.IsZero()); - ASSERT_TRUE(!lets.IsZero()); -} - -namespace { -std::string Members(const absl::strings_internal::Charmap& m) { - std::string r; - for (size_t i = 0; i < 256; ++i) - if (m.contains(i)) r.push_back(i); - return r; -} - -std::string ClosedRangeString(unsigned char lo, unsigned char hi) { - // Don't depend on lo<hi. Just increment until lo==hi. - std::string s; - while (true) { - s.push_back(lo); - if (lo == hi) break; - ++lo; - } - return s; -} - -} // namespace - -TEST(Charmap, Constexpr) { - constexpr absl::strings_internal::Charmap kEmpty = nothing_map; - EXPECT_THAT(Members(kEmpty), ""); - constexpr absl::strings_internal::Charmap kA = - absl::strings_internal::Charmap::Char('A'); - EXPECT_THAT(Members(kA), "A"); - constexpr absl::strings_internal::Charmap kAZ = - absl::strings_internal::Charmap::Range('A', 'Z'); - EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); - constexpr absl::strings_internal::Charmap kIdentifier = - absl::strings_internal::Charmap::Range('0', '9') | - absl::strings_internal::Charmap::Range('A', 'Z') | - absl::strings_internal::Charmap::Range('a', 'z') | - absl::strings_internal::Charmap::Char('_'); - EXPECT_THAT(Members(kIdentifier), - "0123456789" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "_" - "abcdefghijklmnopqrstuvwxyz"); - constexpr absl::strings_internal::Charmap kAll = everything_map; - for (size_t i = 0; i < 256; ++i) { - EXPECT_TRUE(kAll.contains(i)) << i; - } - constexpr absl::strings_internal::Charmap kHello = - absl::strings_internal::Charmap::FromString("Hello, world!"); - EXPECT_THAT(Members(kHello), " !,Hdelorw"); - - // test negation and intersection - constexpr absl::strings_internal::Charmap kABC = - absl::strings_internal::Charmap::Range('A', 'Z') & - ~absl::strings_internal::Charmap::Range('D', 'Z'); - EXPECT_THAT(Members(kABC), "ABC"); -} - -TEST(Charmap, Range) { - // Exhaustive testing takes too long, so test some of the boundaries that - // are perhaps going to cause trouble. - std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15, - 16, 17, 30, 31, 32, 33, 63, 64, 65, - 127, 128, 129, 223, 224, 225, 254, 255}; - for (auto lo = poi.begin(); lo != poi.end(); ++lo) { - SCOPED_TRACE(*lo); - for (auto hi = lo; hi != poi.end(); ++hi) { - SCOPED_TRACE(*hi); - EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)), - ClosedRangeString(*lo, *hi)); - } - } -} - -bool AsBool(int x) { return static_cast<bool>(x); } - -TEST(CharmapCtype, Match) { - for (int c = 0; c < 256; ++c) { - SCOPED_TRACE(c); - SCOPED_TRACE(static_cast<char>(c)); - EXPECT_EQ(AsBool(std::isupper(c)), - absl::strings_internal::UpperCharmap().contains(c)); - EXPECT_EQ(AsBool(std::islower(c)), - absl::strings_internal::LowerCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isdigit(c)), - absl::strings_internal::DigitCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isalpha(c)), - absl::strings_internal::AlphaCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isalnum(c)), - absl::strings_internal::AlnumCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isxdigit(c)), - absl::strings_internal::XDigitCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isprint(c)), - absl::strings_internal::PrintCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isspace(c)), - absl::strings_internal::SpaceCharmap().contains(c)); - EXPECT_EQ(AsBool(std::iscntrl(c)), - absl::strings_internal::CntrlCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isblank(c)), - absl::strings_internal::BlankCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isgraph(c)), - absl::strings_internal::GraphCharmap().contains(c)); - EXPECT_EQ(AsBool(std::ispunct(c)), - absl::strings_internal::PunctCharmap().contains(c)); - } -} - -} // namespace |