From 91b861c544afd153fe800fc2bea4736a0da37533 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Wed, 23 Aug 2023 07:15:42 -0700 Subject: Add absl::CharSet. PiperOrigin-RevId: 559415517 Change-Id: I5bbc744bf00be2fd15ec7544b725d699e0d982fb --- absl/strings/BUILD.bazel | 67 ++++++---- absl/strings/CMakeLists.txt | 21 +++- absl/strings/charset.h | 164 +++++++++++++++++++++++++ absl/strings/charset_benchmark.cc | 57 +++++++++ absl/strings/charset_test.cc | 181 ++++++++++++++++++++++++++++ absl/strings/escaping.cc | 2 +- absl/strings/internal/char_map.h | 158 ------------------------ absl/strings/internal/char_map_benchmark.cc | 61 ---------- absl/strings/internal/char_map_test.cc | 172 -------------------------- 9 files changed, 465 insertions(+), 418 deletions(-) create mode 100644 absl/strings/charset.h create mode 100644 absl/strings/charset_benchmark.cc create mode 100644 absl/strings/charset_test.cc delete mode 100644 absl/strings/internal/char_map.h delete mode 100644 absl/strings/internal/char_map_benchmark.cc delete mode 100644 absl/strings/internal/char_map_test.cc (limited to 'absl/strings') diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index e3b8af48..a858d0b2 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -92,6 +92,7 @@ cc_library( "string_view.h", ], deps = [ + ":charset", ":internal", ":string_view", "//absl/base", @@ -115,7 +116,6 @@ cc_library( "internal/utf8.cc", ], hdrs = [ - "internal/char_map.h", "internal/escaping.h", "internal/ostringstream.h", "internal/resize_uninitialized.h", @@ -307,6 +307,50 @@ cc_test( ], ) +cc_test( + name = "charset_benchmark", + size = "small", + srcs = [ + "charset_benchmark.cc", + ], + copts = ABSL_TEST_COPTS, + tags = [ + "benchmark", + ], + visibility = ["//visibility:private"], + deps = [ + ":charset", + "//absl/log:check", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_library( + name = "charset", + hdrs = [ + "charset.h", + ], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + deps = [ + ":string_view", + "//absl/base:core_headers", + ], +) + +cc_test( + name = "charset_test", + size = "small", + srcs = ["charset_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":charset", + ":strings", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "cord_internal", srcs = [ @@ -1086,27 +1130,6 @@ cc_test( ], ) -cc_test( - name = "char_map_test", - srcs = ["internal/char_map_test.cc"], - copts = ABSL_TEST_COPTS, - deps = [ - ":internal", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "char_map_benchmark", - srcs = ["internal/char_map_benchmark.cc"], - copts = ABSL_TEST_COPTS, - tags = ["benchmark"], - deps = [ - ":internal", - "@com_github_google_benchmark//:benchmark_main", - ], -) - cc_test( name = "charconv_test", srcs = ["charconv_test.cc"], diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 0e588674..27e7ce4f 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -78,6 +78,7 @@ absl_cc_library( absl::strings_internal absl::base absl::bits + absl::charset absl::config absl::core_headers absl::endian @@ -89,12 +90,24 @@ absl_cc_library( PUBLIC ) +absl_cc_library( + NAME + charset + HDRS + charset.h + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::core_headers + absl::string_view + PUBLIC +) + # Internal-only target, do not depend on directly. absl_cc_library( NAME strings_internal HDRS - "internal/char_map.h" "internal/escaping.cc" "internal/escaping.h" "internal/ostringstream.h" @@ -357,13 +370,13 @@ absl_cc_test( absl_cc_test( NAME - char_map_test + charset_test SRCS - "internal/char_map_test.cc" + "charset_test.cc" COPTS ${ABSL_TEST_COPTS} DEPS - absl::strings_internal + absl::strings GTest::gmock_main ) diff --git a/absl/strings/charset.h b/absl/strings/charset.h new file mode 100644 index 00000000..ff4e81a4 --- /dev/null +++ b/absl/strings/charset.h @@ -0,0 +1,164 @@ +// Copyright 2022 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: charset.h +// ----------------------------------------------------------------------------- +// +// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned +// characters. +// +// Instances can be initialized as constexpr constants. For example: +// +// constexpr absl::CharSet kJustX = absl::CharSet::Char('x'); +// constexpr absl::CharSet kMySymbols = absl::CharSet("$@!"); +// constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z'); +// +// Multiple instances can be combined that still forms a constexpr expression. +// For example: +// +// constexpr absl::CharSet kLettersAndNumbers = +// absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9'); +// +// Several pre-defined character classes are available that mirror the methods +// from . For example: +// +// constexpr absl::CharSet kLettersAndWhitespace = +// absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace(); +// +// To check membership, use the .contains method, e.g. +// +// absl::CharSet hex_letters("abcdef"); +// hex_letters.contains('a'); // true +// hex_letters.contains('g'); // false + +#ifndef ABSL_STRINGS_CHARSET_H_ +#define ABSL_STRINGS_CHARSET_H_ + +#include +#include +#include + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/strings/string_view.h" + +namespace absl { + +class CharSet { + public: + constexpr CharSet() : m_() {} + + // Initializes with a given string_view. + constexpr explicit CharSet(absl::string_view str) : m_() { + for (char c : str) { + SetChar(static_cast(c)); + } + } + + constexpr bool contains(char c) const { + return ((m_[static_cast(c) / 64] >> + (static_cast(c) % 64)) & + 0x1) == 0x1; + } + + constexpr bool empty() const { + for (uint64_t c : m_) { + if (c != 0) return false; + } + return true; + } + + // Containing only a single specified char. + static constexpr CharSet Char(char x) { + return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1), + CharMaskForWord(x, 2), CharMaskForWord(x, 3)); + } + + // Containing all the chars in the closed interval [lo,hi]. + static constexpr CharSet Range(char lo, char hi) { + return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), + RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); + } + + friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) { + return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], + a.m_[3] & b.m_[3]); + } + + friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) { + return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], + a.m_[3] | b.m_[3]); + } + + friend constexpr CharSet operator~(const CharSet& a) { + return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); + } + + // Mirrors the char-classifying predicates in . + static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); } + static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); } + static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); } + static constexpr CharSet AsciiAlphabet() { + return AsciiLowercase() | AsciiUppercase(); + } + static constexpr CharSet AsciiAlphanumerics() { + return AsciiDigits() | AsciiAlphabet(); + } + static constexpr CharSet AsciiHexDigits() { + return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f'); + } + static constexpr CharSet AsciiPrintable() { + return CharSet::Range(0x20, 0x7e); + } + static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); } + static constexpr CharSet AsciiPunctuation() { + return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics(); + } + + private: + constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) + : m_{b0, b1, b2, b3} {} + + static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) { + return OpenRangeFromZeroForWord(static_cast(hi) + 1, word) & + ~OpenRangeFromZeroForWord(static_cast(lo), word); + } + + // All the chars in the specified word of the range [0, upper). + static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, + uint64_t word) { + return (upper <= 64 * word) ? 0 + : (upper >= 64 * (word + 1)) + ? ~static_cast(0) + : (~static_cast(0) >> (64 - upper % 64)); + } + + static constexpr uint64_t CharMaskForWord(char x, uint64_t word) { + return (static_cast(x) / 64 == word) + ? (static_cast(1) + << (static_cast(x) % 64)) + : 0; + } + + constexpr void SetChar(unsigned char c) { + m_[c / 64] |= static_cast(1) << (c % 64); + } + + uint64_t m_[4]; +}; + +} // namespace absl + +#endif // ABSL_STRINGS_CHARSET_H_ diff --git a/absl/strings/charset_benchmark.cc b/absl/strings/charset_benchmark.cc new file mode 100644 index 00000000..bf7ae560 --- /dev/null +++ b/absl/strings/charset_benchmark.cc @@ -0,0 +1,57 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "benchmark/benchmark.h" +#include "absl/log/check.h" +#include "absl/strings/charset.h" + +namespace { + +absl::CharSet MakeBenchmarkMap() { + absl::CharSet m; + uint32_t x[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc}; + for (uint32_t& t : x) t *= static_cast(0x11111111UL); + for (uint32_t i = 0; i < 256; ++i) { + if ((x[i / 32] >> (i % 32)) & 1) m = m | absl::CharSet::Char(i); + } + return m; +} + +// Micro-benchmark for Charmap::contains. +static void BM_Contains(benchmark::State& state) { + // Loop-body replicated 10 times to increase time per iteration. + // Argument continuously changed to avoid generating common subexpressions. + // Final CHECK used to discourage unwanted optimization. + const absl::CharSet benchmark_map = MakeBenchmarkMap(); + unsigned char c = 0; + int ops = 0; + for (auto _ : state) { + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + } + CHECK_NE(ops, -1); +} +BENCHMARK(BM_Contains); + +} // namespace diff --git a/absl/strings/charset_test.cc b/absl/strings/charset_test.cc new file mode 100644 index 00000000..fff943ae --- /dev/null +++ b/absl/strings/charset_test.cc @@ -0,0 +1,181 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/charset.h" + +#include +#include + +#include +#include + +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" +#include "absl/strings/string_view.h" + +namespace { + +constexpr absl::CharSet everything_map = ~absl::CharSet(); +constexpr absl::CharSet nothing_map = absl::CharSet(); + +TEST(Charmap, AllTests) { + const absl::CharSet also_nothing_map(""); + EXPECT_TRUE(everything_map.contains('\0')); + EXPECT_FALSE(nothing_map.contains('\0')); + EXPECT_FALSE(also_nothing_map.contains('\0')); + for (unsigned char ch = 1; ch != 0; ++ch) { + SCOPED_TRACE(ch); + EXPECT_TRUE(everything_map.contains(ch)); + EXPECT_FALSE(nothing_map.contains(ch)); + EXPECT_FALSE(also_nothing_map.contains(ch)); + } + + const absl::CharSet symbols(absl::string_view("&@#@^!@?", 5)); + EXPECT_TRUE(symbols.contains('&')); + EXPECT_TRUE(symbols.contains('@')); + EXPECT_TRUE(symbols.contains('#')); + EXPECT_TRUE(symbols.contains('^')); + EXPECT_FALSE(symbols.contains('!')); + EXPECT_FALSE(symbols.contains('?')); + int cnt = 0; + for (unsigned char ch = 1; ch != 0; ++ch) cnt += symbols.contains(ch); + EXPECT_EQ(cnt, 4); + + const absl::CharSet lets(absl::string_view("^abcde", 3)); + const absl::CharSet lets2(absl::string_view("fghij\0klmnop", 10)); + const absl::CharSet lets3("fghij\0klmnop"); + EXPECT_TRUE(lets2.contains('k')); + EXPECT_FALSE(lets3.contains('k')); + + EXPECT_FALSE((symbols & lets).empty()); + EXPECT_TRUE((lets2 & lets).empty()); + EXPECT_FALSE((lets & symbols).empty()); + EXPECT_TRUE((lets & lets2).empty()); + + EXPECT_TRUE(nothing_map.empty()); + EXPECT_FALSE(lets.empty()); +} + +std::string Members(const absl::CharSet& m) { + std::string r; + for (size_t i = 0; i < 256; ++i) + if (m.contains(i)) r.push_back(i); + return r; +} + +std::string ClosedRangeString(unsigned char lo, unsigned char hi) { + // Don't depend on lo poi = {0, 1, 2, 3, 4, 7, 8, 9, 15, + 16, 17, 30, 31, 32, 33, 63, 64, 65, + 127, 128, 129, 223, 224, 225, 254, 255}; + for (auto lo = poi.begin(); lo != poi.end(); ++lo) { + SCOPED_TRACE(*lo); + for (auto hi = lo; hi != poi.end(); ++hi) { + SCOPED_TRACE(*hi); + EXPECT_EQ(Members(absl::CharSet::Range(*lo, *hi)), + ClosedRangeString(*lo, *hi)); + } + } +} + +TEST(Charmap, NullByteWithStringView) { + char characters[5] = {'a', 'b', '\0', 'd', 'x'}; + absl::string_view view(characters, 5); + absl::CharSet tester(view); + EXPECT_TRUE(tester.contains('a')); + EXPECT_TRUE(tester.contains('b')); + EXPECT_TRUE(tester.contains('\0')); + EXPECT_TRUE(tester.contains('d')); + EXPECT_TRUE(tester.contains('x')); + EXPECT_FALSE(tester.contains('c')); +} + +TEST(CharmapCtype, Match) { + for (int c = 0; c < 256; ++c) { + SCOPED_TRACE(c); + SCOPED_TRACE(static_cast(c)); + EXPECT_EQ(absl::ascii_isupper(c), + absl::CharSet::AsciiUppercase().contains(c)); + EXPECT_EQ(absl::ascii_islower(c), + absl::CharSet::AsciiLowercase().contains(c)); + EXPECT_EQ(absl::ascii_isdigit(c), absl::CharSet::AsciiDigits().contains(c)); + EXPECT_EQ(absl::ascii_isalpha(c), + absl::CharSet::AsciiAlphabet().contains(c)); + EXPECT_EQ(absl::ascii_isalnum(c), + absl::CharSet::AsciiAlphanumerics().contains(c)); + EXPECT_EQ(absl::ascii_isxdigit(c), + absl::CharSet::AsciiHexDigits().contains(c)); + EXPECT_EQ(absl::ascii_isprint(c), + absl::CharSet::AsciiPrintable().contains(c)); + EXPECT_EQ(absl::ascii_isspace(c), + absl::CharSet::AsciiWhitespace().contains(c)); + EXPECT_EQ(absl::ascii_ispunct(c), + absl::CharSet::AsciiPunctuation().contains(c)); + } +} + +} // namespace diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 5bf02236..1c0eac42 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -26,7 +26,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/unaligned_access.h" #include "absl/strings/ascii.h" -#include "absl/strings/internal/char_map.h" +#include "absl/strings/charset.h" #include "absl/strings/internal/escaping.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/internal/utf8.h" diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h deleted file mode 100644 index 70a90343..00000000 --- a/absl/strings/internal/char_map.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Character Map Class -// -// A fast, bit-vector map for 8-bit unsigned characters. -// This class is useful for non-character purposes as well. - -#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ -#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ - -#include -#include -#include - -#include "absl/base/macros.h" -#include "absl/base/port.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace strings_internal { - -class Charmap { - public: - constexpr Charmap() : m_() {} - - // Initializes with a given char*. Note that NUL is not treated as - // a terminator, but rather a char to be flicked. - Charmap(const char* str, int len) : m_() { - while (len--) SetChar(*str++); - } - - // Initializes with a given char*. NUL is treated as a terminator - // and will not be in the charmap. - explicit Charmap(const char* str) : m_() { - while (*str) SetChar(*str++); - } - - constexpr bool contains(unsigned char c) const { - return (m_[c / 64] >> (c % 64)) & 0x1; - } - - // Returns true if and only if a character exists in both maps. - bool IntersectsWith(const Charmap& c) const { - for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) { - if ((m_[i] & c.m_[i]) != 0) return true; - } - return false; - } - - bool IsZero() const { - for (uint64_t c : m_) { - if (c != 0) return false; - } - return true; - } - - // Containing only a single specified char. - static constexpr Charmap Char(char x) { - return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1), - CharMaskForWord(x, 2), CharMaskForWord(x, 3)); - } - - // Containing all the chars in the C-string 's'. - static constexpr Charmap FromString(const char* s) { - Charmap ret; - while (*s) ret = ret | Char(*s++); - return ret; - } - - // Containing all the chars in the closed interval [lo,hi]. - static constexpr Charmap Range(char lo, char hi) { - return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), - RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); - } - - friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) { - return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], - a.m_[3] & b.m_[3]); - } - - friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) { - return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], - a.m_[3] | b.m_[3]); - } - - friend constexpr Charmap operator~(const Charmap& a) { - return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); - } - - private: - constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) - : m_{b0, b1, b2, b3} {} - - static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) { - return OpenRangeFromZeroForWord(static_cast(hi) + 1, word) & - ~OpenRangeFromZeroForWord(static_cast(lo), word); - } - - // All the chars in the specified word of the range [0, upper). - static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, - uint64_t word) { - return (upper <= 64 * word) - ? 0 - : (upper >= 64 * (word + 1)) - ? ~static_cast(0) - : (~static_cast(0) >> (64 - upper % 64)); - } - - static constexpr uint64_t CharMaskForWord(char x, uint64_t word) { - const auto unsigned_x = static_cast(x); - return (unsigned_x / 64 == word) - ? (static_cast(1) << (unsigned_x % 64)) - : 0; - } - - void SetChar(char c) { - const auto unsigned_c = static_cast(c); - m_[unsigned_c / 64] |= static_cast(1) << (unsigned_c % 64); - } - - uint64_t m_[4]; -}; - -// Mirror the char-classifying predicates in -constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); } -constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); } -constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); } -constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); } -constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); } -constexpr Charmap XDigitCharmap() { - return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f'); -} -constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); } -constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); } -constexpr Charmap CntrlCharmap() { - return Charmap::Range(0, 0x7f) & ~PrintCharmap(); -} -constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); } -constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); } -constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); } - -} // namespace strings_internal -ABSL_NAMESPACE_END -} // namespace absl - -#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ diff --git a/absl/strings/internal/char_map_benchmark.cc b/absl/strings/internal/char_map_benchmark.cc deleted file mode 100644 index 5cef967b..00000000 --- a/absl/strings/internal/char_map_benchmark.cc +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/strings/internal/char_map.h" - -#include - -#include "benchmark/benchmark.h" - -namespace { - -absl::strings_internal::Charmap MakeBenchmarkMap() { - absl::strings_internal::Charmap m; - uint32_t x[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc}; - for (uint32_t& t : x) t *= static_cast(0x11111111UL); - for (uint32_t i = 0; i < 256; ++i) { - if ((x[i / 32] >> (i % 32)) & 1) - m = m | absl::strings_internal::Charmap::Char(i); - } - return m; -} - -// Micro-benchmark for Charmap::contains. -void BM_Contains(benchmark::State& state) { - // Loop-body replicated 10 times to increase time per iteration. - // Argument continuously changed to avoid generating common subexpressions. - const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap(); - unsigned char c = 0; - int ops = 0; - for (auto _ : state) { - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - } - benchmark::DoNotOptimize(ops); -} -BENCHMARK(BM_Contains); - -// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith; -// their running time is data-dependent and it is not worth characterizing -// "typical" data. - -} // namespace diff --git a/absl/strings/internal/char_map_test.cc b/absl/strings/internal/char_map_test.cc deleted file mode 100644 index d3306241..00000000 --- a/absl/strings/internal/char_map_test.cc +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/strings/internal/char_map.h" - -#include -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -namespace { - -constexpr absl::strings_internal::Charmap everything_map = - ~absl::strings_internal::Charmap(); -constexpr absl::strings_internal::Charmap nothing_map{}; - -TEST(Charmap, AllTests) { - const absl::strings_internal::Charmap also_nothing_map("", 0); - ASSERT_TRUE(everything_map.contains('\0')); - ASSERT_TRUE(!nothing_map.contains('\0')); - ASSERT_TRUE(!also_nothing_map.contains('\0')); - for (unsigned char ch = 1; ch != 0; ++ch) { - ASSERT_TRUE(everything_map.contains(ch)); - ASSERT_TRUE(!nothing_map.contains(ch)); - ASSERT_TRUE(!also_nothing_map.contains(ch)); - } - - const absl::strings_internal::Charmap symbols("&@#@^!@?", 5); - ASSERT_TRUE(symbols.contains('&')); - ASSERT_TRUE(symbols.contains('@')); - ASSERT_TRUE(symbols.contains('#')); - ASSERT_TRUE(symbols.contains('^')); - ASSERT_TRUE(!symbols.contains('!')); - ASSERT_TRUE(!symbols.contains('?')); - int cnt = 0; - for (unsigned char ch = 1; ch != 0; ++ch) - cnt += symbols.contains(ch); - ASSERT_EQ(cnt, 4); - - const absl::strings_internal::Charmap lets("^abcde", 3); - const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10); - const absl::strings_internal::Charmap lets3("fghij\0klmnop"); - ASSERT_TRUE(lets2.contains('k')); - ASSERT_TRUE(!lets3.contains('k')); - - ASSERT_TRUE(symbols.IntersectsWith(lets)); - ASSERT_TRUE(!lets2.IntersectsWith(lets)); - ASSERT_TRUE(lets.IntersectsWith(symbols)); - ASSERT_TRUE(!lets.IntersectsWith(lets2)); - - ASSERT_TRUE(nothing_map.IsZero()); - ASSERT_TRUE(!lets.IsZero()); -} - -namespace { -std::string Members(const absl::strings_internal::Charmap& m) { - std::string r; - for (size_t i = 0; i < 256; ++i) - if (m.contains(i)) r.push_back(i); - return r; -} - -std::string ClosedRangeString(unsigned char lo, unsigned char hi) { - // Don't depend on lo poi = {0, 1, 2, 3, 4, 7, 8, 9, 15, - 16, 17, 30, 31, 32, 33, 63, 64, 65, - 127, 128, 129, 223, 224, 225, 254, 255}; - for (auto lo = poi.begin(); lo != poi.end(); ++lo) { - SCOPED_TRACE(*lo); - for (auto hi = lo; hi != poi.end(); ++hi) { - SCOPED_TRACE(*hi); - EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)), - ClosedRangeString(*lo, *hi)); - } - } -} - -bool AsBool(int x) { return static_cast(x); } - -TEST(CharmapCtype, Match) { - for (int c = 0; c < 256; ++c) { - SCOPED_TRACE(c); - SCOPED_TRACE(static_cast(c)); - EXPECT_EQ(AsBool(std::isupper(c)), - absl::strings_internal::UpperCharmap().contains(c)); - EXPECT_EQ(AsBool(std::islower(c)), - absl::strings_internal::LowerCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isdigit(c)), - absl::strings_internal::DigitCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isalpha(c)), - absl::strings_internal::AlphaCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isalnum(c)), - absl::strings_internal::AlnumCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isxdigit(c)), - absl::strings_internal::XDigitCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isprint(c)), - absl::strings_internal::PrintCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isspace(c)), - absl::strings_internal::SpaceCharmap().contains(c)); - EXPECT_EQ(AsBool(std::iscntrl(c)), - absl::strings_internal::CntrlCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isblank(c)), - absl::strings_internal::BlankCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isgraph(c)), - absl::strings_internal::GraphCharmap().contains(c)); - EXPECT_EQ(AsBool(std::ispunct(c)), - absl::strings_internal::PunctCharmap().contains(c)); - } -} - -} // namespace -- cgit v1.2.3