diff options
author | Abseil Team <absl-team@google.com> | 2023-08-23 07:15:42 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2023-08-23 07:16:31 -0700 |
commit | 91b861c544afd153fe800fc2bea4736a0da37533 (patch) | |
tree | c65d0d322bdadd9fe1f9bad07ba3973549a941b3 /absl/strings/internal | |
parent | 7aef7808d6dbe46ab95b37e6c67d1350c1da016b (diff) |
Add absl::CharSet.
PiperOrigin-RevId: 559415517
Change-Id: I5bbc744bf00be2fd15ec7544b725d699e0d982fb
Diffstat (limited to 'absl/strings/internal')
-rw-r--r-- | absl/strings/internal/char_map.h | 158 | ||||
-rw-r--r-- | absl/strings/internal/char_map_benchmark.cc | 61 | ||||
-rw-r--r-- | absl/strings/internal/char_map_test.cc | 172 |
3 files changed, 0 insertions, 391 deletions
diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h deleted file mode 100644 index 70a90343..00000000 --- a/absl/strings/internal/char_map.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Character Map Class -// -// A fast, bit-vector map for 8-bit unsigned characters. -// This class is useful for non-character purposes as well. - -#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ -#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ - -#include <cstddef> -#include <cstdint> -#include <cstring> - -#include "absl/base/macros.h" -#include "absl/base/port.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace strings_internal { - -class Charmap { - public: - constexpr Charmap() : m_() {} - - // Initializes with a given char*. Note that NUL is not treated as - // a terminator, but rather a char to be flicked. - Charmap(const char* str, int len) : m_() { - while (len--) SetChar(*str++); - } - - // Initializes with a given char*. NUL is treated as a terminator - // and will not be in the charmap. - explicit Charmap(const char* str) : m_() { - while (*str) SetChar(*str++); - } - - constexpr bool contains(unsigned char c) const { - return (m_[c / 64] >> (c % 64)) & 0x1; - } - - // Returns true if and only if a character exists in both maps. - bool IntersectsWith(const Charmap& c) const { - for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) { - if ((m_[i] & c.m_[i]) != 0) return true; - } - return false; - } - - bool IsZero() const { - for (uint64_t c : m_) { - if (c != 0) return false; - } - return true; - } - - // Containing only a single specified char. - static constexpr Charmap Char(char x) { - return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1), - CharMaskForWord(x, 2), CharMaskForWord(x, 3)); - } - - // Containing all the chars in the C-string 's'. - static constexpr Charmap FromString(const char* s) { - Charmap ret; - while (*s) ret = ret | Char(*s++); - return ret; - } - - // Containing all the chars in the closed interval [lo,hi]. - static constexpr Charmap Range(char lo, char hi) { - return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), - RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); - } - - friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) { - return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], - a.m_[3] & b.m_[3]); - } - - friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) { - return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], - a.m_[3] | b.m_[3]); - } - - friend constexpr Charmap operator~(const Charmap& a) { - return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); - } - - private: - constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) - : m_{b0, b1, b2, b3} {} - - static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) { - return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) & - ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word); - } - - // All the chars in the specified word of the range [0, upper). - static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, - uint64_t word) { - return (upper <= 64 * word) - ? 0 - : (upper >= 64 * (word + 1)) - ? ~static_cast<uint64_t>(0) - : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); - } - - static constexpr uint64_t CharMaskForWord(char x, uint64_t word) { - const auto unsigned_x = static_cast<unsigned char>(x); - return (unsigned_x / 64 == word) - ? (static_cast<uint64_t>(1) << (unsigned_x % 64)) - : 0; - } - - void SetChar(char c) { - const auto unsigned_c = static_cast<unsigned char>(c); - m_[unsigned_c / 64] |= static_cast<uint64_t>(1) << (unsigned_c % 64); - } - - uint64_t m_[4]; -}; - -// Mirror the char-classifying predicates in <cctype> -constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); } -constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); } -constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); } -constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); } -constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); } -constexpr Charmap XDigitCharmap() { - return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f'); -} -constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); } -constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); } -constexpr Charmap CntrlCharmap() { - return Charmap::Range(0, 0x7f) & ~PrintCharmap(); -} -constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); } -constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); } -constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); } - -} // namespace strings_internal -ABSL_NAMESPACE_END -} // namespace absl - -#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ diff --git a/absl/strings/internal/char_map_benchmark.cc b/absl/strings/internal/char_map_benchmark.cc deleted file mode 100644 index 5cef967b..00000000 --- a/absl/strings/internal/char_map_benchmark.cc +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/strings/internal/char_map.h" - -#include <cstdint> - -#include "benchmark/benchmark.h" - -namespace { - -absl::strings_internal::Charmap MakeBenchmarkMap() { - absl::strings_internal::Charmap m; - uint32_t x[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc}; - for (uint32_t& t : x) t *= static_cast<uint32_t>(0x11111111UL); - for (uint32_t i = 0; i < 256; ++i) { - if ((x[i / 32] >> (i % 32)) & 1) - m = m | absl::strings_internal::Charmap::Char(i); - } - return m; -} - -// Micro-benchmark for Charmap::contains. -void BM_Contains(benchmark::State& state) { - // Loop-body replicated 10 times to increase time per iteration. - // Argument continuously changed to avoid generating common subexpressions. - const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap(); - unsigned char c = 0; - int ops = 0; - for (auto _ : state) { - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - ops += benchmark_map.contains(c++); - } - benchmark::DoNotOptimize(ops); -} -BENCHMARK(BM_Contains); - -// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith; -// their running time is data-dependent and it is not worth characterizing -// "typical" data. - -} // namespace diff --git a/absl/strings/internal/char_map_test.cc b/absl/strings/internal/char_map_test.cc deleted file mode 100644 index d3306241..00000000 --- a/absl/strings/internal/char_map_test.cc +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/strings/internal/char_map.h" - -#include <cctype> -#include <string> -#include <vector> - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -namespace { - -constexpr absl::strings_internal::Charmap everything_map = - ~absl::strings_internal::Charmap(); -constexpr absl::strings_internal::Charmap nothing_map{}; - -TEST(Charmap, AllTests) { - const absl::strings_internal::Charmap also_nothing_map("", 0); - ASSERT_TRUE(everything_map.contains('\0')); - ASSERT_TRUE(!nothing_map.contains('\0')); - ASSERT_TRUE(!also_nothing_map.contains('\0')); - for (unsigned char ch = 1; ch != 0; ++ch) { - ASSERT_TRUE(everything_map.contains(ch)); - ASSERT_TRUE(!nothing_map.contains(ch)); - ASSERT_TRUE(!also_nothing_map.contains(ch)); - } - - const absl::strings_internal::Charmap symbols("&@#@^!@?", 5); - ASSERT_TRUE(symbols.contains('&')); - ASSERT_TRUE(symbols.contains('@')); - ASSERT_TRUE(symbols.contains('#')); - ASSERT_TRUE(symbols.contains('^')); - ASSERT_TRUE(!symbols.contains('!')); - ASSERT_TRUE(!symbols.contains('?')); - int cnt = 0; - for (unsigned char ch = 1; ch != 0; ++ch) - cnt += symbols.contains(ch); - ASSERT_EQ(cnt, 4); - - const absl::strings_internal::Charmap lets("^abcde", 3); - const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10); - const absl::strings_internal::Charmap lets3("fghij\0klmnop"); - ASSERT_TRUE(lets2.contains('k')); - ASSERT_TRUE(!lets3.contains('k')); - - ASSERT_TRUE(symbols.IntersectsWith(lets)); - ASSERT_TRUE(!lets2.IntersectsWith(lets)); - ASSERT_TRUE(lets.IntersectsWith(symbols)); - ASSERT_TRUE(!lets.IntersectsWith(lets2)); - - ASSERT_TRUE(nothing_map.IsZero()); - ASSERT_TRUE(!lets.IsZero()); -} - -namespace { -std::string Members(const absl::strings_internal::Charmap& m) { - std::string r; - for (size_t i = 0; i < 256; ++i) - if (m.contains(i)) r.push_back(i); - return r; -} - -std::string ClosedRangeString(unsigned char lo, unsigned char hi) { - // Don't depend on lo<hi. Just increment until lo==hi. - std::string s; - while (true) { - s.push_back(lo); - if (lo == hi) break; - ++lo; - } - return s; -} - -} // namespace - -TEST(Charmap, Constexpr) { - constexpr absl::strings_internal::Charmap kEmpty = nothing_map; - EXPECT_THAT(Members(kEmpty), ""); - constexpr absl::strings_internal::Charmap kA = - absl::strings_internal::Charmap::Char('A'); - EXPECT_THAT(Members(kA), "A"); - constexpr absl::strings_internal::Charmap kAZ = - absl::strings_internal::Charmap::Range('A', 'Z'); - EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); - constexpr absl::strings_internal::Charmap kIdentifier = - absl::strings_internal::Charmap::Range('0', '9') | - absl::strings_internal::Charmap::Range('A', 'Z') | - absl::strings_internal::Charmap::Range('a', 'z') | - absl::strings_internal::Charmap::Char('_'); - EXPECT_THAT(Members(kIdentifier), - "0123456789" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "_" - "abcdefghijklmnopqrstuvwxyz"); - constexpr absl::strings_internal::Charmap kAll = everything_map; - for (size_t i = 0; i < 256; ++i) { - EXPECT_TRUE(kAll.contains(i)) << i; - } - constexpr absl::strings_internal::Charmap kHello = - absl::strings_internal::Charmap::FromString("Hello, world!"); - EXPECT_THAT(Members(kHello), " !,Hdelorw"); - - // test negation and intersection - constexpr absl::strings_internal::Charmap kABC = - absl::strings_internal::Charmap::Range('A', 'Z') & - ~absl::strings_internal::Charmap::Range('D', 'Z'); - EXPECT_THAT(Members(kABC), "ABC"); -} - -TEST(Charmap, Range) { - // Exhaustive testing takes too long, so test some of the boundaries that - // are perhaps going to cause trouble. - std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15, - 16, 17, 30, 31, 32, 33, 63, 64, 65, - 127, 128, 129, 223, 224, 225, 254, 255}; - for (auto lo = poi.begin(); lo != poi.end(); ++lo) { - SCOPED_TRACE(*lo); - for (auto hi = lo; hi != poi.end(); ++hi) { - SCOPED_TRACE(*hi); - EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)), - ClosedRangeString(*lo, *hi)); - } - } -} - -bool AsBool(int x) { return static_cast<bool>(x); } - -TEST(CharmapCtype, Match) { - for (int c = 0; c < 256; ++c) { - SCOPED_TRACE(c); - SCOPED_TRACE(static_cast<char>(c)); - EXPECT_EQ(AsBool(std::isupper(c)), - absl::strings_internal::UpperCharmap().contains(c)); - EXPECT_EQ(AsBool(std::islower(c)), - absl::strings_internal::LowerCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isdigit(c)), - absl::strings_internal::DigitCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isalpha(c)), - absl::strings_internal::AlphaCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isalnum(c)), - absl::strings_internal::AlnumCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isxdigit(c)), - absl::strings_internal::XDigitCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isprint(c)), - absl::strings_internal::PrintCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isspace(c)), - absl::strings_internal::SpaceCharmap().contains(c)); - EXPECT_EQ(AsBool(std::iscntrl(c)), - absl::strings_internal::CntrlCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isblank(c)), - absl::strings_internal::BlankCharmap().contains(c)); - EXPECT_EQ(AsBool(std::isgraph(c)), - absl::strings_internal::GraphCharmap().contains(c)); - EXPECT_EQ(AsBool(std::ispunct(c)), - absl::strings_internal::PunctCharmap().contains(c)); - } -} - -} // namespace |