summary | refs | log | tree | commit | diff
path: root/absl/strings/internal
diff options
context:
space:
mode:
author: Abseil Team <absl-team@google.com>, 2023-08-23 07:15:42 -0700
committer: Copybara-Service <copybara-worker@google.com>, 2023-08-23 07:16:31 -0700
commit: 91b861c544afd153fe800fc2bea4736a0da37533 (patch)
tree: c65d0d322bdadd9fe1f9bad07ba3973549a941b3 /absl/strings/internal
parent: 7aef7808d6dbe46ab95b37e6c67d1350c1da016b (diff)
Add absl::CharSet.
PiperOrigin-RevId: 559415517 Change-Id: I5bbc744bf00be2fd15ec7544b725d699e0d982fb
Diffstat (limited to 'absl/strings/internal')
-rw-r--r-- absl/strings/internal/char_map.h 158
-rw-r--r-- absl/strings/internal/char_map_benchmark.cc 61
-rw-r--r-- absl/strings/internal/char_map_test.cc 172
3 files changed, 0 insertions, 391 deletions
diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h
deleted file mode 100644
index 70a90343..00000000
--- a/absl/strings/internal/char_map.h
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2017 The Abseil Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Character Map Class
-//
-// A fast, bit-vector map for 8-bit unsigned characters.
-// This class is useful for non-character purposes as well.
-
-#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
-#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
-
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-
-#include "absl/base/macros.h"
-#include "absl/base/port.h"
-
-namespace absl {
-ABSL_NAMESPACE_BEGIN
-namespace strings_internal {
-
-class Charmap {  // Set of 8-bit values stored as a 256-bit vector (four 64-bit words).
- public:
- constexpr Charmap() : m_() {}  // Value-initializes m_, so the map starts empty.
-
- // Builds the map from exactly `len` chars starting at `str`. NUL is not
- // treated as a terminator; it is added to the map like any other char.
- Charmap(const char* str, int len) : m_() {
- while (len--) SetChar(*str++);  // Stops when len reaches 0. NOTE(review): assumes len >= 0 — confirm with callers.
- }
-
- // Builds the map from a NUL-terminated string. The terminating NUL ends
- // the scan and is not added to the map.
- explicit Charmap(const char* str) : m_() {
- while (*str) SetChar(*str++);
- }
-
- constexpr bool contains(unsigned char c) const {  // True iff the bit for c is set.
- return (m_[c / 64] >> (c % 64)) & 0x1;  // c / 64 picks the word, c % 64 the bit within it.
- }
-
- // Returns true if and only if at least one character is in both maps.
- bool IntersectsWith(const Charmap& c) const {
- for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) {
- if ((m_[i] & c.m_[i]) != 0) return true;  // A common set bit in any word is a shared char.
- }
- return false;
- }
-
- bool IsZero() const {  // True iff all four words are 0, i.e. the map is empty.
- for (uint64_t c : m_) {
- if (c != 0) return false;
- }
- return true;
- }
-
- // Returns a map containing only the single char `x`.
- static constexpr Charmap Char(char x) {
- return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),  // At most one of the four masks is nonzero.
- CharMaskForWord(x, 2), CharMaskForWord(x, 3));
- }
-
- // Returns a map containing every char of the C-string `s` before its NUL.
- static constexpr Charmap FromString(const char* s) {
- Charmap ret;
- while (*s) ret = ret | Char(*s++);  // Unions in one char per step; the terminating NUL is not added.
- return ret;
- }
-
- // Returns a map containing all the chars in the closed interval [lo,hi].
- static constexpr Charmap Range(char lo, char hi) {
- return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),  // Each 64-bit word is computed independently.
- RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
- }
-
- friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) {  // Set intersection, word by word.
- return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
- a.m_[3] & b.m_[3]);
- }
-
- friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) {  // Set union, word by word.
- return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
- a.m_[3] | b.m_[3]);
- }
-
- friend constexpr Charmap operator~(const Charmap& a) {  // Complement: flips all 256 bits.
- return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
- }
-
- private:
- constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)  // b0 covers values 0-63, b1 64-127, b2 128-191, b3 192-255.
- : m_{b0, b1, b2, b3} {}
-
- static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {  // Bits of `word` for [lo, hi], taken as unsigned values.
- return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &  // Chars in [0, hi + 1) ...
- ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);  // ... minus chars in [0, lo).
- }
-
- // Returns the bits of word `word` for all the chars in the range [0, upper).
- static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
- uint64_t word) {
- return (upper <= 64 * word)
- ? 0  // The whole word is at or above upper: no bits set.
- : (upper >= 64 * (word + 1))
- ? ~static_cast<uint64_t>(0)  // The whole word is below upper: all bits set.
- : (~static_cast<uint64_t>(0) >> (64 - upper % 64));  // Partial word: low (upper % 64) bits; upper % 64 is 1..63 on this branch.
- }
-
- static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {  // Single-bit mask for x if x falls in `word`, else 0.
- const auto unsigned_x = static_cast<unsigned char>(x);  // Maps a possibly signed char to 0-255.
- return (unsigned_x / 64 == word)
- ? (static_cast<uint64_t>(1) << (unsigned_x % 64))
- : 0;
- }
-
- void SetChar(char c) {  // Adds c to the map by setting its bit.
- const auto unsigned_c = static_cast<unsigned char>(c);  // Maps a possibly signed char to 0-255 before indexing.
- m_[unsigned_c / 64] |= static_cast<uint64_t>(1) << (unsigned_c % 64);
- }
-
- uint64_t m_[4];  // 4 x 64 = 256 bits, one per unsigned char value.
-};
-
-// Charmap factories that mirror the char-classifying predicates in <cctype>.
-constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); }  // Counterpart of isupper.
-constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); }  // Counterpart of islower.
-constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); }  // Counterpart of isdigit.
-constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); }  // Counterpart of isalpha: lower or upper.
-constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); }  // Counterpart of isalnum: digit or alpha.
-constexpr Charmap XDigitCharmap() {  // Counterpart of isxdigit: 0-9, A-F, a-f.
- return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f');
-}
-constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); }  // Counterpart of isprint: values 0x20 through 0x7e.
-constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); }  // Counterpart of isspace.
-constexpr Charmap CntrlCharmap() {  // Counterpart of iscntrl: values 0-0x7f that are not in PrintCharmap().
- return Charmap::Range(0, 0x7f) & ~PrintCharmap();
-}
-constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); }  // Counterpart of isblank: tab and space.
-constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); }  // Counterpart of isgraph: printable, not whitespace.
-constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); }  // Counterpart of ispunct: graphic, not alphanumeric.
-
-} // namespace strings_internal
-ABSL_NAMESPACE_END
-} // namespace absl
-
-#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
diff --git a/absl/strings/internal/char_map_benchmark.cc b/absl/strings/internal/char_map_benchmark.cc
deleted file mode 100644
index 5cef967b..00000000
--- a/absl/strings/internal/char_map_benchmark.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2017 The Abseil Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "absl/strings/internal/char_map.h"
-
-#include <cstdint>
-
-#include "benchmark/benchmark.h"
-
-namespace {
-
-absl::strings_internal::Charmap MakeBenchmarkMap() {  // Builds a fixed Charmap with a mixed bit pattern for the benchmark.
- absl::strings_internal::Charmap m;
- uint32_t x[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc};  // 8 x 32 bits = one membership bit per value 0-255.
- for (uint32_t& t : x) t *= static_cast<uint32_t>(0x11111111UL);  // Repeats each nibble across its word, e.g. 0x3 -> 0x33333333.
- for (uint32_t i = 0; i < 256; ++i) {
- if ((x[i / 32] >> (i % 32)) & 1)  // Bit i of the 256-bit pattern decides whether i is in the map.
- m = m | absl::strings_internal::Charmap::Char(i);
- }
- return m;
-}
-
-// Micro-benchmark for Charmap::contains.
-void BM_Contains(benchmark::State& state) {
- // The loop body is replicated 10 times to increase time per iteration.
- // The argument changes on every call to avoid common subexpressions.
- const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap();
- unsigned char c = 0;  // Wraps from 255 back to 0 as it is incremented.
- int ops = 0;  // Running count of contains() hits; used only to keep the calls live.
- for (auto _ : state) {
- ops += benchmark_map.contains(c++);
- ops += benchmark_map.contains(c++);
- ops += benchmark_map.contains(c++);
- ops += benchmark_map.contains(c++);
- ops += benchmark_map.contains(c++);
- ops += benchmark_map.contains(c++);
- ops += benchmark_map.contains(c++);
- ops += benchmark_map.contains(c++);
- ops += benchmark_map.contains(c++);
- ops += benchmark_map.contains(c++);
- }
- benchmark::DoNotOptimize(ops);  // Keeps the result observable so the calls above are not eliminated.
-}
-BENCHMARK(BM_Contains);
-
-// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith;
-// their running time is data-dependent and it is not worth characterizing
-// "typical" data.
-
-} // namespace
diff --git a/absl/strings/internal/char_map_test.cc b/absl/strings/internal/char_map_test.cc
deleted file mode 100644
index d3306241..00000000
--- a/absl/strings/internal/char_map_test.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2017 The Abseil Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "absl/strings/internal/char_map.h"
-
-#include <cctype>
-#include <string>
-#include <vector>
-
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-
-namespace {
-
-constexpr absl::strings_internal::Charmap everything_map =
- ~absl::strings_internal::Charmap();  // Complement of the empty map: contains all 256 values.
-constexpr absl::strings_internal::Charmap nothing_map{};  // Default-constructed: contains no values.
-
-TEST(Charmap, AllTests) {  // Covers both string constructors, contains, IntersectsWith and IsZero.
- const absl::strings_internal::Charmap also_nothing_map("", 0);  // len 0 adds nothing, so this map is empty.
- ASSERT_TRUE(everything_map.contains('\0'));
- ASSERT_TRUE(!nothing_map.contains('\0'));
- ASSERT_TRUE(!also_nothing_map.contains('\0'));
- for (unsigned char ch = 1; ch != 0; ++ch) {  // Visits 1..255 and stops when ch wraps to 0.
- ASSERT_TRUE(everything_map.contains(ch));
- ASSERT_TRUE(!nothing_map.contains(ch));
- ASSERT_TRUE(!also_nothing_map.contains(ch));
- }
-
- const absl::strings_internal::Charmap symbols("&@#@^!@?", 5);  // Only the first 5 chars, "&@#@^", are added.
- ASSERT_TRUE(symbols.contains('&'));
- ASSERT_TRUE(symbols.contains('@'));
- ASSERT_TRUE(symbols.contains('#'));
- ASSERT_TRUE(symbols.contains('^'));
- ASSERT_TRUE(!symbols.contains('!'));
- ASSERT_TRUE(!symbols.contains('?'));
- int cnt = 0;
- for (unsigned char ch = 1; ch != 0; ++ch)
- cnt += symbols.contains(ch);
- ASSERT_EQ(cnt, 4);  // '@' occurs twice in the prefix, leaving 4 distinct chars.
-
- const absl::strings_internal::Charmap lets("^abcde", 3);  // Adds "^ab".
- const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10);  // Length form: the embedded NUL and "klmn" are included.
- const absl::strings_internal::Charmap lets3("fghij\0klmnop");  // C-string form: stops at the embedded NUL.
- ASSERT_TRUE(lets2.contains('k'));
- ASSERT_TRUE(!lets3.contains('k'));
-
- ASSERT_TRUE(symbols.IntersectsWith(lets));  // Both contain '^'.
- ASSERT_TRUE(!lets2.IntersectsWith(lets));
- ASSERT_TRUE(lets.IntersectsWith(symbols));
- ASSERT_TRUE(!lets.IntersectsWith(lets2));
-
- ASSERT_TRUE(nothing_map.IsZero());
- ASSERT_TRUE(!lets.IsZero());
-}
-
-namespace {
-std::string Members(const absl::strings_internal::Charmap& m) {  // Returns the chars contained in m as a string.
- std::string r;
- for (size_t i = 0; i < 256; ++i)  // Ascending scan, so the result is ordered by char value.
- if (m.contains(i)) r.push_back(i);
- return r;
-}
-
-std::string ClosedRangeString(unsigned char lo, unsigned char hi) {  // Returns the chars from lo through hi, inclusive.
- // Does not depend on lo<hi; lo is incremented until it equals hi.
- std::string s;
- while (true) {
- s.push_back(lo);
- if (lo == hi) break;  // Checked after the push, so hi itself is included.
- ++lo;  // Wraps from 255 to 0, so the loop still ends if lo starts above hi.
- }
- return s;
-}
-
-} // namespace
-
-TEST(Charmap, Constexpr) {  // Each map below is declared constexpr, so it must be a constant expression.
- constexpr absl::strings_internal::Charmap kEmpty = nothing_map;
- EXPECT_THAT(Members(kEmpty), "");
- constexpr absl::strings_internal::Charmap kA =
- absl::strings_internal::Charmap::Char('A');
- EXPECT_THAT(Members(kA), "A");
- constexpr absl::strings_internal::Charmap kAZ =
- absl::strings_internal::Charmap::Range('A', 'Z');
- EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
- constexpr absl::strings_internal::Charmap kIdentifier =
- absl::strings_internal::Charmap::Range('0', '9') |
- absl::strings_internal::Charmap::Range('A', 'Z') |
- absl::strings_internal::Charmap::Range('a', 'z') |
- absl::strings_internal::Charmap::Char('_');
- EXPECT_THAT(Members(kIdentifier),
- "0123456789"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "_"
- "abcdefghijklmnopqrstuvwxyz");
- constexpr absl::strings_internal::Charmap kAll = everything_map;
- for (size_t i = 0; i < 256; ++i) {
- EXPECT_TRUE(kAll.contains(i)) << i;  // Streams i so a failure names the missing value.
- }
- constexpr absl::strings_internal::Charmap kHello =
- absl::strings_internal::Charmap::FromString("Hello, world!");
- EXPECT_THAT(Members(kHello), " !,Hdelorw");  // Distinct chars of the input, ordered by char value.
-
- // Test negation and intersection.
- constexpr absl::strings_internal::Charmap kABC =
- absl::strings_internal::Charmap::Range('A', 'Z') &
- ~absl::strings_internal::Charmap::Range('D', 'Z');  // A-Z minus D-Z leaves A, B, C.
- EXPECT_THAT(Members(kABC), "ABC");
-}
-
-TEST(Charmap, Range) {  // Checks Charmap::Range against ClosedRangeString for selected bounds.
- // Exhaustive testing takes too long, so test some of the boundaries that
- // are most likely to cause trouble.
- std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15,
- 16, 17, 30, 31, 32, 33, 63, 64, 65,
- 127, 128, 129, 223, 224, 225, 254, 255};
- for (auto lo = poi.begin(); lo != poi.end(); ++lo) {
- SCOPED_TRACE(*lo);
- for (auto hi = lo; hi != poi.end(); ++hi) {  // hi starts at lo and poi is ascending, so only lo <= hi pairs run.
- SCOPED_TRACE(*hi);
- EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)),
- ClosedRangeString(*lo, *hi));
- }
- }
-}
-
-bool AsBool(int x) { return static_cast<bool>(x); }  // Converts an int result (nonzero/zero) to true/false.
-
-TEST(CharmapCtype, Match) {  // Compares each *Charmap() with its <cctype> predicate for every value 0..255.
- for (int c = 0; c < 256; ++c) {  // NOTE(review): std::is* results depend on the C locale — presumably the default "C" locale; confirm.
- SCOPED_TRACE(c);
- SCOPED_TRACE(static_cast<char>(c));
- EXPECT_EQ(AsBool(std::isupper(c)),
- absl::strings_internal::UpperCharmap().contains(c));
- EXPECT_EQ(AsBool(std::islower(c)),
- absl::strings_internal::LowerCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isdigit(c)),
- absl::strings_internal::DigitCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isalpha(c)),
- absl::strings_internal::AlphaCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isalnum(c)),
- absl::strings_internal::AlnumCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isxdigit(c)),
- absl::strings_internal::XDigitCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isprint(c)),
- absl::strings_internal::PrintCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isspace(c)),
- absl::strings_internal::SpaceCharmap().contains(c));
- EXPECT_EQ(AsBool(std::iscntrl(c)),
- absl::strings_internal::CntrlCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isblank(c)),
- absl::strings_internal::BlankCharmap().contains(c));
- EXPECT_EQ(AsBool(std::isgraph(c)),
- absl::strings_internal::GraphCharmap().contains(c));
- EXPECT_EQ(AsBool(std::ispunct(c)),
- absl::strings_internal::PunctCharmap().contains(c));
- }
-}
-
-} // namespace