diff options
author | Benjamin Barenblat <bbaren@google.com> | 2024-09-03 11:49:29 -0400 |
---|---|---|
committer | Benjamin Barenblat <bbaren@google.com> | 2024-09-03 11:49:29 -0400 |
commit | c1afa8b8238c25591ca80d068477aa7d4ce05fc8 (patch) | |
tree | 284a9f8b319de5783ff83ad004a9e390cb60fd0d /absl/debugging/internal/utf8_for_code_point_test.cc | |
parent | 23778b53f420f54eebc195dd8430e79bda165e5b (diff) | |
parent | 4447c7562e3bc702ade25105912dce503f0c4010 (diff) |
Merge new upstream LTS 20240722.0
Diffstat (limited to 'absl/debugging/internal/utf8_for_code_point_test.cc')
-rw-r--r-- | absl/debugging/internal/utf8_for_code_point_test.cc | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/absl/debugging/internal/utf8_for_code_point_test.cc b/absl/debugging/internal/utf8_for_code_point_test.cc new file mode 100644 index 00000000..dd0591ae --- /dev/null +++ b/absl/debugging/internal/utf8_for_code_point_test.cc @@ -0,0 +1,175 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/utf8_for_code_point.h" + +#include <cstdint> + +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +TEST(Utf8ForCodePointTest, RecognizesTheSmallestCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0}); + ASSERT_EQ(utf8.length, 1); + EXPECT_EQ(utf8.bytes[0], '\0'); +} + +TEST(Utf8ForCodePointTest, RecognizesAsciiSmallA) { + Utf8ForCodePoint utf8(uint64_t{'a'}); + ASSERT_EQ(utf8.length, 1); + EXPECT_EQ(utf8.bytes[0], 'a'); +} + +TEST(Utf8ForCodePointTest, RecognizesTheLargestOneByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x7f}); + ASSERT_EQ(utf8.length, 1); + EXPECT_EQ(utf8.bytes[0], '\x7f'); +} + +TEST(Utf8ForCodePointTest, RecognizesTheSmallestTwoByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x80}); + ASSERT_EQ(utf8.length, 2); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xc2)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x80)); +} + +TEST(Utf8ForCodePointTest, RecognizesSmallNWithTilde) { + Utf8ForCodePoint utf8(uint64_t{0xf1}); + ASSERT_EQ(utf8.length, 2); + const char* want = "ñ"; + EXPECT_EQ(utf8.bytes[0], want[0]); + EXPECT_EQ(utf8.bytes[1], want[1]); +} + +TEST(Utf8ForCodePointTest, RecognizesCapitalPi) { + Utf8ForCodePoint utf8(uint64_t{0x3a0}); + ASSERT_EQ(utf8.length, 2); + const char* want = "Π"; + EXPECT_EQ(utf8.bytes[0], want[0]); + EXPECT_EQ(utf8.bytes[1], want[1]); +} + +TEST(Utf8ForCodePointTest, RecognizesTheLargestTwoByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x7ff}); + ASSERT_EQ(utf8.length, 2); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xdf)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0xbf)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheSmallestThreeByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x800}); + ASSERT_EQ(utf8.length, 3); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xe0)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0xa0)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0x80)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheChineseCharacterZhong1AsInZhong1Wen2) { + Utf8ForCodePoint utf8(uint64_t{0x4e2d}); + ASSERT_EQ(utf8.length, 3); + const char* want = "中"; + EXPECT_EQ(utf8.bytes[0], want[0]); + EXPECT_EQ(utf8.bytes[1], want[1]); + EXPECT_EQ(utf8.bytes[2], want[2]); +} + +TEST(Utf8ForCodePointTest, RecognizesOneBeforeTheSmallestSurrogate) { + Utf8ForCodePoint utf8(uint64_t{0xd7ff}); + ASSERT_EQ(utf8.length, 3); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xed)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x9f)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0xbf)); +} + +TEST(Utf8ForCodePointTest, RejectsTheSmallestSurrogate) { + Utf8ForCodePoint utf8(uint64_t{0xd800}); + EXPECT_EQ(utf8.length, 0); +} + +TEST(Utf8ForCodePointTest, RejectsTheLargestSurrogate) { + Utf8ForCodePoint utf8(uint64_t{0xdfff}); + EXPECT_EQ(utf8.length, 0); +} + +TEST(Utf8ForCodePointTest, RecognizesOnePastTheLargestSurrogate) { + Utf8ForCodePoint utf8(uint64_t{0xe000}); + ASSERT_EQ(utf8.length, 3); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xee)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x80)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0x80)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheLargestThreeByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0xffff}); + ASSERT_EQ(utf8.length, 3); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xef)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0xbf)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0xbf)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheSmallestFourByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x10000}); + ASSERT_EQ(utf8.length, 4); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xf0)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x90)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0x80)); + EXPECT_EQ(utf8.bytes[3], static_cast<char>(0x80)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheJackOfHearts) { + Utf8ForCodePoint utf8(uint64_t{0x1f0bb}); + ASSERT_EQ(utf8.length, 4); + const char* want = "🂻"; + EXPECT_EQ(utf8.bytes[0], want[0]); + EXPECT_EQ(utf8.bytes[1], want[1]); + EXPECT_EQ(utf8.bytes[2], want[2]); + EXPECT_EQ(utf8.bytes[3], want[3]); +} + +TEST(Utf8ForCodePointTest, RecognizesTheLargestFourByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x10ffff}); + ASSERT_EQ(utf8.length, 4); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xf4)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x8f)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0xbf)); + EXPECT_EQ(utf8.bytes[3], static_cast<char>(0xbf)); +} + +TEST(Utf8ForCodePointTest, RejectsTheSmallestOverlargeCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x110000}); + EXPECT_EQ(utf8.length, 0); +} + +TEST(Utf8ForCodePointTest, RejectsAThroughlyOverlargeCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0xffffffff00000000}); + EXPECT_EQ(utf8.length, 0); +} + +TEST(Utf8ForCodePointTest, OkReturnsTrueForAValidCodePoint) { + EXPECT_TRUE(Utf8ForCodePoint(uint64_t{0}).ok()); +} + +TEST(Utf8ForCodePointTest, OkReturnsFalseForAnInvalidCodePoint) { + EXPECT_FALSE(Utf8ForCodePoint(uint64_t{0xffffffff00000000}).ok()); +} + +} // namespace +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl |