diff options
Diffstat (limited to 'absl/debugging/internal/decode_rust_punycode_test.cc')
-rw-r--r-- | absl/debugging/internal/decode_rust_punycode_test.cc | 606 |
1 files changed, 606 insertions, 0 deletions
diff --git a/absl/debugging/internal/decode_rust_punycode_test.cc b/absl/debugging/internal/decode_rust_punycode_test.cc new file mode 100644 index 00000000..78d1c332 --- /dev/null +++ b/absl/debugging/internal/decode_rust_punycode_test.cc @@ -0,0 +1,606 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/decode_rust_punycode.h" + +#include <cstddef> +#include <cstring> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +using ::testing::AllOf; +using ::testing::Eq; +using ::testing::IsNull; +using ::testing::Pointee; +using ::testing::ResultOf; +using ::testing::StrEq; + +class DecodeRustPunycodeTest : public ::testing::Test { + protected: + void FillBufferWithNonzeroBytes() { + // The choice of nonzero value to fill with is arbitrary. The point is just + // to fail tests if DecodeRustPunycode forgets to write the final NUL + // character. + std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_)); + } + + DecodeRustPunycodeOptions WithAmpleSpace() { + FillBufferWithNonzeroBytes(); + + DecodeRustPunycodeOptions options; + options.punycode_begin = punycode_.data(); + options.punycode_end = punycode_.data() + punycode_.size(); + options.out_begin = buffer_storage_; + options.out_end = buffer_storage_ + sizeof(buffer_storage_); + return options; + } + + DecodeRustPunycodeOptions WithJustEnoughSpace() { + FillBufferWithNonzeroBytes(); + + const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1; + DecodeRustPunycodeOptions options; + options.punycode_begin = punycode_.data(); + options.punycode_end = punycode_.data() + punycode_.size(); + options.out_begin = buffer_storage_ + begin_offset; + options.out_end = buffer_storage_ + sizeof(buffer_storage_); + return options; + } + + DecodeRustPunycodeOptions WithOneByteTooFew() { + FillBufferWithNonzeroBytes(); + + const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size(); + DecodeRustPunycodeOptions options; + options.punycode_begin = punycode_.data(); + options.punycode_end = punycode_.data() + punycode_.size(); + options.out_begin = buffer_storage_ + begin_offset; + options.out_end = buffer_storage_ + sizeof(buffer_storage_); + return options; + } + + // Matches a correct return value of DecodeRustPunycode when `golden` is the + // expected plaintext output. + auto PointsToTheNulAfter(const std::string& golden) { + const size_t golden_size = golden.size(); + return AllOf( + Pointee(Eq('\0')), + ResultOf("preceding string body", + [golden_size](const char* p) { return p - golden_size; }, + StrEq(golden))); + } + + std::string punycode_; + std::string plaintext_; + char buffer_storage_[1024]; +}; + +TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) { + punycode_ = ""; + plaintext_ = ""; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, + StripsTheTrailingDelimiterFromAPureRunOfBasicChars) { + punycode_ = "foo_"; + plaintext_ = "foo"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) { + punycode_ = "foo_bar_"; + plaintext_ = "foo_bar"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) { + punycode_ = "_foo_"; + plaintext_ = "_foo"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsALeadingUnderscoreDelimiter) { + punycode_ = "_foo"; + + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) { + punycode_ = std::string("foo\0bar_", 8); + + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) { + punycode_ = "foo\007_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "foo-_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "foo;_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "foo\177_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) { + punycode_ = "\x80"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "\x80_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "\xff"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "\xff_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RecognizesU0080) { + // a encodes 0, so the output is the smallest non-ASCII code point standing + // alone. (U+0080 PAD is not an identifier character, but DecodeRustPunycode + // does not check whether non-ASCII characters could belong to an identifier.) + punycode_ = "a"; + plaintext_ = "\xc2\x80"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) { + // Because bias = 72 for the first code point, any digit but a/A is nonfinal + // in one of the first two bytes of a delta sequence. + punycode_ = "b"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "z"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "0"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "9"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) { + punycode_ = "ba"; + plaintext_ = "\xc2\x81"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) { + punycode_ = "ca"; + plaintext_ = "\xc2\x82"; + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "za"; + plaintext_ = "\xc2\x99"; + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "0a"; + plaintext_ = "\xc2\x9a"; + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "1a"; + plaintext_ = "\xc2\x9b"; + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "9a"; + plaintext_ = "£"; // Pound sign, U+00A3 + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); +} + +TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) { + punycode_ = "bb"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "zz"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "00"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "99"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) { + punycode_ = "bba"; + plaintext_ = "¤"; // U+00A4 + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "cba"; + plaintext_ = "¥"; // U+00A5 + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "zba"; + plaintext_ = "¼"; // U+00BC + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "0ba"; + plaintext_ = "½"; // U+00BD + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "1ba"; + plaintext_ = "¾"; // U+00BE + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "9ba"; + plaintext_ = "Æ"; // U+00C6 + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); +} + +// Tests beyond this point use characters allowed in identifiers, so you can +// prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test +// input and run it through another Rust demangler to verify that the +// corresponding golden output is correct. + +TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) { + punycode_ = "0ca"; + plaintext_ = "à"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) { + punycode_ = "_la_mode_yya"; + plaintext_ = "à_la_mode"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) { + punycode_ = "verre__vin_m4a"; + plaintext_ = "verre_à_vin"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) { + punycode_ = "belt_3na"; + plaintext_ = "beltà"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) { + punycode_ = "0caaaa"; + plaintext_ = "àààà"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) { + punycode_ = "3camsuz"; + plaintext_ = "ãéïôù"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) { + punycode_ = "3caltsx"; + plaintext_ = "ùéôãï"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) { + punycode_ = "fiq"; + plaintext_ = "中"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) { + punycode_ = "fiqaaaa"; + plaintext_ = "中中中中中"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) { + punycode_ = "fiq228c"; + plaintext_ = "中文"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) { + punycode_ = "fiq128c"; + plaintext_ = "文中"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) { + punycode_ = "uy7h"; + plaintext_ = "🂻"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) { + punycode_ = "jack__uh63d"; + plaintext_ = "jack_🂻"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) { + punycode_ = "jack__of_hearts_ki37n"; + plaintext_ = "jack_🂻_of_hearts"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) { + punycode_ = "_of_hearts_kz45i"; + plaintext_ = "🂻_of_hearts"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) { + punycode_ = "uy7haaaa"; + plaintext_ = "🂻🂻🂻🂻🂻"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) { + punycode_ = "8x7hcjmf"; + plaintext_ = "🂦🂧🂪🂭🂮"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) { + punycode_ = "8x7hcild"; + plaintext_ = "🂮🂦🂭🂪🂧"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) { + punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak"; + plaintext_ = "ùéôãï中文🂮🂦🂭🂪🂧"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) { + punycode_ = "123456789a"; + + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +// Finally, we test on a few prose and poetry snippets as a defense in depth. +// If our artificial short test inputs did not exercise a bug that is tickled by +// patterns typical of real human writing, maybe real human writing will catch +// that. +// +// These test inputs are extracted from texts old enough to be out of copyright +// that probe a variety of ranges of code-point space. All are longer than 32 +// code points, so they exercise the carrying of seminibbles from one uint64_t +// to the next higher one in BoundedUtf8LengthSequence. + +// The first three lines of the Old English epic _Beowulf_, mostly ASCII with a +// few archaic two-byte letters interspersed. +TEST_F(DecodeRustPunycodeTest, Beowulf) { + punycode_ = "hwt_we_gardena_in_geardagum_" + "eodcyninga_rym_gefrunon_" + "hu_a_elingas_ellen_fremedon_hxg9c70do9alau"; + plaintext_ = "hwæt_we_gardena_in_geardagum_" + "þeodcyninga_þrym_gefrunon_" + "hu_ða_æþelingas_ellen_fremedon"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +// The whole of 過故人莊 by the 8th-century Chinese poet 孟浩然 +// (Meng Haoran), exercising three-byte-character processing. +TEST_F(DecodeRustPunycodeTest, MengHaoran) { + punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6" + "3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta"; + plaintext_ = "故人具雞黍" "邀我至田家" + "綠樹村邊合" "青山郭外斜" + "開軒面場圃" "把酒話桑麻" + "待到重陽日" "還來就菊花"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +// A poem of the 8th-century Japanese poet 山上憶良 (Yamanoue no Okura). +// Japanese mixes two-byte and three-byte characters: a good workout for codecs. +TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) { + punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0" + "em23do0op23cc2ff70mb6tae8aq759gja"; + plaintext_ = "瓜食めば" + "子ども思ほゆ" + "栗食めば" + "まして偲はゆ" + "何処より" + "来りしものそ" + "眼交に" + "もとな懸りて" + "安眠し寝さぬ"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +// The first two lines of the Phoenician-language inscription on the sarcophagus +// of Eshmunazar II of Sidon, 6th century BCE. Phoenician and many other +// archaic scripts are allocated in the Supplemental Multilingual Plane (U+10000 +// through U+1FFFF) and thus exercise four-byte-character processing. +TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) { + punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee" + "ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g"; + plaintext_ = "𐤁𐤉𐤓𐤇𐤁𐤋𐤁𐤔𐤍𐤕𐤏𐤎𐤓" + "𐤅𐤀𐤓𐤁𐤏𐤗𐤖𐤖𐤖𐤖𐤋𐤌𐤋𐤊𐤉𐤌𐤋𐤊" + "𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊𐤑𐤃𐤍𐤌" + "𐤁𐤍𐤌𐤋𐤊𐤕𐤁𐤍𐤕𐤌𐤋𐤊𐤑𐤃𐤍𐤌" + "𐤃𐤁𐤓𐤌𐤋𐤊𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊" + "𐤑𐤃𐤍𐤌𐤋𐤀𐤌𐤓𐤍𐤂𐤆𐤋𐤕"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +} // namespace +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl |