diff options
author | Hal Canary <halcanary@google.com> | 2017-01-11 15:53:25 -0500 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-01-11 21:27:29 +0000 |
commit | d1c8e56423f4d1a879f3a7bcd24e2725d9b690a7 (patch) | |
tree | 7a054dc76e1818e3de7cfa553ff61ca511b13285 /tests | |
parent | f046e15347373c20e42b1a25ecd87cbdb84de146 (diff) |
SkUTF8_CountUnichars(s,l) and SkUTF8_NextUnichar(s,l) now safe.
Theory: We will accept blobs of data as utf-8 text without validation,
but when it comes time to process it: count code points or convert to
code points, be careful to check for errors.
TODO: SkTypeface::charsToGlyphs() needs to take a length.
Change-Id: Id8110ab43dbffce96faffdda1e0bdaa39cad40e4
Reviewed-on: https://skia-review.googlesource.com/6849
Commit-Queue: Hal Canary <halcanary@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/UtilsTest.cpp | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/tests/UtilsTest.cpp b/tests/UtilsTest.cpp index b6f90e03b5..ed861c69a0 100644 --- a/tests/UtilsTest.cpp +++ b/tests/UtilsTest.cpp @@ -219,3 +219,64 @@ DEF_TEST(Utils, reporter) { test_autounref(reporter); test_autostarray(reporter); } + +#define ASCII_BYTE "X" +#define CONTINUATION_BYTE "\x80" +#define LEADING_TWO_BYTE "\xC4" +#define LEADING_THREE_BYTE "\xE0" +#define LEADING_FOUR_BYTE "\xF0" +#define INVALID_BYTE "\xFC" +static bool valid_utf8(const char* p, size_t l) { + return SkUTF8_CountUnicharsWithError(p, l) >= 0; +} +DEF_TEST(Utils_UTF8_ValidLength, r) { + const char* goodTestcases[] = { + "", + ASCII_BYTE, + ASCII_BYTE ASCII_BYTE, + LEADING_TWO_BYTE CONTINUATION_BYTE, + ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE, + ASCII_BYTE ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE, + LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + ASCII_BYTE ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + ASCII_BYTE ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE + CONTINUATION_BYTE, + }; + for (const char* testcase : goodTestcases) { + REPORTER_ASSERT(r, valid_utf8(testcase, strlen(testcase))); + } + const char* badTestcases[] = { + INVALID_BYTE, + INVALID_BYTE CONTINUATION_BYTE, + INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + LEADING_TWO_BYTE, + CONTINUATION_BYTE, + CONTINUATION_BYTE CONTINUATION_BYTE, + LEADING_THREE_BYTE CONTINUATION_BYTE, + CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + LEADING_FOUR_BYTE CONTINUATION_BYTE, + CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + + ASCII_BYTE INVALID_BYTE, + ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE, + ASCII_BYTE INVALID_BYTE 
CONTINUATION_BYTE CONTINUATION_BYTE, + ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + ASCII_BYTE LEADING_TWO_BYTE, + ASCII_BYTE CONTINUATION_BYTE, + ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE, + ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE, + ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, + + // LEADING_FOUR_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE, + }; + for (const char* testcase : badTestcases) { + REPORTER_ASSERT(r, !valid_utf8(testcase, strlen(testcase))); + } + +} |