SkUTF

Create new header and namespace, `SkUTF`, where we are putting all of our robust, well-documented UTF-8, UTF-16, and UTF-32 functions: `SkUTF::{Count,Next,To}UTF{8,16,32}()`.

SkUTF.h and SkUTF.cpp do not depend on the rest of Skia and are suitable for re-use in other modules.

Some of the old UTF-{8,16} functions still live in SkUtils.h; their use will be phased out in future CLs.

Also added more unit testing and cleaned up old tests.

Removed functions that were unused outside of tests or used only once.

Change-Id: Iaa59b8705abccf9c4ba082f855da368a0bad8380
Reviewed-on: https://skia-review.googlesource.com/143306
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Hal Canary <halcanary@google.com>
author: Hal Canary <halcanary@google.com> 2018-07-25 16:52:48 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2018-07-31 20:11:19 +0000
commit: f107a2fd014cd39c489060f2cd1b99cd49c7d0be (patch)
tree: 5c324821344901869203bbe055be8d3f69f696cb /modules
parent: 1935aa3d27cd4ed4aef2dc04360f247a541d4b00 (diff)
1 files changed, 18 insertions, 9 deletions
diff --git a/modules/skshaper/src/SkShaper_harfbuzz.cpp b/modules/skshaper/src/SkShaper_harfbuzz.cpp
index 2077dedadc..5c49de04b4 100644
--- a/modules/skshaper/src/SkShaper_harfbuzz.cpp
+++ b/modules/skshaper/src/SkShaper_harfbuzz.cpp
@@ -83,6 +83,15 @@ HBFont create_hb_font(SkTypeface* tf) {
     return font;
 }
 
+/** Wraps SkUTF::NextUTF8(ptr, end); a negative result (invalid UTF-8) becomes U+FFFD. */
+static inline SkUnichar utf8_next(const char** ptr, const char* end) {
+    SkUnichar val = SkUTF::NextUTF8(ptr, end);
+    if (val < 0) {  // negative value is treated as a decode failure
+        return 0xFFFD;  // REPLACEMENT CHARACTER
+    }
+    return val;  // non-negative results are passed through unchanged
+}
+
 class RunIterator {
 public:
     virtual ~RunIterator() {}
@@ -138,16 +147,16 @@ public:
         SkASSERT(fUTF16LogicalPosition < ubidi_getLength(fBidi.get()));
         int32_t endPosition = ubidi_getLength(fBidi.get());
         fLevel = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition);
-        SkUnichar u = SkUTF8_NextUnichar(&fEndOfCurrentRun, fEndOfAllRuns);
-        fUTF16LogicalPosition += SkUTF16_FromUnichar(u);
+        SkUnichar u = utf8_next(&fEndOfCurrentRun, fEndOfAllRuns);
+        fUTF16LogicalPosition += SkUTF::ToUTF16(u);
         UBiDiLevel level;
         while (fUTF16LogicalPosition < endPosition) {
             level = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition);
             if (level != fLevel) {
                 break;
             }
-            u = SkUTF8_NextUnichar(&fEndOfCurrentRun, fEndOfAllRuns);
-            fUTF16LogicalPosition += SkUTF16_FromUnichar(u);
+            u = utf8_next(&fEndOfCurrentRun, fEndOfAllRuns);
+            fUTF16LogicalPosition += SkUTF::ToUTF16(u);
         }
     }
     const char* endOfCurrentRun() const override {
@@ -184,11 +193,11 @@ public:
     {}
     void consume() override {
         SkASSERT(fCurrent < fEnd);
-        SkUnichar u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+        SkUnichar u = utf8_next(&fCurrent, fEnd);
         fCurrentScript = hb_unicode_script(fHBUnicode, u);
         while (fCurrent < fEnd) {
             const char* prev = fCurrent;
-            u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+            u = utf8_next(&fCurrent, fEnd);
             const hb_script_t script = hb_unicode_script(fHBUnicode, u);
             if (script != fCurrentScript) {
                 if (fCurrentScript == HB_SCRIPT_INHERITED || fCurrentScript == HB_SCRIPT_COMMON) {
@@ -243,7 +252,7 @@ public:
     {}
     void consume() override {
         SkASSERT(fCurrent < fEnd);
-        SkUnichar u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+        SkUnichar u = utf8_next(&fCurrent, fEnd);
         // If the starting typeface can handle this character, use it.
         if (fTypeface->charsToGlyphs(&u, SkTypeface::kUTF32_Encoding, nullptr, 1)) {
             fFallbackTypeface.reset();
@@ -265,7 +274,7 @@ public:
 
         while (fCurrent < fEnd) {
             const char* prev = fCurrent;
-            u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+            u = utf8_next(&fCurrent, fEnd);
 
             // If using a fallback and the initial typeface has this character, stop fallback.
             if (fFallbackTypeface &&
@@ -554,7 +563,7 @@ SkPoint SkShaper::shape(SkTextBlobBuilder* builder,
         const char* utf8Current = utf8Start;
         while (utf8Current < utf8End) {
             unsigned int cluster = utf8Current - utf8Start;
-            hb_codepoint_t u = SkUTF8_NextUnichar(&utf8Current, utf8End);
+            hb_codepoint_t u = utf8_next(&utf8Current, utf8End);
             hb_buffer_add(buffer, u, cluster);
         }
author	Hal Canary <halcanary@google.com>	2018-07-25 16:52:48 -0400
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	2018-07-31 20:11:19 +0000
commit	f107a2fd014cd39c489060f2cd1b99cd49c7d0be (patch)
tree	5c324821344901869203bbe055be8d3f69f696cb /modules
parent	1935aa3d27cd4ed4aef2dc04360f247a541d4b00 (diff)