diff options
33 files changed, 603 insertions, 775 deletions
diff --git a/fuzz/FuzzCanvas.cpp b/fuzz/FuzzCanvas.cpp index aac68481d4..80d3e816a1 100644 --- a/fuzz/FuzzCanvas.cpp +++ b/fuzz/FuzzCanvas.cpp @@ -1020,21 +1020,21 @@ static SkTDArray<uint8_t> make_fuzz_text(Fuzz* fuzz, const SkPaint& paint) { case SkPaint::kUTF8_TextEncoding: { size_t utf8len = 0; for (int j = 0; j < length; ++j) { - utf8len += SkUTF8_FromUnichar(buffer[j], nullptr); + utf8len += SkUTF::ToUTF8(buffer[j], nullptr); } char* ptr = (char*)array.append(utf8len); for (int j = 0; j < length; ++j) { - ptr += SkUTF8_FromUnichar(buffer[j], ptr); + ptr += SkUTF::ToUTF8(buffer[j], ptr); } } break; case SkPaint::kUTF16_TextEncoding: { size_t utf16len = 0; for (int j = 0; j < length; ++j) { - utf16len += SkUTF16_FromUnichar(buffer[j]); + utf16len += SkUTF::ToUTF16(buffer[j]); } uint16_t* ptr = (uint16_t*)array.append(utf16len * sizeof(uint16_t)); for (int j = 0; j < length; ++j) { - ptr += SkUTF16_FromUnichar(buffer[j], ptr); + ptr += SkUTF::ToUTF16(buffer[j], ptr); } } break; case SkPaint::kUTF32_TextEncoding: diff --git a/gm/atlastext.cpp b/gm/atlastext.cpp index 859b4c356f..e3fed2ee06 100644 --- a/gm/atlastext.cpp +++ b/gm/atlastext.cpp @@ -30,7 +30,7 @@ static SkScalar draw_string(SkAtlasTextTarget* target, const SkString& text, SkS return x; } auto font = SkAtlasTextFont::Make(typeface, size); - int cnt = SkUTF8_CountUnichars(text.c_str(), text.size()); + int cnt = SkUTF::CountUTF8(text.c_str(), text.size()); std::unique_ptr<SkGlyphID[]> glyphs(new SkGlyphID[cnt]); typeface->charsToGlyphs(text.c_str(), SkTypeface::Encoding::kUTF8_Encoding, glyphs.get(), cnt); diff --git a/gm/coloremoji_blendmodes.cpp b/gm/coloremoji_blendmodes.cpp index 872527404c..46ba67ec90 100644 --- a/gm/coloremoji_blendmodes.cpp +++ b/gm/coloremoji_blendmodes.cpp @@ -148,7 +148,8 @@ protected: textP.setBlendMode(gModes[i]); textP.setTextEncoding(SkPaint::kUTF32_TextEncoding); const char* text = sk_tool_utils::emoji_sample_text(); - SkUnichar unichar = SkUTF8_NextUnichar(&text, text + strlen(text)); + SkUnichar unichar = SkUTF::NextUTF8(&text, text + strlen(text)); + SkASSERT(unichar >= 0); canvas->drawText(&unichar, 4, x+ w/10.f, y + 7.f*h/8.f, textP); } #if 1 diff --git a/gn/utils.gni b/gn/utils.gni index 7d014b0a0c..68667beed4 100644 --- a/gn/utils.gni +++ b/gn/utils.gni @@ -62,6 +62,8 @@ skia_utils_sources = [ "$_src/utils/SkShadowUtils.cpp", "$_src/utils/SkThreadUtils_pthread.cpp", "$_src/utils/SkThreadUtils_win.cpp", + "$_src/utils/SkUTF.cpp", + "$_src/utils/SkUTF.h", "$_src/utils/SkWhitelistTypefaces.cpp", #mac diff --git a/modules/skshaper/src/SkShaper_harfbuzz.cpp b/modules/skshaper/src/SkShaper_harfbuzz.cpp index 2077dedadc..5c49de04b4 100644 --- a/modules/skshaper/src/SkShaper_harfbuzz.cpp +++ b/modules/skshaper/src/SkShaper_harfbuzz.cpp @@ -83,6 +83,15 @@ HBFont create_hb_font(SkTypeface* tf) { return font; } +/** this version replaces invalid utf-8 sequences with code point U+FFFD. */ +static inline SkUnichar utf8_next(const char** ptr, const char* end) { + SkUnichar val = SkUTF::NextUTF8(ptr, end); + if (val < 0) { + return 0xFFFD; // REPLACEMENT CHARACTER + } + return val; +} + class RunIterator { public: virtual ~RunIterator() {} @@ -138,16 +147,16 @@ public: SkASSERT(fUTF16LogicalPosition < ubidi_getLength(fBidi.get())); int32_t endPosition = ubidi_getLength(fBidi.get()); fLevel = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition); - SkUnichar u = SkUTF8_NextUnichar(&fEndOfCurrentRun, fEndOfAllRuns); - fUTF16LogicalPosition += SkUTF16_FromUnichar(u); + SkUnichar u = utf8_next(&fEndOfCurrentRun, fEndOfAllRuns); + fUTF16LogicalPosition += SkUTF::ToUTF16(u); UBiDiLevel level; while (fUTF16LogicalPosition < endPosition) { level = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition); if (level != fLevel) { break; } - u = SkUTF8_NextUnichar(&fEndOfCurrentRun, fEndOfAllRuns); - fUTF16LogicalPosition += SkUTF16_FromUnichar(u); + u = utf8_next(&fEndOfCurrentRun, fEndOfAllRuns); + fUTF16LogicalPosition += SkUTF::ToUTF16(u); } } const char* endOfCurrentRun() const override { @@ -184,11 +193,11 @@ public: {} void consume() override { SkASSERT(fCurrent < fEnd); - SkUnichar u = SkUTF8_NextUnichar(&fCurrent, fEnd); + SkUnichar u = utf8_next(&fCurrent, fEnd); fCurrentScript = hb_unicode_script(fHBUnicode, u); while (fCurrent < fEnd) { const char* prev = fCurrent; - u = SkUTF8_NextUnichar(&fCurrent, fEnd); + u = utf8_next(&fCurrent, fEnd); const hb_script_t script = hb_unicode_script(fHBUnicode, u); if (script != fCurrentScript) { if (fCurrentScript == HB_SCRIPT_INHERITED || fCurrentScript == HB_SCRIPT_COMMON) { @@ -243,7 +252,7 @@ public: {} void consume() override { SkASSERT(fCurrent < fEnd); - SkUnichar u = SkUTF8_NextUnichar(&fCurrent, fEnd); + SkUnichar u = utf8_next(&fCurrent, fEnd); // If the starting typeface can handle this character, use it. if (fTypeface->charsToGlyphs(&u, SkTypeface::kUTF32_Encoding, nullptr, 1)) { fFallbackTypeface.reset(); @@ -265,7 +274,7 @@ public: while (fCurrent < fEnd) { const char* prev = fCurrent; - u = SkUTF8_NextUnichar(&fCurrent, fEnd); + u = utf8_next(&fCurrent, fEnd); // If using a fallback and the initial typeface has this character, stop fallback. if (fFallbackTypeface && @@ -554,7 +563,7 @@ SkPoint SkShaper::shape(SkTextBlobBuilder* builder, const char* utf8Current = utf8Start; while (utf8Current < utf8End) { unsigned int cluster = utf8Current - utf8Start; - hb_codepoint_t u = SkUTF8_NextUnichar(&utf8Current, utf8End); + hb_codepoint_t u = utf8_next(&utf8Current, utf8End); hb_buffer_add(buffer, u, cluster); } diff --git a/samplecode/SampleRectanizer.cpp b/samplecode/SampleRectanizer.cpp index 752d5b7136..d402416ca5 100644 --- a/samplecode/SampleRectanizer.cpp +++ b/samplecode/SampleRectanizer.cpp @@ -59,8 +59,8 @@ protected: } SkUnichar uni; if (SampleCode::CharQ(*evt, &uni)) { - char utf8[kMaxBytesInUTF8Sequence]; - size_t size = SkUTF8_FromUnichar(uni, utf8); + char utf8[SkUTF::kMaxBytesInUTF8Sequence]; + size_t size = SkUTF::ToUTF8(uni, utf8); // Only consider events for single char keys if (1 == size) { switch (utf8[0]) { diff --git a/samplecode/SampleUnpremul.cpp b/samplecode/SampleUnpremul.cpp index 108d70914f..4f41418158 100644 --- a/samplecode/SampleUnpremul.cpp +++ b/samplecode/SampleUnpremul.cpp @@ -53,8 +53,8 @@ protected: } SkUnichar uni; if (SampleCode::CharQ(*evt, &uni)) { - char utf8[kMaxBytesInUTF8Sequence]; - size_t size = SkUTF8_FromUnichar(uni, utf8); + char utf8[SkUTF::kMaxBytesInUTF8Sequence]; + size_t size = SkUTF::ToUTF8(uni, utf8); // Only consider events for single char keys if (1 == size) { switch (utf8[0]) { diff --git a/samplecode/SampleXfermodesBlur.cpp b/samplecode/SampleXfermodesBlur.cpp index 37b859288b..0ef144ef18 100644 --- a/samplecode/SampleXfermodesBlur.cpp +++ b/samplecode/SampleXfermodesBlur.cpp @@ -90,7 +90,7 @@ protected: paint.setTextSize(50); paint.setTypeface(SkTypeface::MakeFromName("Arial Unicode MS", SkFontStyle())); char buffer[10]; - size_t len = SkUTF8_FromUnichar(0x8500, buffer); + size_t len = SkUTF::ToUTF8(0x8500, buffer); canvas->drawText(buffer, len, 40, 40, paint); return; } diff --git a/src/core/SkFindAndPlaceGlyph.h b/src/core/SkFindAndPlaceGlyph.h index f602e8d931..74fa516910 100644 --- a/src/core/SkFindAndPlaceGlyph.h +++ b/src/core/SkFindAndPlaceGlyph.h @@ -147,7 +147,7 @@ private: private: SkUnichar nextUnichar(const char** text, const char* stop) override { - return SkUTF8_NextUnichar(text, stop); + return SkUTF::NextUTF8(text, stop); } }; @@ -157,7 +157,7 @@ private: private: SkUnichar nextUnichar(const char** text, const char* stop) override { - return SkUTF16_NextUnichar((const uint16_t**)text, (const uint16_t*)stop); + return SkUTF::NextUTF16((const uint16_t**)text, (const uint16_t*)stop); } }; @@ -167,10 +167,7 @@ private: private: SkUnichar nextUnichar(const char** text, const char* stop) override { - const int32_t* ptr = *(const int32_t**)text; - SkUnichar uni = *ptr++; - *text = (const char*)ptr; - return uni; + return SkUTF::NextUTF32((const int32_t**)text, (const int32_t*)stop); } }; @@ -181,19 +178,24 @@ private: SkASSERT(cache != nullptr); } - const SkGlyph& lookupGlyph(const char** text, const char*) override { - return fCache->getGlyphIDMetrics(nextGlyphId(text)); + const SkGlyph& lookupGlyph(const char** text, const char* stop) override { + return fCache->getGlyphIDMetrics(nextGlyphId(text, stop)); } - const SkGlyph& lookupGlyphXY(const char** text, const char*, + const SkGlyph& lookupGlyphXY(const char** text, const char* stop, SkFixed x, SkFixed y) override { - return fCache->getGlyphIDMetrics(nextGlyphId(text), x, y); + return fCache->getGlyphIDMetrics(nextGlyphId(text, stop), x, y); } private: - uint16_t nextGlyphId(const char** text) { + uint16_t nextGlyphId(const char** text, const char* stop) { SkASSERT(text != nullptr); const uint16_t* ptr = *(const uint16_t**)text; + SkASSERT(ptr); + if (ptr + 1 > (const uint16_t*)stop) { + *text = stop; + return 0; + } uint16_t glyphID = *ptr; ptr += 1; *text = (const char*)ptr; diff --git a/src/core/SkFont.cpp b/src/core/SkFont.cpp index bde8c4c80b..081469f900 100644 --- a/src/core/SkFont.cpp +++ b/src/core/SkFont.cpp @@ -68,10 +68,10 @@ int SkFont::textToGlyphs(const void* text, size_t byteLength, SkTextEncoding enc switch (encoding) { case kUTF8_SkTextEncoding: - count = SkUTF8_CountUnichars((const char*)text, byteLength); + count = SkUTF::CountUTF8((const char*)text, byteLength); break; case kUTF16_SkTextEncoding: - count = SkUTF16_CountUnichars((const uint16_t*)text, byteLength); + count = SkUTF::CountUTF16((const uint16_t*)text, byteLength); break; case kUTF32_SkTextEncoding: count = SkToInt(byteLength >> 2); diff --git a/src/core/SkOverdrawCanvas.cpp b/src/core/SkOverdrawCanvas.cpp index 249723e494..0e20878793 100644 --- a/src/core/SkOverdrawCanvas.cpp +++ b/src/core/SkOverdrawCanvas.cpp @@ -98,13 +98,15 @@ void SkOverdrawCanvas::onDrawTextOnPath(const void* text, size_t byteLength, con typedef int (*CountTextProc)(const char* text, const char* stop); static int count_utf16(const char* text, const char* stop) { const uint16_t* prev = (const uint16_t*)text; - (void)SkUTF16_NextUnichar(&prev, (const uint16_t*)stop); + (void)SkUTF::NextUTF16(&prev, (const uint16_t*)stop); return SkToInt((const char*)prev - text); } static int return_4(const char* text, const char* stop) { return 4; } static int return_2(const char* text, const char* stop) { return 2; } static int count_utf8(const char* text, const char* stop) { - return SkUTF8_LeadByteToCount(*(const uint8_t*)text); + const char* ptr = text; + (void)SkUTF::NextUTF8(&ptr, stop); + return SkToInt(ptr - text); } void SkOverdrawCanvas::onDrawTextRSXform(const void* text, size_t byteLength, diff --git a/src/core/SkPaint.cpp b/src/core/SkPaint.cpp index 9b8431e591..15aa44eebc 100644 --- a/src/core/SkPaint.cpp +++ b/src/core/SkPaint.cpp @@ -399,9 +399,9 @@ int SkPaint::countText(const void* text, size_t byteLength) const { SkASSERT(text != nullptr); switch (this->getTextEncoding()) { case kUTF8_TextEncoding: - return SkUTF8_CountUnichars(text, byteLength); + return SkUTF::CountUTF8((const char*)text, byteLength); case kUTF16_TextEncoding: - return SkUTF16_CountUnichars(text, byteLength); + return SkUTF::CountUTF16((const uint16_t*)text, byteLength); case kUTF32_TextEncoding: return SkToInt(byteLength >> 2); case kGlyphID_TextEncoding: @@ -413,6 +413,13 @@ int SkPaint::countText(const void* text, size_t byteLength) const { return 0; } +static SkTypeface::Encoding to_encoding(SkPaint::TextEncoding e) { + static_assert((int)SkTypeface::kUTF8_Encoding == (int)SkPaint::kUTF8_TextEncoding, ""); + static_assert((int)SkTypeface::kUTF16_Encoding == (int)SkPaint::kUTF16_TextEncoding, ""); + static_assert((int)SkTypeface::kUTF32_Encoding == (int)SkPaint::kUTF32_TextEncoding, ""); + return (SkTypeface::Encoding)e; +} + int SkPaint::textToGlyphs(const void* textData, size_t byteLength, uint16_t glyphs[]) const { SkASSERT(textData != nullptr); @@ -431,38 +438,16 @@ int SkPaint::textToGlyphs(const void* textData, size_t byteLength, uint16_t glyp auto cache = SkStrikeCache::FindOrCreateStrikeExclusive(*this); - const char* text = (const char*)textData; - const char* stop = text + byteLength; + const void* stop = (const char*)textData + byteLength; uint16_t* gptr = glyphs; + const SkTypeface::Encoding encoding = to_encoding(this->getTextEncoding()); - switch (this->getTextEncoding()) { - case SkPaint::kUTF8_TextEncoding: - while (text < stop) { - SkUnichar u = SkUTF8_NextUnicharWithError(&text, stop); - if (u < 0) { - return 0; // bad UTF-8 sequence - } - *gptr++ = cache->unicharToGlyph(u); - } - break; - case SkPaint::kUTF16_TextEncoding: { - const uint16_t* text16 = (const uint16_t*)text; - const uint16_t* stop16 = (const uint16_t*)stop; - while (text16 < stop16) { - *gptr++ = cache->unicharToGlyph(SkUTF16_NextUnichar(&text16, stop16)); - } - break; - } - case kUTF32_TextEncoding: { - const int32_t* text32 = (const int32_t*)text; - const int32_t* stop32 = (const int32_t*)stop; - while (text32 < stop32) { - *gptr++ = cache->unicharToGlyph(*text32++); - } - break; + while (textData < stop) { + SkUnichar unichar = SkUTFN_Next(encoding, &textData, stop); + if (unichar < 0) { + return 0; // bad UTF-N sequence } - default: - SkDEBUGFAIL("unknown text encoding"); + *gptr++ = cache->unicharToGlyph(unichar); } return SkToInt(gptr - glyphs); } @@ -487,41 +472,12 @@ bool SkPaint::containsText(const void* textData, size_t byteLength) const { } auto cache = SkStrikeCache::FindOrCreateStrikeExclusive(*this); - - switch (this->getTextEncoding()) { - case SkPaint::kUTF8_TextEncoding: { - const char* text = static_cast<const char*>(textData); - const char* stop = text + byteLength; - while (text < stop) { - if (0 == cache->unicharToGlyph(SkUTF8_NextUnichar(&text, stop))) { - return false; - } - } - break; - } - case SkPaint::kUTF16_TextEncoding: { - const uint16_t* text = static_cast<const uint16_t*>(textData); - const uint16_t* stop = text + (byteLength >> 1); - while (text < stop) { - if (0 == cache->unicharToGlyph(SkUTF16_NextUnichar(&text, stop))) { - return false; - } - } - break; - } - case SkPaint::kUTF32_TextEncoding: { - const int32_t* text = static_cast<const int32_t*>(textData); - const int32_t* stop = text + (byteLength >> 2); - while (text < stop) { - if (0 == cache->unicharToGlyph(*text++)) { - return false; - } - } - break; - } - default: - SkDEBUGFAIL("unknown text encoding"); + const void* stop = (const char*)textData + byteLength; + const SkTypeface::Encoding encoding = to_encoding(this->getTextEncoding()); + while (textData < stop) { + if (0 == cache->unicharToGlyph(SkUTFN_Next(encoding, &textData, stop))) { return false; + } } return true; } @@ -551,7 +507,7 @@ static const SkGlyph& sk_getMetrics_utf8_next(SkGlyphCache* cache, SkASSERT(cache != nullptr); SkASSERT(text != nullptr); - return cache->getUnicharMetrics(SkUTF8_NextUnichar(text, stop)); + return cache->getUnicharMetrics(SkUTF::NextUTF8(text, stop)); } static const SkGlyph& sk_getMetrics_utf16_next(SkGlyphCache* cache, @@ -561,7 +517,7 @@ static const SkGlyph& sk_getMetrics_utf16_next(SkGlyphCache* cache, SkASSERT(text != nullptr); return cache->getUnicharMetrics( - SkUTF16_NextUnichar((const uint16_t**)text, (const uint16_t*)stop)); + SkUTF::NextUTF16((const uint16_t**)text, (const uint16_t*)stop)); } static const SkGlyph& sk_getMetrics_utf32_next(SkGlyphCache* cache, @@ -570,10 +526,7 @@ static const SkGlyph& sk_getMetrics_utf32_next(SkGlyphCache* cache, SkASSERT(cache != nullptr); SkASSERT(text != nullptr); - const int32_t* ptr = *(const int32_t**)text; - SkUnichar uni = *ptr++; - *text = (const char*)ptr; - return cache->getUnicharMetrics(uni); + return cache->getUnicharMetrics(SkUTF::NextUTF32((const int32_t**)text, (const int32_t*)stop)); } static const SkGlyph& sk_getMetrics_glyph_next(SkGlyphCache* cache, @@ -595,7 +548,7 @@ static const SkGlyph& sk_getAdvance_utf8_next(SkGlyphCache* cache, SkASSERT(cache != nullptr); SkASSERT(text != nullptr); - return cache->getUnicharAdvance(SkUTF8_NextUnichar(text, stop)); + return cache->getUnicharAdvance(SkUTF::NextUTF8(text, stop)); } static const SkGlyph& sk_getAdvance_utf16_next(SkGlyphCache* cache, @@ -605,7 +558,7 @@ static const SkGlyph& sk_getAdvance_utf16_next(SkGlyphCache* cache, SkASSERT(text != nullptr); return cache->getUnicharAdvance( - SkUTF16_NextUnichar((const uint16_t**)text, (const uint16_t*)stop)); + SkUTF::NextUTF16((const uint16_t**)text, (const uint16_t*)stop)); } static const SkGlyph& sk_getAdvance_utf32_next(SkGlyphCache* cache, @@ -614,10 +567,7 @@ static const SkGlyph& sk_getAdvance_utf32_next(SkGlyphCache* cache, SkASSERT(cache != nullptr); SkASSERT(text != nullptr); - const int32_t* ptr = *(const int32_t**)text; - SkUnichar uni = *ptr++; - *text = (const char*)ptr; - return cache->getUnicharAdvance(uni); + return cache->getUnicharAdvance(SkUTF::NextUTF32((const int32_t**)text, (const int32_t*)stop)); } static const SkGlyph& sk_getAdvance_glyph_next(SkGlyphCache* cache, diff --git a/src/core/SkPaintPriv.cpp b/src/core/SkPaintPriv.cpp index 3be1ebeafa..309d244951 100644 --- a/src/core/SkPaintPriv.cpp +++ b/src/core/SkPaintPriv.cpp @@ -92,9 +92,9 @@ bool SkPaintPriv::ShouldDither(const SkPaint& p, SkColorType dstCT) { int SkPaintPriv::ValidCountText(const void* text, size_t length, SkPaint::TextEncoding encoding) { switch (encoding) { - case SkPaint::kUTF8_TextEncoding: return SkUTF8_CountUnichars(text, length); - case SkPaint::kUTF16_TextEncoding: return SkUTF16_CountUnichars(text, length); - case SkPaint::kUTF32_TextEncoding: return SkUTF32_CountUnichars(text, length); + case SkPaint::kUTF8_TextEncoding: return SkUTF::CountUTF8((const char*)text, length); + case SkPaint::kUTF16_TextEncoding: return SkUTF::CountUTF16((const uint16_t*)text, length); + case SkPaint::kUTF32_TextEncoding: return SkUTF::CountUTF32((const int32_t*)text, length); case SkPaint::kGlyphID_TextEncoding: if (!SkIsAlign2(intptr_t(text)) || !SkIsAlign2(length)) { return -1; diff --git a/src/core/SkString.cpp b/src/core/SkString.cpp index 65dae1b753..6f7d80dfd7 100644 --- a/src/core/SkString.cpp +++ b/src/core/SkString.cpp @@ -446,8 +446,8 @@ void SkString::insert(size_t offset, const char text[], size_t len) { } void SkString::insertUnichar(size_t offset, SkUnichar uni) { - char buffer[kMaxBytesInUTF8Sequence]; - size_t len = SkUTF8_FromUnichar(uni, buffer); + char buffer[SkUTF::kMaxBytesInUTF8Sequence]; + size_t len = SkUTF::ToUTF8(uni, buffer); if (len) { this->insert(offset, buffer, len); diff --git a/src/core/SkStringUtils.cpp b/src/core/SkStringUtils.cpp index 500478585f..1743d0d26c 100644 --- a/src/core/SkStringUtils.cpp +++ b/src/core/SkStringUtils.cpp @@ -62,8 +62,8 @@ SkString SkStringFromUTF16(const uint16_t* src, size_t count) { const uint16_t* end = src + count; for (const uint16_t* ptr = src; ptr < end;) { const uint16_t* last = ptr; - SkUnichar u = SkUTF16_NextUnichar(&ptr, stop); - size_t s = SkUTF8_FromUnichar(u); + SkUnichar u = SkUTF::NextUTF16(&ptr, stop); + size_t s = SkUTF::ToUTF8(u); if (n > UINT32_MAX - s) { end = last; // truncate input string break; @@ -73,7 +73,7 @@ SkString SkStringFromUTF16(const uint16_t* src, size_t count) { ret = SkString(n); char* out = ret.writable_str(); for (const uint16_t* ptr = src; ptr < end;) { - out += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&ptr, stop), out); + out += SkUTF::ToUTF8(SkUTF::NextUTF16(&ptr, stop), out); } SkASSERT(out == ret.writable_str() + n); } diff --git a/src/core/SkUtils.cpp b/src/core/SkUtils.cpp index f0c1f60a09..9754da662c 100644 --- a/src/core/SkUtils.cpp +++ b/src/core/SkUtils.cpp @@ -7,431 +7,27 @@ #include "SkUtils.h" -#include "SkTo.h" - -/* 0xxxxxxx 1 total - 10xxxxxx // never a leading byte - 110xxxxx 2 total - 1110xxxx 3 total - 11110xxx 4 total - - 11 10 01 01 xx xx xx xx 0... - 0xE5XX0000 - 0xE5 << 24 -*/ - -static bool utf8_byte_is_valid(uint8_t c) { - return c < 0xF5 && (c & 0xFE) != 0xC0; -} -static bool utf8_byte_is_continuation(uint8_t c) { - return (c & 0xC0) == 0x80; -} -static bool utf8_byte_is_leading_byte(uint8_t c) { - return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c); -} - -#ifdef SK_DEBUG - static void assert_utf8_leadingbyte(unsigned c) { - SkASSERT(utf8_byte_is_leading_byte(SkToU8(c))); - } - - int SkUTF8_LeadByteToCount(unsigned c) { - assert_utf8_leadingbyte(c); - return (((0xE5 << 24) >> (c >> 4 << 1)) & 3) + 1; - } -#else - #define assert_utf8_leadingbyte(c) -#endif - -/** - * @returns -1 iff invalid UTF8 byte, - * 0 iff UTF8 continuation byte, - * 1 iff ASCII byte, - * 2 iff leading byte of 2-byte sequence, - * 3 iff leading byte of 3-byte sequence, and - * 4 iff leading byte of 4-byte sequence. - * - * I.e.: if return value > 0, then gives length of sequence. -*/ -static int utf8_byte_type(uint8_t c) { - if (c < 0x80) { - return 1; - } else if (c < 0xC0) { - return 0; - } else if (c < 0xF5 && (c & 0xFE) != 0xC0) { // "octet values C0, C1, F5 to FF never appear" - return (((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1; - } else { - return -1; - } -} -static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; } - -int SkUTF8_CountUnichars(const char utf8[]) { - SkASSERT(utf8); - - int count = 0; - - for (;;) { - int c = *(const uint8_t*)utf8; - if (c == 0) { - break; - } - utf8 += SkUTF8_LeadByteToCount(c); - count += 1; - } - return count; -} - -// SAFE: returns -1 if invalid UTF-8 -int SkUTF8_CountUnichars(const void* text, size_t byteLength) { - SkASSERT(text); - const char* utf8 = static_cast<const char*>(text); - int count = 0; - const char* stop = utf8 + byteLength; - - while (utf8 < stop) { - int type = utf8_byte_type(*(const uint8_t*)utf8); - SkASSERT(type >= -1 && type <= 4); - if (!utf8_type_is_valid_leading_byte(type) || utf8 + type > stop) { - // Sequence extends beyond end. - return -1; - } - while(type-- > 1) { - ++utf8; - if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) { - return -1; - } - } - ++utf8; - ++count; - } - return count; -} - -SkUnichar SkUTF8_ToUnichar(const char utf8[]) { - SkASSERT(utf8); - - const uint8_t* p = (const uint8_t*)utf8; - int c = *p; - int hic = c << 24; - - assert_utf8_leadingbyte(c); - - if (hic < 0) { - uint32_t mask = (uint32_t)~0x3F; - hic = SkLeftShift(hic, 1); - do { - c = (c << 6) | (*++p & 0x3F); - mask <<= 5; - } while ((hic = SkLeftShift(hic, 1)) < 0); - c &= ~mask; - } - return c; -} - -// SAFE: returns -1 on invalid UTF-8 sequence. -SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) { - SkASSERT(ptr && *ptr); - SkASSERT(*ptr < end); - const uint8_t* p = (const uint8_t*)*ptr; - int c = *p; - int hic = c << 24; - - if (!utf8_byte_is_leading_byte(c)) { - return -1; - } - if (hic < 0) { - uint32_t mask = (uint32_t)~0x3F; - hic = SkLeftShift(hic, 1); - do { - ++p; - if (p >= (const uint8_t*)end) { - return -1; - } - // check before reading off end of array. - uint8_t nextByte = *p; - if (!utf8_byte_is_continuation(nextByte)) { - return -1; - } - c = (c << 6) | (nextByte & 0x3F); - mask <<= 5; - } while ((hic = SkLeftShift(hic, 1)) < 0); - c &= ~mask; - } - *ptr = (char*)p + 1; - return c; -} - -SkUnichar SkUTF8_NextUnichar(const char** ptr) { - SkASSERT(ptr && *ptr); - - const uint8_t* p = (const uint8_t*)*ptr; - int c = *p; - int hic = c << 24; - - assert_utf8_leadingbyte(c); - - if (hic < 0) { - uint32_t mask = (uint32_t)~0x3F; - hic = SkLeftShift(hic, 1); - do { - c = (c << 6) | (*++p & 0x3F); - mask <<= 5; - } while ((hic = SkLeftShift(hic, 1)) < 0); - c &= ~mask; - } - *ptr = (char*)p + 1; - return c; -} - -SkUnichar SkUTF8_PrevUnichar(const char** ptr) { - SkASSERT(ptr && *ptr); - - const char* p = *ptr; - - if (*--p & 0x80) { - while (*--p & 0x40) { - ; - } - } - - *ptr = (char*)p; - return SkUTF8_NextUnichar(&p); -} - -size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) { - if ((uint32_t)uni > 0x10FFFF) { - SkDEBUGFAIL("bad unichar"); - return 0; - } - - if (uni <= 127) { - if (utf8) { - *utf8 = (char)uni; - } - return 1; - } - - char tmp[4]; - char* p = tmp; - size_t count = 1; - - SkDEBUGCODE(SkUnichar orig = uni;) - - while (uni > 0x7F >> count) { - *p++ = (char)(0x80 | (uni & 0x3F)); - uni >>= 6; - count += 1; - } - - if (utf8) { - p = tmp; - utf8 += count; - while (p < tmp + count - 1) { - *--utf8 = *p++; - } - *--utf8 = (char)(~(0xFF >> count) | uni); - } - - SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8)); - return count; -} - -/////////////////////////////////////////////////////////////////////////////// - -int SkUTF16_CountUnichars(const uint16_t src[]) { - SkASSERT(src); - - int count = 0; - unsigned c; - while ((c = *src++) != 0) { - SkASSERT(!SkUTF16_IsLowSurrogate(c)); - if (SkUTF16_IsHighSurrogate(c)) { - c = *src++; - SkASSERT(SkUTF16_IsLowSurrogate(c)); - } - count += 1; - } - return count; -} - -// returns -1 on error -int SkUTF16_CountUnichars(const void* text, size_t byteLength) { - SkASSERT(text); - if (!SkIsAlign2(intptr_t(text)) || !SkIsAlign2(byteLength)) { - return -1; - } - - const uint16_t* src = static_cast<const uint16_t*>(text); - const uint16_t* stop = src + (byteLength >> 1); - int count = 0; - while (src < stop) { - unsigned c = *src++; - if (SkUTF16_IsLowSurrogate(c)) { - return -1; - } - if (SkUTF16_IsHighSurrogate(c)) { - if (src >= stop) { - return -1; - } - c = *src++; - if (!SkUTF16_IsLowSurrogate(c)) { - return -1; - } - } - count += 1; - } - return count; -} - -SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr, const uint16_t* endPtr) { - if (!srcPtr || !endPtr) { - return -1; - } - const uint16_t* src = *srcPtr; - if (src >= endPtr) { - return -1; - } - uint16_t c = *src++; - SkUnichar result = c; - - if (SkUTF16_IsLowSurrogate(c)) { - return -1; // srcPtr should never point at low surrogate. - } - if (SkUTF16_IsHighSurrogate(c)) { - if (src == endPtr) { - return -1; // Truncated string. - } - uint16_t low = *src++; - if (!SkUTF16_IsLowSurrogate(low)) { - return -1; - } - /* - [paraphrased from wikipedia] - Take the high surrogate and subtract 0xD800, then multiply by 0x400. - Take the low surrogate and subtract 0xDC00. Add these two results - together, and finally add 0x10000 to get the final decoded codepoint. - - unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000 - unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000 - unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000 - unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000) - */ - result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000); - } - *srcPtr = src; - return result; -} - -SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) { - SkUnichar c = SkUTF16_NextUnichar(srcPtr, *srcPtr + 2); +template <typename T> +static SkUnichar next(const T** srcPtr, unsigned N, SkUnichar (*fn)(const T**, const T*)) { + SkASSERT(srcPtr); + const T* ptr = *srcPtr; + SkUnichar c = fn(&ptr, ptr + N); if (c == -1) { SkASSERT(false); ++(*srcPtr); - return 0xFFFD; // REPLACEMENT CHARACTER. - } - return c; -} - -SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) { - SkASSERT(srcPtr && *srcPtr); - - const uint16_t* src = *srcPtr; - SkUnichar c = *--src; - - SkASSERT(!SkUTF16_IsHighSurrogate(c)); - if (SkUTF16_IsLowSurrogate(c)) { - unsigned c2 = *--src; - SkASSERT(SkUTF16_IsHighSurrogate(c2)); - c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00); + return 0xFFFD; // REPLACEMENT CHARACTER } - *srcPtr = src; + *srcPtr = ptr; return c; } - -size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) { - SkASSERT((unsigned)uni <= 0x10FFFF); - - int extra = (uni > 0xFFFF); - - if (dst) { - if (extra) { - // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10)); - // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64)); - dst[0] = SkToU16((0xD800 - 64) + (uni >> 10)); - dst[1] = SkToU16(0xDC00 | (uni & 0x3FF)); - - SkASSERT(SkUTF16_IsHighSurrogate(dst[0])); - SkASSERT(SkUTF16_IsLowSurrogate(dst[1])); - } else { - dst[0] = SkToU16(uni); - SkASSERT(!SkUTF16_IsHighSurrogate(dst[0])); - SkASSERT(!SkUTF16_IsLowSurrogate(dst[0])); - } - } - return 1 + extra; -} - -size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues, - char utf8[]) { - SkASSERT(numberOf16BitValues >= 0); - if (numberOf16BitValues <= 0) { - return 0; - } - - SkASSERT(utf16 != nullptr); - - const uint16_t* stop = utf16 + numberOf16BitValues; - size_t size = 0; - - if (utf8 == nullptr) { // just count - while (utf16 < stop) { - size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr); - } - } else { - char* start = utf8; - while (utf16 < stop) { - utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8); - } - size = utf8 - start; - } - return size; +SkUnichar SkUTF8_NextUnichar(const char** p) { + return next<char>(p, SkUTF::kMaxBytesInUTF8Sequence, SkUTF::NextUTF8); } - -// returns -1 on error -int SkUTF32_CountUnichars(const void* text, size_t byteLength) { - if (!SkIsAlign4(intptr_t(text)) || !SkIsAlign4(byteLength)) { - return -1; - } - const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits - const uint32_t* ptr = static_cast<const uint32_t*>(text); - const uint32_t* stop = ptr + (byteLength >> 2); - while (ptr < stop) { - if (*ptr & kInvalidUnicharMask) { - return -1; - } - ptr += 1; - } - return SkToInt(byteLength >> 2); +SkUnichar SkUTF16_NextUnichar(const uint16_t** p) { + return next<uint16_t>(p, 2, SkUTF::NextUTF16); } -// returns -1 on error -int SkUTFN_CountUnichars( - SkTypeface::Encoding encoding, const void* utfN, size_t byteLength) { - SkASSERT(utfN != nullptr); - switch (encoding) { - case SkTypeface::kUTF8_Encoding: - return SkUTF8_CountUnichars(utfN, byteLength); - case SkTypeface::kUTF16_Encoding: - return SkUTF16_CountUnichars(utfN, byteLength); - case SkTypeface::kUTF32_Encoding: - return SkUTF32_CountUnichars(utfN, byteLength); - default: - SkDEBUGFAIL("unknown text encoding"); - } - - return -1; -} +/////////////////////////////////////////////////////////////////////////////// const char SkHexadecimalDigits::gUpper[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; diff --git a/src/core/SkUtils.h b/src/core/SkUtils.h index 795b47a0c4..e18934d6b8 100644 --- a/src/core/SkUtils.h +++ b/src/core/SkUtils.h @@ -8,10 +8,9 @@ #ifndef SkUtils_DEFINED #define SkUtils_DEFINED -#include "SkTypes.h" -#include "SkMath.h" #include "SkOpts.h" #include "SkTypeface.h" +#include "../utils/SkUTF.h" /** Similar to memset(), but it assigns a 16, 32, or 64-bit value into the buffer. @param buffer The memory to have value copied into it @@ -27,87 +26,46 @@ static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { static inline void sk_memset64(uint64_t buffer[], uint64_t value, int count) { SkOpts::memset64(buffer, value, count); } -/////////////////////////////////////////////////////////////////////////////// -#define kMaxBytesInUTF8Sequence 4 +/////////////////////////////////////////////////////////////////////////////// -#ifdef SK_DEBUG - int SkUTF8_LeadByteToCount(unsigned c); -#else - #define SkUTF8_LeadByteToCount(c) ((((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1) -#endif +// Unlike the functions in SkUTF.h, these two functions do not take an array +// length parameter. When possible, use SkUTF::NextUTF{8,16} instead. +SkUnichar SkUTF8_NextUnichar(const char**); +SkUnichar SkUTF16_NextUnichar(const uint16_t**); -inline int SkUTF8_CountUTF8Bytes(const char utf8[]) { - SkASSERT(utf8); - return SkUTF8_LeadByteToCount(*(const uint8_t*)utf8); -} +/////////////////////////////////////////////////////////////////////////////// -int SkUTF8_CountUnichars(const char utf8[]); +static inline bool SkUTF16_IsHighSurrogate(uint16_t c) { return ((c) & 0xFC00) == 0xD800; } -/** These functions are safe: invalid sequences will return -1; */ -int SkUTF8_CountUnichars(const void* utf8, size_t byteLength); -int SkUTF16_CountUnichars(const void* utf16, size_t byteLength); -int SkUTF32_CountUnichars(const void* utf32, size_t byteLength); -int SkUTFN_CountUnichars(SkTypeface::Encoding encoding, const void* utfN, size_t byteLength); +static inline bool SkUTF16_IsLowSurrogate (uint16_t c) { return ((c) & 0xFC00) == 0xDC00; } -/** This function is safe: invalid UTF8 sequences will return -1 - * When -1 is returned, ptr is unchanged. - * Precondition: *ptr < end; - */ -SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end); +/////////////////////////////////////////////////////////////////////////////// -/** this version replaces invalid utf-8 sequences with code point U+FFFD. */ -inline SkUnichar SkUTF8_NextUnichar(const char** ptr, const char* end) { - SkUnichar val = SkUTF8_NextUnicharWithError(ptr, end); - if (val < 0) { - *ptr = end; - return 0xFFFD; // REPLACEMENT CHARACTER +static inline int SkUTFN_CountUnichars(SkTypeface::Encoding enc, const void* utfN, size_t bytes) { + switch (enc) { + case SkTypeface::kUTF8_Encoding: return SkUTF::CountUTF8((const char*)utfN, bytes); + case SkTypeface::kUTF16_Encoding: return SkUTF::CountUTF16((const uint16_t*)utfN, bytes); + case SkTypeface::kUTF32_Encoding: return SkUTF::CountUTF32((const int32_t*)utfN, bytes); + default: SkDEBUGFAIL("unknown text encoding"); return -1; } - return val; } -SkUnichar SkUTF8_ToUnichar(const char utf8[]); -SkUnichar SkUTF8_NextUnichar(const char**); -SkUnichar SkUTF8_PrevUnichar(const char**); - -/** Return the number of bytes need to convert a unichar - into a utf8 sequence. Will be 1..kMaxBytesInUTF8Sequence, - or 0 if uni is illegal. -*/ -size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[] = nullptr); - -/////////////////////////////////////////////////////////////////////////////// - -#define SkUTF16_IsHighSurrogate(c) (((c) & 0xFC00) == 0xD800) -#define SkUTF16_IsLowSurrogate(c) (((c) & 0xFC00) == 0xDC00) - -int SkUTF16_CountUnichars(const uint16_t utf16[]); -// returns the current unichar and then moves past it (*p++) -SkUnichar SkUTF16_NextUnichar(const uint16_t**); -SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr, const uint16_t* end); - -// this guy backs up to the previus unichar value, and returns it (*--p) -SkUnichar SkUTF16_PrevUnichar(const uint16_t**); -size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t utf16[] = nullptr); - -size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues, - char utf8[] = nullptr); - -inline bool SkUnichar_IsVariationSelector(SkUnichar uni) { -/* The 'true' ranges are: - * 0x180B <= uni <= 0x180D - * 0xFE00 <= uni <= 0xFE0F - * 0xE0100 <= uni <= 0xE01EF - */ - if (uni < 0x180B || uni > 0xE01EF) { - return false; - } - if ((uni > 0x180D && uni < 0xFE00) || (uni > 0xFE0F && uni < 0xE0100)) { - return false; +static inline SkUnichar SkUTFN_Next(SkTypeface::Encoding enc, + const void** ptr, const void* stop) { + switch (enc) { + case SkTypeface::kUTF8_Encoding: + return SkUTF::NextUTF8((const char**)ptr, (const char*)stop); + case SkTypeface::kUTF16_Encoding: + return SkUTF::NextUTF16((const uint16_t**)ptr, (const uint16_t*)stop); + case SkTypeface::kUTF32_Encoding: + return SkUTF::NextUTF32((const int32_t**)ptr, (const int32_t*)stop); + default: SkDEBUGFAIL("unknown text encoding"); return -1; } - return true; } +/////////////////////////////////////////////////////////////////////////////// + namespace SkHexadecimalDigits { extern const char gUpper[16]; // 0-9A-F extern const char gLower[16]; // 0-9a-f diff --git a/src/pdf/SkPDFDevice.cpp b/src/pdf/SkPDFDevice.cpp index 9cd2892a05..fc7384f7c0 100644 --- a/src/pdf/SkPDFDevice.cpp +++ b/src/pdf/SkPDFDevice.cpp @@ -1274,7 +1274,7 @@ void SkPDFDevice::internalDrawGlyphRun(const SkGlyphRun& glyphRun, SkPoint offse // Check if `/ActualText` needed. const char* textPtr = c.fUtf8Text; const char* textEnd = c.fUtf8Text + c.fTextByteLength; - SkUnichar unichar = SkUTF8_NextUnicharWithError(&textPtr, textEnd); + SkUnichar unichar = SkUTF::NextUTF8(&textPtr, textEnd); if (unichar < 0) { return; } @@ -1288,7 +1288,7 @@ void SkPDFDevice::internalDrawGlyphRun(const SkGlyphRun& glyphRun, SkPoint offse // the BOM marks this text as UTF-16BE, not PDFDocEncoding. SkPDFUtils::WriteUTF16beHex(out, unichar); // first char while (textPtr < textEnd) { - unichar = SkUTF8_NextUnicharWithError(&textPtr, textEnd); + unichar = SkUTF::NextUTF8(&textPtr, textEnd); if (unichar < 0) { break; } diff --git a/src/pdf/SkPDFMetadata.cpp b/src/pdf/SkPDFMetadata.cpp index 9234f44ac1..16e6637c7d 100644 --- a/src/pdf/SkPDFMetadata.cpp +++ b/src/pdf/SkPDFMetadata.cpp @@ -62,22 +62,22 @@ static SkString to_utf16be(const char* src, size_t len) { const char* const end = src + len; size_t n = 1; // BOM for (const char* ptr = src; ptr < end;) { - SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end); + SkUnichar u = SkUTF::NextUTF8(&ptr, end); if (u < 0) { break; } - n += SkUTF16_FromUnichar(u); + n += SkUTF::ToUTF16(u); } ret.resize(2 * n); char* out = ret.writable_str(); write_utf16be(&out, 0xFEFF); // BOM for (const char* ptr = src; ptr < end;) { - SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end); + SkUnichar u = SkUTF::NextUTF8(&ptr, end); if (u < 0) { break; } uint16_t utf16[2]; - size_t l = SkUTF16_FromUnichar(u, utf16); + size_t l = SkUTF::ToUTF16(u, utf16); write_utf16be(&out, utf16[0]); if (l == 2) { write_utf16be(&out, utf16[1]); diff --git a/src/pdf/SkPDFUtils.h b/src/pdf/SkPDFUtils.h index a291914a5a..a05b52b113 100644 --- a/src/pdf/SkPDFUtils.h +++ b/src/pdf/SkPDFUtils.h @@ -103,7 +103,7 @@ inline void WriteUInt8(SkDynamicMemoryWStream* wStream, uint8_t value) { inline void WriteUTF16beHex(SkDynamicMemoryWStream* wStream, SkUnichar utf32) { uint16_t utf16[2] = {0, 0}; - size_t len = SkUTF16_FromUnichar(utf32, utf16); + size_t len = SkUTF::ToUTF16(utf32, utf16); SkASSERT(len == 1 || len == 2); SkPDFUtils::WriteUInt16BE(wStream, utf16[0]); if (len == 2) { diff --git a/src/ports/SkFontHost_mac.cpp b/src/ports/SkFontHost_mac.cpp index 18d09b4ec0..8fa8c636d6 100644 --- a/src/ports/SkFontHost_mac.cpp +++ b/src/ports/SkFontHost_mac.cpp @@ -1146,7 +1146,7 @@ uint16_t SkScalerContext_Mac::generateCharToGlyph(SkUnichar uni) { UniChar theChar[2]; // UniChar is a UTF-16 16-bit code unit. // Get the glyph - size_t numUniChar = SkUTF16_FromUnichar(uni, theChar); + size_t numUniChar = SkUTF::ToUTF16(uni, theChar); SkASSERT(sizeof(CGGlyph) <= sizeof(uint16_t)); // Undocumented behavior of CTFontGetGlyphsForCharacters with non-bmp code points: @@ -2366,7 +2366,7 @@ int SkTypeface_Mac::onCharsToGlyphs(const void* chars, Encoding encoding, src = utf16; for (int i = 0; i < glyphCount; ++i) { SkUnichar uni = SkUTF8_NextUnichar(&utf8); - utf16 += SkUTF16_FromUnichar(uni, utf16); + utf16 += SkUTF::ToUTF16(uni, utf16); } srcCount = SkToInt(utf16 - src); break; @@ -2387,7 +2387,7 @@ int SkTypeface_Mac::onCharsToGlyphs(const void* chars, Encoding encoding, UniChar* utf16 = charStorage.reset(2 * glyphCount); src = utf16; for (int i = 0; i < glyphCount; ++i) { - utf16 += SkUTF16_FromUnichar(utf32[i], utf16); + utf16 += SkUTF::ToUTF16(utf32[i], utf16); } srcCount = SkToInt(utf16 - src); break; diff --git a/src/ports/SkFontHost_win.cpp b/src/ports/SkFontHost_win.cpp index 07d37aedfe..4ee8bb44fd 100644 --- a/src/ports/SkFontHost_win.cpp +++ b/src/ports/SkFontHost_win.cpp @@ -795,7 +795,7 @@ uint16_t SkScalerContext_GDI::generateCharToGlyph(SkUnichar utf32) { uint16_t index = 0; WCHAR utf16[2]; // TODO(ctguil): Support characters that generate more than one glyph. - if (SkUTF16_FromUnichar(utf32, (uint16_t*)utf16) == 1) { + if (SkUTF::ToUTF16(utf32, (uint16_t*)utf16) == 1) { // Type1 fonts fail with uniscribe API. Use GetGlyphIndices for plane 0. /** Real documentation for GetGlyphIndiciesW: @@ -2103,7 +2103,7 @@ int LogFontTypeface::onCharsToGlyphs(const void* chars, Encoding encoding, // Try a run of non-bmp. while (glyphIndex < glyphCount && currentChar > 0xFFFF) { - SkUTF16_FromUnichar(currentChar, reinterpret_cast<uint16_t*>(scratch)); + SkUTF::ToUTF16(currentChar, reinterpret_cast<uint16_t*>(scratch)); glyphs[glyphIndex] = nonBmpCharToGlyph(hdc, &sc, scratch); ++glyphIndex; if (glyphIndex < glyphCount) { @@ -2158,7 +2158,7 @@ int LogFontTypeface::onCharsToGlyphs(const void* chars, Encoding encoding, // Try a run of non-bmp. while (glyphIndex < glyphCount && utf32[glyphIndex] > 0xFFFF) { - SkUTF16_FromUnichar(utf32[glyphIndex], reinterpret_cast<uint16_t*>(scratch)); + SkUTF::ToUTF16(utf32[glyphIndex], reinterpret_cast<uint16_t*>(scratch)); glyphs[glyphIndex] = nonBmpCharToGlyph(hdc, &sc, scratch); ++glyphIndex; } diff --git a/src/ports/SkFontMgr_win_dw.cpp b/src/ports/SkFontMgr_win_dw.cpp index 6830d82e03..7954c77ca2 100644 --- a/src/ports/SkFontMgr_win_dw.cpp +++ b/src/ports/SkFontMgr_win_dw.cpp @@ -757,7 +757,7 @@ SkTypeface* SkFontMgr_DirectWrite::onMatchFamilyStyleCharacter(const char family WCHAR str[16]; UINT32 strLen = static_cast<UINT32>( - SkUTF16_FromUnichar(character, reinterpret_cast<uint16_t*>(str))); + SkUTF::ToUTF16(character, reinterpret_cast<uint16_t*>(str))); const SkSMallocWCHAR* dwBcp47; SkSMallocWCHAR dwBcp47Local; diff --git a/src/ports/SkOSFile_stdio.cpp b/src/ports/SkOSFile_stdio.cpp index 10ba7c8b0c..8c0be793cd 100644 --- a/src/ports/SkOSFile_stdio.cpp +++ b/src/ports/SkOSFile_stdio.cpp @@ -46,16 +46,16 @@ static FILE* fopen_win(const char* utf8path, const char* perm) { const char* end = utf8path + strlen(utf8path); size_t n = 0; while (ptr < end) { - SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end); + SkUnichar u = SkUTF::NextUTF8(&ptr, end); if (u < 0) { return nullptr; // malformed UTF-8 } - n += SkUTF16_FromUnichar(u); + n += SkUTF::ToUTF16(u); } std::vector<uint16_t> wchars(n + 1); uint16_t* out = wchars.data(); for (const char* ptr = utf8path; ptr < end;) { - out += SkUTF16_FromUnichar(SkUTF8_NextUnicharWithError(&ptr, end), out); + out += SkUTF::ToUTF16(SkUTF::NextUTF8(&ptr, end), out); } SkASSERT(out == &wchars[n]); *out = 0; // final null diff --git a/src/ports/SkRemotableFontMgr_win_dw.cpp b/src/ports/SkRemotableFontMgr_win_dw.cpp index 6c6613ea37..2b99cf7be8 100644 --- a/src/ports/SkRemotableFontMgr_win_dw.cpp +++ b/src/ports/SkRemotableFontMgr_win_dw.cpp @@ -398,7 +398,7 @@ public: WCHAR str[16]; UINT32 strLen = static_cast<UINT32>( - SkUTF16_FromUnichar(character, reinterpret_cast<uint16_t*>(str))); + SkUTF::ToUTF16(character, reinterpret_cast<uint16_t*>(str))); SkTScopedComPtr<IDWriteTextLayout> fallbackLayout; HR_GENERAL(dwFactory->CreateTextLayout(str, strLen, fallbackFormat.get(), 200.0f, 200.0f, diff --git a/src/svg/SkSVGDevice.cpp b/src/svg/SkSVGDevice.cpp index 4a83a9c841..9a2eb99a77 100644 --- a/src/svg/SkSVGDevice.cpp +++ b/src/svg/SkSVGDevice.cpp @@ -119,6 +119,13 @@ struct Resources { SkString fClip; }; +static SkTypeface::Encoding to_encoding(SkPaint::TextEncoding e) { + static_assert((int)SkTypeface::kUTF8_Encoding == (int)SkPaint::kUTF8_TextEncoding, ""); + static_assert((int)SkTypeface::kUTF16_Encoding == (int)SkPaint::kUTF16_TextEncoding, ""); + static_assert((int)SkTypeface::kUTF32_Encoding == (int)SkPaint::kUTF32_TextEncoding, ""); + return (SkTypeface::Encoding)e; +} + class SVGTextBuilder : SkNoncopyable { public: SVGTextBuilder(const void* text, size_t byteLen, const SkPaint& paint, const SkPoint& offset, @@ -131,42 +138,29 @@ public: SkASSERT(scalarsPerPos <= 2); SkASSERT(scalarsPerPos == 0 || SkToBool(pos)); - int count = paint.countText(text, byteLen); - - const char* stop = (const char*)text + byteLen; - switch(paint.getTextEncoding()) { - case SkPaint::kGlyphID_TextEncoding: { - SkASSERT(count * sizeof(uint16_t) == byteLen); - SkAutoSTArray<64, SkUnichar> unichars(count); - paint.glyphsToUnichars((const uint16_t*)text, count, unichars.get()); - for (int i = 0; i < count; ++i) { - this->appendUnichar(unichars[i]); - } - } break; - case SkPaint::kUTF8_TextEncoding: { - const char* c8 = reinterpret_cast<const char*>(text); - for (int i = 0; i < count; ++i) { - this->appendUnichar(SkUTF8_NextUnichar(&c8, stop)); - } - SkASSERT(reinterpret_cast<const char*>(text) + byteLen == c8); - } break; - case SkPaint::kUTF16_TextEncoding: { - const uint16_t* c16 = reinterpret_cast<const uint16_t*>(text); - for (int i = 0; i < count; ++i) { - this->appendUnichar(SkUTF16_NextUnichar(&c16, (const uint16_t*)stop)); + SkPaint::TextEncoding encoding = paint.getTextEncoding(); + switch(encoding) { + case SkPaint::kGlyphID_TextEncoding: { + int count = paint.countText(text, byteLen); + SkASSERT(count * sizeof(uint16_t) == byteLen); + SkAutoSTArray<64, SkUnichar> unichars(count); + paint.glyphsToUnichars((const uint16_t*)text, count, unichars.get()); + for (int i = 0; i < count; ++i) { + this->appendUnichar(unichars[i]); + } + break; } - SkASSERT(SkIsAlign2(byteLen)); - SkASSERT(reinterpret_cast<const uint16_t*>(text) + (byteLen / 2) == c16); - } break; - case SkPaint::kUTF32_TextEncoding: { - SkASSERT(count * sizeof(uint32_t) == byteLen); - const uint32_t* c32 = reinterpret_cast<const uint32_t*>(text); - for (int i = 0; i < count; ++i) { - this->appendUnichar(c32[i]); + case SkPaint::kUTF8_TextEncoding: + case SkPaint::kUTF16_TextEncoding: + case SkPaint::kUTF32_TextEncoding: { + const void* stop = (const char*)text + byteLen; + while (text < stop) { + this->appendUnichar(SkUTFN_Next(to_encoding(encoding), &text, stop)); + } + break; } - } break; - default: - SK_ABORT("unknown text encoding"); + default: + SK_ABORT("unknown text encoding"); } if (scalarsPerPos < 2) { diff --git a/src/utils/SkUTF.cpp b/src/utils/SkUTF.cpp new file mode 100644 index 0000000000..0670ae02a4 --- /dev/null +++ b/src/utils/SkUTF.cpp @@ -0,0 +1,253 @@ +// Copyright 2018 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. + +#include "SkUTF.h" + +#include <climits> + +static constexpr inline int32_t left_shift(int32_t value, int32_t shift) { + return (int32_t) ((uint32_t) value << shift); +} + +template <typename T> static constexpr bool is_align2(T x) { return 0 == (x & 1); } + +template <typename T> static constexpr bool is_align4(T x) { return 0 == (x & 3); } + +static constexpr inline bool utf16_is_high_surrogate(uint16_t c) { return (c & 0xFC00) == 0xD800; } + +static constexpr inline bool utf16_is_low_surrogate(uint16_t c) { return (c & 0xFC00) == 0xDC00; } + +/** @returns -1 iff invalid UTF8 byte, + 0 iff UTF8 continuation byte, + 1 iff ASCII byte, + 2 iff leading byte of 2-byte sequence, + 3 iff leading byte of 3-byte sequence, and + 4 iff leading byte of 4-byte sequence. + I.e.: if return value > 0, then gives length of sequence. +*/ +static int utf8_byte_type(uint8_t c) { + if (c < 0x80) { + return 1; + } else if (c < 0xC0) { + return 0; + } else if (c >= 0xF5 || (c & 0xFE) == 0xC0) { // "octet values c0, c1, f5 to ff never appear" + return -1; + } else { + int value = (((0xe5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1; + // assert(value >= 2 && value <=4); + return value; + } +} +static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; } + +static bool utf8_byte_is_continuation(uint8_t c) { return utf8_byte_type(c) == 0; } + +//////////////////////////////////////////////////////////////////////////////// + +int SkUTF::CountUTF8(const char* utf8, size_t byteLength) { + if (!utf8) { + return -1; + } + int count = 0; + const char* stop = utf8 + byteLength; + while (utf8 < stop) { + int type = utf8_byte_type(*(const uint8_t*)utf8); + if (!utf8_type_is_valid_leading_byte(type) || utf8 + type > stop) { + return -1; // Sequence extends beyond end. + } + while(type-- > 1) { + ++utf8; + if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) { + return -1; + } + } + ++utf8; + ++count; + } + return count; +} + +int SkUTF::CountUTF16(const uint16_t* utf16, size_t byteLength) { + if (!utf16 || !is_align2(intptr_t(utf16)) || !is_align2(byteLength)) { + return -1; + } + const uint16_t* src = (const uint16_t*)utf16; + const uint16_t* stop = src + (byteLength >> 1); + int count = 0; + while (src < stop) { + unsigned c = *src++; + if (utf16_is_low_surrogate(c)) { + return -1; + } + if (utf16_is_high_surrogate(c)) { + if (src >= stop) { + return -1; + } + c = *src++; + if (!utf16_is_low_surrogate(c)) { + return -1; + } + } + count += 1; + } + return count; +} + +int SkUTF::CountUTF32(const int32_t* utf32, size_t byteLength) { + if (!is_align4(intptr_t(utf32)) || !is_align4(byteLength) || byteLength >> 2 > INT_MAX) { + return -1; + } + const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits + const uint32_t* ptr = (const uint32_t*)utf32; + const uint32_t* stop = ptr + (byteLength >> 2); + while (ptr < stop) { + if (*ptr & kInvalidUnicharMask) { + return -1; + } + ptr += 1; + } + return (int)(byteLength >> 2); +} + +template <typename T> +static SkUnichar next_fail(const T** ptr, const T* end) { + *ptr = end; + return -1; +} + +SkUnichar SkUTF::NextUTF8(const char** ptr, const char* end) { + if (!ptr || !end ) { + return -1; + } + const uint8_t* p = (const uint8_t*)*ptr; + if (!p || p >= (const uint8_t*)end) { + return next_fail(ptr, end); + } + int c = *p; + int hic = c << 24; + + if (!utf8_type_is_valid_leading_byte(utf8_byte_type(c))) { + return next_fail(ptr, end); + } + if (hic < 0) { + uint32_t mask = (uint32_t)~0x3F; + hic = left_shift(hic, 1); + do { + ++p; + if (p >= (const uint8_t*)end) { + return next_fail(ptr, end); + } + // check before reading off end of array. + uint8_t nextByte = *p; + if (!utf8_byte_is_continuation(nextByte)) { + return next_fail(ptr, end); + } + c = (c << 6) | (nextByte & 0x3F); + mask <<= 5; + } while ((hic = left_shift(hic, 1)) < 0); + c &= ~mask; + } + *ptr = (char*)p + 1; + return c; +} + +SkUnichar SkUTF::NextUTF16(const uint16_t** ptr, const uint16_t* end) { + if (!ptr || !end ) { + return next_fail(ptr, end); + } + const uint16_t* src = *ptr; + if (!src || src + 1 > end || !is_align2(intptr_t(src))) { + return next_fail(ptr, end); + } + uint16_t c = *src++; + SkUnichar result = c; + if (utf16_is_low_surrogate(c)) { + return next_fail(ptr, end); // srcPtr should never point at low surrogate. + } + if (utf16_is_high_surrogate(c)) { + if (src + 1 > end) { + return next_fail(ptr, end); // Truncated string. + } + uint16_t low = *src++; + if (!utf16_is_low_surrogate(low)) { + return next_fail(ptr, end); + } + /* + [paraphrased from wikipedia] + Take the high surrogate and subtract 0xD800, then multiply by 0x400. + Take the low surrogate and subtract 0xDC00. Add these two results + together, and finally add 0x10000 to get the final decoded codepoint. + + unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000 + unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000 + unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000 + unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000) + */ + result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000); + } + *ptr = src; + return result; +} + +SkUnichar SkUTF::NextUTF32(const int32_t** ptr, const int32_t* end) { + if (!ptr || !end ) { + return -1; + } + const int32_t* s = *ptr; + if (!s || s + 1 > end || !is_align4(intptr_t(s))) { + return next_fail(ptr, end); + } + int32_t value = *s; + const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits + if (value & kInvalidUnicharMask) { + return next_fail(ptr, end); + } + *ptr = s + 1; + return value; +} + +size_t SkUTF::ToUTF8(SkUnichar uni, char utf8[SkUTF::kMaxBytesInUTF8Sequence]) { + if ((uint32_t)uni > 0x10FFFF) { + return 0; + } + if (uni <= 127) { + if (utf8) { + *utf8 = (char)uni; + } + return 1; + } + char tmp[4]; + char* p = tmp; + size_t count = 1; + while (uni > 0x7F >> count) { + *p++ = (char)(0x80 | (uni & 0x3F)); + uni >>= 6; + count += 1; + } + if (utf8) { + p = tmp; + utf8 += count; + while (p < tmp + count - 1) { + *--utf8 = *p++; + } + *--utf8 = (char)(~(0xFF >> count) | uni); + } + return count; +} + +size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) { + if ((uint32_t)uni > 0x10FFFF) { + return 0; + } + int extra = (uni > 0xFFFF); + if (utf16) { + if (extra) { + utf16[0] = (uint16_t)((0xD800 - 64) + (uni >> 10)); + utf16[1] = (uint16_t)(0xDC00 | (uni & 0x3FF)); + } else { + utf16[0] = (uint16_t)uni; + } + } + return 1 + extra; +} + diff --git a/src/utils/SkUTF.h b/src/utils/SkUTF.h new file mode 100644 index 0000000000..385102aadb --- /dev/null +++ b/src/utils/SkUTF.h @@ -0,0 +1,68 @@ +// Copyright 2018 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. +#ifndef SkUTF_DEFINED +#define SkUTF_DEFINED + +#include <cstddef> +#include <cstdint> + +typedef int32_t SkUnichar; + +namespace SkUTF { + +/** Given a sequence of UTF-8 bytes, return the number of unicode codepoints. + If the sequence is invalid UTF-8, return -1. +*/ +int CountUTF8(const char* utf8, size_t byteLength); + +/** Given a sequence of aligned UTF-16 characters in machine-endian form, + return the number of unicode codepoints. If the sequence is invalid + UTF-16, return -1. +*/ +int CountUTF16(const uint16_t* utf16, size_t byteLength); + +/** Given a sequence of aligned UTF-32 characters in machine-endian form, + return the number of unicode codepoints. If the sequence is invalid + UTF-32, return -1. +*/ +int CountUTF32(const int32_t* utf32, size_t byteLength); + +/** Given a sequence of UTF-8 bytes, return the first unicode codepoint. + The pointer will be incremented to point at the next codepoint's start. If + invalid UTF-8 is encountered, set *ptr to end and return -1. +*/ +SkUnichar NextUTF8(const char** ptr, const char* end); + +/** Given a sequence of aligned UTF-16 characters in machine-endian form, + return the first unicode codepoint. The pointer will be incremented to + point at the next codepoint's start. If invalid UTF-16 is encountered, + set *ptr to end and return -1. +*/ +SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end); + +/** Given a sequence of aligned UTF-32 characters in machine-endian form, + return the first unicode codepoint. The pointer will be incremented to + point at the next codepoint's start. If invalid UTF-32 is encountered, + set *ptr to end and return -1. +*/ +SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end); + +constexpr unsigned kMaxBytesInUTF8Sequence = 4; + +/** Convert the unicode codepoint into UTF-8. If `utf8` is non-null, place the + result in that array. Return the number of bytes in the result. If `utf8` + is null, simply return the number of bytes that would be used. For invalid + unicode codepoints, return 0. +*/ +size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr); + +/** Convert the unicode codepoint into UTF-16. If `utf16` is non-null, place + the result in that array. Return the number of UTF-16 code units in the + result (1 or 2). If `utf16` is null, simply return the number of code + units that would be used. For invalid unicode codepoints, return 0. +*/ +size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr); + +} // namespace SkUTF + +#endif // SkUTF_DEFINED diff --git a/src/xps/SkXPSDevice.cpp b/src/xps/SkXPSDevice.cpp index b984ef3e88..c2b5e66406 100644 --- a/src/xps/SkXPSDevice.cpp +++ b/src/xps/SkXPSDevice.cpp @@ -1984,17 +1984,13 @@ HRESULT SkXPSDevice::AddGlyphs(IXpsOMObjectFactory* xpsFactory, } static int num_glyph_guess(SkPaint::TextEncoding encoding, const void* text, size_t byteLength) { - switch (encoding) { - case SkPaint::kUTF8_TextEncoding: - return SkUTF8_CountUnichars(text, byteLength); - case SkPaint::kUTF16_TextEncoding: - return SkUTF16_CountUnichars(text, byteLength); - case SkPaint::kGlyphID_TextEncoding: + static_assert((int)SkTypeface::kUTF8_Encoding == (int)SkPaint::kUTF8_TextEncoding, ""); + static_assert((int)SkTypeface::kUTF16_Encoding == (int)SkPaint::kUTF16_TextEncoding, ""); + static_assert((int)SkTypeface::kUTF32_Encoding == (int)SkPaint::kUTF32_TextEncoding, ""); + if (encoding == SkPaint::kGlyphID_TextEncoding) { return SkToInt(byteLength / 2); - default: - SK_ABORT("Invalid Text Encoding"); } - return 0; + return SkUTFN_CountUnichars((SkTypeface::Encoding)encoding, text, byteLength); } static bool text_must_be_pathed(const SkPaint& paint, const SkMatrix& matrix) { diff --git a/tests/PaintTest.cpp b/tests/PaintTest.cpp index 647f8e2f23..0bf6df64de 100644 --- a/tests/PaintTest.cpp +++ b/tests/PaintTest.cpp @@ -23,7 +23,7 @@ static size_t uni_to_utf8(const SkUnichar src[], void* dst, int count) { char* u8 = (char*)dst; for (int i = 0; i < count; ++i) { - int n = SkToInt(SkUTF8_FromUnichar(src[i], u8)); + int n = SkToInt(SkUTF::ToUTF8(src[i], u8)); u8 += n; } return u8 - (char*)dst; @@ -32,7 +32,7 @@ static size_t uni_to_utf8(const SkUnichar src[], void* dst, int count) { static size_t uni_to_utf16(const SkUnichar src[], void* dst, int count) { uint16_t* u16 = (uint16_t*)dst; for (int i = 0; i < count; ++i) { - int n = SkToInt(SkUTF16_FromUnichar(src[i], u16)); + int n = SkToInt(SkUTF::ToUTF16(src[i], u16)); u16 += n; } return (char*)u16 - (char*)dst; diff --git a/tests/UnicodeTest.cpp b/tests/UnicodeTest.cpp index 5ed92d4b34..5dbcf08547 100644 --- a/tests/UnicodeTest.cpp +++ b/tests/UnicodeTest.cpp @@ -9,39 +9,10 @@ #include "SkUtils.h" #include "Test.h" -// Unicode Variation Selector ranges: inclusive -#define UVS_MIN0 0x180B -#define UVS_MAX0 0x180D -#define UVS_MIN1 0xFE00 -#define UVS_MAX1 0xFE0F -#define UVS_MIN2 0xE0100 -#define UVS_MAX2 0xE01EF - -static bool isUVS(SkUnichar uni) { - return (uni >= UVS_MIN0 && uni <= UVS_MAX0) || - (uni >= UVS_MIN1 && uni <= UVS_MAX1) || - (uni >= UVS_MIN2 && uni <= UVS_MAX2); -} - -static void test_uvs(skiatest::Reporter* reporter) { - // [min, max], [min, max] ... inclusive - static const SkUnichar gRanges[] = { - UVS_MIN0, UVS_MAX0, UVS_MIN1, UVS_MAX1, UVS_MIN2, UVS_MAX2 - }; - - for (size_t i = 0; i < SK_ARRAY_COUNT(gRanges); i += 2) { - for (SkUnichar uni = gRanges[i] - 8; uni <= gRanges[i+1] + 8; ++uni) { - bool uvs0 = isUVS(uni); - bool uvs1 = SkUnichar_IsVariationSelector(uni); - REPORTER_ASSERT(reporter, uvs0 == uvs1); - } - } -} - // Simple test to ensure that when we call textToGlyphs, we get the same // result (for the same text) when using UTF8, UTF16, UTF32. // TODO: make the text more complex (i.e. incorporate chars>7bits) -static void test_textencodings(skiatest::Reporter* reporter) { +DEF_TEST(Unicode_textencodings, reporter) { const char text8[] = "ABCDEFGabcdefg0123456789"; uint16_t text16[sizeof(text8)]; int32_t text32[sizeof(text8)]; @@ -76,8 +47,3 @@ static void test_textencodings(skiatest::Reporter* reporter) { REPORTER_ASSERT(reporter, !memcmp(glyphs8, glyphs16, count8 * sizeof(uint16_t))); REPORTER_ASSERT(reporter, !memcmp(glyphs8, glyphs32, count8 * sizeof(uint16_t))); } - -DEF_TEST(Unicode, reporter) { - test_uvs(reporter); - test_textencodings(reporter); -} diff --git a/tests/UtilsTest.cpp b/tests/UtilsTest.cpp index 6f8ec914bc..d621a8b12e 100644 --- a/tests/UtilsTest.cpp +++ b/tests/UtilsTest.cpp @@ -165,20 +165,19 @@ static void test_search(skiatest::Reporter* reporter) { } static void test_utf16(skiatest::Reporter* reporter) { + // Test non-basic-multilingual-plane unicode. static const SkUnichar gUni[] = { 0x10000, 0x18080, 0x20202, 0xFFFFF, 0x101234 }; - - uint16_t buf[2]; - - for (size_t i = 0; i < SK_ARRAY_COUNT(gUni); i++) { - size_t count = SkUTF16_FromUnichar(gUni[i], buf); + for (SkUnichar uni : gUni) { + uint16_t buf[2]; + size_t count = SkUTF::ToUTF16(uni, buf); REPORTER_ASSERT(reporter, count == 2); - size_t count2 = SkUTF16_CountUnichars(buf, 2 * sizeof(uint16_t)); + size_t count2 = SkUTF::CountUTF16(buf, sizeof(buf)); REPORTER_ASSERT(reporter, count2 == 1); const uint16_t* ptr = buf; - SkUnichar c = SkUTF16_NextUnichar(&ptr, buf + SK_ARRAY_COUNT(buf)); - REPORTER_ASSERT(reporter, c == gUni[i]); + SkUnichar c = SkUTF::NextUTF16(&ptr, buf + SK_ARRAY_COUNT(buf)); + REPORTER_ASSERT(reporter, c == uni); REPORTER_ASSERT(reporter, ptr - buf == 2); } } @@ -204,8 +203,8 @@ DEF_TEST(Utils, reporter) { for (size_t i = 0; i < SK_ARRAY_COUNT(gTest); i++) { const char* p = gTest[i].fUtf8; const char* stop = p + strlen(p); - int n = SkUTF8_CountUnichars(p, strlen(p)); - SkUnichar u1 = SkUTF8_NextUnichar(&p, stop); + int n = SkUTF::CountUTF8(p, strlen(p)); + SkUnichar u1 = SkUTF::NextUTF8(&p, stop); REPORTER_ASSERT(reporter, n == 1); REPORTER_ASSERT(reporter, u1 == gTest[i].fUni); @@ -220,62 +219,91 @@ DEF_TEST(Utils, reporter) { } #define ASCII_BYTE "X" -#define CONTINUATION_BYTE "\x80" -#define LEADING_TWO_BYTE "\xC4" -#define LEADING_THREE_BYTE "\xE0" +#define CONTINUATION_BYTE "\xA1" +#define LEADING_TWO_BYTE "\xC2" +#define LEADING_THREE_BYTE "\xE1" #define LEADING_FOUR_BYTE "\xF0" #define INVALID_BYTE "\xFC" -static bool valid_utf8(const char* p, size_t l) { - return SkUTF8_CountUnichars(p, l) >= 0; -} -DEF_TEST(Utils_UTF8_ValidLength, r) { - const char* goodTestcases[] = { - "", - ASCII_BYTE, - ASCII_BYTE ASCII_BYTE, - LEADING_TWO_BYTE CONTINUATION_BYTE, - ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE, - ASCII_BYTE ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE, - LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - ASCII_BYTE ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - ASCII_BYTE ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE - CONTINUATION_BYTE, +DEF_TEST(SkUTF_CountUTF8, r) { + struct { + int expectedCount; + const char* utf8String; + } testCases[] = { + { 0, "" }, + { 1, ASCII_BYTE }, + { 2, ASCII_BYTE ASCII_BYTE }, + { 1, LEADING_TWO_BYTE CONTINUATION_BYTE }, + { 2, ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE }, + { 3, ASCII_BYTE ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE }, + { 1, LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, + { 2, ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, + { 3, ASCII_BYTE ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, + { 1, LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, + { 2, ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, + { 3, ASCII_BYTE ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE + CONTINUATION_BYTE }, + { -1, INVALID_BYTE }, + { -1, INVALID_BYTE CONTINUATION_BYTE }, + { -1, INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, + { -1, INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, + { -1, LEADING_TWO_BYTE }, + { -1, CONTINUATION_BYTE }, + { -1, CONTINUATION_BYTE CONTINUATION_BYTE }, + { -1, LEADING_THREE_BYTE CONTINUATION_BYTE }, + { -1, CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, + { -1, LEADING_FOUR_BYTE CONTINUATION_BYTE }, + { -1, CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, }; - for (const char* testcase : goodTestcases) { - REPORTER_ASSERT(r, valid_utf8(testcase, strlen(testcase))); + for (auto testCase : testCases) { + const char* str = testCase.utf8String; + REPORTER_ASSERT(r, testCase.expectedCount == SkUTF::CountUTF8(str, strlen(str))); } - const char* badTestcases[] = { - INVALID_BYTE, - INVALID_BYTE CONTINUATION_BYTE, - INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - LEADING_TWO_BYTE, - CONTINUATION_BYTE, - CONTINUATION_BYTE CONTINUATION_BYTE, - LEADING_THREE_BYTE CONTINUATION_BYTE, - CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - LEADING_FOUR_BYTE CONTINUATION_BYTE, - CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - - ASCII_BYTE INVALID_BYTE, - ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE, - ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - ASCII_BYTE LEADING_TWO_BYTE, - ASCII_BYTE CONTINUATION_BYTE, - ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE, - ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE, - ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE, - - // LEADING_FOUR_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE, +} + +DEF_TEST(SkUTF_NextUTF8_ToUTF8, r) { + struct { + SkUnichar expected; + const char* utf8String; + } testCases[] = { + { -1, INVALID_BYTE }, + { -1, "" }, + { 0x0058, ASCII_BYTE }, + { 0x00A1, LEADING_TWO_BYTE CONTINUATION_BYTE }, + { 0x1861, LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE }, + { 0x010330, LEADING_FOUR_BYTE "\x90\x8C\xB0" }, }; - for (const char* testcase : badTestcases) { - REPORTER_ASSERT(r, !valid_utf8(testcase, strlen(testcase))); + for (auto testCase : testCases) { + const char* str = testCase.utf8String; + SkUnichar uni = SkUTF::NextUTF8(&str, str + strlen(str)); + REPORTER_ASSERT(r, str == testCase.utf8String + strlen(testCase.utf8String)); + REPORTER_ASSERT(r, uni == testCase.expected); + char buff[5] = {0, 0, 0, 0, 0}; + size_t len = SkUTF::ToUTF8(uni, buff); + if (buff[len] != 0) { + ERRORF(r, "unexpected write"); + continue; + } + if (uni == -1) { + REPORTER_ASSERT(r, len == 0); + continue; + } + if (len == 0) { + ERRORF(r, "unexpected failure."); + continue; + } + if (len > 4) { + ERRORF(r, "wrote too much"); + continue; + } + str = testCase.utf8String; + REPORTER_ASSERT(r, len == strlen(buff)); + REPORTER_ASSERT(r, len == strlen(str)); + REPORTER_ASSERT(r, 0 == strcmp(str, buff)); } - } +#undef ASCII_BYTE +#undef CONTINUATION_BYTE +#undef LEADING_TWO_BYTE +#undef LEADING_THREE_BYTE +#undef LEADING_FOUR_BYTE +#undef INVALID_BYTE diff --git a/tools/fonts/create_test_font.cpp b/tools/fonts/create_test_font.cpp index 36ead3cb65..de19c099c9 100644 --- a/tools/fonts/create_test_font.cpp +++ b/tools/fonts/create_test_font.cpp @@ -140,13 +140,12 @@ static int output_points(const SkPoint* pts, int emSize, int count, SkString* pt static void output_path_data(const SkPaint& paint, int emSize, SkString* ptsOut, SkTDArray<SkPath::Verb>* verbs, SkTDArray<unsigned>* charCodes, SkTDArray<SkScalar>* widths) { - for (int ch = 0x00; ch < 0x7f; ++ch) { - char str[1]; - str[0] = ch; - const char* used = str; - SkUnichar index = SkUTF8_NextUnichar(&used, str + 1); + for (SkUnichar index = 0x00; index < 0x7f; ++index) { + uint16_t utf16[2]; + size_t utf16Bytes = sizeof(uint16_t) * SkUTF::ToUTF16(index, utf16); SkPath path; - paint.getTextPath((const void*) &index, 2, 0, 0, &path); + SkASSERT(paint.getTextEncoding() == SkPaint::kUTF16_TextEncoding); + paint.getTextPath(utf16, utf16Bytes, 0, 0, &path); SkPath::RawIter iter(path); SkPath::Verb verb; SkPoint pts[4]; @@ -175,12 +174,12 @@ static void output_path_data(const SkPaint& paint, *verbs->append() = SkPath::kDone_Verb; *charCodes->append() = index; SkScalar width; - SkDEBUGCODE(int charCount =) paint.getTextWidths((const void*) &index, 2, &width); + SkDEBUGCODE(int charCount =) paint.getTextWidths(utf16, utf16Bytes, &width); SkASSERT(charCount == 1); // SkASSERT(floor(width) == width); // not true for Hiragino Maru Gothic Pro *widths->append() = width; - if (!ch) { - ch = 0x1f; // skip the rest of the control codes + if (0 == index) { + index = 0x1f; // skip the rest of the control codes } } } @@ -432,7 +431,11 @@ static void generate_index(const char* defaultName) { } int main(int , char * const []) { +#ifdef SK_BUILD_FOR_UNIX + generate_fonts("/usr/share/fonts/truetype/liberation/"); +#else generate_fonts("/Library/Fonts/"); // or /usr/share/fonts/truetype/ttf-liberation/ +#endif generate_index(DEFAULT_FONT_NAME); return 0; } |