aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Hal Canary <halcanary@google.com> 2018-07-25 16:52:48 -0400
committer Skia Commit-Bot <skia-commit-bot@chromium.org> 2018-07-31 20:11:19 +0000
commit f107a2fd014cd39c489060f2cd1b99cd49c7d0be (patch)
tree 5c324821344901869203bbe055be8d3f69f696cb
parent 1935aa3d27cd4ed4aef2dc04360f247a541d4b00 (diff)
SkUTF
Create new header and namespace, `SkUTF`, where we are putting all of our robust, well-documented UTF-8, UTF-16, and UTF-32 functions: `SkUTF::{Count,Next,To}UTF{8,16,32}()`.

SkUTF.h and SkUTF.cpp do not depend on the rest of Skia and are suitable for re-use in other modules.

Some of the old UTF-{8,16} functions still live in SkUtils.h; their use will be phased out in future CLs.

Also added more unit testing and cleaned up old tests.

Removed functions that were unused outside of tests or used only once.

Change-Id: Iaa59b8705abccf9c4ba082f855da368a0bad8380
Reviewed-on: https://skia-review.googlesource.com/143306
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Hal Canary <halcanary@google.com>
-rw-r--r-- fuzz/FuzzCanvas.cpp 8
-rw-r--r-- gm/atlastext.cpp 2
-rw-r--r-- gm/coloremoji_blendmodes.cpp 3
-rw-r--r-- gn/utils.gni 2
-rw-r--r-- modules/skshaper/src/SkShaper_harfbuzz.cpp 27
-rw-r--r-- samplecode/SampleRectanizer.cpp 4
-rw-r--r-- samplecode/SampleUnpremul.cpp 4
-rw-r--r-- samplecode/SampleXfermodesBlur.cpp 2
-rw-r--r-- src/core/SkFindAndPlaceGlyph.h 24
-rw-r--r-- src/core/SkFont.cpp 4
-rw-r--r-- src/core/SkOverdrawCanvas.cpp 6
-rw-r--r-- src/core/SkPaint.cpp 104
-rw-r--r-- src/core/SkPaintPriv.cpp 6
-rw-r--r-- src/core/SkString.cpp 4
-rw-r--r-- src/core/SkStringUtils.cpp 6
-rw-r--r-- src/core/SkUtils.cpp 428
-rw-r--r-- src/core/SkUtils.h 98
-rw-r--r-- src/pdf/SkPDFDevice.cpp 4
-rw-r--r-- src/pdf/SkPDFMetadata.cpp 8
-rw-r--r-- src/pdf/SkPDFUtils.h 2
-rw-r--r-- src/ports/SkFontHost_mac.cpp 6
-rw-r--r-- src/ports/SkFontHost_win.cpp 6
-rw-r--r-- src/ports/SkFontMgr_win_dw.cpp 2
-rw-r--r-- src/ports/SkOSFile_stdio.cpp 6
-rw-r--r-- src/ports/SkRemotableFontMgr_win_dw.cpp 2
-rw-r--r-- src/svg/SkSVGDevice.cpp 62
-rw-r--r-- src/utils/SkUTF.cpp 253
-rw-r--r-- src/utils/SkUTF.h 68
-rw-r--r-- src/xps/SkXPSDevice.cpp 14
-rw-r--r-- tests/PaintTest.cpp 4
-rw-r--r-- tests/UnicodeTest.cpp 36
-rw-r--r-- tests/UtilsTest.cpp 152
-rw-r--r-- tools/fonts/create_test_font.cpp 21
33 files changed, 603 insertions, 775 deletions
diff --git a/fuzz/FuzzCanvas.cpp b/fuzz/FuzzCanvas.cpp
index aac68481d4..80d3e816a1 100644
--- a/fuzz/FuzzCanvas.cpp
+++ b/fuzz/FuzzCanvas.cpp
@@ -1020,21 +1020,21 @@ static SkTDArray<uint8_t> make_fuzz_text(Fuzz* fuzz, const SkPaint& paint) {
case SkPaint::kUTF8_TextEncoding: {
size_t utf8len = 0;
for (int j = 0; j < length; ++j) {
- utf8len += SkUTF8_FromUnichar(buffer[j], nullptr);
+ utf8len += SkUTF::ToUTF8(buffer[j], nullptr);
}
char* ptr = (char*)array.append(utf8len);
for (int j = 0; j < length; ++j) {
- ptr += SkUTF8_FromUnichar(buffer[j], ptr);
+ ptr += SkUTF::ToUTF8(buffer[j], ptr);
}
} break;
case SkPaint::kUTF16_TextEncoding: {
size_t utf16len = 0;
for (int j = 0; j < length; ++j) {
- utf16len += SkUTF16_FromUnichar(buffer[j]);
+ utf16len += SkUTF::ToUTF16(buffer[j]);
}
uint16_t* ptr = (uint16_t*)array.append(utf16len * sizeof(uint16_t));
for (int j = 0; j < length; ++j) {
- ptr += SkUTF16_FromUnichar(buffer[j], ptr);
+ ptr += SkUTF::ToUTF16(buffer[j], ptr);
}
} break;
case SkPaint::kUTF32_TextEncoding:
diff --git a/gm/atlastext.cpp b/gm/atlastext.cpp
index 859b4c356f..e3fed2ee06 100644
--- a/gm/atlastext.cpp
+++ b/gm/atlastext.cpp
@@ -30,7 +30,7 @@ static SkScalar draw_string(SkAtlasTextTarget* target, const SkString& text, SkS
return x;
}
auto font = SkAtlasTextFont::Make(typeface, size);
- int cnt = SkUTF8_CountUnichars(text.c_str(), text.size());
+ int cnt = SkUTF::CountUTF8(text.c_str(), text.size());
std::unique_ptr<SkGlyphID[]> glyphs(new SkGlyphID[cnt]);
typeface->charsToGlyphs(text.c_str(), SkTypeface::Encoding::kUTF8_Encoding, glyphs.get(), cnt);
diff --git a/gm/coloremoji_blendmodes.cpp b/gm/coloremoji_blendmodes.cpp
index 872527404c..46ba67ec90 100644
--- a/gm/coloremoji_blendmodes.cpp
+++ b/gm/coloremoji_blendmodes.cpp
@@ -148,7 +148,8 @@ protected:
textP.setBlendMode(gModes[i]);
textP.setTextEncoding(SkPaint::kUTF32_TextEncoding);
const char* text = sk_tool_utils::emoji_sample_text();
- SkUnichar unichar = SkUTF8_NextUnichar(&text, text + strlen(text));
+ SkUnichar unichar = SkUTF::NextUTF8(&text, text + strlen(text));
+ SkASSERT(unichar >= 0);
canvas->drawText(&unichar, 4, x+ w/10.f, y + 7.f*h/8.f, textP);
}
#if 1
diff --git a/gn/utils.gni b/gn/utils.gni
index 7d014b0a0c..68667beed4 100644
--- a/gn/utils.gni
+++ b/gn/utils.gni
@@ -62,6 +62,8 @@ skia_utils_sources = [
"$_src/utils/SkShadowUtils.cpp",
"$_src/utils/SkThreadUtils_pthread.cpp",
"$_src/utils/SkThreadUtils_win.cpp",
+ "$_src/utils/SkUTF.cpp",
+ "$_src/utils/SkUTF.h",
"$_src/utils/SkWhitelistTypefaces.cpp",
#mac
diff --git a/modules/skshaper/src/SkShaper_harfbuzz.cpp b/modules/skshaper/src/SkShaper_harfbuzz.cpp
index 2077dedadc..5c49de04b4 100644
--- a/modules/skshaper/src/SkShaper_harfbuzz.cpp
+++ b/modules/skshaper/src/SkShaper_harfbuzz.cpp
@@ -83,6 +83,15 @@ HBFont create_hb_font(SkTypeface* tf) {
return font;
}
+/** this version replaces invalid utf-8 sequences with code point U+FFFD. */
+static inline SkUnichar utf8_next(const char** ptr, const char* end) {
+ SkUnichar val = SkUTF::NextUTF8(ptr, end);
+ if (val < 0) {
+ return 0xFFFD; // REPLACEMENT CHARACTER
+ }
+ return val;
+}
+
class RunIterator {
public:
virtual ~RunIterator() {}
@@ -138,16 +147,16 @@ public:
SkASSERT(fUTF16LogicalPosition < ubidi_getLength(fBidi.get()));
int32_t endPosition = ubidi_getLength(fBidi.get());
fLevel = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition);
- SkUnichar u = SkUTF8_NextUnichar(&fEndOfCurrentRun, fEndOfAllRuns);
- fUTF16LogicalPosition += SkUTF16_FromUnichar(u);
+ SkUnichar u = utf8_next(&fEndOfCurrentRun, fEndOfAllRuns);
+ fUTF16LogicalPosition += SkUTF::ToUTF16(u);
UBiDiLevel level;
while (fUTF16LogicalPosition < endPosition) {
level = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition);
if (level != fLevel) {
break;
}
- u = SkUTF8_NextUnichar(&fEndOfCurrentRun, fEndOfAllRuns);
- fUTF16LogicalPosition += SkUTF16_FromUnichar(u);
+ u = utf8_next(&fEndOfCurrentRun, fEndOfAllRuns);
+ fUTF16LogicalPosition += SkUTF::ToUTF16(u);
}
}
const char* endOfCurrentRun() const override {
@@ -184,11 +193,11 @@ public:
{}
void consume() override {
SkASSERT(fCurrent < fEnd);
- SkUnichar u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+ SkUnichar u = utf8_next(&fCurrent, fEnd);
fCurrentScript = hb_unicode_script(fHBUnicode, u);
while (fCurrent < fEnd) {
const char* prev = fCurrent;
- u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+ u = utf8_next(&fCurrent, fEnd);
const hb_script_t script = hb_unicode_script(fHBUnicode, u);
if (script != fCurrentScript) {
if (fCurrentScript == HB_SCRIPT_INHERITED || fCurrentScript == HB_SCRIPT_COMMON) {
@@ -243,7 +252,7 @@ public:
{}
void consume() override {
SkASSERT(fCurrent < fEnd);
- SkUnichar u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+ SkUnichar u = utf8_next(&fCurrent, fEnd);
// If the starting typeface can handle this character, use it.
if (fTypeface->charsToGlyphs(&u, SkTypeface::kUTF32_Encoding, nullptr, 1)) {
fFallbackTypeface.reset();
@@ -265,7 +274,7 @@ public:
while (fCurrent < fEnd) {
const char* prev = fCurrent;
- u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+ u = utf8_next(&fCurrent, fEnd);
// If using a fallback and the initial typeface has this character, stop fallback.
if (fFallbackTypeface &&
@@ -554,7 +563,7 @@ SkPoint SkShaper::shape(SkTextBlobBuilder* builder,
const char* utf8Current = utf8Start;
while (utf8Current < utf8End) {
unsigned int cluster = utf8Current - utf8Start;
- hb_codepoint_t u = SkUTF8_NextUnichar(&utf8Current, utf8End);
+ hb_codepoint_t u = utf8_next(&utf8Current, utf8End);
hb_buffer_add(buffer, u, cluster);
}
diff --git a/samplecode/SampleRectanizer.cpp b/samplecode/SampleRectanizer.cpp
index 752d5b7136..d402416ca5 100644
--- a/samplecode/SampleRectanizer.cpp
+++ b/samplecode/SampleRectanizer.cpp
@@ -59,8 +59,8 @@ protected:
}
SkUnichar uni;
if (SampleCode::CharQ(*evt, &uni)) {
- char utf8[kMaxBytesInUTF8Sequence];
- size_t size = SkUTF8_FromUnichar(uni, utf8);
+ char utf8[SkUTF::kMaxBytesInUTF8Sequence];
+ size_t size = SkUTF::ToUTF8(uni, utf8);
// Only consider events for single char keys
if (1 == size) {
switch (utf8[0]) {
diff --git a/samplecode/SampleUnpremul.cpp b/samplecode/SampleUnpremul.cpp
index 108d70914f..4f41418158 100644
--- a/samplecode/SampleUnpremul.cpp
+++ b/samplecode/SampleUnpremul.cpp
@@ -53,8 +53,8 @@ protected:
}
SkUnichar uni;
if (SampleCode::CharQ(*evt, &uni)) {
- char utf8[kMaxBytesInUTF8Sequence];
- size_t size = SkUTF8_FromUnichar(uni, utf8);
+ char utf8[SkUTF::kMaxBytesInUTF8Sequence];
+ size_t size = SkUTF::ToUTF8(uni, utf8);
// Only consider events for single char keys
if (1 == size) {
switch (utf8[0]) {
diff --git a/samplecode/SampleXfermodesBlur.cpp b/samplecode/SampleXfermodesBlur.cpp
index 37b859288b..0ef144ef18 100644
--- a/samplecode/SampleXfermodesBlur.cpp
+++ b/samplecode/SampleXfermodesBlur.cpp
@@ -90,7 +90,7 @@ protected:
paint.setTextSize(50);
paint.setTypeface(SkTypeface::MakeFromName("Arial Unicode MS", SkFontStyle()));
char buffer[10];
- size_t len = SkUTF8_FromUnichar(0x8500, buffer);
+ size_t len = SkUTF::ToUTF8(0x8500, buffer);
canvas->drawText(buffer, len, 40, 40, paint);
return;
}
diff --git a/src/core/SkFindAndPlaceGlyph.h b/src/core/SkFindAndPlaceGlyph.h
index f602e8d931..74fa516910 100644
--- a/src/core/SkFindAndPlaceGlyph.h
+++ b/src/core/SkFindAndPlaceGlyph.h
@@ -147,7 +147,7 @@ private:
private:
SkUnichar nextUnichar(const char** text, const char* stop) override {
- return SkUTF8_NextUnichar(text, stop);
+ return SkUTF::NextUTF8(text, stop);
}
};
@@ -157,7 +157,7 @@ private:
private:
SkUnichar nextUnichar(const char** text, const char* stop) override {
- return SkUTF16_NextUnichar((const uint16_t**)text, (const uint16_t*)stop);
+ return SkUTF::NextUTF16((const uint16_t**)text, (const uint16_t*)stop);
}
};
@@ -167,10 +167,7 @@ private:
private:
SkUnichar nextUnichar(const char** text, const char* stop) override {
- const int32_t* ptr = *(const int32_t**)text;
- SkUnichar uni = *ptr++;
- *text = (const char*)ptr;
- return uni;
+ return SkUTF::NextUTF32((const int32_t**)text, (const int32_t*)stop);
}
};
@@ -181,19 +178,24 @@ private:
SkASSERT(cache != nullptr);
}
- const SkGlyph& lookupGlyph(const char** text, const char*) override {
- return fCache->getGlyphIDMetrics(nextGlyphId(text));
+ const SkGlyph& lookupGlyph(const char** text, const char* stop) override {
+ return fCache->getGlyphIDMetrics(nextGlyphId(text, stop));
}
- const SkGlyph& lookupGlyphXY(const char** text, const char*,
+ const SkGlyph& lookupGlyphXY(const char** text, const char* stop,
SkFixed x, SkFixed y) override {
- return fCache->getGlyphIDMetrics(nextGlyphId(text), x, y);
+ return fCache->getGlyphIDMetrics(nextGlyphId(text, stop), x, y);
}
private:
- uint16_t nextGlyphId(const char** text) {
+ uint16_t nextGlyphId(const char** text, const char* stop) {
SkASSERT(text != nullptr);
const uint16_t* ptr = *(const uint16_t**)text;
+ SkASSERT(ptr);
+ if (ptr + 1 > (const uint16_t*)stop) {
+ *text = stop;
+ return 0;
+ }
uint16_t glyphID = *ptr;
ptr += 1;
*text = (const char*)ptr;
diff --git a/src/core/SkFont.cpp b/src/core/SkFont.cpp
index bde8c4c80b..081469f900 100644
--- a/src/core/SkFont.cpp
+++ b/src/core/SkFont.cpp
@@ -68,10 +68,10 @@ int SkFont::textToGlyphs(const void* text, size_t byteLength, SkTextEncoding enc
switch (encoding) {
case kUTF8_SkTextEncoding:
- count = SkUTF8_CountUnichars((const char*)text, byteLength);
+ count = SkUTF::CountUTF8((const char*)text, byteLength);
break;
case kUTF16_SkTextEncoding:
- count = SkUTF16_CountUnichars((const uint16_t*)text, byteLength);
+ count = SkUTF::CountUTF16((const uint16_t*)text, byteLength);
break;
case kUTF32_SkTextEncoding:
count = SkToInt(byteLength >> 2);
diff --git a/src/core/SkOverdrawCanvas.cpp b/src/core/SkOverdrawCanvas.cpp
index 249723e494..0e20878793 100644
--- a/src/core/SkOverdrawCanvas.cpp
+++ b/src/core/SkOverdrawCanvas.cpp
@@ -98,13 +98,15 @@ void SkOverdrawCanvas::onDrawTextOnPath(const void* text, size_t byteLength, con
typedef int (*CountTextProc)(const char* text, const char* stop);
static int count_utf16(const char* text, const char* stop) {
const uint16_t* prev = (const uint16_t*)text;
- (void)SkUTF16_NextUnichar(&prev, (const uint16_t*)stop);
+ (void)SkUTF::NextUTF16(&prev, (const uint16_t*)stop);
return SkToInt((const char*)prev - text);
}
static int return_4(const char* text, const char* stop) { return 4; }
static int return_2(const char* text, const char* stop) { return 2; }
static int count_utf8(const char* text, const char* stop) {
- return SkUTF8_LeadByteToCount(*(const uint8_t*)text);
+ const char* ptr = text;
+ (void)SkUTF::NextUTF8(&ptr, stop);
+ return SkToInt(ptr - text);
}
void SkOverdrawCanvas::onDrawTextRSXform(const void* text, size_t byteLength,
diff --git a/src/core/SkPaint.cpp b/src/core/SkPaint.cpp
index 9b8431e591..15aa44eebc 100644
--- a/src/core/SkPaint.cpp
+++ b/src/core/SkPaint.cpp
@@ -399,9 +399,9 @@ int SkPaint::countText(const void* text, size_t byteLength) const {
SkASSERT(text != nullptr);
switch (this->getTextEncoding()) {
case kUTF8_TextEncoding:
- return SkUTF8_CountUnichars(text, byteLength);
+ return SkUTF::CountUTF8((const char*)text, byteLength);
case kUTF16_TextEncoding:
- return SkUTF16_CountUnichars(text, byteLength);
+ return SkUTF::CountUTF16((const uint16_t*)text, byteLength);
case kUTF32_TextEncoding:
return SkToInt(byteLength >> 2);
case kGlyphID_TextEncoding:
@@ -413,6 +413,13 @@ int SkPaint::countText(const void* text, size_t byteLength) const {
return 0;
}
+static SkTypeface::Encoding to_encoding(SkPaint::TextEncoding e) {
+ static_assert((int)SkTypeface::kUTF8_Encoding == (int)SkPaint::kUTF8_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF16_Encoding == (int)SkPaint::kUTF16_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF32_Encoding == (int)SkPaint::kUTF32_TextEncoding, "");
+ return (SkTypeface::Encoding)e;
+}
+
int SkPaint::textToGlyphs(const void* textData, size_t byteLength, uint16_t glyphs[]) const {
SkASSERT(textData != nullptr);
@@ -431,38 +438,16 @@ int SkPaint::textToGlyphs(const void* textData, size_t byteLength, uint16_t glyp
auto cache = SkStrikeCache::FindOrCreateStrikeExclusive(*this);
- const char* text = (const char*)textData;
- const char* stop = text + byteLength;
+ const void* stop = (const char*)textData + byteLength;
uint16_t* gptr = glyphs;
+ const SkTypeface::Encoding encoding = to_encoding(this->getTextEncoding());
- switch (this->getTextEncoding()) {
- case SkPaint::kUTF8_TextEncoding:
- while (text < stop) {
- SkUnichar u = SkUTF8_NextUnicharWithError(&text, stop);
- if (u < 0) {
- return 0; // bad UTF-8 sequence
- }
- *gptr++ = cache->unicharToGlyph(u);
- }
- break;
- case SkPaint::kUTF16_TextEncoding: {
- const uint16_t* text16 = (const uint16_t*)text;
- const uint16_t* stop16 = (const uint16_t*)stop;
- while (text16 < stop16) {
- *gptr++ = cache->unicharToGlyph(SkUTF16_NextUnichar(&text16, stop16));
- }
- break;
- }
- case kUTF32_TextEncoding: {
- const int32_t* text32 = (const int32_t*)text;
- const int32_t* stop32 = (const int32_t*)stop;
- while (text32 < stop32) {
- *gptr++ = cache->unicharToGlyph(*text32++);
- }
- break;
+ while (textData < stop) {
+ SkUnichar unichar = SkUTFN_Next(encoding, &textData, stop);
+ if (unichar < 0) {
+ return 0; // bad UTF-N sequence
}
- default:
- SkDEBUGFAIL("unknown text encoding");
+ *gptr++ = cache->unicharToGlyph(unichar);
}
return SkToInt(gptr - glyphs);
}
@@ -487,41 +472,12 @@ bool SkPaint::containsText(const void* textData, size_t byteLength) const {
}
auto cache = SkStrikeCache::FindOrCreateStrikeExclusive(*this);
-
- switch (this->getTextEncoding()) {
- case SkPaint::kUTF8_TextEncoding: {
- const char* text = static_cast<const char*>(textData);
- const char* stop = text + byteLength;
- while (text < stop) {
- if (0 == cache->unicharToGlyph(SkUTF8_NextUnichar(&text, stop))) {
- return false;
- }
- }
- break;
- }
- case SkPaint::kUTF16_TextEncoding: {
- const uint16_t* text = static_cast<const uint16_t*>(textData);
- const uint16_t* stop = text + (byteLength >> 1);
- while (text < stop) {
- if (0 == cache->unicharToGlyph(SkUTF16_NextUnichar(&text, stop))) {
- return false;
- }
- }
- break;
- }
- case SkPaint::kUTF32_TextEncoding: {
- const int32_t* text = static_cast<const int32_t*>(textData);
- const int32_t* stop = text + (byteLength >> 2);
- while (text < stop) {
- if (0 == cache->unicharToGlyph(*text++)) {
- return false;
- }
- }
- break;
- }
- default:
- SkDEBUGFAIL("unknown text encoding");
+ const void* stop = (const char*)textData + byteLength;
+ const SkTypeface::Encoding encoding = to_encoding(this->getTextEncoding());
+ while (textData < stop) {
+ if (0 == cache->unicharToGlyph(SkUTFN_Next(encoding, &textData, stop))) {
return false;
+ }
}
return true;
}
@@ -551,7 +507,7 @@ static const SkGlyph& sk_getMetrics_utf8_next(SkGlyphCache* cache,
SkASSERT(cache != nullptr);
SkASSERT(text != nullptr);
- return cache->getUnicharMetrics(SkUTF8_NextUnichar(text, stop));
+ return cache->getUnicharMetrics(SkUTF::NextUTF8(text, stop));
}
static const SkGlyph& sk_getMetrics_utf16_next(SkGlyphCache* cache,
@@ -561,7 +517,7 @@ static const SkGlyph& sk_getMetrics_utf16_next(SkGlyphCache* cache,
SkASSERT(text != nullptr);
return cache->getUnicharMetrics(
- SkUTF16_NextUnichar((const uint16_t**)text, (const uint16_t*)stop));
+ SkUTF::NextUTF16((const uint16_t**)text, (const uint16_t*)stop));
}
static const SkGlyph& sk_getMetrics_utf32_next(SkGlyphCache* cache,
@@ -570,10 +526,7 @@ static const SkGlyph& sk_getMetrics_utf32_next(SkGlyphCache* cache,
SkASSERT(cache != nullptr);
SkASSERT(text != nullptr);
- const int32_t* ptr = *(const int32_t**)text;
- SkUnichar uni = *ptr++;
- *text = (const char*)ptr;
- return cache->getUnicharMetrics(uni);
+ return cache->getUnicharMetrics(SkUTF::NextUTF32((const int32_t**)text, (const int32_t*)stop));
}
static const SkGlyph& sk_getMetrics_glyph_next(SkGlyphCache* cache,
@@ -595,7 +548,7 @@ static const SkGlyph& sk_getAdvance_utf8_next(SkGlyphCache* cache,
SkASSERT(cache != nullptr);
SkASSERT(text != nullptr);
- return cache->getUnicharAdvance(SkUTF8_NextUnichar(text, stop));
+ return cache->getUnicharAdvance(SkUTF::NextUTF8(text, stop));
}
static const SkGlyph& sk_getAdvance_utf16_next(SkGlyphCache* cache,
@@ -605,7 +558,7 @@ static const SkGlyph& sk_getAdvance_utf16_next(SkGlyphCache* cache,
SkASSERT(text != nullptr);
return cache->getUnicharAdvance(
- SkUTF16_NextUnichar((const uint16_t**)text, (const uint16_t*)stop));
+ SkUTF::NextUTF16((const uint16_t**)text, (const uint16_t*)stop));
}
static const SkGlyph& sk_getAdvance_utf32_next(SkGlyphCache* cache,
@@ -614,10 +567,7 @@ static const SkGlyph& sk_getAdvance_utf32_next(SkGlyphCache* cache,
SkASSERT(cache != nullptr);
SkASSERT(text != nullptr);
- const int32_t* ptr = *(const int32_t**)text;
- SkUnichar uni = *ptr++;
- *text = (const char*)ptr;
- return cache->getUnicharAdvance(uni);
+ return cache->getUnicharAdvance(SkUTF::NextUTF32((const int32_t**)text, (const int32_t*)stop));
}
static const SkGlyph& sk_getAdvance_glyph_next(SkGlyphCache* cache,
diff --git a/src/core/SkPaintPriv.cpp b/src/core/SkPaintPriv.cpp
index 3be1ebeafa..309d244951 100644
--- a/src/core/SkPaintPriv.cpp
+++ b/src/core/SkPaintPriv.cpp
@@ -92,9 +92,9 @@ bool SkPaintPriv::ShouldDither(const SkPaint& p, SkColorType dstCT) {
int SkPaintPriv::ValidCountText(const void* text, size_t length, SkPaint::TextEncoding encoding) {
switch (encoding) {
- case SkPaint::kUTF8_TextEncoding: return SkUTF8_CountUnichars(text, length);
- case SkPaint::kUTF16_TextEncoding: return SkUTF16_CountUnichars(text, length);
- case SkPaint::kUTF32_TextEncoding: return SkUTF32_CountUnichars(text, length);
+ case SkPaint::kUTF8_TextEncoding: return SkUTF::CountUTF8((const char*)text, length);
+ case SkPaint::kUTF16_TextEncoding: return SkUTF::CountUTF16((const uint16_t*)text, length);
+ case SkPaint::kUTF32_TextEncoding: return SkUTF::CountUTF32((const int32_t*)text, length);
case SkPaint::kGlyphID_TextEncoding:
if (!SkIsAlign2(intptr_t(text)) || !SkIsAlign2(length)) {
return -1;
diff --git a/src/core/SkString.cpp b/src/core/SkString.cpp
index 65dae1b753..6f7d80dfd7 100644
--- a/src/core/SkString.cpp
+++ b/src/core/SkString.cpp
@@ -446,8 +446,8 @@ void SkString::insert(size_t offset, const char text[], size_t len) {
}
void SkString::insertUnichar(size_t offset, SkUnichar uni) {
- char buffer[kMaxBytesInUTF8Sequence];
- size_t len = SkUTF8_FromUnichar(uni, buffer);
+ char buffer[SkUTF::kMaxBytesInUTF8Sequence];
+ size_t len = SkUTF::ToUTF8(uni, buffer);
if (len) {
this->insert(offset, buffer, len);
diff --git a/src/core/SkStringUtils.cpp b/src/core/SkStringUtils.cpp
index 500478585f..1743d0d26c 100644
--- a/src/core/SkStringUtils.cpp
+++ b/src/core/SkStringUtils.cpp
@@ -62,8 +62,8 @@ SkString SkStringFromUTF16(const uint16_t* src, size_t count) {
const uint16_t* end = src + count;
for (const uint16_t* ptr = src; ptr < end;) {
const uint16_t* last = ptr;
- SkUnichar u = SkUTF16_NextUnichar(&ptr, stop);
- size_t s = SkUTF8_FromUnichar(u);
+ SkUnichar u = SkUTF::NextUTF16(&ptr, stop);
+ size_t s = SkUTF::ToUTF8(u);
if (n > UINT32_MAX - s) {
end = last; // truncate input string
break;
@@ -73,7 +73,7 @@ SkString SkStringFromUTF16(const uint16_t* src, size_t count) {
ret = SkString(n);
char* out = ret.writable_str();
for (const uint16_t* ptr = src; ptr < end;) {
- out += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&ptr, stop), out);
+ out += SkUTF::ToUTF8(SkUTF::NextUTF16(&ptr, stop), out);
}
SkASSERT(out == ret.writable_str() + n);
}
diff --git a/src/core/SkUtils.cpp b/src/core/SkUtils.cpp
index f0c1f60a09..9754da662c 100644
--- a/src/core/SkUtils.cpp
+++ b/src/core/SkUtils.cpp
@@ -7,431 +7,27 @@
#include "SkUtils.h"
-#include "SkTo.h"
-
-/* 0xxxxxxx 1 total
- 10xxxxxx // never a leading byte
- 110xxxxx 2 total
- 1110xxxx 3 total
- 11110xxx 4 total
-
- 11 10 01 01 xx xx xx xx 0...
- 0xE5XX0000
- 0xE5 << 24
-*/
-
-static bool utf8_byte_is_valid(uint8_t c) {
- return c < 0xF5 && (c & 0xFE) != 0xC0;
-}
-static bool utf8_byte_is_continuation(uint8_t c) {
- return (c & 0xC0) == 0x80;
-}
-static bool utf8_byte_is_leading_byte(uint8_t c) {
- return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c);
-}
-
-#ifdef SK_DEBUG
- static void assert_utf8_leadingbyte(unsigned c) {
- SkASSERT(utf8_byte_is_leading_byte(SkToU8(c)));
- }
-
- int SkUTF8_LeadByteToCount(unsigned c) {
- assert_utf8_leadingbyte(c);
- return (((0xE5 << 24) >> (c >> 4 << 1)) & 3) + 1;
- }
-#else
- #define assert_utf8_leadingbyte(c)
-#endif
-
-/**
- * @returns -1 iff invalid UTF8 byte,
- * 0 iff UTF8 continuation byte,
- * 1 iff ASCII byte,
- * 2 iff leading byte of 2-byte sequence,
- * 3 iff leading byte of 3-byte sequence, and
- * 4 iff leading byte of 4-byte sequence.
- *
- * I.e.: if return value > 0, then gives length of sequence.
-*/
-static int utf8_byte_type(uint8_t c) {
- if (c < 0x80) {
- return 1;
- } else if (c < 0xC0) {
- return 0;
- } else if (c < 0xF5 && (c & 0xFE) != 0xC0) { // "octet values C0, C1, F5 to FF never appear"
- return (((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1;
- } else {
- return -1;
- }
-}
-static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; }
-
-int SkUTF8_CountUnichars(const char utf8[]) {
- SkASSERT(utf8);
-
- int count = 0;
-
- for (;;) {
- int c = *(const uint8_t*)utf8;
- if (c == 0) {
- break;
- }
- utf8 += SkUTF8_LeadByteToCount(c);
- count += 1;
- }
- return count;
-}
-
-// SAFE: returns -1 if invalid UTF-8
-int SkUTF8_CountUnichars(const void* text, size_t byteLength) {
- SkASSERT(text);
- const char* utf8 = static_cast<const char*>(text);
- int count = 0;
- const char* stop = utf8 + byteLength;
-
- while (utf8 < stop) {
- int type = utf8_byte_type(*(const uint8_t*)utf8);
- SkASSERT(type >= -1 && type <= 4);
- if (!utf8_type_is_valid_leading_byte(type) || utf8 + type > stop) {
- // Sequence extends beyond end.
- return -1;
- }
- while(type-- > 1) {
- ++utf8;
- if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
- return -1;
- }
- }
- ++utf8;
- ++count;
- }
- return count;
-}
-
-SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
- SkASSERT(utf8);
-
- const uint8_t* p = (const uint8_t*)utf8;
- int c = *p;
- int hic = c << 24;
-
- assert_utf8_leadingbyte(c);
-
- if (hic < 0) {
- uint32_t mask = (uint32_t)~0x3F;
- hic = SkLeftShift(hic, 1);
- do {
- c = (c << 6) | (*++p & 0x3F);
- mask <<= 5;
- } while ((hic = SkLeftShift(hic, 1)) < 0);
- c &= ~mask;
- }
- return c;
-}
-
-// SAFE: returns -1 on invalid UTF-8 sequence.
-SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) {
- SkASSERT(ptr && *ptr);
- SkASSERT(*ptr < end);
- const uint8_t* p = (const uint8_t*)*ptr;
- int c = *p;
- int hic = c << 24;
-
- if (!utf8_byte_is_leading_byte(c)) {
- return -1;
- }
- if (hic < 0) {
- uint32_t mask = (uint32_t)~0x3F;
- hic = SkLeftShift(hic, 1);
- do {
- ++p;
- if (p >= (const uint8_t*)end) {
- return -1;
- }
- // check before reading off end of array.
- uint8_t nextByte = *p;
- if (!utf8_byte_is_continuation(nextByte)) {
- return -1;
- }
- c = (c << 6) | (nextByte & 0x3F);
- mask <<= 5;
- } while ((hic = SkLeftShift(hic, 1)) < 0);
- c &= ~mask;
- }
- *ptr = (char*)p + 1;
- return c;
-}
-
-SkUnichar SkUTF8_NextUnichar(const char** ptr) {
- SkASSERT(ptr && *ptr);
-
- const uint8_t* p = (const uint8_t*)*ptr;
- int c = *p;
- int hic = c << 24;
-
- assert_utf8_leadingbyte(c);
-
- if (hic < 0) {
- uint32_t mask = (uint32_t)~0x3F;
- hic = SkLeftShift(hic, 1);
- do {
- c = (c << 6) | (*++p & 0x3F);
- mask <<= 5;
- } while ((hic = SkLeftShift(hic, 1)) < 0);
- c &= ~mask;
- }
- *ptr = (char*)p + 1;
- return c;
-}
-
-SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
- SkASSERT(ptr && *ptr);
-
- const char* p = *ptr;
-
- if (*--p & 0x80) {
- while (*--p & 0x40) {
- ;
- }
- }
-
- *ptr = (char*)p;
- return SkUTF8_NextUnichar(&p);
-}
-
-size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
- if ((uint32_t)uni > 0x10FFFF) {
- SkDEBUGFAIL("bad unichar");
- return 0;
- }
-
- if (uni <= 127) {
- if (utf8) {
- *utf8 = (char)uni;
- }
- return 1;
- }
-
- char tmp[4];
- char* p = tmp;
- size_t count = 1;
-
- SkDEBUGCODE(SkUnichar orig = uni;)
-
- while (uni > 0x7F >> count) {
- *p++ = (char)(0x80 | (uni & 0x3F));
- uni >>= 6;
- count += 1;
- }
-
- if (utf8) {
- p = tmp;
- utf8 += count;
- while (p < tmp + count - 1) {
- *--utf8 = *p++;
- }
- *--utf8 = (char)(~(0xFF >> count) | uni);
- }
-
- SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
- return count;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-
-int SkUTF16_CountUnichars(const uint16_t src[]) {
- SkASSERT(src);
-
- int count = 0;
- unsigned c;
- while ((c = *src++) != 0) {
- SkASSERT(!SkUTF16_IsLowSurrogate(c));
- if (SkUTF16_IsHighSurrogate(c)) {
- c = *src++;
- SkASSERT(SkUTF16_IsLowSurrogate(c));
- }
- count += 1;
- }
- return count;
-}
-
-// returns -1 on error
-int SkUTF16_CountUnichars(const void* text, size_t byteLength) {
- SkASSERT(text);
- if (!SkIsAlign2(intptr_t(text)) || !SkIsAlign2(byteLength)) {
- return -1;
- }
-
- const uint16_t* src = static_cast<const uint16_t*>(text);
- const uint16_t* stop = src + (byteLength >> 1);
- int count = 0;
- while (src < stop) {
- unsigned c = *src++;
- if (SkUTF16_IsLowSurrogate(c)) {
- return -1;
- }
- if (SkUTF16_IsHighSurrogate(c)) {
- if (src >= stop) {
- return -1;
- }
- c = *src++;
- if (!SkUTF16_IsLowSurrogate(c)) {
- return -1;
- }
- }
- count += 1;
- }
- return count;
-}
-
-SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr, const uint16_t* endPtr) {
- if (!srcPtr || !endPtr) {
- return -1;
- }
- const uint16_t* src = *srcPtr;
- if (src >= endPtr) {
- return -1;
- }
- uint16_t c = *src++;
- SkUnichar result = c;
-
- if (SkUTF16_IsLowSurrogate(c)) {
- return -1; // srcPtr should never point at low surrogate.
- }
- if (SkUTF16_IsHighSurrogate(c)) {
- if (src == endPtr) {
- return -1; // Truncated string.
- }
- uint16_t low = *src++;
- if (!SkUTF16_IsLowSurrogate(low)) {
- return -1;
- }
- /*
- [paraphrased from wikipedia]
- Take the high surrogate and subtract 0xD800, then multiply by 0x400.
- Take the low surrogate and subtract 0xDC00. Add these two results
- together, and finally add 0x10000 to get the final decoded codepoint.
-
- unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
- unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000
- unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000
- unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000)
- */
- result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000);
- }
- *srcPtr = src;
- return result;
-}
-
-SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
- SkUnichar c = SkUTF16_NextUnichar(srcPtr, *srcPtr + 2);
+template <typename T>
+static SkUnichar next(const T** srcPtr, unsigned N, SkUnichar (*fn)(const T**, const T*)) {
+ SkASSERT(srcPtr);
+ const T* ptr = *srcPtr;
+ SkUnichar c = fn(&ptr, ptr + N);
if (c == -1) {
SkASSERT(false);
++(*srcPtr);
- return 0xFFFD; // REPLACEMENT CHARACTER.
- }
- return c;
-}
-
-SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
- SkASSERT(srcPtr && *srcPtr);
-
- const uint16_t* src = *srcPtr;
- SkUnichar c = *--src;
-
- SkASSERT(!SkUTF16_IsHighSurrogate(c));
- if (SkUTF16_IsLowSurrogate(c)) {
- unsigned c2 = *--src;
- SkASSERT(SkUTF16_IsHighSurrogate(c2));
- c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
+ return 0xFFFD; // REPLACEMENT CHARACTER
}
- *srcPtr = src;
+ *srcPtr = ptr;
return c;
}
-
-size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
- SkASSERT((unsigned)uni <= 0x10FFFF);
-
- int extra = (uni > 0xFFFF);
-
- if (dst) {
- if (extra) {
- // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
- // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
- dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
- dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
-
- SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
- SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
- } else {
- dst[0] = SkToU16(uni);
- SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
- SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
- }
- }
- return 1 + extra;
-}
-
-size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
- char utf8[]) {
- SkASSERT(numberOf16BitValues >= 0);
- if (numberOf16BitValues <= 0) {
- return 0;
- }
-
- SkASSERT(utf16 != nullptr);
-
- const uint16_t* stop = utf16 + numberOf16BitValues;
- size_t size = 0;
-
- if (utf8 == nullptr) { // just count
- while (utf16 < stop) {
- size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
- }
- } else {
- char* start = utf8;
- while (utf16 < stop) {
- utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
- }
- size = utf8 - start;
- }
- return size;
+SkUnichar SkUTF8_NextUnichar(const char** p) {
+ return next<char>(p, SkUTF::kMaxBytesInUTF8Sequence, SkUTF::NextUTF8);
}
-
-// returns -1 on error
-int SkUTF32_CountUnichars(const void* text, size_t byteLength) {
- if (!SkIsAlign4(intptr_t(text)) || !SkIsAlign4(byteLength)) {
- return -1;
- }
- const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits
- const uint32_t* ptr = static_cast<const uint32_t*>(text);
- const uint32_t* stop = ptr + (byteLength >> 2);
- while (ptr < stop) {
- if (*ptr & kInvalidUnicharMask) {
- return -1;
- }
- ptr += 1;
- }
- return SkToInt(byteLength >> 2);
+SkUnichar SkUTF16_NextUnichar(const uint16_t** p) {
+ return next<uint16_t>(p, 2, SkUTF::NextUTF16);
}
-// returns -1 on error
-int SkUTFN_CountUnichars(
- SkTypeface::Encoding encoding, const void* utfN, size_t byteLength) {
- SkASSERT(utfN != nullptr);
- switch (encoding) {
- case SkTypeface::kUTF8_Encoding:
- return SkUTF8_CountUnichars(utfN, byteLength);
- case SkTypeface::kUTF16_Encoding:
- return SkUTF16_CountUnichars(utfN, byteLength);
- case SkTypeface::kUTF32_Encoding:
- return SkUTF32_CountUnichars(utfN, byteLength);
- default:
- SkDEBUGFAIL("unknown text encoding");
- }
-
- return -1;
-}
+///////////////////////////////////////////////////////////////////////////////
const char SkHexadecimalDigits::gUpper[16] =
{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
diff --git a/src/core/SkUtils.h b/src/core/SkUtils.h
index 795b47a0c4..e18934d6b8 100644
--- a/src/core/SkUtils.h
+++ b/src/core/SkUtils.h
@@ -8,10 +8,9 @@
#ifndef SkUtils_DEFINED
#define SkUtils_DEFINED
-#include "SkTypes.h"
-#include "SkMath.h"
#include "SkOpts.h"
#include "SkTypeface.h"
+#include "../utils/SkUTF.h"
/** Similar to memset(), but it assigns a 16, 32, or 64-bit value into the buffer.
@param buffer The memory to have value copied into it
@@ -27,87 +26,46 @@ static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) {
static inline void sk_memset64(uint64_t buffer[], uint64_t value, int count) {
SkOpts::memset64(buffer, value, count);
}
-///////////////////////////////////////////////////////////////////////////////
-#define kMaxBytesInUTF8Sequence 4
+///////////////////////////////////////////////////////////////////////////////
-#ifdef SK_DEBUG
- int SkUTF8_LeadByteToCount(unsigned c);
-#else
- #define SkUTF8_LeadByteToCount(c) ((((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1)
-#endif
+// Unlike the functions in SkUTF.h, these two functions do not take an array
+// length parameter. When possible, use SkUTF::NextUTF{8,16} instead.
+SkUnichar SkUTF8_NextUnichar(const char**);
+SkUnichar SkUTF16_NextUnichar(const uint16_t**);
-inline int SkUTF8_CountUTF8Bytes(const char utf8[]) {
- SkASSERT(utf8);
- return SkUTF8_LeadByteToCount(*(const uint8_t*)utf8);
-}
+///////////////////////////////////////////////////////////////////////////////
-int SkUTF8_CountUnichars(const char utf8[]);
+static inline bool SkUTF16_IsHighSurrogate(uint16_t c) { return ((c) & 0xFC00) == 0xD800; }
-/** These functions are safe: invalid sequences will return -1; */
-int SkUTF8_CountUnichars(const void* utf8, size_t byteLength);
-int SkUTF16_CountUnichars(const void* utf16, size_t byteLength);
-int SkUTF32_CountUnichars(const void* utf32, size_t byteLength);
-int SkUTFN_CountUnichars(SkTypeface::Encoding encoding, const void* utfN, size_t byteLength);
+static inline bool SkUTF16_IsLowSurrogate (uint16_t c) { return ((c) & 0xFC00) == 0xDC00; }
-/** This function is safe: invalid UTF8 sequences will return -1
- * When -1 is returned, ptr is unchanged.
- * Precondition: *ptr < end;
- */
-SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end);
+///////////////////////////////////////////////////////////////////////////////
-/** this version replaces invalid utf-8 sequences with code point U+FFFD. */
-inline SkUnichar SkUTF8_NextUnichar(const char** ptr, const char* end) {
- SkUnichar val = SkUTF8_NextUnicharWithError(ptr, end);
- if (val < 0) {
- *ptr = end;
- return 0xFFFD; // REPLACEMENT CHARACTER
+static inline int SkUTFN_CountUnichars(SkTypeface::Encoding enc, const void* utfN, size_t bytes) {
+ switch (enc) {
+ case SkTypeface::kUTF8_Encoding: return SkUTF::CountUTF8((const char*)utfN, bytes);
+ case SkTypeface::kUTF16_Encoding: return SkUTF::CountUTF16((const uint16_t*)utfN, bytes);
+ case SkTypeface::kUTF32_Encoding: return SkUTF::CountUTF32((const int32_t*)utfN, bytes);
+ default: SkDEBUGFAIL("unknown text encoding"); return -1;
}
- return val;
}
-SkUnichar SkUTF8_ToUnichar(const char utf8[]);
-SkUnichar SkUTF8_NextUnichar(const char**);
-SkUnichar SkUTF8_PrevUnichar(const char**);
-
-/** Return the number of bytes need to convert a unichar
- into a utf8 sequence. Will be 1..kMaxBytesInUTF8Sequence,
- or 0 if uni is illegal.
-*/
-size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[] = nullptr);
-
-///////////////////////////////////////////////////////////////////////////////
-
-#define SkUTF16_IsHighSurrogate(c) (((c) & 0xFC00) == 0xD800)
-#define SkUTF16_IsLowSurrogate(c) (((c) & 0xFC00) == 0xDC00)
-
-int SkUTF16_CountUnichars(const uint16_t utf16[]);
-// returns the current unichar and then moves past it (*p++)
-SkUnichar SkUTF16_NextUnichar(const uint16_t**);
-SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr, const uint16_t* end);
-
-// this guy backs up to the previus unichar value, and returns it (*--p)
-SkUnichar SkUTF16_PrevUnichar(const uint16_t**);
-size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t utf16[] = nullptr);
-
-size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
- char utf8[] = nullptr);
-
-inline bool SkUnichar_IsVariationSelector(SkUnichar uni) {
-/* The 'true' ranges are:
- * 0x180B <= uni <= 0x180D
- * 0xFE00 <= uni <= 0xFE0F
- * 0xE0100 <= uni <= 0xE01EF
- */
- if (uni < 0x180B || uni > 0xE01EF) {
- return false;
- }
- if ((uni > 0x180D && uni < 0xFE00) || (uni > 0xFE0F && uni < 0xE0100)) {
- return false;
+static inline SkUnichar SkUTFN_Next(SkTypeface::Encoding enc,
+ const void** ptr, const void* stop) {
+ switch (enc) {
+ case SkTypeface::kUTF8_Encoding:
+ return SkUTF::NextUTF8((const char**)ptr, (const char*)stop);
+ case SkTypeface::kUTF16_Encoding:
+ return SkUTF::NextUTF16((const uint16_t**)ptr, (const uint16_t*)stop);
+ case SkTypeface::kUTF32_Encoding:
+ return SkUTF::NextUTF32((const int32_t**)ptr, (const int32_t*)stop);
+ default: SkDEBUGFAIL("unknown text encoding"); return -1;
}
- return true;
}
+///////////////////////////////////////////////////////////////////////////////
+
namespace SkHexadecimalDigits {
extern const char gUpper[16]; // 0-9A-F
extern const char gLower[16]; // 0-9a-f
diff --git a/src/pdf/SkPDFDevice.cpp b/src/pdf/SkPDFDevice.cpp
index 9cd2892a05..fc7384f7c0 100644
--- a/src/pdf/SkPDFDevice.cpp
+++ b/src/pdf/SkPDFDevice.cpp
@@ -1274,7 +1274,7 @@ void SkPDFDevice::internalDrawGlyphRun(const SkGlyphRun& glyphRun, SkPoint offse
// Check if `/ActualText` needed.
const char* textPtr = c.fUtf8Text;
const char* textEnd = c.fUtf8Text + c.fTextByteLength;
- SkUnichar unichar = SkUTF8_NextUnicharWithError(&textPtr, textEnd);
+ SkUnichar unichar = SkUTF::NextUTF8(&textPtr, textEnd);
if (unichar < 0) {
return;
}
@@ -1288,7 +1288,7 @@ void SkPDFDevice::internalDrawGlyphRun(const SkGlyphRun& glyphRun, SkPoint offse
// the BOM marks this text as UTF-16BE, not PDFDocEncoding.
SkPDFUtils::WriteUTF16beHex(out, unichar); // first char
while (textPtr < textEnd) {
- unichar = SkUTF8_NextUnicharWithError(&textPtr, textEnd);
+ unichar = SkUTF::NextUTF8(&textPtr, textEnd);
if (unichar < 0) {
break;
}
diff --git a/src/pdf/SkPDFMetadata.cpp b/src/pdf/SkPDFMetadata.cpp
index 9234f44ac1..16e6637c7d 100644
--- a/src/pdf/SkPDFMetadata.cpp
+++ b/src/pdf/SkPDFMetadata.cpp
@@ -62,22 +62,22 @@ static SkString to_utf16be(const char* src, size_t len) {
const char* const end = src + len;
size_t n = 1; // BOM
for (const char* ptr = src; ptr < end;) {
- SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end);
+ SkUnichar u = SkUTF::NextUTF8(&ptr, end);
if (u < 0) {
break;
}
- n += SkUTF16_FromUnichar(u);
+ n += SkUTF::ToUTF16(u);
}
ret.resize(2 * n);
char* out = ret.writable_str();
write_utf16be(&out, 0xFEFF); // BOM
for (const char* ptr = src; ptr < end;) {
- SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end);
+ SkUnichar u = SkUTF::NextUTF8(&ptr, end);
if (u < 0) {
break;
}
uint16_t utf16[2];
- size_t l = SkUTF16_FromUnichar(u, utf16);
+ size_t l = SkUTF::ToUTF16(u, utf16);
write_utf16be(&out, utf16[0]);
if (l == 2) {
write_utf16be(&out, utf16[1]);
diff --git a/src/pdf/SkPDFUtils.h b/src/pdf/SkPDFUtils.h
index a291914a5a..a05b52b113 100644
--- a/src/pdf/SkPDFUtils.h
+++ b/src/pdf/SkPDFUtils.h
@@ -103,7 +103,7 @@ inline void WriteUInt8(SkDynamicMemoryWStream* wStream, uint8_t value) {
inline void WriteUTF16beHex(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
uint16_t utf16[2] = {0, 0};
- size_t len = SkUTF16_FromUnichar(utf32, utf16);
+ size_t len = SkUTF::ToUTF16(utf32, utf16);
SkASSERT(len == 1 || len == 2);
SkPDFUtils::WriteUInt16BE(wStream, utf16[0]);
if (len == 2) {
diff --git a/src/ports/SkFontHost_mac.cpp b/src/ports/SkFontHost_mac.cpp
index 18d09b4ec0..8fa8c636d6 100644
--- a/src/ports/SkFontHost_mac.cpp
+++ b/src/ports/SkFontHost_mac.cpp
@@ -1146,7 +1146,7 @@ uint16_t SkScalerContext_Mac::generateCharToGlyph(SkUnichar uni) {
UniChar theChar[2]; // UniChar is a UTF-16 16-bit code unit.
// Get the glyph
- size_t numUniChar = SkUTF16_FromUnichar(uni, theChar);
+ size_t numUniChar = SkUTF::ToUTF16(uni, theChar);
SkASSERT(sizeof(CGGlyph) <= sizeof(uint16_t));
// Undocumented behavior of CTFontGetGlyphsForCharacters with non-bmp code points:
@@ -2366,7 +2366,7 @@ int SkTypeface_Mac::onCharsToGlyphs(const void* chars, Encoding encoding,
src = utf16;
for (int i = 0; i < glyphCount; ++i) {
SkUnichar uni = SkUTF8_NextUnichar(&utf8);
- utf16 += SkUTF16_FromUnichar(uni, utf16);
+ utf16 += SkUTF::ToUTF16(uni, utf16);
}
srcCount = SkToInt(utf16 - src);
break;
@@ -2387,7 +2387,7 @@ int SkTypeface_Mac::onCharsToGlyphs(const void* chars, Encoding encoding,
UniChar* utf16 = charStorage.reset(2 * glyphCount);
src = utf16;
for (int i = 0; i < glyphCount; ++i) {
- utf16 += SkUTF16_FromUnichar(utf32[i], utf16);
+ utf16 += SkUTF::ToUTF16(utf32[i], utf16);
}
srcCount = SkToInt(utf16 - src);
break;
diff --git a/src/ports/SkFontHost_win.cpp b/src/ports/SkFontHost_win.cpp
index 07d37aedfe..4ee8bb44fd 100644
--- a/src/ports/SkFontHost_win.cpp
+++ b/src/ports/SkFontHost_win.cpp
@@ -795,7 +795,7 @@ uint16_t SkScalerContext_GDI::generateCharToGlyph(SkUnichar utf32) {
uint16_t index = 0;
WCHAR utf16[2];
// TODO(ctguil): Support characters that generate more than one glyph.
- if (SkUTF16_FromUnichar(utf32, (uint16_t*)utf16) == 1) {
+ if (SkUTF::ToUTF16(utf32, (uint16_t*)utf16) == 1) {
// Type1 fonts fail with uniscribe API. Use GetGlyphIndices for plane 0.
/** Real documentation for GetGlyphIndiciesW:
@@ -2103,7 +2103,7 @@ int LogFontTypeface::onCharsToGlyphs(const void* chars, Encoding encoding,
// Try a run of non-bmp.
while (glyphIndex < glyphCount && currentChar > 0xFFFF) {
- SkUTF16_FromUnichar(currentChar, reinterpret_cast<uint16_t*>(scratch));
+ SkUTF::ToUTF16(currentChar, reinterpret_cast<uint16_t*>(scratch));
glyphs[glyphIndex] = nonBmpCharToGlyph(hdc, &sc, scratch);
++glyphIndex;
if (glyphIndex < glyphCount) {
@@ -2158,7 +2158,7 @@ int LogFontTypeface::onCharsToGlyphs(const void* chars, Encoding encoding,
// Try a run of non-bmp.
while (glyphIndex < glyphCount && utf32[glyphIndex] > 0xFFFF) {
- SkUTF16_FromUnichar(utf32[glyphIndex], reinterpret_cast<uint16_t*>(scratch));
+ SkUTF::ToUTF16(utf32[glyphIndex], reinterpret_cast<uint16_t*>(scratch));
glyphs[glyphIndex] = nonBmpCharToGlyph(hdc, &sc, scratch);
++glyphIndex;
}
diff --git a/src/ports/SkFontMgr_win_dw.cpp b/src/ports/SkFontMgr_win_dw.cpp
index 6830d82e03..7954c77ca2 100644
--- a/src/ports/SkFontMgr_win_dw.cpp
+++ b/src/ports/SkFontMgr_win_dw.cpp
@@ -757,7 +757,7 @@ SkTypeface* SkFontMgr_DirectWrite::onMatchFamilyStyleCharacter(const char family
WCHAR str[16];
UINT32 strLen = static_cast<UINT32>(
- SkUTF16_FromUnichar(character, reinterpret_cast<uint16_t*>(str)));
+ SkUTF::ToUTF16(character, reinterpret_cast<uint16_t*>(str)));
const SkSMallocWCHAR* dwBcp47;
SkSMallocWCHAR dwBcp47Local;
diff --git a/src/ports/SkOSFile_stdio.cpp b/src/ports/SkOSFile_stdio.cpp
index 10ba7c8b0c..8c0be793cd 100644
--- a/src/ports/SkOSFile_stdio.cpp
+++ b/src/ports/SkOSFile_stdio.cpp
@@ -46,16 +46,16 @@ static FILE* fopen_win(const char* utf8path, const char* perm) {
const char* end = utf8path + strlen(utf8path);
size_t n = 0;
while (ptr < end) {
- SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end);
+ SkUnichar u = SkUTF::NextUTF8(&ptr, end);
if (u < 0) {
return nullptr; // malformed UTF-8
}
- n += SkUTF16_FromUnichar(u);
+ n += SkUTF::ToUTF16(u);
}
std::vector<uint16_t> wchars(n + 1);
uint16_t* out = wchars.data();
for (const char* ptr = utf8path; ptr < end;) {
- out += SkUTF16_FromUnichar(SkUTF8_NextUnicharWithError(&ptr, end), out);
+ out += SkUTF::ToUTF16(SkUTF::NextUTF8(&ptr, end), out);
}
SkASSERT(out == &wchars[n]);
*out = 0; // final null
diff --git a/src/ports/SkRemotableFontMgr_win_dw.cpp b/src/ports/SkRemotableFontMgr_win_dw.cpp
index 6c6613ea37..2b99cf7be8 100644
--- a/src/ports/SkRemotableFontMgr_win_dw.cpp
+++ b/src/ports/SkRemotableFontMgr_win_dw.cpp
@@ -398,7 +398,7 @@ public:
WCHAR str[16];
UINT32 strLen = static_cast<UINT32>(
- SkUTF16_FromUnichar(character, reinterpret_cast<uint16_t*>(str)));
+ SkUTF::ToUTF16(character, reinterpret_cast<uint16_t*>(str)));
SkTScopedComPtr<IDWriteTextLayout> fallbackLayout;
HR_GENERAL(dwFactory->CreateTextLayout(str, strLen, fallbackFormat.get(),
200.0f, 200.0f,
diff --git a/src/svg/SkSVGDevice.cpp b/src/svg/SkSVGDevice.cpp
index 4a83a9c841..9a2eb99a77 100644
--- a/src/svg/SkSVGDevice.cpp
+++ b/src/svg/SkSVGDevice.cpp
@@ -119,6 +119,13 @@ struct Resources {
SkString fClip;
};
+static SkTypeface::Encoding to_encoding(SkPaint::TextEncoding e) {
+ static_assert((int)SkTypeface::kUTF8_Encoding == (int)SkPaint::kUTF8_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF16_Encoding == (int)SkPaint::kUTF16_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF32_Encoding == (int)SkPaint::kUTF32_TextEncoding, "");
+ return (SkTypeface::Encoding)e;
+}
+
class SVGTextBuilder : SkNoncopyable {
public:
SVGTextBuilder(const void* text, size_t byteLen, const SkPaint& paint, const SkPoint& offset,
@@ -131,42 +138,29 @@ public:
SkASSERT(scalarsPerPos <= 2);
SkASSERT(scalarsPerPos == 0 || SkToBool(pos));
- int count = paint.countText(text, byteLen);
-
- const char* stop = (const char*)text + byteLen;
- switch(paint.getTextEncoding()) {
- case SkPaint::kGlyphID_TextEncoding: {
- SkASSERT(count * sizeof(uint16_t) == byteLen);
- SkAutoSTArray<64, SkUnichar> unichars(count);
- paint.glyphsToUnichars((const uint16_t*)text, count, unichars.get());
- for (int i = 0; i < count; ++i) {
- this->appendUnichar(unichars[i]);
- }
- } break;
- case SkPaint::kUTF8_TextEncoding: {
- const char* c8 = reinterpret_cast<const char*>(text);
- for (int i = 0; i < count; ++i) {
- this->appendUnichar(SkUTF8_NextUnichar(&c8, stop));
- }
- SkASSERT(reinterpret_cast<const char*>(text) + byteLen == c8);
- } break;
- case SkPaint::kUTF16_TextEncoding: {
- const uint16_t* c16 = reinterpret_cast<const uint16_t*>(text);
- for (int i = 0; i < count; ++i) {
- this->appendUnichar(SkUTF16_NextUnichar(&c16, (const uint16_t*)stop));
+ SkPaint::TextEncoding encoding = paint.getTextEncoding();
+ switch(encoding) {
+ case SkPaint::kGlyphID_TextEncoding: {
+ int count = paint.countText(text, byteLen);
+ SkASSERT(count * sizeof(uint16_t) == byteLen);
+ SkAutoSTArray<64, SkUnichar> unichars(count);
+ paint.glyphsToUnichars((const uint16_t*)text, count, unichars.get());
+ for (int i = 0; i < count; ++i) {
+ this->appendUnichar(unichars[i]);
+ }
+ break;
}
- SkASSERT(SkIsAlign2(byteLen));
- SkASSERT(reinterpret_cast<const uint16_t*>(text) + (byteLen / 2) == c16);
- } break;
- case SkPaint::kUTF32_TextEncoding: {
- SkASSERT(count * sizeof(uint32_t) == byteLen);
- const uint32_t* c32 = reinterpret_cast<const uint32_t*>(text);
- for (int i = 0; i < count; ++i) {
- this->appendUnichar(c32[i]);
+ case SkPaint::kUTF8_TextEncoding:
+ case SkPaint::kUTF16_TextEncoding:
+ case SkPaint::kUTF32_TextEncoding: {
+ const void* stop = (const char*)text + byteLen;
+ while (text < stop) {
+ this->appendUnichar(SkUTFN_Next(to_encoding(encoding), &text, stop));
+ }
+ break;
}
- } break;
- default:
- SK_ABORT("unknown text encoding");
+ default:
+ SK_ABORT("unknown text encoding");
}
if (scalarsPerPos < 2) {
diff --git a/src/utils/SkUTF.cpp b/src/utils/SkUTF.cpp
new file mode 100644
index 0000000000..0670ae02a4
--- /dev/null
+++ b/src/utils/SkUTF.cpp
@@ -0,0 +1,253 @@
+// Copyright 2018 Google LLC.
+// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
+
+#include "SkUTF.h"
+
+#include <climits>
+
+static constexpr inline int32_t left_shift(int32_t value, int32_t shift) {
+ return (int32_t) ((uint32_t) value << shift);
+}
+
+template <typename T> static constexpr bool is_align2(T x) { return 0 == (x & 1); }
+
+template <typename T> static constexpr bool is_align4(T x) { return 0 == (x & 3); }
+
+static constexpr inline bool utf16_is_high_surrogate(uint16_t c) { return (c & 0xFC00) == 0xD800; }
+
+static constexpr inline bool utf16_is_low_surrogate(uint16_t c) { return (c & 0xFC00) == 0xDC00; }
+
+/** @returns -1 iff invalid UTF8 byte,
+ 0 iff UTF8 continuation byte,
+ 1 iff ASCII byte,
+ 2 iff leading byte of 2-byte sequence,
+ 3 iff leading byte of 3-byte sequence, and
+ 4 iff leading byte of 4-byte sequence.
+ I.e.: if return value > 0, then gives length of sequence.
+*/
+static int utf8_byte_type(uint8_t c) {
+ if (c < 0x80) {
+ return 1;
+ } else if (c < 0xC0) {
+ return 0;
+ } else if (c >= 0xF5 || (c & 0xFE) == 0xC0) { // "octet values c0, c1, f5 to ff never appear"
+ return -1;
+ } else {
+ int value = (((0xe5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1;
+ // assert(value >= 2 && value <=4);
+ return value;
+ }
+}
+static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; }
+
+static bool utf8_byte_is_continuation(uint8_t c) { return utf8_byte_type(c) == 0; }
+
+////////////////////////////////////////////////////////////////////////////////
+
+int SkUTF::CountUTF8(const char* utf8, size_t byteLength) {
+ if (!utf8) {
+ return -1;
+ }
+ int count = 0;
+ const char* stop = utf8 + byteLength;
+ while (utf8 < stop) {
+ int type = utf8_byte_type(*(const uint8_t*)utf8);
+ if (!utf8_type_is_valid_leading_byte(type) || utf8 + type > stop) {
+ return -1; // Sequence extends beyond end.
+ }
+ while(type-- > 1) {
+ ++utf8;
+ if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
+ return -1;
+ }
+ }
+ ++utf8;
+ ++count;
+ }
+ return count;
+}
+
+int SkUTF::CountUTF16(const uint16_t* utf16, size_t byteLength) {
+ if (!utf16 || !is_align2(intptr_t(utf16)) || !is_align2(byteLength)) {
+ return -1;
+ }
+ const uint16_t* src = (const uint16_t*)utf16;
+ const uint16_t* stop = src + (byteLength >> 1);
+ int count = 0;
+ while (src < stop) {
+ unsigned c = *src++;
+ if (utf16_is_low_surrogate(c)) {
+ return -1;
+ }
+ if (utf16_is_high_surrogate(c)) {
+ if (src >= stop) {
+ return -1;
+ }
+ c = *src++;
+ if (!utf16_is_low_surrogate(c)) {
+ return -1;
+ }
+ }
+ count += 1;
+ }
+ return count;
+}
+
+int SkUTF::CountUTF32(const int32_t* utf32, size_t byteLength) {
+ if (!is_align4(intptr_t(utf32)) || !is_align4(byteLength) || byteLength >> 2 > INT_MAX) {
+ return -1;
+ }
+ const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits
+ const uint32_t* ptr = (const uint32_t*)utf32;
+ const uint32_t* stop = ptr + (byteLength >> 2);
+ while (ptr < stop) {
+ if (*ptr & kInvalidUnicharMask) {
+ return -1;
+ }
+ ptr += 1;
+ }
+ return (int)(byteLength >> 2);
+}
+
+template <typename T>
+static SkUnichar next_fail(const T** ptr, const T* end) {
+ *ptr = end;
+ return -1;
+}
+
+SkUnichar SkUTF::NextUTF8(const char** ptr, const char* end) {
+ if (!ptr || !end ) {
+ return -1;
+ }
+ const uint8_t* p = (const uint8_t*)*ptr;
+ if (!p || p >= (const uint8_t*)end) {
+ return next_fail(ptr, end);
+ }
+ int c = *p;
+ int hic = c << 24;
+
+ if (!utf8_type_is_valid_leading_byte(utf8_byte_type(c))) {
+ return next_fail(ptr, end);
+ }
+ if (hic < 0) {
+ uint32_t mask = (uint32_t)~0x3F;
+ hic = left_shift(hic, 1);
+ do {
+ ++p;
+ if (p >= (const uint8_t*)end) {
+ return next_fail(ptr, end);
+ }
+ // check before reading off end of array.
+ uint8_t nextByte = *p;
+ if (!utf8_byte_is_continuation(nextByte)) {
+ return next_fail(ptr, end);
+ }
+ c = (c << 6) | (nextByte & 0x3F);
+ mask <<= 5;
+ } while ((hic = left_shift(hic, 1)) < 0);
+ c &= ~mask;
+ }
+ *ptr = (char*)p + 1;
+ return c;
+}
+
+SkUnichar SkUTF::NextUTF16(const uint16_t** ptr, const uint16_t* end) {
+ if (!ptr || !end ) {
+ return next_fail(ptr, end);
+ }
+ const uint16_t* src = *ptr;
+ if (!src || src + 1 > end || !is_align2(intptr_t(src))) {
+ return next_fail(ptr, end);
+ }
+ uint16_t c = *src++;
+ SkUnichar result = c;
+ if (utf16_is_low_surrogate(c)) {
+ return next_fail(ptr, end); // srcPtr should never point at low surrogate.
+ }
+ if (utf16_is_high_surrogate(c)) {
+ if (src + 1 > end) {
+ return next_fail(ptr, end); // Truncated string.
+ }
+ uint16_t low = *src++;
+ if (!utf16_is_low_surrogate(low)) {
+ return next_fail(ptr, end);
+ }
+ /*
+ [paraphrased from wikipedia]
+ Take the high surrogate and subtract 0xD800, then multiply by 0x400.
+ Take the low surrogate and subtract 0xDC00. Add these two results
+ together, and finally add 0x10000 to get the final decoded codepoint.
+
+ unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
+ unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000
+ unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000
+ unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000)
+ */
+ result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000);
+ }
+ *ptr = src;
+ return result;
+}
+
+SkUnichar SkUTF::NextUTF32(const int32_t** ptr, const int32_t* end) {
+ if (!ptr || !end ) {
+ return -1;
+ }
+ const int32_t* s = *ptr;
+ if (!s || s + 1 > end || !is_align4(intptr_t(s))) {
+ return next_fail(ptr, end);
+ }
+ int32_t value = *s;
+ const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits
+ if (value & kInvalidUnicharMask) {
+ return next_fail(ptr, end);
+ }
+ *ptr = s + 1;
+ return value;
+}
+
+size_t SkUTF::ToUTF8(SkUnichar uni, char utf8[SkUTF::kMaxBytesInUTF8Sequence]) {
+ if ((uint32_t)uni > 0x10FFFF) {
+ return 0;
+ }
+ if (uni <= 127) {
+ if (utf8) {
+ *utf8 = (char)uni;
+ }
+ return 1;
+ }
+ char tmp[4];
+ char* p = tmp;
+ size_t count = 1;
+ while (uni > 0x7F >> count) {
+ *p++ = (char)(0x80 | (uni & 0x3F));
+ uni >>= 6;
+ count += 1;
+ }
+ if (utf8) {
+ p = tmp;
+ utf8 += count;
+ while (p < tmp + count - 1) {
+ *--utf8 = *p++;
+ }
+ *--utf8 = (char)(~(0xFF >> count) | uni);
+ }
+ return count;
+}
+
+size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) {
+ if ((uint32_t)uni > 0x10FFFF) {
+ return 0;
+ }
+ int extra = (uni > 0xFFFF);
+ if (utf16) {
+ if (extra) {
+ utf16[0] = (uint16_t)((0xD800 - 64) + (uni >> 10));
+ utf16[1] = (uint16_t)(0xDC00 | (uni & 0x3FF));
+ } else {
+ utf16[0] = (uint16_t)uni;
+ }
+ }
+ return 1 + extra;
+}
+
diff --git a/src/utils/SkUTF.h b/src/utils/SkUTF.h
new file mode 100644
index 0000000000..385102aadb
--- /dev/null
+++ b/src/utils/SkUTF.h
@@ -0,0 +1,68 @@
+// Copyright 2018 Google LLC.
+// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
+#ifndef SkUTF_DEFINED
+#define SkUTF_DEFINED
+
+#include <cstddef>
+#include <cstdint>
+
+typedef int32_t SkUnichar;
+
+namespace SkUTF {
+
+/** Given a sequence of UTF-8 bytes, return the number of unicode codepoints.
+ If the sequence is invalid UTF-8, return -1.
+*/
+int CountUTF8(const char* utf8, size_t byteLength);
+
+/** Given a sequence of aligned UTF-16 characters in machine-endian form,
+ return the number of unicode codepoints. If the sequence is invalid
+ UTF-16, return -1.
+*/
+int CountUTF16(const uint16_t* utf16, size_t byteLength);
+
+/** Given a sequence of aligned UTF-32 characters in machine-endian form,
+ return the number of unicode codepoints. If the sequence is invalid
+ UTF-32, return -1.
+*/
+int CountUTF32(const int32_t* utf32, size_t byteLength);
+
+/** Given a sequence of UTF-8 bytes, return the first unicode codepoint.
+ The pointer will be incremented to point at the next codepoint's start. If
+ invalid UTF-8 is encountered, set *ptr to end and return -1.
+*/
+SkUnichar NextUTF8(const char** ptr, const char* end);
+
+/** Given a sequence of aligned UTF-16 characters in machine-endian form,
+ return the first unicode codepoint. The pointer will be incremented to
+ point at the next codepoint's start. If invalid UTF-16 is encountered,
+ set *ptr to end and return -1.
+*/
+SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end);
+
+/** Given a sequence of aligned UTF-32 characters in machine-endian form,
+ return the first unicode codepoint. The pointer will be incremented to
+ point at the next codepoint's start. If invalid UTF-32 is encountered,
+ set *ptr to end and return -1.
+*/
+SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end);
+
+constexpr unsigned kMaxBytesInUTF8Sequence = 4;
+
+/** Convert the unicode codepoint into UTF-8. If `utf8` is non-null, place the
+ result in that array. Return the number of bytes in the result. If `utf8`
+ is null, simply return the number of bytes that would be used. For invalid
+ unicode codepoints, return 0.
+*/
+size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr);
+
+/** Convert the unicode codepoint into UTF-16. If `utf16` is non-null, place
+ the result in that array. Return the number of UTF-16 code units in the
+ result (1 or 2). If `utf16` is null, simply return the number of code
+ units that would be used. For invalid unicode codepoints, return 0.
+*/
+size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);
+
+} // namespace SkUTF
+
+#endif // SkUTF_DEFINED
diff --git a/src/xps/SkXPSDevice.cpp b/src/xps/SkXPSDevice.cpp
index b984ef3e88..c2b5e66406 100644
--- a/src/xps/SkXPSDevice.cpp
+++ b/src/xps/SkXPSDevice.cpp
@@ -1984,17 +1984,13 @@ HRESULT SkXPSDevice::AddGlyphs(IXpsOMObjectFactory* xpsFactory,
}
static int num_glyph_guess(SkPaint::TextEncoding encoding, const void* text, size_t byteLength) {
- switch (encoding) {
- case SkPaint::kUTF8_TextEncoding:
- return SkUTF8_CountUnichars(text, byteLength);
- case SkPaint::kUTF16_TextEncoding:
- return SkUTF16_CountUnichars(text, byteLength);
- case SkPaint::kGlyphID_TextEncoding:
+ static_assert((int)SkTypeface::kUTF8_Encoding == (int)SkPaint::kUTF8_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF16_Encoding == (int)SkPaint::kUTF16_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF32_Encoding == (int)SkPaint::kUTF32_TextEncoding, "");
+ if (encoding == SkPaint::kGlyphID_TextEncoding) {
return SkToInt(byteLength / 2);
- default:
- SK_ABORT("Invalid Text Encoding");
}
- return 0;
+ return SkUTFN_CountUnichars((SkTypeface::Encoding)encoding, text, byteLength);
}
static bool text_must_be_pathed(const SkPaint& paint, const SkMatrix& matrix) {
diff --git a/tests/PaintTest.cpp b/tests/PaintTest.cpp
index 647f8e2f23..0bf6df64de 100644
--- a/tests/PaintTest.cpp
+++ b/tests/PaintTest.cpp
@@ -23,7 +23,7 @@
static size_t uni_to_utf8(const SkUnichar src[], void* dst, int count) {
char* u8 = (char*)dst;
for (int i = 0; i < count; ++i) {
- int n = SkToInt(SkUTF8_FromUnichar(src[i], u8));
+ int n = SkToInt(SkUTF::ToUTF8(src[i], u8));
u8 += n;
}
return u8 - (char*)dst;
@@ -32,7 +32,7 @@ static size_t uni_to_utf8(const SkUnichar src[], void* dst, int count) {
static size_t uni_to_utf16(const SkUnichar src[], void* dst, int count) {
uint16_t* u16 = (uint16_t*)dst;
for (int i = 0; i < count; ++i) {
- int n = SkToInt(SkUTF16_FromUnichar(src[i], u16));
+ int n = SkToInt(SkUTF::ToUTF16(src[i], u16));
u16 += n;
}
return (char*)u16 - (char*)dst;
diff --git a/tests/UnicodeTest.cpp b/tests/UnicodeTest.cpp
index 5ed92d4b34..5dbcf08547 100644
--- a/tests/UnicodeTest.cpp
+++ b/tests/UnicodeTest.cpp
@@ -9,39 +9,10 @@
#include "SkUtils.h"
#include "Test.h"
-// Unicode Variation Selector ranges: inclusive
-#define UVS_MIN0 0x180B
-#define UVS_MAX0 0x180D
-#define UVS_MIN1 0xFE00
-#define UVS_MAX1 0xFE0F
-#define UVS_MIN2 0xE0100
-#define UVS_MAX2 0xE01EF
-
-static bool isUVS(SkUnichar uni) {
- return (uni >= UVS_MIN0 && uni <= UVS_MAX0) ||
- (uni >= UVS_MIN1 && uni <= UVS_MAX1) ||
- (uni >= UVS_MIN2 && uni <= UVS_MAX2);
-}
-
-static void test_uvs(skiatest::Reporter* reporter) {
- // [min, max], [min, max] ... inclusive
- static const SkUnichar gRanges[] = {
- UVS_MIN0, UVS_MAX0, UVS_MIN1, UVS_MAX1, UVS_MIN2, UVS_MAX2
- };
-
- for (size_t i = 0; i < SK_ARRAY_COUNT(gRanges); i += 2) {
- for (SkUnichar uni = gRanges[i] - 8; uni <= gRanges[i+1] + 8; ++uni) {
- bool uvs0 = isUVS(uni);
- bool uvs1 = SkUnichar_IsVariationSelector(uni);
- REPORTER_ASSERT(reporter, uvs0 == uvs1);
- }
- }
-}
-
// Simple test to ensure that when we call textToGlyphs, we get the same
// result (for the same text) when using UTF8, UTF16, UTF32.
// TODO: make the text more complex (i.e. incorporate chars>7bits)
-static void test_textencodings(skiatest::Reporter* reporter) {
+DEF_TEST(Unicode_textencodings, reporter) {
const char text8[] = "ABCDEFGabcdefg0123456789";
uint16_t text16[sizeof(text8)];
int32_t text32[sizeof(text8)];
@@ -76,8 +47,3 @@ static void test_textencodings(skiatest::Reporter* reporter) {
REPORTER_ASSERT(reporter, !memcmp(glyphs8, glyphs16, count8 * sizeof(uint16_t)));
REPORTER_ASSERT(reporter, !memcmp(glyphs8, glyphs32, count8 * sizeof(uint16_t)));
}
-
-DEF_TEST(Unicode, reporter) {
- test_uvs(reporter);
- test_textencodings(reporter);
-}
diff --git a/tests/UtilsTest.cpp b/tests/UtilsTest.cpp
index 6f8ec914bc..d621a8b12e 100644
--- a/tests/UtilsTest.cpp
+++ b/tests/UtilsTest.cpp
@@ -165,20 +165,19 @@ static void test_search(skiatest::Reporter* reporter) {
}
static void test_utf16(skiatest::Reporter* reporter) {
+ // Test non-basic-multilingual-plane unicode.
static const SkUnichar gUni[] = {
0x10000, 0x18080, 0x20202, 0xFFFFF, 0x101234
};
-
- uint16_t buf[2];
-
- for (size_t i = 0; i < SK_ARRAY_COUNT(gUni); i++) {
- size_t count = SkUTF16_FromUnichar(gUni[i], buf);
+ for (SkUnichar uni : gUni) {
+ uint16_t buf[2];
+ size_t count = SkUTF::ToUTF16(uni, buf);
REPORTER_ASSERT(reporter, count == 2);
- size_t count2 = SkUTF16_CountUnichars(buf, 2 * sizeof(uint16_t));
+ size_t count2 = SkUTF::CountUTF16(buf, sizeof(buf));
REPORTER_ASSERT(reporter, count2 == 1);
const uint16_t* ptr = buf;
- SkUnichar c = SkUTF16_NextUnichar(&ptr, buf + SK_ARRAY_COUNT(buf));
- REPORTER_ASSERT(reporter, c == gUni[i]);
+ SkUnichar c = SkUTF::NextUTF16(&ptr, buf + SK_ARRAY_COUNT(buf));
+ REPORTER_ASSERT(reporter, c == uni);
REPORTER_ASSERT(reporter, ptr - buf == 2);
}
}
@@ -204,8 +203,8 @@ DEF_TEST(Utils, reporter) {
for (size_t i = 0; i < SK_ARRAY_COUNT(gTest); i++) {
const char* p = gTest[i].fUtf8;
const char* stop = p + strlen(p);
- int n = SkUTF8_CountUnichars(p, strlen(p));
- SkUnichar u1 = SkUTF8_NextUnichar(&p, stop);
+ int n = SkUTF::CountUTF8(p, strlen(p));
+ SkUnichar u1 = SkUTF::NextUTF8(&p, stop);
REPORTER_ASSERT(reporter, n == 1);
REPORTER_ASSERT(reporter, u1 == gTest[i].fUni);
@@ -220,62 +219,91 @@ DEF_TEST(Utils, reporter) {
}
#define ASCII_BYTE "X"
-#define CONTINUATION_BYTE "\x80"
-#define LEADING_TWO_BYTE "\xC4"
-#define LEADING_THREE_BYTE "\xE0"
+#define CONTINUATION_BYTE "\xA1"
+#define LEADING_TWO_BYTE "\xC2"
+#define LEADING_THREE_BYTE "\xE1"
#define LEADING_FOUR_BYTE "\xF0"
#define INVALID_BYTE "\xFC"
-static bool valid_utf8(const char* p, size_t l) {
- return SkUTF8_CountUnichars(p, l) >= 0;
-}
-DEF_TEST(Utils_UTF8_ValidLength, r) {
- const char* goodTestcases[] = {
- "",
- ASCII_BYTE,
- ASCII_BYTE ASCII_BYTE,
- LEADING_TWO_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE,
- ASCII_BYTE ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE,
- LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE
- CONTINUATION_BYTE,
+DEF_TEST(SkUTF_CountUTF8, r) {
+ struct {
+ int expectedCount;
+ const char* utf8String;
+ } testCases[] = {
+ { 0, "" },
+ { 1, ASCII_BYTE },
+ { 2, ASCII_BYTE ASCII_BYTE },
+ { 1, LEADING_TWO_BYTE CONTINUATION_BYTE },
+ { 2, ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE },
+ { 3, ASCII_BYTE ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE },
+ { 1, LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 2, ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 3, ASCII_BYTE ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 1, LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 2, ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 3, ASCII_BYTE ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE
+ CONTINUATION_BYTE },
+ { -1, INVALID_BYTE },
+ { -1, INVALID_BYTE CONTINUATION_BYTE },
+ { -1, INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { -1, INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { -1, LEADING_TWO_BYTE },
+ { -1, CONTINUATION_BYTE },
+ { -1, CONTINUATION_BYTE CONTINUATION_BYTE },
+ { -1, LEADING_THREE_BYTE CONTINUATION_BYTE },
+ { -1, CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { -1, LEADING_FOUR_BYTE CONTINUATION_BYTE },
+ { -1, CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
};
- for (const char* testcase : goodTestcases) {
- REPORTER_ASSERT(r, valid_utf8(testcase, strlen(testcase)));
+ for (auto testCase : testCases) {
+ const char* str = testCase.utf8String;
+ REPORTER_ASSERT(r, testCase.expectedCount == SkUTF::CountUTF8(str, strlen(str)));
}
- const char* badTestcases[] = {
- INVALID_BYTE,
- INVALID_BYTE CONTINUATION_BYTE,
- INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- LEADING_TWO_BYTE,
- CONTINUATION_BYTE,
- CONTINUATION_BYTE CONTINUATION_BYTE,
- LEADING_THREE_BYTE CONTINUATION_BYTE,
- CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- LEADING_FOUR_BYTE CONTINUATION_BYTE,
- CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
-
- ASCII_BYTE INVALID_BYTE,
- ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE,
- ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_TWO_BYTE,
- ASCII_BYTE CONTINUATION_BYTE,
- ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE,
- ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE,
- ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
-
- // LEADING_FOUR_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE,
+}
+
+DEF_TEST(SkUTF_NextUTF8_ToUTF8, r) {
+ struct {
+ SkUnichar expected;
+ const char* utf8String;
+ } testCases[] = {
+ { -1, INVALID_BYTE },
+ { -1, "" },
+ { 0x0058, ASCII_BYTE },
+ { 0x00A1, LEADING_TWO_BYTE CONTINUATION_BYTE },
+ { 0x1861, LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 0x010330, LEADING_FOUR_BYTE "\x90\x8C\xB0" },
};
- for (const char* testcase : badTestcases) {
- REPORTER_ASSERT(r, !valid_utf8(testcase, strlen(testcase)));
+ for (auto testCase : testCases) {
+ const char* str = testCase.utf8String;
+ SkUnichar uni = SkUTF::NextUTF8(&str, str + strlen(str));
+ REPORTER_ASSERT(r, str == testCase.utf8String + strlen(testCase.utf8String));
+ REPORTER_ASSERT(r, uni == testCase.expected);
+ char buff[5] = {0, 0, 0, 0, 0};
+ size_t len = SkUTF::ToUTF8(uni, buff);
+ if (buff[len] != 0) {
+ ERRORF(r, "unexpected write");
+ continue;
+ }
+ if (uni == -1) {
+ REPORTER_ASSERT(r, len == 0);
+ continue;
+ }
+ if (len == 0) {
+ ERRORF(r, "unexpected failure.");
+ continue;
+ }
+ if (len > 4) {
+ ERRORF(r, "wrote too much");
+ continue;
+ }
+ str = testCase.utf8String;
+ REPORTER_ASSERT(r, len == strlen(buff));
+ REPORTER_ASSERT(r, len == strlen(str));
+ REPORTER_ASSERT(r, 0 == strcmp(str, buff));
}
-
}
+#undef ASCII_BYTE
+#undef CONTINUATION_BYTE
+#undef LEADING_TWO_BYTE
+#undef LEADING_THREE_BYTE
+#undef LEADING_FOUR_BYTE
+#undef INVALID_BYTE
diff --git a/tools/fonts/create_test_font.cpp b/tools/fonts/create_test_font.cpp
index 36ead3cb65..de19c099c9 100644
--- a/tools/fonts/create_test_font.cpp
+++ b/tools/fonts/create_test_font.cpp
@@ -140,13 +140,12 @@ static int output_points(const SkPoint* pts, int emSize, int count, SkString* pt
static void output_path_data(const SkPaint& paint,
int emSize, SkString* ptsOut, SkTDArray<SkPath::Verb>* verbs,
SkTDArray<unsigned>* charCodes, SkTDArray<SkScalar>* widths) {
- for (int ch = 0x00; ch < 0x7f; ++ch) {
- char str[1];
- str[0] = ch;
- const char* used = str;
- SkUnichar index = SkUTF8_NextUnichar(&used, str + 1);
+ for (SkUnichar index = 0x00; index < 0x7f; ++index) {
+ uint16_t utf16[2];
+ size_t utf16Bytes = sizeof(uint16_t) * SkUTF::ToUTF16(index, utf16);
SkPath path;
- paint.getTextPath((const void*) &index, 2, 0, 0, &path);
+ SkASSERT(paint.getTextEncoding() == SkPaint::kUTF16_TextEncoding);
+ paint.getTextPath(utf16, utf16Bytes, 0, 0, &path);
SkPath::RawIter iter(path);
SkPath::Verb verb;
SkPoint pts[4];
@@ -175,12 +174,12 @@ static void output_path_data(const SkPaint& paint,
*verbs->append() = SkPath::kDone_Verb;
*charCodes->append() = index;
SkScalar width;
- SkDEBUGCODE(int charCount =) paint.getTextWidths((const void*) &index, 2, &width);
+ SkDEBUGCODE(int charCount =) paint.getTextWidths(utf16, utf16Bytes, &width);
SkASSERT(charCount == 1);
// SkASSERT(floor(width) == width); // not true for Hiragino Maru Gothic Pro
*widths->append() = width;
- if (!ch) {
- ch = 0x1f; // skip the rest of the control codes
+ if (0 == index) {
+ index = 0x1f; // skip the rest of the control codes
}
}
}
@@ -432,7 +431,11 @@ static void generate_index(const char* defaultName) {
}
int main(int , char * const []) {
+#ifdef SK_BUILD_FOR_UNIX
+ generate_fonts("/usr/share/fonts/truetype/liberation/");
+#else
generate_fonts("/Library/Fonts/"); // or /usr/share/fonts/truetype/ttf-liberation/
+#endif
generate_index(DEFAULT_FONT_NAME);
return 0;
}