aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--fuzz/FuzzCanvas.cpp8
-rw-r--r--gm/atlastext.cpp2
-rw-r--r--gm/coloremoji_blendmodes.cpp3
-rw-r--r--gn/utils.gni2
-rw-r--r--modules/skshaper/src/SkShaper_harfbuzz.cpp27
-rw-r--r--samplecode/SampleRectanizer.cpp4
-rw-r--r--samplecode/SampleUnpremul.cpp4
-rw-r--r--samplecode/SampleXfermodesBlur.cpp2
-rw-r--r--src/core/SkFindAndPlaceGlyph.h24
-rw-r--r--src/core/SkFont.cpp4
-rw-r--r--src/core/SkOverdrawCanvas.cpp6
-rw-r--r--src/core/SkPaint.cpp104
-rw-r--r--src/core/SkPaintPriv.cpp6
-rw-r--r--src/core/SkString.cpp4
-rw-r--r--src/core/SkStringUtils.cpp6
-rw-r--r--src/core/SkUtils.cpp428
-rw-r--r--src/core/SkUtils.h98
-rw-r--r--src/pdf/SkPDFDevice.cpp4
-rw-r--r--src/pdf/SkPDFMetadata.cpp8
-rw-r--r--src/pdf/SkPDFUtils.h2
-rw-r--r--src/ports/SkFontHost_mac.cpp6
-rw-r--r--src/ports/SkFontHost_win.cpp6
-rw-r--r--src/ports/SkFontMgr_win_dw.cpp2
-rw-r--r--src/ports/SkOSFile_stdio.cpp6
-rw-r--r--src/ports/SkRemotableFontMgr_win_dw.cpp2
-rw-r--r--src/svg/SkSVGDevice.cpp62
-rw-r--r--src/utils/SkUTF.cpp253
-rw-r--r--src/utils/SkUTF.h68
-rw-r--r--src/xps/SkXPSDevice.cpp14
-rw-r--r--tests/PaintTest.cpp4
-rw-r--r--tests/UnicodeTest.cpp36
-rw-r--r--tests/UtilsTest.cpp152
-rw-r--r--tools/fonts/create_test_font.cpp21
33 files changed, 603 insertions, 775 deletions
diff --git a/fuzz/FuzzCanvas.cpp b/fuzz/FuzzCanvas.cpp
index aac68481d4..80d3e816a1 100644
--- a/fuzz/FuzzCanvas.cpp
+++ b/fuzz/FuzzCanvas.cpp
@@ -1020,21 +1020,21 @@ static SkTDArray<uint8_t> make_fuzz_text(Fuzz* fuzz, const SkPaint& paint) {
case SkPaint::kUTF8_TextEncoding: {
size_t utf8len = 0;
for (int j = 0; j < length; ++j) {
- utf8len += SkUTF8_FromUnichar(buffer[j], nullptr);
+ utf8len += SkUTF::ToUTF8(buffer[j], nullptr);
}
char* ptr = (char*)array.append(utf8len);
for (int j = 0; j < length; ++j) {
- ptr += SkUTF8_FromUnichar(buffer[j], ptr);
+ ptr += SkUTF::ToUTF8(buffer[j], ptr);
}
} break;
case SkPaint::kUTF16_TextEncoding: {
size_t utf16len = 0;
for (int j = 0; j < length; ++j) {
- utf16len += SkUTF16_FromUnichar(buffer[j]);
+ utf16len += SkUTF::ToUTF16(buffer[j]);
}
uint16_t* ptr = (uint16_t*)array.append(utf16len * sizeof(uint16_t));
for (int j = 0; j < length; ++j) {
- ptr += SkUTF16_FromUnichar(buffer[j], ptr);
+ ptr += SkUTF::ToUTF16(buffer[j], ptr);
}
} break;
case SkPaint::kUTF32_TextEncoding:
diff --git a/gm/atlastext.cpp b/gm/atlastext.cpp
index 859b4c356f..e3fed2ee06 100644
--- a/gm/atlastext.cpp
+++ b/gm/atlastext.cpp
@@ -30,7 +30,7 @@ static SkScalar draw_string(SkAtlasTextTarget* target, const SkString& text, SkS
return x;
}
auto font = SkAtlasTextFont::Make(typeface, size);
- int cnt = SkUTF8_CountUnichars(text.c_str(), text.size());
+ int cnt = SkUTF::CountUTF8(text.c_str(), text.size());
std::unique_ptr<SkGlyphID[]> glyphs(new SkGlyphID[cnt]);
typeface->charsToGlyphs(text.c_str(), SkTypeface::Encoding::kUTF8_Encoding, glyphs.get(), cnt);
diff --git a/gm/coloremoji_blendmodes.cpp b/gm/coloremoji_blendmodes.cpp
index 872527404c..46ba67ec90 100644
--- a/gm/coloremoji_blendmodes.cpp
+++ b/gm/coloremoji_blendmodes.cpp
@@ -148,7 +148,8 @@ protected:
textP.setBlendMode(gModes[i]);
textP.setTextEncoding(SkPaint::kUTF32_TextEncoding);
const char* text = sk_tool_utils::emoji_sample_text();
- SkUnichar unichar = SkUTF8_NextUnichar(&text, text + strlen(text));
+ SkUnichar unichar = SkUTF::NextUTF8(&text, text + strlen(text));
+ SkASSERT(unichar >= 0);
canvas->drawText(&unichar, 4, x+ w/10.f, y + 7.f*h/8.f, textP);
}
#if 1
diff --git a/gn/utils.gni b/gn/utils.gni
index 7d014b0a0c..68667beed4 100644
--- a/gn/utils.gni
+++ b/gn/utils.gni
@@ -62,6 +62,8 @@ skia_utils_sources = [
"$_src/utils/SkShadowUtils.cpp",
"$_src/utils/SkThreadUtils_pthread.cpp",
"$_src/utils/SkThreadUtils_win.cpp",
+ "$_src/utils/SkUTF.cpp",
+ "$_src/utils/SkUTF.h",
"$_src/utils/SkWhitelistTypefaces.cpp",
#mac
diff --git a/modules/skshaper/src/SkShaper_harfbuzz.cpp b/modules/skshaper/src/SkShaper_harfbuzz.cpp
index 2077dedadc..5c49de04b4 100644
--- a/modules/skshaper/src/SkShaper_harfbuzz.cpp
+++ b/modules/skshaper/src/SkShaper_harfbuzz.cpp
@@ -83,6 +83,15 @@ HBFont create_hb_font(SkTypeface* tf) {
return font;
}
+/** this version replaces invalid utf-8 sequences with code point U+FFFD. */
+static inline SkUnichar utf8_next(const char** ptr, const char* end) {
+ SkUnichar val = SkUTF::NextUTF8(ptr, end);
+ if (val < 0) {
+ return 0xFFFD; // REPLACEMENT CHARACTER
+ }
+ return val;
+}
+
class RunIterator {
public:
virtual ~RunIterator() {}
@@ -138,16 +147,16 @@ public:
SkASSERT(fUTF16LogicalPosition < ubidi_getLength(fBidi.get()));
int32_t endPosition = ubidi_getLength(fBidi.get());
fLevel = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition);
- SkUnichar u = SkUTF8_NextUnichar(&fEndOfCurrentRun, fEndOfAllRuns);
- fUTF16LogicalPosition += SkUTF16_FromUnichar(u);
+ SkUnichar u = utf8_next(&fEndOfCurrentRun, fEndOfAllRuns);
+ fUTF16LogicalPosition += SkUTF::ToUTF16(u);
UBiDiLevel level;
while (fUTF16LogicalPosition < endPosition) {
level = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition);
if (level != fLevel) {
break;
}
- u = SkUTF8_NextUnichar(&fEndOfCurrentRun, fEndOfAllRuns);
- fUTF16LogicalPosition += SkUTF16_FromUnichar(u);
+ u = utf8_next(&fEndOfCurrentRun, fEndOfAllRuns);
+ fUTF16LogicalPosition += SkUTF::ToUTF16(u);
}
}
const char* endOfCurrentRun() const override {
@@ -184,11 +193,11 @@ public:
{}
void consume() override {
SkASSERT(fCurrent < fEnd);
- SkUnichar u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+ SkUnichar u = utf8_next(&fCurrent, fEnd);
fCurrentScript = hb_unicode_script(fHBUnicode, u);
while (fCurrent < fEnd) {
const char* prev = fCurrent;
- u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+ u = utf8_next(&fCurrent, fEnd);
const hb_script_t script = hb_unicode_script(fHBUnicode, u);
if (script != fCurrentScript) {
if (fCurrentScript == HB_SCRIPT_INHERITED || fCurrentScript == HB_SCRIPT_COMMON) {
@@ -243,7 +252,7 @@ public:
{}
void consume() override {
SkASSERT(fCurrent < fEnd);
- SkUnichar u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+ SkUnichar u = utf8_next(&fCurrent, fEnd);
// If the starting typeface can handle this character, use it.
if (fTypeface->charsToGlyphs(&u, SkTypeface::kUTF32_Encoding, nullptr, 1)) {
fFallbackTypeface.reset();
@@ -265,7 +274,7 @@ public:
while (fCurrent < fEnd) {
const char* prev = fCurrent;
- u = SkUTF8_NextUnichar(&fCurrent, fEnd);
+ u = utf8_next(&fCurrent, fEnd);
// If using a fallback and the initial typeface has this character, stop fallback.
if (fFallbackTypeface &&
@@ -554,7 +563,7 @@ SkPoint SkShaper::shape(SkTextBlobBuilder* builder,
const char* utf8Current = utf8Start;
while (utf8Current < utf8End) {
unsigned int cluster = utf8Current - utf8Start;
- hb_codepoint_t u = SkUTF8_NextUnichar(&utf8Current, utf8End);
+ hb_codepoint_t u = utf8_next(&utf8Current, utf8End);
hb_buffer_add(buffer, u, cluster);
}
diff --git a/samplecode/SampleRectanizer.cpp b/samplecode/SampleRectanizer.cpp
index 752d5b7136..d402416ca5 100644
--- a/samplecode/SampleRectanizer.cpp
+++ b/samplecode/SampleRectanizer.cpp
@@ -59,8 +59,8 @@ protected:
}
SkUnichar uni;
if (SampleCode::CharQ(*evt, &uni)) {
- char utf8[kMaxBytesInUTF8Sequence];
- size_t size = SkUTF8_FromUnichar(uni, utf8);
+ char utf8[SkUTF::kMaxBytesInUTF8Sequence];
+ size_t size = SkUTF::ToUTF8(uni, utf8);
// Only consider events for single char keys
if (1 == size) {
switch (utf8[0]) {
diff --git a/samplecode/SampleUnpremul.cpp b/samplecode/SampleUnpremul.cpp
index 108d70914f..4f41418158 100644
--- a/samplecode/SampleUnpremul.cpp
+++ b/samplecode/SampleUnpremul.cpp
@@ -53,8 +53,8 @@ protected:
}
SkUnichar uni;
if (SampleCode::CharQ(*evt, &uni)) {
- char utf8[kMaxBytesInUTF8Sequence];
- size_t size = SkUTF8_FromUnichar(uni, utf8);
+ char utf8[SkUTF::kMaxBytesInUTF8Sequence];
+ size_t size = SkUTF::ToUTF8(uni, utf8);
// Only consider events for single char keys
if (1 == size) {
switch (utf8[0]) {
diff --git a/samplecode/SampleXfermodesBlur.cpp b/samplecode/SampleXfermodesBlur.cpp
index 37b859288b..0ef144ef18 100644
--- a/samplecode/SampleXfermodesBlur.cpp
+++ b/samplecode/SampleXfermodesBlur.cpp
@@ -90,7 +90,7 @@ protected:
paint.setTextSize(50);
paint.setTypeface(SkTypeface::MakeFromName("Arial Unicode MS", SkFontStyle()));
char buffer[10];
- size_t len = SkUTF8_FromUnichar(0x8500, buffer);
+ size_t len = SkUTF::ToUTF8(0x8500, buffer);
canvas->drawText(buffer, len, 40, 40, paint);
return;
}
diff --git a/src/core/SkFindAndPlaceGlyph.h b/src/core/SkFindAndPlaceGlyph.h
index f602e8d931..74fa516910 100644
--- a/src/core/SkFindAndPlaceGlyph.h
+++ b/src/core/SkFindAndPlaceGlyph.h
@@ -147,7 +147,7 @@ private:
private:
SkUnichar nextUnichar(const char** text, const char* stop) override {
- return SkUTF8_NextUnichar(text, stop);
+ return SkUTF::NextUTF8(text, stop);
}
};
@@ -157,7 +157,7 @@ private:
private:
SkUnichar nextUnichar(const char** text, const char* stop) override {
- return SkUTF16_NextUnichar((const uint16_t**)text, (const uint16_t*)stop);
+ return SkUTF::NextUTF16((const uint16_t**)text, (const uint16_t*)stop);
}
};
@@ -167,10 +167,7 @@ private:
private:
SkUnichar nextUnichar(const char** text, const char* stop) override {
- const int32_t* ptr = *(const int32_t**)text;
- SkUnichar uni = *ptr++;
- *text = (const char*)ptr;
- return uni;
+ return SkUTF::NextUTF32((const int32_t**)text, (const int32_t*)stop);
}
};
@@ -181,19 +178,24 @@ private:
SkASSERT(cache != nullptr);
}
- const SkGlyph& lookupGlyph(const char** text, const char*) override {
- return fCache->getGlyphIDMetrics(nextGlyphId(text));
+ const SkGlyph& lookupGlyph(const char** text, const char* stop) override {
+ return fCache->getGlyphIDMetrics(nextGlyphId(text, stop));
}
- const SkGlyph& lookupGlyphXY(const char** text, const char*,
+ const SkGlyph& lookupGlyphXY(const char** text, const char* stop,
SkFixed x, SkFixed y) override {
- return fCache->getGlyphIDMetrics(nextGlyphId(text), x, y);
+ return fCache->getGlyphIDMetrics(nextGlyphId(text, stop), x, y);
}
private:
- uint16_t nextGlyphId(const char** text) {
+ uint16_t nextGlyphId(const char** text, const char* stop) {
SkASSERT(text != nullptr);
const uint16_t* ptr = *(const uint16_t**)text;
+ SkASSERT(ptr);
+ if (ptr + 1 > (const uint16_t*)stop) {
+ *text = stop;
+ return 0;
+ }
uint16_t glyphID = *ptr;
ptr += 1;
*text = (const char*)ptr;
diff --git a/src/core/SkFont.cpp b/src/core/SkFont.cpp
index bde8c4c80b..081469f900 100644
--- a/src/core/SkFont.cpp
+++ b/src/core/SkFont.cpp
@@ -68,10 +68,10 @@ int SkFont::textToGlyphs(const void* text, size_t byteLength, SkTextEncoding enc
switch (encoding) {
case kUTF8_SkTextEncoding:
- count = SkUTF8_CountUnichars((const char*)text, byteLength);
+ count = SkUTF::CountUTF8((const char*)text, byteLength);
break;
case kUTF16_SkTextEncoding:
- count = SkUTF16_CountUnichars((const uint16_t*)text, byteLength);
+ count = SkUTF::CountUTF16((const uint16_t*)text, byteLength);
break;
case kUTF32_SkTextEncoding:
count = SkToInt(byteLength >> 2);
diff --git a/src/core/SkOverdrawCanvas.cpp b/src/core/SkOverdrawCanvas.cpp
index 249723e494..0e20878793 100644
--- a/src/core/SkOverdrawCanvas.cpp
+++ b/src/core/SkOverdrawCanvas.cpp
@@ -98,13 +98,15 @@ void SkOverdrawCanvas::onDrawTextOnPath(const void* text, size_t byteLength, con
typedef int (*CountTextProc)(const char* text, const char* stop);
static int count_utf16(const char* text, const char* stop) {
const uint16_t* prev = (const uint16_t*)text;
- (void)SkUTF16_NextUnichar(&prev, (const uint16_t*)stop);
+ (void)SkUTF::NextUTF16(&prev, (const uint16_t*)stop);
return SkToInt((const char*)prev - text);
}
static int return_4(const char* text, const char* stop) { return 4; }
static int return_2(const char* text, const char* stop) { return 2; }
static int count_utf8(const char* text, const char* stop) {
- return SkUTF8_LeadByteToCount(*(const uint8_t*)text);
+ const char* ptr = text;
+ (void)SkUTF::NextUTF8(&ptr, stop);
+ return SkToInt(ptr - text);
}
void SkOverdrawCanvas::onDrawTextRSXform(const void* text, size_t byteLength,
diff --git a/src/core/SkPaint.cpp b/src/core/SkPaint.cpp
index 9b8431e591..15aa44eebc 100644
--- a/src/core/SkPaint.cpp
+++ b/src/core/SkPaint.cpp
@@ -399,9 +399,9 @@ int SkPaint::countText(const void* text, size_t byteLength) const {
SkASSERT(text != nullptr);
switch (this->getTextEncoding()) {
case kUTF8_TextEncoding:
- return SkUTF8_CountUnichars(text, byteLength);
+ return SkUTF::CountUTF8((const char*)text, byteLength);
case kUTF16_TextEncoding:
- return SkUTF16_CountUnichars(text, byteLength);
+ return SkUTF::CountUTF16((const uint16_t*)text, byteLength);
case kUTF32_TextEncoding:
return SkToInt(byteLength >> 2);
case kGlyphID_TextEncoding:
@@ -413,6 +413,13 @@ int SkPaint::countText(const void* text, size_t byteLength) const {
return 0;
}
+static SkTypeface::Encoding to_encoding(SkPaint::TextEncoding e) {
+ static_assert((int)SkTypeface::kUTF8_Encoding == (int)SkPaint::kUTF8_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF16_Encoding == (int)SkPaint::kUTF16_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF32_Encoding == (int)SkPaint::kUTF32_TextEncoding, "");
+ return (SkTypeface::Encoding)e;
+}
+
int SkPaint::textToGlyphs(const void* textData, size_t byteLength, uint16_t glyphs[]) const {
SkASSERT(textData != nullptr);
@@ -431,38 +438,16 @@ int SkPaint::textToGlyphs(const void* textData, size_t byteLength, uint16_t glyp
auto cache = SkStrikeCache::FindOrCreateStrikeExclusive(*this);
- const char* text = (const char*)textData;
- const char* stop = text + byteLength;
+ const void* stop = (const char*)textData + byteLength;
uint16_t* gptr = glyphs;
+ const SkTypeface::Encoding encoding = to_encoding(this->getTextEncoding());
- switch (this->getTextEncoding()) {
- case SkPaint::kUTF8_TextEncoding:
- while (text < stop) {
- SkUnichar u = SkUTF8_NextUnicharWithError(&text, stop);
- if (u < 0) {
- return 0; // bad UTF-8 sequence
- }
- *gptr++ = cache->unicharToGlyph(u);
- }
- break;
- case SkPaint::kUTF16_TextEncoding: {
- const uint16_t* text16 = (const uint16_t*)text;
- const uint16_t* stop16 = (const uint16_t*)stop;
- while (text16 < stop16) {
- *gptr++ = cache->unicharToGlyph(SkUTF16_NextUnichar(&text16, stop16));
- }
- break;
- }
- case kUTF32_TextEncoding: {
- const int32_t* text32 = (const int32_t*)text;
- const int32_t* stop32 = (const int32_t*)stop;
- while (text32 < stop32) {
- *gptr++ = cache->unicharToGlyph(*text32++);
- }
- break;
+ while (textData < stop) {
+ SkUnichar unichar = SkUTFN_Next(encoding, &textData, stop);
+ if (unichar < 0) {
+ return 0; // bad UTF-N sequence
}
- default:
- SkDEBUGFAIL("unknown text encoding");
+ *gptr++ = cache->unicharToGlyph(unichar);
}
return SkToInt(gptr - glyphs);
}
@@ -487,41 +472,12 @@ bool SkPaint::containsText(const void* textData, size_t byteLength) const {
}
auto cache = SkStrikeCache::FindOrCreateStrikeExclusive(*this);
-
- switch (this->getTextEncoding()) {
- case SkPaint::kUTF8_TextEncoding: {
- const char* text = static_cast<const char*>(textData);
- const char* stop = text + byteLength;
- while (text < stop) {
- if (0 == cache->unicharToGlyph(SkUTF8_NextUnichar(&text, stop))) {
- return false;
- }
- }
- break;
- }
- case SkPaint::kUTF16_TextEncoding: {
- const uint16_t* text = static_cast<const uint16_t*>(textData);
- const uint16_t* stop = text + (byteLength >> 1);
- while (text < stop) {
- if (0 == cache->unicharToGlyph(SkUTF16_NextUnichar(&text, stop))) {
- return false;
- }
- }
- break;
- }
- case SkPaint::kUTF32_TextEncoding: {
- const int32_t* text = static_cast<const int32_t*>(textData);
- const int32_t* stop = text + (byteLength >> 2);
- while (text < stop) {
- if (0 == cache->unicharToGlyph(*text++)) {
- return false;
- }
- }
- break;
- }
- default:
- SkDEBUGFAIL("unknown text encoding");
+ const void* stop = (const char*)textData + byteLength;
+ const SkTypeface::Encoding encoding = to_encoding(this->getTextEncoding());
+ while (textData < stop) {
+ if (0 == cache->unicharToGlyph(SkUTFN_Next(encoding, &textData, stop))) {
return false;
+ }
}
return true;
}
@@ -551,7 +507,7 @@ static const SkGlyph& sk_getMetrics_utf8_next(SkGlyphCache* cache,
SkASSERT(cache != nullptr);
SkASSERT(text != nullptr);
- return cache->getUnicharMetrics(SkUTF8_NextUnichar(text, stop));
+ return cache->getUnicharMetrics(SkUTF::NextUTF8(text, stop));
}
static const SkGlyph& sk_getMetrics_utf16_next(SkGlyphCache* cache,
@@ -561,7 +517,7 @@ static const SkGlyph& sk_getMetrics_utf16_next(SkGlyphCache* cache,
SkASSERT(text != nullptr);
return cache->getUnicharMetrics(
- SkUTF16_NextUnichar((const uint16_t**)text, (const uint16_t*)stop));
+ SkUTF::NextUTF16((const uint16_t**)text, (const uint16_t*)stop));
}
static const SkGlyph& sk_getMetrics_utf32_next(SkGlyphCache* cache,
@@ -570,10 +526,7 @@ static const SkGlyph& sk_getMetrics_utf32_next(SkGlyphCache* cache,
SkASSERT(cache != nullptr);
SkASSERT(text != nullptr);
- const int32_t* ptr = *(const int32_t**)text;
- SkUnichar uni = *ptr++;
- *text = (const char*)ptr;
- return cache->getUnicharMetrics(uni);
+ return cache->getUnicharMetrics(SkUTF::NextUTF32((const int32_t**)text, (const int32_t*)stop));
}
static const SkGlyph& sk_getMetrics_glyph_next(SkGlyphCache* cache,
@@ -595,7 +548,7 @@ static const SkGlyph& sk_getAdvance_utf8_next(SkGlyphCache* cache,
SkASSERT(cache != nullptr);
SkASSERT(text != nullptr);
- return cache->getUnicharAdvance(SkUTF8_NextUnichar(text, stop));
+ return cache->getUnicharAdvance(SkUTF::NextUTF8(text, stop));
}
static const SkGlyph& sk_getAdvance_utf16_next(SkGlyphCache* cache,
@@ -605,7 +558,7 @@ static const SkGlyph& sk_getAdvance_utf16_next(SkGlyphCache* cache,
SkASSERT(text != nullptr);
return cache->getUnicharAdvance(
- SkUTF16_NextUnichar((const uint16_t**)text, (const uint16_t*)stop));
+ SkUTF::NextUTF16((const uint16_t**)text, (const uint16_t*)stop));
}
static const SkGlyph& sk_getAdvance_utf32_next(SkGlyphCache* cache,
@@ -614,10 +567,7 @@ static const SkGlyph& sk_getAdvance_utf32_next(SkGlyphCache* cache,
SkASSERT(cache != nullptr);
SkASSERT(text != nullptr);
- const int32_t* ptr = *(const int32_t**)text;
- SkUnichar uni = *ptr++;
- *text = (const char*)ptr;
- return cache->getUnicharAdvance(uni);
+ return cache->getUnicharAdvance(SkUTF::NextUTF32((const int32_t**)text, (const int32_t*)stop));
}
static const SkGlyph& sk_getAdvance_glyph_next(SkGlyphCache* cache,
diff --git a/src/core/SkPaintPriv.cpp b/src/core/SkPaintPriv.cpp
index 3be1ebeafa..309d244951 100644
--- a/src/core/SkPaintPriv.cpp
+++ b/src/core/SkPaintPriv.cpp
@@ -92,9 +92,9 @@ bool SkPaintPriv::ShouldDither(const SkPaint& p, SkColorType dstCT) {
int SkPaintPriv::ValidCountText(const void* text, size_t length, SkPaint::TextEncoding encoding) {
switch (encoding) {
- case SkPaint::kUTF8_TextEncoding: return SkUTF8_CountUnichars(text, length);
- case SkPaint::kUTF16_TextEncoding: return SkUTF16_CountUnichars(text, length);
- case SkPaint::kUTF32_TextEncoding: return SkUTF32_CountUnichars(text, length);
+ case SkPaint::kUTF8_TextEncoding: return SkUTF::CountUTF8((const char*)text, length);
+ case SkPaint::kUTF16_TextEncoding: return SkUTF::CountUTF16((const uint16_t*)text, length);
+ case SkPaint::kUTF32_TextEncoding: return SkUTF::CountUTF32((const int32_t*)text, length);
case SkPaint::kGlyphID_TextEncoding:
if (!SkIsAlign2(intptr_t(text)) || !SkIsAlign2(length)) {
return -1;
diff --git a/src/core/SkString.cpp b/src/core/SkString.cpp
index 65dae1b753..6f7d80dfd7 100644
--- a/src/core/SkString.cpp
+++ b/src/core/SkString.cpp
@@ -446,8 +446,8 @@ void SkString::insert(size_t offset, const char text[], size_t len) {
}
void SkString::insertUnichar(size_t offset, SkUnichar uni) {
- char buffer[kMaxBytesInUTF8Sequence];
- size_t len = SkUTF8_FromUnichar(uni, buffer);
+ char buffer[SkUTF::kMaxBytesInUTF8Sequence];
+ size_t len = SkUTF::ToUTF8(uni, buffer);
if (len) {
this->insert(offset, buffer, len);
diff --git a/src/core/SkStringUtils.cpp b/src/core/SkStringUtils.cpp
index 500478585f..1743d0d26c 100644
--- a/src/core/SkStringUtils.cpp
+++ b/src/core/SkStringUtils.cpp
@@ -62,8 +62,8 @@ SkString SkStringFromUTF16(const uint16_t* src, size_t count) {
const uint16_t* end = src + count;
for (const uint16_t* ptr = src; ptr < end;) {
const uint16_t* last = ptr;
- SkUnichar u = SkUTF16_NextUnichar(&ptr, stop);
- size_t s = SkUTF8_FromUnichar(u);
+ SkUnichar u = SkUTF::NextUTF16(&ptr, stop);
+ size_t s = SkUTF::ToUTF8(u);
if (n > UINT32_MAX - s) {
end = last; // truncate input string
break;
@@ -73,7 +73,7 @@ SkString SkStringFromUTF16(const uint16_t* src, size_t count) {
ret = SkString(n);
char* out = ret.writable_str();
for (const uint16_t* ptr = src; ptr < end;) {
- out += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&ptr, stop), out);
+ out += SkUTF::ToUTF8(SkUTF::NextUTF16(&ptr, stop), out);
}
SkASSERT(out == ret.writable_str() + n);
}
diff --git a/src/core/SkUtils.cpp b/src/core/SkUtils.cpp
index f0c1f60a09..9754da662c 100644
--- a/src/core/SkUtils.cpp
+++ b/src/core/SkUtils.cpp
@@ -7,431 +7,27 @@
#include "SkUtils.h"
-#include "SkTo.h"
-
-/* 0xxxxxxx 1 total
- 10xxxxxx // never a leading byte
- 110xxxxx 2 total
- 1110xxxx 3 total
- 11110xxx 4 total
-
- 11 10 01 01 xx xx xx xx 0...
- 0xE5XX0000
- 0xE5 << 24
-*/
-
-static bool utf8_byte_is_valid(uint8_t c) {
- return c < 0xF5 && (c & 0xFE) != 0xC0;
-}
-static bool utf8_byte_is_continuation(uint8_t c) {
- return (c & 0xC0) == 0x80;
-}
-static bool utf8_byte_is_leading_byte(uint8_t c) {
- return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c);
-}
-
-#ifdef SK_DEBUG
- static void assert_utf8_leadingbyte(unsigned c) {
- SkASSERT(utf8_byte_is_leading_byte(SkToU8(c)));
- }
-
- int SkUTF8_LeadByteToCount(unsigned c) {
- assert_utf8_leadingbyte(c);
- return (((0xE5 << 24) >> (c >> 4 << 1)) & 3) + 1;
- }
-#else
- #define assert_utf8_leadingbyte(c)
-#endif
-
-/**
- * @returns -1 iff invalid UTF8 byte,
- * 0 iff UTF8 continuation byte,
- * 1 iff ASCII byte,
- * 2 iff leading byte of 2-byte sequence,
- * 3 iff leading byte of 3-byte sequence, and
- * 4 iff leading byte of 4-byte sequence.
- *
- * I.e.: if return value > 0, then gives length of sequence.
-*/
-static int utf8_byte_type(uint8_t c) {
- if (c < 0x80) {
- return 1;
- } else if (c < 0xC0) {
- return 0;
- } else if (c < 0xF5 && (c & 0xFE) != 0xC0) { // "octet values C0, C1, F5 to FF never appear"
- return (((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1;
- } else {
- return -1;
- }
-}
-static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; }
-
-int SkUTF8_CountUnichars(const char utf8[]) {
- SkASSERT(utf8);
-
- int count = 0;
-
- for (;;) {
- int c = *(const uint8_t*)utf8;
- if (c == 0) {
- break;
- }
- utf8 += SkUTF8_LeadByteToCount(c);
- count += 1;
- }
- return count;
-}
-
-// SAFE: returns -1 if invalid UTF-8
-int SkUTF8_CountUnichars(const void* text, size_t byteLength) {
- SkASSERT(text);
- const char* utf8 = static_cast<const char*>(text);
- int count = 0;
- const char* stop = utf8 + byteLength;
-
- while (utf8 < stop) {
- int type = utf8_byte_type(*(const uint8_t*)utf8);
- SkASSERT(type >= -1 && type <= 4);
- if (!utf8_type_is_valid_leading_byte(type) || utf8 + type > stop) {
- // Sequence extends beyond end.
- return -1;
- }
- while(type-- > 1) {
- ++utf8;
- if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
- return -1;
- }
- }
- ++utf8;
- ++count;
- }
- return count;
-}
-
-SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
- SkASSERT(utf8);
-
- const uint8_t* p = (const uint8_t*)utf8;
- int c = *p;
- int hic = c << 24;
-
- assert_utf8_leadingbyte(c);
-
- if (hic < 0) {
- uint32_t mask = (uint32_t)~0x3F;
- hic = SkLeftShift(hic, 1);
- do {
- c = (c << 6) | (*++p & 0x3F);
- mask <<= 5;
- } while ((hic = SkLeftShift(hic, 1)) < 0);
- c &= ~mask;
- }
- return c;
-}
-
-// SAFE: returns -1 on invalid UTF-8 sequence.
-SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) {
- SkASSERT(ptr && *ptr);
- SkASSERT(*ptr < end);
- const uint8_t* p = (const uint8_t*)*ptr;
- int c = *p;
- int hic = c << 24;
-
- if (!utf8_byte_is_leading_byte(c)) {
- return -1;
- }
- if (hic < 0) {
- uint32_t mask = (uint32_t)~0x3F;
- hic = SkLeftShift(hic, 1);
- do {
- ++p;
- if (p >= (const uint8_t*)end) {
- return -1;
- }
- // check before reading off end of array.
- uint8_t nextByte = *p;
- if (!utf8_byte_is_continuation(nextByte)) {
- return -1;
- }
- c = (c << 6) | (nextByte & 0x3F);
- mask <<= 5;
- } while ((hic = SkLeftShift(hic, 1)) < 0);
- c &= ~mask;
- }
- *ptr = (char*)p + 1;
- return c;
-}
-
-SkUnichar SkUTF8_NextUnichar(const char** ptr) {
- SkASSERT(ptr && *ptr);
-
- const uint8_t* p = (const uint8_t*)*ptr;
- int c = *p;
- int hic = c << 24;
-
- assert_utf8_leadingbyte(c);
-
- if (hic < 0) {
- uint32_t mask = (uint32_t)~0x3F;
- hic = SkLeftShift(hic, 1);
- do {
- c = (c << 6) | (*++p & 0x3F);
- mask <<= 5;
- } while ((hic = SkLeftShift(hic, 1)) < 0);
- c &= ~mask;
- }
- *ptr = (char*)p + 1;
- return c;
-}
-
-SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
- SkASSERT(ptr && *ptr);
-
- const char* p = *ptr;
-
- if (*--p & 0x80) {
- while (*--p & 0x40) {
- ;
- }
- }
-
- *ptr = (char*)p;
- return SkUTF8_NextUnichar(&p);
-}
-
-size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
- if ((uint32_t)uni > 0x10FFFF) {
- SkDEBUGFAIL("bad unichar");
- return 0;
- }
-
- if (uni <= 127) {
- if (utf8) {
- *utf8 = (char)uni;
- }
- return 1;
- }
-
- char tmp[4];
- char* p = tmp;
- size_t count = 1;
-
- SkDEBUGCODE(SkUnichar orig = uni;)
-
- while (uni > 0x7F >> count) {
- *p++ = (char)(0x80 | (uni & 0x3F));
- uni >>= 6;
- count += 1;
- }
-
- if (utf8) {
- p = tmp;
- utf8 += count;
- while (p < tmp + count - 1) {
- *--utf8 = *p++;
- }
- *--utf8 = (char)(~(0xFF >> count) | uni);
- }
-
- SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
- return count;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-
-int SkUTF16_CountUnichars(const uint16_t src[]) {
- SkASSERT(src);
-
- int count = 0;
- unsigned c;
- while ((c = *src++) != 0) {
- SkASSERT(!SkUTF16_IsLowSurrogate(c));
- if (SkUTF16_IsHighSurrogate(c)) {
- c = *src++;
- SkASSERT(SkUTF16_IsLowSurrogate(c));
- }
- count += 1;
- }
- return count;
-}
-
-// returns -1 on error
-int SkUTF16_CountUnichars(const void* text, size_t byteLength) {
- SkASSERT(text);
- if (!SkIsAlign2(intptr_t(text)) || !SkIsAlign2(byteLength)) {
- return -1;
- }
-
- const uint16_t* src = static_cast<const uint16_t*>(text);
- const uint16_t* stop = src + (byteLength >> 1);
- int count = 0;
- while (src < stop) {
- unsigned c = *src++;
- if (SkUTF16_IsLowSurrogate(c)) {
- return -1;
- }
- if (SkUTF16_IsHighSurrogate(c)) {
- if (src >= stop) {
- return -1;
- }
- c = *src++;
- if (!SkUTF16_IsLowSurrogate(c)) {
- return -1;
- }
- }
- count += 1;
- }
- return count;
-}
-
-SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr, const uint16_t* endPtr) {
- if (!srcPtr || !endPtr) {
- return -1;
- }
- const uint16_t* src = *srcPtr;
- if (src >= endPtr) {
- return -1;
- }
- uint16_t c = *src++;
- SkUnichar result = c;
-
- if (SkUTF16_IsLowSurrogate(c)) {
- return -1; // srcPtr should never point at low surrogate.
- }
- if (SkUTF16_IsHighSurrogate(c)) {
- if (src == endPtr) {
- return -1; // Truncated string.
- }
- uint16_t low = *src++;
- if (!SkUTF16_IsLowSurrogate(low)) {
- return -1;
- }
- /*
- [paraphrased from wikipedia]
- Take the high surrogate and subtract 0xD800, then multiply by 0x400.
- Take the low surrogate and subtract 0xDC00. Add these two results
- together, and finally add 0x10000 to get the final decoded codepoint.
-
- unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
- unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000
- unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000
- unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000)
- */
- result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000);
- }
- *srcPtr = src;
- return result;
-}
-
-SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
- SkUnichar c = SkUTF16_NextUnichar(srcPtr, *srcPtr + 2);
+template <typename T>
+static SkUnichar next(const T** srcPtr, unsigned N, SkUnichar (*fn)(const T**, const T*)) {
+ SkASSERT(srcPtr);
+ const T* ptr = *srcPtr;
+ SkUnichar c = fn(&ptr, ptr + N);
if (c == -1) {
SkASSERT(false);
++(*srcPtr);
- return 0xFFFD; // REPLACEMENT CHARACTER.
- }
- return c;
-}
-
-SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
- SkASSERT(srcPtr && *srcPtr);
-
- const uint16_t* src = *srcPtr;
- SkUnichar c = *--src;
-
- SkASSERT(!SkUTF16_IsHighSurrogate(c));
- if (SkUTF16_IsLowSurrogate(c)) {
- unsigned c2 = *--src;
- SkASSERT(SkUTF16_IsHighSurrogate(c2));
- c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
+ return 0xFFFD; // REPLACEMENT CHARACTER
}
- *srcPtr = src;
+ *srcPtr = ptr;
return c;
}
-
-size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
- SkASSERT((unsigned)uni <= 0x10FFFF);
-
- int extra = (uni > 0xFFFF);
-
- if (dst) {
- if (extra) {
- // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
- // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
- dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
- dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
-
- SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
- SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
- } else {
- dst[0] = SkToU16(uni);
- SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
- SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
- }
- }
- return 1 + extra;
-}
-
-size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
- char utf8[]) {
- SkASSERT(numberOf16BitValues >= 0);
- if (numberOf16BitValues <= 0) {
- return 0;
- }
-
- SkASSERT(utf16 != nullptr);
-
- const uint16_t* stop = utf16 + numberOf16BitValues;
- size_t size = 0;
-
- if (utf8 == nullptr) { // just count
- while (utf16 < stop) {
- size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
- }
- } else {
- char* start = utf8;
- while (utf16 < stop) {
- utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
- }
- size = utf8 - start;
- }
- return size;
+SkUnichar SkUTF8_NextUnichar(const char** p) {
+ return next<char>(p, SkUTF::kMaxBytesInUTF8Sequence, SkUTF::NextUTF8);
}
-
-// returns -1 on error
-int SkUTF32_CountUnichars(const void* text, size_t byteLength) {
- if (!SkIsAlign4(intptr_t(text)) || !SkIsAlign4(byteLength)) {
- return -1;
- }
- const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits
- const uint32_t* ptr = static_cast<const uint32_t*>(text);
- const uint32_t* stop = ptr + (byteLength >> 2);
- while (ptr < stop) {
- if (*ptr & kInvalidUnicharMask) {
- return -1;
- }
- ptr += 1;
- }
- return SkToInt(byteLength >> 2);
+SkUnichar SkUTF16_NextUnichar(const uint16_t** p) {
+ return next<uint16_t>(p, 2, SkUTF::NextUTF16);
}
-// returns -1 on error
-int SkUTFN_CountUnichars(
- SkTypeface::Encoding encoding, const void* utfN, size_t byteLength) {
- SkASSERT(utfN != nullptr);
- switch (encoding) {
- case SkTypeface::kUTF8_Encoding:
- return SkUTF8_CountUnichars(utfN, byteLength);
- case SkTypeface::kUTF16_Encoding:
- return SkUTF16_CountUnichars(utfN, byteLength);
- case SkTypeface::kUTF32_Encoding:
- return SkUTF32_CountUnichars(utfN, byteLength);
- default:
- SkDEBUGFAIL("unknown text encoding");
- }
-
- return -1;
-}
+///////////////////////////////////////////////////////////////////////////////
const char SkHexadecimalDigits::gUpper[16] =
{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
diff --git a/src/core/SkUtils.h b/src/core/SkUtils.h
index 795b47a0c4..e18934d6b8 100644
--- a/src/core/SkUtils.h
+++ b/src/core/SkUtils.h
@@ -8,10 +8,9 @@
#ifndef SkUtils_DEFINED
#define SkUtils_DEFINED
-#include "SkTypes.h"
-#include "SkMath.h"
#include "SkOpts.h"
#include "SkTypeface.h"
+#include "../utils/SkUTF.h"
/** Similar to memset(), but it assigns a 16, 32, or 64-bit value into the buffer.
@param buffer The memory to have value copied into it
@@ -27,87 +26,46 @@ static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) {
static inline void sk_memset64(uint64_t buffer[], uint64_t value, int count) {
SkOpts::memset64(buffer, value, count);
}
-///////////////////////////////////////////////////////////////////////////////
-#define kMaxBytesInUTF8Sequence 4
+///////////////////////////////////////////////////////////////////////////////
-#ifdef SK_DEBUG
- int SkUTF8_LeadByteToCount(unsigned c);
-#else
- #define SkUTF8_LeadByteToCount(c) ((((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1)
-#endif
+// Unlike the functions in SkUTF.h, these two functions do not take an array
+// length parameter. When possible, use SkUTF::NextUTF{8,16} instead.
+SkUnichar SkUTF8_NextUnichar(const char**);
+SkUnichar SkUTF16_NextUnichar(const uint16_t**);
-inline int SkUTF8_CountUTF8Bytes(const char utf8[]) {
- SkASSERT(utf8);
- return SkUTF8_LeadByteToCount(*(const uint8_t*)utf8);
-}
+///////////////////////////////////////////////////////////////////////////////
-int SkUTF8_CountUnichars(const char utf8[]);
+// True when `c` is a UTF-16 high (leading) surrogate, i.e. lies in 0xD800..0xDBFF.
+static inline bool SkUTF16_IsHighSurrogate(uint16_t c) { return c >= 0xD800 && c <= 0xDBFF; }
-/** These functions are safe: invalid sequences will return -1; */
-int SkUTF8_CountUnichars(const void* utf8, size_t byteLength);
-int SkUTF16_CountUnichars(const void* utf16, size_t byteLength);
-int SkUTF32_CountUnichars(const void* utf32, size_t byteLength);
-int SkUTFN_CountUnichars(SkTypeface::Encoding encoding, const void* utfN, size_t byteLength);
+// True when `c` is a UTF-16 low (trailing) surrogate, i.e. lies in 0xDC00..0xDFFF.
+static inline bool SkUTF16_IsLowSurrogate (uint16_t c) { return c >= 0xDC00 && c <= 0xDFFF; }
-/** This function is safe: invalid UTF8 sequences will return -1
- * When -1 is returned, ptr is unchanged.
- * Precondition: *ptr < end;
- */
-SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end);
+///////////////////////////////////////////////////////////////////////////////
-/** this version replaces invalid utf-8 sequences with code point U+FFFD. */
-inline SkUnichar SkUTF8_NextUnichar(const char** ptr, const char* end) {
- SkUnichar val = SkUTF8_NextUnicharWithError(ptr, end);
- if (val < 0) {
- *ptr = end;
- return 0xFFFD; // REPLACEMENT CHARACTER
+// Counts the code points in `utfN` (`bytes` is the buffer length in bytes) by forwarding to the
+// SkUTF::CountUTF8 / CountUTF16 / CountUTF32 routine that matches `enc`, after casting the
+// untyped pointer to that encoding's unit type. The result is whatever the selected routine
+// returns. Any other `enc` value triggers SkDEBUGFAIL and yields -1.
+static inline int SkUTFN_CountUnichars(SkTypeface::Encoding enc, const void* utfN, size_t bytes) {
+ switch (enc) {
+ case SkTypeface::kUTF8_Encoding: return SkUTF::CountUTF8((const char*)utfN, bytes);
+ case SkTypeface::kUTF16_Encoding: return SkUTF::CountUTF16((const uint16_t*)utfN, bytes);
+ case SkTypeface::kUTF32_Encoding: return SkUTF::CountUTF32((const int32_t*)utfN, bytes);
+ default: SkDEBUGFAIL("unknown text encoding"); return -1;
+ }
- return val;
}
-SkUnichar SkUTF8_ToUnichar(const char utf8[]);
-SkUnichar SkUTF8_NextUnichar(const char**);
-SkUnichar SkUTF8_PrevUnichar(const char**);
-
-/** Return the number of bytes need to convert a unichar
- into a utf8 sequence. Will be 1..kMaxBytesInUTF8Sequence,
- or 0 if uni is illegal.
-*/
-size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[] = nullptr);
-
-///////////////////////////////////////////////////////////////////////////////
-
-#define SkUTF16_IsHighSurrogate(c) (((c) & 0xFC00) == 0xD800)
-#define SkUTF16_IsLowSurrogate(c) (((c) & 0xFC00) == 0xDC00)
-
-int SkUTF16_CountUnichars(const uint16_t utf16[]);
-// returns the current unichar and then moves past it (*p++)
-SkUnichar SkUTF16_NextUnichar(const uint16_t**);
-SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr, const uint16_t* end);
-
-// this guy backs up to the previus unichar value, and returns it (*--p)
-SkUnichar SkUTF16_PrevUnichar(const uint16_t**);
-size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t utf16[] = nullptr);
-
-size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
- char utf8[] = nullptr);
-
-inline bool SkUnichar_IsVariationSelector(SkUnichar uni) {
-/* The 'true' ranges are:
- * 0x180B <= uni <= 0x180D
- * 0xFE00 <= uni <= 0xFE0F
- * 0xE0100 <= uni <= 0xE01EF
- */
- if (uni < 0x180B || uni > 0xE01EF) {
- return false;
- }
- if ((uni > 0x180D && uni < 0xFE00) || (uni > 0xFE0F && uni < 0xE0100)) {
- return false;
+// Encoding-dispatched "next code point": forwards `ptr` and `stop` (cast to the unit type of
+// `enc`) to SkUTF::NextUTF8 / NextUTF16 / NextUTF32 and returns that routine's result.
+// Any other `enc` value triggers SkDEBUGFAIL and yields -1; *ptr is not touched on that path.
+static inline SkUnichar SkUTFN_Next(SkTypeface::Encoding enc,
+ const void** ptr, const void* stop) {
+ switch (enc) {
+ case SkTypeface::kUTF8_Encoding:
+ return SkUTF::NextUTF8((const char**)ptr, (const char*)stop);
+ case SkTypeface::kUTF16_Encoding:
+ return SkUTF::NextUTF16((const uint16_t**)ptr, (const uint16_t*)stop);
+ case SkTypeface::kUTF32_Encoding:
+ return SkUTF::NextUTF32((const int32_t**)ptr, (const int32_t*)stop);
+ default: SkDEBUGFAIL("unknown text encoding"); return -1;
}
- return true;
}
+///////////////////////////////////////////////////////////////////////////////
+
namespace SkHexadecimalDigits {
extern const char gUpper[16]; // 0-9A-F
extern const char gLower[16]; // 0-9a-f
diff --git a/src/pdf/SkPDFDevice.cpp b/src/pdf/SkPDFDevice.cpp
index 9cd2892a05..fc7384f7c0 100644
--- a/src/pdf/SkPDFDevice.cpp
+++ b/src/pdf/SkPDFDevice.cpp
@@ -1274,7 +1274,7 @@ void SkPDFDevice::internalDrawGlyphRun(const SkGlyphRun& glyphRun, SkPoint offse
// Check if `/ActualText` needed.
const char* textPtr = c.fUtf8Text;
const char* textEnd = c.fUtf8Text + c.fTextByteLength;
- SkUnichar unichar = SkUTF8_NextUnicharWithError(&textPtr, textEnd);
+ SkUnichar unichar = SkUTF::NextUTF8(&textPtr, textEnd);
if (unichar < 0) {
return;
}
@@ -1288,7 +1288,7 @@ void SkPDFDevice::internalDrawGlyphRun(const SkGlyphRun& glyphRun, SkPoint offse
// the BOM marks this text as UTF-16BE, not PDFDocEncoding.
SkPDFUtils::WriteUTF16beHex(out, unichar); // first char
while (textPtr < textEnd) {
- unichar = SkUTF8_NextUnicharWithError(&textPtr, textEnd);
+ unichar = SkUTF::NextUTF8(&textPtr, textEnd);
if (unichar < 0) {
break;
}
diff --git a/src/pdf/SkPDFMetadata.cpp b/src/pdf/SkPDFMetadata.cpp
index 9234f44ac1..16e6637c7d 100644
--- a/src/pdf/SkPDFMetadata.cpp
+++ b/src/pdf/SkPDFMetadata.cpp
@@ -62,22 +62,22 @@ static SkString to_utf16be(const char* src, size_t len) {
const char* const end = src + len;
size_t n = 1; // BOM
for (const char* ptr = src; ptr < end;) {
- SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end);
+ SkUnichar u = SkUTF::NextUTF8(&ptr, end);
if (u < 0) {
break;
}
- n += SkUTF16_FromUnichar(u);
+ n += SkUTF::ToUTF16(u);
}
ret.resize(2 * n);
char* out = ret.writable_str();
write_utf16be(&out, 0xFEFF); // BOM
for (const char* ptr = src; ptr < end;) {
- SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end);
+ SkUnichar u = SkUTF::NextUTF8(&ptr, end);
if (u < 0) {
break;
}
uint16_t utf16[2];
- size_t l = SkUTF16_FromUnichar(u, utf16);
+ size_t l = SkUTF::ToUTF16(u, utf16);
write_utf16be(&out, utf16[0]);
if (l == 2) {
write_utf16be(&out, utf16[1]);
diff --git a/src/pdf/SkPDFUtils.h b/src/pdf/SkPDFUtils.h
index a291914a5a..a05b52b113 100644
--- a/src/pdf/SkPDFUtils.h
+++ b/src/pdf/SkPDFUtils.h
@@ -103,7 +103,7 @@ inline void WriteUInt8(SkDynamicMemoryWStream* wStream, uint8_t value) {
inline void WriteUTF16beHex(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
uint16_t utf16[2] = {0, 0};
- size_t len = SkUTF16_FromUnichar(utf32, utf16);
+ size_t len = SkUTF::ToUTF16(utf32, utf16);
SkASSERT(len == 1 || len == 2);
SkPDFUtils::WriteUInt16BE(wStream, utf16[0]);
if (len == 2) {
diff --git a/src/ports/SkFontHost_mac.cpp b/src/ports/SkFontHost_mac.cpp
index 18d09b4ec0..8fa8c636d6 100644
--- a/src/ports/SkFontHost_mac.cpp
+++ b/src/ports/SkFontHost_mac.cpp
@@ -1146,7 +1146,7 @@ uint16_t SkScalerContext_Mac::generateCharToGlyph(SkUnichar uni) {
UniChar theChar[2]; // UniChar is a UTF-16 16-bit code unit.
// Get the glyph
- size_t numUniChar = SkUTF16_FromUnichar(uni, theChar);
+ size_t numUniChar = SkUTF::ToUTF16(uni, theChar);
SkASSERT(sizeof(CGGlyph) <= sizeof(uint16_t));
// Undocumented behavior of CTFontGetGlyphsForCharacters with non-bmp code points:
@@ -2366,7 +2366,7 @@ int SkTypeface_Mac::onCharsToGlyphs(const void* chars, Encoding encoding,
src = utf16;
for (int i = 0; i < glyphCount; ++i) {
SkUnichar uni = SkUTF8_NextUnichar(&utf8);
- utf16 += SkUTF16_FromUnichar(uni, utf16);
+ utf16 += SkUTF::ToUTF16(uni, utf16);
}
srcCount = SkToInt(utf16 - src);
break;
@@ -2387,7 +2387,7 @@ int SkTypeface_Mac::onCharsToGlyphs(const void* chars, Encoding encoding,
UniChar* utf16 = charStorage.reset(2 * glyphCount);
src = utf16;
for (int i = 0; i < glyphCount; ++i) {
- utf16 += SkUTF16_FromUnichar(utf32[i], utf16);
+ utf16 += SkUTF::ToUTF16(utf32[i], utf16);
}
srcCount = SkToInt(utf16 - src);
break;
diff --git a/src/ports/SkFontHost_win.cpp b/src/ports/SkFontHost_win.cpp
index 07d37aedfe..4ee8bb44fd 100644
--- a/src/ports/SkFontHost_win.cpp
+++ b/src/ports/SkFontHost_win.cpp
@@ -795,7 +795,7 @@ uint16_t SkScalerContext_GDI::generateCharToGlyph(SkUnichar utf32) {
uint16_t index = 0;
WCHAR utf16[2];
// TODO(ctguil): Support characters that generate more than one glyph.
- if (SkUTF16_FromUnichar(utf32, (uint16_t*)utf16) == 1) {
+ if (SkUTF::ToUTF16(utf32, (uint16_t*)utf16) == 1) {
// Type1 fonts fail with uniscribe API. Use GetGlyphIndices for plane 0.
/** Real documentation for GetGlyphIndiciesW:
@@ -2103,7 +2103,7 @@ int LogFontTypeface::onCharsToGlyphs(const void* chars, Encoding encoding,
// Try a run of non-bmp.
while (glyphIndex < glyphCount && currentChar > 0xFFFF) {
- SkUTF16_FromUnichar(currentChar, reinterpret_cast<uint16_t*>(scratch));
+ SkUTF::ToUTF16(currentChar, reinterpret_cast<uint16_t*>(scratch));
glyphs[glyphIndex] = nonBmpCharToGlyph(hdc, &sc, scratch);
++glyphIndex;
if (glyphIndex < glyphCount) {
@@ -2158,7 +2158,7 @@ int LogFontTypeface::onCharsToGlyphs(const void* chars, Encoding encoding,
// Try a run of non-bmp.
while (glyphIndex < glyphCount && utf32[glyphIndex] > 0xFFFF) {
- SkUTF16_FromUnichar(utf32[glyphIndex], reinterpret_cast<uint16_t*>(scratch));
+ SkUTF::ToUTF16(utf32[glyphIndex], reinterpret_cast<uint16_t*>(scratch));
glyphs[glyphIndex] = nonBmpCharToGlyph(hdc, &sc, scratch);
++glyphIndex;
}
diff --git a/src/ports/SkFontMgr_win_dw.cpp b/src/ports/SkFontMgr_win_dw.cpp
index 6830d82e03..7954c77ca2 100644
--- a/src/ports/SkFontMgr_win_dw.cpp
+++ b/src/ports/SkFontMgr_win_dw.cpp
@@ -757,7 +757,7 @@ SkTypeface* SkFontMgr_DirectWrite::onMatchFamilyStyleCharacter(const char family
WCHAR str[16];
UINT32 strLen = static_cast<UINT32>(
- SkUTF16_FromUnichar(character, reinterpret_cast<uint16_t*>(str)));
+ SkUTF::ToUTF16(character, reinterpret_cast<uint16_t*>(str)));
const SkSMallocWCHAR* dwBcp47;
SkSMallocWCHAR dwBcp47Local;
diff --git a/src/ports/SkOSFile_stdio.cpp b/src/ports/SkOSFile_stdio.cpp
index 10ba7c8b0c..8c0be793cd 100644
--- a/src/ports/SkOSFile_stdio.cpp
+++ b/src/ports/SkOSFile_stdio.cpp
@@ -46,16 +46,16 @@ static FILE* fopen_win(const char* utf8path, const char* perm) {
const char* end = utf8path + strlen(utf8path);
size_t n = 0;
while (ptr < end) {
- SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end);
+ SkUnichar u = SkUTF::NextUTF8(&ptr, end);
if (u < 0) {
return nullptr; // malformed UTF-8
}
- n += SkUTF16_FromUnichar(u);
+ n += SkUTF::ToUTF16(u);
}
std::vector<uint16_t> wchars(n + 1);
uint16_t* out = wchars.data();
for (const char* ptr = utf8path; ptr < end;) {
- out += SkUTF16_FromUnichar(SkUTF8_NextUnicharWithError(&ptr, end), out);
+ out += SkUTF::ToUTF16(SkUTF::NextUTF8(&ptr, end), out);
}
SkASSERT(out == &wchars[n]);
*out = 0; // final null
diff --git a/src/ports/SkRemotableFontMgr_win_dw.cpp b/src/ports/SkRemotableFontMgr_win_dw.cpp
index 6c6613ea37..2b99cf7be8 100644
--- a/src/ports/SkRemotableFontMgr_win_dw.cpp
+++ b/src/ports/SkRemotableFontMgr_win_dw.cpp
@@ -398,7 +398,7 @@ public:
WCHAR str[16];
UINT32 strLen = static_cast<UINT32>(
- SkUTF16_FromUnichar(character, reinterpret_cast<uint16_t*>(str)));
+ SkUTF::ToUTF16(character, reinterpret_cast<uint16_t*>(str)));
SkTScopedComPtr<IDWriteTextLayout> fallbackLayout;
HR_GENERAL(dwFactory->CreateTextLayout(str, strLen, fallbackFormat.get(),
200.0f, 200.0f,
diff --git a/src/svg/SkSVGDevice.cpp b/src/svg/SkSVGDevice.cpp
index 4a83a9c841..9a2eb99a77 100644
--- a/src/svg/SkSVGDevice.cpp
+++ b/src/svg/SkSVGDevice.cpp
@@ -119,6 +119,13 @@ struct Resources {
SkString fClip;
};
+// Converts a SkPaint text encoding to the SkTypeface encoding with the same numeric value.
+// The static_asserts pin the UTF-8/16/32 enumerators of both enums to equal values, so the
+// conversion is a plain cast. kGlyphID_TextEncoding is not covered by these checks.
+static SkTypeface::Encoding to_encoding(SkPaint::TextEncoding e) {
+    static_assert(static_cast<int>(SkTypeface::kUTF8_Encoding) ==
+                  static_cast<int>(SkPaint::kUTF8_TextEncoding), "");
+    static_assert(static_cast<int>(SkTypeface::kUTF16_Encoding) ==
+                  static_cast<int>(SkPaint::kUTF16_TextEncoding), "");
+    static_assert(static_cast<int>(SkTypeface::kUTF32_Encoding) ==
+                  static_cast<int>(SkPaint::kUTF32_TextEncoding), "");
+    return static_cast<SkTypeface::Encoding>(e);
+}
+
class SVGTextBuilder : SkNoncopyable {
public:
SVGTextBuilder(const void* text, size_t byteLen, const SkPaint& paint, const SkPoint& offset,
@@ -131,42 +138,29 @@ public:
SkASSERT(scalarsPerPos <= 2);
SkASSERT(scalarsPerPos == 0 || SkToBool(pos));
- int count = paint.countText(text, byteLen);
-
- const char* stop = (const char*)text + byteLen;
- switch(paint.getTextEncoding()) {
- case SkPaint::kGlyphID_TextEncoding: {
- SkASSERT(count * sizeof(uint16_t) == byteLen);
- SkAutoSTArray<64, SkUnichar> unichars(count);
- paint.glyphsToUnichars((const uint16_t*)text, count, unichars.get());
- for (int i = 0; i < count; ++i) {
- this->appendUnichar(unichars[i]);
- }
- } break;
- case SkPaint::kUTF8_TextEncoding: {
- const char* c8 = reinterpret_cast<const char*>(text);
- for (int i = 0; i < count; ++i) {
- this->appendUnichar(SkUTF8_NextUnichar(&c8, stop));
- }
- SkASSERT(reinterpret_cast<const char*>(text) + byteLen == c8);
- } break;
- case SkPaint::kUTF16_TextEncoding: {
- const uint16_t* c16 = reinterpret_cast<const uint16_t*>(text);
- for (int i = 0; i < count; ++i) {
- this->appendUnichar(SkUTF16_NextUnichar(&c16, (const uint16_t*)stop));
+ SkPaint::TextEncoding encoding = paint.getTextEncoding();
+ switch(encoding) {
+ case SkPaint::kGlyphID_TextEncoding: {
+ int count = paint.countText(text, byteLen);
+ SkASSERT(count * sizeof(uint16_t) == byteLen);
+ SkAutoSTArray<64, SkUnichar> unichars(count);
+ paint.glyphsToUnichars((const uint16_t*)text, count, unichars.get());
+ for (int i = 0; i < count; ++i) {
+ this->appendUnichar(unichars[i]);
+ }
+ break;
}
- SkASSERT(SkIsAlign2(byteLen));
- SkASSERT(reinterpret_cast<const uint16_t*>(text) + (byteLen / 2) == c16);
- } break;
- case SkPaint::kUTF32_TextEncoding: {
- SkASSERT(count * sizeof(uint32_t) == byteLen);
- const uint32_t* c32 = reinterpret_cast<const uint32_t*>(text);
- for (int i = 0; i < count; ++i) {
- this->appendUnichar(c32[i]);
+ case SkPaint::kUTF8_TextEncoding:
+ case SkPaint::kUTF16_TextEncoding:
+ case SkPaint::kUTF32_TextEncoding: {
+ const void* stop = (const char*)text + byteLen;
+ while (text < stop) {
+ this->appendUnichar(SkUTFN_Next(to_encoding(encoding), &text, stop));
+ }
+ break;
}
- } break;
- default:
- SK_ABORT("unknown text encoding");
+ default:
+ SK_ABORT("unknown text encoding");
}
if (scalarsPerPos < 2) {
diff --git a/src/utils/SkUTF.cpp b/src/utils/SkUTF.cpp
new file mode 100644
index 0000000000..0670ae02a4
--- /dev/null
+++ b/src/utils/SkUTF.cpp
@@ -0,0 +1,253 @@
+// Copyright 2018 Google LLC.
+// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
+
+#include "SkUTF.h"
+
+#include <climits>
+
+// Left shift for signed values: the shift itself is done on the uint32_t bit pattern, so moving
+// a bit into or past the sign position is well-defined, and the result is cast back to int32_t.
+static constexpr inline int32_t left_shift(int32_t value, int32_t shift) {
+    return static_cast<int32_t>(static_cast<uint32_t>(value) << shift);
+}
+
+// True when the integral value `x` is a multiple of 2 (lowest bit clear).
+template <typename T> static constexpr bool is_align2(T x) { return (x & 1) == 0; }
+
+// True when the integral value `x` is a multiple of 4 (two lowest bits clear).
+template <typename T> static constexpr bool is_align4(T x) { return (x & 3) == 0; }
+
+// High (leading) surrogates occupy 0xD800..0xDBFF.
+static constexpr inline bool utf16_is_high_surrogate(uint16_t c) { return c >= 0xD800 && c <= 0xDBFF; }
+
+// Low (trailing) surrogates occupy 0xDC00..0xDFFF.
+static constexpr inline bool utf16_is_low_surrogate(uint16_t c) { return c >= 0xDC00 && c <= 0xDFFF; }
+
+/** Classify a single UTF-8 byte.
+    @returns -1 iff invalid UTF8 byte,
+              0 iff UTF8 continuation byte,
+              1 iff ASCII byte,
+              2 iff leading byte of 2-byte sequence,
+              3 iff leading byte of 3-byte sequence, and
+              4 iff leading byte of 4-byte sequence.
+    I.e.: if return value > 0, then gives length of sequence.
+*/
+static int utf8_byte_type(uint8_t c) {
+    if (c < 0x80) {
+        return 1;   // 0xxxxxxx: ASCII
+    }
+    if (c < 0xC0) {
+        return 0;   // 10xxxxxx: continuation
+    }
+    if (c >= 0xF5 || (c & 0xFE) == 0xC0) {  // "octet values c0, c1, f5 to ff never appear"
+        return -1;
+    }
+    // Only valid lead bytes remain: 0xC2..0xDF, 0xE0..0xEF, 0xF0..0xF4.
+    // Plain range checks are used so no signed value is ever shifted past INT_MAX.
+    if (c < 0xE0) {
+        return 2;
+    }
+    if (c < 0xF0) {
+        return 3;
+    }
+    return 4;
+}
+// A utf8_byte_type() result of 1..4 denotes a byte that may start a sequence; 0 and -1 may not.
+static bool utf8_type_is_valid_leading_byte(int type) { return type >= 1; }
+
+// Continuation bytes have the bit pattern 10xxxxxx, i.e. 0x80..0xBF.
+static bool utf8_byte_is_continuation(uint8_t c) { return (c & 0xC0) == 0x80; }
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Counts the code points in `byteLength` bytes of UTF-8. Returns -1 for a null pointer, a byte
+// that cannot start a sequence, a sequence that would run past the end of the buffer, or a
+// missing continuation byte.
+int SkUTF::CountUTF8(const char* utf8, size_t byteLength) {
+    if (nullptr == utf8) {
+        return -1;
+    }
+    const uint8_t* cursor = reinterpret_cast<const uint8_t*>(utf8);
+    const uint8_t* const limit = cursor + byteLength;
+    int codePoints = 0;
+    for (; cursor < limit; ++codePoints) {
+        // For a valid lead byte the type doubles as the sequence length in bytes.
+        const int seqLen = utf8_byte_type(*cursor);
+        if (!utf8_type_is_valid_leading_byte(seqLen) || cursor + seqLen > limit) {
+            return -1;  // Bad lead byte, or sequence extends beyond end.
+        }
+        for (int i = 1; i < seqLen; ++i) {
+            if (!utf8_byte_is_continuation(cursor[i])) {
+                return -1;
+            }
+        }
+        cursor += seqLen;
+    }
+    return codePoints;
+}
+
+// Counts the code points in `byteLength` bytes of UTF-16. Returns -1 for a null or misaligned
+// pointer, an odd byte length, a low surrogate with no preceding high surrogate, or a high
+// surrogate that is not immediately followed by a low surrogate inside the buffer.
+int SkUTF::CountUTF16(const uint16_t* utf16, size_t byteLength) {
+    const bool usable = utf16 && is_align2(intptr_t(utf16)) && is_align2(byteLength);
+    if (!usable) {
+        return -1;
+    }
+    const uint16_t* const limit = utf16 + (byteLength >> 1);
+    int codePoints = 0;
+    for (const uint16_t* cursor = utf16; cursor < limit; ++codePoints) {
+        const uint16_t unit = *cursor++;
+        if (utf16_is_low_surrogate(unit)) {
+            return -1;  // Stray trailing surrogate.
+        }
+        if (!utf16_is_high_surrogate(unit)) {
+            continue;   // Single-unit code point.
+        }
+        // A high surrogate must be completed by a low surrogate that is still in range.
+        if (cursor >= limit || !utf16_is_low_surrogate(*cursor++)) {
+            return -1;
+        }
+    }
+    return codePoints;
+}
+
+// Counts the code points in `byteLength` bytes of UTF-32 (one per 32-bit unit). Returns -1 when
+// the pointer is not 4-byte aligned, the length is not a multiple of 4, the unit count does not
+// fit in an int, or any unit has one of its top 8 bits set.
+int SkUTF::CountUTF32(const int32_t* utf32, size_t byteLength) {
+    const size_t unitCount = byteLength >> 2;
+    if (!is_align4(intptr_t(utf32)) || !is_align4(byteLength) || unitCount > INT_MAX) {
+        return -1;
+    }
+    const uint32_t kHighByteMask = 0xFF000000;  // unichar fits in 24 bits
+    const uint32_t* const units = reinterpret_cast<const uint32_t*>(utf32);
+    for (size_t i = 0; i < unitCount; ++i) {
+        if (units[i] & kHighByteMask) {
+            return -1;
+        }
+    }
+    return static_cast<int>(unitCount);
+}
+
+// Common failure exit for the Next* decoders: stores `end` into the caller's cursor and returns
+// the -1 error value. `ptr` is written through unconditionally, so it must not be null.
+template <typename T>
+static SkUnichar next_fail(const T** ptr, const T* end) {
+    const SkUnichar kDecodeError = -1;
+    *ptr = end;
+    return kDecodeError;
+}
+
+/** Decode one code point from the UTF-8 bytes in [*ptr, end).
+
+    On success *ptr is advanced past the decoded sequence and the code point is returned.
+    On failure -1 is returned:
+      - if ptr or end is null, nothing is written;
+      - otherwise (*ptr null or not before end, first byte not a valid lead byte, sequence cut
+        off by end, or a following byte that is not a continuation byte) *ptr is set to end.
+*/
+SkUnichar SkUTF::NextUTF8(const char** ptr, const char* end) {
+    if (!ptr || !end ) {
+        return -1;
+    }
+    const uint8_t* p = (const uint8_t*)*ptr;
+    if (!p || p >= (const uint8_t*)end) {
+        return next_fail(ptr, end);
+    }
+    int c = *p;
+    // Place the lead byte in the top 8 bits so each of its leading 1-bits can be tested through
+    // the sign bit. left_shift() shifts the unsigned bit pattern; a bare `c << 24` would move a
+    // set bit into the sign position of a signed int for every lead byte >= 0x80.
+    int hic = left_shift(c, 24);
+
+    if (!utf8_type_is_valid_leading_byte(utf8_byte_type(c))) {
+        return next_fail(ptr, end);
+    }
+    if (hic < 0) {
+        // Multi-byte sequence. `mask` covers the lead byte's length-marker bits; it moves up
+        // with every continuation byte folded in and those bits are cleared from c at the end.
+        uint32_t mask = (uint32_t)~0x3F;
+        hic = left_shift(hic, 1);
+        do {
+            ++p;
+            if (p >= (const uint8_t*)end) {
+                return next_fail(ptr, end);
+            }
+            // check before reading off end of array.
+            uint8_t nextByte = *p;
+            if (!utf8_byte_is_continuation(nextByte)) {
+                return next_fail(ptr, end);
+            }
+            c = (c << 6) | (nextByte & 0x3F);
+            mask <<= 5;
+        } while ((hic = left_shift(hic, 1)) < 0);  // one pass per remaining leading 1-bit
+        c &= ~mask;
+    }
+    *ptr = (char*)p + 1;
+    return c;
+}
+
+/** Decode one code point from the native-endian UTF-16 units in [*ptr, end).
+
+    On success *ptr is advanced by one unit (no surrogate) or two units (surrogate pair) and
+    the code point is returned.
+    On failure -1 is returned:
+      - if ptr or end is null, nothing is written;
+      - otherwise (*ptr null, misaligned, or with no unit left before end; a leading low
+        surrogate; a high surrogate that is truncated or not followed by a low surrogate)
+        *ptr is set to end.
+*/
+SkUnichar SkUTF::NextUTF16(const uint16_t** ptr, const uint16_t* end) {
+    if (!ptr || !end ) {
+        // Do not go through next_fail() here: it stores through ptr, which may be null.
+        // This matches the null handling of NextUTF8 and NextUTF32.
+        return -1;
+    }
+    const uint16_t* src = *ptr;
+    if (!src || src + 1 > end || !is_align2(intptr_t(src))) {
+        return next_fail(ptr, end);
+    }
+    uint16_t c = *src++;
+    SkUnichar result = c;
+    if (utf16_is_low_surrogate(c)) {
+        return next_fail(ptr, end);  // srcPtr should never point at low surrogate.
+    }
+    if (utf16_is_high_surrogate(c)) {
+        if (src + 1 > end) {
+            return next_fail(ptr, end);  // Truncated string.
+        }
+        uint16_t low = *src++;
+        if (!utf16_is_low_surrogate(low)) {
+            return next_fail(ptr, end);
+        }
+        /*
+        [paraphrased from wikipedia]
+        Take the high surrogate and subtract 0xD800, then multiply by 0x400.
+        Take the low surrogate and subtract 0xDC00.  Add these two results
+        together, and finally add 0x10000 to get the final decoded codepoint.
+
+        unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
+        unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000
+        unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000
+        unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000)
+        */
+        result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000);
+    }
+    *ptr = src;
+    return result;
+}
+
+// Reads one UTF-32 unit from [*ptr, end). Returns -1 without writing anything when ptr or end is
+// null. Otherwise a null/misaligned *ptr, no unit left before end, or a unit with any of its top
+// 8 bits set yields -1 with *ptr set to end. On success *ptr advances by one unit.
+SkUnichar SkUTF::NextUTF32(const int32_t** ptr, const int32_t* end) {
+    if (nullptr == ptr || nullptr == end) {
+        return -1;
+    }
+    const int32_t* const cur = *ptr;
+    const bool readable = cur && cur + 1 <= end && is_align4(intptr_t(cur));
+    if (!readable) {
+        return next_fail(ptr, end);
+    }
+    const int32_t codePoint = *cur;
+    const uint32_t kHighByteMask = 0xFF000000;  // unichar fits in 24 bits
+    if (codePoint & kHighByteMask) {
+        return next_fail(ptr, end);
+    }
+    *ptr = cur + 1;
+    return codePoint;
+}
+
+// Encodes `uni` as UTF-8. Returns the number of bytes (1..4), or 0 when `uni` is negative or
+// above 0x10FFFF. When `utf8` is non-null the bytes are stored there; no terminator is written.
+size_t SkUTF::ToUTF8(SkUnichar uni, char utf8[SkUTF::kMaxBytesInUTF8Sequence]) {
+    const uint32_t codePoint = static_cast<uint32_t>(uni);
+    if (codePoint > 0x10FFFF) {
+        return 0;
+    }
+
+    // Sequence length follows directly from the magnitude of the code point.
+    size_t length = 4;
+    if (codePoint < 0x80) {
+        length = 1;
+    } else if (codePoint < 0x800) {
+        length = 2;
+    } else if (codePoint < 0x10000) {
+        length = 3;
+    }
+
+    if (!utf8) {
+        return length;
+    }
+    if (1 == length) {
+        utf8[0] = (char)codePoint;
+        return length;
+    }
+
+    // Lead-byte length markers, indexed by sequence length: 110xxxxx, 1110xxxx, 11110xxx.
+    static const uint8_t kLeadMarker[5] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };
+
+    // Emit continuation bytes back to front, six payload bits each; what is left over belongs
+    // in the lead byte.
+    uint32_t remaining = codePoint;
+    for (size_t i = length - 1; i > 0; --i) {
+        utf8[i] = (char)(0x80 | (remaining & 0x3F));
+        remaining >>= 6;
+    }
+    utf8[0] = (char)(kLeadMarker[length] | remaining);
+    return length;
+}
+
+// Encodes `uni` as UTF-16. Returns the number of 16-bit units (1 or 2), or 0 when `uni` is
+// negative or above 0x10FFFF. When `utf16` is non-null the units are stored there.
+size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) {
+    if (static_cast<uint32_t>(uni) > 0x10FFFF) {
+        return 0;
+    }
+    if (uni <= 0xFFFF) {
+        // Fits in a single unit.
+        if (utf16) {
+            utf16[0] = static_cast<uint16_t>(uni);
+        }
+        return 1;
+    }
+    if (utf16) {
+        // Split the 20-bit offset above 0x10000 into two 10-bit halves.
+        const SkUnichar offset = uni - 0x10000;
+        utf16[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
+        utf16[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3FF));
+    }
+    return 2;
+}
+
diff --git a/src/utils/SkUTF.h b/src/utils/SkUTF.h
new file mode 100644
index 0000000000..385102aadb
--- /dev/null
+++ b/src/utils/SkUTF.h
@@ -0,0 +1,68 @@
+// Copyright 2018 Google LLC.
+// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
+#ifndef SkUTF_DEFINED
+#define SkUTF_DEFINED
+
+#include <cstddef>
+#include <cstdint>
+
+typedef int32_t SkUnichar;
+
+namespace SkUTF {
+
+/** Given a sequence of UTF-8 bytes, return the number of unicode codepoints.
+    `byteLength` is the size of the sequence in bytes. If the sequence is
+    invalid UTF-8 (bad leading byte, truncated sequence, or missing
+    continuation byte) or `utf8` is null, return -1.
+*/
+int CountUTF8(const char* utf8, size_t byteLength);
+
+/** Given a sequence of aligned UTF-16 characters in machine-endian form,
+    return the number of unicode codepoints. `byteLength` is the size of the
+    sequence in bytes. If the sequence is invalid UTF-16 (unpaired
+    surrogate), `utf16` is null or not 2-byte aligned, or `byteLength` is
+    odd, return -1.
+*/
+int CountUTF16(const uint16_t* utf16, size_t byteLength);
+
+/** Given a sequence of aligned UTF-32 characters in machine-endian form,
+    return the number of unicode codepoints. `byteLength` is the size of the
+    sequence in bytes. If the sequence is invalid UTF-32 (a value with any of
+    its top 8 bits set), `utf32` is not 4-byte aligned, or `byteLength` is
+    not a multiple of 4, return -1.
+*/
+int CountUTF32(const int32_t* utf32, size_t byteLength);
+
+/** Given a sequence of UTF-8 bytes, return the first unicode codepoint.
+    The pointer will be incremented to point at the next codepoint's start. If
+    invalid UTF-8 is encountered, set *ptr to end and return -1. If `ptr` or
+    `end` is null, return -1 without writing anything.
+*/
+SkUnichar NextUTF8(const char** ptr, const char* end);
+
+/** Given a sequence of aligned UTF-16 characters in machine-endian form,
+    return the first unicode codepoint. The pointer will be incremented to
+    point at the next codepoint's start. If invalid UTF-16 is encountered,
+    set *ptr to end and return -1.
+*/
+SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end);
+
+/** Given a sequence of aligned UTF-32 characters in machine-endian form,
+    return the first unicode codepoint. The pointer will be incremented to
+    point at the next codepoint's start. If invalid UTF-32 is encountered,
+    set *ptr to end and return -1. If `ptr` or `end` is null, return -1
+    without writing anything.
+*/
+SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end);
+
+// Largest number of bytes ToUTF8() produces for a single codepoint.
+constexpr unsigned kMaxBytesInUTF8Sequence = 4;
+
+/** Convert the unicode codepoint into UTF-8. If `utf8` is non-null, place the
+    result in that array (no terminator is appended). Return the number of
+    bytes in the result. If `utf8` is null, simply return the number of bytes
+    that would be used. For invalid unicode codepoints (negative or greater
+    than 0x10FFFF), return 0.
+*/
+size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr);
+
+/** Convert the unicode codepoint into UTF-16. If `utf16` is non-null, place
+    the result in that array. Return the number of UTF-16 code units in the
+    result (1 or 2). If `utf16` is null, simply return the number of code
+    units that would be used. For invalid unicode codepoints (negative or
+    greater than 0x10FFFF), return 0.
+*/
+size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);
+
+} // namespace SkUTF
+
+#endif // SkUTF_DEFINED
diff --git a/src/xps/SkXPSDevice.cpp b/src/xps/SkXPSDevice.cpp
index b984ef3e88..c2b5e66406 100644
--- a/src/xps/SkXPSDevice.cpp
+++ b/src/xps/SkXPSDevice.cpp
@@ -1984,17 +1984,13 @@ HRESULT SkXPSDevice::AddGlyphs(IXpsOMObjectFactory* xpsFactory,
}
+// Glyph-ID text is counted as byteLength / 2. Every other encoding is cast to the
+// SkTypeface::Encoding of equal numeric value (checked by the static_asserts for UTF-8/16/32)
+// and counted by SkUTFN_CountUnichars, whose result is returned unchanged.
static int num_glyph_guess(SkPaint::TextEncoding encoding, const void* text, size_t byteLength) {
- switch (encoding) {
- case SkPaint::kUTF8_TextEncoding:
- return SkUTF8_CountUnichars(text, byteLength);
- case SkPaint::kUTF16_TextEncoding:
- return SkUTF16_CountUnichars(text, byteLength);
- case SkPaint::kGlyphID_TextEncoding:
+ static_assert((int)SkTypeface::kUTF8_Encoding == (int)SkPaint::kUTF8_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF16_Encoding == (int)SkPaint::kUTF16_TextEncoding, "");
+ static_assert((int)SkTypeface::kUTF32_Encoding == (int)SkPaint::kUTF32_TextEncoding, "");
+ if (encoding == SkPaint::kGlyphID_TextEncoding) {
return SkToInt(byteLength / 2);
- default:
- SK_ABORT("Invalid Text Encoding");
}
- return 0;
+ return SkUTFN_CountUnichars((SkTypeface::Encoding)encoding, text, byteLength);
}
static bool text_must_be_pathed(const SkPaint& paint, const SkMatrix& matrix) {
diff --git a/tests/PaintTest.cpp b/tests/PaintTest.cpp
index 647f8e2f23..0bf6df64de 100644
--- a/tests/PaintTest.cpp
+++ b/tests/PaintTest.cpp
@@ -23,7 +23,7 @@
static size_t uni_to_utf8(const SkUnichar src[], void* dst, int count) {
char* u8 = (char*)dst;
for (int i = 0; i < count; ++i) {
- int n = SkToInt(SkUTF8_FromUnichar(src[i], u8));
+ int n = SkToInt(SkUTF::ToUTF8(src[i], u8));
u8 += n;
}
return u8 - (char*)dst;
@@ -32,7 +32,7 @@ static size_t uni_to_utf8(const SkUnichar src[], void* dst, int count) {
static size_t uni_to_utf16(const SkUnichar src[], void* dst, int count) {
uint16_t* u16 = (uint16_t*)dst;
for (int i = 0; i < count; ++i) {
- int n = SkToInt(SkUTF16_FromUnichar(src[i], u16));
+ int n = SkToInt(SkUTF::ToUTF16(src[i], u16));
u16 += n;
}
return (char*)u16 - (char*)dst;
diff --git a/tests/UnicodeTest.cpp b/tests/UnicodeTest.cpp
index 5ed92d4b34..5dbcf08547 100644
--- a/tests/UnicodeTest.cpp
+++ b/tests/UnicodeTest.cpp
@@ -9,39 +9,10 @@
#include "SkUtils.h"
#include "Test.h"
-// Unicode Variation Selector ranges: inclusive
-#define UVS_MIN0 0x180B
-#define UVS_MAX0 0x180D
-#define UVS_MIN1 0xFE00
-#define UVS_MAX1 0xFE0F
-#define UVS_MIN2 0xE0100
-#define UVS_MAX2 0xE01EF
-
-static bool isUVS(SkUnichar uni) {
- return (uni >= UVS_MIN0 && uni <= UVS_MAX0) ||
- (uni >= UVS_MIN1 && uni <= UVS_MAX1) ||
- (uni >= UVS_MIN2 && uni <= UVS_MAX2);
-}
-
-static void test_uvs(skiatest::Reporter* reporter) {
- // [min, max], [min, max] ... inclusive
- static const SkUnichar gRanges[] = {
- UVS_MIN0, UVS_MAX0, UVS_MIN1, UVS_MAX1, UVS_MIN2, UVS_MAX2
- };
-
- for (size_t i = 0; i < SK_ARRAY_COUNT(gRanges); i += 2) {
- for (SkUnichar uni = gRanges[i] - 8; uni <= gRanges[i+1] + 8; ++uni) {
- bool uvs0 = isUVS(uni);
- bool uvs1 = SkUnichar_IsVariationSelector(uni);
- REPORTER_ASSERT(reporter, uvs0 == uvs1);
- }
- }
-}
-
// Simple test to ensure that when we call textToGlyphs, we get the same
// result (for the same text) when using UTF8, UTF16, UTF32.
// TODO: make the text more complex (i.e. incorporate chars>7bits)
-static void test_textencodings(skiatest::Reporter* reporter) {
+DEF_TEST(Unicode_textencodings, reporter) {
const char text8[] = "ABCDEFGabcdefg0123456789";
uint16_t text16[sizeof(text8)];
int32_t text32[sizeof(text8)];
@@ -76,8 +47,3 @@ static void test_textencodings(skiatest::Reporter* reporter) {
REPORTER_ASSERT(reporter, !memcmp(glyphs8, glyphs16, count8 * sizeof(uint16_t)));
REPORTER_ASSERT(reporter, !memcmp(glyphs8, glyphs32, count8 * sizeof(uint16_t)));
}
-
-DEF_TEST(Unicode, reporter) {
- test_uvs(reporter);
- test_textencodings(reporter);
-}
diff --git a/tests/UtilsTest.cpp b/tests/UtilsTest.cpp
index 6f8ec914bc..d621a8b12e 100644
--- a/tests/UtilsTest.cpp
+++ b/tests/UtilsTest.cpp
@@ -165,20 +165,19 @@ static void test_search(skiatest::Reporter* reporter) {
}
static void test_utf16(skiatest::Reporter* reporter) {
+ // Test non-basic-multilingual-plane unicode.
static const SkUnichar gUni[] = {
0x10000, 0x18080, 0x20202, 0xFFFFF, 0x101234
};
-
- uint16_t buf[2];
-
- for (size_t i = 0; i < SK_ARRAY_COUNT(gUni); i++) {
- size_t count = SkUTF16_FromUnichar(gUni[i], buf);
+ for (SkUnichar uni : gUni) {
+ uint16_t buf[2];
+ size_t count = SkUTF::ToUTF16(uni, buf);
REPORTER_ASSERT(reporter, count == 2);
- size_t count2 = SkUTF16_CountUnichars(buf, 2 * sizeof(uint16_t));
+ size_t count2 = SkUTF::CountUTF16(buf, sizeof(buf));
REPORTER_ASSERT(reporter, count2 == 1);
const uint16_t* ptr = buf;
- SkUnichar c = SkUTF16_NextUnichar(&ptr, buf + SK_ARRAY_COUNT(buf));
- REPORTER_ASSERT(reporter, c == gUni[i]);
+ SkUnichar c = SkUTF::NextUTF16(&ptr, buf + SK_ARRAY_COUNT(buf));
+ REPORTER_ASSERT(reporter, c == uni);
REPORTER_ASSERT(reporter, ptr - buf == 2);
}
}
@@ -204,8 +203,8 @@ DEF_TEST(Utils, reporter) {
for (size_t i = 0; i < SK_ARRAY_COUNT(gTest); i++) {
const char* p = gTest[i].fUtf8;
const char* stop = p + strlen(p);
- int n = SkUTF8_CountUnichars(p, strlen(p));
- SkUnichar u1 = SkUTF8_NextUnichar(&p, stop);
+ int n = SkUTF::CountUTF8(p, strlen(p));
+ SkUnichar u1 = SkUTF::NextUTF8(&p, stop);
REPORTER_ASSERT(reporter, n == 1);
REPORTER_ASSERT(reporter, u1 == gTest[i].fUni);
@@ -220,62 +219,91 @@ DEF_TEST(Utils, reporter) {
}
#define ASCII_BYTE "X"
-#define CONTINUATION_BYTE "\x80"
-#define LEADING_TWO_BYTE "\xC4"
-#define LEADING_THREE_BYTE "\xE0"
+#define CONTINUATION_BYTE "\xA1"
+#define LEADING_TWO_BYTE "\xC2"
+#define LEADING_THREE_BYTE "\xE1"
#define LEADING_FOUR_BYTE "\xF0"
#define INVALID_BYTE "\xFC"
-static bool valid_utf8(const char* p, size_t l) {
- return SkUTF8_CountUnichars(p, l) >= 0;
-}
-DEF_TEST(Utils_UTF8_ValidLength, r) {
- const char* goodTestcases[] = {
- "",
- ASCII_BYTE,
- ASCII_BYTE ASCII_BYTE,
- LEADING_TWO_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE,
- ASCII_BYTE ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE,
- LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE
- CONTINUATION_BYTE,
+DEF_TEST(SkUTF_CountUTF8, r) {
+ struct {
+ int expectedCount;
+ const char* utf8String;
+ } testCases[] = {
+ { 0, "" },
+ { 1, ASCII_BYTE },
+ { 2, ASCII_BYTE ASCII_BYTE },
+ { 1, LEADING_TWO_BYTE CONTINUATION_BYTE },
+ { 2, ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE },
+ { 3, ASCII_BYTE ASCII_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE },
+ { 1, LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 2, ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 3, ASCII_BYTE ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 1, LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 2, ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 3, ASCII_BYTE ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE CONTINUATION_BYTE
+ CONTINUATION_BYTE },
+ { -1, INVALID_BYTE },
+ { -1, INVALID_BYTE CONTINUATION_BYTE },
+ { -1, INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { -1, INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { -1, LEADING_TWO_BYTE },
+ { -1, CONTINUATION_BYTE },
+ { -1, CONTINUATION_BYTE CONTINUATION_BYTE },
+ { -1, LEADING_THREE_BYTE CONTINUATION_BYTE },
+ { -1, CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { -1, LEADING_FOUR_BYTE CONTINUATION_BYTE },
+ { -1, CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
};
- for (const char* testcase : goodTestcases) {
- REPORTER_ASSERT(r, valid_utf8(testcase, strlen(testcase)));
+ for (auto testCase : testCases) {
+ const char* str = testCase.utf8String;
+ REPORTER_ASSERT(r, testCase.expectedCount == SkUTF::CountUTF8(str, strlen(str)));
}
- const char* badTestcases[] = {
- INVALID_BYTE,
- INVALID_BYTE CONTINUATION_BYTE,
- INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- LEADING_TWO_BYTE,
- CONTINUATION_BYTE,
- CONTINUATION_BYTE CONTINUATION_BYTE,
- LEADING_THREE_BYTE CONTINUATION_BYTE,
- CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- LEADING_FOUR_BYTE CONTINUATION_BYTE,
- CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
-
- ASCII_BYTE INVALID_BYTE,
- ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE,
- ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE INVALID_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_TWO_BYTE,
- ASCII_BYTE CONTINUATION_BYTE,
- ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_THREE_BYTE CONTINUATION_BYTE,
- ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
- ASCII_BYTE LEADING_FOUR_BYTE CONTINUATION_BYTE,
- ASCII_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE CONTINUATION_BYTE,
-
- // LEADING_FOUR_BYTE LEADING_TWO_BYTE CONTINUATION_BYTE,
+}
+
+DEF_TEST(SkUTF_NextUTF8_ToUTF8, r) {
+ struct {
+ SkUnichar expected;
+ const char* utf8String;
+ } testCases[] = {
+ { -1, INVALID_BYTE },
+ { -1, "" },
+ { 0x0058, ASCII_BYTE },
+ { 0x00A1, LEADING_TWO_BYTE CONTINUATION_BYTE },
+ { 0x1861, LEADING_THREE_BYTE CONTINUATION_BYTE CONTINUATION_BYTE },
+ { 0x010330, LEADING_FOUR_BYTE "\x90\x8C\xB0" },
};
- for (const char* testcase : badTestcases) {
- REPORTER_ASSERT(r, !valid_utf8(testcase, strlen(testcase)));
+ for (auto testCase : testCases) {
+ const char* str = testCase.utf8String;
+ SkUnichar uni = SkUTF::NextUTF8(&str, str + strlen(str));
+ REPORTER_ASSERT(r, str == testCase.utf8String + strlen(testCase.utf8String));
+ REPORTER_ASSERT(r, uni == testCase.expected);
+ char buff[5] = {0, 0, 0, 0, 0};
+ size_t len = SkUTF::ToUTF8(uni, buff);
+ if (buff[len] != 0) {
+ ERRORF(r, "unexpected write");
+ continue;
+ }
+ if (uni == -1) {
+ REPORTER_ASSERT(r, len == 0);
+ continue;
+ }
+ if (len == 0) {
+ ERRORF(r, "unexpected failure.");
+ continue;
+ }
+ if (len > 4) {
+ ERRORF(r, "wrote too much");
+ continue;
+ }
+ str = testCase.utf8String;
+ REPORTER_ASSERT(r, len == strlen(buff));
+ REPORTER_ASSERT(r, len == strlen(str));
+ REPORTER_ASSERT(r, 0 == strcmp(str, buff));
}
-
}
+#undef ASCII_BYTE
+#undef CONTINUATION_BYTE
+#undef LEADING_TWO_BYTE
+#undef LEADING_THREE_BYTE
+#undef LEADING_FOUR_BYTE
+#undef INVALID_BYTE
diff --git a/tools/fonts/create_test_font.cpp b/tools/fonts/create_test_font.cpp
index 36ead3cb65..de19c099c9 100644
--- a/tools/fonts/create_test_font.cpp
+++ b/tools/fonts/create_test_font.cpp
@@ -140,13 +140,12 @@ static int output_points(const SkPoint* pts, int emSize, int count, SkString* pt
static void output_path_data(const SkPaint& paint,
int emSize, SkString* ptsOut, SkTDArray<SkPath::Verb>* verbs,
SkTDArray<unsigned>* charCodes, SkTDArray<SkScalar>* widths) {
- for (int ch = 0x00; ch < 0x7f; ++ch) {
- char str[1];
- str[0] = ch;
- const char* used = str;
- SkUnichar index = SkUTF8_NextUnichar(&used, str + 1);
+ for (SkUnichar index = 0x00; index < 0x7f; ++index) {
+ uint16_t utf16[2];
+ size_t utf16Bytes = sizeof(uint16_t) * SkUTF::ToUTF16(index, utf16);
SkPath path;
- paint.getTextPath((const void*) &index, 2, 0, 0, &path);
+ SkASSERT(paint.getTextEncoding() == SkPaint::kUTF16_TextEncoding);
+ paint.getTextPath(utf16, utf16Bytes, 0, 0, &path);
SkPath::RawIter iter(path);
SkPath::Verb verb;
SkPoint pts[4];
@@ -175,12 +174,12 @@ static void output_path_data(const SkPaint& paint,
*verbs->append() = SkPath::kDone_Verb;
*charCodes->append() = index;
SkScalar width;
- SkDEBUGCODE(int charCount =) paint.getTextWidths((const void*) &index, 2, &width);
+ SkDEBUGCODE(int charCount =) paint.getTextWidths(utf16, utf16Bytes, &width);
SkASSERT(charCount == 1);
// SkASSERT(floor(width) == width); // not true for Hiragino Maru Gothic Pro
*widths->append() = width;
- if (!ch) {
- ch = 0x1f; // skip the rest of the control codes
+ if (0 == index) {
+ index = 0x1f; // skip the rest of the control codes
}
}
}
@@ -432,7 +431,11 @@ static void generate_index(const char* defaultName) {
}
int main(int , char * const []) {
+#ifdef SK_BUILD_FOR_UNIX
+ generate_fonts("/usr/share/fonts/truetype/liberation/");
+#else
generate_fonts("/Library/Fonts/"); // or /usr/share/fonts/truetype/ttf-liberation/
+#endif
generate_index(DEFAULT_FONT_NAME);
return 0;
}