aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: halcanary <halcanary@google.com> 2016-08-31 12:52:35 -0700
committer: Commit bot <commit-bot@chromium.org> 2016-08-31 12:52:35 -0700
commit: 3d01c62e19df9f369cdfaeff82ec8af2c0be75f1 (patch)
tree: 6f7d30931b95a3c97ae59ed9c849bed57af1aa41
parent: 41a8f323f7ac63c753bc50d434ff61c350c176bd (diff)
SkPDF: Fix Type3 ToUnicode table.
This seems to fix text extraction in Adobe Reader:
- Registry/Ordering is now set to Skia/SkiaOrdering.
- Type3 fonts now get a FontDescriptor (forced to be a symbolic font).
- CMapName is now Skia-Identity-SkiaOrdering.
- The CMap now behaves correctly for single-byte fonts.

Also:
- SkTestTypeface returns a ToUnicode map for testing.
- Unit test updated.

All PDFs render the same.

BUG=skia:5606
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2292303004
Review-Url: https://codereview.chromium.org/2292303004
-rw-r--r--src/fonts/SkTestScalerContext.cpp10
-rw-r--r--src/pdf/SkPDFFont.cpp21
-rw-r--r--src/pdf/SkPDFMakeToUnicodeCmap.cpp51
-rw-r--r--tests/PDFGlyphsToUnicodeTest.cpp6
4 files changed, 56 insertions, 32 deletions
diff --git a/src/fonts/SkTestScalerContext.cpp b/src/fonts/SkTestScalerContext.cpp
index fcb65a83c1..09b20ba71b 100644
--- a/src/fonts/SkTestScalerContext.cpp
+++ b/src/fonts/SkTestScalerContext.cpp
@@ -150,7 +150,15 @@ SkAdvancedTypefaceMetrics* SkTestTypeface::onGetAdvancedTypefaceMetrics(
// pdf only
SkAdvancedTypefaceMetrics* info = new SkAdvancedTypefaceMetrics;
info->fFontName.set(fTestFont->fName);
- info->fLastGlyphID = SkToU16(onCountGlyphs() - 1);
+ int glyphCount = this->onCountGlyphs();
+ info->fLastGlyphID = SkToU16(glyphCount - 1);
+
+ SkTDArray<SkUnichar>& toUnicode = info->fGlyphToUnicode;
+ toUnicode.setCount(glyphCount);
+ SkASSERT(glyphCount == SkToInt(fTestFont->fCharCodesCount));
+ for (int gid = 0; gid < glyphCount; ++gid) {
+ toUnicode[gid] = SkToS32(fTestFont->fCharCodes[gid]);
+ }
return info;
}
diff --git a/src/pdf/SkPDFFont.cpp b/src/pdf/SkPDFFont.cpp
index 93f48332d8..32e365388a 100644
--- a/src/pdf/SkPDFFont.cpp
+++ b/src/pdf/SkPDFFont.cpp
@@ -29,7 +29,7 @@ namespace {
// PDF's notion of symbolic vs non-symbolic is related to the character set, not
// symbols vs. characters. Rarely is a font the right character set to call it
// non-symbolic, so always call it symbolic. (PDF 1.4 spec, section 5.7.1)
-static const int kPdfSymbolic = 4;
+static const int32_t kPdfSymbolic = 4;
struct SkPDFType0Font final : public SkPDFFont {
SkPDFType0Font(SkPDFFont::Info, const SkAdvancedTypefaceMetrics&);
@@ -426,8 +426,9 @@ void SkPDFType0Font::getFontSubset(SkPDFCanon* canon) {
}
auto sysInfo = sk_make_sp<SkPDFDict>();
- sysInfo->insertString("Registry", "Adobe");
- sysInfo->insertString("Ordering", "Identity");
+ sysInfo->insertString("Registry", "Skia");
+ // TODO: Registry+Ordering should be globally unique!
+ sysInfo->insertString("Ordering", "SkiaOrdering");
sysInfo->insertInt("Supplement", 0);
newCIDFont->insertObject("CIDSystemInfo", std::move(sysInfo));
@@ -597,6 +598,7 @@ static void add_type3_font_info(SkPDFCanon* canon,
const SkBitSet& subset,
SkGlyphID firstGlyphID,
SkGlyphID lastGlyphID) {
+ const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon);
SkASSERT(lastGlyphID >= firstGlyphID);
// Remove unused glyphs at the end of the range.
// Keep the lastGlyphID >= firstGlyphID invariant true.
@@ -684,8 +686,7 @@ static void add_type3_font_info(SkPDFCanon* canon,
fontBBox->appendInt(bbox.top());
font->insertObject("FontBBox", std::move(fontBBox));
font->insertName("CIDToGIDMap", "Identity");
- const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon);
- if (metrics /* && metrics->fGlyphToUnicode.count() > 0 */) {
+ if (metrics && metrics->fGlyphToUnicode.count() > 0) {
font->insertObjRef("ToUnicode",
SkPDFMakeToUnicodeCmap(metrics->fGlyphToUnicode,
&subset,
@@ -693,6 +694,16 @@ static void add_type3_font_info(SkPDFCanon* canon,
firstGlyphID,
lastGlyphID));
}
+ auto descriptor = sk_make_sp<SkPDFDict>("FontDescriptor");
+ int32_t fontDescriptorFlags = kPdfSymbolic;
+ if (metrics) {
+ // Type3 FontDescriptor does not require all the same fields.
+ descriptor->insertName("FontName", metrics->fFontName);
+ descriptor->insertInt("ItalicAngle", metrics->fItalicAngle);
+ fontDescriptorFlags |= (int32_t)metrics->fStyle;
+ }
+ descriptor->insertInt("Flags", fontDescriptorFlags);
+ font->insertObjRef("FontDescriptor", std::move(descriptor));
font->insertObject("Widths", std::move(widthArray));
font->insertObject("Encoding", std::move(encoding));
font->insertObject("CharProcs", std::move(charProcs));
diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/src/pdf/SkPDFMakeToUnicodeCmap.cpp
index 5186cbbda1..7fc5c59be3 100644
--- a/src/pdf/SkPDFMakeToUnicodeCmap.cpp
+++ b/src/pdf/SkPDFMakeToUnicodeCmap.cpp
@@ -10,8 +10,7 @@
#include "SkUtils.h"
static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
- SkGlyphID firstGlyphID,
- SkGlyphID lastGlyphID) {
+ bool multibyte) {
// 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
// It's there to prevent old version Adobe Readers from malfunctioning.
const char* kHeader =
@@ -26,8 +25,8 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
// different. This is not a reference object.
const char* kSysInfo =
"/CIDSystemInfo\n"
- "<< /Registry (Adobe)\n"
- "/Ordering (UCS)\n"
+ "<< /Registry (Skia)\n"
+ "/Ordering (SkiaOrdering)\n"
"/Supplement 0\n"
">> def\n";
cmap->writeText(kSysInfo);
@@ -36,18 +35,16 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
// /CMapType 2 means ToUnicode.
// Codespace range just tells the PDF processor the valid range.
const char* kTypeInfoHeader =
- "/CMapName /Adobe-Identity-UCS def\n"
+ "/CMapName /Skia-Identity-SkiaOrdering def\n"
"/CMapType 2 def\n"
"1 begincodespacerange\n";
cmap->writeText(kTypeInfoHeader);
-
- // e.g. "<0000> <FFFF>\n"
- SkString range;
- range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID);
- cmap->writeText(range.c_str());
-
- const char* kTypeInfoFooter = "endcodespacerange\n";
- cmap->writeText(kTypeInfoFooter);
+ if (multibyte) {
+ cmap->writeText("<0000> <FFFF>\n");
+ } else {
+ cmap->writeText("<00> <FF>\n");
+ }
+ cmap->writeText("endcodespacerange\n");
}
static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
@@ -82,7 +79,18 @@ static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
}
}
+static void write_glyph(SkDynamicMemoryWStream* cmap,
+ bool multiByte,
+ SkGlyphID gid) {
+ if (multiByte) {
+ SkPDFUtils::WriteUInt16BE(cmap, gid);
+ } else {
+ SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
+ }
+}
+
static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
+ bool multiByte,
SkDynamicMemoryWStream* cmap) {
// PDF spec defines that every bf* list can have at most 100 entries.
for (int i = 0; i < bfchar.count(); i += 100) {
@@ -92,7 +100,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
cmap->writeText(" beginbfchar\n");
for (int j = 0; j < count; ++j) {
cmap->writeText("<");
- SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId);
+ write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
cmap->writeText("> <");
write_utf16be(cmap, bfchar[i + j].fUnicode);
cmap->writeText(">\n");
@@ -102,6 +110,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
}
static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
+ bool multiByte,
SkDynamicMemoryWStream* cmap) {
// PDF spec defines that every bf* list can have at most 100 entries.
for (int i = 0; i < bfrange.count(); i += 100) {
@@ -111,9 +120,9 @@ static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
cmap->writeText(" beginbfrange\n");
for (int j = 0; j < count; ++j) {
cmap->writeText("<");
- SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart);
+ write_glyph(cmap, multiByte, bfrange[i + j].fStart);
cmap->writeText("> <");
- SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd);
+ write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
cmap->writeText("> <");
write_utf16be(cmap, bfrange[i + j].fUnicode);
cmap->writeText(">\n");
@@ -206,8 +215,8 @@ void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
// The spec requires all bfchar entries for a font must come before bfrange
// entries.
- append_bfchar_section(bfcharEntries, cmap);
- append_bfrange_section(bfrangeEntries, cmap);
+ append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
+ append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
}
sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
@@ -217,11 +226,7 @@ sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
SkGlyphID firstGlyphID,
SkGlyphID lastGlyphID) {
SkDynamicMemoryWStream cmap;
- if (multiByteGlyphs) {
- append_tounicode_header(&cmap, firstGlyphID, lastGlyphID);
- } else {
- append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1);
- }
+ append_tounicode_header(&cmap, multiByteGlyphs);
SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
firstGlyphID, lastGlyphID);
append_cmap_footer(&cmap);
diff --git a/tests/PDFGlyphsToUnicodeTest.cpp b/tests/PDFGlyphsToUnicodeTest.cpp
index d83ce664bc..3ba8870774 100644
--- a/tests/PDFGlyphsToUnicodeTest.cpp
+++ b/tests/PDFGlyphsToUnicodeTest.cpp
@@ -125,11 +125,11 @@ endbfchar\n";
char expectedResultSingleBytes[] =
"2 beginbfchar\n\
-<0001> <0000>\n\
-<0002> <0000>\n\
+<01> <0000>\n\
+<02> <0000>\n\
endbfchar\n\
1 beginbfrange\n\
-<0003> <0006> <1010>\n\
+<03> <06> <1010>\n\
endbfrange\n";
REPORTER_ASSERT(reporter, stream_equals(buffer, 0,