diff options
author | halcanary <halcanary@google.com> | 2016-08-31 12:52:35 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-08-31 12:52:35 -0700 |
commit | 3d01c62e19df9f369cdfaeff82ec8af2c0be75f1 (patch) | |
tree | 6f7d30931b95a3c97ae59ed9c849bed57af1aa41 /src/pdf | |
parent | 41a8f323f7ac63c753bc50d434ff61c350c176bd (diff) |
SkPDF: Fix Type3 ToUnicode table.
This seems to fix text extraction in Adobe Reader:
- Registry/Ordering is now set to Skia/SkiaOrdering.
- Type3 fonts now get a FontDescriptor (force symbolic font).
- CMapName is now Skia-Identity-SkiaOrdering.
- CMap behaves correctly for single-byte fonts.
Also:
- SkTestTypeface returns tounicode map for testing.
- Unit test updated.
All PDFs render the same as before.
BUG=skia:5606
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2292303004
Review-Url: https://codereview.chromium.org/2292303004
Diffstat (limited to 'src/pdf')
-rw-r--r-- | src/pdf/SkPDFFont.cpp | 21 | ||||
-rw-r--r-- | src/pdf/SkPDFMakeToUnicodeCmap.cpp | 51 |
2 files changed, 44 insertions, 28 deletions
diff --git a/src/pdf/SkPDFFont.cpp b/src/pdf/SkPDFFont.cpp index 93f48332d8..32e365388a 100644 --- a/src/pdf/SkPDFFont.cpp +++ b/src/pdf/SkPDFFont.cpp @@ -29,7 +29,7 @@ namespace { // PDF's notion of symbolic vs non-symbolic is related to the character set, not // symbols vs. characters. Rarely is a font the right character set to call it // non-symbolic, so always call it symbolic. (PDF 1.4 spec, section 5.7.1) -static const int kPdfSymbolic = 4; +static const int32_t kPdfSymbolic = 4; struct SkPDFType0Font final : public SkPDFFont { SkPDFType0Font(SkPDFFont::Info, const SkAdvancedTypefaceMetrics&); @@ -426,8 +426,9 @@ void SkPDFType0Font::getFontSubset(SkPDFCanon* canon) { } auto sysInfo = sk_make_sp<SkPDFDict>(); - sysInfo->insertString("Registry", "Adobe"); - sysInfo->insertString("Ordering", "Identity"); + sysInfo->insertString("Registry", "Skia"); + // TODO: Registry+Ordering should be globally unique! + sysInfo->insertString("Ordering", "SkiaOrdering"); sysInfo->insertInt("Supplement", 0); newCIDFont->insertObject("CIDSystemInfo", std::move(sysInfo)); @@ -597,6 +598,7 @@ static void add_type3_font_info(SkPDFCanon* canon, const SkBitSet& subset, SkGlyphID firstGlyphID, SkGlyphID lastGlyphID) { + const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon); SkASSERT(lastGlyphID >= firstGlyphID); // Remove unused glyphs at the end of the range. // Keep the lastGlyphID >= firstGlyphID invariant true. 
@@ -684,8 +686,7 @@ static void add_type3_font_info(SkPDFCanon* canon, fontBBox->appendInt(bbox.top()); font->insertObject("FontBBox", std::move(fontBBox)); font->insertName("CIDToGIDMap", "Identity"); - const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon); - if (metrics /* && metrics->fGlyphToUnicode.count() > 0 */) { + if (metrics && metrics->fGlyphToUnicode.count() > 0) { font->insertObjRef("ToUnicode", SkPDFMakeToUnicodeCmap(metrics->fGlyphToUnicode, &subset, @@ -693,6 +694,16 @@ static void add_type3_font_info(SkPDFCanon* canon, firstGlyphID, lastGlyphID)); } + auto descriptor = sk_make_sp<SkPDFDict>("FontDescriptor"); + int32_t fontDescriptorFlags = kPdfSymbolic; + if (metrics) { + // Type3 FontDescriptor does not require all the same fields. + descriptor->insertName("FontName", metrics->fFontName); + descriptor->insertInt("ItalicAngle", metrics->fItalicAngle); + fontDescriptorFlags |= (int32_t)metrics->fStyle; + } + descriptor->insertInt("Flags", fontDescriptorFlags); + font->insertObjRef("FontDescriptor", std::move(descriptor)); font->insertObject("Widths", std::move(widthArray)); font->insertObject("Encoding", std::move(encoding)); font->insertObject("CharProcs", std::move(charProcs)); diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/src/pdf/SkPDFMakeToUnicodeCmap.cpp index 5186cbbda1..7fc5c59be3 100644 --- a/src/pdf/SkPDFMakeToUnicodeCmap.cpp +++ b/src/pdf/SkPDFMakeToUnicodeCmap.cpp @@ -10,8 +10,7 @@ #include "SkUtils.h" static void append_tounicode_header(SkDynamicMemoryWStream* cmap, - SkGlyphID firstGlyphID, - SkGlyphID lastGlyphID) { + bool multibyte) { // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. // It's there to prevent old version Adobe Readers from malfunctioning. const char* kHeader = @@ -26,8 +25,8 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap, // different. This is not a reference object. 
const char* kSysInfo = "/CIDSystemInfo\n" - "<< /Registry (Adobe)\n" - "/Ordering (UCS)\n" + "<< /Registry (Skia)\n" + "/Ordering (SkiaOrdering)\n" "/Supplement 0\n" ">> def\n"; cmap->writeText(kSysInfo); @@ -36,18 +35,16 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap, // /CMapType 2 means ToUnicode. // Codespace range just tells the PDF processor the valid range. const char* kTypeInfoHeader = - "/CMapName /Adobe-Identity-UCS def\n" + "/CMapName /Skia-Identity-SkiaOrdering def\n" "/CMapType 2 def\n" "1 begincodespacerange\n"; cmap->writeText(kTypeInfoHeader); - - // e.g. "<0000> <FFFF>\n" - SkString range; - range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID); - cmap->writeText(range.c_str()); - - const char* kTypeInfoFooter = "endcodespacerange\n"; - cmap->writeText(kTypeInfoFooter); + if (multibyte) { + cmap->writeText("<0000> <FFFF>\n"); + } else { + cmap->writeText("<00> <FF>\n"); + } + cmap->writeText("endcodespacerange\n"); } static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { @@ -82,7 +79,18 @@ static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) { } } +static void write_glyph(SkDynamicMemoryWStream* cmap, + bool multiByte, + SkGlyphID gid) { + if (multiByte) { + SkPDFUtils::WriteUInt16BE(cmap, gid); + } else { + SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); + } +} + static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, + bool multiByte, SkDynamicMemoryWStream* cmap) { // PDF spec defines that every bf* list can have at most 100 entries. 
for (int i = 0; i < bfchar.count(); i += 100) { @@ -92,7 +100,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, cmap->writeText(" beginbfchar\n"); for (int j = 0; j < count; ++j) { cmap->writeText("<"); - SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId); + write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); cmap->writeText("> <"); write_utf16be(cmap, bfchar[i + j].fUnicode); cmap->writeText(">\n"); @@ -102,6 +110,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, } static void append_bfrange_section(const SkTDArray<BFRange>& bfrange, + bool multiByte, SkDynamicMemoryWStream* cmap) { // PDF spec defines that every bf* list can have at most 100 entries. for (int i = 0; i < bfrange.count(); i += 100) { @@ -111,9 +120,9 @@ static void append_bfrange_section(const SkTDArray<BFRange>& bfrange, cmap->writeText(" beginbfrange\n"); for (int j = 0; j < count; ++j) { cmap->writeText("<"); - SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart); + write_glyph(cmap, multiByte, bfrange[i + j].fStart); cmap->writeText("> <"); - SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd); + write_glyph(cmap, multiByte, bfrange[i + j].fEnd); cmap->writeText("> <"); write_utf16be(cmap, bfrange[i + j].fUnicode); cmap->writeText(">\n"); @@ -206,8 +215,8 @@ void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode, // The spec requires all bfchar entries for a font must come before bfrange // entries. 
- append_bfchar_section(bfcharEntries, cmap); - append_bfrange_section(bfrangeEntries, cmap); + append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap); + append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap); } sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap( @@ -217,11 +226,7 @@ sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap( SkGlyphID firstGlyphID, SkGlyphID lastGlyphID) { SkDynamicMemoryWStream cmap; - if (multiByteGlyphs) { - append_tounicode_header(&cmap, firstGlyphID, lastGlyphID); - } else { - append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1); - } + append_tounicode_header(&cmap, multiByteGlyphs); SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, firstGlyphID, lastGlyphID); append_cmap_footer(&cmap); |