diff options
author | vandebo@chromium.org <vandebo@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2011-05-09 18:13:47 +0000 |
---|---|---|
committer | vandebo@chromium.org <vandebo@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2011-05-09 18:13:47 +0000 |
commit | 6744d498fcbbbcf503ec80c4d43dd8f118a88597 (patch) | |
tree | 855c458a26e42d13ff29f9aa51266f5e151b9569 /src | |
parent | 339ac3d0a7650c98de35afbcff4ac1d5b47199c0 (diff) |
[PDF] Add a ToUnicode mapping for fonts.
This makes text in PDFs searchable and copy&paste-able.
Code from arthurhsu@chromium.org. Original review: http://codereview.appspot.com/4428082/
Review URL: http://codereview.appspot.com/4525042
git-svn-id: http://skia.googlecode.com/svn/trunk@1280 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src')
-rwxr-xr-x[-rw-r--r--] | src/pdf/SkPDFFont.cpp | 121 | ||||
-rw-r--r-- | src/ports/SkFontHost_FreeType.cpp | 56 | ||||
-rwxr-xr-x[-rw-r--r--] | src/ports/SkFontHost_win.cpp | 56 |
3 files changed, 229 insertions, 4 deletions
diff --git a/src/pdf/SkPDFFont.cpp b/src/pdf/SkPDFFont.cpp index acfe41e8e1..cc249ca9d3 100644..100755 --- a/src/pdf/SkPDFFont.cpp +++ b/src/pdf/SkPDFFont.cpp @@ -319,6 +319,92 @@ SkPDFArray* composeAdvanceData( } // namespace +static void append_tounicode_header(SkDynamicMemoryWStream* cmap) { + // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. + // It's there to prevent old version Adobe Readers from malfunctioning. + const char* kHeader = + "/CIDInit /ProcSet findresource begin\n" + "12 dict begin\n" + "begincmap\n"; + cmap->writeText(kHeader); + + // The /CIDSystemInfo must be consistent to the one in + // SkPDFFont::populateCIDFont(). + // We can not pass over the system info object here because the format is + // different. This is not a reference object. + const char* kSysInfo = + "/CIDSystemInfo\n" + "<< /Registry (Adobe)\n" + "/Ordering (UCS)\n" + "/Supplement 0\n" + ">> def\n"; + cmap->writeText(kSysInfo); + + // The CMapName must be consistent to /CIDSystemInfo above. + // /CMapType 2 means ToUnicode. + // We specify codespacerange from 0x0000 to 0xFFFF because we convert our + // code table from unsigned short (16-bits). Codespace range just tells the + // PDF processor the valid range. It does not matter whether a complete + // mapping is provided or not. 
+ const char* kTypeInfo = + "/CMapName /Adobe-Identity-UCS def\n" + "/CMapType 2 def\n" + "1 begincodespacerange\n" + "<0000> <FFFF>\n" + "endcodespacerange\n"; + cmap->writeText(kTypeInfo); +} + +static void append_cmap_bfchar_table(uint16_t* glyph_id, SkUnichar* unicode, + size_t count, + SkDynamicMemoryWStream* cmap) { + cmap->writeDecAsText(count); + cmap->writeText(" beginbfchar\n"); + for (size_t i = 0; i < count; ++i) { + cmap->writeText("<"); + cmap->writeHexAsText(glyph_id[i], 4); + cmap->writeText("> <"); + cmap->writeHexAsText(unicode[i], 4); + cmap->writeText(">\n"); + } + cmap->writeText("endbfchar\n"); +} + +static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { + const char* kFooter = + "endcmap\n" + "CMapName currentdict /CMap defineresource pop\n" + "end\n" + "end"; + cmap->writeText(kFooter); +} + +// Generate <bfchar> table according to PDF spec 1.4 and Adobe Technote 5014. +static void append_cmap_bfchar_sections( + const SkTDArray<SkUnichar>& glyphUnicode, + SkDynamicMemoryWStream* cmap) { + // PDF spec defines that every bf* list can have at most 100 entries. + const size_t kMaxEntries = 100; + uint16_t glyphId[kMaxEntries]; + SkUnichar unicode[kMaxEntries]; + size_t index = 0; + for (int i = 0; i < glyphUnicode.count(); i++) { + if (glyphUnicode[i]) { + glyphId[index] = i; + unicode[index] = glyphUnicode[i]; + ++index; + } + if (index == kMaxEntries) { + append_cmap_bfchar_table(glyphId, unicode, index, cmap); + index = 0; + } + } + + if (index) { + append_cmap_bfchar_table(glyphId, unicode, index, cmap); + } +} + /* Font subset design: It would be nice to be able to subset fonts * (particularly type 3 fonts), but it's a lot of work and not a priority. 
* @@ -404,9 +490,13 @@ SkPDFFont* SkPDFFont::getFontResource(SkTypeface* typeface, uint16_t glyphID) { fontInfo = relatedFont->fFontInfo; fontDescriptor = relatedFont->fDescriptor.get(); } else { - fontInfo = SkFontHost::GetAdvancedTypefaceMetrics(fontID, SkTBitOr( - SkAdvancedTypefaceMetrics::kHAdvance_PerGlyphInfo, - SkAdvancedTypefaceMetrics::kGlyphNames_PerGlyphInfo)); + SkAdvancedTypefaceMetrics::PerGlyphInfo info; + info = SkAdvancedTypefaceMetrics::kHAdvance_PerGlyphInfo; + info = SkTBitOr<SkAdvancedTypefaceMetrics::PerGlyphInfo>( + info, SkAdvancedTypefaceMetrics::kGlyphNames_PerGlyphInfo); + info = SkTBitOr<SkAdvancedTypefaceMetrics::PerGlyphInfo>( + info, SkAdvancedTypefaceMetrics::kToUnicode_PerGlyphInfo); + fontInfo = SkFontHost::GetAdvancedTypefaceMetrics(fontID, info); SkSafeUnref(fontInfo.get()); // SkRefPtr and Get both took a reference. } @@ -497,7 +587,6 @@ SkPDFFont::SkPDFFont(class SkAdvancedTypefaceMetrics* fontInfo, } void SkPDFFont::populateType0Font() { - // TODO(vandebo) add a ToUnicode mapping. fMultiByteGlyphs = true; insert("Subtype", new SkPDFName("Type0"))->unref(); @@ -512,6 +601,26 @@ void SkPDFFont::populateType0Font() { new SkPDFFont(fFontInfo.get(), fTypeface.get(), 1, true, NULL)); descendantFonts->append(new SkPDFObjRef(fResources.top()))->unref(); insert("DescendantFonts", descendantFonts.get()); + + populateToUnicodeTable(); +} + +void SkPDFFont::populateToUnicodeTable() { + if (fFontInfo.get() == NULL || + fFontInfo->fGlyphToUnicode.begin() == NULL) { + return; + } + + SkDynamicMemoryWStream cmap; + append_tounicode_header(&cmap); + append_cmap_bfchar_sections(fFontInfo->fGlyphToUnicode, &cmap); + append_cmap_footer(&cmap); + SkRefPtr<SkMemoryStream> cmapStream = new SkMemoryStream(); + cmapStream->unref(); // SkRefPtr and new took a reference. 
+ cmapStream->setMemoryOwned(cmap.detach(), cmap.getOffset()); + SkRefPtr<SkPDFStream> pdfCmap = new SkPDFStream(cmapStream.get()); + fResources.push(pdfCmap.get()); // Pass reference from new. + insert("ToUnicode", new SkPDFObjRef(pdfCmap.get()))->unref(); } void SkPDFFont::populateCIDFont() { @@ -522,6 +631,7 @@ void SkPDFFont::populateCIDFont() { insert("Subtype", new SkPDFName("CIDFontType0"))->unref(); } else if (fFontInfo->fType == SkAdvancedTypefaceMetrics::kTrueType_Font) { insert("Subtype", new SkPDFName("CIDFontType2"))->unref(); + insert("CIDToGIDMap", new SkPDFName("Identity"))->unref(); } else { SkASSERT(false); } @@ -697,9 +807,12 @@ void SkPDFFont::populateType3Font(int16_t glyphID) { insert("FirstChar", new SkPDFInt(fFirstGlyphID))->unref(); insert("LastChar", new SkPDFInt(fLastGlyphID))->unref(); insert("Widths", widthArray.get()); + insert("CIDToGIDMap", new SkPDFName("Identity"))->unref(); if (fFontInfo && fFontInfo->fLastGlyphID <= 255) fFontInfo = NULL; + + populateToUnicodeTable(); } bool SkPDFFont::addFontDescriptor(int16_t defaultWidth) { diff --git a/src/ports/SkFontHost_FreeType.cpp b/src/ports/SkFontHost_FreeType.cpp index 5ed66c883a..b3cc7832e5 100644 --- a/src/ports/SkFontHost_FreeType.cpp +++ b/src/ports/SkFontHost_FreeType.cpp @@ -339,6 +339,56 @@ static bool getWidthAdvance(FT_Face face, int gId, int16_t* data) { return true; } +static void populate_glyph_to_unicode(FT_Face& face, + SkTDArray<SkUnichar>* glyphToUnicode) { + // Check and see if we have Unicode cmaps. 
+ for (int i = 0; i < face->num_charmaps; ++i) { + // CMaps known to support Unicode: + // Platform ID Encoding ID Name + // ----------- ----------- ----------------------------------- + // 0 0,1 Apple Unicode + // 0 3 Apple Unicode 2.0 (preferred) + // 3 1 Microsoft Unicode UCS-2 + // 3 10 Microsoft Unicode UCS-4 (preferred) + // + // See Apple TrueType Reference Manual + // http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html + // http://developer.apple.com/fonts/TTRefMan/RM06/Chap6name.html#ID + // Microsoft OpenType Specification + // http://www.microsoft.com/typography/otspec/cmap.htm + + FT_UShort platformId = face->charmaps[i]->platform_id; + FT_UShort encodingId = face->charmaps[i]->encoding_id; + + if (platformId != 0 && platformId != 3) { + continue; + } + if (platformId == 3 && encodingId != 1 && encodingId != 10) { + continue; + } + bool preferredMap = ((platformId == 3 && encodingId == 10) || + (platformId == 0 && encodingId == 3)); + + FT_Set_Charmap(face, face->charmaps[i]); + if (glyphToUnicode->isEmpty()) { + glyphToUnicode->setCount(face->num_glyphs); + memset(glyphToUnicode->begin(), 0, + sizeof(SkUnichar) * face->num_glyphs); + } + + // Iterate through each cmap entry. 
+ FT_UInt glyphIndex; + for (SkUnichar charCode = FT_Get_First_Char(face, &glyphIndex); + glyphIndex != 0; + charCode = FT_Get_Next_Char(face, charCode, &glyphIndex)) { + if (charCode && + ((*glyphToUnicode)[glyphIndex] == 0 || preferredMap)) { + (*glyphToUnicode)[glyphIndex] = charCode; + } + } + } +} + // static SkAdvancedTypefaceMetrics* SkFontHost::GetAdvancedTypefaceMetrics( uint32_t fontID, @@ -509,6 +559,12 @@ SkAdvancedTypefaceMetrics* SkFontHost::GetAdvancedTypefaceMetrics( } } + if (perGlyphInfo & SkAdvancedTypefaceMetrics::kToUnicode_PerGlyphInfo && + info->fType != SkAdvancedTypefaceMetrics::kType1_Font && + face->num_charmaps) { + populate_glyph_to_unicode(face, &(info->fGlyphToUnicode)); + } + if (!canEmbed(face)) info->fType = SkAdvancedTypefaceMetrics::kNotEmbeddable_Font; diff --git a/src/ports/SkFontHost_win.cpp b/src/ports/SkFontHost_win.cpp index bc660104b5..fd5b929ae2 100644..100755 --- a/src/ports/SkFontHost_win.cpp +++ b/src/ports/SkFontHost_win.cpp @@ -194,6 +194,58 @@ static void GetLogFontByID(SkFontID fontID, LOGFONT* lf) { } } +// Construct Glyph to Unicode table. +// Unicode code points that require surrogate pairs in UTF-16 are not +// supported. +// TODO(arthurhsu): Add support for surrogate pairs. It looks like that may +// require parsing the TTF cmap table (platform 3, encoding 10) directly instead +// of calling GetFontUnicodeRanges(). 
+static void populate_glyph_to_unicode(HDC fontHdc, const unsigned glyphCount, + SkTDArray<SkUnichar>* glyphToUnicode) { + DWORD glyphSetBufferSize = GetFontUnicodeRanges(fontHdc, NULL); + if (!glyphSetBufferSize) { + return; + } + + SkAutoTDeleteArray<BYTE> glyphSetBuffer(new BYTE[glyphSetBufferSize]); + GLYPHSET* glyphSet = + reinterpret_cast<LPGLYPHSET>(glyphSetBuffer.get()); + if (GetFontUnicodeRanges(fontHdc, glyphSet) != glyphSetBufferSize) { + return; + } + + glyphToUnicode->setCount(glyphCount); + memset(glyphToUnicode->begin(), 0, glyphCount * sizeof(SkUnichar)); + for (DWORD i = 0; i < glyphSet->cRanges; ++i) { + // There is no guarantee that within a Unicode range, the corresponding + // glyph ids in a font file are contiguous. So, even if we have ranges, + // we can't just use the first and last entry of the range to compute + // result. We need to enumerate them one by one. + int count = glyphSet->ranges[i].cGlyphs; + SkAutoTArray<WCHAR> chars(count + 1); + chars[count] = 0; // terminate string + SkAutoTArray<WORD> glyph(count); + for (USHORT j = 0; j < count; ++j) { + chars[j] = glyphSet->ranges[i].wcLow + j; + } + GetGlyphIndicesW(fontHdc, chars.get(), count, glyph.get(), + GGI_MARK_NONEXISTING_GLYPHS); + // If the glyph ID is valid, and the glyph is not mapped, then we will + // fill in the char id into the vector. If the glyph is mapped already, + // skip it. + // TODO(arthurhsu): improve this. e.g. Get all used char ids from + // font cache, then generate this mapping table from there. It's + // unlikely to have collisions since glyph reuse happens mostly for + // different Unicode pages. 
+ for (USHORT j = 0; j < count; ++j) { + if (glyph[j] != 0xffff && glyph[j] < glyphCount && + (*glyphToUnicode)[glyph[j]] == 0) { + (*glyphToUnicode)[glyph[j]] = chars[j]; + } + } + } +} + ////////////////////////////////////////////////////////////////////////////////////////////// class SkScalerContext_Windows : public SkScalerContext { @@ -649,6 +701,10 @@ SkAdvancedTypefaceMetrics* SkFontHost::GetAdvancedTypefaceMetrics( info->fFontName.set(lf.lfFaceName); #endif + if (perGlyphInfo & SkAdvancedTypefaceMetrics::kToUnicode_PerGlyphInfo) { + populate_glyph_to_unicode(hdc, glyphCount, &(info->fGlyphToUnicode)); + } + if (otm.otmTextMetrics.tmPitchAndFamily & TMPF_TRUETYPE) { info->fType = SkAdvancedTypefaceMetrics::kTrueType_Font; } else { |