diff options
-rw-r--r-- | gyp/pdf.gypi | 5 | ||||
-rw-r--r-- | include/core/SkTypeface.h | 1 | ||||
-rw-r--r-- | src/pdf/SkPDFConvertType1FontStream.cpp | 205 | ||||
-rw-r--r-- | src/pdf/SkPDFConvertType1FontStream.h | 28 | ||||
-rw-r--r-- | src/pdf/SkPDFFont.cpp | 666 | ||||
-rw-r--r-- | src/pdf/SkPDFFont.h | 6 | ||||
-rw-r--r-- | src/pdf/SkPDFFontImpl.h | 91 | ||||
-rw-r--r-- | src/pdf/SkPDFMakeToUnicodeCmap.cpp | 230 | ||||
-rw-r--r-- | src/pdf/SkPDFMakeToUnicodeCmap.h | 29 | ||||
-rw-r--r-- | tests/PDFGlyphsToUnicodeTest.cpp | 19 |
10 files changed, 638 insertions, 642 deletions
diff --git a/gyp/pdf.gypi b/gyp/pdf.gypi index 4fa46f2f31..18a773e2f3 100644 --- a/gyp/pdf.gypi +++ b/gyp/pdf.gypi @@ -22,17 +22,20 @@ '<(skia_src_path)/pdf/SkPDFCanon.h', '<(skia_src_path)/pdf/SkPDFCanvas.cpp', '<(skia_src_path)/pdf/SkPDFCanvas.h', + '<(skia_src_path)/pdf/SkPDFConvertType1FontStream.cpp', + '<(skia_src_path)/pdf/SkPDFConvertType1FontStream.h', '<(skia_src_path)/pdf/SkPDFDevice.cpp', '<(skia_src_path)/pdf/SkPDFDevice.h', '<(skia_src_path)/pdf/SkPDFDocument.cpp', '<(skia_src_path)/pdf/SkPDFDocument.h', '<(skia_src_path)/pdf/SkPDFFont.cpp', '<(skia_src_path)/pdf/SkPDFFont.h', - '<(skia_src_path)/pdf/SkPDFFontImpl.h', '<(skia_src_path)/pdf/SkPDFFormXObject.cpp', '<(skia_src_path)/pdf/SkPDFFormXObject.h', '<(skia_src_path)/pdf/SkPDFGraphicState.cpp', '<(skia_src_path)/pdf/SkPDFGraphicState.h', + '<(skia_src_path)/pdf/SkPDFMakeToUnicodeCmap.cpp', + '<(skia_src_path)/pdf/SkPDFMakeToUnicodeCmap.h', '<(skia_src_path)/pdf/SkPDFMetadata.cpp', '<(skia_src_path)/pdf/SkPDFMetadata.h', '<(skia_src_path)/pdf/SkPDFResourceDict.cpp', diff --git a/include/core/SkTypeface.h b/include/core/SkTypeface.h index 30a0903bbd..23ca15e1f9 100644 --- a/include/core/SkTypeface.h +++ b/include/core/SkTypeface.h @@ -390,7 +390,6 @@ private: friend class SkGTypeface; friend class SkRandomTypeface; friend class SkPDFFont; - friend class SkPDFCIDFont; friend class GrPathRendering; friend class GrGLPathRendering; diff --git a/src/pdf/SkPDFConvertType1FontStream.cpp b/src/pdf/SkPDFConvertType1FontStream.cpp new file mode 100644 index 0000000000..d75da5c787 --- /dev/null +++ b/src/pdf/SkPDFConvertType1FontStream.cpp @@ -0,0 +1,205 @@ +/* + * Copyright 2011 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "SkPDFConvertType1FontStream.h" + +#include <ctype.h> + +static bool parsePFBSection(const uint8_t** src, size_t* len, int sectionType, + size_t* size) { + // PFB sections have a two or six bytes header. 0x80 and a one byte + // section type followed by a four byte section length. Type one is + // an ASCII section (includes a length), type two is a binary section + // (includes a length) and type three is an EOF marker with no length. + const uint8_t* buf = *src; + if (*len < 2 || buf[0] != 0x80 || buf[1] != sectionType) { + return false; + } else if (buf[1] == 3) { + return true; + } else if (*len < 6) { + return false; + } + + *size = (size_t)buf[2] | ((size_t)buf[3] << 8) | ((size_t)buf[4] << 16) | + ((size_t)buf[5] << 24); + size_t consumed = *size + 6; + if (consumed > *len) { + return false; + } + *src = *src + consumed; + *len = *len - consumed; + return true; +} + +static bool parsePFB(const uint8_t* src, size_t size, size_t* headerLen, + size_t* dataLen, size_t* trailerLen) { + const uint8_t* srcPtr = src; + size_t remaining = size; + + return parsePFBSection(&srcPtr, &remaining, 1, headerLen) && + parsePFBSection(&srcPtr, &remaining, 2, dataLen) && + parsePFBSection(&srcPtr, &remaining, 1, trailerLen) && + parsePFBSection(&srcPtr, &remaining, 3, nullptr); +} + +/* The sections of a PFA file are implicitly defined. The body starts + * after the line containing "eexec," and the trailer starts with 512 + * literal 0's followed by "cleartomark" (plus arbitrary white space). + * + * This function assumes that src is NUL terminated, but the NUL + * termination is not included in size. + * + */ +static bool parsePFA(const char* src, size_t size, size_t* headerLen, + size_t* hexDataLen, size_t* dataLen, size_t* trailerLen) { + const char* end = src + size; + + const char* dataPos = strstr(src, "eexec"); + if (!dataPos) { + return false; + } + dataPos += strlen("eexec"); + while ((*dataPos == '\n' || *dataPos == '\r' || *dataPos == ' ') && + dataPos < end) { + dataPos++; + } + *headerLen = dataPos - src; + + const char* trailerPos = strstr(dataPos, "cleartomark"); + if (!trailerPos) { + return false; + } + int zeroCount = 0; + for (trailerPos--; trailerPos > dataPos && zeroCount < 512; trailerPos--) { + if (*trailerPos == '\n' || *trailerPos == '\r' || *trailerPos == ' ') { + continue; + } else if (*trailerPos == '0') { + zeroCount++; + } else { + return false; + } + } + if (zeroCount != 512) { + return false; + } + + *hexDataLen = trailerPos - src - *headerLen; + *trailerLen = size - *headerLen - *hexDataLen; + + // Verify that the data section is hex encoded and count the bytes. + int nibbles = 0; + for (; dataPos < trailerPos; dataPos++) { + if (isspace(*dataPos)) { + continue; + } + if (!isxdigit(*dataPos)) { + return false; + } + nibbles++; + } + *dataLen = (nibbles + 1) / 2; + + return true; +} + +static int8_t hexToBin(uint8_t c) { + if (!isxdigit(c)) { + return -1; + } else if (c <= '9') { + return c - '0'; + } else if (c <= 'F') { + return c - 'A' + 10; + } else if (c <= 'f') { + return c - 'a' + 10; + } + return -1; +} + +sk_sp<SkData> SkPDFConvertType1FontStream( + std::unique_ptr<SkStreamAsset> srcStream, size_t* headerLen, + size_t* dataLen, size_t* trailerLen) { + size_t srcLen = srcStream ? srcStream->getLength() : 0; + SkASSERT(srcLen); + if (!srcLen) { + return nullptr; + } + // Flatten and Nul-terminate the source stream so that we can use + // strstr() to search it. + SkAutoTMalloc<uint8_t> sourceBuffer(SkToInt(srcLen + 1)); + (void)srcStream->read(sourceBuffer.get(), srcLen); + sourceBuffer[SkToInt(srcLen)] = 0; + const uint8_t* src = sourceBuffer.get(); + + if (parsePFB(src, srcLen, headerLen, dataLen, trailerLen)) { + static const int kPFBSectionHeaderLength = 6; + const size_t length = *headerLen + *dataLen + *trailerLen; + SkASSERT(length > 0); + SkASSERT(length + (2 * kPFBSectionHeaderLength) <= srcLen); + + sk_sp<SkData> data(SkData::MakeUninitialized(length)); + + const uint8_t* const srcHeader = src + kPFBSectionHeaderLength; + // There is a six-byte section header before header and data + // (but not trailer) that we're not going to copy. + const uint8_t* const srcData = srcHeader + *headerLen + kPFBSectionHeaderLength; + const uint8_t* const srcTrailer = srcData + *headerLen; + + uint8_t* const resultHeader = (uint8_t*)data->writable_data(); + uint8_t* const resultData = resultHeader + *headerLen; + uint8_t* const resultTrailer = resultData + *dataLen; + + SkASSERT(resultTrailer + *trailerLen == resultHeader + length); + + memcpy(resultHeader, srcHeader, *headerLen); + memcpy(resultData, srcData, *dataLen); + memcpy(resultTrailer, srcTrailer, *trailerLen); + + return data; + } + + // A PFA has to be converted for PDF. + size_t hexDataLen; + if (!parsePFA((const char*)src, srcLen, headerLen, &hexDataLen, dataLen, + trailerLen)) { + return nullptr; + } + const size_t length = *headerLen + *dataLen + *trailerLen; + SkASSERT(length > 0); + auto data = SkData::MakeUninitialized(length); + uint8_t* buffer = (uint8_t*)data->writable_data(); + + memcpy(buffer, src, *headerLen); + uint8_t* const resultData = &(buffer[*headerLen]); + + const uint8_t* hexData = src + *headerLen; + const uint8_t* trailer = hexData + hexDataLen; + size_t outputOffset = 0; + uint8_t dataByte = 0; // To hush compiler. + bool highNibble = true; + for (; hexData < trailer; hexData++) { + int8_t curNibble = hexToBin(*hexData); + if (curNibble < 0) { + continue; + } + if (highNibble) { + dataByte = curNibble << 4; + highNibble = false; + } else { + dataByte |= curNibble; + highNibble = true; + resultData[outputOffset++] = dataByte; + } + } + if (!highNibble) { + resultData[outputOffset++] = dataByte; + } + SkASSERT(outputOffset == *dataLen); + + uint8_t* const resultTrailer = &(buffer[SkToInt(*headerLen + outputOffset)]); + memcpy(resultTrailer, src + *headerLen + hexDataLen, *trailerLen); + return data; +} diff --git a/src/pdf/SkPDFConvertType1FontStream.h b/src/pdf/SkPDFConvertType1FontStream.h new file mode 100644 index 0000000000..ffd2da3093 --- /dev/null +++ b/src/pdf/SkPDFConvertType1FontStream.h @@ -0,0 +1,28 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkPDFConvertType1FontStream_DEFINED +#define SkPDFConvertType1FontStream_DEFINED + +#include "SkData.h" +#include "SkStream.h" + +/* + "A standard Type 1 font program, as described in the Adobe Type 1 + Font Format specification, consists of three parts: a clear-text + portion (written using PostScript syntax), an encrypted portion, and + a fixed-content portion. The fixed-content portion contains 512 + ASCII zeros followed by a cleartomark operator, and perhaps followed + by additional data. Although the encrypted portion of a standard + Type 1 font may be in binary or ASCII hexadecimal format, PDF + supports only the binary format." +*/ +sk_sp<SkData> SkPDFConvertType1FontStream( + std::unique_ptr<SkStreamAsset> srcStream, size_t* headerLen, + size_t* dataLen, size_t* trailerLen); + +#endif // SkPDFConvertType1FontStream_DEFINED diff --git a/src/pdf/SkPDFFont.cpp b/src/pdf/SkPDFFont.cpp index 093ad4a90d..ff1dd2d76d 100644 --- a/src/pdf/SkPDFFont.cpp +++ b/src/pdf/SkPDFFont.cpp @@ -5,15 +5,14 @@ * found in the LICENSE file. */ -#include <ctype.h> - #include "SkData.h" #include "SkGlyphCache.h" #include "SkPaint.h" #include "SkPDFCanon.h" +#include "SkPDFConvertType1FontStream.h" #include "SkPDFDevice.h" +#include "SkPDFMakeToUnicodeCmap.h" #include "SkPDFFont.h" -#include "SkPDFFontImpl.h" #include "SkPDFUtils.h" #include "SkRefCnt.h" #include "SkScalar.h" @@ -31,6 +30,8 @@ #endif #endif +namespace { + // PDF's notion of symbolic vs non-symbolic is related to the character set, not // symbols vs. characters. Rarely is a font the right character set to call it // non-symbolic, so always call it symbolic. (PDF 1.4 spec, section 5.7.1) @@ -53,7 +54,66 @@ struct AdvanceMetric { AdvanceMetric& operator=(const AdvanceMetric&) = delete; }; -namespace { +class SkPDFType0Font final : public SkPDFFont { +public: + SkPDFType0Font(const SkAdvancedTypefaceMetrics* info, + SkTypeface* typeface); + virtual ~SkPDFType0Font(); + bool multiByteGlyphs() const override { return true; } + SkPDFFont* getFontSubset(const SkPDFGlyphSet* usage) override; +#ifdef SK_DEBUG + void emitObject(SkWStream*, + const SkPDFObjNumMap&, + const SkPDFSubstituteMap&) const override; +#endif + +private: +#ifdef SK_DEBUG + bool fPopulated; +#endif + bool populate(const SkPDFGlyphSet* subset); + typedef SkPDFDict INHERITED; +}; + +class SkPDFCIDFont final : public SkPDFFont { +public: + SkPDFCIDFont(const SkAdvancedTypefaceMetrics* info, + SkTypeface* typeface, + const SkPDFGlyphSet* subset); + virtual ~SkPDFCIDFont(); + bool multiByteGlyphs() const override { return true; } + +private: + bool populate(const SkPDFGlyphSet* subset); + bool addFontDescriptor(int16_t defaultWidth, + const SkTDArray<uint32_t>* subset); +}; + +class SkPDFType1Font final : public SkPDFFont { +public: + SkPDFType1Font(const SkAdvancedTypefaceMetrics* info, + SkTypeface* typeface, + uint16_t glyphID, + SkPDFDict* relatedFontDescriptor); + virtual ~SkPDFType1Font(); + bool multiByteGlyphs() const override { return false; } + +private: + bool populate(int16_t glyphID); + bool addFontDescriptor(int16_t defaultWidth); +}; + +class SkPDFType3Font final : public SkPDFFont { +public: + SkPDFType3Font(const SkAdvancedTypefaceMetrics* info, + SkTypeface* typeface, + uint16_t glyphID); + virtual ~SkPDFType3Font(); + bool multiByteGlyphs() const override { return false; } + +private: + bool populate(uint16_t glyphID); +}; /////////////////////////////////////////////////////////////////////////////// // File-Local Functions @@ -260,233 +320,8 @@ static void get_glyph_widths(SkSinglyLinkedList<AdvanceMetric>* glyphWidths, //////////////////////////////////////////////////////////////////////////////// -bool parsePFBSection(const uint8_t** src, size_t* len, int sectionType, - size_t* size) { - // PFB sections have a two or six bytes header. 0x80 and a one byte - // section type followed by a four byte section length. Type one is - // an ASCII section (includes a length), type two is a binary section - // (includes a length) and type three is an EOF marker with no length. - const uint8_t* buf = *src; - if (*len < 2 || buf[0] != 0x80 || buf[1] != sectionType) { - return false; - } else if (buf[1] == 3) { - return true; - } else if (*len < 6) { - return false; - } - - *size = (size_t)buf[2] | ((size_t)buf[3] << 8) | ((size_t)buf[4] << 16) | - ((size_t)buf[5] << 24); - size_t consumed = *size + 6; - if (consumed > *len) { - return false; - } - *src = *src + consumed; - *len = *len - consumed; - return true; -} - -bool parsePFB(const uint8_t* src, size_t size, size_t* headerLen, - size_t* dataLen, size_t* trailerLen) { - const uint8_t* srcPtr = src; - size_t remaining = size; - - return parsePFBSection(&srcPtr, &remaining, 1, headerLen) && - parsePFBSection(&srcPtr, &remaining, 2, dataLen) && - parsePFBSection(&srcPtr, &remaining, 1, trailerLen) && - parsePFBSection(&srcPtr, &remaining, 3, nullptr); -} - -/* The sections of a PFA file are implicitly defined. The body starts - * after the line containing "eexec," and the trailer starts with 512 - * literal 0's followed by "cleartomark" (plus arbitrary white space). - * - * This function assumes that src is NUL terminated, but the NUL - * termination is not included in size. - * - */ -bool parsePFA(const char* src, size_t size, size_t* headerLen, - size_t* hexDataLen, size_t* dataLen, size_t* trailerLen) { - const char* end = src + size; - - const char* dataPos = strstr(src, "eexec"); - if (!dataPos) { - return false; - } - dataPos += strlen("eexec"); - while ((*dataPos == '\n' || *dataPos == '\r' || *dataPos == ' ') && - dataPos < end) { - dataPos++; - } - *headerLen = dataPos - src; - - const char* trailerPos = strstr(dataPos, "cleartomark"); - if (!trailerPos) { - return false; - } - int zeroCount = 0; - for (trailerPos--; trailerPos > dataPos && zeroCount < 512; trailerPos--) { - if (*trailerPos == '\n' || *trailerPos == '\r' || *trailerPos == ' ') { - continue; - } else if (*trailerPos == '0') { - zeroCount++; - } else { - return false; - } - } - if (zeroCount != 512) { - return false; - } - - *hexDataLen = trailerPos - src - *headerLen; - *trailerLen = size - *headerLen - *hexDataLen; - - // Verify that the data section is hex encoded and count the bytes. - int nibbles = 0; - for (; dataPos < trailerPos; dataPos++) { - if (isspace(*dataPos)) { - continue; - } - if (!isxdigit(*dataPos)) { - return false; - } - nibbles++; - } - *dataLen = (nibbles + 1) / 2; - - return true; -} - -int8_t hexToBin(uint8_t c) { - if (!isxdigit(c)) { - return -1; - } else if (c <= '9') { - return c - '0'; - } else if (c <= 'F') { - return c - 'A' + 10; - } else if (c <= 'f') { - return c - 'a' + 10; - } - return -1; -} - -static sk_sp<SkData> handle_type1_stream(SkStream* srcStream, size_t* headerLen, - size_t* dataLen, size_t* trailerLen) { - // srcStream may be backed by a file or a unseekable fd, so we may not be - // able to use skip(), rewind(), or getMemoryBase(). read()ing through - // the input only once is doable, but very ugly. Furthermore, it'd be nice - // if the data was NUL terminated so that we can use strstr() to search it. - // Make as few copies as possible given these constraints. - SkDynamicMemoryWStream dynamicStream; - std::unique_ptr<SkMemoryStream> staticStream; - sk_sp<SkData> data; - const uint8_t* src; - size_t srcLen; - if ((srcLen = srcStream->getLength()) > 0) { - staticStream.reset(new SkMemoryStream(srcLen + 1)); - src = (const uint8_t*)staticStream->getMemoryBase(); - if (srcStream->getMemoryBase() != nullptr) { - memcpy((void *)src, srcStream->getMemoryBase(), srcLen); - } else { - size_t read = 0; - while (read < srcLen) { - size_t got = srcStream->read((void *)staticStream->getAtPos(), - srcLen - read); - if (got == 0) { - return nullptr; - } - read += got; - staticStream->seek(read); - } - } - ((uint8_t *)src)[srcLen] = 0; - } else { - static const size_t kBufSize = 4096; - uint8_t buf[kBufSize]; - size_t amount; - while ((amount = srcStream->read(buf, kBufSize)) > 0) { - dynamicStream.write(buf, amount); - } - amount = 0; - dynamicStream.write(&amount, 1); // nullptr terminator. - data.reset(dynamicStream.copyToData()); - src = data->bytes(); - srcLen = data->size() - 1; - } - - if (parsePFB(src, srcLen, headerLen, dataLen, trailerLen)) { - static const int kPFBSectionHeaderLength = 6; - const size_t length = *headerLen + *dataLen + *trailerLen; - SkASSERT(length > 0); - SkASSERT(length + (2 * kPFBSectionHeaderLength) <= srcLen); - - sk_sp<SkData> data(SkData::MakeUninitialized(length)); - - const uint8_t* const srcHeader = src + kPFBSectionHeaderLength; - // There is a six-byte section header before header and data - // (but not trailer) that we're not going to copy. - const uint8_t* const srcData = srcHeader + *headerLen + kPFBSectionHeaderLength; - const uint8_t* const srcTrailer = srcData + *headerLen; - - uint8_t* const resultHeader = (uint8_t*)data->writable_data(); - uint8_t* const resultData = resultHeader + *headerLen; - uint8_t* const resultTrailer = resultData + *dataLen; - - SkASSERT(resultTrailer + *trailerLen == resultHeader + length); - - memcpy(resultHeader, srcHeader, *headerLen); - memcpy(resultData, srcData, *dataLen); - memcpy(resultTrailer, srcTrailer, *trailerLen); - - return data; - } - - // A PFA has to be converted for PDF. - size_t hexDataLen; - if (parsePFA((const char*)src, srcLen, headerLen, &hexDataLen, dataLen, - trailerLen)) { - const size_t length = *headerLen + *dataLen + *trailerLen; - SkASSERT(length > 0); - SkAutoTMalloc<uint8_t> buffer(length); - - memcpy(buffer.get(), src, *headerLen); - uint8_t* const resultData = &(buffer[SkToInt(*headerLen)]); - - const uint8_t* hexData = src + *headerLen; - const uint8_t* trailer = hexData + hexDataLen; - size_t outputOffset = 0; - uint8_t dataByte = 0; // To hush compiler. - bool highNibble = true; - for (; hexData < trailer; hexData++) { - int8_t curNibble = hexToBin(*hexData); - if (curNibble < 0) { - continue; - } - if (highNibble) { - dataByte = curNibble << 4; - highNibble = false; - } else { - dataByte |= curNibble; - highNibble = true; - resultData[outputOffset++] = dataByte; - } - } - if (!highNibble) { - resultData[outputOffset++] = dataByte; - } - SkASSERT(outputOffset == *dataLen); - - uint8_t* const resultTrailer = &(buffer[SkToInt(*headerLen + outputOffset)]); - memcpy(resultTrailer, src + *headerLen + hexDataLen, *trailerLen); - - return SkData::MakeFromMalloc(buffer.release(), length); - } - return nullptr; -} - // scale from em-units to base-1000, returning as a SkScalar -SkScalar scaleFromFontUnits(int16_t val, uint16_t emSize) { - SkScalar scaled = SkIntToScalar(val); +SkScalar from_font_units(SkScalar scaled, uint16_t emSize) { if (emSize == 1000) { return scaled; } else { @@ -494,6 +329,11 @@ SkScalar scaleFromFontUnits(int16_t val, uint16_t emSize) { } } +SkScalar scaleFromFontUnits(int16_t val, uint16_t emSize) { + return from_font_units(SkIntToScalar(val), emSize); +} + + void setGlyphWidthAndBoundingBox(SkScalar width, SkIRect box, SkWStream* content) { // Specify width and bounding box for the glyph. @@ -555,233 +395,6 @@ sk_sp<SkPDFArray> composeAdvanceData( } // namespace -static void append_tounicode_header(SkDynamicMemoryWStream* cmap, - uint16_t firstGlyphID, - uint16_t lastGlyphID) { - // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. - // It's there to prevent old version Adobe Readers from malfunctioning. - const char* kHeader = - "/CIDInit /ProcSet findresource begin\n" - "12 dict begin\n" - "begincmap\n"; - cmap->writeText(kHeader); - - // The /CIDSystemInfo must be consistent to the one in - // SkPDFFont::populateCIDFont(). - // We can not pass over the system info object here because the format is - // different. This is not a reference object. - const char* kSysInfo = - "/CIDSystemInfo\n" - "<< /Registry (Adobe)\n" - "/Ordering (UCS)\n" - "/Supplement 0\n" - ">> def\n"; - cmap->writeText(kSysInfo); - - // The CMapName must be consistent to /CIDSystemInfo above. - // /CMapType 2 means ToUnicode. - // Codespace range just tells the PDF processor the valid range. - const char* kTypeInfoHeader = - "/CMapName /Adobe-Identity-UCS def\n" - "/CMapType 2 def\n" - "1 begincodespacerange\n"; - cmap->writeText(kTypeInfoHeader); - - // e.g. "<0000> <FFFF>\n" - SkString range; - range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID); - cmap->writeText(range.c_str()); - - const char* kTypeInfoFooter = "endcodespacerange\n"; - cmap->writeText(kTypeInfoFooter); -} - -static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { - const char* kFooter = - "endcmap\n" - "CMapName currentdict /CMap defineresource pop\n" - "end\n" - "end"; - cmap->writeText(kFooter); -} - -struct BFChar { - uint16_t fGlyphId; - SkUnichar fUnicode; -}; - -struct BFRange { - uint16_t fStart; - uint16_t fEnd; - SkUnichar fUnicode; -}; - -static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) { - uint16_t utf16[2] = {0, 0}; - size_t len = SkUTF16_FromUnichar(utf32, utf16); - SkASSERT(len == 1 || len == 2); - SkPDFUtils::WriteUInt16BE(wStream, utf16[0]); - if (len == 2) { - SkPDFUtils::WriteUInt16BE(wStream, utf16[1]); - } -} - -static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, - SkDynamicMemoryWStream* cmap) { - // PDF spec defines that every bf* list can have at most 100 entries. - for (int i = 0; i < bfchar.count(); i += 100) { - int count = bfchar.count() - i; - count = SkMin32(count, 100); - cmap->writeDecAsText(count); - cmap->writeText(" beginbfchar\n"); - for (int j = 0; j < count; ++j) { - cmap->writeText("<"); - SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId); - cmap->writeText("> <"); - write_utf16be(cmap, bfchar[i + j].fUnicode); - cmap->writeText(">\n"); - } - cmap->writeText("endbfchar\n"); - } -} - -static void append_bfrange_section(const SkTDArray<BFRange>& bfrange, - SkDynamicMemoryWStream* cmap) { - // PDF spec defines that every bf* list can have at most 100 entries. - for (int i = 0; i < bfrange.count(); i += 100) { - int count = bfrange.count() - i; - count = SkMin32(count, 100); - cmap->writeDecAsText(count); - cmap->writeText(" beginbfrange\n"); - for (int j = 0; j < count; ++j) { - cmap->writeText("<"); - SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart); - cmap->writeText("> <"); - SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd); - cmap->writeText("> <"); - write_utf16be(cmap, bfrange[i + j].fUnicode); - cmap->writeText(">\n"); - } - cmap->writeText("endbfrange\n"); - } -} - -// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe -// Technote 5014. -// The function is not static so we can test it in unit tests. -// -// Current implementation guarantees bfchar and bfrange entries do not overlap. -// -// Current implementation does not attempt aggresive optimizations against -// following case because the specification is not clear. -// -// 4 beginbfchar 1 beginbfchar -// <0003> <0013> <0020> <0014> -// <0005> <0015> to endbfchar -// <0007> <0017> 1 beginbfrange -// <0020> <0014> <0003> <0007> <0013> -// endbfchar endbfrange -// -// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may -// overlap, but succeeding maps supersede preceding maps." -// -// In case of searching text in PDF, bfrange will have higher precedence so -// typing char id 0x0014 in search box will get glyph id 0x0004 first. However, -// the spec does not mention how will this kind of conflict being resolved. -// -// For the worst case (having 65536 continuous unicode and we use every other -// one of them), the possible savings by aggressive optimization is 416KB -// pre-compressed and does not provide enough motivation for implementation. - -// TODO(halcanary): this should be in a header so that it is separately testable -// ( see caller in tests/ToUnicode.cpp ) -void append_cmap_sections(const SkTDArray<SkUnichar>& glyphToUnicode, - const SkPDFGlyphSet* subset, - SkDynamicMemoryWStream* cmap, - bool multiByteGlyphs, - uint16_t firstGlyphID, - uint16_t lastGlyphID); - -void append_cmap_sections(const SkTDArray<SkUnichar>& glyphToUnicode, - const SkPDFGlyphSet* subset, - SkDynamicMemoryWStream* cmap, - bool multiByteGlyphs, - uint16_t firstGlyphID, - uint16_t lastGlyphID) { - if (glyphToUnicode.isEmpty()) { - return; - } - int glyphOffset = 0; - if (!multiByteGlyphs) { - glyphOffset = firstGlyphID - 1; - } - - SkTDArray<BFChar> bfcharEntries; - SkTDArray<BFRange> bfrangeEntries; - - BFRange currentRangeEntry = {0, 0, 0}; - bool rangeEmpty = true; - const int limit = - SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset; - - for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { - bool inSubset = i < limit && - (subset == nullptr || subset->has(i + glyphOffset)); - if (!rangeEmpty) { - // PDF spec requires bfrange not changing the higher byte, - // e.g. <1035> <10FF> <2222> is ok, but - // <1035> <1100> <2222> is no good - bool inRange = - i == currentRangeEntry.fEnd + 1 && - i >> 8 == currentRangeEntry.fStart >> 8 && - i < limit && - glyphToUnicode[i + glyphOffset] == - currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; - if (!inSubset || !inRange) { - if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { - bfrangeEntries.push(currentRangeEntry); - } else { - BFChar* entry = bfcharEntries.append(); - entry->fGlyphId = currentRangeEntry.fStart; - entry->fUnicode = currentRangeEntry.fUnicode; - } - rangeEmpty = true; - } - } - if (inSubset) { - currentRangeEntry.fEnd = i; - if (rangeEmpty) { - currentRangeEntry.fStart = i; - currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset]; - rangeEmpty = false; - } - } - } - - // The spec requires all bfchar entries for a font must come before bfrange - // entries. - append_bfchar_section(bfcharEntries, cmap); - append_bfrange_section(bfrangeEntries, cmap); -} - -static sk_sp<SkPDFStream> generate_tounicode_cmap( - const SkTDArray<SkUnichar>& glyphToUnicode, - const SkPDFGlyphSet* subset, - bool multiByteGlyphs, - uint16_t firstGlyphID, - uint16_t lastGlyphID) { - SkDynamicMemoryWStream cmap; - if (multiByteGlyphs) { - append_tounicode_header(&cmap, firstGlyphID, lastGlyphID); - } else { - append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1); - } - append_cmap_sections(glyphToUnicode, subset, &cmap, multiByteGlyphs, - firstGlyphID, lastGlyphID); - append_cmap_footer(&cmap); - return sk_make_sp<SkPDFStream>( - std::unique_ptr<SkStreamAsset>(cmap.detachAsStream())); -} /////////////////////////////////////////////////////////////////////////////// // class SkPDFGlyphSet @@ -1069,11 +682,11 @@ void SkPDFFont::populateToUnicodeTable(const SkPDFGlyphSet* subset) { return; } this->insertObjRef("ToUnicode", - generate_tounicode_cmap(fFontInfo->fGlyphToUnicode, - subset, - multiByteGlyphs(), - firstGlyphID(), - lastGlyphID())); + SkPDFMakeToUnicodeCmap(fFontInfo->fGlyphToUnicode, + subset, + multiByteGlyphs(), + firstGlyphID(), + lastGlyphID())); } /////////////////////////////////////////////////////////////////////////////// @@ -1274,6 +887,20 @@ void set_glyph_widths(SkTypeface* tf, } } +sk_sp<const SkAdvancedTypefaceMetrics> SkPDFFont::GetFontMetricsWithGlyphNames( + SkTypeface* typeface, uint32_t* glyphs, uint32_t glyphsCount) { + return sk_sp<const SkAdvancedTypefaceMetrics>( + typeface->getAdvancedTypefaceMetrics( + SkTypeface::kGlyphNames_PerGlyphInfo, glyphs, glyphsCount)); +} + +sk_sp<const SkAdvancedTypefaceMetrics> SkPDFFont::GetFontMetricsWithToUnicode( + SkTypeface* typeface, uint32_t* glyphs, uint32_t glyphsCount) { + return sk_sp<const SkAdvancedTypefaceMetrics>( + typeface->getAdvancedTypefaceMetrics( + SkTypeface::kToUnicode_PerGlyphInfo, glyphs, glyphsCount)); +} + bool SkPDFCIDFont::populate(const SkPDFGlyphSet* subset) { // Generate new font metrics with advance info for true type fonts. // Generate glyph id array. @@ -1285,13 +912,12 @@ bool SkPDFCIDFont::populate(const SkPDFGlyphSet* subset) { subset->exportTo(&glyphIDs); } if (fontInfo()->fType == SkAdvancedTypefaceMetrics::kTrueType_Font) { - SkTypeface::PerGlyphInfo info = SkTypeface::kGlyphNames_PerGlyphInfo; uint32_t* glyphs = (glyphIDs.count() == 0) ? nullptr : glyphIDs.begin(); uint32_t glyphsCount = glyphs ? glyphIDs.count() : 0; - sk_sp<const SkAdvancedTypefaceMetrics> fontMetrics( - typeface()->getAdvancedTypefaceMetrics(info, glyphs, glyphsCount)); - setFontInfo(fontMetrics.get()); - addFontDescriptor(0, &glyphIDs); + sk_sp<const SkAdvancedTypefaceMetrics> fontMetrics = + SkPDFFont::GetFontMetricsWithGlyphNames(this->typeface(), glyphs, glyphsCount); + this->setFontInfo(fontMetrics.get()); + this->addFontDescriptor(0, &glyphIDs); } else { // Other CID fonts addFontDescriptor(0, nullptr); @@ -1357,13 +983,9 @@ bool SkPDFType1Font::addFontDescriptor(int16_t defaultWidth) { size_t data SK_INIT_TO_AVOID_WARNING; size_t trailer SK_INIT_TO_AVOID_WARNING; std::unique_ptr<SkStreamAsset> rawFontData(typeface()->openStream(&ttcIndex)); - SkASSERT(rawFontData); - SkASSERT(rawFontData->getLength() > 0); - if (!rawFontData || 0 == rawFontData->getLength()) { - return false; - } - sk_sp<SkData> fontData(handle_type1_stream(rawFontData.get(), &header, &data, &trailer)); - if (fontData.get() == nullptr) { + sk_sp<SkData> fontData = SkPDFConvertType1FontStream(std::move(rawFontData), + &header, &data, &trailer); + if (!fontData) { return false; } SkASSERT(this->canEmbed()); @@ -1379,43 +1001,47 @@ bool SkPDFType1Font::addFontDescriptor(int16_t defaultWidth) { } bool SkPDFType1Font::populate(int16_t glyphID) { + this->insertName("Subtype", "Type1"); + this->insertName("BaseFont", fontInfo()->fFontName); adjustGlyphRangeForSingleByteEncoding(glyphID); - - int16_t defaultWidth = 0; - const AdvanceMetric* widthRangeEntry = nullptr; + SkGlyphID firstGlyphID = this->firstGlyphID(); + SkGlyphID lastGlyphID = this->lastGlyphID(); + + // glyphCount not including glyph 0 + unsigned glyphCount = 1 + lastGlyphID - firstGlyphID; + SkASSERT(glyphCount > 0 && glyphCount <= 255); + this->insertInt("FirstChar", (size_t)0); + this->insertInt("LastChar", (size_t)glyphCount); { - SkSinglyLinkedList<AdvanceMetric> tmpMetrics; - set_glyph_widths(this->typeface(), nullptr, &tmpMetrics); - for (const auto& widthEntry : tmpMetrics) { - switch (widthEntry.fType) { - case AdvanceMetric::kDefault: - defaultWidth = widthEntry.fAdvance[0]; - break; - case AdvanceMetric::kRun: - SkASSERT(false); - break; - case AdvanceMetric::kRange: - SkASSERT(widthRangeEntry == nullptr); - widthRangeEntry = &widthEntry; - break; - } + SkPaint tmpPaint; + tmpPaint.setHinting(SkPaint::kNo_Hinting); + tmpPaint.setTypeface(sk_ref_sp(this->typeface())); + tmpPaint.setTextSize((SkScalar)this->typeface()->getUnitsPerEm()); + SkAutoGlyphCache glyphCache(tmpPaint, nullptr, nullptr); + auto widths = sk_make_sp<SkPDFArray>(); + SkScalar advance = glyphCache->getGlyphIDAdvance(0).fAdvanceX; + const uint16_t emSize = this->fontInfo()->fEmSize; + widths->appendScalar(from_font_units(advance, emSize)); + for (unsigned gID = firstGlyphID; gID <= lastGlyphID; gID++) { + advance = glyphCache->getGlyphIDAdvance(gID).fAdvanceX; + widths->appendScalar(from_font_units(advance, emSize)); } + this->insertObject("Widths", std::move(widths)); } - - if (!addFontDescriptor(defaultWidth)) { + if (!addFontDescriptor(0)) { return false; } - - insertName("Subtype", "Type1"); - insertName("BaseFont", fontInfo()->fFontName); - - addWidthInfoFromRange(defaultWidth, widthRangeEntry); auto encDiffs = sk_make_sp<SkPDFArray>(); - encDiffs->reserve(lastGlyphID() - firstGlyphID() + 2); - encDiffs->appendInt(1); - SkASSERT(this->fontInfo()->fGlyphNames.count() >= this->lastGlyphID()); - for (int gID = firstGlyphID(); gID <= lastGlyphID(); gID++) { - encDiffs->appendName(fontInfo()->fGlyphNames[gID].c_str()); + encDiffs->reserve(lastGlyphID - firstGlyphID + 3); + encDiffs->appendInt(0); + const SkTArray<SkString>& glyphNames = this->fontInfo()->fGlyphNames; + SkASSERT(glyphNames.count() > lastGlyphID); + encDiffs->appendName(glyphNames[0].c_str()); + const SkString unknown("UNKNOWN"); + for (int gID = firstGlyphID; gID <= lastGlyphID; gID++) { + const bool valid = gID < glyphNames.count() && !glyphNames[gID].isEmpty(); + const SkString& name = valid ? glyphNames[gID] : unknown; + encDiffs->appendName(name); } auto encoding = sk_make_sp<SkPDFDict>("Encoding"); @@ -1424,38 +1050,6 @@ bool SkPDFType1Font::populate(int16_t glyphID) { return true; } -void SkPDFType1Font::addWidthInfoFromRange( - int16_t defaultWidth, - const AdvanceMetric* widthRangeEntry) { - auto widthArray = sk_make_sp<SkPDFArray>(); - int firstChar = 0; - if (widthRangeEntry) { - const uint16_t emSize = fontInfo()->fEmSize; - int startIndex = firstGlyphID() - widthRangeEntry->fStartId; - int endIndex = startIndex + lastGlyphID() - firstGlyphID() + 1; - if (startIndex < 0) - startIndex = 0; - if (endIndex > widthRangeEntry->fAdvance.count()) - endIndex = widthRangeEntry->fAdvance.count(); - if (widthRangeEntry->fStartId == 0) { - widthArray->appendScalar( - scaleFromFontUnits(widthRangeEntry->fAdvance[0], emSize)); - } else { - firstChar = startIndex + widthRangeEntry->fStartId; - } - for (int i = startIndex; i < endIndex; i++) { - widthArray->appendScalar( - scaleFromFontUnits(widthRangeEntry->fAdvance[i], emSize)); - } - } else { - widthArray->appendScalar( - scaleFromFontUnits(defaultWidth, 1000)); - } - this->insertInt("FirstChar", firstChar); - this->insertInt("LastChar", firstChar + widthArray->size() - 1); - this->insertObject("Widths", std::move(widthArray)); -} - /////////////////////////////////////////////////////////////////////////////// // class SkPDFType3Font /////////////////////////////////////////////////////////////////////////////// diff --git a/src/pdf/SkPDFFont.h b/src/pdf/SkPDFFont.h index 0d73745984..c55f650769 100644 --- a/src/pdf/SkPDFFont.h +++ b/src/pdf/SkPDFFont.h @@ -120,6 +120,12 @@ public: SkTypeface* typeface, uint16_t glyphID); + static sk_sp<const SkAdvancedTypefaceMetrics> GetFontMetricsWithGlyphNames( + SkTypeface*, uint32_t* glyphs, uint32_t glyphsCount); + + static sk_sp<const SkAdvancedTypefaceMetrics> GetFontMetricsWithToUnicode( + SkTypeface*, uint32_t* glyphs, uint32_t glyphsCount); + /** Subset the font based on usage set. Returns a SkPDFFont instance with * subset. * @param usage Glyph subset requested. diff --git a/src/pdf/SkPDFFontImpl.h b/src/pdf/SkPDFFontImpl.h deleted file mode 100644 index 05774de6b3..0000000000 --- a/src/pdf/SkPDFFontImpl.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2011 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - - -#ifndef SkPDFFontImpl_DEFINED -#define SkPDFFontImpl_DEFINED - -#include "SkPDFFont.h" - -class SkPDFType0Font final : public SkPDFFont { -public: - virtual ~SkPDFType0Font(); - bool multiByteGlyphs() const override { return true; } - SkPDFFont* getFontSubset(const SkPDFGlyphSet* usage) override; -#ifdef SK_DEBUG - void emitObject(SkWStream*, - const SkPDFObjNumMap&, - const SkPDFSubstituteMap&) const override; -#endif - -private: - friend class SkPDFFont; // to access the constructor -#ifdef SK_DEBUG - bool fPopulated; - typedef SkPDFDict INHERITED; -#endif - - SkPDFType0Font(const SkAdvancedTypefaceMetrics* info, - SkTypeface* typeface); - - bool populate(const SkPDFGlyphSet* subset); -}; - -class SkPDFCIDFont final : public SkPDFFont { -public: - virtual ~SkPDFCIDFont(); - virtual bool multiByteGlyphs() const { return true; } - -private: - friend class SkPDFType0Font; // to access the constructor - - SkPDFCIDFont(const SkAdvancedTypefaceMetrics* info, - SkTypeface* typeface, - const SkPDFGlyphSet* subset); - - bool populate(const SkPDFGlyphSet* subset); - bool addFontDescriptor(int16_t defaultWidth, - const SkTDArray<uint32_t>* subset); -}; - -struct AdvanceMetric; - -class SkPDFType1Font final : public SkPDFFont { -public: - virtual ~SkPDFType1Font(); - virtual bool multiByteGlyphs() const { return false; } - -private: - friend class SkPDFFont; // to access the constructor - - SkPDFType1Font(const SkAdvancedTypefaceMetrics* info, - SkTypeface* typeface, - uint16_t glyphID, - SkPDFDict* relatedFontDescriptor); - - bool populate(int16_t glyphID); - bool addFontDescriptor(int16_t defaultWidth); - void addWidthInfoFromRange(int16_t defaultWidth, - const AdvanceMetric* widthRangeEntry); -}; - -class SkPDFType3Font final : public SkPDFFont { -public: - virtual ~SkPDFType3Font(); - virtual bool multiByteGlyphs() const { return false; } - -private: - friend class SkPDFFont; // to access the constructor - - SkPDFType3Font(const SkAdvancedTypefaceMetrics* info, - SkTypeface* typeface, - uint16_t glyphID); - - bool populate(uint16_t glyphID); -}; - -#endif diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/src/pdf/SkPDFMakeToUnicodeCmap.cpp new file mode 100644 index 0000000000..6fd8b1ca16 --- /dev/null +++ b/src/pdf/SkPDFMakeToUnicodeCmap.cpp @@ -0,0 +1,230 @@ +/* + * Copyright 2011 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "SkPDFMakeToUnicodeCmap.h" +#include "SkPDFUtils.h" +#include "SkUtils.h" + +static void append_tounicode_header(SkDynamicMemoryWStream* cmap, + SkGlyphID firstGlyphID, + SkGlyphID lastGlyphID) { + // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. + // It's there to prevent old version Adobe Readers from malfunctioning. + const char* kHeader = + "/CIDInit /ProcSet findresource begin\n" + "12 dict begin\n" + "begincmap\n"; + cmap->writeText(kHeader); + + // The /CIDSystemInfo must be consistent to the one in + // SkPDFFont::populateCIDFont(). + // We can not pass over the system info object here because the format is + // different. This is not a reference object. + const char* kSysInfo = + "/CIDSystemInfo\n" + "<< /Registry (Adobe)\n" + "/Ordering (UCS)\n" + "/Supplement 0\n" + ">> def\n"; + cmap->writeText(kSysInfo); + + // The CMapName must be consistent to /CIDSystemInfo above. + // /CMapType 2 means ToUnicode. + // Codespace range just tells the PDF processor the valid range. + const char* kTypeInfoHeader = + "/CMapName /Adobe-Identity-UCS def\n" + "/CMapType 2 def\n" + "1 begincodespacerange\n"; + cmap->writeText(kTypeInfoHeader); + + // e.g. "<0000> <FFFF>\n" + SkString range; + range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID); + cmap->writeText(range.c_str()); + + const char* kTypeInfoFooter = "endcodespacerange\n"; + cmap->writeText(kTypeInfoFooter); +} + +static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { + const char kFooter[] = + "endcmap\n" + "CMapName currentdict /CMap defineresource pop\n" + "end\n" + "end"; + cmap->writeText(kFooter); +} + +namespace { +struct BFChar { + SkGlyphID fGlyphId; + SkUnichar fUnicode; +}; + +struct BFRange { + SkGlyphID fStart; + SkGlyphID fEnd; + SkUnichar fUnicode; +}; +} // namespace + +static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) { + SkGlyphID utf16[2] = {0, 0}; + size_t len = SkUTF16_FromUnichar(utf32, utf16); + SkASSERT(len == 1 || len == 2); + SkPDFUtils::WriteUInt16BE(wStream, utf16[0]); + if (len == 2) { + SkPDFUtils::WriteUInt16BE(wStream, utf16[1]); + } +} + +static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, + SkDynamicMemoryWStream* cmap) { + // PDF spec defines that every bf* list can have at most 100 entries. + for (int i = 0; i < bfchar.count(); i += 100) { + int count = bfchar.count() - i; + count = SkMin32(count, 100); + cmap->writeDecAsText(count); + cmap->writeText(" beginbfchar\n"); + for (int j = 0; j < count; ++j) { + cmap->writeText("<"); + SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId); + cmap->writeText("> <"); + write_utf16be(cmap, bfchar[i + j].fUnicode); + cmap->writeText(">\n"); + } + cmap->writeText("endbfchar\n"); + } +} + +static void append_bfrange_section(const SkTDArray<BFRange>& bfrange, + SkDynamicMemoryWStream* cmap) { + // PDF spec defines that every bf* list can have at most 100 entries. + for (int i = 0; i < bfrange.count(); i += 100) { + int count = bfrange.count() - i; + count = SkMin32(count, 100); + cmap->writeDecAsText(count); + cmap->writeText(" beginbfrange\n"); + for (int j = 0; j < count; ++j) { + cmap->writeText("<"); + SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart); + cmap->writeText("> <"); + SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd); + cmap->writeText("> <"); + write_utf16be(cmap, bfrange[i + j].fUnicode); + cmap->writeText(">\n"); + } + cmap->writeText("endbfrange\n"); + } +} + +// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe +// Technote 5014. +// The function is not static so we can test it in unit tests. +// +// Current implementation guarantees bfchar and bfrange entries do not overlap. +// +// Current implementation does not attempt aggresive optimizations against +// following case because the specification is not clear. +// +// 4 beginbfchar 1 beginbfchar +// <0003> <0013> <0020> <0014> +// <0005> <0015> to endbfchar +// <0007> <0017> 1 beginbfrange +// <0020> <0014> <0003> <0007> <0013> +// endbfchar endbfrange +// +// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may +// overlap, but succeeding maps supersede preceding maps." +// +// In case of searching text in PDF, bfrange will have higher precedence so +// typing char id 0x0014 in search box will get glyph id 0x0004 first. However, +// the spec does not mention how will this kind of conflict being resolved. +// +// For the worst case (having 65536 continuous unicode and we use every other +// one of them), the possible savings by aggressive optimization is 416KB +// pre-compressed and does not provide enough motivation for implementation. +void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode, + const SkPDFGlyphSet* subset, + SkDynamicMemoryWStream* cmap, + bool multiByteGlyphs, + SkGlyphID firstGlyphID, + SkGlyphID lastGlyphID) { + if (glyphToUnicode.isEmpty()) { + return; + } + int glyphOffset = 0; + if (!multiByteGlyphs) { + glyphOffset = firstGlyphID - 1; + } + + SkTDArray<BFChar> bfcharEntries; + SkTDArray<BFRange> bfrangeEntries; + + BFRange currentRangeEntry = {0, 0, 0}; + bool rangeEmpty = true; + const int limit = + SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset; + + for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { + bool inSubset = i < limit && + (subset == nullptr || subset->has(i + glyphOffset)); + if (!rangeEmpty) { + // PDF spec requires bfrange not changing the higher byte, + // e.g. <1035> <10FF> <2222> is ok, but + // <1035> <1100> <2222> is no good + bool inRange = + i == currentRangeEntry.fEnd + 1 && + i >> 8 == currentRangeEntry.fStart >> 8 && + i < limit && + glyphToUnicode[i + glyphOffset] == + currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; + if (!inSubset || !inRange) { + if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { + bfrangeEntries.push(currentRangeEntry); + } else { + BFChar* entry = bfcharEntries.append(); + entry->fGlyphId = currentRangeEntry.fStart; + entry->fUnicode = currentRangeEntry.fUnicode; + } + rangeEmpty = true; + } + } + if (inSubset) { + currentRangeEntry.fEnd = i; + if (rangeEmpty) { + currentRangeEntry.fStart = i; + currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset]; + rangeEmpty = false; + } + } + } + + // The spec requires all bfchar entries for a font must come before bfrange + // entries. + append_bfchar_section(bfcharEntries, cmap); + append_bfrange_section(bfrangeEntries, cmap); +} + +sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap( + const SkTDArray<SkUnichar>& glyphToUnicode, + const SkPDFGlyphSet* subset, + bool multiByteGlyphs, + SkGlyphID firstGlyphID, + SkGlyphID lastGlyphID) { + SkDynamicMemoryWStream cmap; + if (multiByteGlyphs) { + append_tounicode_header(&cmap, firstGlyphID, lastGlyphID); + } else { + append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1); + } + SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, + firstGlyphID, lastGlyphID); + append_cmap_footer(&cmap); + return sk_make_sp<SkPDFStream>( + std::unique_ptr<SkStreamAsset>(cmap.detachAsStream())); +} diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.h b/src/pdf/SkPDFMakeToUnicodeCmap.h new file mode 100644 index 0000000000..1bd8930742 --- /dev/null +++ b/src/pdf/SkPDFMakeToUnicodeCmap.h @@ -0,0 +1,29 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#ifndef SkPDFMakeToUnicodeCmap_DEFINED +#define SkPDFMakeToUnicodeCmap_DEFINED + +#include "SkTDArray.h" +#include "SkPDFFont.h" +#include "SkStream.h" + +sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap( + const SkTDArray<SkUnichar>& glyphToUnicode, + const SkPDFGlyphSet* subset, + bool multiByteGlyphs, + SkGlyphID firstGlyphID, + SkGlyphID lastGlyphID); + +// Exposed for unit testing. +void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode, + const SkPDFGlyphSet* subset, + SkDynamicMemoryWStream* cmap, + bool multiByteGlyphs, + SkGlyphID firstGlyphID, + SkGlyphID lastGlyphID); + +#endif // SkPDFMakeToUnicodeCmap_DEFINED diff --git a/tests/PDFGlyphsToUnicodeTest.cpp b/tests/PDFGlyphsToUnicodeTest.cpp index f0425d94fc..b8157caeea 100644 --- a/tests/PDFGlyphsToUnicodeTest.cpp +++ b/tests/PDFGlyphsToUnicodeTest.cpp @@ -7,7 +7,7 @@ #include "SkData.h" #include "SkPDFFont.h" -#include "SkPDFTypes.h" +#include "SkPDFMakeToUnicodeCmap.h" #include "SkStream.h" #include "Test.h" @@ -23,13 +23,6 @@ static bool stream_equals(const SkDynamicMemoryWStream& stream, size_t offset, return memcmp(data->bytes() + offset, buffer, len) == 0; } -void append_cmap_sections(const SkTDArray<SkUnichar>& glyphToUnicode, - const SkPDFGlyphSet* subset, - SkDynamicMemoryWStream* cmap, - bool multiByteGlyphs, - uint16_t firstGlypthID, - uint16_t lastGlypthID); - DEF_TEST(ToUnicode, reporter) { SkTDArray<SkUnichar> glyphToUnicode; SkTDArray<uint16_t> glyphsInSubset; @@ -73,7 +66,7 @@ DEF_TEST(ToUnicode, reporter) { SkDynamicMemoryWStream buffer; subset.set(glyphsInSubset.begin(), glyphsInSubset.count()); - append_cmap_sections(glyphToUnicode, &subset, &buffer, true, 0, 0xFFFF); + SkPDFAppendCmapSections(glyphToUnicode, &subset, &buffer, true, 0, 0xFFFF); char expectedResult[] = "4 beginbfchar\n\ @@ -95,7 +88,7 @@ endbfrange\n"; // Remove characters and ranges. buffer.reset(); - append_cmap_sections(glyphToUnicode, &subset, &buffer, true, 8, 0x00FF); + SkPDFAppendCmapSections(glyphToUnicode, &subset, &buffer, true, 8, 0x00FF); char expectedResultChop1[] = "2 beginbfchar\n\ @@ -113,7 +106,7 @@ endbfrange\n"; // Remove characters from range to downdrade it to one char. buffer.reset(); - append_cmap_sections(glyphToUnicode, &subset, &buffer, true, 0x00D, 0x00FE); + SkPDFAppendCmapSections(glyphToUnicode, &subset, &buffer, true, 0x00D, 0x00FE); char expectedResultChop2[] = "2 beginbfchar\n\ @@ -126,7 +119,7 @@ endbfchar\n"; buffer.reset(); - append_cmap_sections(glyphToUnicode, nullptr, &buffer, false, 0xFC, 0x110); + SkPDFAppendCmapSections(glyphToUnicode, nullptr, &buffer, false, 0xFC, 0x110); char expectedResultSingleBytes[] = "2 beginbfchar\n\ @@ -162,7 +155,7 @@ endbfrange\n"; SkDynamicMemoryWStream buffer2; subset2.set(glyphsInSubset.begin(), glyphsInSubset.count()); - append_cmap_sections(glyphToUnicode, &subset2, &buffer2, true, 0, 0xffff); + SkPDFAppendCmapSections(glyphToUnicode, &subset2, &buffer2, true, 0, 0xffff); char expectedResult2[] = "4 beginbfchar\n\ |