about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- gyp/pdf.gypi 5
-rw-r--r-- include/core/SkTypeface.h 1
-rw-r--r-- src/pdf/SkPDFConvertType1FontStream.cpp 205
-rw-r--r-- src/pdf/SkPDFConvertType1FontStream.h 28
-rw-r--r-- src/pdf/SkPDFFont.cpp 666
-rw-r--r-- src/pdf/SkPDFFont.h 6
-rw-r--r-- src/pdf/SkPDFFontImpl.h 91
-rw-r--r-- src/pdf/SkPDFMakeToUnicodeCmap.cpp 230
-rw-r--r-- src/pdf/SkPDFMakeToUnicodeCmap.h 29
-rw-r--r-- tests/PDFGlyphsToUnicodeTest.cpp 19
10 files changed, 638 insertions, 642 deletions
diff --git a/gyp/pdf.gypi b/gyp/pdf.gypi
index 4fa46f2f31..18a773e2f3 100644
--- a/gyp/pdf.gypi
+++ b/gyp/pdf.gypi
@@ -22,17 +22,20 @@
'<(skia_src_path)/pdf/SkPDFCanon.h',
'<(skia_src_path)/pdf/SkPDFCanvas.cpp',
'<(skia_src_path)/pdf/SkPDFCanvas.h',
+ '<(skia_src_path)/pdf/SkPDFConvertType1FontStream.cpp',
+ '<(skia_src_path)/pdf/SkPDFConvertType1FontStream.h',
'<(skia_src_path)/pdf/SkPDFDevice.cpp',
'<(skia_src_path)/pdf/SkPDFDevice.h',
'<(skia_src_path)/pdf/SkPDFDocument.cpp',
'<(skia_src_path)/pdf/SkPDFDocument.h',
'<(skia_src_path)/pdf/SkPDFFont.cpp',
'<(skia_src_path)/pdf/SkPDFFont.h',
- '<(skia_src_path)/pdf/SkPDFFontImpl.h',
'<(skia_src_path)/pdf/SkPDFFormXObject.cpp',
'<(skia_src_path)/pdf/SkPDFFormXObject.h',
'<(skia_src_path)/pdf/SkPDFGraphicState.cpp',
'<(skia_src_path)/pdf/SkPDFGraphicState.h',
+ '<(skia_src_path)/pdf/SkPDFMakeToUnicodeCmap.cpp',
+ '<(skia_src_path)/pdf/SkPDFMakeToUnicodeCmap.h',
'<(skia_src_path)/pdf/SkPDFMetadata.cpp',
'<(skia_src_path)/pdf/SkPDFMetadata.h',
'<(skia_src_path)/pdf/SkPDFResourceDict.cpp',
diff --git a/include/core/SkTypeface.h b/include/core/SkTypeface.h
index 30a0903bbd..23ca15e1f9 100644
--- a/include/core/SkTypeface.h
+++ b/include/core/SkTypeface.h
@@ -390,7 +390,6 @@ private:
friend class SkGTypeface;
friend class SkRandomTypeface;
friend class SkPDFFont;
- friend class SkPDFCIDFont;
friend class GrPathRendering;
friend class GrGLPathRendering;
diff --git a/src/pdf/SkPDFConvertType1FontStream.cpp b/src/pdf/SkPDFConvertType1FontStream.cpp
new file mode 100644
index 0000000000..d75da5c787
--- /dev/null
+++ b/src/pdf/SkPDFConvertType1FontStream.cpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkPDFConvertType1FontStream.h"
+
+#include <ctype.h>
+
+// Parses one PFB section at *src and, on success, advances *src and *len
+// past it (section header plus payload).
+//
+// PFB sections have a two or six byte header: 0x80 and a one byte section
+// type, followed (except for type three) by a four byte little-endian
+// section length.  Type one is an ASCII section, type two is a binary
+// section and type three is an EOF marker with no length.
+//
+// Returns false when too few bytes remain for the header or payload, or when
+// the section found is not of type sectionType.  For types one and two the
+// payload length is stored in *size.  For type three *size is not written
+// (it may be null) and *src / *len are left where they are.
+static bool parsePFBSection(const uint8_t** src, size_t* len, int sectionType,
+                            size_t* size) {
+    static const size_t kSectionHeaderLength = 6;
+    const uint8_t* buf = *src;
+    if (*len < 2 || buf[0] != 0x80 || buf[1] != sectionType) {
+        return false;
+    } else if (buf[1] == 3) {
+        return true;
+    } else if (*len < kSectionHeaderLength) {
+        return false;
+    }
+
+    *size = (size_t)buf[2] | ((size_t)buf[3] << 8) | ((size_t)buf[4] << 16) |
+            ((size_t)buf[5] << 24);
+    // Compare against the bytes left after the header instead of forming
+    // *size + 6 first: where size_t is 32 bits that sum can wrap around and
+    // let an oversized section slip past the bounds check.
+    if (*size > *len - kSectionHeaderLength) {
+        return false;
+    }
+    const size_t consumed = *size + kSectionHeaderLength;
+    *src = *src + consumed;
+    *len = *len - consumed;
+    return true;
+}
+
+// Walks the four sections this parser expects in a PFB file, in order: an
+// ASCII section (type 1), a binary section (type 2), a second ASCII section
+// (type 1) and the EOF marker (type 3).  The three payload lengths are
+// stored through headerLen, dataLen and trailerLen.  Returns true only if
+// every section parses.
+static bool parsePFB(const uint8_t* src, size_t size, size_t* headerLen,
+                     size_t* dataLen, size_t* trailerLen) {
+    const uint8_t* cursor = src;
+    size_t bytesLeft = size;
+
+    if (!parsePFBSection(&cursor, &bytesLeft, 1, headerLen)) {
+        return false;
+    }
+    if (!parsePFBSection(&cursor, &bytesLeft, 2, dataLen)) {
+        return false;
+    }
+    if (!parsePFBSection(&cursor, &bytesLeft, 1, trailerLen)) {
+        return false;
+    }
+    return parsePFBSection(&cursor, &bytesLeft, 3, nullptr);
+}
+
+/* The sections of a PFA file are implicitly defined.  The body starts
+ * after the line containing "eexec," and the trailer starts with 512
+ * literal 0's followed by "cleartomark" (plus arbitrary white space).
+ *
+ * This function assumes that src is NUL terminated, but the NUL
+ * termination is not included in size.
+ *
+ * On success returns true and stores:
+ *   *headerLen   bytes from src up to the first byte after "eexec" that is
+ *                not '\n', '\r' or ' ',
+ *   *hexDataLen  bytes of hex text (including any white space inside it),
+ *   *dataLen     bytes the hex text decodes to (an odd nibble rounds up),
+ *   *trailerLen  all remaining bytes of src.
+ * Returns false if "eexec" or a following "cleartomark" is missing, if
+ * "cleartomark" is not preceded by 512 '0' characters (white space between
+ * them is ignored), or if the data section holds anything other than hex
+ * digits and white space.  *headerLen may already have been written when
+ * false is returned.
+ */
+static bool parsePFA(const char* src, size_t size, size_t* headerLen,
+                     size_t* hexDataLen, size_t* dataLen, size_t* trailerLen) {
+    const char* end = src + size;
+
+    const char* dataPos = strstr(src, "eexec");
+    if (!dataPos) {
+        return false;
+    }
+    dataPos += strlen("eexec");
+    // Test the bound before dereferencing.
+    while (dataPos < end &&
+           (*dataPos == '\n' || *dataPos == '\r' || *dataPos == ' ')) {
+        dataPos++;
+    }
+    *headerLen = dataPos - src;
+
+    const char* trailerPos = strstr(dataPos, "cleartomark");
+    if (!trailerPos) {
+        return false;
+    }
+    // Scan backwards from "cleartomark" over exactly 512 '0' characters.
+    int zeroCount = 0;
+    for (trailerPos--; trailerPos > dataPos && zeroCount < 512; trailerPos--) {
+        if (*trailerPos == '\n' || *trailerPos == '\r' || *trailerPos == ' ') {
+            continue;
+        } else if (*trailerPos == '0') {
+            zeroCount++;
+        } else {
+            return false;
+        }
+    }
+    if (zeroCount != 512) {
+        return false;
+    }
+
+    // NOTE(review): the loop's final decrement leaves trailerPos one byte
+    // before the first counted '0', so that byte is attributed to the trailer
+    // rather than to the hex data -- confirm this is intended.
+    *hexDataLen = trailerPos - src - *headerLen;
+    *trailerLen = size - *headerLen - *hexDataLen;
+
+    // Verify that the data section is hex encoded and count the bytes.
+    size_t nibbles = 0;
+    for (; dataPos < trailerPos; dataPos++) {
+        // The <ctype.h> classifiers require an unsigned char value (or EOF);
+        // a plain char holding a byte >= 0x80 may be negative, which is
+        // undefined behavior.
+        const unsigned char c = (unsigned char)*dataPos;
+        if (isspace(c)) {
+            continue;
+        }
+        if (!isxdigit(c)) {
+            return false;
+        }
+        nibbles++;
+    }
+    *dataLen = (nibbles + 1) / 2;
+
+    return true;
+}
+
+// Maps one ASCII hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its value 0..15.
+// Any other input yields -1.
+static int8_t hexToBin(uint8_t c) {
+    if (!isxdigit(c)) {
+        return -1;
+    }
+    int base;
+    if (c <= '9') {
+        base = '0';
+    } else if (c <= 'F') {
+        base = 'A' - 10;
+    } else if (c <= 'f') {
+        base = 'a' - 10;
+    } else {
+        return -1;
+    }
+    return c - base;
+}
+
+// Reads a Type 1 font from srcStream and returns its bytes as one buffer laid
+// out header, then binary data, then trailer; the three part lengths are
+// stored in *headerLen, *dataLen and *trailerLen.
+//
+// If the input parses as PFB, the payloads are copied without their section
+// headers.  Otherwise it is parsed as PFA and the hex-encoded data section
+// is decoded to binary.  Returns nullptr when the stream is null or empty,
+// cannot be read in full, or parses as neither format.
+sk_sp<SkData> SkPDFConvertType1FontStream(
+        std::unique_ptr<SkStreamAsset> srcStream, size_t* headerLen,
+        size_t* dataLen, size_t* trailerLen) {
+    size_t srcLen = srcStream ? srcStream->getLength() : 0;
+    SkASSERT(srcLen);
+    if (!srcLen) {
+        return nullptr;
+    }
+    // Flatten and Nul-terminate the source stream so that we can use
+    // strstr() to search it.
+    SkAutoTMalloc<uint8_t> sourceBuffer(SkToInt(srcLen + 1));
+    if (srcStream->read(sourceBuffer.get(), srcLen) != srcLen) {
+        // A short read would leave the rest of the buffer unfilled; do not
+        // parse it.
+        return nullptr;
+    }
+    sourceBuffer[SkToInt(srcLen)] = 0;
+    const uint8_t* src = sourceBuffer.get();
+
+    if (parsePFB(src, srcLen, headerLen, dataLen, trailerLen)) {
+        static const int kPFBSectionHeaderLength = 6;
+        const size_t length = *headerLen + *dataLen + *trailerLen;
+        SkASSERT(length > 0);
+        SkASSERT(length + (3 * kPFBSectionHeaderLength) <= srcLen);
+
+        sk_sp<SkData> data(SkData::MakeUninitialized(length));
+
+        // parsePFB() only succeeds when each of the header, data and trailer
+        // sections is preceded by a six-byte section header.  Those section
+        // headers are skipped, not copied.
+        const uint8_t* const srcHeader = src + kPFBSectionHeaderLength;
+        const uint8_t* const srcData = srcHeader + *headerLen + kPFBSectionHeaderLength;
+        // The trailer payload follows the data payload and its own section
+        // header, so the offset is based on *dataLen (not *headerLen).
+        const uint8_t* const srcTrailer = srcData + *dataLen + kPFBSectionHeaderLength;
+
+        uint8_t* const resultHeader = (uint8_t*)data->writable_data();
+        uint8_t* const resultData = resultHeader + *headerLen;
+        uint8_t* const resultTrailer = resultData + *dataLen;
+
+        SkASSERT(resultTrailer + *trailerLen == resultHeader + length);
+
+        memcpy(resultHeader, srcHeader, *headerLen);
+        memcpy(resultData, srcData, *dataLen);
+        memcpy(resultTrailer, srcTrailer, *trailerLen);
+
+        return data;
+    }
+
+    // A PFA has to be converted for PDF.
+    size_t hexDataLen;
+    if (!parsePFA((const char*)src, srcLen, headerLen, &hexDataLen, dataLen,
+                  trailerLen)) {
+        return nullptr;
+    }
+    const size_t length = *headerLen + *dataLen + *trailerLen;
+    SkASSERT(length > 0);
+    auto data = SkData::MakeUninitialized(length);
+    uint8_t* buffer = (uint8_t*)data->writable_data();
+
+    memcpy(buffer, src, *headerLen);
+    uint8_t* const resultData = &(buffer[*headerLen]);
+
+    // Decode the hex text two nibbles per output byte; bytes that are not
+    // hex digits (white space) are skipped.
+    const uint8_t* hexData = src + *headerLen;
+    const uint8_t* trailer = hexData + hexDataLen;
+    size_t outputOffset = 0;
+    uint8_t dataByte = 0;  // To hush compiler.
+    bool highNibble = true;
+    for (; hexData < trailer; hexData++) {
+        int8_t curNibble = hexToBin(*hexData);
+        if (curNibble < 0) {
+            continue;
+        }
+        if (highNibble) {
+            dataByte = curNibble << 4;
+            highNibble = false;
+        } else {
+            dataByte |= curNibble;
+            highNibble = true;
+            resultData[outputOffset++] = dataByte;
+        }
+    }
+    // An odd number of nibbles leaves a pending high nibble; emit it with a
+    // zero low nibble.
+    if (!highNibble) {
+        resultData[outputOffset++] = dataByte;
+    }
+    SkASSERT(outputOffset == *dataLen);
+
+    uint8_t* const resultTrailer = &(buffer[SkToInt(*headerLen + outputOffset)]);
+    memcpy(resultTrailer, src + *headerLen + hexDataLen, *trailerLen);
+    return data;
+}
diff --git a/src/pdf/SkPDFConvertType1FontStream.h b/src/pdf/SkPDFConvertType1FontStream.h
new file mode 100644
index 0000000000..ffd2da3093
--- /dev/null
+++ b/src/pdf/SkPDFConvertType1FontStream.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkPDFConvertType1FontStream_DEFINED
+#define SkPDFConvertType1FontStream_DEFINED
+
+#include "SkData.h"
+#include "SkStream.h"
+
+/*
+ "A standard Type 1 font program, as described in the Adobe Type 1
+ Font Format specification, consists of three parts: a clear-text
+ portion (written using PostScript syntax), an encrypted portion, and
+ a fixed-content portion. The fixed-content portion contains 512
+ ASCII zeros followed by a cleartomark operator, and perhaps followed
+ by additional data. Although the encrypted portion of a standard
+ Type 1 font may be in binary or ASCII hexadecimal format, PDF
+ supports only the binary format."
+*/
+// Reads the Type 1 font in srcStream (PFB or PFA) and returns its header,
+// binary data and trailer parts back to back in one SkData, storing the
+// length of each part in *headerLen, *dataLen and *trailerLen.  Returns
+// nullptr if srcStream is null or empty, or if it parses as neither format.
+sk_sp<SkData> SkPDFConvertType1FontStream(
+ std::unique_ptr<SkStreamAsset> srcStream, size_t* headerLen,
+ size_t* dataLen, size_t* trailerLen);
+
+#endif // SkPDFConvertType1FontStream_DEFINED
diff --git a/src/pdf/SkPDFFont.cpp b/src/pdf/SkPDFFont.cpp
index 093ad4a90d..ff1dd2d76d 100644
--- a/src/pdf/SkPDFFont.cpp
+++ b/src/pdf/SkPDFFont.cpp
@@ -5,15 +5,14 @@
* found in the LICENSE file.
*/
-#include <ctype.h>
-
#include "SkData.h"
#include "SkGlyphCache.h"
#include "SkPaint.h"
#include "SkPDFCanon.h"
+#include "SkPDFConvertType1FontStream.h"
#include "SkPDFDevice.h"
+#include "SkPDFMakeToUnicodeCmap.h"
#include "SkPDFFont.h"
-#include "SkPDFFontImpl.h"
#include "SkPDFUtils.h"
#include "SkRefCnt.h"
#include "SkScalar.h"
@@ -31,6 +30,8 @@
#endif
#endif
+namespace {
+
// PDF's notion of symbolic vs non-symbolic is related to the character set, not
// symbols vs. characters. Rarely is a font the right character set to call it
// non-symbolic, so always call it symbolic. (PDF 1.4 spec, section 5.7.1)
@@ -53,7 +54,66 @@ struct AdvanceMetric {
AdvanceMetric& operator=(const AdvanceMetric&) = delete;
};
-namespace {
+// SkPDFFont subclass that reports multi-byte glyph IDs and can produce a
+// subset of itself via getFontSubset().
+class SkPDFType0Font final : public SkPDFFont {
+public:
+ SkPDFType0Font(const SkAdvancedTypefaceMetrics* info,
+ SkTypeface* typeface);
+ virtual ~SkPDFType0Font();
+ bool multiByteGlyphs() const override { return true; }
+ SkPDFFont* getFontSubset(const SkPDFGlyphSet* usage) override;
+#ifdef SK_DEBUG
+ // Overridden in SK_DEBUG builds only.
+ void emitObject(SkWStream*,
+ const SkPDFObjNumMap&,
+ const SkPDFSubstituteMap&) const override;
+#endif
+
+private:
+#ifdef SK_DEBUG
+ // Exists in SK_DEBUG builds only.
+ bool fPopulated;
+#endif
+ bool populate(const SkPDFGlyphSet* subset);
+ // NOTE(review): INHERITED names SkPDFDict although the direct base class
+ // is SkPDFFont -- confirm this is intended.
+ typedef SkPDFDict INHERITED;
+};
+
+// SkPDFFont subclass that reports multi-byte glyph IDs; constructed with an
+// optional glyph subset.
+class SkPDFCIDFont final : public SkPDFFont {
+public:
+ SkPDFCIDFont(const SkAdvancedTypefaceMetrics* info,
+ SkTypeface* typeface,
+ const SkPDFGlyphSet* subset);
+ virtual ~SkPDFCIDFont();
+ bool multiByteGlyphs() const override { return true; }
+
+private:
+ bool populate(const SkPDFGlyphSet* subset);
+ // subset may be nullptr; populate() passes nullptr for fonts that are not
+ // kTrueType_Font.
+ bool addFontDescriptor(int16_t defaultWidth,
+ const SkTDArray<uint32_t>* subset);
+};
+
+// SkPDFFont subclass that reports single-byte glyph IDs
+// (multiByteGlyphs() is false).
+class SkPDFType1Font final : public SkPDFFont {
+public:
+ SkPDFType1Font(const SkAdvancedTypefaceMetrics* info,
+ SkTypeface* typeface,
+ uint16_t glyphID,
+ SkPDFDict* relatedFontDescriptor);
+ virtual ~SkPDFType1Font();
+ bool multiByteGlyphs() const override { return false; }
+
+private:
+ // NOTE(review): takes int16_t while the constructor (and
+ // SkPDFType3Font::populate) take uint16_t glyph IDs -- confirm intended.
+ bool populate(int16_t glyphID);
+ bool addFontDescriptor(int16_t defaultWidth);
+};
+
+// SkPDFFont subclass that reports single-byte glyph IDs
+// (multiByteGlyphs() is false).
+class SkPDFType3Font final : public SkPDFFont {
+public:
+ SkPDFType3Font(const SkAdvancedTypefaceMetrics* info,
+ SkTypeface* typeface,
+ uint16_t glyphID);
+ virtual ~SkPDFType3Font();
+ bool multiByteGlyphs() const override { return false; }
+
+private:
+ bool populate(uint16_t glyphID);
+};
///////////////////////////////////////////////////////////////////////////////
// File-Local Functions
@@ -260,233 +320,8 @@ static void get_glyph_widths(SkSinglyLinkedList<AdvanceMetric>* glyphWidths,
////////////////////////////////////////////////////////////////////////////////
-bool parsePFBSection(const uint8_t** src, size_t* len, int sectionType,
- size_t* size) {
- // PFB sections have a two or six bytes header. 0x80 and a one byte
- // section type followed by a four byte section length. Type one is
- // an ASCII section (includes a length), type two is a binary section
- // (includes a length) and type three is an EOF marker with no length.
- const uint8_t* buf = *src;
- if (*len < 2 || buf[0] != 0x80 || buf[1] != sectionType) {
- return false;
- } else if (buf[1] == 3) {
- return true;
- } else if (*len < 6) {
- return false;
- }
-
- *size = (size_t)buf[2] | ((size_t)buf[3] << 8) | ((size_t)buf[4] << 16) |
- ((size_t)buf[5] << 24);
- size_t consumed = *size + 6;
- if (consumed > *len) {
- return false;
- }
- *src = *src + consumed;
- *len = *len - consumed;
- return true;
-}
-
-bool parsePFB(const uint8_t* src, size_t size, size_t* headerLen,
- size_t* dataLen, size_t* trailerLen) {
- const uint8_t* srcPtr = src;
- size_t remaining = size;
-
- return parsePFBSection(&srcPtr, &remaining, 1, headerLen) &&
- parsePFBSection(&srcPtr, &remaining, 2, dataLen) &&
- parsePFBSection(&srcPtr, &remaining, 1, trailerLen) &&
- parsePFBSection(&srcPtr, &remaining, 3, nullptr);
-}
-
-/* The sections of a PFA file are implicitly defined. The body starts
- * after the line containing "eexec," and the trailer starts with 512
- * literal 0's followed by "cleartomark" (plus arbitrary white space).
- *
- * This function assumes that src is NUL terminated, but the NUL
- * termination is not included in size.
- *
- */
-bool parsePFA(const char* src, size_t size, size_t* headerLen,
- size_t* hexDataLen, size_t* dataLen, size_t* trailerLen) {
- const char* end = src + size;
-
- const char* dataPos = strstr(src, "eexec");
- if (!dataPos) {
- return false;
- }
- dataPos += strlen("eexec");
- while ((*dataPos == '\n' || *dataPos == '\r' || *dataPos == ' ') &&
- dataPos < end) {
- dataPos++;
- }
- *headerLen = dataPos - src;
-
- const char* trailerPos = strstr(dataPos, "cleartomark");
- if (!trailerPos) {
- return false;
- }
- int zeroCount = 0;
- for (trailerPos--; trailerPos > dataPos && zeroCount < 512; trailerPos--) {
- if (*trailerPos == '\n' || *trailerPos == '\r' || *trailerPos == ' ') {
- continue;
- } else if (*trailerPos == '0') {
- zeroCount++;
- } else {
- return false;
- }
- }
- if (zeroCount != 512) {
- return false;
- }
-
- *hexDataLen = trailerPos - src - *headerLen;
- *trailerLen = size - *headerLen - *hexDataLen;
-
- // Verify that the data section is hex encoded and count the bytes.
- int nibbles = 0;
- for (; dataPos < trailerPos; dataPos++) {
- if (isspace(*dataPos)) {
- continue;
- }
- if (!isxdigit(*dataPos)) {
- return false;
- }
- nibbles++;
- }
- *dataLen = (nibbles + 1) / 2;
-
- return true;
-}
-
-int8_t hexToBin(uint8_t c) {
- if (!isxdigit(c)) {
- return -1;
- } else if (c <= '9') {
- return c - '0';
- } else if (c <= 'F') {
- return c - 'A' + 10;
- } else if (c <= 'f') {
- return c - 'a' + 10;
- }
- return -1;
-}
-
-static sk_sp<SkData> handle_type1_stream(SkStream* srcStream, size_t* headerLen,
- size_t* dataLen, size_t* trailerLen) {
- // srcStream may be backed by a file or a unseekable fd, so we may not be
- // able to use skip(), rewind(), or getMemoryBase(). read()ing through
- // the input only once is doable, but very ugly. Furthermore, it'd be nice
- // if the data was NUL terminated so that we can use strstr() to search it.
- // Make as few copies as possible given these constraints.
- SkDynamicMemoryWStream dynamicStream;
- std::unique_ptr<SkMemoryStream> staticStream;
- sk_sp<SkData> data;
- const uint8_t* src;
- size_t srcLen;
- if ((srcLen = srcStream->getLength()) > 0) {
- staticStream.reset(new SkMemoryStream(srcLen + 1));
- src = (const uint8_t*)staticStream->getMemoryBase();
- if (srcStream->getMemoryBase() != nullptr) {
- memcpy((void *)src, srcStream->getMemoryBase(), srcLen);
- } else {
- size_t read = 0;
- while (read < srcLen) {
- size_t got = srcStream->read((void *)staticStream->getAtPos(),
- srcLen - read);
- if (got == 0) {
- return nullptr;
- }
- read += got;
- staticStream->seek(read);
- }
- }
- ((uint8_t *)src)[srcLen] = 0;
- } else {
- static const size_t kBufSize = 4096;
- uint8_t buf[kBufSize];
- size_t amount;
- while ((amount = srcStream->read(buf, kBufSize)) > 0) {
- dynamicStream.write(buf, amount);
- }
- amount = 0;
- dynamicStream.write(&amount, 1); // nullptr terminator.
- data.reset(dynamicStream.copyToData());
- src = data->bytes();
- srcLen = data->size() - 1;
- }
-
- if (parsePFB(src, srcLen, headerLen, dataLen, trailerLen)) {
- static const int kPFBSectionHeaderLength = 6;
- const size_t length = *headerLen + *dataLen + *trailerLen;
- SkASSERT(length > 0);
- SkASSERT(length + (2 * kPFBSectionHeaderLength) <= srcLen);
-
- sk_sp<SkData> data(SkData::MakeUninitialized(length));
-
- const uint8_t* const srcHeader = src + kPFBSectionHeaderLength;
- // There is a six-byte section header before header and data
- // (but not trailer) that we're not going to copy.
- const uint8_t* const srcData = srcHeader + *headerLen + kPFBSectionHeaderLength;
- const uint8_t* const srcTrailer = srcData + *headerLen;
-
- uint8_t* const resultHeader = (uint8_t*)data->writable_data();
- uint8_t* const resultData = resultHeader + *headerLen;
- uint8_t* const resultTrailer = resultData + *dataLen;
-
- SkASSERT(resultTrailer + *trailerLen == resultHeader + length);
-
- memcpy(resultHeader, srcHeader, *headerLen);
- memcpy(resultData, srcData, *dataLen);
- memcpy(resultTrailer, srcTrailer, *trailerLen);
-
- return data;
- }
-
- // A PFA has to be converted for PDF.
- size_t hexDataLen;
- if (parsePFA((const char*)src, srcLen, headerLen, &hexDataLen, dataLen,
- trailerLen)) {
- const size_t length = *headerLen + *dataLen + *trailerLen;
- SkASSERT(length > 0);
- SkAutoTMalloc<uint8_t> buffer(length);
-
- memcpy(buffer.get(), src, *headerLen);
- uint8_t* const resultData = &(buffer[SkToInt(*headerLen)]);
-
- const uint8_t* hexData = src + *headerLen;
- const uint8_t* trailer = hexData + hexDataLen;
- size_t outputOffset = 0;
- uint8_t dataByte = 0; // To hush compiler.
- bool highNibble = true;
- for (; hexData < trailer; hexData++) {
- int8_t curNibble = hexToBin(*hexData);
- if (curNibble < 0) {
- continue;
- }
- if (highNibble) {
- dataByte = curNibble << 4;
- highNibble = false;
- } else {
- dataByte |= curNibble;
- highNibble = true;
- resultData[outputOffset++] = dataByte;
- }
- }
- if (!highNibble) {
- resultData[outputOffset++] = dataByte;
- }
- SkASSERT(outputOffset == *dataLen);
-
- uint8_t* const resultTrailer = &(buffer[SkToInt(*headerLen + outputOffset)]);
- memcpy(resultTrailer, src + *headerLen + hexDataLen, *trailerLen);
-
- return SkData::MakeFromMalloc(buffer.release(), length);
- }
- return nullptr;
-}
-
// scale from em-units to base-1000, returning as a SkScalar
-SkScalar scaleFromFontUnits(int16_t val, uint16_t emSize) {
- SkScalar scaled = SkIntToScalar(val);
+SkScalar from_font_units(SkScalar scaled, uint16_t emSize) {
if (emSize == 1000) {
return scaled;
} else {
@@ -494,6 +329,11 @@ SkScalar scaleFromFontUnits(int16_t val, uint16_t emSize) {
}
}
+// Integer-input wrapper: converts val to SkScalar and forwards it to
+// from_font_units() together with emSize.
+SkScalar scaleFromFontUnits(int16_t val, uint16_t emSize) {
+    const SkScalar scalarVal = SkIntToScalar(val);
+    return from_font_units(scalarVal, emSize);
+}
+
+
void setGlyphWidthAndBoundingBox(SkScalar width, SkIRect box,
SkWStream* content) {
// Specify width and bounding box for the glyph.
@@ -555,233 +395,6 @@ sk_sp<SkPDFArray> composeAdvanceData(
} // namespace
-static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
- uint16_t firstGlyphID,
- uint16_t lastGlyphID) {
- // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
- // It's there to prevent old version Adobe Readers from malfunctioning.
- const char* kHeader =
- "/CIDInit /ProcSet findresource begin\n"
- "12 dict begin\n"
- "begincmap\n";
- cmap->writeText(kHeader);
-
- // The /CIDSystemInfo must be consistent to the one in
- // SkPDFFont::populateCIDFont().
- // We can not pass over the system info object here because the format is
- // different. This is not a reference object.
- const char* kSysInfo =
- "/CIDSystemInfo\n"
- "<< /Registry (Adobe)\n"
- "/Ordering (UCS)\n"
- "/Supplement 0\n"
- ">> def\n";
- cmap->writeText(kSysInfo);
-
- // The CMapName must be consistent to /CIDSystemInfo above.
- // /CMapType 2 means ToUnicode.
- // Codespace range just tells the PDF processor the valid range.
- const char* kTypeInfoHeader =
- "/CMapName /Adobe-Identity-UCS def\n"
- "/CMapType 2 def\n"
- "1 begincodespacerange\n";
- cmap->writeText(kTypeInfoHeader);
-
- // e.g. "<0000> <FFFF>\n"
- SkString range;
- range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID);
- cmap->writeText(range.c_str());
-
- const char* kTypeInfoFooter = "endcodespacerange\n";
- cmap->writeText(kTypeInfoFooter);
-}
-
-static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
- const char* kFooter =
- "endcmap\n"
- "CMapName currentdict /CMap defineresource pop\n"
- "end\n"
- "end";
- cmap->writeText(kFooter);
-}
-
-struct BFChar {
- uint16_t fGlyphId;
- SkUnichar fUnicode;
-};
-
-struct BFRange {
- uint16_t fStart;
- uint16_t fEnd;
- SkUnichar fUnicode;
-};
-
-static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
- uint16_t utf16[2] = {0, 0};
- size_t len = SkUTF16_FromUnichar(utf32, utf16);
- SkASSERT(len == 1 || len == 2);
- SkPDFUtils::WriteUInt16BE(wStream, utf16[0]);
- if (len == 2) {
- SkPDFUtils::WriteUInt16BE(wStream, utf16[1]);
- }
-}
-
-static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
- SkDynamicMemoryWStream* cmap) {
- // PDF spec defines that every bf* list can have at most 100 entries.
- for (int i = 0; i < bfchar.count(); i += 100) {
- int count = bfchar.count() - i;
- count = SkMin32(count, 100);
- cmap->writeDecAsText(count);
- cmap->writeText(" beginbfchar\n");
- for (int j = 0; j < count; ++j) {
- cmap->writeText("<");
- SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId);
- cmap->writeText("> <");
- write_utf16be(cmap, bfchar[i + j].fUnicode);
- cmap->writeText(">\n");
- }
- cmap->writeText("endbfchar\n");
- }
-}
-
-static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
- SkDynamicMemoryWStream* cmap) {
- // PDF spec defines that every bf* list can have at most 100 entries.
- for (int i = 0; i < bfrange.count(); i += 100) {
- int count = bfrange.count() - i;
- count = SkMin32(count, 100);
- cmap->writeDecAsText(count);
- cmap->writeText(" beginbfrange\n");
- for (int j = 0; j < count; ++j) {
- cmap->writeText("<");
- SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart);
- cmap->writeText("> <");
- SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd);
- cmap->writeText("> <");
- write_utf16be(cmap, bfrange[i + j].fUnicode);
- cmap->writeText(">\n");
- }
- cmap->writeText("endbfrange\n");
- }
-}
-
-// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
-// Technote 5014.
-// The function is not static so we can test it in unit tests.
-//
-// Current implementation guarantees bfchar and bfrange entries do not overlap.
-//
-// Current implementation does not attempt aggresive optimizations against
-// following case because the specification is not clear.
-//
-// 4 beginbfchar 1 beginbfchar
-// <0003> <0013> <0020> <0014>
-// <0005> <0015> to endbfchar
-// <0007> <0017> 1 beginbfrange
-// <0020> <0014> <0003> <0007> <0013>
-// endbfchar endbfrange
-//
-// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
-// overlap, but succeeding maps supersede preceding maps."
-//
-// In case of searching text in PDF, bfrange will have higher precedence so
-// typing char id 0x0014 in search box will get glyph id 0x0004 first. However,
-// the spec does not mention how will this kind of conflict being resolved.
-//
-// For the worst case (having 65536 continuous unicode and we use every other
-// one of them), the possible savings by aggressive optimization is 416KB
-// pre-compressed and does not provide enough motivation for implementation.
-
-// TODO(halcanary): this should be in a header so that it is separately testable
-// ( see caller in tests/ToUnicode.cpp )
-void append_cmap_sections(const SkTDArray<SkUnichar>& glyphToUnicode,
- const SkPDFGlyphSet* subset,
- SkDynamicMemoryWStream* cmap,
- bool multiByteGlyphs,
- uint16_t firstGlyphID,
- uint16_t lastGlyphID);
-
-void append_cmap_sections(const SkTDArray<SkUnichar>& glyphToUnicode,
- const SkPDFGlyphSet* subset,
- SkDynamicMemoryWStream* cmap,
- bool multiByteGlyphs,
- uint16_t firstGlyphID,
- uint16_t lastGlyphID) {
- if (glyphToUnicode.isEmpty()) {
- return;
- }
- int glyphOffset = 0;
- if (!multiByteGlyphs) {
- glyphOffset = firstGlyphID - 1;
- }
-
- SkTDArray<BFChar> bfcharEntries;
- SkTDArray<BFRange> bfrangeEntries;
-
- BFRange currentRangeEntry = {0, 0, 0};
- bool rangeEmpty = true;
- const int limit =
- SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;
-
- for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
- bool inSubset = i < limit &&
- (subset == nullptr || subset->has(i + glyphOffset));
- if (!rangeEmpty) {
- // PDF spec requires bfrange not changing the higher byte,
- // e.g. <1035> <10FF> <2222> is ok, but
- // <1035> <1100> <2222> is no good
- bool inRange =
- i == currentRangeEntry.fEnd + 1 &&
- i >> 8 == currentRangeEntry.fStart >> 8 &&
- i < limit &&
- glyphToUnicode[i + glyphOffset] ==
- currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
- if (!inSubset || !inRange) {
- if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
- bfrangeEntries.push(currentRangeEntry);
- } else {
- BFChar* entry = bfcharEntries.append();
- entry->fGlyphId = currentRangeEntry.fStart;
- entry->fUnicode = currentRangeEntry.fUnicode;
- }
- rangeEmpty = true;
- }
- }
- if (inSubset) {
- currentRangeEntry.fEnd = i;
- if (rangeEmpty) {
- currentRangeEntry.fStart = i;
- currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
- rangeEmpty = false;
- }
- }
- }
-
- // The spec requires all bfchar entries for a font must come before bfrange
- // entries.
- append_bfchar_section(bfcharEntries, cmap);
- append_bfrange_section(bfrangeEntries, cmap);
-}
-
-static sk_sp<SkPDFStream> generate_tounicode_cmap(
- const SkTDArray<SkUnichar>& glyphToUnicode,
- const SkPDFGlyphSet* subset,
- bool multiByteGlyphs,
- uint16_t firstGlyphID,
- uint16_t lastGlyphID) {
- SkDynamicMemoryWStream cmap;
- if (multiByteGlyphs) {
- append_tounicode_header(&cmap, firstGlyphID, lastGlyphID);
- } else {
- append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1);
- }
- append_cmap_sections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
- firstGlyphID, lastGlyphID);
- append_cmap_footer(&cmap);
- return sk_make_sp<SkPDFStream>(
- std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
-}
///////////////////////////////////////////////////////////////////////////////
// class SkPDFGlyphSet
@@ -1069,11 +682,11 @@ void SkPDFFont::populateToUnicodeTable(const SkPDFGlyphSet* subset) {
return;
}
this->insertObjRef("ToUnicode",
- generate_tounicode_cmap(fFontInfo->fGlyphToUnicode,
- subset,
- multiByteGlyphs(),
- firstGlyphID(),
- lastGlyphID()));
+ SkPDFMakeToUnicodeCmap(fFontInfo->fGlyphToUnicode,
+ subset,
+ multiByteGlyphs(),
+ firstGlyphID(),
+ lastGlyphID()));
}
///////////////////////////////////////////////////////////////////////////////
@@ -1274,6 +887,20 @@ void set_glyph_widths(SkTypeface* tf,
}
}
+// Calls typeface->getAdvancedTypefaceMetrics() requesting
+// SkTypeface::kGlyphNames_PerGlyphInfo for the given glyph IDs and wraps the
+// result in an sk_sp.
+sk_sp<const SkAdvancedTypefaceMetrics> SkPDFFont::GetFontMetricsWithGlyphNames(
+        SkTypeface* typeface, uint32_t* glyphs, uint32_t glyphsCount) {
+    const SkTypeface::PerGlyphInfo wanted = SkTypeface::kGlyphNames_PerGlyphInfo;
+    auto metrics = typeface->getAdvancedTypefaceMetrics(wanted, glyphs, glyphsCount);
+    return sk_sp<const SkAdvancedTypefaceMetrics>(metrics);
+}
+
+// Calls typeface->getAdvancedTypefaceMetrics() requesting
+// SkTypeface::kToUnicode_PerGlyphInfo for the given glyph IDs and wraps the
+// result in an sk_sp.
+sk_sp<const SkAdvancedTypefaceMetrics> SkPDFFont::GetFontMetricsWithToUnicode(
+        SkTypeface* typeface, uint32_t* glyphs, uint32_t glyphsCount) {
+    const SkTypeface::PerGlyphInfo wanted = SkTypeface::kToUnicode_PerGlyphInfo;
+    auto metrics = typeface->getAdvancedTypefaceMetrics(wanted, glyphs, glyphsCount);
+    return sk_sp<const SkAdvancedTypefaceMetrics>(metrics);
+}
+
bool SkPDFCIDFont::populate(const SkPDFGlyphSet* subset) {
// Generate new font metrics with advance info for true type fonts.
// Generate glyph id array.
@@ -1285,13 +912,12 @@ bool SkPDFCIDFont::populate(const SkPDFGlyphSet* subset) {
subset->exportTo(&glyphIDs);
}
if (fontInfo()->fType == SkAdvancedTypefaceMetrics::kTrueType_Font) {
- SkTypeface::PerGlyphInfo info = SkTypeface::kGlyphNames_PerGlyphInfo;
uint32_t* glyphs = (glyphIDs.count() == 0) ? nullptr : glyphIDs.begin();
uint32_t glyphsCount = glyphs ? glyphIDs.count() : 0;
- sk_sp<const SkAdvancedTypefaceMetrics> fontMetrics(
- typeface()->getAdvancedTypefaceMetrics(info, glyphs, glyphsCount));
- setFontInfo(fontMetrics.get());
- addFontDescriptor(0, &glyphIDs);
+ sk_sp<const SkAdvancedTypefaceMetrics> fontMetrics =
+ SkPDFFont::GetFontMetricsWithGlyphNames(this->typeface(), glyphs, glyphsCount);
+ this->setFontInfo(fontMetrics.get());
+ this->addFontDescriptor(0, &glyphIDs);
} else {
// Other CID fonts
addFontDescriptor(0, nullptr);
@@ -1357,13 +983,9 @@ bool SkPDFType1Font::addFontDescriptor(int16_t defaultWidth) {
size_t data SK_INIT_TO_AVOID_WARNING;
size_t trailer SK_INIT_TO_AVOID_WARNING;
std::unique_ptr<SkStreamAsset> rawFontData(typeface()->openStream(&ttcIndex));
- SkASSERT(rawFontData);
- SkASSERT(rawFontData->getLength() > 0);
- if (!rawFontData || 0 == rawFontData->getLength()) {
- return false;
- }
- sk_sp<SkData> fontData(handle_type1_stream(rawFontData.get(), &header, &data, &trailer));
- if (fontData.get() == nullptr) {
+ sk_sp<SkData> fontData = SkPDFConvertType1FontStream(std::move(rawFontData),
+ &header, &data, &trailer);
+ if (!fontData) {
return false;
}
SkASSERT(this->canEmbed());
@@ -1379,43 +1001,47 @@ bool SkPDFType1Font::addFontDescriptor(int16_t defaultWidth) {
}
bool SkPDFType1Font::populate(int16_t glyphID) {
+ this->insertName("Subtype", "Type1");
+ this->insertName("BaseFont", fontInfo()->fFontName);
adjustGlyphRangeForSingleByteEncoding(glyphID);
-
- int16_t defaultWidth = 0;
- const AdvanceMetric* widthRangeEntry = nullptr;
+ SkGlyphID firstGlyphID = this->firstGlyphID();
+ SkGlyphID lastGlyphID = this->lastGlyphID();
+
+ // glyphCount not including glyph 0
+ unsigned glyphCount = 1 + lastGlyphID - firstGlyphID;
+ SkASSERT(glyphCount > 0 && glyphCount <= 255);
+ this->insertInt("FirstChar", (size_t)0);
+ this->insertInt("LastChar", (size_t)glyphCount);
{
- SkSinglyLinkedList<AdvanceMetric> tmpMetrics;
- set_glyph_widths(this->typeface(), nullptr, &tmpMetrics);
- for (const auto& widthEntry : tmpMetrics) {
- switch (widthEntry.fType) {
- case AdvanceMetric::kDefault:
- defaultWidth = widthEntry.fAdvance[0];
- break;
- case AdvanceMetric::kRun:
- SkASSERT(false);
- break;
- case AdvanceMetric::kRange:
- SkASSERT(widthRangeEntry == nullptr);
- widthRangeEntry = &widthEntry;
- break;
- }
+ SkPaint tmpPaint;
+ tmpPaint.setHinting(SkPaint::kNo_Hinting);
+ tmpPaint.setTypeface(sk_ref_sp(this->typeface()));
+ tmpPaint.setTextSize((SkScalar)this->typeface()->getUnitsPerEm());
+ SkAutoGlyphCache glyphCache(tmpPaint, nullptr, nullptr);
+ auto widths = sk_make_sp<SkPDFArray>();
+ SkScalar advance = glyphCache->getGlyphIDAdvance(0).fAdvanceX;
+ const uint16_t emSize = this->fontInfo()->fEmSize;
+ widths->appendScalar(from_font_units(advance, emSize));
+ for (unsigned gID = firstGlyphID; gID <= lastGlyphID; gID++) {
+ advance = glyphCache->getGlyphIDAdvance(gID).fAdvanceX;
+ widths->appendScalar(from_font_units(advance, emSize));
}
+ this->insertObject("Widths", std::move(widths));
}
-
- if (!addFontDescriptor(defaultWidth)) {
+ if (!addFontDescriptor(0)) {
return false;
}
-
- insertName("Subtype", "Type1");
- insertName("BaseFont", fontInfo()->fFontName);
-
- addWidthInfoFromRange(defaultWidth, widthRangeEntry);
auto encDiffs = sk_make_sp<SkPDFArray>();
- encDiffs->reserve(lastGlyphID() - firstGlyphID() + 2);
- encDiffs->appendInt(1);
- SkASSERT(this->fontInfo()->fGlyphNames.count() >= this->lastGlyphID());
- for (int gID = firstGlyphID(); gID <= lastGlyphID(); gID++) {
- encDiffs->appendName(fontInfo()->fGlyphNames[gID].c_str());
+ encDiffs->reserve(lastGlyphID - firstGlyphID + 3);
+ encDiffs->appendInt(0);
+ const SkTArray<SkString>& glyphNames = this->fontInfo()->fGlyphNames;
+ SkASSERT(glyphNames.count() > lastGlyphID);
+ encDiffs->appendName(glyphNames[0].c_str());
+ const SkString unknown("UNKNOWN");
+ for (int gID = firstGlyphID; gID <= lastGlyphID; gID++) {
+ const bool valid = gID < glyphNames.count() && !glyphNames[gID].isEmpty();
+ const SkString& name = valid ? glyphNames[gID] : unknown;
+ encDiffs->appendName(name);
}
auto encoding = sk_make_sp<SkPDFDict>("Encoding");
@@ -1424,38 +1050,6 @@ bool SkPDFType1Font::populate(int16_t glyphID) {
return true;
}
-void SkPDFType1Font::addWidthInfoFromRange(
- int16_t defaultWidth,
- const AdvanceMetric* widthRangeEntry) {
- auto widthArray = sk_make_sp<SkPDFArray>();
- int firstChar = 0;
- if (widthRangeEntry) {
- const uint16_t emSize = fontInfo()->fEmSize;
- int startIndex = firstGlyphID() - widthRangeEntry->fStartId;
- int endIndex = startIndex + lastGlyphID() - firstGlyphID() + 1;
- if (startIndex < 0)
- startIndex = 0;
- if (endIndex > widthRangeEntry->fAdvance.count())
- endIndex = widthRangeEntry->fAdvance.count();
- if (widthRangeEntry->fStartId == 0) {
- widthArray->appendScalar(
- scaleFromFontUnits(widthRangeEntry->fAdvance[0], emSize));
- } else {
- firstChar = startIndex + widthRangeEntry->fStartId;
- }
- for (int i = startIndex; i < endIndex; i++) {
- widthArray->appendScalar(
- scaleFromFontUnits(widthRangeEntry->fAdvance[i], emSize));
- }
- } else {
- widthArray->appendScalar(
- scaleFromFontUnits(defaultWidth, 1000));
- }
- this->insertInt("FirstChar", firstChar);
- this->insertInt("LastChar", firstChar + widthArray->size() - 1);
- this->insertObject("Widths", std::move(widthArray));
-}
-
///////////////////////////////////////////////////////////////////////////////
// class SkPDFType3Font
///////////////////////////////////////////////////////////////////////////////
diff --git a/src/pdf/SkPDFFont.h b/src/pdf/SkPDFFont.h
index 0d73745984..c55f650769 100644
--- a/src/pdf/SkPDFFont.h
+++ b/src/pdf/SkPDFFont.h
@@ -120,6 +120,12 @@ public:
SkTypeface* typeface,
uint16_t glyphID);
+ static sk_sp<const SkAdvancedTypefaceMetrics> GetFontMetricsWithGlyphNames(
+ SkTypeface*, uint32_t* glyphs, uint32_t glyphsCount);
+
+ static sk_sp<const SkAdvancedTypefaceMetrics> GetFontMetricsWithToUnicode(
+ SkTypeface*, uint32_t* glyphs, uint32_t glyphsCount);
+
/** Subset the font based on usage set. Returns a SkPDFFont instance with
* subset.
* @param usage Glyph subset requested.
diff --git a/src/pdf/SkPDFFontImpl.h b/src/pdf/SkPDFFontImpl.h
deleted file mode 100644
index 05774de6b3..0000000000
--- a/src/pdf/SkPDFFontImpl.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright 2011 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-
-#ifndef SkPDFFontImpl_DEFINED
-#define SkPDFFontImpl_DEFINED
-
-#include "SkPDFFont.h"
-
-class SkPDFType0Font final : public SkPDFFont {
-public:
- virtual ~SkPDFType0Font();
- bool multiByteGlyphs() const override { return true; }
- SkPDFFont* getFontSubset(const SkPDFGlyphSet* usage) override;
-#ifdef SK_DEBUG
- void emitObject(SkWStream*,
- const SkPDFObjNumMap&,
- const SkPDFSubstituteMap&) const override;
-#endif
-
-private:
- friend class SkPDFFont; // to access the constructor
-#ifdef SK_DEBUG
- bool fPopulated;
- typedef SkPDFDict INHERITED;
-#endif
-
- SkPDFType0Font(const SkAdvancedTypefaceMetrics* info,
- SkTypeface* typeface);
-
- bool populate(const SkPDFGlyphSet* subset);
-};
-
-class SkPDFCIDFont final : public SkPDFFont {
-public:
- virtual ~SkPDFCIDFont();
- virtual bool multiByteGlyphs() const { return true; }
-
-private:
- friend class SkPDFType0Font; // to access the constructor
-
- SkPDFCIDFont(const SkAdvancedTypefaceMetrics* info,
- SkTypeface* typeface,
- const SkPDFGlyphSet* subset);
-
- bool populate(const SkPDFGlyphSet* subset);
- bool addFontDescriptor(int16_t defaultWidth,
- const SkTDArray<uint32_t>* subset);
-};
-
-struct AdvanceMetric;
-
-class SkPDFType1Font final : public SkPDFFont {
-public:
- virtual ~SkPDFType1Font();
- virtual bool multiByteGlyphs() const { return false; }
-
-private:
- friend class SkPDFFont; // to access the constructor
-
- SkPDFType1Font(const SkAdvancedTypefaceMetrics* info,
- SkTypeface* typeface,
- uint16_t glyphID,
- SkPDFDict* relatedFontDescriptor);
-
- bool populate(int16_t glyphID);
- bool addFontDescriptor(int16_t defaultWidth);
- void addWidthInfoFromRange(int16_t defaultWidth,
- const AdvanceMetric* widthRangeEntry);
-};
-
-class SkPDFType3Font final : public SkPDFFont {
-public:
- virtual ~SkPDFType3Font();
- virtual bool multiByteGlyphs() const { return false; }
-
-private:
- friend class SkPDFFont; // to access the constructor
-
- SkPDFType3Font(const SkAdvancedTypefaceMetrics* info,
- SkTypeface* typeface,
- uint16_t glyphID);
-
- bool populate(uint16_t glyphID);
-};
-
-#endif
diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/src/pdf/SkPDFMakeToUnicodeCmap.cpp
new file mode 100644
index 0000000000..6fd8b1ca16
--- /dev/null
+++ b/src/pdf/SkPDFMakeToUnicodeCmap.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkPDFMakeToUnicodeCmap.h"
+#include "SkPDFUtils.h"
+#include "SkUtils.h"
+
+static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
+ SkGlyphID firstGlyphID,
+ SkGlyphID lastGlyphID) {
+ // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
+ // It's there to prevent old version Adobe Readers from malfunctioning.
+ const char* kHeader =
+ "/CIDInit /ProcSet findresource begin\n"
+ "12 dict begin\n"
+ "begincmap\n";
+ cmap->writeText(kHeader);
+
+ // The /CIDSystemInfo must be consistent with the one in
+ // SkPDFFont::populateCIDFont().
+ // We can not pass over the system info object here because the format is
+ // different. This is not a reference object.
+ const char* kSysInfo =
+ "/CIDSystemInfo\n"
+ "<< /Registry (Adobe)\n"
+ "/Ordering (UCS)\n"
+ "/Supplement 0\n"
+ ">> def\n";
+ cmap->writeText(kSysInfo);
+
+ // The CMapName must be consistent with /CIDSystemInfo above.
+ // /CMapType 2 means ToUnicode.
+ // Codespace range just tells the PDF processor the valid range.
+ const char* kTypeInfoHeader =
+ "/CMapName /Adobe-Identity-UCS def\n"
+ "/CMapType 2 def\n"
+ "1 begincodespacerange\n";
+ cmap->writeText(kTypeInfoHeader);
+
+ // e.g. "<0000> <FFFF>\n"
+ SkString range;
+ range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID);
+ cmap->writeText(range.c_str());
+
+ const char* kTypeInfoFooter = "endcodespacerange\n";
+ cmap->writeText(kTypeInfoFooter);
+}
+
+static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
+ const char kFooter[] =
+ "endcmap\n"
+ "CMapName currentdict /CMap defineresource pop\n"
+ "end\n"
+ "end";
+ cmap->writeText(kFooter);
+}
+
+namespace {
+struct BFChar {
+ SkGlyphID fGlyphId;
+ SkUnichar fUnicode;
+};
+
+struct BFRange {
+ SkGlyphID fStart;
+ SkGlyphID fEnd;
+ SkUnichar fUnicode;
+};
+} // namespace
+
+static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
+ SkGlyphID utf16[2] = {0, 0};
+ size_t len = SkUTF16_FromUnichar(utf32, utf16);
+ SkASSERT(len == 1 || len == 2);
+ SkPDFUtils::WriteUInt16BE(wStream, utf16[0]);
+ if (len == 2) {
+ SkPDFUtils::WriteUInt16BE(wStream, utf16[1]);
+ }
+}
+
+static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
+ SkDynamicMemoryWStream* cmap) {
+ // PDF spec defines that every bf* list can have at most 100 entries.
+ for (int i = 0; i < bfchar.count(); i += 100) {
+ int count = bfchar.count() - i;
+ count = SkMin32(count, 100);
+ cmap->writeDecAsText(count);
+ cmap->writeText(" beginbfchar\n");
+ for (int j = 0; j < count; ++j) {
+ cmap->writeText("<");
+ SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId);
+ cmap->writeText("> <");
+ write_utf16be(cmap, bfchar[i + j].fUnicode);
+ cmap->writeText(">\n");
+ }
+ cmap->writeText("endbfchar\n");
+ }
+}
+
+static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
+ SkDynamicMemoryWStream* cmap) {
+ // PDF spec defines that every bf* list can have at most 100 entries.
+ for (int i = 0; i < bfrange.count(); i += 100) {
+ int count = bfrange.count() - i;
+ count = SkMin32(count, 100);
+ cmap->writeDecAsText(count);
+ cmap->writeText(" beginbfrange\n");
+ for (int j = 0; j < count; ++j) {
+ cmap->writeText("<");
+ SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart);
+ cmap->writeText("> <");
+ SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd);
+ cmap->writeText("> <");
+ write_utf16be(cmap, bfrange[i + j].fUnicode);
+ cmap->writeText(">\n");
+ }
+ cmap->writeText("endbfrange\n");
+ }
+}
+
+// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
+// Technote 5014.
+// The function is not static so we can test it in unit tests.
+//
+// Current implementation guarantees bfchar and bfrange entries do not overlap.
+//
+// Current implementation does not attempt aggressive optimizations for the
+// following case because the specification is not clear.
+//
+// 4 beginbfchar 1 beginbfchar
+// <0003> <0013> <0020> <0014>
+// <0005> <0015> to endbfchar
+// <0007> <0017> 1 beginbfrange
+// <0020> <0014> <0003> <0007> <0013>
+// endbfchar endbfrange
+//
+// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
+// overlap, but succeeding maps supersede preceding maps."
+//
+// In case of searching text in PDF, bfrange will have higher precedence so
+// typing char id 0x0014 in search box will get glyph id 0x0004 first. However,
+// the spec does not mention how this kind of conflict is resolved.
+//
+// For the worst case (having 65536 continuous unicode and we use every other
+// one of them), the possible savings by aggressive optimization is 416KB
+// pre-compressed and does not provide enough motivation for implementation.
+void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
+ const SkPDFGlyphSet* subset,
+ SkDynamicMemoryWStream* cmap,
+ bool multiByteGlyphs,
+ SkGlyphID firstGlyphID,
+ SkGlyphID lastGlyphID) {
+ if (glyphToUnicode.isEmpty()) {
+ return;
+ }
+ int glyphOffset = 0;
+ if (!multiByteGlyphs) {
+ glyphOffset = firstGlyphID - 1;
+ }
+
+ SkTDArray<BFChar> bfcharEntries;
+ SkTDArray<BFRange> bfrangeEntries;
+
+ BFRange currentRangeEntry = {0, 0, 0};
+ bool rangeEmpty = true;
+ const int limit =
+ SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;
+
+ for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
+ bool inSubset = i < limit &&
+ (subset == nullptr || subset->has(i + glyphOffset));
+ if (!rangeEmpty) {
+ // PDF spec requires bfrange not changing the higher byte,
+ // e.g. <1035> <10FF> <2222> is ok, but
+ // <1035> <1100> <2222> is no good
+ bool inRange =
+ i == currentRangeEntry.fEnd + 1 &&
+ i >> 8 == currentRangeEntry.fStart >> 8 &&
+ i < limit &&
+ glyphToUnicode[i + glyphOffset] ==
+ currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
+ if (!inSubset || !inRange) {
+ if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
+ bfrangeEntries.push(currentRangeEntry);
+ } else {
+ BFChar* entry = bfcharEntries.append();
+ entry->fGlyphId = currentRangeEntry.fStart;
+ entry->fUnicode = currentRangeEntry.fUnicode;
+ }
+ rangeEmpty = true;
+ }
+ }
+ if (inSubset) {
+ currentRangeEntry.fEnd = i;
+ if (rangeEmpty) {
+ currentRangeEntry.fStart = i;
+ currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
+ rangeEmpty = false;
+ }
+ }
+ }
+
+ // The spec requires all bfchar entries for a font must come before bfrange
+ // entries.
+ append_bfchar_section(bfcharEntries, cmap);
+ append_bfrange_section(bfrangeEntries, cmap);
+}
+
+sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
+ const SkTDArray<SkUnichar>& glyphToUnicode,
+ const SkPDFGlyphSet* subset,
+ bool multiByteGlyphs,
+ SkGlyphID firstGlyphID,
+ SkGlyphID lastGlyphID) {
+ SkDynamicMemoryWStream cmap;
+ if (multiByteGlyphs) {
+ append_tounicode_header(&cmap, firstGlyphID, lastGlyphID);
+ } else {
+ append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1);
+ }
+ SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
+ firstGlyphID, lastGlyphID);
+ append_cmap_footer(&cmap);
+ return sk_make_sp<SkPDFStream>(
+ std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
+}
diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.h b/src/pdf/SkPDFMakeToUnicodeCmap.h
new file mode 100644
index 0000000000..1bd8930742
--- /dev/null
+++ b/src/pdf/SkPDFMakeToUnicodeCmap.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef SkPDFMakeToUnicodeCmap_DEFINED
+#define SkPDFMakeToUnicodeCmap_DEFINED
+
+#include "SkTDArray.h"
+#include "SkPDFFont.h"
+#include "SkStream.h"
+
+sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
+ const SkTDArray<SkUnichar>& glyphToUnicode,
+ const SkPDFGlyphSet* subset,
+ bool multiByteGlyphs,
+ SkGlyphID firstGlyphID,
+ SkGlyphID lastGlyphID);
+
+// Exposed for unit testing.
+void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
+ const SkPDFGlyphSet* subset,
+ SkDynamicMemoryWStream* cmap,
+ bool multiByteGlyphs,
+ SkGlyphID firstGlyphID,
+ SkGlyphID lastGlyphID);
+
+#endif // SkPDFMakeToUnicodeCmap_DEFINED
diff --git a/tests/PDFGlyphsToUnicodeTest.cpp b/tests/PDFGlyphsToUnicodeTest.cpp
index f0425d94fc..b8157caeea 100644
--- a/tests/PDFGlyphsToUnicodeTest.cpp
+++ b/tests/PDFGlyphsToUnicodeTest.cpp
@@ -7,7 +7,7 @@
#include "SkData.h"
#include "SkPDFFont.h"
-#include "SkPDFTypes.h"
+#include "SkPDFMakeToUnicodeCmap.h"
#include "SkStream.h"
#include "Test.h"
@@ -23,13 +23,6 @@ static bool stream_equals(const SkDynamicMemoryWStream& stream, size_t offset,
return memcmp(data->bytes() + offset, buffer, len) == 0;
}
-void append_cmap_sections(const SkTDArray<SkUnichar>& glyphToUnicode,
- const SkPDFGlyphSet* subset,
- SkDynamicMemoryWStream* cmap,
- bool multiByteGlyphs,
- uint16_t firstGlypthID,
- uint16_t lastGlypthID);
-
DEF_TEST(ToUnicode, reporter) {
SkTDArray<SkUnichar> glyphToUnicode;
SkTDArray<uint16_t> glyphsInSubset;
@@ -73,7 +66,7 @@ DEF_TEST(ToUnicode, reporter) {
SkDynamicMemoryWStream buffer;
subset.set(glyphsInSubset.begin(), glyphsInSubset.count());
- append_cmap_sections(glyphToUnicode, &subset, &buffer, true, 0, 0xFFFF);
+ SkPDFAppendCmapSections(glyphToUnicode, &subset, &buffer, true, 0, 0xFFFF);
char expectedResult[] =
"4 beginbfchar\n\
@@ -95,7 +88,7 @@ endbfrange\n";
// Remove characters and ranges.
buffer.reset();
- append_cmap_sections(glyphToUnicode, &subset, &buffer, true, 8, 0x00FF);
+ SkPDFAppendCmapSections(glyphToUnicode, &subset, &buffer, true, 8, 0x00FF);
char expectedResultChop1[] =
"2 beginbfchar\n\
@@ -113,7 +106,7 @@ endbfrange\n";
// Remove characters from range to downdrade it to one char.
buffer.reset();
- append_cmap_sections(glyphToUnicode, &subset, &buffer, true, 0x00D, 0x00FE);
+ SkPDFAppendCmapSections(glyphToUnicode, &subset, &buffer, true, 0x00D, 0x00FE);
char expectedResultChop2[] =
"2 beginbfchar\n\
@@ -126,7 +119,7 @@ endbfchar\n";
buffer.reset();
- append_cmap_sections(glyphToUnicode, nullptr, &buffer, false, 0xFC, 0x110);
+ SkPDFAppendCmapSections(glyphToUnicode, nullptr, &buffer, false, 0xFC, 0x110);
char expectedResultSingleBytes[] =
"2 beginbfchar\n\
@@ -162,7 +155,7 @@ endbfrange\n";
SkDynamicMemoryWStream buffer2;
subset2.set(glyphsInSubset.begin(), glyphsInSubset.count());
- append_cmap_sections(glyphToUnicode, &subset2, &buffer2, true, 0, 0xffff);
+ SkPDFAppendCmapSections(glyphToUnicode, &subset2, &buffer2, true, 0, 0xffff);
char expectedResult2[] =
"4 beginbfchar\n\