aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/pdf
diff options
context:
space:
mode:
authorGravatar vandebo@chromium.org <vandebo@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2011-08-08 22:33:05 +0000
committerGravatar vandebo@chromium.org <vandebo@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2011-08-08 22:33:05 +0000
commit04c643b7739cf7e618d9996c3fd1514eee4b27c0 (patch)
tree08a314248f0e589e076bcfb3d2bbb023ecab63d0 /src/pdf
parent14b23c7c1a1a11d1919a077d4e662a093bdc2512 (diff)
Use bfrange to shrink ToUnicode table.
Patch from Arthur Hsu. Original CL: http://codereview.appspot.com/4844043/ BUG=258 Review URL: http://codereview.appspot.com/4808083 git-svn-id: http://skia.googlecode.com/svn/trunk@2075 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src/pdf')
-rw-r--r--src/pdf/SkPDFFont.cpp169
1 files changed, 133 insertions, 36 deletions
diff --git a/src/pdf/SkPDFFont.cpp b/src/pdf/SkPDFFont.cpp
index 1333c9f6c7..ecee5020df 100644
--- a/src/pdf/SkPDFFont.cpp
+++ b/src/pdf/SkPDFFont.cpp
@@ -361,21 +361,6 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap) {
cmap->writeText(kTypeInfo);
}
-static void append_cmap_bfchar_table(uint16_t* glyph_id, SkUnichar* unicode,
- size_t count,
- SkDynamicMemoryWStream* cmap) {
- cmap->writeDecAsText(count);
- cmap->writeText(" beginbfchar\n");
- for (size_t i = 0; i < count; ++i) {
- cmap->writeText("<");
- cmap->writeHexAsText(glyph_id[i], 4);
- cmap->writeText("> <");
- cmap->writeHexAsText(unicode[i], 4);
- cmap->writeText(">\n");
- }
- cmap->writeText("endbfchar\n");
-}
-
static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
const char* kFooter =
"endcmap\n"
@@ -385,38 +370,150 @@ static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
cmap->writeText(kFooter);
}
-// Generate <bfchar> table according to PDF spec 1.4 and Adobe Technote 5014.
-static void append_cmap_bfchar_sections(
- const SkTDArray<SkUnichar>& glyphUnicode,
- const SkPDFGlyphSet* subset, SkDynamicMemoryWStream* cmap) {
+struct BFChar {
+ uint16_t fGlyphId;
+ SkUnichar fUnicode;
+};
+
+struct BFRange {
+ uint16_t fStart;
+ uint16_t fEnd;
+ SkUnichar fUnicode;
+};
+
+static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
+ SkDynamicMemoryWStream* cmap) {
// PDF spec defines that every bf* list can have at most 100 entries.
- const size_t kMaxEntries = 100;
- uint16_t glyphId[kMaxEntries];
- SkUnichar unicode[kMaxEntries];
- size_t index = 0;
- for (int i = 0; i < glyphUnicode.count(); i++) {
- if (glyphUnicode[i] && (subset == NULL || subset->has(i))) {
- glyphId[index] = i;
- unicode[index] = glyphUnicode[i];
- ++index;
+ for (int i = 0; i < bfchar.count(); i += 100) {
+ int count = bfchar.count() - i;
+ count = SkMin32(count, 100);
+ cmap->writeDecAsText(count);
+ cmap->writeText(" beginbfchar\n");
+ for (int j = 0; j < count; ++j) {
+ cmap->writeText("<");
+ cmap->writeHexAsText(bfchar[i + j].fGlyphId, 4);
+ cmap->writeText("> <");
+ cmap->writeHexAsText(bfchar[i + j].fUnicode, 4);
+ cmap->writeText(">\n");
}
- if (index == kMaxEntries) {
- append_cmap_bfchar_table(glyphId, unicode, index, cmap);
- index = 0;
+ cmap->writeText("endbfchar\n");
+ }
+}
+
+static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
+ SkDynamicMemoryWStream* cmap) {
+ // PDF spec defines that every bf* list can have at most 100 entries.
+ for (int i = 0; i < bfrange.count(); i += 100) {
+ int count = bfrange.count() - i;
+ count = SkMin32(count, 100);
+ cmap->writeDecAsText(count);
+ cmap->writeText(" beginbfrange\n");
+ for (int j = 0; j < count; ++j) {
+ cmap->writeText("<");
+ cmap->writeHexAsText(bfrange[i + j].fStart, 4);
+ cmap->writeText("> <");
+ cmap->writeHexAsText(bfrange[i + j].fEnd, 4);
+ cmap->writeText("> <");
+ cmap->writeHexAsText(bfrange[i + j].fUnicode, 4);
+ cmap->writeText(">\n");
}
+ cmap->writeText("endbfrange\n");
}
+}
+
+// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
+// Technote 5014.
+// The function is not static so we can test it in unit tests.
+//
+// Current implementation guarantees bfchar and bfrange entries do not overlap.
+//
+// Current implementation does not attempt aggresive optimizations against
+// following case because the specification is not clear.
+//
+// 4 beginbfchar 1 beginbfchar
+// <0003> <0013> <0020> <0014>
+// <0005> <0015> to endbfchar
+// <0007> <0017> 1 beginbfrange
+// <0020> <0014> <0003> <0007> <0013>
+// endbfchar endbfrange
+//
+// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
+// overlap, but succeeding maps superceded preceding maps."
+//
+// In case of searching text in PDF, bfrange will have higher precedence so
+// typing char id 0x0014 in search box will get glyph id 0x0004 first. However,
+// the spec does not mention how will this kind of conflict being resolved.
+//
+// For the worst case (having 65536 continuous unicode and we use every other
+// one of them), the possible savings by aggressive optimization is 416KB
+// pre-compressed and does not provide enough motivation for implementation.
+void append_cmap_sections(const SkTDArray<SkUnichar>& glyphToUnicode,
+ const SkPDFGlyphSet* subset,
+ SkDynamicMemoryWStream* cmap) {
+ if (glyphToUnicode.count() == 0) return;
+
+ SkTDArray<BFChar> bfcharEntries;
+ SkTDArray<BFRange> bfrangeEntries;
+
+ BFRange currentRangeEntry;
+ bool haveBase = false;
+ int continuousEntries = 0;
+
+ for (int i = 0; i < glyphToUnicode.count(); ++i) {
+ if (glyphToUnicode[i] && (subset == NULL || subset->has(i))) {
+ // PDF spec requires bfrange not changing the higher byte,
+ // e.g. <1035> <10FF> <2222> is ok, but
+ // <1035> <1100> <2222> is no good
+ if (haveBase) {
+ ++continuousEntries;
+ if (i == currentRangeEntry.fStart + continuousEntries &&
+ (i >> 8) == (currentRangeEntry.fStart >> 8) &&
+ glyphToUnicode[i] == (currentRangeEntry.fUnicode +
+ continuousEntries)) {
+ currentRangeEntry.fEnd = i;
+ if (i == glyphToUnicode.count() - 1) {
+ // Last entry is in a range.
+ bfrangeEntries.push(currentRangeEntry);
+ }
+ continue;
+ }
+
+ // Need to have at least 2 entries to form a bfrange.
+ if (continuousEntries >= 2) {
+ bfrangeEntries.push(currentRangeEntry);
+ } else {
+ BFChar* entry = bfcharEntries.append();
+ entry->fGlyphId = currentRangeEntry.fStart;
+ entry->fUnicode = currentRangeEntry.fUnicode;
+ }
+ continuousEntries = 0;
+ }
- if (index) {
- append_cmap_bfchar_table(glyphId, unicode, index, cmap);
+ if (i != glyphToUnicode.count() - 1) {
+ currentRangeEntry.fStart = i;
+ currentRangeEntry.fEnd = i;
+ currentRangeEntry.fUnicode = glyphToUnicode[i];
+ haveBase = true;
+ } else {
+ BFChar* entry = bfcharEntries.append();
+ entry->fGlyphId = i;
+ entry->fUnicode = glyphToUnicode[i];
+ }
+ }
}
+
+ // The spec requires all bfchar entries for a font must come before bfrange
+ // entries.
+ append_bfchar_section(bfcharEntries, cmap);
+ append_bfrange_section(bfrangeEntries, cmap);
}
static SkPDFStream* generate_tounicode_cmap(
- const SkTDArray<SkUnichar>& glyphUnicode,
- const SkPDFGlyphSet* subset) {
+ const SkTDArray<SkUnichar>& glyphToUnicode,
+ const SkPDFGlyphSet* subset) {
SkDynamicMemoryWStream cmap;
append_tounicode_header(&cmap);
- append_cmap_bfchar_sections(glyphUnicode, subset, &cmap);
+ append_cmap_sections(glyphToUnicode, subset, &cmap);
append_cmap_footer(&cmap);
SkRefPtr<SkMemoryStream> cmapStream = new SkMemoryStream();
cmapStream->unref(); // SkRefPtr and new took a reference.