aboutsummaryrefslogtreecommitdiffhomepage
path: root/experimental/PdfViewer/pdfparser/native
diff options
context:
space:
mode:
authorGravatar edisonn@google.com <edisonn@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-07-29 22:14:45 +0000
committerGravatar edisonn@google.com <edisonn@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-07-29 22:14:45 +0000
commit4ef4bed00efd247a0ea005b95b7239a9d4c14c68 (patch)
tree7a82f5984b24ecff66dbbd1ba05d78b768924a9c /experimental/PdfViewer/pdfparser/native
parentd49173afc862e0a33133190c392cd5a221a7e51f (diff)
pdfviewer: load files with a missing xref (we need this in order to help with testing, as most good PDFs in the wild are missing the xref). Add period as a valid character to start a real value.
Review URL: https://codereview.chromium.org/21096006 git-svn-id: http://skia.googlecode.com/svn/trunk@10423 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'experimental/PdfViewer/pdfparser/native')
-rw-r--r--experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp104
-rw-r--r--experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h3
-rw-r--r--experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp21
-rw-r--r--experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h7
-rw-r--r--experimental/PdfViewer/pdfparser/native/SkPdfObject.h16
5 files changed, 127 insertions, 24 deletions
diff --git a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp
index 8892ee2643..e54ba825bc 100644
--- a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp
+++ b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp
@@ -123,7 +123,7 @@ void SkNativeParsedPDF::init(const void* bytes, size_t length) {
bool storeCatalog = true;
while (xrefByteOffset >= 0) {
const unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine);
- xrefByteOffset = readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog);
+ readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false);
storeCatalog = false;
}
@@ -141,6 +141,12 @@ void SkNativeParsedPDF::init(const void* bytes, size_t length) {
}
}
+ // TODO(edisonn): clean up this doc, or better, let the caller call again and build a new doc
+ // caller should be a static function.
+ if (pages() == 0) {
+ loadWithoutXRef();
+ }
+
// TODO(edisonn): corrupted pdf, read it from the beginning and rebuild (xref, trailer, or just read all objects)
// 0 pages
@@ -148,6 +154,67 @@ void SkNativeParsedPDF::init(const void* bytes, size_t length) {
// and resolve references?... or not ...
}
+// Fallback loader, called by init() when the regular xref-driven load produced
+// no pages. Walks the file content from the start, token by token:
+//   - "<id> <generation> obj" : parses the object that follows and stores it
+//                               in fObjects[id] (the generation is ignored);
+//   - "trailer"               : reads the trailer dictionary; every trailer is
+//                               read with storeCatalog == true, so the last one
+//                               that names a /Root sets fRootCatalogRef;
+//   - "startxref"             : consumes and ignores the offset that follows.
+// Any other token is skipped. Once the scan is done, the root catalog is
+// resolved and the page tree is filled in from it.
+void SkNativeParsedPDF::loadWithoutXRef() {
+    const unsigned char* current = fFileContent;
+    const unsigned char* end = fFileContent + fContentLength;
+
+    // TODO(edisonn): read pdf version
+    current = ignoreLine(current, end);
+
+    current = skipPdfWhiteSpaces(0, current, end);
+    while (current < end) {
+        SkPdfObject token;
+        current = nextObject(0, current, end, &token, NULL, NULL);
+        if (token.isInteger()) {
+            const int id = (int)token.intValue();
+
+            token.reset();
+            current = nextObject(0, current, end, &token, NULL, NULL);
+            // int generation = (int)token.intValue();  // TODO(edisonn): ignored for now
+
+            token.reset();
+            current = nextObject(0, current, end, &token, NULL, NULL);
+            // TODO(edisonn): must be obj, return error if not? ignore ?
+            // The id comes straight from the file: a negative value must never
+            // be used as an index into fObjects, so such a header is ignored.
+            if (token.isKeyword("obj") && id >= 0) {
+                // Grow the table until slot |id| exists. Written as "<= id"
+                // rather than "< id + 1" so that id == INT_MAX cannot overflow.
+                while (fObjects.count() <= id) {
+                    reset(fObjects.append());
+                }
+
+                fObjects[id].fOffset = current - fFileContent;
+
+                SkPdfObject* obj = fAllocator->allocObject();
+                current = nextObject(0, current, end, obj, fAllocator, this);
+
+                fObjects[id].fResolvedReference = obj;
+                fObjects[id].fObj = obj;
+            }
+        } else if (token.isKeyword("trailer")) {
+            // The keyword has already been consumed, hence skipKeyword == true.
+            // The /Prev offset is of no use when scanning sequentially.
+            long ignoredPrev;
+            current = readTrailer(current, end, true, &ignoredPrev, true);
+        } else if (token.isKeyword("startxref")) {
+            token.reset();
+            current = nextObject(0, current, end, &token, NULL, NULL);  // ignore
+        }
+
+        current = skipPdfWhiteSpaces(0, current, end);
+    }
+
+    if (fRootCatalogRef) {
+        fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
+        // The reference may not resolve (e.g. the catalog object was never
+        // found during the scan), so check for NULL before dereferencing.
+        if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
+            SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
+            if (tree && tree->isDictionary() && tree->valid()) {
+                fillPages(tree);
+            }
+        }
+    }
+}
+
// TODO(edisonn): NYI
SkNativeParsedPDF::~SkNativeParsedPDF() {
sk_free((void*)fFileContent);
@@ -208,43 +275,47 @@ const unsigned char* SkNativeParsedPDF::readCrossReferenceSection(const unsigned
return current;
}
-long SkNativeParsedPDF::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog) {
- SkPdfObject trailerKeyword;
- // TODO(edisonn): use null allocator, and let it just fail if memory
- // needs allocated (but no crash)!
- const unsigned char* current =
- nextObject(0, trailerStart, trailerEnd, &trailerKeyword, NULL, NULL);
+const unsigned char* SkNativeParsedPDF::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword) {
+ *prev = -1;
+
+ const unsigned char* current = trailerStart;
+ if (!skipKeyword) {
+ SkPdfObject trailerKeyword;
+ // TODO(edisonn): use null allocator, and let it just fail if memory
+ // needs allocated (but no crash)!
+ current = nextObject(0, current, trailerEnd, &trailerKeyword, NULL, NULL);
- if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() ||
- strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) {
- // TODO(edisonn): report warning, rebuild trailer from objects.
- return -1;
+ if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() ||
+ strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) {
+ // TODO(edisonn): report warning, rebuild trailer from objects.
+ return current;
+ }
}
SkPdfObject token;
current = nextObject(0, current, trailerEnd, &token, fAllocator, NULL);
if (!token.isDictionary()) {
- return -1;
+ return current;
}
SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
if (!trailer->valid()) {
- return -1;
+ return current;
}
if (storeCatalog) {
const SkPdfObject* ref = trailer->Root(NULL);
if (ref == NULL || !ref->isReference()) {
// TODO(edisonn): oops, we have to fix the corrupt pdf file
- return -1;
+ return current;
}
fRootCatalogRef = ref;
}
if (trailer->has_Prev()) {
- return (long)trailer->Prev(NULL);
+ *prev = (long)trailer->Prev(NULL);
}
- return -1;
+ return current;
}
void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
@@ -255,6 +326,7 @@ void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset,
fObjects[id].fOffset = offset;
fObjects[id].fObj = NULL;
+ fObjects[id].fResolvedReference = NULL;
}
SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) {
diff --git a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h
index d55d808a2c..77a98c7d04 100644
--- a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h
+++ b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h
@@ -72,9 +72,10 @@ private:
// Takes ownership of bytes.
void init(const void* bytes, size_t length);
+ void loadWithoutXRef();
const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart, const unsigned char* trailerEnd);
- long readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog);
+ const unsigned char* readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword);
// TODO(edisonn): updates not supported right now, generation ignored
void addCrossSectionInfo(int id, int generation, int offset, bool isFreed);
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp
index 41bd92d170..09b7a0b3a8 100644
--- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp
+++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp
@@ -83,9 +83,9 @@ static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end
#define TRACE_HEXSTRING(start,end)
#endif
-static const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
+const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
TRACE_INDENT(level, "White Space");
- while (start < end && isPdfWhiteSpace(*start)) {
+ while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
TRACE_COMMENT(*start);
if (*start == kComment_PdfDelimiter) {
// skip the comment until end of line
@@ -103,7 +103,7 @@ static const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* s
}
// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
-static const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
+const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
//int opened brackets
//TODO(edisonn): what out for special chars, like \n, \032
TRACE_INDENT(level, "Token");
@@ -636,6 +636,21 @@ static const unsigned char* readStream(int level, const unsigned char* start, co
// TODO(edisonn): load external streams
// TODO(edisonn): look at the last filter, to determine how to deal with possible issues
+
+ if (length >= 0) {
+ const unsigned char* endstream = start + length;
+
+ if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
+ endstream += 2;
+ } else if (endstream[0] == kLF_PdfWhiteSpace) {
+ endstream += 1;
+ }
+
+ if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
+ length = -1;
+ }
+ }
+
if (length < 0) {
// scan the buffer, until we find first endstream
// TODO(edisonn): all buffers must have a 0 at the end now,
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h
index 2884937aaf..134f7b3cf8 100644
--- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h
+++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h
@@ -60,11 +60,10 @@ class SkPdfImageDictionary;
#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch))
#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9')
-#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-')
+#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.')
-const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* buffer, size_t len);
-const unsigned char* endOfPdfToken(int level, const unsigned char* start, size_t len);
-const unsigned char* skipPdfComment(int level, const unsigned char* start, size_t len);
+const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* buffer, const unsigned char* end);
+const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end);
// TODO(edisonn): typedef real and integer types? makes the code less readable...
//typedef double SkPdfReal;
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfObject.h b/experimental/PdfViewer/pdfparser/native/SkPdfObject.h
index 9df9a239e6..9ac9a12509 100644
--- a/experimental/PdfViewer/pdfparser/native/SkPdfObject.h
+++ b/experimental/PdfViewer/pdfparser/native/SkPdfObject.h
@@ -527,6 +527,22 @@ public:
return fObjectType == kKeyword_PdfObjectType;
}
+    // True only when this object is a keyword whose text is exactly |keyword|:
+    // the byte count has to match as well as the bytes themselves, so a keyword
+    // that merely starts with |keyword| (or the other way round) is rejected.
+    bool isKeyword(const char* keyword) const {
+        return isKeyword() &&
+               strlen(keyword) == fStr.fBytes &&
+               strncmp(keyword, (const char*)fStr.fBuffer, fStr.fBytes) == 0;
+    }
+
bool isName() const {
return fObjectType == kName_PdfObjectType;
}