diff options
author | 2013-07-29 22:14:45 +0000 | |
---|---|---|
committer | 2013-07-29 22:14:45 +0000 | |
commit | 4ef4bed00efd247a0ea005b95b7239a9d4c14c68 (patch) | |
tree | 7a82f5984b24ecff66dbbd1ba05d78b768924a9c /experimental/PdfViewer/pdfparser/native | |
parent | d49173afc862e0a33133190c392cd5a221a7e51f (diff) |
pdfviewer: load files with a missing xref (we need this in order to help with testing, as most good PDFs in the wild are missing the xref). Add period as a valid character to start a real value.
Review URL: https://codereview.chromium.org/21096006
git-svn-id: http://skia.googlecode.com/svn/trunk@10423 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'experimental/PdfViewer/pdfparser/native')
5 files changed, 127 insertions, 24 deletions
diff --git a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp index 8892ee2643..e54ba825bc 100644 --- a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp +++ b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp @@ -123,7 +123,7 @@ void SkNativeParsedPDF::init(const void* bytes, size_t length) { bool storeCatalog = true; while (xrefByteOffset >= 0) { const unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine); - xrefByteOffset = readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog); + readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false); storeCatalog = false; } @@ -141,6 +141,12 @@ void SkNativeParsedPDF::init(const void* bytes, size_t length) { } } + // TODO(edisonn): clean up this doc, or better, let the caller call again and build a new doc + // caller should be a static function. + if (pages() == 0) { + loadWithoutXRef(); + } + // TODO(edisonn): corrupted pdf, read it from beginning and rebuild (xref, trailer, or just reall all objects) // 0 pages @@ -148,6 +154,67 @@ void SkNativeParsedPDF::init(const void* bytes, size_t length) { // and resolve references?... or not ... 
} +void SkNativeParsedPDF::loadWithoutXRef() { + const unsigned char* current = fFileContent; + const unsigned char* end = fFileContent + fContentLength; + + // TODO(edisonn): read pdf version + current = ignoreLine(current, end); + + current = skipPdfWhiteSpaces(0, current, end); + while (current < end) { + SkPdfObject token; + current = nextObject(0, current, end, &token, NULL, NULL); + if (token.isInteger()) { + int id = (int)token.intValue(); + + token.reset(); + current = nextObject(0, current, end, &token, NULL, NULL); + // int generation = (int)token.intValue(); // TODO(edisonn): ignored for now + + token.reset(); + current = nextObject(0, current, end, &token, NULL, NULL); + // TODO(edisonn): must be obj, return error if not? ignore ? + if (!token.isKeyword("obj")) { + continue; + } + + while (fObjects.count() < id + 1) { + reset(fObjects.append()); + } + + fObjects[id].fOffset = current - fFileContent; + + SkPdfObject* obj = fAllocator->allocObject(); + current = nextObject(0, current, end, obj, fAllocator, this); + + fObjects[id].fResolvedReference = obj; + fObjects[id].fObj = obj; + + // set objects + } else if (token.isKeyword("trailer")) { + long dummy; + current = readTrailer(current, end, true, &dummy, true); + } else if (token.isKeyword("startxref")) { + token.reset(); + current = nextObject(0, current, end, &token, NULL, NULL); // ignore + } + + current = skipPdfWhiteSpaces(0, current, end); + } + + if (fRootCatalogRef) { + fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); + if (fRootCatalog->isDictionary() && fRootCatalog->valid()) { + SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); + if (tree && tree->isDictionary() && tree->valid()) { + fillPages(tree); + } + } + } + +} + // TODO(edisonn): NYI SkNativeParsedPDF::~SkNativeParsedPDF() { sk_free((void*)fFileContent); @@ -208,43 +275,47 @@ const unsigned char* SkNativeParsedPDF::readCrossReferenceSection(const unsigned return current; } -long 
SkNativeParsedPDF::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog) { - SkPdfObject trailerKeyword; - // TODO(edisonn): use null allocator, and let it just fail if memory - // needs allocated (but no crash)! - const unsigned char* current = - nextObject(0, trailerStart, trailerEnd, &trailerKeyword, NULL, NULL); +const unsigned char* SkNativeParsedPDF::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword) { + *prev = -1; + + const unsigned char* current = trailerStart; + if (!skipKeyword) { + SkPdfObject trailerKeyword; + // TODO(edisonn): use null allocator, and let it just fail if memory + // needs allocated (but no crash)! + current = nextObject(0, current, trailerEnd, &trailerKeyword, NULL, NULL); - if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() || - strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) { - // TODO(edisonn): report warning, rebuild trailer from objects. - return -1; + if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() || + strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) { + // TODO(edisonn): report warning, rebuild trailer from objects. 
+ return current; + } } SkPdfObject token; current = nextObject(0, current, trailerEnd, &token, fAllocator, NULL); if (!token.isDictionary()) { - return -1; + return current; } SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token; if (!trailer->valid()) { - return -1; + return current; } if (storeCatalog) { const SkPdfObject* ref = trailer->Root(NULL); if (ref == NULL || !ref->isReference()) { // TODO(edisonn): oops, we have to fix the corrup pdf file - return -1; + return current; } fRootCatalogRef = ref; } if (trailer->has_Prev()) { - return (long)trailer->Prev(NULL); + *prev = (long)trailer->Prev(NULL); } - return -1; + return current; } void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) { @@ -255,6 +326,7 @@ void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, fObjects[id].fOffset = offset; fObjects[id].fObj = NULL; + fObjects[id].fResolvedReference = NULL; } SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) { diff --git a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h index d55d808a2c..77a98c7d04 100644 --- a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h +++ b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h @@ -72,9 +72,10 @@ private: // Takes ownership of bytes. 
void init(const void* bytes, size_t length); + void loadWithoutXRef(); const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart, const unsigned char* trailerEnd); - long readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog); + const unsigned char* readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword); // TODO(edisonn): updates not supported right now, generation ignored void addCrossSectionInfo(int id, int generation, int offset, bool isFreed); diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp index 41bd92d170..09b7a0b3a8 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp +++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp @@ -83,9 +83,9 @@ static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end #define TRACE_HEXSTRING(start,end) #endif -static const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) { +const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) { TRACE_INDENT(level, "White Space"); - while (start < end && isPdfWhiteSpace(*start)) { + while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) { TRACE_COMMENT(*start); if (*start == kComment_PdfDelimiter) { // skip the comment until end of line @@ -103,7 +103,7 @@ static const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* s } // TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ? 
-static const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) { +const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) { //int opened brackets //TODO(edisonn): what out for special chars, like \n, \032 TRACE_INDENT(level, "Token"); @@ -636,6 +636,21 @@ static const unsigned char* readStream(int level, const unsigned char* start, co // TODO(edisonn): laod external streams // TODO(edisonn): look at the last filter, to determione how to deal with possible issue + + if (length >= 0) { + const unsigned char* endstream = start + length; + + if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) { + endstream += 2; + } else if (endstream[0] == kLF_PdfWhiteSpace) { + endstream += 1; + } + + if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) { + length = -1; + } + } + if (length < 0) { // scan the buffer, until we find first endstream // TODO(edisonn): all buffers must have a 0 at the end now, diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h index 2884937aaf..134f7b3cf8 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h +++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h @@ -60,11 +60,10 @@ class SkPdfImageDictionary; #define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch)) #define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9') -#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-') +#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.') -const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* buffer, size_t len); -const unsigned char* endOfPdfToken(int level, const unsigned char* start, size_t len); -const unsigned char* skipPdfComment(int level, const unsigned char* start, size_t len); +const unsigned char* skipPdfWhiteSpaces(int level, const 
unsigned char* buffer, const unsigned char* end); +const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end); // TODO(edisonn): typedef read and integer tyepes? make less readable... //typedef double SkPdfReal; diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfObject.h b/experimental/PdfViewer/pdfparser/native/SkPdfObject.h index 9df9a239e6..9ac9a12509 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfObject.h +++ b/experimental/PdfViewer/pdfparser/native/SkPdfObject.h @@ -527,6 +527,22 @@ public: return fObjectType == kKeyword_PdfObjectType; } + bool isKeyword(const char* keyword) const { + if (!isKeyword()) { + return false; + } + + if (strlen(keyword) != fStr.fBytes) { + return false; + } + + if (strncmp(keyword, (const char*)fStr.fBuffer, fStr.fBytes) != 0) { + return false; + } + + return true; + } + bool isName() const { return fObjectType == kName_PdfObjectType; } |