diff options
author | edisonn@google.com <edisonn@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2013-10-09 20:23:12 +0000 |
---|---|---|
committer | edisonn@google.com <edisonn@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2013-10-09 20:23:12 +0000 |
commit | c8fda9d96be0bd944d37a6e23f7adad5f247c51d (patch) | |
tree | 5b4c8feafaa79732010276b9e3063fbb6e07587f /experimental/PdfViewer/pdfparser/native | |
parent | 1303761a8990fe1ea56f68e2c660c58d97930a25 (diff) |
code cleanup
Review URL: https://codereview.chromium.org/26613006
git-svn-id: http://skia.googlecode.com/svn/trunk@11687 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'experimental/PdfViewer/pdfparser/native')
5 files changed, 349 insertions, 347 deletions
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp b/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp index f6b323abc2..19f15d1399 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp +++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp @@ -6,15 +6,18 @@ */ #include "SkPdfNativeDoc.h" -#include "SkPdfNativeTokenizer.h" -#include "SkPdfNativeObject.h" -#include "SkPdfReporter.h" #include <stdio.h> #include <string.h> #include <sys/types.h> #include <sys/stat.h> +#include "SkPdfMapper_autogen.h" +#include "SkPdfNativeObject.h" +#include "SkPdfNativeTokenizer.h" +#include "SkPdfReporter.h" +#include "SkStream.h" + // TODO(edisonn): for some reason on mac these files are found here, but are found from headers //#include "SkPdfFileTrailerDictionary_autogen.h" //#include "SkPdfCatalogDictionary_autogen.h" @@ -22,11 +25,6 @@ //#include "SkPdfPageTreeNodeDictionary_autogen.h" #include "SkPdfHeaders_autogen.h" -#include "SkPdfMapper_autogen.h" - -#include "SkStream.h" - - static long getFileSize(const char* filename) { struct stat stat_buf; @@ -41,7 +39,8 @@ static const unsigned char* lineHome(const unsigned char* start, const unsigned return current; } -static const unsigned char* previousLineHome(const unsigned char* start, const unsigned char* current) { +static const unsigned char* previousLineHome(const unsigned char* start, + const unsigned char* current) { if (current > start && isPdfEOL(*(current - 1))) { current--; } @@ -71,14 +70,6 @@ static const unsigned char* ignoreLine(const unsigned char* current, const unsig SkPdfNativeDoc* gDoc = NULL; -// TODO(edisonn): NYI -// TODO(edisonn): 3 constructuctors from URL, from stream, from file ... 
-// TODO(edisonn): write one that accepts errors in the file and ignores/fixis them -// TODO(edisonn): testing: -// 1) run on a lot of file -// 2) recoverable corupt file: remove endobj, endsteam, remove other keywords, use other white spaces, insert comments randomly, ... -// 3) irrecoverable corrupt file - SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream) : fAllocator(new SkPdfAllocator()) , fFileContent(NULL) @@ -108,7 +99,8 @@ SkPdfNativeDoc::SkPdfNativeDoc(const char* path) fclose(file); if (!ok) { sk_free(content); - SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "could not read file", NULL, NULL); + SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, + "could not read file", NULL, NULL); // TODO(edisonn): not nice to return like this from constructor, create a static // function that can report NULL for failures. return; // Doc will have 0 pages @@ -126,14 +118,16 @@ void SkPdfNativeDoc::init(const void* bytes, size_t length) { const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine); if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) { - SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find startxref", NULL, NULL); + SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, + "Could not find startxref", NULL, NULL); } long xrefByteOffset = atol((const char*)xrefByteOffsetLine); bool storeCatalog = true; while (xrefByteOffset >= 0) { - const unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine); + const unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, + xrefstartKeywordLine); xrefByteOffset = -1; if (trailerStart < xrefstartKeywordLine) { readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false); @@ -143,7 +137,6 @@ void SkPdfNativeDoc::init(const void* bytes, size_t length) { // TODO(edisonn): warn/error expect 
fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper - // load catalog if (fRootCatalogRef) { fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); @@ -155,17 +148,13 @@ void SkPdfNativeDoc::init(const void* bytes, size_t length) { } } - // TODO(edisonn): clean up this doc, or better, let the caller call again and build a new doc - // caller should be a static function. if (pages() == 0) { + // TODO(edisonn): probably it would be better to return NULL and make a clean document. loadWithoutXRef(); } - // TODO(edisonn): corrupted pdf, read it from beginning and rebuild (xref, trailer, or just reall all objects) - // 0 pages - - // now actually read all objects if we want, or do it lazyly - // and resolve references?... or not ... + // TODO(edisonn): corrupted pdf, read it from beginning and rebuild + // (xref, trailer, or just read all objects) } void SkPdfNativeDoc::loadWithoutXRef() { @@ -184,13 +173,15 @@ void SkPdfNativeDoc::loadWithoutXRef() { token.reset(); current = nextObject(current, end, &token, NULL, NULL); - // int generation = (int)token.intValue(); // TODO(edisonn): ignored for now + // TODO(edisonn): generation ignored for now (used in pdfs with updates) + // int generation = (int)token.intValue(); token.reset(); current = nextObject(current, end, &token, NULL, NULL); - // TODO(edisonn): must be obj, return error if not? ignore ? + // TODO(edisonn): keywork must be "obj". Add ability to report error instead ignoring. 
if (!token.isKeyword("obj")) { - SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find obj", NULL, NULL); + SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, + "Could not find obj", NULL, NULL); continue; } @@ -206,21 +197,19 @@ void SkPdfNativeDoc::loadWithoutXRef() { fObjects[id].fResolvedReference = obj; fObjects[id].fObj = obj; fObjects[id].fIsReferenceResolved = true; - - - // set objects } else if (token.isKeyword("trailer")) { long dummy; current = readTrailer(current, end, true, &dummy, true); } else if (token.isKeyword("startxref")) { token.reset(); - current = nextObject(current, end, &token, NULL, NULL); // ignore + current = nextObject(current, end, &token, NULL, NULL); // ignore startxref } current = skipPdfWhiteSpaces(current, end); } - // TODO(edisonn): hack, detect root catalog - we need to implement liniarized support, and remove this hack. + // TODO(edisonn): quick hack, detect root catalog. When we implement linearized support we + // might not need it. 
if (!fRootCatalogRef) { for (unsigned int i = 0 ; i < objects(); i++) { SkPdfNativeObject* obj = object(i); @@ -244,18 +233,19 @@ void SkPdfNativeDoc::loadWithoutXRef() { } -// TODO(edisonn): NYI SkPdfNativeDoc::~SkPdfNativeDoc() { sk_free((void*)fFileContent); delete fAllocator; } -const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart, const unsigned char* trailerEnd) { +const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart, + const unsigned char* trailerEnd) { SkPdfNativeObject xref; const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL); if (!xref.isKeyword("xref")) { - SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find sref", NULL, NULL); + SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find sref", + NULL, NULL); return trailerEnd; } @@ -265,7 +255,8 @@ const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned ch const unsigned char* previous = current; current = nextObject(current, trailerEnd, &token, NULL, NULL); if (!token.isInteger()) { - SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, "Done readCrossReferenceSection", NULL, NULL); + SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, + "Done readCrossReferenceSection", NULL, NULL); return previous; } @@ -274,7 +265,8 @@ const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned ch current = nextObject(current, trailerEnd, &token, NULL, NULL); if (!token.isInteger()) { - SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", + &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); return current; } @@ -284,7 +276,9 @@ const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned ch 
token.reset(); current = nextObject(current, trailerEnd, &token, NULL, NULL); if (!token.isInteger()) { - SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readCrossReferenceSection", + &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); return current; } int offset = (int)token.intValue(); @@ -292,39 +286,48 @@ const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned ch token.reset(); current = nextObject(current, trailerEnd, &token, NULL, NULL); if (!token.isInteger()) { - SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readCrossReferenceSection", + &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); return current; } int generation = (int)token.intValue(); token.reset(); current = nextObject(current, trailerEnd, &token, NULL, NULL); - if (!token.isKeyword() || token.lenstr() != 1 || (*token.c_str() != 'f' && *token.c_str() != 'n')) { - SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection: f or n expected", &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); + if (!token.isKeyword() || token.lenstr() != 1 || + (*token.c_str() != 'f' && *token.c_str() != 'n')) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readCrossReferenceSection: f or n expected", + &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); return current; } addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f'); } } - SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, "Unexpected end of readCrossReferenceSection", NULL, NULL); + SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, + "Unexpected end of readCrossReferenceSection", NULL, NULL); return 
current; } -const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword) { +const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart, + const unsigned char* trailerEnd, + bool storeCatalog, long* prev, bool skipKeyword) { *prev = -1; const unsigned char* current = trailerStart; if (!skipKeyword) { SkPdfNativeObject trailerKeyword; - // TODO(edisonn): use null allocator, and let it just fail if memory - // needs allocated (but no crash)! + // Use null allocator, and let it just fail if memory, it should not crash. current = nextObject(current, trailerEnd, &trailerKeyword, NULL, NULL); if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() || strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) { - // TODO(edisonn): report warning, rebuild trailer from objects. - SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readTrailer: trailer keyword expected", &trailerKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readTrailer: trailer keyword expected", + &trailerKeyword, + SkPdfNativeObject::kKeyword_PdfObjectType, NULL); return current; } } @@ -342,7 +345,9 @@ const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerSta if (storeCatalog) { SkPdfNativeObject* ref = trailer->Root(NULL); if (ref == NULL || !ref->isReference()) { - SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readTrailer: unexpected root reference", ref, SkPdfNativeObject::kReference_PdfObjectType, NULL); + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readTrailer: unexpected root reference", + ref, SkPdfNativeObject::kReference_PdfObjectType, NULL); return current; } fRootCatalogRef = ref; @@ -356,7 +361,7 @@ const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerSta } void 
SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) { - // TODO(edisonn): security here + // TODO(edisonn): security here, verify id while (fObjects.count() < id + 1) { reset(fObjects.append()); } @@ -371,10 +376,6 @@ SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/ long startOffset = fObjects[id].fOffset; //long endOffset = fObjects[id].fOffsetEnd; // TODO(edisonn): use hinted endOffset - // TODO(edisonn): current implementation will result in a lot of memory usage - // to decrease memory usage, we wither need to be smart and know where objects end, and we will - // alocate only the chancks needed, or the tokenizer will not make copies, but then it needs to - // cache the results so it does not go twice on the same buffer const unsigned char* current = fFileContent + startOffset; const unsigned char* end = fFileContent + fContentLength; @@ -387,38 +388,46 @@ SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/ current = nextObject(current, end, &idObj, NULL, NULL); if (current >= end) { - SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id", NULL, NULL); + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id", + NULL, NULL); return NULL; } current = nextObject(current, end, &generationObj, NULL, NULL); if (current >= end) { - SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading generation", NULL, NULL); + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, + "reading generation", NULL, NULL); return NULL; } current = nextObject(current, end, &objKeyword, NULL, NULL); if (current >= end) { - SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading keyword obj", NULL, NULL); + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, + "reading keyword obj", NULL, NULL); return NULL; } if (!idObj.isInteger() || id 
!= idObj.intValue()) { - SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id", &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id", + &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); } // TODO(edisonn): verify that the generation is the right one if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) { - SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected generation", &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readObject: unexpected generation", + &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); } if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) { - SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected obj keyword", &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readObject: unexpected obj keyword", + &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); } current = nextObject(current, end, dict, fAllocator, this); - // TODO(edisonn): report warning/error - verify last token is endobj + // TODO(edisonn): report warning/error - verify that the last token is endobj return dict; } @@ -457,7 +466,8 @@ SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) { return fPages[page]->Resources(this); } -// TODO(edisonn): Partial implemented. Move the logics directly in the code generator for inheritable and default value? +// TODO(edisonn): Partial implemented. +// Move the logics directly in the code generator for inheritable and default values? 
SkRect SkPdfNativeDoc::MediaBox(int page) { SkPdfPageObjectDictionary* current = fPages[page]; while (!current->has_MediaBox() && current->has_Parent()) { @@ -469,20 +479,18 @@ SkRect SkPdfNativeDoc::MediaBox(int page) { return SkRect::MakeEmpty(); } -// TODO(edisonn): stream or array ... ? for now only array -SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfPage(int page, - SkPdfAllocator* allocator) { +SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfPage(int page, SkPdfAllocator* allocator) { if (fPages[page]->isContentsAStream(this)) { return tokenizerOfStream(fPages[page]->getContentsAsStream(this), allocator); } else { - // TODO(edisonn): NYI, we need to concatenate all streams in the array or make the tokenizer smart - // so we don't allocate new memory + // TODO(edisonn): NYI, we need to concatenate all streams in the array or + // make the tokenizer smart so we don't allocate new memory. return NULL; } } SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfStream(SkPdfNativeObject* stream, - SkPdfAllocator* allocator) { + SkPdfAllocator* allocator) { if (stream == NULL) { return NULL; } @@ -490,11 +498,8 @@ SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfStream(SkPdfNativeObject* strea return new SkPdfNativeTokenizer(stream, allocator, this); } -// TODO(edisonn): NYI SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfBuffer(const unsigned char* buffer, size_t len, - SkPdfAllocator* allocator) { - // warning does not track two calls in the same buffer! the buffer is updated! - // make a clean copy if needed! 
+ SkPdfAllocator* allocator) { return new SkPdfNativeTokenizer(buffer, len, allocator, this); } @@ -510,9 +515,10 @@ SkPdfNativeObject* SkPdfNativeDoc::object(int i) { } if (fObjects[i].fObj == NULL) { - // TODO(edisonn): when we read the cross reference sections, store the start of the next object - // and fill fOffsetEnd fObjects[i].fObj = readObject(i); + // TODO(edisonn): For perf, when we read the cross reference sections, we should take + // advantage of the boundaries of known objects, to minimize the risk of just parsing a bad + // stream, and fail quickly, in case we default to sequential stream read. } return fObjects[i].fObj; @@ -525,21 +531,21 @@ const SkPdfMapper* SkPdfNativeDoc::mapper() const { SkPdfReal* SkPdfNativeDoc::createReal(double value) const { SkPdfNativeObject* obj = fAllocator->allocObject(); SkPdfNativeObject::makeReal(value, obj); - // TODO(edisonn): TRACK_FROM_CODE(obj); + TRACK_OBJECT_SRC(obj); return (SkPdfReal*)obj; } SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const { SkPdfNativeObject* obj = fAllocator->allocObject(); SkPdfNativeObject::makeInteger(value, obj); - // TODO(edisonn): TRACK_FROM_CODE(obj); + TRACK_OBJECT_SRC(obj); return (SkPdfInteger*)obj; } SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const { SkPdfNativeObject* obj = fAllocator->allocObject(); SkPdfNativeObject::makeString(sz, len, obj); - // TODO(edisonn): TRACK_FROM_CODE(obj); + TRACK_OBJECT_SRC(obj); return (SkPdfString*)obj; } @@ -547,8 +553,6 @@ SkPdfAllocator* SkPdfNativeDoc::allocator() const { return fAllocator; } -// TODO(edisonn): fix infinite loop if ref to itself! -// TODO(edisonn): perf, fix refs at load, and resolve will simply return fResolvedReference? 
SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) { if (ref && ref->isReference()) { int id = ref->referenceId(); @@ -557,24 +561,22 @@ SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) { // TODO(edisonn): verify id and gen expected if (id < 0 || id >= fObjects.count()) { - SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "resolve reference id out of bounds", NULL, NULL); + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, + "resolve reference id out of bounds", NULL, NULL); return NULL; } if (fObjects[id].fIsReferenceResolved) { - -#ifdef PDF_TRACE - printf("\nresolve(%s) = %s\n", ref->toString(0).c_str(), fObjects[id].fResolvedReference->toString(0, ref->toString().size() + 13).c_str()); -#endif - - SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity, kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL); + SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity, + kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL); return fObjects[id].fResolvedReference; } - // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow here unless we check for resolved reference on next line - // determine if the pdf is corrupted, or we have a bug here + // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow + // here unless we check for resolved reference on next line. + // Determine if the pdf is corrupted, or we have a bug here. 
- // avoids recursive calls + // Avoids recursive calls fObjects[id].fIsReferenceResolved = true; if (fObjects[id].fObj == NULL) { @@ -589,13 +591,9 @@ SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) { } } -#ifdef PDF_TRACE - printf("\nresolve(%s) = %s\n", ref->toString(0).c_str(), fObjects[id].fResolvedReference->toString(0, ref->toString().size() + 13).c_str()); -#endif return fObjects[id].fResolvedReference; } - // TODO(edisonn): fix the mess with const, probably we need to remove it pretty much everywhere return (SkPdfNativeObject*)ref; } diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.h b/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.h index 411343b56c..04456cc678 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.h +++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.h @@ -28,24 +28,25 @@ class SkPdfNativeTokenizer; class SkStream; +// TODO(edisonn): Implement a smart stream that can seek, and that can also fall back to reading +// the bytes in order. For example, we can try to read the stream optimistically, but if there +// are issues in the pdf, we must read the pdf from the beginning, and fix whatever errors we can. +// This would be useful to show quickly page 100 in a pdf (www.example.com/foo.pdf#page100) +// But if the pdf is missing the xref, then we will have to read most of pdf to be able to render +// page 100. + class SkPdfNativeDoc { private: struct PublicObjectEntry { long fOffset; - // long endOffset; // TODO(edisonn): determine the end of the object, to be used when the doc is corrupted + // long endOffset; // TODO(edisonn): determine the end of the object, + // to be used when the doc is corrupted, for fast failure. SkPdfNativeObject* fObj; - // TODO(edisonn): perf ... probably it does not make sense to cache the ref. test it! 
SkPdfNativeObject* fResolvedReference; bool fIsReferenceResolved; }; public: - // TODO(edisonn): read methods: file, stream, http(s)://url, url with seek? - // TODO(edisonn): read first page asap, linearized - // TODO(edisonn): read page N asap, read all file - // TODO(edisonn): allow corruptions of file (e.g. missing endobj, missing stream length, ...) - // TODO(edisonn): encryption - SkPdfNativeDoc(const char* path); SkPdfNativeDoc(SkStream* stream); @@ -83,10 +84,13 @@ private: void init(const void* bytes, size_t length); void loadWithoutXRef(); - const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart, const unsigned char* trailerEnd); - const unsigned char* readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword); + const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart, + const unsigned char* trailerEnd); + const unsigned char* readTrailer(const unsigned char* trailerStart, + const unsigned char* trailerEnd, + bool storeCatalog, long* prev, bool skipKeyword); - // TODO(edisonn): updates not supported right now, generation ignored + // TODO(edisonn): pdfs with updates not supported right now, generation ignored. void addCrossSectionInfo(int id, int generation, int offset, bool isFreed); static void reset(PublicObjectEntry* obj) { obj->fObj = NULL; @@ -99,7 +103,6 @@ private: void fillPages(SkPdfPageTreeNodeDictionary* tree); - // private fields SkPdfAllocator* fAllocator; SkPdfMapper* fMapper; const unsigned char* fFileContent; diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeObject.cpp b/experimental/PdfViewer/pdfparser/native/SkPdfNativeObject.cpp index 328d6fea25..a1d5df049f 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeObject.cpp +++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeObject.cpp @@ -7,24 +7,24 @@ #include "SkPdfNativeObject.h" -// TODO(edisonn): mac builder does not find the header ... 
but from headers is ok -//#include "SkPdfStreamCommonDictionary_autogen.h" -#include "SkPdfHeaders_autogen.h" - +#include "SkBitmap.h" #include "SkFlate.h" -#include "SkStream.h" +#include "SkPdfFont.h" #include "SkPdfNativeTokenizer.h" +#include "SkPdfReporter.h" +#include "SkStream.h" -#include "SkBitmap.h" -#include "SkPdfFont.h" +// TODO(edisonn): mac builder does not find the header ... but from headers is ok +//#include "SkPdfStreamCommonDictionary_autogen.h" +#include "SkPdfHeaders_autogen.h" -#include "SkPdfReporter.h" SkPdfNativeObject SkPdfNativeObject::kNull = SkPdfNativeObject::makeNull(); bool SkPdfNativeObject::applyFlateDecodeFilter() { if (!SkFlate::HaveFlate()) { - SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kNoFlateLibrary_SkPdfIssue, "forgot to link with flate library?", NULL, NULL); + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kNoFlateLibrary_SkPdfIssue, + "forgot to link with flate library?", NULL, NULL); return false; } @@ -35,7 +35,8 @@ bool SkPdfNativeObject::applyFlateDecodeFilter() { SkDynamicMemoryWStream uncompressedData; if (SkFlate::Inflate(&skstream, &uncompressedData)) { - fStr.fBytes = (uncompressedData.bytesWritten() << 2) + kOwnedStreamBit + kUnfilteredStreamBit; + fStr.fBytes = (uncompressedData.bytesWritten() << 2) + kOwnedStreamBit + + kUnfilteredStreamBit; fStr.fBuffer = (const unsigned char*)new unsigned char[uncompressedData.bytesWritten()]; uncompressedData.copyTo((void*)fStr.fBuffer); @@ -51,9 +52,9 @@ bool SkPdfNativeObject::applyFlateDecodeFilter() { } bool SkPdfNativeObject::applyDCTDecodeFilter() { - // this would fail, and it won't allow any more filters. - // technically, it would be possible, but not a real world scenario - // TODO(edisonn): or get the image here and store it for fast retrieval? + // applyDCTDecodeFilter will fail, and it won't allow any more filters. + // technically, it would be possible, but not a real world scenario. + // in this way we create the image from the DCT stream directly. 
return false; } @@ -63,7 +64,8 @@ bool SkPdfNativeObject::applyFilter(const char* name) { } else if (strcmp(name, "DCTDecode") == 0) { return applyDCTDecodeFilter(); } - SkPdfReport(kCodeWarning_SkPdfIssueSeverity, kNYI_SkPdfIssue, "filter not supported", this, NULL); + SkPdfReport(kCodeWarning_SkPdfIssueSeverity, kNYI_SkPdfIssue, "filter not supported", this, + NULL); return false; } @@ -71,7 +73,8 @@ bool SkPdfNativeObject::filterStream() { SkPdfMarkObjectUsed(); if (!hasStream()) { - SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kBadStream_SkPdfIssue, "No Stream", this, NULL); + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kBadStream_SkPdfIssue, "No Stream", this, + NULL); return false; } @@ -96,7 +99,8 @@ bool SkPdfNativeObject::filterStream() { break; } } else { - SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kIncositentSyntax_SkPdfIssue, "filter name should be a Name", this, NULL); + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kIncositentSyntax_SkPdfIssue, + "filter name should be a Name", this, NULL); } } } @@ -106,7 +110,8 @@ bool SkPdfNativeObject::filterStream() { void SkPdfNativeObject::releaseData() { #ifdef PDF_TRACK_OBJECT_USAGE - SkPdfReportIf(!fUsed, kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, "Unused object in rendering", this, NULL); + SkPdfReportIf(!fUsed, kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, + "Unused object in rendering", this, NULL); #endif // PDF_TRACK_OBJECT_USAGE SkPdfMarkObjectUnused(); diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeObject.h b/experimental/PdfViewer/pdfparser/native/SkPdfNativeObject.h index 8c6bf28176..b9d5d03bed 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeObject.h +++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeObject.h @@ -10,27 +10,22 @@ #include <stdint.h> #include <string.h> -#include "SkString.h" -#include "SkTDArray.h" -#include "SkTDict.h" -#include "SkRect.h" -#include "SkMatrix.h" -#include "SkString.h" -#include "SkPdfNYI.h" +#include "SkMatrix.h" 
#include "SkPdfConfig.h" -#include "SkPdfUtils.h" - #include "SkPdfNativeTokenizer.h" +#include "SkPdfNYI.h" +#include "SkPdfUtils.h" +#include "SkRect.h" +#include "SkString.h" +#include "SkTDArray.h" +#include "SkTDict.h" class SkPdfDictionary; class SkPdfStream; class SkPdfAllocator; -// TODO(edisonn): macro it and move it to utils -SkMatrix SkMatrixFromPdfMatrix(double array[6]); - - +// TODO(edisonn): remove these constants and clean up the code. #define kFilteredStreamBit 0 #define kUnfilteredStreamBit 1 #define kOwnedStreamBit 2 @@ -56,11 +51,10 @@ class SkPdfNativeObject { kDictionary_PdfObjectType = 1 << 11, kNull_PdfObjectType = 1 << 12, - // TODO(edisonn): after the pdf has been loaded completely, resolve all references - // try the same thing with delayed loaded ... kReference_PdfObjectType = 1 << 13, - kUndefined_PdfObjectType = 1 << 14, // per 1.4 spec, if the same key appear twice in the dictionary, the value is undefined + kUndefined_PdfObjectType = 1 << 14, // per 1.4 spec, if the same key appear twice in the + // dictionary, the value is undefined. _kObject_PdfObjectType = -1, }; @@ -72,33 +66,25 @@ class SkPdfNativeObject { }; private: - // TODO(edisonn): assert reset operations while in rendering! + // TODO(edisonn): assert reset operations while in rendering! The objects should be reset + // only when rendering is completed. uint32_t fInRendering : 1; uint32_t fUnused : 31; - struct Reference { unsigned int fId; unsigned int fGen; }; - // TODO(edisonn): add stream start, stream end, where stream is weither the file - // or decoded/filtered pdf stream - - // TODO(edisonn): add warning/report per object - // TODO(edisonn): add flag fUsed, to be used once the parsing is complete, - // so we could show what parts have been proccessed, ignored, or generated errors - ObjectType fObjectType; union { bool fBooleanValue; int64_t fIntegerValue; - // TODO(edisonn): double, float? typedefed + // TODO(edisonn): double, float, SkScalar? 
double fRealValue; NotOwnedString fStr; - // TODO(edisonn): make sure the foorprint of fArray and fMap is small, otherwise, use pointers, or classes with up to 8 bytes in footprint SkTDArray<SkPdfNativeObject*>* fArray; Reference fRef; }; @@ -108,7 +94,6 @@ private: void* fData; DataType fDataType; - // Keep this the last entries #ifdef PDF_TRACK_OBJECT_USAGE mutable bool fUsed; @@ -167,7 +152,7 @@ public: void releaseData(); // ~SkPdfNativeObject() { -// //reset(); must be called manually! +// //reset(); must be called manually! Normally, will be called by allocator destructor. // } void reset() { @@ -211,7 +196,7 @@ public: return (const char*)fStr.fBuffer; default: - // TODO(edisonn): report/warning + // TODO(edisonn): report/warning/assert? return NULL; } } @@ -227,7 +212,7 @@ public: return fStr.fBytes; default: - // TODO(edisonn): report/warning + // TODO(edisonn): report/warning/assert? return 0; } } @@ -257,9 +242,6 @@ public: return nyi; } - // TODO(edisonn) impl store - //STORE_TRACK_PARAMETERS(obj); - static void makeBoolean(bool value, SkPdfNativeObject* obj) { SkASSERT(obj->fObjectType == kInvalid_PdfObjectType); @@ -305,7 +287,8 @@ public: static SkPdfNativeObject kNull; - static void makeNumeric(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* obj) { + static void makeNumeric(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* obj) { SkASSERT(obj->fObjectType == kInvalid_PdfObjectType); // TODO(edisonn): NYI properly @@ -343,7 +326,8 @@ public: makeStringCore(start, strlen((const char*)start), obj, kString_PdfObjectType); } - static void makeString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* obj) { + static void makeString(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* obj) { makeStringCore(start, end - start, obj, kString_PdfObjectType); } @@ -356,7 +340,8 @@ public: makeStringCore(start, strlen((const char*)start), obj, kHexString_PdfObjectType); 
} - static void makeHexString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* obj) { + static void makeHexString(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* obj) { makeStringCore(start, end - start, obj, kHexString_PdfObjectType); } @@ -369,7 +354,8 @@ public: makeStringCore(start, strlen((const char*)start), obj, kName_PdfObjectType); } - static void makeName(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* obj) { + static void makeName(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* obj) { makeStringCore(start, end - start, obj, kName_PdfObjectType); } @@ -382,7 +368,8 @@ public: makeStringCore(start, strlen((const char*)start), obj, kKeyword_PdfObjectType); } - static void makeKeyword(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* obj) { + static void makeKeyword(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* obj) { makeStringCore(start, end - start, obj, kKeyword_PdfObjectType); } @@ -390,21 +377,17 @@ public: makeStringCore(start, bytes, obj, kKeyword_PdfObjectType); } - - - // TODO(edisonn): make the functions to return SkPdfArray, move these functions in SkPdfArray static void makeEmptyArray(SkPdfNativeObject* obj) { SkASSERT(obj->fObjectType == kInvalid_PdfObjectType); obj->fObjectType = kArray_PdfObjectType; obj->fArray = new SkTDArray<SkPdfNativeObject*>(); - // return (SkPdfArray*)obj; } bool appendInArray(SkPdfNativeObject* obj) { SkASSERT(fObjectType == kArray_PdfObjectType); if (fObjectType != kArray_PdfObjectType) { - // TODO(edisonn): report err + // TODO(edisonn): report/warning/assert? 
return false; } @@ -429,7 +412,7 @@ public: } SkPdfNativeObject* removeLastInArray() { - // SkPdfMarkObjectUsed(); + SkPdfMarkObjectUsed(); SkASSERT(fObjectType == kArray_PdfObjectType); @@ -439,7 +422,6 @@ public: return ret; } - const SkPdfNativeObject* objAtAIndex(int i) const { SkPdfMarkObjectUsed(); @@ -449,6 +431,8 @@ public: } SkPdfNativeObject* operator[](int i) { + SkPdfMarkObjectUsed(); + SkASSERT(fObjectType == kArray_PdfObjectType); return (*fArray)[i]; @@ -462,8 +446,6 @@ public: return (*fArray)[i]; } - - // TODO(edisonn): make the functions to return SkPdfDictionary, move these functions in SkPdfDictionary static void makeEmptyDictionary(SkPdfNativeObject* obj) { SkASSERT(obj->fObjectType == kInvalid_PdfObjectType); @@ -473,43 +455,40 @@ public: obj->fStr.fBytes = 0; } - // TODO(edisonn): get all the possible names from spec, and compute a hash function + // TODO(edisonn): perf: get all the possible names from spec, and compute a hash function // that would create no overlaps in the same dictionary // or build a tree of chars that when followed goes to a unique id/index/hash // TODO(edisonn): generate constants like kDictFoo, kNameDict_name // which will be used in code // add function SkPdfFastNameKey key(const char* key); - // TODO(edisonn): setting the same key twike, will make the value undefined! + // TODO(edisonn): setting the same key twice, will make the value undefined! bool set(const SkPdfNativeObject* key, SkPdfNativeObject* value) { - //SkPdfMarkObjectUsed(); + SkPdfMarkObjectUsed(); SkASSERT(fObjectType == kDictionary_PdfObjectType); SkASSERT(key->fObjectType == kName_PdfObjectType); if (key->fObjectType != kName_PdfObjectType || fObjectType != kDictionary_PdfObjectType) { - // TODO(edisonn): report err + // TODO(edisonn): report/warn/assert? 
return false; } - //// we rewrite all delimiters and white spaces with '\0', so we expect the end of name to be '\0' - //SkASSERT(key->fStr.fBuffer[key->fStr.fBytes] == '\0'); - return set(key->fStr.fBuffer, key->fStr.fBytes, value); } bool set(const char* key, SkPdfNativeObject* value) { - //SkPdfMarkObjectUsed(); + SkPdfMarkObjectUsed(); return set((const unsigned char*)key, strlen(key), value); } bool set(const unsigned char* key, size_t len, SkPdfNativeObject* value) { - //SkPdfMarkObjectUsed(); + SkPdfMarkObjectUsed(); SkASSERT(fObjectType == kDictionary_PdfObjectType); if (fObjectType != kDictionary_PdfObjectType) { - // TODO(edisonn): report err + // TODO(edisonn): report/warn/assert. return false; } @@ -523,12 +502,10 @@ public: SkASSERT(key->fObjectType == kName_PdfObjectType); if (key->fObjectType != kName_PdfObjectType || fObjectType != kDictionary_PdfObjectType) { - // TODO(edisonn): report err + // TODO(edisonn): report/warn/assert. return NULL; } - //SkASSERT(key->fStr.fBuffer[key->fStr.fBytes] == '\0'); - return get(key->fStr.fBuffer, key->fStr.fBytes); } @@ -544,7 +521,7 @@ public: SkASSERT(fObjectType == kDictionary_PdfObjectType); SkASSERT(key); if (fObjectType != kDictionary_PdfObjectType) { - // TODO(edisonn): report err + // TODO(edisonn): report/warn/assert. return NULL; } SkPdfNativeObject* ret = NULL; @@ -553,7 +530,8 @@ public: #ifdef PDF_TRACE SkString _key; _key.append((const char*)key, len); - printf("\nget(/%s) = %s\n", _key.c_str(), ret ? ret->toString(0, len + 9).c_str() : "_NOT_FOUND"); + printf("\nget(/%s) = %s\n", _key.c_str(), + ret ? ret->toString(0, len + 9).c_str() : "_NOT_FOUND"); #endif return ret; @@ -566,12 +544,10 @@ public: SkASSERT(key->fObjectType == kName_PdfObjectType); if (key->fObjectType != kName_PdfObjectType || fObjectType != kDictionary_PdfObjectType) { - // TODO(edisonn): report err + // TODO(edisonn): report/warn/assert. 
return NULL; } - //SkASSERT(key->fStr.fBuffer[key->fStr.fBytes] == '\0'); - return get(key->fStr.fBuffer, key->fStr.fBytes); } @@ -587,7 +563,7 @@ public: SkASSERT(fObjectType == kDictionary_PdfObjectType); SkASSERT(key); if (fObjectType != kDictionary_PdfObjectType) { - // TODO(edisonn): report err + // TODO(edisonn): report/warn/assert. return NULL; } SkPdfNativeObject* ret = NULL; @@ -596,7 +572,8 @@ public: #ifdef PDF_TRACE SkString _key; _key.append((const char*)key, len); - printf("\nget(/%s) = %s\n", _key.c_str(), ret ? ret->toString(0, len + 9).c_str() : "_NOT_FOUND"); + printf("\nget(/%s) = %s\n", _key.c_str(), + ret ? ret->toString(0, len + 9).c_str() : "_NOT_FOUND"); #endif return ret; @@ -606,8 +583,7 @@ public: SkPdfMarkObjectUsed(); const SkPdfNativeObject* ret = get(key); - // TODO(edisonn): / is a valid name, and it might be an abreviation, so "" should not be like NULL - // make this distiontion in generator, and remove "" from condition + // TODO(edisonn): remove || *abr == '\0' and pass NULL in the _autogen files instead. if (ret != NULL || abr == NULL || *abr == '\0') { return ret; } @@ -618,8 +594,7 @@ public: SkPdfMarkObjectUsed(); SkPdfNativeObject* ret = get(key); - // TODO(edisonn): / is a valid name, and it might be an abreviation, so "" should not be like NULL - // make this distiontion in generator, and remove "" from condition + // TODO(edisonn): remove || *abr == '\0' and pass NULL in the _autogen files instead. 
if (ret != NULL || abr == NULL || *abr == '\0') { return ret; } @@ -716,7 +691,9 @@ public: bool isName(const char* name) const { SkPdfMarkObjectUsed(); - return fObjectType == kName_PdfObjectType && fStr.fBytes == strlen(name) && strncmp((const char*)fStr.fBuffer, name, fStr.fBytes) == 0; + return fObjectType == kName_PdfObjectType && + fStr.fBytes == strlen(name) && + strncmp((const char*)fStr.fBuffer, name, fStr.fBytes) == 0; } bool isArray() const { @@ -746,7 +723,8 @@ public: bool isRectangle() const { SkPdfMarkObjectUsed(); - return fObjectType == kArray_PdfObjectType && fArray->count() == 4; // NYI + and elems are numbers + // TODO(edisonn): add also that each of these 4 objects are numbers. + return fObjectType == kArray_PdfObjectType && fArray->count() == 4; } // TODO(edisonn): has stream .. or is stream ... TBD @@ -784,7 +762,8 @@ public: bool isMatrix() const { SkPdfMarkObjectUsed(); - return fObjectType == kArray_PdfObjectType && fArray->count() == 6; // NYI + and elems are numbers + // TODO(edisonn): add also that each of these 6 objects are numbers. + return fObjectType == kArray_PdfObjectType && fArray->count() == 6; } inline int64_t intValue() const { @@ -793,7 +772,7 @@ public: SkASSERT(fObjectType == kInteger_PdfObjectType); if (fObjectType != kInteger_PdfObjectType) { - // TODO(edisonn): log err + // TODO(edisonn): report/warn/assert. return 0; } return fIntegerValue; @@ -805,7 +784,7 @@ private: SkASSERT(fObjectType == kReal_PdfObjectType); if (fObjectType != kReal_PdfObjectType) { - // TODO(edisonn): log err + // TODO(edisonn): report/warn/assert. return 0; } return fRealValue; @@ -817,7 +796,7 @@ public: SkASSERT(isNumber()); if (!isNumber()) { - // TODO(edisonn): log err + // TODO(edisonn): report/warn/assert. return 0; } return fObjectType == kReal_PdfObjectType ? fRealValue : fIntegerValue; @@ -829,7 +808,7 @@ public: SkASSERT(isNumber()); if (!isNumber()) { - // TODO(edisonn): log err + // TODO(edisonn): report/warn/assert. 
return SkIntToScalar(0); } return fObjectType == kReal_PdfObjectType ? SkDoubleToScalar(fRealValue) : @@ -856,7 +835,7 @@ public: SkASSERT(fObjectType == kName_PdfObjectType); if (fObjectType != kName_PdfObjectType) { - // TODO(edisonn): log err + // TODO(edisonn): report/warn/assert. return ""; } return (const char*)fStr.fBuffer; @@ -868,7 +847,7 @@ public: SkASSERT(fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType); if (fObjectType != kString_PdfObjectType && fObjectType != kHexString_PdfObjectType) { - // TODO(edisonn): log err + // TODO(edisonn): report/warn/assert. return ""; } return (const char*)fStr.fBuffer; @@ -911,7 +890,7 @@ public: SkASSERT(fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType); if (fObjectType != kString_PdfObjectType && fObjectType != kHexString_PdfObjectType) { - // TODO(edisonn): log err + // TODO(edisonn): report/warn/assert. return SkString(); } return SkString((const char*)fStr.fBuffer, fStr.fBytes); @@ -923,7 +902,7 @@ public: SkASSERT(fObjectType == kBoolean_PdfObjectType); if (fObjectType != kBoolean_PdfObjectType) { - // TODO(edisonn): log err + // TODO(edisonn): report/warn/assert. return false; } return fBooleanValue; @@ -942,7 +921,7 @@ public: // TODO(edisonn): version where we could resolve references? const SkPdfNativeObject* elem = objAtAIndex(i); if (elem == NULL || !elem->isNumber()) { - // TODO(edisonn): report error + // TODO(edisonn): report/warn/assert. return SkRect::MakeEmpty(); } array[i] = elem->numberValue(); @@ -967,7 +946,7 @@ public: // TODO(edisonn): version where we could resolve references? const SkPdfNativeObject* elem = objAtAIndex(i); if (elem == NULL || !elem->isNumber()) { - // TODO(edisonn): report error + // TODO(edisonn): report/warn/assert. 
return SkMatrix::I(); } array[i] = elem->numberValue(); @@ -982,7 +961,8 @@ public: bool GetFilteredStreamRef(unsigned char const** buffer, size_t* len) { SkPdfMarkObjectUsed(); - // TODO(edisonn): add params that couls let the last filter in place if it is jpeg or png to fast load images + // TODO(edisonn): add params that could let the last filter in place + // if it is jpeg or png to fast load images. if (!hasStream()) { return false; } @@ -994,7 +974,7 @@ public: } if (len) { - *len = fStr.fBytes >> 2; // last 2 bits + *len = fStr.fBytes >> 2; // last 2 bits - TODO(edisonn): clean up. } return true; @@ -1028,14 +1008,14 @@ public: } if (len) { - *len = fStr.fBytes >> 2; // remove last 2 bits + *len = fStr.fBytes >> 2; // remove last 2 bits - TODO(edisonn): clean up. } return true; } bool addStream(const unsigned char* buffer, size_t len) { - //SkPdfMarkObjectUsed(); + SkPdfMarkObjectUsed(); SkASSERT(!hasStream()); SkASSERT(isDictionary()); @@ -1142,7 +1122,8 @@ public: str.append("<<\n"); while ((key = iter.next(&obj)) != NULL) { appendSpaces(&str, level + 2); - str.appendf("/%s %s\n", key, obj->toString(0, level + strlen(key) + 4).c_str()); + str.appendf("/%s %s\n", key, + obj->toString(0, level + strlen(key) + 4).c_str()); } appendSpaces(&str, level); str.append(">>"); @@ -1181,16 +1162,18 @@ public: } private: - static void makeStringCore(const unsigned char* start, SkPdfNativeObject* obj, ObjectType type) { + static void makeStringCore(const unsigned char* start, SkPdfNativeObject* obj, + ObjectType type) { makeStringCore(start, strlen((const char*)start), obj, type); } - static void makeStringCore(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* obj, ObjectType type) { + static void makeStringCore(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* obj, ObjectType type) { makeStringCore(start, end - start, obj, type); } - static void makeStringCore(const unsigned char* start, size_t bytes, SkPdfNativeObject* 
obj, ObjectType type) { - + static void makeStringCore(const unsigned char* start, size_t bytes, SkPdfNativeObject* obj, + ObjectType type) { SkASSERT(obj->fObjectType == kInvalid_PdfObjectType); obj->fObjectType = type; diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp index aa8dae8bbb..79935bc954 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp +++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp @@ -5,9 +5,10 @@ * found in the LICENSE file. */ -#include "SkPdfNativeTokenizer.h" -#include "SkPdfNativeObject.h" #include "SkPdfConfig.h" +#include "SkPdfNativeObject.h" +#include "SkPdfNativeTokenizer.h" +#include "SkPdfUtils.h" // TODO(edisonn): mac builder does not find the header ... but from headers is ok //#include "SkPdfStreamCommonDictionary_autogen.h" @@ -15,8 +16,9 @@ #include "SkPdfHeaders_autogen.h" -// TODO(edisonn): perf!!! -// there could be 0s between start and end! but not in the needle. +// TODO(edisonn): Perf, Make this function run faster. +// There could be 0s between start and end. +// needle will not contain 0s. 
static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) { int needleLen = strlen(needle); if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) && @@ -28,7 +30,8 @@ static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) { while (hayStart < hayEnd) { if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) && - (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) && + (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || + (hayStart+needleLen == hayEnd)) && strncmp(hayStart, needle, needleLen) == 0) { return hayStart; } @@ -37,67 +40,22 @@ static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) { return NULL; } -#ifdef PDF_TRACE_TOKENIZER - -static void TRACE_COMMENT(char ch) { - printf("%c", ch); -} - -static void TRACE_TK(char ch) { - printf("%c", ch); -} - -static void TRACE_NAME(const unsigned char* start, const unsigned char* end) { - while (start < end) { - printf("%c", *start); - start++; - } - printf("\n"); -} - -static void TRACE_STRING(const unsigned char* start, const unsigned char* end) { - while (start < end) { - printf("%c", *start); - start++; - } - printf("\n"); -} - -static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) { - while (start < end) { - printf("%c", *start); - start++; - } - printf("\n"); -} - -#else -#define TRACE_COMMENT(ch) -#define TRACE_TK(ch) -#define TRACE_NAME(start,end) -#define TRACE_STRING(start,end) -#define TRACE_HEXSTRING(start,end) -#endif - const unsigned char* skipPdfWhiteSpaces(const unsigned char* start, const unsigned char* end) { while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) { TRACE_COMMENT(*start); if (*start == kComment_PdfDelimiter) { // skip the comment until end of line while (start < end && !isPdfEOL(*start)) { - //*start = '\0'; start++; TRACE_COMMENT(*start); } } else { - //*start = '\0'; start++; } } return start; } -// 
TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ? const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end) { SkASSERT(!isPdfWhiteSpace(*start)); @@ -114,13 +72,15 @@ const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned ch return start; } -// last elem has to be ] -static const unsigned char* readArray(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* array, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) { +// The parsing should end with a ]. +static const unsigned char* readArray(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* array, + SkPdfAllocator* allocator, SkPdfNativeDoc* doc) { SkPdfNativeObject::makeEmptyArray(array); // PUT_TRACK_STREAM(array, start, start) if (allocator == NULL) { - // TODO(edisonn): report/warning error + // TODO(edisonn): report/warning error/assert return end; } @@ -141,15 +101,17 @@ static const unsigned char* readArray(const unsigned char* start, const unsigned SkPdfNativeObject* newObj = allocator->allocObject(); start = nextObject(start, end, newObj, allocator, doc); - // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when - // we are sure they are not references! - if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) { + // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array + // only when we are sure they are not references! 
+ if (newObj->isKeywordReference() && array->size() >= 2 && + array->objAtAIndex(array->size() - 1)->isInteger() && + array->objAtAIndex(array->size() - 2)->isInteger()) { SkPdfNativeObject* gen = array->removeLastInArray(); SkPdfNativeObject* id = array->removeLastInArray(); - SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj); + SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(), + (unsigned int)gen->intValue(), newObj); // newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj) - store end, as now - } array->appendInArray(newObj); } @@ -159,12 +121,8 @@ static const unsigned char* readArray(const unsigned char* start, const unsigned return start; } -// When we read strings we will rewrite the string so we will reuse the memory -// when we start to read the string, we already consumed the opened bracket - -// TODO(edisonn): space: add paramater, taht would report if we need to allocate new buffer, or we can reuse the one we have - -static const unsigned char* readString(const unsigned char* start, const unsigned char* end, unsigned char* out) { +static const unsigned char* readString(const unsigned char* start, const unsigned char* end, + unsigned char* out) { const unsigned char* in = start; bool hasOut = (out != NULL); @@ -260,7 +218,7 @@ static const unsigned char* readString(const unsigned char* start, const unsigne break; default: - // Per spec, backslash is ignored is escaped ch is unknown + // Per spec, backslash is ignored if escaped ch is unknown in++; break; } @@ -268,9 +226,6 @@ static const unsigned char* readString(const unsigned char* start, const unsigne in++; } } else { - // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ? 
- // we could have one look that first just inc current, and when we find the backslash - // we go to this loop if (hasOut) { *out = *in; } in++; out++; @@ -280,7 +235,8 @@ static const unsigned char* readString(const unsigned char* start, const unsigne if (hasOut) { return in; // consumed already ) at the end of the string } else { - return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string + // return where the string would end if we reuse the string + return start + (out - (const unsigned char*)NULL); } } @@ -288,12 +244,14 @@ static int readStringLength(const unsigned char* start, const unsigned char* end return readString(start, end, NULL) - start; } -static const unsigned char* readString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) { +static const unsigned char* readString(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* str, SkPdfAllocator* allocator) { if (!allocator) { + // TODO(edisonn): report error/warn/assert return end; } + int outLength = readStringLength(start, end); - // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer unsigned char* out = (unsigned char*)allocator->alloc(outLength); const unsigned char* now = readString(start, end, out); SkPdfNativeObject::makeString(out, out + outLength, str); @@ -302,7 +260,8 @@ static const unsigned char* readString(const unsigned char* start, const unsigne return now; // consumed already ) at the end of the string } -static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, unsigned char* out) { +static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, + unsigned char* out) { bool hasOut = (out != NULL); const unsigned char* in = start; @@ -314,7 +273,6 @@ static const unsigned char* readHexString(const unsigned char* start, const unsi } if 
(*in == kClosedInequityBracket_PdfDelimiter) { - //*in = '\0'; in++; // consume > // normal exit break; @@ -425,9 +383,10 @@ static const unsigned char* readHexString(const unsigned char* start, const unsi } if (hasOut) { - return in; // consumed already > at the end of the string + return in; // consumed already ) at the end of the string } else { - return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string + // return where the string would end if we reuse the string + return start + (out - (const unsigned char*)NULL); } } @@ -437,10 +396,10 @@ static int readHexStringLength(const unsigned char* start, const unsigned char* static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) { if (!allocator) { + // TODO(edisonn): report error/warn/assert return end; } int outLength = readHexStringLength(start, end); - // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer unsigned char* out = (unsigned char*)allocator->alloc(outLength); const unsigned char* now = readHexString(start, end, out); SkPdfNativeObject::makeHexString(out, out + outLength, str); @@ -449,8 +408,9 @@ static const unsigned char* readHexString(const unsigned char* start, const unsi return now; // consumed already > at the end of the string } -// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter -static const unsigned char* readName(const unsigned char* start, const unsigned char* end, unsigned char* out) { +// TODO(edisonn): add version parameter, before PDF 1.2 name could not have special characters. 
+static const unsigned char* readName(const unsigned char* start, const unsigned char* end, + unsigned char* out) { bool hasOut = (out != NULL); const unsigned char* in = start; @@ -550,9 +510,10 @@ static const unsigned char* readName(const unsigned char* start, const unsigned } if (hasOut) { - return in; + return in; // consumed already ) at the end of the string } else { - return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string + // return where the string would end if we reuse the string + return start + (out - (const unsigned char*)NULL); } } @@ -560,12 +521,13 @@ static int readNameLength(const unsigned char* start, const unsigned char* end) return readName(start, end, NULL) - start; } -static const unsigned char* readName(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* name, SkPdfAllocator* allocator) { +static const unsigned char* readName(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* name, SkPdfAllocator* allocator) { if (!allocator) { + // TODO(edisonn): report error/warn/assert return end; } int outLength = readNameLength(start, end); - // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer unsigned char* out = (unsigned char*)allocator->alloc(outLength); const unsigned char* now = readName(start, end, out); SkPdfNativeObject::makeName(out, out + outLength, name); @@ -597,10 +559,15 @@ and it could get worse, with multiple object like this // right now implement the silly algorithm that assumes endstream is finishing the stream - -static const unsigned char* readStream(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfNativeDoc* doc) { +static const unsigned char* readStream(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* dict, SkPdfNativeDoc* doc) { start = skipPdfWhiteSpaces(start, end); - if (!(start[0] == 's' && start[1] == 't' && 
start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) { + if (!( start[0] == 's' && + start[1] == 't' && + start[2] == 'r' && + start[3] == 'e' && + start[4] == 'a' && + start[5] == 'm')) { // no stream. return. return start; } @@ -614,7 +581,6 @@ static const unsigned char* readStream(const unsigned char* start, const unsigne start += 1; } else { // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ? - // TODO(edisonn): warning? } SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; @@ -626,9 +592,10 @@ static const unsigned char* readStream(const unsigned char* start, const unsigne length = stream->Length(doc); } - // TODO(edisonn): laod external streams - // TODO(edisonn): look at the last filter, to determione how to deal with possible issue - + // TODO(edisonn): load external streams + // TODO(edisonn): look at the last filter, to determine how to deal with possible parsing + // issues. The last filter can have special rules to terminate a stream, which we could + // use to determine end of stream. 
if (length >= 0) { const unsigned char* endstream = start + length; @@ -647,7 +614,8 @@ static const unsigned char* readStream(const unsigned char* start, const unsigne if (length < 0) { // scan the buffer, until we find first endstream // TODO(edisonn): all buffers must have a 0 at the end now, - const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream"); + const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, + "endstream"); if (endstream) { length = endstream - start; @@ -674,10 +642,19 @@ static const unsigned char* readStream(const unsigned char* start, const unsigne return start; } -static const unsigned char* readInlineImageStream(const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkPdfNativeDoc* doc) { +static const unsigned char* readInlineImageStream(const unsigned char* start, + const unsigned char* end, + SkPdfImageDictionary* inlineImage, + SkPdfNativeDoc* doc) { // We already processed ID keyword, and we should be positioned immediately after it - // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes + // TODO(edisonn): security: either make all streams to have extra 2 bytes at the end, + // instead of this if. + //if (end - start <= 2) { + // // TODO(edisonn): warning? + // return end; // but can we have a pixel image encoded in 1-2 bytes? 
+ //} + if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { start += 2; } else if (start[0] == kLF_PdfWhiteSpace) { @@ -705,7 +682,9 @@ static const unsigned char* readInlineImageStream(const unsigned char* start, co return endEI; } -static const unsigned char* readDictionary(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) { +static const unsigned char* readDictionary(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* dict, + SkPdfAllocator* allocator, SkPdfNativeDoc* doc) { if (allocator == NULL) { // TODO(edisonn): report/warning error return end; @@ -714,7 +693,7 @@ static const unsigned char* readDictionary(const unsigned char* start, const uns // PUT_TRACK_STREAM(dict, start, start) start = skipPdfWhiteSpaces(start, end); - SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set. + SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them after set. 
while (start < end && *start == kNamed_PdfDelimiter) { SkPdfNativeObject key; @@ -730,7 +709,7 @@ static const unsigned char* readDictionary(const unsigned char* start, const uns start = skipPdfWhiteSpaces(start, end); if (start < end) { - // seems we have an indirect reference + // We should have an indirect reference if (isPdfDigit(*start)) { SkPdfNativeObject generation; start = nextObject(start, end, &generation, allocator, doc); @@ -738,13 +717,17 @@ static const unsigned char* readDictionary(const unsigned char* start, const uns SkPdfNativeObject keywordR; start = nextObject(start, end, &keywordR, allocator, doc); - if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) { + if (value->isInteger() && generation.isInteger() && + keywordR.isKeywordReference()) { int64_t id = value->intValue(); - SkPdfNativeObject::resetAndMakeReference((unsigned int)id, (unsigned int)generation.intValue(), value); + SkPdfNativeObject::resetAndMakeReference( + (unsigned int)id, + (unsigned int)generation.intValue(), + value); // PUT_TRACK_PARAMETERS_OBJ2(value, &generation) dict->set(&key, value); } else { - // error, ignore + // TODO(edisonn) error?, ignore it for now. 
dict->set(&key, value); } } else { @@ -763,19 +746,17 @@ static const unsigned char* readDictionary(const unsigned char* start, const uns } } - // TODO(edisonn): options to ignore these errors - // now we should expect >> start = skipPdfWhiteSpaces(start, end); if (*start != kClosedInequityBracket_PdfDelimiter) { // TODO(edisonn): report/warning } - //*start = '\0'; + start++; // skip > if (*start != kClosedInequityBracket_PdfDelimiter) { // TODO(edisonn): report/warning } - //*start = '\0'; + start++; // skip > //STORE_TRACK_PARAMETER_OFFSET_END(dict,start); @@ -785,7 +766,9 @@ static const unsigned char* readDictionary(const unsigned char* start, const uns return start; } -const unsigned char* nextObject(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) { +const unsigned char* nextObject(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* token, + SkPdfAllocator* allocator, SkPdfNativeDoc* doc) { const unsigned char* current; // skip white spaces @@ -808,17 +791,13 @@ const unsigned char* nextObject(const unsigned char* start, const unsigned char* // start array switch (*start) { case kOpenedSquareBracket_PdfDelimiter: - //*start = '\0'; return readArray(current, end, token, allocator, doc); case kOpenedRoundBracket_PdfDelimiter: - //*start = '\0'; return readString(start + 1, end, token, allocator); case kOpenedInequityBracket_PdfDelimiter: - //*start = '\0'; if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) { - //start[1] = '\0'; // optional // TODO(edisonn): pass here the length somehow? return readDictionary(start + 2, end, token, allocator, doc); // skip << } else { @@ -826,10 +805,9 @@ const unsigned char* nextObject(const unsigned char* start, const unsigned char* } case kNamed_PdfDelimiter: - //*start = '\0'; return readName(start + 1, end, token, allocator); - // TODO(edisonn): what to do curly brackets? read spec! 
+ // TODO(edisonn): what to do curly brackets? case kOpenedCurlyBracket_PdfDelimiter: default: break; @@ -837,7 +815,7 @@ const unsigned char* nextObject(const unsigned char* start, const unsigned char* SkASSERT(!isPdfWhiteSpace(*start)); if (isPdfDelimiter(*start)) { - // TODO(edisonn): how stream ] } > ) will be handled? + // TODO(edisonn): how unexpected stream ] } > ) will be handled? // for now ignore, and it will become a keyword to be ignored } } @@ -854,7 +832,12 @@ const unsigned char* nextObject(const unsigned char* start, const unsigned char* return current; } - if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[4] == 'e') { + // TODO(edisonn): again, make all buffers have 5 extra bytes + if (tokenLen == 5 && start[0] == 'f' && + start[1] == 'a' && + start[2] == 'l' && + start[3] == 's' && + start[4] == 'e') { SkPdfNativeObject::makeBoolean(false, token); // PUT_TRACK_STREAM(start, start + 5) return current; @@ -902,12 +885,23 @@ SkPdfNativeObject* SkPdfAllocator::allocObject() { return &fCurrent[fCurrentUsed - 1]; } -// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass -SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) { +// TODO(edisonn): perf: do no copy the buffers, but reuse them, and mark cache the result, +// so there is no need of a second pass +SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, + SkPdfAllocator* allocator, + SkPdfNativeDoc* doc) + : fDoc(doc) + , fAllocator(allocator) + , fUncompressedStream(NULL) + , fUncompressedStreamEnd(NULL) + , fEmpty(false) + , fHasPutBack(false) { const unsigned char* buffer = NULL; size_t len = 0; objWithStream->GetFilteredStreamRef(&buffer, &len); - // 
TODO(edisonn): hack, find end of object + // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!) + // we need to do now for perf, and our generated pdfs do not have comments, + // but we need to remove this hack for pdfs in the wild char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj"); if (endobj) { len = endobj - (char*)buffer + strlen("endobj"); @@ -916,8 +910,15 @@ SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkP fUncompressedStreamEnd = fUncompressedStream + len; } -SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fEmpty(false), fHasPutBack(false) { - // TODO(edisonn): hack, find end of object +SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, + SkPdfAllocator* allocator, + SkPdfNativeDoc* doc) : fDoc(doc) + , fAllocator(allocator) + , fEmpty(false) + , fHasPutBack(false) { + // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!) 
+ // we need to do now for perf, and our generated pdfs do not have comments, + // but we need to remove this hack for pdfs in the wild char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj"); if (endobj) { len = endobj - (char*)buffer + strlen("endobj"); @@ -930,10 +931,10 @@ SkPdfNativeTokenizer::~SkPdfNativeTokenizer() { } bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { - SkPdfNativeObject obj; #ifdef PDF_TRACE_READ_TOKEN static int read_op = 0; #endif + token->fKeyword = NULL; token->fObject = NULL; @@ -942,10 +943,11 @@ bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { return false; } + SkPdfNativeObject obj; fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc); // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart) - // If it is a keyword, we will only get the pointer of the string + // If it is a keyword, we will only get the pointer of the string. if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) { token->fKeyword = obj.c_str(); token->fKeywordLength = obj.lenstr(); @@ -964,7 +966,9 @@ bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { printf("break;\n"); } #endif - printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str()); + printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", + token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : + token->fObject->toString().c_str()); #endif return true; @@ -975,7 +979,9 @@ void SkPdfNativeTokenizer::PutBack(PdfToken token) { fHasPutBack = true; fPutBack = token; #ifdef PDF_TRACE_READ_TOKEN - printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? 
SkString(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str()); + printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", + token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str() : + token.fObject->toString().c_str()); #endif } @@ -984,7 +990,9 @@ bool SkPdfNativeTokenizer::readToken(PdfToken* token) { *token = fPutBack; fHasPutBack = false; #ifdef PDF_TRACE_READ_TOKEN - printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str()); + printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", + token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : + token->fObject->toString().c_str()); #endif return true; } @@ -1009,7 +1017,7 @@ DECLARE_PDF_NAME(DecodeParms); DECLARE_PDF_NAME(Filter); DECLARE_PDF_NAME(Height); DECLARE_PDF_NAME(ImageMask); -DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations? +DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abbreviations? 
DECLARE_PDF_NAME(Interpolate); DECLARE_PDF_NAME(Width); @@ -1079,21 +1087,26 @@ SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() { SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject(); SkPdfNativeObject::makeEmptyDictionary(inlineImage); - // PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart, fUncompressedStream - fUncompressedStreamStart) + // PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart, + // fUncompressedStream - fUncompressedStreamStart) while (fUncompressedStream < fUncompressedStreamEnd) { SkPdfNativeObject* key = fAllocator->allocObject(); - fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc); + fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key, + fAllocator, fDoc); // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s - if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID - fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc); + if (key->isKeyword() && key->lenstr() == 2 && + key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID + fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd, + inlineImage, fDoc); return inlineImage; } else { SkPdfNativeObject* obj = fAllocator->allocObject(); - fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc); + fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj, + fAllocator, fDoc); // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s - // TODO(edisonn): perf maybe we should not expand abreviation like this + // TODO(edisonn): perf maybe we should not expand abbreviation like this inlineImage->set(inlineImageKeyAbbreviationExpand(key), inlineImageValueAbbreviationExpand(obj)); } |