diff options
author | edisonn@google.com <edisonn@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2013-07-10 17:09:50 +0000 |
---|---|---|
committer | edisonn@google.com <edisonn@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2013-07-10 17:09:50 +0000 |
commit | 571c70b95f56e22b5a7d6f4f288aa6c9a925a64f (patch) | |
tree | e0d3377d7e373350706d46722af8fe050abda9d3 /experimental/PdfViewer | |
parent | 89fa4b9ee6bc6039781acbdb6c097a41f894ea1c (diff) |
Native PDF parser implementation - don't try it on PDFs that were not generated by Skia, Chrome Print Preview or Chrome Save As PDF - it will crash, as missing xref, PDFs with updates, and other features are not supported yet.
Review URL: https://codereview.chromium.org/18323019
git-svn-id: http://skia.googlecode.com/svn/trunk@9962 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'experimental/PdfViewer')
18 files changed, 3046 insertions, 712 deletions
diff --git a/experimental/PdfViewer/SkPdfBasics.h b/experimental/PdfViewer/SkPdfBasics.h index d20f34bdae..747c32a3d4 100644 --- a/experimental/PdfViewer/SkPdfBasics.h +++ b/experimental/PdfViewer/SkPdfBasics.h @@ -15,11 +15,12 @@ class SkPdfDoc; class SkPdfObject; class SkPdfResourceDictionary; -class SkPodofoParsedPDF; +class SkNativeParsedPDF; // TODO(edisonn): better class design. struct SkPdfColorOperator { - std::string fColorSpace; // TODO(edisonn): use SkString + // does not own the char* + const char* fColorSpace; // TODO(edisonn): use SkString, or even char* SkColor fColor; double fOpacity; // ca or CA // TODO(edisonn): add here other color space options. @@ -29,7 +30,7 @@ struct SkPdfColorOperator { fColor = color; } // TODO(edisonn): double check the default values for all fields. - SkPdfColorOperator() : fColor(SK_ColorBLACK), fOpacity(1) {} + SkPdfColorOperator() : fColorSpace(NULL), fColor(SK_ColorBLACK), fOpacity(1) {} void applyGraphicsState(SkPaint* paint) { paint->setColor(SkColorSetA(fColor, fOpacity * 255)); @@ -63,7 +64,7 @@ struct SkPdfGraphicsState { double fWordSpace; double fCharSpace; - const SkPdfResourceDictionary* fResources; + SkPdfResourceDictionary* fResources; SkBitmap fSMask; @@ -115,12 +116,12 @@ struct PdfContext { std::stack<SkPdfObject*> fObjectStack; std::stack<SkPdfGraphicsState> fStateStack; SkPdfGraphicsState fGraphicsState; - const SkPodofoParsedPDF* fPdfDoc; + SkNativeParsedPDF* fPdfDoc; SkMatrix fOriginalMatrix; SkPdfInlineImage fInlineImage; - PdfContext(const SkPodofoParsedPDF* doc) : fPdfDoc(doc) {} + PdfContext(SkNativeParsedPDF* doc) : fPdfDoc(doc) {} }; diff --git a/experimental/PdfViewer/SkPdfFont.cpp b/experimental/PdfViewer/SkPdfFont.cpp index 733ae8705a..2309f541af 100644 --- a/experimental/PdfViewer/SkPdfFont.cpp +++ b/experimental/PdfViewer/SkPdfFont.cpp @@ -3,7 +3,7 @@ #include "SkStream.h" #include "SkTypeface.h" -#include "SkPdfPodofoTokenizer.h" +#include "SkPdfNativeTokenizer.h" std::map<std::string, 
SkPdfStandardFontEntry>& getStandardFonts() { static std::map<std::string, SkPdfStandardFontEntry> gPdfStandardFonts; @@ -149,28 +149,28 @@ SkTypeface* SkTypefaceFromPdfStandardFont(const char* fontName, bool bold, bool return typeface; } -SkPdfFont* SkPdfFont::fontFromFontDescriptor(SkPdfFontDescriptorDictionary* fd, bool loadFromName) { - // TODO(edisonn): partial implementation +SkPdfFont* SkPdfFont::fontFromFontDescriptor(SkNativeParsedPDF* doc, SkPdfFontDescriptorDictionary* fd, bool loadFromName) { + // TODO(edisonn): partial implementation ... also const handling ... // Only one, at most be available SkPdfStream* pdfStream = NULL; if (fd->has_FontFile()) { - pdfStream = fd->FontFile(); + pdfStream = fd->FontFile(doc); } else if (fd->has_FontFile2()) { - pdfStream = fd->FontFile2(); + pdfStream = fd->FontFile2(doc); } if (fd->has_FontFile3()) { - pdfStream = fd->FontFile3(); + pdfStream = fd->FontFile3(doc); } else { if (loadFromName) { - return fontFromName(fd, fd->FontName().c_str()); + return fontFromName(doc, fd, fd->FontName(doc).c_str()); } } - char* uncompressedStream = NULL; - long uncompressedStreamLength = 0; + unsigned char* uncompressedStream = NULL; + size_t uncompressedStreamLength = 0; // TODO(edisonn): report warning to be used in testing. 
if (!pdfStream || - !pdfStream->GetFilteredCopy(&uncompressedStream, &uncompressedStreamLength) || + !pdfStream->GetFilteredStreamRef(&uncompressedStream, &uncompressedStreamLength, doc->allocator()) || !uncompressedStream || !uncompressedStreamLength) { return NULL; @@ -189,26 +189,32 @@ SkPdfFont* SkPdfFont::fontFromFontDescriptor(SkPdfFontDescriptorDictionary* fd, return new SkPdfStandardFont(face); } -SkPdfFont* fontFromName(SkPdfObject* obj, const char* fontName) { +SkPdfFont* fontFromName(SkNativeParsedPDF* doc, SkPdfObject* obj, const char* fontName) { SkTypeface* typeface = SkTypefaceFromPdfStandardFont(fontName, false, false); if (typeface != NULL) { return new SkPdfStandardFont(typeface); } // TODO(edisonn): perf - make a map - for (int i = 0 ; i < obj->doc()->objects(); i++) { - const SkPdfObject* podofoFont = obj->doc()->object(i); - SkPdfFontDescriptorDictionary* fd = NULL; - - if (obj->doc()->mapper()->mapFontDescriptorDictionary(podofoFont, &fd)) { - if (fd->has_FontName() && fd->FontName() == fontName) { - SkPdfFont* font = SkPdfFont::fontFromFontDescriptor(fd, false); - if (font) { - return font; - } else { - // failed to load font descriptor - break; - } + for (unsigned int i = 0 ; i < doc->objects(); i++) { + SkPdfObject* obj = doc->object(i); + if (!obj->isDictionary()) { + continue; + } + + SkPdfFontDescriptorDictionary* fd = obj->asDictionary()->asFontDescriptorDictionary(); + + if (!fd->valid()) { + continue; + } + + if (fd->has_FontName() && fd->FontName(doc) == fontName) { + SkPdfFont* font = SkPdfFont::fontFromFontDescriptor(doc, fd, false); + if (font) { + return font; + } else { + // failed to load font descriptor + break; } } } @@ -217,86 +223,106 @@ SkPdfFont* fontFromName(SkPdfObject* obj, const char* fontName) { return SkPdfFont::Default(); } -SkPdfFont* SkPdfFont::fontFromPdfDictionary(SkPdfFontDictionary* dict) { - if (dict == NULL) { - return NULL; // TODO(edisonn): report default one? 
- } - - switch (dict->getType()) { +SkPdfFont* SkPdfFont::fontFromPdfDictionaryOnce(SkNativeParsedPDF* doc, SkPdfFontDictionary* dict) { + // TODO(edisonn): keep the type in a smart way in the SkPdfObject + // 1) flag, isResolved (1bit): reset at reset, add/remove/update (array) and set(dict) + // in a tree like structure, 3-4 bits for all the datatypes inheriting from obj (int, real, ...) + // if is a dict, reserveve a few bytes to encode type of dict, and so on like in a tree + // issue: type can be determined from context! atribute night be missing/wrong + switch (doc->mapper()->mapFontDictionary(dict)) { case kType0FontDictionary_SkPdfObjectType: - return fontFromType0FontDictionary(dict->asType0FontDictionary()); + return fontFromType0FontDictionary(doc, dict->asType0FontDictionary()); case kTrueTypeFontDictionary_SkPdfObjectType: - return fontFromTrueTypeFontDictionary(dict->asTrueTypeFontDictionary()); + return fontFromTrueTypeFontDictionary(doc, dict->asTrueTypeFontDictionary()); case kType1FontDictionary_SkPdfObjectType: - return fontFromType1FontDictionary(dict->asType1FontDictionary()); + return fontFromType1FontDictionary(doc, dict->asType1FontDictionary()); case kMultiMasterFontDictionary_SkPdfObjectType: - return fontFromMultiMasterFontDictionary(dict->asMultiMasterFontDictionary()); + return fontFromMultiMasterFontDictionary(doc, dict->asMultiMasterFontDictionary()); case kType3FontDictionary_SkPdfObjectType: - return fontFromType3FontDictionary(dict->asType3FontDictionary()); + return fontFromType3FontDictionary(doc, dict->asType3FontDictionary()); + + default: + // TODO(edisonn): report error? + return NULL; } - return NULL; // TODO(edisonn): report error? } -SkPdfType0Font* SkPdfFont::fontFromType0FontDictionary(SkPdfType0FontDictionary* dict) { +SkPdfFont* SkPdfFont::fontFromPdfDictionary(SkNativeParsedPDF* doc, SkPdfFontDictionary* dict) { + if (dict == NULL) { + return NULL; // TODO(edisonn): report default one? 
+ } + + if (dict->data() == NULL) { + dict->setData(fontFromPdfDictionaryOnce(doc, dict)); + } + return (SkPdfFont*)dict->data(); +} + + + +SkPdfType0Font* SkPdfFont::fontFromType0FontDictionary(SkNativeParsedPDF* doc, SkPdfType0FontDictionary* dict) { if (dict == NULL) { return NULL; // default one? } - return new SkPdfType0Font(dict); + return new SkPdfType0Font(doc, dict); } -SkPdfType1Font* SkPdfFont:: fontFromType1FontDictionary(SkPdfType1FontDictionary* dict) { +SkPdfType1Font* SkPdfFont:: fontFromType1FontDictionary(SkNativeParsedPDF* doc, SkPdfType1FontDictionary* dict) { if (dict == NULL) { return NULL; // default one? } - return new SkPdfType1Font(dict); + return new SkPdfType1Font(doc, dict); } -SkPdfType3Font* SkPdfFont::fontFromType3FontDictionary(SkPdfType3FontDictionary* dict) { +SkPdfType3Font* SkPdfFont::fontFromType3FontDictionary(SkNativeParsedPDF* doc, SkPdfType3FontDictionary* dict) { if (dict == NULL) { return NULL; // default one? } - return new SkPdfType3Font(dict); + return new SkPdfType3Font(doc, dict); } -SkPdfTrueTypeFont* SkPdfFont::fontFromTrueTypeFontDictionary(SkPdfTrueTypeFontDictionary* dict) { +SkPdfTrueTypeFont* SkPdfFont::fontFromTrueTypeFontDictionary(SkNativeParsedPDF* doc, SkPdfTrueTypeFontDictionary* dict) { if (dict == NULL) { return NULL; // default one? } - return new SkPdfTrueTypeFont(dict); + return new SkPdfTrueTypeFont(doc, dict); } -SkPdfMultiMasterFont* SkPdfFont::fontFromMultiMasterFontDictionary(SkPdfMultiMasterFontDictionary* dict) { +SkPdfMultiMasterFont* SkPdfFont::fontFromMultiMasterFontDictionary(SkNativeParsedPDF* doc, SkPdfMultiMasterFontDictionary* dict) { if (dict == NULL) { return NULL; // default one? 
} - return new SkPdfMultiMasterFont(dict); + return new SkPdfMultiMasterFont(doc, dict); } -static int skstoi(const SkPdfString* str) { +static int skstoi(const SkPdfObject* str) { + // TODO(edisonn): report err of it is not a (hex) string int ret = 0; - for (int i = 0 ; i < str->len(); i++) { + for (unsigned int i = 0 ; i < str->len(); i++) { ret = (ret << 8) + ((unsigned char*)str->c_str())[i]; } return ret; } -SkPdfToUnicode::SkPdfToUnicode(const SkPdfStream* stream) { +#define tokenIsKeyword(token,keyword) (token.fType == kKeyword_TokenType && token.fKeywordLength==sizeof(keyword)-1 && strncmp(token.fKeyword, keyword, sizeof(keyword)-1) == 0) + +SkPdfToUnicode::SkPdfToUnicode(SkNativeParsedPDF* parsed, SkPdfStream* stream) : fParsed(parsed) { fCMapEncoding = NULL; fCMapEncodingFlag = NULL; if (stream) { - SkPdfPodofoTokenizer* tokenizer = stream->doc()->tokenizerOfStream(stream); + SkPdfNativeTokenizer* tokenizer = fParsed->tokenizerOfStream(stream); PdfToken token; fCMapEncoding = new unsigned short[256 * 256]; @@ -314,48 +340,50 @@ SkPdfToUnicode::SkPdfToUnicode(const SkPdfStream* stream) { while (tokenizer->readToken(&token)) { - if (token.fType == kKeyword_TokenType && strcmp(token.fKeyword, "begincodespacerange") == 0) { - while (tokenizer->readToken(&token) && !(token.fType == kKeyword_TokenType && strcmp(token.fKeyword, "endcodespacerange") == 0)) { + // TODO(edisonn): perf, macro that would make equal first for token.fKeywordLength with sizeof(keyword), instead od strlen, make sure it is keyword, not a char* + if (tokenIsKeyword(token, "begincodespacerange")) { + while (tokenizer->readToken(&token) && !tokenIsKeyword(token, "endcodespacerange")) { // tokenizer->PutBack(token); // tokenizer->readToken(&token); // TODO(edisonn): check token type! ignore/report errors. 
- int start = skstoi(token.fObject->asString()); + int start = skstoi(token.fObject); tokenizer->readToken(&token); - int end = skstoi(token.fObject->asString()); + int end = skstoi(token.fObject); for (int i = start; i <= end; i++) { fCMapEncodingFlag[i] |= 1; } } } - if (token.fType == kKeyword_TokenType && strcmp(token.fKeyword, "beginbfchar") == 0) { - while (tokenizer->readToken(&token) && !(token.fType == kKeyword_TokenType && strcmp(token.fKeyword, "endbfchar") == 0)) { + if (tokenIsKeyword(token, "beginbfchar")) { + while (tokenizer->readToken(&token) && !tokenIsKeyword(token, "endbfchar")) { // tokenizer->PutBack(token); // tokenizer->readToken(&token); - int from = skstoi(token.fObject->asString()); + int from = skstoi(token.fObject); tokenizer->readToken(&token); - int to = skstoi(token.fObject->asString()); + int to = skstoi(token.fObject); fCMapEncodingFlag[from] |= 2; fCMapEncoding[from] = to; } } - if (token.fType == kKeyword_TokenType && strcmp(token.fKeyword, "beginbfrange") == 0) { - while (tokenizer->readToken(&token) && !(token.fType == kKeyword_TokenType && strcmp(token.fKeyword, "endbfrange") == 0)) { + if (tokenIsKeyword(token, "beginbfrange")) { + while (tokenizer->readToken(&token) && !tokenIsKeyword(token, "endbfrange")) { // tokenizer->PutBack(token); // tokenizer->readToken(&token); - int start = skstoi(token.fObject->asString()); + int start = skstoi(token.fObject); tokenizer->readToken(&token); - int end = skstoi(token.fObject->asString()); + int end = skstoi(token.fObject); tokenizer->readToken(&token); // [ or just an array directly? // tokenizer->PutBack(token); - if (token.fType == kObject_TokenType && token.fObject->asString()) { + // TODO(edisonn): read spec: any string or only hex string? 
+ if (token.fType == kObject_TokenType && token.fObject->isAnyString()) { // tokenizer->readToken(&token); - int value = skstoi(token.fObject->asString()); + int value = skstoi(token.fObject); for (int i = start; i <= end; i++) { fCMapEncodingFlag[i] |= 2; @@ -365,11 +393,11 @@ SkPdfToUnicode::SkPdfToUnicode(const SkPdfStream* stream) { } // read one string - } else if (token.fType == kObject_TokenType && token.fObject->asArray()) { + } else if (token.fType == kObject_TokenType && token.fObject->isArray()) { // tokenizer->readToken(&token); - for (int i = 0; i < token.fObject->asArray()->size(); i++) { + for (unsigned int i = 0; i < token.fObject->size(); i++) { fCMapEncodingFlag[start + i] |= 2; - fCMapEncoding[start + i] = skstoi((*token.fObject->asArray())[i]->asString()); + fCMapEncoding[start + i] = skstoi((*token.fObject)[i]); } // read array } @@ -383,14 +411,14 @@ SkPdfToUnicode::SkPdfToUnicode(const SkPdfStream* stream) { } -SkPdfType0Font::SkPdfType0Font(SkPdfType0FontDictionary* dict) { - fBaseFont = fontFromName(dict, dict->BaseFont().c_str()); +SkPdfType0Font::SkPdfType0Font(SkNativeParsedPDF* doc, SkPdfType0FontDictionary* dict) { + fBaseFont = fontFromName(doc, dict, dict->BaseFont(doc).c_str()); fEncoding = NULL; if (dict->has_Encoding()) { - if (dict->isEncodingAName()) { - fEncoding = SkPdfEncoding::fromName(dict->getEncodingAsName().c_str()); - } else if (dict->isEncodingAStream()) { + if (dict->isEncodingAName(doc)) { + fEncoding = SkPdfEncoding::fromName(dict->getEncodingAsName(doc).c_str()); + } else if (dict->isEncodingAStream(doc)) { //fEncoding = loadEncodingFromStream(dict->getEncodingAsStream()); } else { // TODO(edisonn): error ... warning .. assert? 
@@ -398,7 +426,7 @@ SkPdfType0Font::SkPdfType0Font(SkPdfType0FontDictionary* dict) { } if (dict->has_ToUnicode()) { - fToUnicode = new SkPdfToUnicode(dict->ToUnicode()); + fToUnicode = new SkPdfToUnicode(doc, dict->ToUnicode(doc)); } } diff --git a/experimental/PdfViewer/SkPdfFont.h b/experimental/PdfViewer/SkPdfFont.h index 1e9d9ee82f..324f0f65b4 100644 --- a/experimental/PdfViewer/SkPdfFont.h +++ b/experimental/PdfViewer/SkPdfFont.h @@ -29,7 +29,7 @@ struct SkPdfStandardFontEntry { std::map<std::string, SkPdfStandardFontEntry>& getStandardFonts(); SkTypeface* SkTypefaceFromPdfStandardFont(const char* fontName, bool bold, bool italic); -SkPdfFont* fontFromName(SkPdfObject* obj, const char* fontName); +SkPdfFont* fontFromName(SkNativeParsedPDF* doc, SkPdfObject* obj, const char* fontName); struct SkUnencodedText { void* text; @@ -68,12 +68,13 @@ public: std::map<std::string, SkPdfEncoding*>& getStandardEncodings(); class SkPdfToUnicode { + SkNativeParsedPDF* fParsed; // TODO(edisonn): hide public members public: unsigned short* fCMapEncoding; unsigned char* fCMapEncodingFlag; - SkPdfToUnicode(const SkPdfStream* stream); + SkPdfToUnicode(SkNativeParsedPDF* parsed, SkPdfStream* stream); }; @@ -127,12 +128,12 @@ public: virtual bool decodeText(const SkUnencodedText& textIn, SkDecodedText* textOut) const { // TODO(edisonn): SkASSERT(textIn.len % 2 == 0); or report error? 
- unsigned char* text = (unsigned char*)textIn.text; - textOut->text = new uint16_t[textIn.len]; - textOut->len = textIn.len; + uint16_t* text = (uint16_t*)textIn.text; + textOut->text = new uint16_t[textIn.len / 2]; + textOut->len = textIn.len / 2; for (int i = 0; i < textOut->len; i++) { - textOut->text[i] = text[i]; + textOut->text[i] = ((text[i] << 8) & 0xff00) | ((text[i] >> 8) & 0x00ff); } return true; @@ -185,20 +186,23 @@ public: } }; - static SkPdfFont* fontFromPdfDictionary(SkPdfFontDictionary* dict); - static SkPdfFont* Default() {return fontFromName(NULL, "TimesNewRoman");} + static SkPdfFont* fontFromPdfDictionary(SkNativeParsedPDF* doc, SkPdfFontDictionary* dict); + static SkPdfFont* Default() {return fontFromName(NULL, NULL, "TimesNewRoman");} - static SkPdfType0Font* fontFromType0FontDictionary(SkPdfType0FontDictionary* dict); - static SkPdfType1Font* fontFromType1FontDictionary(SkPdfType1FontDictionary* dict); - static SkPdfType3Font* fontFromType3FontDictionary(SkPdfType3FontDictionary* dict); - static SkPdfTrueTypeFont* fontFromTrueTypeFontDictionary(SkPdfTrueTypeFontDictionary* dict); - static SkPdfMultiMasterFont* fontFromMultiMasterFontDictionary(SkPdfMultiMasterFontDictionary* dict); + static SkPdfType0Font* fontFromType0FontDictionary(SkNativeParsedPDF* doc, SkPdfType0FontDictionary* dict); + static SkPdfType1Font* fontFromType1FontDictionary(SkNativeParsedPDF* doc, SkPdfType1FontDictionary* dict); + static SkPdfType3Font* fontFromType3FontDictionary(SkNativeParsedPDF* doc, SkPdfType3FontDictionary* dict); + static SkPdfTrueTypeFont* fontFromTrueTypeFontDictionary(SkNativeParsedPDF* doc, SkPdfTrueTypeFontDictionary* dict); + static SkPdfMultiMasterFont* fontFromMultiMasterFontDictionary(SkNativeParsedPDF* doc, SkPdfMultiMasterFontDictionary* dict); - static SkPdfFont* fontFromFontDescriptor(SkPdfFontDescriptorDictionary* fd, bool loadFromName = true); + static SkPdfFont* fontFromFontDescriptor(SkNativeParsedPDF* doc, 
SkPdfFontDescriptorDictionary* fd, bool loadFromName = true); public: virtual double drawOneChar(unsigned int ch, SkPaint* paint, PdfContext* pdfContext, SkCanvas* canvas) = 0; virtual void afterWord(SkPaint* paint, SkMatrix* matrix) = 0; + +private: + static SkPdfFont* fontFromPdfDictionaryOnce(SkNativeParsedPDF* doc, SkPdfFontDictionary* dict); }; class SkPdfStandardFont : public SkPdfFont { @@ -227,7 +231,7 @@ public: class SkPdfType0Font : public SkPdfFont { public: - SkPdfType0Font(SkPdfType0FontDictionary* dict); + SkPdfType0Font(SkNativeParsedPDF* doc, SkPdfType0FontDictionary* dict); public: @@ -241,11 +245,11 @@ public: class SkPdfType1Font : public SkPdfFont { public: - SkPdfType1Font(SkPdfType1FontDictionary* dict) { + SkPdfType1Font(SkNativeParsedPDF* doc, SkPdfType1FontDictionary* dict) { if (dict->has_FontDescriptor()) { - fBaseFont = SkPdfFont::fontFromFontDescriptor(dict->FontDescriptor()); + fBaseFont = SkPdfFont::fontFromFontDescriptor(doc, dict->FontDescriptor(doc)); } else { - fBaseFont = fontFromName(dict, dict->BaseFont().c_str()); + fBaseFont = fontFromName(doc, dict, dict->BaseFont(doc).c_str()); } } @@ -261,13 +265,13 @@ public: class SkPdfTrueTypeFont : public SkPdfType1Font { public: - SkPdfTrueTypeFont(SkPdfTrueTypeFontDictionary* dict) : SkPdfType1Font(dict) { + SkPdfTrueTypeFont(SkNativeParsedPDF* doc, SkPdfTrueTypeFontDictionary* dict) : SkPdfType1Font(doc, dict) { } }; class SkPdfMultiMasterFont : public SkPdfType1Font { public: - SkPdfMultiMasterFont(SkPdfMultiMasterFontDictionary* dict) : SkPdfType1Font(dict) { + SkPdfMultiMasterFont(SkNativeParsedPDF* doc, SkPdfMultiMasterFontDictionary* dict) : SkPdfType1Font(doc, dict) { } }; /* @@ -302,7 +306,7 @@ CIDToGIDMap* fCidToGid; class SkPdfType3Font : public SkPdfFont { struct Type3FontChar { - SkPdfObject* fObj; + const SkPdfObject* fObj; double fWidth; }; @@ -317,33 +321,33 @@ class SkPdfType3Font : public SkPdfFont { Type3FontChar* fChars; public: - 
SkPdfType3Font(SkPdfType3FontDictionary* dict) { - fBaseFont = fontFromName(dict, dict->BaseFont().c_str()); + SkPdfType3Font(SkNativeParsedPDF* parsed, SkPdfType3FontDictionary* dict) { + fBaseFont = fontFromName(parsed, dict, dict->BaseFont(parsed).c_str()); if (dict->has_Encoding()) { - if (dict->isEncodingAName()) { - fEncoding = SkPdfEncoding::fromName(dict->getEncodingAsName().c_str()); - } else if (dict->isEncodingAEncodingdictionary()) { + if (dict->isEncodingAName(parsed)) { + fEncoding = SkPdfEncoding::fromName(dict->getEncodingAsName(parsed).c_str()); + } else if (dict->isEncodingAEncodingdictionary(parsed)) { // technically, there is no encoding. fEncoding = SkPdfCIDToGIDMapIdentityEncoding::instance(); - fEncodingDict = dict->getEncodingAsEncodingdictionary(); + fEncodingDict = dict->getEncodingAsEncodingdictionary(parsed); } } // null? - fCharProcs = dict->CharProcs(); + fCharProcs = dict->CharProcs(parsed); fToUnicode = NULL; if (dict->has_ToUnicode()) { - fToUnicode = new SkPdfToUnicode(dict->ToUnicode()); + fToUnicode = new SkPdfToUnicode(parsed, dict->ToUnicode(parsed)); } - fFirstChar = dict->FirstChar(); - fLastChar = dict->LastChar(); - fFonMatrix = dict->has_FontMatrix() ? *dict->FontMatrix() : SkMatrix::I(); + fFirstChar = dict->FirstChar(parsed); + fLastChar = dict->LastChar(parsed); + fFonMatrix = dict->has_FontMatrix() ? 
dict->FontMatrix(parsed) : SkMatrix::I(); - if (dict->FontBBox()) { - fFontBBox = *dict->FontBBox(); + if (dict->has_FontBBox()) { + fFontBBox = dict->FontBBox(parsed); } fChars = new Type3FontChar[fLastChar - fFirstChar + 1]; @@ -351,24 +355,24 @@ public: memset(fChars, 0, sizeof(fChars[0]) * (fLastChar - fFirstChar + 1)); - SkPdfArray* widths = dict->Widths(); - for (int i = 0 ; i < widths->size(); i++) { + const SkPdfArray* widths = dict->Widths(parsed); + for (unsigned int i = 0 ; i < widths->size(); i++) { if ((fFirstChar + i) < fFirstChar || (fFirstChar + i) > fLastChar) { printf("break; error 1\n"); } - fChars[i].fWidth = (*widths)[i]->asNumber()->value(); + fChars[i].fWidth = (*widths)[i]->numberValue(); } - SkPdfArray* diffs = fEncodingDict->Differences(); - int j = fFirstChar; - for (int i = 0 ; i < diffs->size(); i++) { - if ((*diffs)[i]->asInteger()) { - j = (*diffs)[i]->asInteger()->value(); - } else if ((*diffs)[i]->asName()) { + const SkPdfArray* diffs = fEncodingDict->Differences(parsed); + unsigned int j = fFirstChar; + for (unsigned int i = 0 ; i < diffs->size(); i++) { + if ((*diffs)[i]->isInteger()) { + j = (*diffs)[i]->intValue(); + } else if ((*diffs)[i]->isName()) { if (j < fFirstChar || j > fLastChar) { printf("break; error 2\n"); } - fChars[j - fFirstChar].fObj = fCharProcs->get((*diffs)[i]->asName()->value().c_str()); + fChars[j - fFirstChar].fObj = fCharProcs->get((*diffs)[i]->nameValue()); j++; } else { // err @@ -389,11 +393,13 @@ public: } #endif - doType3Char(pdfContext, canvas, fChars[ch - fFirstChar].fObj, fFontBBox, fFonMatrix, pdfContext->fGraphicsState.fCurFontSize); + // TODO(edisonn): is it better to resolve the reference at load time, or now? 
+ doType3Char(pdfContext, canvas, pdfContext->fPdfDoc->resolveReference(fChars[ch - fFirstChar].fObj), fFontBBox, fFonMatrix, pdfContext->fGraphicsState.fCurFontSize); // TODO(edisonn): verify/test translate code, not tested yet pdfContext->fGraphicsState.fMatrixTm.preTranslate(SkDoubleToScalar(pdfContext->fGraphicsState.fCurFontSize * fChars[ch - fFirstChar].fWidth), SkDoubleToScalar(0.0)); + return fChars[ch - fFirstChar].fWidth; } virtual void afterWord(SkPaint* paint, SkMatrix* matrix) { diff --git a/experimental/PdfViewer/SkPdfParser.cpp b/experimental/PdfViewer/SkPdfParser.cpp index 030251250e..ea0f6251e6 100644 --- a/experimental/PdfViewer/SkPdfParser.cpp +++ b/experimental/PdfViewer/SkPdfParser.cpp @@ -21,10 +21,7 @@ #include <iostream> #include <cstdio> #include <stack> - -#include "podofo.h" -using namespace PoDoFo; - +#include <set> __SK_FORCE_IMAGE_DECODER_LINKING; @@ -41,7 +38,7 @@ __SK_FORCE_IMAGE_DECODER_LINKING; /* #ifdef PDF_TRACE std::string str; - pdfContext->fGraphicsState.fResources->podofo()->ToString(str); + pdfContext->fGraphicsState.fResources->native()->ToString(str); printf("Print Tf Resources: %s\n", str.c_str()); #endif */ @@ -63,7 +60,7 @@ __SK_FORCE_IMAGE_DECODER_LINKING; * - load font for youtube.pdf * - parser for pdf from the definition already available in pdfspec_autogen.py * - all docs from ~/work - * - encapsulate podofo in the pdf api so the skpdf does not know anything about podofo ... in progress + * - encapsulate native in the pdf api so the skpdf does not know anything about native ... in progress * - load gs/ especially smask and already known prop (skp) ... in progress * - wrapper on classes for customizations? e.g. 
* SkPdfPageObjectVanila - has only the basic loaders/getters @@ -74,7 +71,6 @@ __SK_FORCE_IMAGE_DECODER_LINKING; */ using namespace std; -using namespace PoDoFo; // Utilities static void setup_bitmap(SkBitmap* bitmap, int width, int height, SkColor color = SK_ColorWHITE) { @@ -122,10 +118,10 @@ SkMatrix SkMatrixFromPdfArray(SkPdfArray* pdfArray) { // TODO(edisonn): security issue, ret if size() != 6 for (int i = 0; i < 6; i++) { const SkPdfObject* elem = pdfArray->operator [](i); - if (elem == NULL || (!elem->asNumber() && !elem->asInteger())) { + if (elem == NULL || !elem->isNumber()) { return SkMatrix::I(); // TODO(edisonn): report issue } - array[i] = elem->asNumber() ? elem->asNumber()->value() : elem->asInteger()->value(); + array[i] = elem->numberValue(); } return SkMatrixFromPdfMatrix(array); @@ -152,7 +148,7 @@ bool hasVisualEffect(const char* pdfOp) { } // TODO(edisonn): Pass PdfContext and SkCanvasd only with the define for instrumentation. -static bool readToken(SkPdfPodofoTokenizer* fTokenizer, PdfToken* token) { +static bool readToken(SkPdfNativeTokenizer* fTokenizer, PdfToken* token) { bool ret = fTokenizer->readToken(token); gReadOp++; @@ -251,7 +247,7 @@ map<std::string, PdfOperatorRenderer> gPdfOps; map<std::string, int> gRenderStats[kCount_PdfResult]; -char* gRenderStatsNames[kCount_PdfResult] = { +const char* gRenderStatsNames[kCount_PdfResult] = { "Success", "Partially implemented", "Not yet implemented", @@ -260,63 +256,6 @@ char* gRenderStatsNames[kCount_PdfResult] = { "Unsupported/Unknown" }; -static SkTypeface* SkTypefaceFromPdfFont(PdfFont* font) { - if (font == NULL) { - return SkTypeface::CreateFromName("Times New Roman", SkTypeface::kNormal); - } - - PdfObject* fontObject = font->GetObject(); - - PdfObject* pBaseFont = NULL; - // TODO(edisonn): warning, PoDoFo has a bug in PdfFont constructor, does not call InitVars() - // for now fixed locally. 
- pBaseFont = fontObject->GetIndirectKey( "BaseFont" ); - const char* pszBaseFontName = pBaseFont->GetName().GetName().c_str(); - -#ifdef PDF_TRACE - std::string str; - fontObject->ToString(str); - printf("Base Font Name: %s\n", pszBaseFontName); - printf("Font Object Data: %s\n", str.c_str()); -#endif - - SkTypeface* typeface = SkTypefaceFromPdfStandardFont(pszBaseFontName, font->IsBold(), font->IsItalic()); - - if (typeface != NULL) { - return typeface; - } - - char name[1000]; - // HACK - strncpy(name, pszBaseFontName, 1000); - char* comma = strstr(name, ","); - char* dash = strstr(name, "-"); - if (comma) *comma = '\0'; - if (dash) *dash = '\0'; - - typeface = SkTypeface::CreateFromName( - name, - SkTypeface::Style((font->IsBold() ? SkTypeface::kBold : 0) | - (font->IsItalic() ? SkTypeface::kItalic : 0))); - - if (typeface != NULL) { -#ifdef PDF_TRACE - printf("HACKED FONT found %s\n", name); -#endif - return typeface; - } - -#ifdef PDF_TRACE - printf("FONT_NOT_FOUND %s\n", pszBaseFontName); -#endif - - // TODO(edisonn): Report Warning, NYI - return SkTypeface::CreateFromName( - "Times New Roman", - SkTypeface::Style((font->IsBold() ? SkTypeface::kBold : 0) | - (font->IsItalic() ? 
SkTypeface::kItalic : 0))); -} - PdfResult DrawText(PdfContext* pdfContext, const SkPdfObject* _str, SkCanvas* canvas) @@ -327,12 +266,12 @@ PdfResult DrawText(PdfContext* pdfContext, skfont = SkPdfFont::Default(); } - const SkPdfString* str = _str->asString(); - if (str == NULL) { + if (_str == NULL || !_str->isAnyString()) { // TODO(edisonn): report warning return kIgnoreError_PdfResult; } + const SkPdfString* str = (const SkPdfString*)_str; SkUnencodedText binary(str); @@ -406,16 +345,15 @@ static SkColorTable* getGrayColortable() { return grayColortable; } -SkBitmap transferImageStreamToBitmap(unsigned char* uncompressedStream, pdf_long uncompressedStreamLength, +SkBitmap transferImageStreamToBitmap(unsigned char* uncompressedStream, size_t uncompressedStreamLength, int width, int height, int bytesPerLine, int bpc, const std::string& colorSpace, bool transparencyMask) { SkBitmap bitmap; - int components = GetColorSpaceComponents(colorSpace); + //int components = GetColorSpaceComponents(colorSpace); //#define MAX_COMPONENTS 10 - int bitsPerLine = width * components * bpc; // TODO(edisonn): assume start of lines are aligned at 32 bits? // Is there a faster way to load the uncompressed stream into a bitmap? 
@@ -424,7 +362,7 @@ SkBitmap transferImageStreamToBitmap(unsigned char* uncompressedStream, pdf_long SkColor* uncompressedStreamArgb = (SkColor*)malloc(width * height * sizeof(SkColor)); for (int h = 0 ; h < height; h++) { - long i = width * (height - 1 - h); + long i = width * (h); for (int w = 0 ; w < width; w++) { uncompressedStreamArgb[i] = SkColorSetRGB(uncompressedStream[3 * w], uncompressedStream[3 * w + 1], @@ -441,7 +379,7 @@ SkBitmap transferImageStreamToBitmap(unsigned char* uncompressedStream, pdf_long unsigned char* uncompressedStreamA8 = (unsigned char*)malloc(width * height); for (int h = 0 ; h < height; h++) { - long i = width * (height - 1 - h); + long i = width * (h); for (int w = 0 ; w < width; w++) { uncompressedStreamA8[i] = transparencyMask ? 255 - uncompressedStream[w] : uncompressedStream[w]; @@ -459,15 +397,14 @@ SkBitmap transferImageStreamToBitmap(unsigned char* uncompressedStream, pdf_long return bitmap; } -bool transferImageStreamToARGB(unsigned char* uncompressedStream, pdf_long uncompressedStreamLength, +bool transferImageStreamToARGB(unsigned char* uncompressedStream, size_t uncompressedStreamLength, int width, int bytesPerLine, int bpc, const std::string& colorSpace, SkColor** uncompressedStreamArgb, - pdf_long* uncompressedStreamLengthInBytesArgb) { - int components = GetColorSpaceComponents(colorSpace); + size_t* uncompressedStreamLengthInBytesArgb) { + //int components = GetColorSpaceComponents(colorSpace); //#define MAX_COMPONENTS 10 - int bitsPerLine = width * components * bpc; // TODO(edisonn): assume start of lines are aligned at 32 bits? 
int height = uncompressedStreamLength / bytesPerLine; @@ -477,7 +414,7 @@ bool transferImageStreamToARGB(unsigned char* uncompressedStream, pdf_long uncom *uncompressedStreamArgb = (SkColor*)malloc(*uncompressedStreamLengthInBytesArgb); for (int h = 0 ; h < height; h++) { - long i = width * (height - 1 - h); + long i = width * (h); for (int w = 0 ; w < width; w++) { (*uncompressedStreamArgb)[i] = SkColorSetRGB(uncompressedStream[3 * w], uncompressedStream[3 * w + 1], @@ -494,7 +431,7 @@ bool transferImageStreamToARGB(unsigned char* uncompressedStream, pdf_long uncom *uncompressedStreamArgb = (SkColor*)malloc(*uncompressedStreamLengthInBytesArgb); for (int h = 0 ; h < height; h++) { - long i = width * (height - 1 - h); + long i = width * (h); for (int w = 0 ; w < width; w++) { (*uncompressedStreamArgb)[i] = SkColorSetRGB(uncompressedStream[w], uncompressedStream[w], @@ -518,32 +455,20 @@ bool transferImageStreamToARGB(unsigned char* uncompressedStream, pdf_long uncom // this functions returns the image, it does not look at the smask. -SkBitmap getImageFromObject(PdfContext* pdfContext, const SkPdfImageDictionary* image, bool transparencyMask) { - if (image == NULL) { +SkBitmap getImageFromObject(PdfContext* pdfContext, SkPdfImageDictionary* image, bool transparencyMask) { + if (image == NULL || !image->hasStream()) { // TODO(edisonn): report warning to be used in testing. return SkBitmap(); } - // TODO (edisonn): Fast Jpeg(DCTDecode) draw, or fast PNG(FlateDecode) draw ... 
-// PdfObject* value = resolveReferenceObject(pdfContext->fPdfDoc, -// obj.GetDictionary().GetKey(PdfName("Filter"))); -// if (value && value->IsArray() && value->GetArray().GetSize() == 1) { -// value = resolveReferenceObject(pdfContext->fPdfDoc, -// &value->GetArray()[0]); -// } -// if (value && value->IsName() && value->GetName().GetName() == "DCTDecode") { -// SkStream stream = SkStream:: -// SkImageDecoder::Factory() -// } - - long bpc = image->BitsPerComponent(); - long width = image->Width(); - long height = image->Height(); + long bpc = image->BitsPerComponent(pdfContext->fPdfDoc); + long width = image->Width(pdfContext->fPdfDoc); + long height = image->Height(pdfContext->fPdfDoc); std::string colorSpace = "DeviceRGB"; // TODO(edisonn): color space can be an array too! - if (image->isColorSpaceAName()) { - colorSpace = image->getColorSpaceAsName(); + if (image->isColorSpaceAName(pdfContext->fPdfDoc)) { + colorSpace = image->getColorSpaceAsName(pdfContext->fPdfDoc); } /* @@ -558,23 +483,48 @@ SkBitmap getImageFromObject(PdfContext* pdfContext, const SkPdfImageDictionary* } */ - char* uncompressedStream = NULL; - pdf_long uncompressedStreamLength = 0; + unsigned char* uncompressedStream = NULL; + size_t uncompressedStreamLength = 0; - PdfResult ret = kPartial_PdfResult; - SkPdfStream* stream = NULL; - image->doc()->mapper()->mapStream(image, &stream); + SkPdfStream* stream = (SkPdfStream*)image; - if (!stream || !stream->GetFilteredCopy(&uncompressedStream, &uncompressedStreamLength) || + if (!stream || !stream->GetFilteredStreamRef(&uncompressedStream, &uncompressedStreamLength, pdfContext->fPdfDoc->allocator()) || uncompressedStream == NULL || uncompressedStreamLength == 0) { // TODO(edisonn): report warning to be used in testing. 
return SkBitmap(); } + SkPdfStreamCommonDictionary* streamDict = (SkPdfStreamCommonDictionary*)stream; + + if (streamDict->has_Filter() && ((streamDict->isFilterAName(NULL) && + streamDict->getFilterAsName(NULL) == "DCTDecode") || + (streamDict->isFilterAArray(NULL) && + streamDict->getFilterAsArray(NULL)->size() > 0 && + streamDict->getFilterAsArray(NULL)->objAtAIndex(0)->isName() && + streamDict->getFilterAsArray(NULL)->objAtAIndex(0)->nameValue2() == "DCTDecode"))) { + SkBitmap bitmap; + SkImageDecoder::DecodeMemory(uncompressedStream, uncompressedStreamLength, &bitmap); + return bitmap; + } + + + + // TODO (edisonn): Fast Jpeg(DCTDecode) draw, or fast PNG(FlateDecode) draw ... +// PdfObject* value = resolveReferenceObject(pdfContext->fPdfDoc, +// obj.GetDictionary().GetKey(PdfName("Filter"))); +// if (value && value->IsArray() && value->GetArray().GetSize() == 1) { +// value = resolveReferenceObject(pdfContext->fPdfDoc, +// &value->GetArray()[0]); +// } +// if (value && value->IsName() && value->GetName().GetName() == "DCTDecode") { +// SkStream stream = SkStream:: +// SkImageDecoder::Factory() +// } + int bytesPerLine = uncompressedStreamLength / height; #ifdef PDF_TRACE if (uncompressedStreamLength % height != 0) { - printf("Warning uncompressedStreamLength % height != 0 !!!\n"); + printf("Warning uncompressedStreamLength modulo height != 0 !!!\n"); } #endif @@ -584,13 +534,11 @@ SkBitmap getImageFromObject(PdfContext* pdfContext, const SkPdfImageDictionary* bpc, colorSpace, transparencyMask); - free(uncompressedStream); - return bitmap; } -SkBitmap getSmaskFromObject(PdfContext* pdfContext, const SkPdfImageDictionary* obj) { - const SkPdfImageDictionary* sMask = obj->SMask(); +SkBitmap getSmaskFromObject(PdfContext* pdfContext, SkPdfImageDictionary* obj) { + SkPdfImageDictionary* sMask = obj->SMask(pdfContext->fPdfDoc); if (sMask) { return getImageFromObject(pdfContext, sMask, true); @@ -600,7 +548,7 @@ SkBitmap getSmaskFromObject(PdfContext* pdfContext, 
const SkPdfImageDictionary* return pdfContext->fGraphicsState.fSMask; } -PdfResult doXObject_Image(PdfContext* pdfContext, SkCanvas* canvas, const SkPdfImageDictionary* skpdfimage) { +PdfResult doXObject_Image(PdfContext* pdfContext, SkCanvas* canvas, SkPdfImageDictionary* skpdfimage) { if (skpdfimage == NULL) { return kIgnoreError_PdfResult; } @@ -610,6 +558,20 @@ PdfResult doXObject_Image(PdfContext* pdfContext, SkCanvas* canvas, const SkPdfI canvas->save(); canvas->setMatrix(pdfContext->fGraphicsState.fMatrix); + +#if 1 + SkScalar z = SkIntToScalar(0); + SkScalar one = SkIntToScalar(1); + + SkPoint from[4] = {SkPoint::Make(z, z), SkPoint::Make(one, z), SkPoint::Make(one, one), SkPoint::Make(z, one)}; + SkPoint to[4] = {SkPoint::Make(z, one), SkPoint::Make(one, one), SkPoint::Make(one, z), SkPoint::Make(z, z)}; + SkMatrix flip; + SkAssertResult(flip.setPolyToPoly(from, to, 4)); + SkMatrix solveImageFlip = pdfContext->fGraphicsState.fMatrix; + solveImageFlip.preConcat(flip); + canvas->setMatrix(solveImageFlip); +#endif + SkRect dst = SkRect::MakeXYWH(SkDoubleToScalar(0.0), SkDoubleToScalar(0.0), SkDoubleToScalar(1.0), SkDoubleToScalar(1.0)); if (sMask.empty()) { @@ -633,7 +595,7 @@ PdfResult doXObject_Image(PdfContext* pdfContext, SkCanvas* canvas, const SkPdfI PdfResult doXObject_Form(PdfContext* pdfContext, SkCanvas* canvas, SkPdfType1FormDictionary* skobj) { - if (!skobj) { + if (!skobj || !skobj->hasStream()) { return kIgnoreError_PdfResult; } @@ -641,14 +603,14 @@ PdfResult doXObject_Form(PdfContext* pdfContext, SkCanvas* canvas, SkPdfType1For canvas->save(); - if (skobj->Resources()) { - pdfContext->fGraphicsState.fResources = skobj->Resources(); + if (skobj->Resources(pdfContext->fPdfDoc)) { + pdfContext->fGraphicsState.fResources = skobj->Resources(pdfContext->fPdfDoc); } SkTraceMatrix(pdfContext->fGraphicsState.fMatrix, "Current matrix"); - if (skobj->Matrix()) { - pdfContext->fGraphicsState.fMatrix.preConcat(*skobj->Matrix()); + if (skobj->has_Matrix()) 
{ + pdfContext->fGraphicsState.fMatrix.preConcat(skobj->Matrix(pdfContext->fPdfDoc)); pdfContext->fGraphicsState.fMatrixTm = pdfContext->fGraphicsState.fMatrix; pdfContext->fGraphicsState.fMatrixTlm = pdfContext->fGraphicsState.fMatrix; // TODO(edisonn) reset matrixTm and matricTlm also? @@ -658,17 +620,16 @@ PdfResult doXObject_Form(PdfContext* pdfContext, SkCanvas* canvas, SkPdfType1For canvas->setMatrix(pdfContext->fGraphicsState.fMatrix); - if (skobj->BBox()) { - canvas->clipRect(*skobj->BBox(), SkRegion::kIntersect_Op, true); // TODO(edisonn): AA from settings. + if (skobj->has_BBox()) { + canvas->clipRect(skobj->BBox(pdfContext->fPdfDoc), SkRegion::kIntersect_Op, true); // TODO(edisonn): AA from settings. } // TODO(edisonn): iterate smart on the stream even if it is compressed, tokenize it as we go. // For this PdfContentsTokenizer needs to be extended. - SkPdfStream* stream = NULL; - skobj->doc()->mapper()->mapStream(skobj, &stream); + SkPdfStream* stream = (SkPdfStream*)skobj; - SkPdfPodofoTokenizer* tokenizer = skobj->doc()->tokenizerOfStream(stream); + SkPdfNativeTokenizer* tokenizer = pdfContext->fPdfDoc->tokenizerOfStream(stream); if (tokenizer != NULL) { PdfMainLooper looper(NULL, tokenizer, pdfContext, canvas); looper.loop(); @@ -682,12 +643,12 @@ PdfResult doXObject_Form(PdfContext* pdfContext, SkCanvas* canvas, SkPdfType1For return kPartial_PdfResult; } -PdfResult doXObject_PS(PdfContext* pdfContext, SkCanvas* canvas, const PdfObject& obj) { +PdfResult doXObject_PS(PdfContext* pdfContext, SkCanvas* canvas, const SkPdfObject* obj) { return kNYI_PdfResult; } -PdfResult doType3Char(PdfContext* pdfContext, SkCanvas* canvas, SkPdfObject* skobj, SkRect bBox, SkMatrix matrix, double textSize) { - if (!skobj) { +PdfResult doType3Char(PdfContext* pdfContext, SkCanvas* canvas, const SkPdfObject* skobj, SkRect bBox, SkMatrix matrix, double textSize) { + if (!skobj || !skobj->hasStream()) { return kIgnoreError_PdfResult; } @@ -714,10 +675,9 @@ PdfResult 
doType3Char(PdfContext* pdfContext, SkCanvas* canvas, SkPdfObject* sko // TODO(edisonn): iterate smart on the stream even if it is compressed, tokenize it as we go. // For this PdfContentsTokenizer needs to be extended. - SkPdfStream* stream = NULL; - skobj->doc()->mapper()->mapStream(skobj, &stream); + SkPdfStream* stream = (SkPdfStream*)skobj; - SkPdfPodofoTokenizer* tokenizer = skobj->doc()->tokenizerOfStream(stream); + SkPdfNativeTokenizer* tokenizer = pdfContext->fPdfDoc->tokenizerOfStream(stream); if (tokenizer != NULL) { PdfMainLooper looper(NULL, tokenizer, pdfContext, canvas); looper.loop(); @@ -733,13 +693,13 @@ PdfResult doType3Char(PdfContext* pdfContext, SkCanvas* canvas, SkPdfObject* sko } -// TODO(edisonn): faster, have the property on the SkPdfObject itself? -std::set<const void*> gInRendering; +// TODO(edisonn): make sure the pointer is unique +std::set<const SkPdfObject*> gInRendering; class CheckRecursiveRendering { - const void* fUniqueData; + const SkPdfObject* fUniqueData; public: - CheckRecursiveRendering(const SkPdfObject* obj) : fUniqueData(obj->data()) { + CheckRecursiveRendering(const SkPdfObject* obj) : fUniqueData(obj) { gInRendering.insert(obj); } @@ -749,39 +709,29 @@ public: } static bool IsInRendering(const SkPdfObject* obj) { - return gInRendering.find(obj->data()) != gInRendering.end(); + return gInRendering.find(obj) != gInRendering.end(); } }; -PdfResult doXObject(PdfContext* pdfContext, SkCanvas* canvas, const SkPdfObject& obj) { - if (CheckRecursiveRendering::IsInRendering(&obj)) { +PdfResult doXObject(PdfContext* pdfContext, SkCanvas* canvas, const SkPdfObject* obj) { + if (CheckRecursiveRendering::IsInRendering(obj)) { // Oops, corrupt PDF! 
return kIgnoreError_PdfResult; } - CheckRecursiveRendering checkRecursion(&obj); - - // TODO(edisonn): check type - SkPdfXObjectDictionary* skobj = NULL; - if (!obj.doc()->mapper()->mapXObjectDictionary(&obj, &skobj)) return kIgnoreError_PdfResult; + CheckRecursiveRendering checkRecursion(obj); - if (!skobj) return kIgnoreError_PdfResult; - - PdfResult ret = kIgnoreError_PdfResult; - switch (skobj->getType()) + switch (pdfContext->fPdfDoc->mapper()->mapXObjectDictionary(obj)) { case kImageDictionary_SkPdfObjectType: - ret = doXObject_Image(pdfContext, canvas, skobj->asImageDictionary()); - break; + return doXObject_Image(pdfContext, canvas, (SkPdfImageDictionary*)obj); case kType1FormDictionary_SkPdfObjectType: - ret = doXObject_Form(pdfContext, canvas, skobj->asType1FormDictionary()); - break; + return doXObject_Form(pdfContext, canvas, (SkPdfType1FormDictionary*)obj); //case kObjectDictionaryXObjectPS_SkPdfObjectType: //return doXObject_PS(skxobj.asPS()); + default: + return kIgnoreError_PdfResult; } - - delete skobj; - return ret; } PdfResult PdfOp_q(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { @@ -800,7 +750,7 @@ PdfResult PdfOp_Q(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo PdfResult PdfOp_cm(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { double array[6]; for (int i = 0 ; i < 6 ; i++) { - array[5 - i] = pdfContext->fObjectStack.top()->asNumber()->value(); + array[5 - i] = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); } @@ -835,7 +785,7 @@ PdfResult PdfOp_cm(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo //, to leading, which is a number expressed in unscaled text //space units. Text leading is used only by the T*, ', and " operators. Initial value: 0. 
PdfResult PdfOp_TL(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double ty = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double ty = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); pdfContext->fGraphicsState.fTextLeading = ty; @@ -843,8 +793,8 @@ PdfResult PdfOp_TL(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo } PdfResult PdfOp_Td(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double ty = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double tx = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double ty = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double tx = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); double array[6] = {1, 0, 0, 1, tx, ty}; SkMatrix matrix = SkMatrixFromPdfMatrix(array); @@ -856,19 +806,19 @@ PdfResult PdfOp_Td(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo } PdfResult PdfOp_TD(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double ty = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double tx = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double ty = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double tx = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); - // TODO(edisonn): Create factory methods or constructors so podofo is hidden - SkPdfNumber* _ty = pdfContext->fPdfDoc->createNumber(-ty); + // TODO(edisonn): Create factory methods or constructors so native is hidden + SkPdfReal* _ty = pdfContext->fPdfDoc->createReal(-ty); pdfContext->fObjectStack.push(_ty); PdfOp_TL(pdfContext, canvas, looper); - SkPdfNumber* vtx = pdfContext->fPdfDoc->createNumber(tx); + SkPdfReal* vtx = 
pdfContext->fPdfDoc->createReal(tx); pdfContext->fObjectStack.push(vtx); - SkPdfNumber* vty = pdfContext->fPdfDoc->createNumber(ty); + SkPdfReal* vty = pdfContext->fPdfDoc->createReal(ty); pdfContext->fObjectStack.push(vty); PdfResult ret = PdfOp_Td(pdfContext, canvas, looper); @@ -883,12 +833,12 @@ PdfResult PdfOp_TD(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo } PdfResult PdfOp_Tm(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double f = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double e = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double d = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double c = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double b = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double a = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double f = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double e = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double d = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double c = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double b = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double a = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); double array[6]; array[0] = a; @@ -912,8 +862,8 @@ PdfResult PdfOp_Tm(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo //0 Tl Td //where Tl is the current leading parameter in the text state PdfResult PdfOp_T_star(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - SkPdfNumber* zero = pdfContext->fPdfDoc->createNumber(0.0); - SkPdfNumber* tl = 
pdfContext->fPdfDoc->createNumber(pdfContext->fGraphicsState.fTextLeading); + SkPdfReal* zero = pdfContext->fPdfDoc->createReal(0.0); + SkPdfReal* tl = pdfContext->fPdfDoc->createReal(pdfContext->fGraphicsState.fTextLeading); pdfContext->fObjectStack.push(zero); pdfContext->fObjectStack.push(tl); @@ -932,8 +882,8 @@ PdfResult PdfOp_m(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo pdfContext->fGraphicsState.fPathClosed = false; } - pdfContext->fGraphicsState.fCurPosY = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - pdfContext->fGraphicsState.fCurPosX = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + pdfContext->fGraphicsState.fCurPosY = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + pdfContext->fGraphicsState.fCurPosX = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); pdfContext->fGraphicsState.fPath.moveTo(SkDoubleToScalar(pdfContext->fGraphicsState.fCurPosX), SkDoubleToScalar(pdfContext->fGraphicsState.fCurPosY)); @@ -947,8 +897,8 @@ PdfResult PdfOp_l(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo pdfContext->fGraphicsState.fPathClosed = false; } - pdfContext->fGraphicsState.fCurPosY = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - pdfContext->fGraphicsState.fCurPosX = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + pdfContext->fGraphicsState.fCurPosY = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + pdfContext->fGraphicsState.fCurPosX = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); pdfContext->fGraphicsState.fPath.lineTo(SkDoubleToScalar(pdfContext->fGraphicsState.fCurPosX), SkDoubleToScalar(pdfContext->fGraphicsState.fCurPosY)); @@ -962,12 +912,12 @@ PdfResult PdfOp_c(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo 
pdfContext->fGraphicsState.fPathClosed = false; } - double y3 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double x3 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double y2 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double x2 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double y1 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double x1 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double y3 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double x3 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double y2 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double x2 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double y1 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double x1 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); pdfContext->fGraphicsState.fPath.cubicTo(SkDoubleToScalar(x1), SkDoubleToScalar(y1), SkDoubleToScalar(x2), SkDoubleToScalar(y2), @@ -985,10 +935,10 @@ PdfResult PdfOp_v(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo pdfContext->fGraphicsState.fPathClosed = false; } - double y3 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double x3 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double y2 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double x2 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double y3 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double x3 = 
pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double y2 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double x2 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); double y1 = pdfContext->fGraphicsState.fCurPosY; double x1 = pdfContext->fGraphicsState.fCurPosX; @@ -1008,12 +958,12 @@ PdfResult PdfOp_y(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo pdfContext->fGraphicsState.fPathClosed = false; } - double y3 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double x3 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double y3 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double x3 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); double y2 = pdfContext->fGraphicsState.fCurPosY; double x2 = pdfContext->fGraphicsState.fCurPosX; - double y1 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double x1 = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double y1 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double x1 = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); pdfContext->fGraphicsState.fPath.cubicTo(SkDoubleToScalar(x1), SkDoubleToScalar(y1), SkDoubleToScalar(x2), SkDoubleToScalar(y2), @@ -1031,10 +981,10 @@ PdfResult PdfOp_re(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo pdfContext->fGraphicsState.fPathClosed = false; } - double height = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double width = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double y = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double x = 
pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double height = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double width = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double y = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double x = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); pdfContext->fGraphicsState.fPath.addRect(SkDoubleToScalar(x), SkDoubleToScalar(y), SkDoubleToScalar(x + width), SkDoubleToScalar(y + height)); @@ -1181,26 +1131,29 @@ PdfResult PdfOp_ET(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo //a number representing a scale factor. There is no initial value for either font or //size; they must be specified explicitly using Tf before any text is shown. PdfResult PdfOp_Tf(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - pdfContext->fGraphicsState.fCurFontSize = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - std::string fontName = pdfContext->fObjectStack.top()->asName()->value(); pdfContext->fObjectStack.pop(); + pdfContext->fGraphicsState.fCurFontSize = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + const char* fontName = pdfContext->fObjectStack.top()->nameValue(); pdfContext->fObjectStack.pop(); #ifdef PDF_TRACE - printf("font name: %s\n", fontName.c_str()); + printf("font name: %s\n", fontName); #endif - SkPdfFontDictionary* fd = NULL; - if (pdfContext->fGraphicsState.fResources->Font()) { - SkPdfObject* objFont = pdfContext->fGraphicsState.fResources->Font()->get(fontName.c_str()); - objFont->doc()->mapper()->mapFontDictionary(objFont, &fd); - } + if (pdfContext->fGraphicsState.fResources->Font(pdfContext->fPdfDoc)) { + SkPdfObject* objFont = pdfContext->fGraphicsState.fResources->Font(pdfContext->fPdfDoc)->get(fontName); + objFont = 
pdfContext->fPdfDoc->resolveReference(objFont); + if (kNone_SkPdfObjectType == pdfContext->fPdfDoc->mapper()->mapFontDictionary(objFont)) { + // TODO(edisonn): try to recover and draw it any way? + return kIgnoreError_PdfResult; + } + SkPdfFontDictionary* fd = (SkPdfFontDictionary*)objFont; - SkPdfFont* skfont = SkPdfFont::fontFromPdfDictionary(fd); + SkPdfFont* skfont = SkPdfFont::fontFromPdfDictionary(pdfContext->fPdfDoc, fd); - if (skfont) { - pdfContext->fGraphicsState.fSkFont = skfont; + if (skfont) { + pdfContext->fGraphicsState.fSkFont = skfont; + } } - - return kPartial_PdfResult; + return kIgnoreError_PdfResult; } PdfResult PdfOp_Tj(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { @@ -1256,18 +1209,22 @@ PdfResult PdfOp_TJ(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo return kIgnoreError_PdfResult; } - SkPdfArray* array = pdfContext->fObjectStack.top()->asArray(); + SkPdfArray* array = (SkPdfArray*)pdfContext->fObjectStack.top(); pdfContext->fObjectStack.pop(); + if (!array->isArray()) { + return kIgnoreError_PdfResult; + } + for( int i=0; i<static_cast<int>(array->size()); i++ ) { - if( (*array)[i]->asString()) { + if( (*array)[i]->isAnyString()) { SkPdfObject* obj = (*array)[i]; DrawText(pdfContext, obj, canvas); - } else if ((*array)[i]->asInteger() || (*array)[i]->asNumber()) { - double dx = (*array)[i]->asNumber()->value(); + } else if ((*array)[i]->isNumber()) { + double dx = (*array)[i]->numberValue(); SkMatrix matrix; matrix.setAll(SkDoubleToScalar(1), SkDoubleToScalar(0), @@ -1287,7 +1244,7 @@ PdfResult PdfOp_TJ(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo } PdfResult PdfOp_CS_cs(PdfContext* pdfContext, SkCanvas* canvas, SkPdfColorOperator* colorOperator) { - colorOperator->fColorSpace = pdfContext->fObjectStack.top()->asName()->value(); pdfContext->fObjectStack.pop(); + colorOperator->fColorSpace = pdfContext->fObjectStack.top()->nameValue(); pdfContext->fObjectStack.pop(); return 
kOK_PdfResult; } @@ -1301,29 +1258,31 @@ PdfResult PdfOp_cs(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo PdfResult PdfOp_SC_sc(PdfContext* pdfContext, SkCanvas* canvas, SkPdfColorOperator* colorOperator) { double c[4]; - pdf_int64 v[4]; +// int64_t v[4]; int n = GetColorSpaceComponents(colorOperator->fColorSpace); bool doubles = true; - if (colorOperator->fColorSpace == "Indexed") { + if (strcmp(colorOperator->fColorSpace, "Indexed") == 0) { doubles = false; } #ifdef PDF_TRACE - printf("color space = %s, N = %i\n", colorOperator->fColorSpace.c_str(), n); + printf("color space = %s, N = %i\n", colorOperator->fColorSpace, n); #endif for (int i = n - 1; i >= 0 ; i--) { if (doubles) { - c[i] = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - } else { - v[i] = pdfContext->fObjectStack.top()->asInteger()->value(); pdfContext->fObjectStack.pop(); + c[i] = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); +// } else { +// v[i] = pdfContext->fObjectStack.top()->intValue(); pdfContext->fObjectStack.pop(); } } // TODO(edisonn): Now, set that color. Only DeviceRGB supported. - if (colorOperator->fColorSpace == "DeviceRGB") { + // TODO(edisonn): do possible field values to enum at parsing time! 
+ // TODO(edisonn): support also abreviations /DeviceRGB == /RGB + if (strcmp(colorOperator->fColorSpace, "DeviceRGB") == 0 || strcmp(colorOperator->fColorSpace, "RGB") == 0) { colorOperator->setRGBColor(SkColorSetRGB(255*c[0], 255*c[1], 255*c[2])); } return kPartial_PdfResult; @@ -1338,9 +1297,9 @@ PdfResult PdfOp_sc(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo } PdfResult PdfOp_SCN_scn(PdfContext* pdfContext, SkCanvas* canvas, SkPdfColorOperator* colorOperator) { - PdfString name; - - if (pdfContext->fObjectStack.top()->asName()) { + //SkPdfString* name; + if (pdfContext->fObjectStack.top()->isName()) { + // TODO(edisonn): get name, pass it pdfContext->fObjectStack.pop(); } @@ -1359,7 +1318,7 @@ PdfResult PdfOp_scn(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** l } PdfResult PdfOp_G_g(PdfContext* pdfContext, SkCanvas* canvas, SkPdfColorOperator* colorOperator) { - double gray = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + /*double gray = */pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); return kNYI_PdfResult; } @@ -1372,9 +1331,9 @@ PdfResult PdfOp_g(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo } PdfResult PdfOp_RG_rg(PdfContext* pdfContext, SkCanvas* canvas, SkPdfColorOperator* colorOperator) { - double b = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double g = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double r = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double b = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double g = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + double r = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); colorOperator->fColorSpace = "DeviceRGB"; colorOperator->setRGBColor(SkColorSetRGB(255*r, 
255*g, 255*b)); @@ -1391,10 +1350,10 @@ PdfResult PdfOp_rg(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo PdfResult PdfOp_K_k(PdfContext* pdfContext, SkCanvas* canvas, SkPdfColorOperator* colorOperator) { // TODO(edisonn): spec has some rules about overprint, implement them. - double k = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double y = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double m = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); - double c = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + /*double k = */pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + /*double y = */pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + /*double m = */pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); + /*double c = */pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); colorOperator->fColorSpace = "DeviceCMYK"; // TODO(edisonn): Set color. @@ -1468,7 +1427,7 @@ PdfResult PdfOp_EI(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo //lineWidth w Set the line width in the graphics state (see “Line Width” on page 152). PdfResult PdfOp_w(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double lineWidth = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double lineWidth = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); pdfContext->fGraphicsState.fLineWidth = lineWidth; return kOK_PdfResult; @@ -1477,7 +1436,7 @@ PdfResult PdfOp_w(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo //lineCap J Set the line cap style in the graphics state (see “Line Cap Style” on page 153). 
PdfResult PdfOp_J(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { pdfContext->fObjectStack.pop(); - //double lineCap = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + //double lineCap = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); return kNYI_PdfResult; } @@ -1485,7 +1444,7 @@ PdfResult PdfOp_J(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo //lineJoin j Set the line join style in the graphics state (see “Line Join Style” on page 153). PdfResult PdfOp_j(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { pdfContext->fObjectStack.pop(); - //double lineJoin = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + //double lineJoin = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); return kNYI_PdfResult; } @@ -1493,7 +1452,7 @@ PdfResult PdfOp_j(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo //miterLimit M Set the miter limit in the graphics state (see “Miter Limit” on page 153). PdfResult PdfOp_M(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { pdfContext->fObjectStack.pop(); - //double miterLimit = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + //double miterLimit = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); return kNYI_PdfResult; } @@ -1526,14 +1485,14 @@ PdfResult PdfOp_i(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** loo //dictName gs (PDF 1.2) Set the specified parameters in the graphics state. dictName is //the name of a graphics state parameter dictionary in the ExtGState subdictionary of the current resource dictionary (see the next section). 
PdfResult PdfOp_gs(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - std::string name = pdfContext->fObjectStack.top()->asName()->value(); pdfContext->fObjectStack.pop(); + const char* name = pdfContext->fObjectStack.top()->nameValue(); pdfContext->fObjectStack.pop(); #ifdef PDF_TRACE std::string str; #endif //Next, get the ExtGState Dictionary from the Resource Dictionary: - const SkPdfDictionary* extGStateDictionary = pdfContext->fGraphicsState.fResources->ExtGState(); + SkPdfDictionary* extGStateDictionary = pdfContext->fGraphicsState.fResources->ExtGState(pdfContext->fPdfDoc); if (extGStateDictionary == NULL) { #ifdef PDF_TRACE @@ -1542,10 +1501,12 @@ PdfResult PdfOp_gs(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo return kIgnoreError_PdfResult; } - SkPdfObject* value = extGStateDictionary->get(name.c_str()); + SkPdfObject* value = pdfContext->fPdfDoc->resolveReference(extGStateDictionary->get(name)); - SkPdfGraphicsStateDictionary* gs = NULL; - value->doc()->mapper()->mapGraphicsStateDictionary(value, &gs); + if (kNone_SkPdfObjectType == pdfContext->fPdfDoc->mapper()->mapGraphicsStateDictionary(value)) { + return kIgnoreError_PdfResult; + } + SkPdfGraphicsStateDictionary* gs = (SkPdfGraphicsStateDictionary*)value; // TODO(edisonn): now load all those properties in graphic state. 
if (gs == NULL) { @@ -1553,19 +1514,17 @@ PdfResult PdfOp_gs(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo } if (gs->has_CA()) { - pdfContext->fGraphicsState.fStroking.fOpacity = gs->CA(); + pdfContext->fGraphicsState.fStroking.fOpacity = gs->CA(pdfContext->fPdfDoc); } if (gs->has_ca()) { - pdfContext->fGraphicsState.fNonStroking.fOpacity = gs->ca(); + pdfContext->fGraphicsState.fNonStroking.fOpacity = gs->ca(pdfContext->fPdfDoc); } if (gs->has_LW()) { - pdfContext->fGraphicsState.fLineWidth = gs->LW(); + pdfContext->fGraphicsState.fLineWidth = gs->LW(pdfContext->fPdfDoc); } - - return kNYI_PdfResult; } @@ -1573,7 +1532,7 @@ PdfResult PdfOp_gs(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo //, to charSpace, which is a number expressed in unscaled text space units. Character spacing is used by the Tj, TJ, and ' operators. //Initial value: 0. PdfResult PdfOp_Tc(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double charSpace = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double charSpace = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); pdfContext->fGraphicsState.fCharSpace = charSpace; return kOK_PdfResult; @@ -1585,7 +1544,7 @@ PdfResult PdfOp_Tc(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo //text space units. Word spacing is used by the Tj, TJ, and ' operators. Initial //value: 0. PdfResult PdfOp_Tw(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double wordSpace = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + double wordSpace = pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); pdfContext->fGraphicsState.fWordSpace = wordSpace; return kOK_PdfResult; @@ -1595,7 +1554,7 @@ PdfResult PdfOp_Tw(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo //, to (scale ˜ 100). 
scale is a number specifying the //percentage of the normal width. Initial value: 100 (normal width). PdfResult PdfOp_Tz(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double scale = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + /*double scale = */pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); return kNYI_PdfResult; } @@ -1603,14 +1562,14 @@ PdfResult PdfOp_Tz(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo //render Tr Set the text rendering mode, T //mode, to render, which is an integer. Initial value: 0. PdfResult PdfOp_Tr(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double render = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + /*double render = */pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); return kNYI_PdfResult; } //rise Ts Set the text rise, Trise, to rise, which is a number expressed in unscaled text space //units. Initial value: 0. 
PdfResult PdfOp_Ts(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - double rise = pdfContext->fObjectStack.top()->asNumber()->value(); pdfContext->fObjectStack.pop(); + /*double rise = */pdfContext->fObjectStack.top()->numberValue(); pdfContext->fObjectStack.pop(); return kNYI_PdfResult; } @@ -1644,9 +1603,9 @@ PdfResult PdfOp_sh(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo //name Do PdfResult PdfOp_Do(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** looper) { - std::string name = pdfContext->fObjectStack.top()->asName()->value(); pdfContext->fObjectStack.pop(); + const char* name = pdfContext->fObjectStack.top()->nameValue(); pdfContext->fObjectStack.pop(); - SkPdfDictionary* xObject = pdfContext->fGraphicsState.fResources->XObject(); + SkPdfDictionary* xObject = pdfContext->fGraphicsState.fResources->XObject(pdfContext->fPdfDoc); if (xObject == NULL) { #ifdef PDF_TRACE @@ -1655,14 +1614,15 @@ PdfResult PdfOp_Do(PdfContext* pdfContext, SkCanvas* canvas, PdfTokenLooper** lo return kIgnoreError_PdfResult; } - SkPdfObject* value = xObject->get(name.c_str()); + SkPdfObject* value = xObject->get(name); + value = pdfContext->fPdfDoc->resolveReference(value); #ifdef PDF_TRACE // value->ToString(str); -// printf("Do object value: %s\n", str.c_str()); +// printf("Do object value: %s\n", str); #endif - return doXObject(pdfContext, canvas, *value); + return doXObject(pdfContext, canvas, value); } //tag MP Designate a marked-content point. tag is a name object indicating the role or @@ -1826,14 +1786,18 @@ void reportPdfRenderStats() { } PdfResult PdfMainLooper::consumeToken(PdfToken& token) { - if (token.fType == kKeyword_TokenType) + char keyword[256]; + + if (token.fType == kKeyword_TokenType && token.fKeywordLength < 256) { + strncpy(keyword, token.fKeyword, token.fKeywordLength); + keyword[token.fKeywordLength] = '\0'; // TODO(edisonn): log trace flag (verbose, error, info, warning, ...) 
- PdfOperatorRenderer pdfOperatorRenderer = gPdfOps[token.fKeyword]; + PdfOperatorRenderer pdfOperatorRenderer = gPdfOps[keyword]; if (pdfOperatorRenderer) { // caller, main work is done by pdfOperatorRenderer(...) PdfTokenLooper* childLooper = NULL; - gRenderStats[pdfOperatorRenderer(fPdfContext, fCanvas, &childLooper)][token.fKeyword]++; + gRenderStats[pdfOperatorRenderer(fPdfContext, fCanvas, &childLooper)][keyword]++; if (childLooper) { childLooper->setUp(this); @@ -1841,17 +1805,15 @@ PdfResult PdfMainLooper::consumeToken(PdfToken& token) { delete childLooper; } } else { - gRenderStats[kUnsupported_PdfResult][token.fKeyword]++; + gRenderStats[kUnsupported_PdfResult][keyword]++; } } else if (token.fType == kObject_TokenType) { fPdfContext->fObjectStack.push( token.fObject ); } - else if ( token.fType == kImageData_TokenType) { - // TODO(edisonn): implement inline image. - } else { + // TODO(edisonn): deine or use assert not reached return kIgnoreError_PdfResult; } return kOK_PdfResult; @@ -1945,7 +1907,7 @@ void PdfCompatibilitySectionLooper::loop() { bool SkPdfViewer::load(const SkString inputFileName, SkPicture* out) { std::cout << "Init: " << inputFileName.c_str() << std::endl; - SkPodofoParsedPDF* doc = new SkPodofoParsedPDF(inputFileName.c_str()); + SkNativeParsedPDF* doc = new SkNativeParsedPDF(inputFileName.c_str()); if (!doc->pages()) { std::cout << "ERROR: Empty Document" << inputFileName.c_str() << std::endl; diff --git a/experimental/PdfViewer/SkPdfParser.h b/experimental/PdfViewer/SkPdfParser.h index 5841b12c3b..20f0cf5887 100644 --- a/experimental/PdfViewer/SkPdfParser.h +++ b/experimental/PdfViewer/SkPdfParser.h @@ -5,14 +5,12 @@ * found in the LICENSE file. 
*/ -#include "SkPdfHeaders_autogen.h" -#include "SkPdfMapper_autogen.h" #ifndef SkPdfParser_DEFINED #define SkPdfParser_DEFINED #include "SkPdfBasics.h" -#include "SkPdfPodofoTokenizer.h" +#include "SkPdfNativeTokenizer.h" extern "C" PdfContext* gPdfContext; extern "C" SkBitmap* gDumpBitmap; @@ -22,13 +20,13 @@ extern "C" SkCanvas* gDumpCanvas; class PdfTokenLooper { protected: PdfTokenLooper* fParent; - SkPdfPodofoTokenizer* fTokenizer; + SkPdfNativeTokenizer* fTokenizer; PdfContext* fPdfContext; SkCanvas* fCanvas; public: PdfTokenLooper(PdfTokenLooper* parent, - SkPdfPodofoTokenizer* tokenizer, + SkPdfNativeTokenizer* tokenizer, PdfContext* pdfContext, SkCanvas* canvas) : fParent(parent), fTokenizer(tokenizer), fPdfContext(pdfContext), fCanvas(canvas) {} @@ -47,7 +45,7 @@ public: class PdfMainLooper : public PdfTokenLooper { public: PdfMainLooper(PdfTokenLooper* parent, - SkPdfPodofoTokenizer* tokenizer, + SkPdfNativeTokenizer* tokenizer, PdfContext* pdfContext, SkCanvas* canvas) : PdfTokenLooper(parent, tokenizer, pdfContext, canvas) {} diff --git a/experimental/PdfViewer/SkPdfUtils.cpp b/experimental/PdfViewer/SkPdfUtils.cpp index 898f83d763..869e633933 100644 --- a/experimental/PdfViewer/SkPdfUtils.cpp +++ b/experimental/PdfViewer/SkPdfUtils.cpp @@ -1 +1,21 @@ #include "SkPdfUtils.h" +#include "SkPdfConfig.h" + +#ifdef PDF_TRACE +void SkTraceMatrix(const SkMatrix& matrix, const char* sz) { + printf("SkMatrix %s ", sz); + for (int i = 0 ; i < 9 ; i++) { + printf("%f ", SkScalarToDouble(matrix.get(i))); + } + printf("\n"); +} + +void SkTraceRect(const SkRect& rect, const char* sz) { + printf("SkRect %s ", sz); + printf("x = %f ", SkScalarToDouble(rect.x())); + printf("y = %f ", SkScalarToDouble(rect.y())); + printf("w = %f ", SkScalarToDouble(rect.width())); + printf("h = %f ", SkScalarToDouble(rect.height())); + printf("\n"); +} +#endif diff --git a/experimental/PdfViewer/SkPdfUtils.h b/experimental/PdfViewer/SkPdfUtils.h index 03aacf671a..6555ac5392 100644 --- 
a/experimental/PdfViewer/SkPdfUtils.h +++ b/experimental/PdfViewer/SkPdfUtils.h @@ -7,6 +7,6 @@ class SkPdfArray; SkMatrix SkMatrixFromPdfArray(SkPdfArray* pdfArray); -PdfResult doType3Char(PdfContext* pdfContext, SkCanvas* canvas, SkPdfObject* skobj, SkRect bBox, SkMatrix matrix, double textSize); +PdfResult doType3Char(PdfContext* pdfContext, SkCanvas* canvas, const SkPdfObject* skobj, SkRect bBox, SkMatrix matrix, double textSize); #endif // __DEFINED__SkPdfUtils diff --git a/experimental/PdfViewer/autogen.sh b/experimental/PdfViewer/autogen.sh index a1dcf67a5b..22995e6516 100755 --- a/experimental/PdfViewer/autogen.sh +++ b/experimental/PdfViewer/autogen.sh @@ -1,5 +1,4 @@ rm pdfparser/autogen/* rm pdfparser/native/autogen/* -rm pdfparser/podofo/autogen/* #python spec2def.py PdfReference-okular-1.txt autogen/pdfspec_autogen.py #python generate_code.py 'pdfparser/' diff --git a/experimental/PdfViewer/datatypes.py b/experimental/PdfViewer/datatypes.py index 5ffaba5387..d297a07a06 100644 --- a/experimental/PdfViewer/datatypes.py +++ b/experimental/PdfViewer/datatypes.py @@ -54,9 +54,13 @@ class FileSpecNone: def toCpp(self): return 'SkPdfFileSpec()' -class PdfEmptyRect: +class EmptyRect: def toCpp(self): - return 'SkRect()' + return 'SkRect::MakeEmpty()' + +class IdentityMatrix: + def toCpp(self): + return 'SkMatrix::I()' class PdfEmptyStream: def toCpp(self): @@ -70,5 +74,6 @@ class PdfFunctionNone: def toCpp(self): return 'SkPdfFunction()' - -
\ No newline at end of file +class EmptyTree: + def toCpp(self): + return 'SkPdfTree()' diff --git a/experimental/PdfViewer/generate_code.py b/experimental/PdfViewer/generate_code.py index 482f876c55..51fa173cd1 100644 --- a/experimental/PdfViewer/generate_code.py +++ b/experimental/PdfViewer/generate_code.py @@ -5,26 +5,31 @@ import sys import datatypes from autogen.pdfspec_autogen import * +# TODO(edisonn): date and some other types are in fact strings, with a custom format!!! +# TODO(edisonn): refer to page 99 (PDF data types) knowTypes = { -'(any)': ['SkPdfObject*', 'SkPdfObjectFromDictionary', datatypes.CppNull(), 'true', 'use a mapper'], -'(undefined)': ['SkPdfObject*', 'SkPdfObjectFromDictionary', datatypes.CppNull(), 'true', 'use a mapper'], -'(various)': ['SkPdfObject*', 'SkPdfObjectFromDictionary', datatypes.CppNull(), 'true', 'use a mapper'], -'array': ['SkPdfArray*', 'ArrayFromDictionary', datatypes.CppNull(), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Array'], -'boolean': ['bool', 'BoolFromDictionary', datatypes.PdfBoolean('false'), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Bool'], -'date': ['SkPdfDate', 'DateFromDictionary', datatypes.PdfDateNever(), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Array'], -'dictionary': ['SkPdfDictionary*', 'SkPdfDictionaryFromDictionary', datatypes.CppNull(), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Dictionary', 'use a mapper'], -'function': ['SkPdfFunction', 'FunctionFromDictionary', datatypes.PdfFunctionNone(), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Reference'], -'integer': ['long', 'LongFromDictionary', datatypes.PdfInteger(0), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Number'], -'file_specification': ['SkPdfFileSpec', 'FileSpecFromDictionary', datatypes.FileSpecNone(), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Reference'], -'name': ['std::string', 'NameFromDictionary', datatypes.PdfString('""'), 'ret->podofo()->GetDataType() == 
PoDoFo::ePdfDataType_Name'], -'tree': ['SkPdfTree*', 'TreeFromDictionary', datatypes.CppNull(), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Reference'], -'number': ['double', 'DoubleFromDictionary', datatypes.PdfNumber(0), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Real || ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Number'], -'rectangle': ['SkRect*', 'SkRectFromDictionary', datatypes.CppNull(), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Array && ret->podofo()->GetArray().GetLength() == 4'], -'stream': ['SkPdfStream*', 'StreamFromDictionary', datatypes.CppNull(), 'ret->podofo()->HasStream()'], -'string': ['std::string', 'StringFromDictionary', datatypes.PdfString('""'), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_String || ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_HexString'], -'text': ['std::string', 'StringFromDictionary', datatypes.PdfString('""'), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_String || ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_HexString'], -'text string': ['std::string', 'StringFromDictionary', datatypes.PdfString('""'), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_String || ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_HexString'], -'matrix': ['SkMatrix*', 'SkMatrixFromDictionary', datatypes.CppNull(), 'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Array && ret->podofo()->GetArray().GetLength() == 4'], +'(any)': ['SkPdfObject*', 'ret', datatypes.CppNull(), 'true', 'use a mapper'], +# TODO(edisonn): return constant for undefined +'(undefined)': ['SkPdfObject*', 'ret', datatypes.CppNull(), 'true', 'use a mapper'], +'(various)': ['SkPdfObject*', 'ret', datatypes.CppNull(), 'true', 'use a mapper'], +'array': ['SkPdfArray*', '(SkPdfArray*)ret', datatypes.CppNull(), 'ret->isArray()'], +'boolean': ['bool', 'ret->boolValue()', datatypes.PdfBoolean('false'), 'ret->isBoolean()'], +#date is a string, with special formating, add here the +'date': 
['SkPdfDate', 'ret->dateValue()', datatypes.PdfDateNever(), 'ret->isDate()'], +'dictionary': ['SkPdfDictionary*', '(SkPdfDictionary*)ret', datatypes.CppNull(), 'ret->isDictionary()', 'use a mapper'], +'function': ['SkPdfFunction', 'ret->functionValue()', datatypes.PdfFunctionNone(), 'ret->isFunction()'], +'integer': ['int64_t', 'ret->intValue()', datatypes.PdfInteger(0), 'ret->isInteger()'], +'file_specification': ['SkPdfFileSpec', 'ret->fileSpecValue()', datatypes.FileSpecNone(), 'false'], +'name': ['std::string', 'ret->nameValue2()', datatypes.PdfString('""'), 'ret->isName()'], +#should assert, references should never be allowed here, should be resolved way earlier +'tree': ['SkPdfTree', 'ret->treeValue()', datatypes.EmptyTree(), 'false'], +'number': ['double', 'ret->numberValue()', datatypes.PdfNumber(0), 'ret->isNumber()'], +'rectangle': ['SkRect', 'ret->rectangleValue()', datatypes.EmptyRect(), 'ret->isRectangle()'], +'stream': ['SkPdfStream*', 'ret->getStream()', datatypes.CppNull(), 'ret->hasStream()'], +'string': ['std::string', 'ret->stringValue2()', datatypes.PdfString('""'), 'ret->isAnyString()'], +'text': ['std::string', 'ret->stringValue2()', datatypes.PdfString('""'), 'ret->isAnyString()'], +'text string': ['std::string', 'ret->stringValue2()', datatypes.PdfString('""'), 'ret->isAnyString()'], +'matrix': ['SkMatrix', 'ret->matrixValue()', datatypes.IdentityMatrix(), 'ret->isMatrix()'], } @@ -101,8 +106,8 @@ class PdfClass: def __init__(self, name, base, comment): self.fFields = [] self.fIncludes = [] - self.fCCPublicPodofo = [] - self.fCCPublicPodofoCpp = [] + self.fCCPublicNative = [] + self.fCCPublicNativeCpp = [] self.fName = name self.fBase = base self.fComment = comment @@ -140,12 +145,12 @@ class PdfClass: self.fIncludes.append(path) return self - def carbonCopyPublicPodofo(self, cc): - self.fCCPublicPodofo.append(cc) + def carbonCopyPublicNative(self, cc): + self.fCCPublicNative.append(cc) return self - def carbonCopyPublicPodofoCpp(self, cc): 
- self.fCCPublicPodofoCpp.append(cc) + def carbonCopyPublicNativeCpp(self, cc): + self.fCCPublicNativeCpp.append(cc) return self def done(self): @@ -156,8 +161,8 @@ class PdfClassManager: self.fClasses = {} self.fClassesNamesInOrder = [] - def addClass(self, name, base='Object', comment=''): - if name == 'Object': + def addClass(self, name, base='', comment=''): + if name == 'Dictionary': cls = PdfClass(name, '', comment) else: cls = PdfClass(name, base, comment) @@ -179,42 +184,42 @@ class PdfClassManager: fileEnums.write(' ' + cls.fEnumEnd + ',\n') - def writeAsNull(self, podofoFileClass, cls, enumToCls): - podofoFileClass.write(' virtual SkPdf' + cls.fName +'* as' + cls.fName + '() {return NULL;}\n') - podofoFileClass.write(' virtual const SkPdf' + cls.fName +'* as' + cls.fName + '() const {return NULL;}\n') - podofoFileClass.write('\n') + def writeAsNull(self, nativeFileClass, cls, enumToCls): + nativeFileClass.write(' SkPdf' + cls.fName +'* as' + cls.fName + '() {return (SkPdf' + cls.fName + '*)this;}\n') + nativeFileClass.write(' const SkPdf' + cls.fName +'* as' + cls.fName + '() const {return (const SkPdf' + cls.fName + '*)this;}\n') + nativeFileClass.write('\n') cnt = 0 for sub in cls.fEnumSubclasses: - self.writeAsNull(podofoFileClass, enumToCls[cls.fEnumSubclasses[cnt]], enumToCls) + self.writeAsNull(nativeFileClass, enumToCls[cls.fEnumSubclasses[cnt]], enumToCls) cnt = cnt + 1 - def writeAsFoo(self, podofoFileClass, cls, enumToCls): + def writeAsFoo(self, nativeFileClass, cls, enumToCls): # TODO(edisonn): add a container, with sections, public, private, default, ... 
# the end code will be grouped # me - podofoFileClass.write('public:\n') + nativeFileClass.write('public:\n') - podofoFileClass.write('public:\n') - podofoFileClass.write(' SkPdf' + cls.fName +'* as' + cls.fName + '() {return this;}\n') - podofoFileClass.write(' virtual const SkPdf' + cls.fName +'* as' + cls.fName + '() const {return this;}\n') - podofoFileClass.write('\n') + nativeFileClass.write('public:\n') + nativeFileClass.write(' SkPdf' + cls.fName +'* as' + cls.fName + '() {return this;}\n') + nativeFileClass.write(' const SkPdf' + cls.fName +'* as' + cls.fName + '() const {return this;}\n') + nativeFileClass.write('\n') - if cls.fName == 'Object': + if cls.fName == 'Dictionary': cnt = 0 for sub in cls.fEnumSubclasses: - self.writeAsNull(podofoFileClass, enumToCls[cls.fEnumSubclasses[cnt]], enumToCls) + self.writeAsNull(nativeFileClass, enumToCls[cls.fEnumSubclasses[cnt]], enumToCls) cnt = cnt + 1 - if cls.fName != 'Object': - podofoFileClass.write('private:\n') + if cls.fName != 'Dictionary': + nativeFileClass.write('private:\n') base = self.fClasses[cls.fBase] cnt = 0 for sub in base.fEnumSubclasses: if enumToCls[base.fEnumSubclasses[cnt]].fName != cls.fName: - self.writeAsNull(podofoFileClass, enumToCls[base.fEnumSubclasses[cnt]], enumToCls) + self.writeAsNull(nativeFileClass, enumToCls[base.fEnumSubclasses[cnt]], enumToCls) cnt = cnt + 1 @@ -237,8 +242,8 @@ class PdfClassManager: return mustBe def write(self): - global fileHeadersPodofo - global fileHeadersPodofoCpp + global fileHeadersNative + global fileHeadersNativeCpp global knowTypes # generate enum @@ -251,8 +256,8 @@ class PdfClassManager: cls.fEnum = 'k' + name + '_SkPdfObjectType' cls.fEnumEnd = 'k' + name + '__End_SkPdfObjectType' - fileHeadersPodofo.write('#include "SkPdf' + cls.fName + '_autogen.h"\n') - fileHeadersPodofoCpp.write('#include "SkPdf' + cls.fName + '_autogen.cpp"\n') + fileHeadersNative.write('#include "SkPdf' + cls.fName + '_autogen.h"\n') + fileHeadersNativeCpp.write('#include 
"SkPdf' + cls.fName + '_autogen.cpp"\n') if cls.fBase != '': self.fClasses[cls.fBase].fEnumSubclasses.append(cls.fEnum) @@ -275,6 +280,7 @@ class PdfClassManager: fileEnums.write('\n') fileEnums.write('enum SkPdfObjectType {\n') + fileEnums.write(' kNone_SkPdfObjectType = 0,\n') for enum in enumsRoot: self.writeEnum(fileEnums, enum, enumToCls) fileEnums.write('};\n') @@ -292,93 +298,62 @@ class PdfClassManager: cls = self.fClasses[name] enum = cls.fEnum - podofoFileClass = open(sys.argv[1] + 'podofo/autogen/SkPdf' + cls.fName + '_autogen.h', 'w') - podofoFileClassCpp = open(sys.argv[1] + 'podofo/autogen/SkPdf' + cls.fName + '_autogen.cpp', 'w') + nativeFileClass = open(sys.argv[1] + 'native/autogen/SkPdf' + cls.fName + '_autogen.h', 'w') + nativeFileClassCpp = open(sys.argv[1] + 'native/autogen/SkPdf' + cls.fName + '_autogen.cpp', 'w') - podofoFileClass.write('#ifndef __DEFINED__SkPdf' + cls.fName + '\n') - podofoFileClass.write('#define __DEFINED__SkPdf' + cls.fName + '\n') - podofoFileClass.write('\n') + nativeFileClass.write('#ifndef __DEFINED__SkPdf' + cls.fName + '\n') + nativeFileClass.write('#define __DEFINED__SkPdf' + cls.fName + '\n') + nativeFileClass.write('\n') - podofoFileClassCpp.write('#include "SkPdf' + cls.fName + '_autogen.h"\n\n') - podofoFileClassCpp.write('#include "podofo.h"\n') - podofoFileClassCpp.write('#include "SkPodofoUtils.h"\n') - podofoFileClassCpp.write('#include "SkPdfMapper_autogen.h"\n') - podofoFileClassCpp.write('\n') + nativeFileClassCpp.write('#include "SkPdf' + cls.fName + '_autogen.h"\n\n') + nativeFileClassCpp.write('\n') if cls.fBase == '': - podofoFileClass.write('#include "stddef.h"\n') - podofoFileClass.write('#include <string>\n') - podofoFileClass.write('#include "SkPdfEnums_autogen.h"\n') - podofoFileClass.write('#include "SkPdfNYI.h"\n') - podofoFileClass.write('#include "SkPodofoUtils.h"\n') + nativeFileClass.write('#include "stddef.h"\n') + nativeFileClass.write('#include <string>\n') + 
nativeFileClass.write('#include "SkPdfEnums_autogen.h"\n') + nativeFileClass.write('#include "SkPdfNYI.h"\n') + nativeFileClass.write('#include "SkPdfObject.h"\n') + nativeFileClass.write('class SkNativeParsedPDF;\n') if cls.fBase != '': - podofoFileClass.write('#include "SkPdf' + cls.fBase + '_autogen.h"\n') + nativeFileClass.write('#include "SkPdf' + cls.fBase + '_autogen.h"\n') - if cls.fBase == '': - podofoFileClass.write('#include "SkPodofoParsedPDF.h"\n') + nativeFileClassCpp.write('#include "SkNativeParsedPDF.h"\n') - podofoFileClass.write('\n') - - if cls.fBase == '': - podofoFileClass.write('namespace PoDoFo {\n') - podofoFileClass.write('class PdfMemDocument;\n') - podofoFileClass.write('class PdfObject;\n') - podofoFileClass.write('}\n') + + nativeFileClass.write('\n') if cls.fComment != '': - podofoFileClass.write('// ' + cls.fComment + '\n') + nativeFileClass.write('// ' + cls.fComment + '\n') if cls.fBase == '': - podofoFileClass.write('class SkPdf' + cls.fName + ' {\n') + nativeFileClass.write('class SkPdf' + cls.fName + ' : public SkPdfObject {\n') else: - podofoFileClass.write('class SkPdf' + cls.fName + ' : public SkPdf' + cls.fBase + ' {\n') + nativeFileClass.write('class SkPdf' + cls.fName + ' : public SkPdf' + cls.fBase + ' {\n') - podofoFileClass.write('public:\n') - podofoFileClass.write(' virtual SkPdfObjectType getType() const { return ' + cls.fEnum + ';}\n') - if len(cls.fEnumSubclasses) == 0: - podofoFileClass.write(' virtual SkPdfObjectType getTypeEnd() const { return (SkPdfObjectType)(' + cls.fEnum + ' + 1);}\n') - else: - podofoFileClass.write(' virtual SkPdfObjectType getTypeEnd() const { return ' + cls.fEnumEnd + ';}\n') + self.writeAsFoo(nativeFileClass, cls, enumToCls) - self.writeAsFoo(podofoFileClass, cls, enumToCls) - - podofoFileClass.write('public:\n') + nativeFileClass.write('public:\n') - for cc in cls.fCCPublicPodofo: - podofoFileClass.write(' ' + cc + '\n') + for cc in cls.fCCPublicNative: + nativeFileClass.write(' ' + cc 
+ '\n') - for cc in cls.fCCPublicPodofoCpp: - podofoFileClassCpp.write(cc + '\n\n') + for cc in cls.fCCPublicNativeCpp: + nativeFileClassCpp.write(cc + '\n\n') if cls.fBase == '': - podofoFileClass.write('protected:\n') - podofoFileClass.write(' const PoDoFo::PdfMemDocument* fPodofoDoc;\n') - podofoFileClass.write(' const SkPodofoParsedPDF* fParsedDoc;\n') - podofoFileClass.write(' const PoDoFo::PdfObject* fPodofoObj;\n') - podofoFileClass.write('\n') - - podofoFileClass.write('public:\n') + nativeFileClass.write('public:\n') - podofoFileClass.write(' SkPdf' + cls.fName + '(const SkPodofoParsedPDF* doc = NULL, const PoDoFo::PdfObject* podofoObj = NULL) : fPodofoDoc(doc->podofo()), fParsedDoc(doc), fPodofoObj(podofoObj) {}\n') - podofoFileClass.write('\n') - podofoFileClass.write(' const SkPodofoParsedPDF* doc() const { return fParsedDoc;}\n') - podofoFileClass.write(' const void* data() const {return fPodofoObj;}\n') - podofoFileClass.write(' const PoDoFo::PdfObject* podofo() const {return fPodofoObj;}\n') - else: - podofoFileClass.write('public:\n') - podofoFileClass.write(' SkPdf' + cls.fName + '(const SkPodofoParsedPDF* doc = NULL, const PoDoFo::PdfObject* podofoObj = NULL) : SkPdf' + cls.fBase + '(doc, podofoObj) {}\n') - podofoFileClass.write('\n') - - # TODO(edisonn): add is valid ? 
#check required fieds, also, there should be an internal_valid() manually wrote for complex # situations # right now valid return true - #podofoFileClass.write(' virtual bool valid() const {return true;}\n') - #podofoFileClass.write('\n') + # TODO(edisonn): cache the value of valid, have a set of bits that would remember what types are valid for this type + nativeFileClass.write(' bool valid() const {return true;}\n') + #nativeFileClass.write('\n') for field in cls.fFields: prop = field.fProp @@ -386,170 +361,163 @@ class PdfClassManager: lines = prop.fComment.split('\n') if prop.fComment != '' and len(lines) > 0: - podofoFileClass.write('/** ' + lines[0] + '\n') + nativeFileClass.write('/** ' + lines[0] + '\n') for line in lines[1:]: - podofoFileClass.write(' * ' + line + '\n') - podofoFileClass.write('**/\n') + nativeFileClass.write(' * ' + line + '\n') + nativeFileClass.write('**/\n') if prop.fCppName[0] == '[': - podofoFileClass.write('/*\n') # comment code of the atributes that can have any name - podofoFileClassCpp.write('/*\n') # comment code of the atributes that can have any name + nativeFileClass.write('/*\n') # comment code of the atributes that can have any name + nativeFileClassCpp.write('/*\n') # comment code of the atributes that can have any name if len(prop.fTypes.split()) == 1: t = prop.fTypes.strip() - podofoFileClass.write(' ' + knowTypes[t][0] + ' ' + prop.fCppName + '() const;\n') - podofoFileClassCpp.write('' + knowTypes[t][0] + ' SkPdf' + cls.fName + '::' + prop.fCppName + '() const {\n') - podofoFileClassCpp.write(' ' + knowTypes[t][0] + ' ret;\n') + nativeFileClass.write(' ' + knowTypes[t][0] + ' ' + prop.fCppName + '(const SkNativeParsedPDF* doc);\n') + nativeFileClassCpp.write('' + knowTypes[t][0] + ' SkPdf' + cls.fName + '::' + prop.fCppName + '(const SkNativeParsedPDF* doc) {\n') + nativeFileClassCpp.write(' SkPdfObject* ret = get(\"' + prop.fName + '\", \"' + prop.fAbr + '\");\n') + nativeFileClassCpp.write(' if (doc) {ret = 
doc->resolveReference(ret);}\n') + nativeFileClassCpp.write(' if ((ret != NULL && ' + knowTypes[t][3] + ') || (doc == NULL && ret != NULL && ret->isReference())) return ' + knowTypes[t][1] + ';\n') - #hack, find out if it is dict, they have an extra entry in the array - if len(knowTypes[t]) == 5: - podofoFileClassCpp.write(' if (fParsedDoc->mapper()->' + knowTypes[t][1] + '(podofo()->GetDictionary(), \"' + prop.fName + '\", \"' + prop.fAbr + '\", &ret)) return ret;\n') + if field.fRequired: + nativeFileClassCpp.write(' // TODO(edisonn): warn about missing required field, assert for known good pdfs\n') + nativeFileClassCpp.write(' return ' + knowTypes[t][2].toCpp() + ';\n'); + elif prop.fDefault != '': + nativeFileClassCpp.write(' return ' + prop.fDefault.toCpp() + ';\n'); else: - podofoFileClassCpp.write(' if (' + knowTypes[t][1] + '(fParsedDoc, podofo()->GetDictionary(), \"' + prop.fName + '\", \"' + prop.fAbr + '\", &ret)) return ret;\n') + nativeFileClassCpp.write(' // TODO(edisonn): warn about missing default value for optional fields\n') + nativeFileClassCpp.write(' return ' + knowTypes[t][2].toCpp() + ';\n'); - if field.fRequired == False and prop.fDefault != '': - podofoFileClassCpp.write(' return ' + prop.fDefault.toCpp() + ';\n'); - else: - podofoFileClassCpp.write(' // TODO(edisonn): warn about missing required field, assert for known good pdfs\n') - podofoFileClassCpp.write(' return ' + knowTypes[t][2].toCpp() + ';\n'); - podofoFileClassCpp.write('}\n') - podofoFileClassCpp.write('\n') + nativeFileClassCpp.write('}\n') + nativeFileClassCpp.write('\n') else: for type in prop.fTypes.split(): t = type.strip() - podofoFileClass.write(' bool is' + prop.fCppName + 'A' + t.title() + '() const;\n') - - podofoFileClassCpp.write('bool SkPdf' + cls.fName + '::is' + prop.fCppName + 'A' + t.title() + '() const {\n') - podofoFileClassCpp.write(' SkPdfObject* ret = NULL;\n') - podofoFileClassCpp.write(' if 
(!fParsedDoc->mapper()->SkPdfObjectFromDictionary(podofo()->GetDictionary(), \"' + prop.fName + '\", \"' + prop.fAbr + '\", &ret)) return false;\n') - podofoFileClassCpp.write(' return ' + knowTypes[t][3] + ';\n') - podofoFileClassCpp.write('}\n') - podofoFileClassCpp.write('\n') - - podofoFileClass.write(' ' + knowTypes[t][0] + ' get' + prop.fCppName + 'As' + t.title() + '() const;\n') - podofoFileClassCpp.write('' + knowTypes[t][0] + ' SkPdf' + cls.fName + '::get' + prop.fCppName + 'As' + t.title() + '() const {\n') - podofoFileClassCpp.write(' ' + knowTypes[t][0] + ' ret = ' + knowTypes[t][2].toCpp() + ';\n') - - # hack - if len(knowTypes[t]) == 5: - podofoFileClassCpp.write(' if (fParsedDoc->mapper()->' + knowTypes[t][1] + '(podofo()->GetDictionary(), \"' + prop.fName + '\", \"' + prop.fAbr + '\", &ret)) return ret;\n') + nativeFileClass.write(' bool is' + prop.fCppName + 'A' + t.title() + '(const SkNativeParsedPDF* doc);\n') + + nativeFileClassCpp.write('bool SkPdf' + cls.fName + '::is' + prop.fCppName + 'A' + t.title() + '(const SkNativeParsedPDF* doc) {\n') + nativeFileClassCpp.write(' SkPdfObject* ret = get(\"' + prop.fName + '\", \"' + prop.fAbr + '\");\n') + nativeFileClassCpp.write(' if (doc) {ret = doc->resolveReference(ret);}\n') + nativeFileClassCpp.write(' return ret != NULL && ' + knowTypes[t][3] + ';\n') + nativeFileClassCpp.write('}\n') + nativeFileClassCpp.write('\n') + + nativeFileClass.write(' ' + knowTypes[t][0] + ' get' + prop.fCppName + 'As' + t.title() + '(const SkNativeParsedPDF* doc);\n') + nativeFileClassCpp.write('' + knowTypes[t][0] + ' SkPdf' + cls.fName + '::get' + prop.fCppName + 'As' + t.title() + '(const SkNativeParsedPDF* doc) {\n') + + nativeFileClassCpp.write(' SkPdfObject* ret = get(\"' + prop.fName + '\", \"' + prop.fAbr + '\");\n') + nativeFileClassCpp.write(' if (doc) {ret = doc->resolveReference(ret);}\n') + nativeFileClassCpp.write(' if ((ret != NULL && ' + knowTypes[t][3] + ') || (doc == NULL && ret != NULL && 
ret->isReference())) return ' + knowTypes[t][1] + ';\n') + + + if field.fRequired: + nativeFileClassCpp.write(' // TODO(edisonn): warn about missing required field, assert for known good pdfs\n') + nativeFileClassCpp.write(' return ' + knowTypes[t][2].toCpp() + ';\n'); + elif prop.fDefault != '': + nativeFileClassCpp.write(' return ' + prop.fDefault.toCpp() + ';\n'); else: - podofoFileClassCpp.write(' if (' + knowTypes[t][1] + '(fParsedDoc, podofo()->GetDictionary(), \"' + prop.fName + '\", \"' + prop.fAbr + '\", &ret)) return ret;\n') - - podofoFileClassCpp.write(' // TODO(edisonn): warn about missing required field, assert for known good pdfs\n') - podofoFileClassCpp.write(' return ' + knowTypes[t][2].toCpp() + ';\n') - podofoFileClassCpp.write('}\n') - podofoFileClassCpp.write('\n') + nativeFileClassCpp.write(' // TODO(edisonn): warn about missing default value for optional fields\n') + nativeFileClassCpp.write(' return ' + knowTypes[t][2].toCpp() + ';\n'); + + nativeFileClassCpp.write('}\n') + nativeFileClassCpp.write('\n') - podofoFileClass.write(' bool has_' + prop.fCppName + '() const;\n') - podofoFileClassCpp.write('bool SkPdf' + cls.fName + '::has_' + prop.fCppName + '() const {\n') - podofoFileClassCpp.write(' return (ObjectFromDictionary(fParsedDoc, podofo()->GetDictionary(), \"' + prop.fName + '\", \"' + prop.fAbr + '\", NULL));\n') - podofoFileClassCpp.write('}\n') - podofoFileClassCpp.write('\n') + nativeFileClass.write(' bool has_' + prop.fCppName + '() const;\n') + nativeFileClassCpp.write('bool SkPdf' + cls.fName + '::has_' + prop.fCppName + '() const {\n') + # TODO(edisonn): has_foo() does not check type, add has_valid_foo(), and check that type is expected (e.g. number, string, ...) 
+ nativeFileClassCpp.write(' return get(\"' + prop.fName + '\", \"' + prop.fAbr + '\") != NULL;\n') + nativeFileClassCpp.write('}\n') + nativeFileClassCpp.write('\n') if prop.fCppName[0] == '[': - podofoFileClass.write('*/\n') # comment code of the atributes that can have any name - podofoFileClassCpp.write('*/\n') # comment code of the atributes that can have any name + nativeFileClass.write('*/\n') # comment code of the atributes that can have any name + nativeFileClassCpp.write('*/\n') # comment code of the atributes that can have any name - podofoFileClass.write('};\n') - podofoFileClass.write('\n') + nativeFileClass.write('};\n') + nativeFileClass.write('\n') - podofoFileClass.write('#endif // __DEFINED__PODOFO_SkPdf' + cls.fName + '\n') + nativeFileClass.write('#endif // __DEFINED__NATIVE_SkPdf' + cls.fName + '\n') - podofoFileClass.close() - podofoFileClassCpp.close() + nativeFileClass.close() + nativeFileClassCpp.close() # generate constructor when knowing the type # later, p2, generate constructor when not knowing the type - very similar with parsing? # generate parser # TODO(edisonn): fast recognition based on must attributes. 
- fileMapperPodofo = open(sys.argv[1] + 'podofo/autogen/SkPdfMapper_autogen.h', 'w') - fileMapperPodofoCpp = open(sys.argv[1] + 'podofo/autogen/SkPdfMapper_autogen.cpp', 'w') - - fileMapperPodofo.write('#ifndef __DEFINED__SkPdfMapper\n') - fileMapperPodofo.write('#define __DEFINED__SkPdfMapper\n') - fileMapperPodofo.write('\n') + fileMapperNative = open(sys.argv[1] + 'native/autogen/SkPdfMapper_autogen.h', 'w') + fileMapperNativeCpp = open(sys.argv[1] + 'native/autogen/SkPdfMapper_autogen.cpp', 'w') - fileMapperPodofo.write('#include "SkPdfHeaders_autogen.h"\n') + fileMapperNative.write('#ifndef __DEFINED__SkPdfMapper\n') + fileMapperNative.write('#define __DEFINED__SkPdfMapper\n') + fileMapperNative.write('\n') + fileMapperNative.write('#include "SkPdfHeaders_autogen.h"\n') + fileMapperNative.write('#include "SkNativeParsedPDF.h"\n') + fileMapperNative.write('#include "SkPdfObject.h"\n') - fileMapperPodofo.write('namespace PoDoFo {\n') - fileMapperPodofo.write('class PdfDictionary;\n') - fileMapperPodofo.write('class PdfMemDocument;\n') - fileMapperPodofo.write('class PdfObject;\n') - fileMapperPodofo.write('}\n') - fileMapperPodofoCpp.write('#include "SkPdfMapper_autogen.h"\n') - fileMapperPodofoCpp.write('#include "SkPdfUtils.h"\n') - fileMapperPodofoCpp.write('#include "podofo.h"\n') - fileMapperPodofoCpp.write('\n') + fileMapperNativeCpp.write('#include "SkPdfMapper_autogen.h"\n') + fileMapperNativeCpp.write('#include "SkPdfUtils.h"\n') + fileMapperNativeCpp.write('#include "SkPdfObject.h"\n') + fileMapperNativeCpp.write('\n') - fileMapperPodofo.write('class SkPdfMapper {\n') + fileMapperNative.write('class SkPdfMapper {\n') - fileMapperPodofo.write(' const SkPodofoParsedPDF* fParsedDoc;\n') - fileMapperPodofo.write(' const PoDoFo::PdfMemDocument* fPodofoDoc;\n') + fileMapperNative.write(' const SkNativeParsedPDF* fParsedDoc;\n') - fileMapperPodofo.write('public:\n') + fileMapperNative.write('public:\n') - fileMapperPodofo.write(' SkPdfMapper(const 
SkPodofoParsedPDF* doc) : fParsedDoc(doc), fPodofoDoc(doc ? doc->podofo() : NULL) {}\n') - fileMapperPodofo.write('\n') + fileMapperNative.write(' SkPdfMapper(const SkNativeParsedPDF* doc) : fParsedDoc(doc) {}\n') + fileMapperNative.write('\n') for name in self.fClassesNamesInOrder: cls = self.fClasses[name] - fileMapperPodofo.write(' bool map' + name + '(const SkPdfObject* in, SkPdf' + name + '** out) const;\n') + fileMapperNative.write(' SkPdfObjectType map' + name + '(const SkPdfObject* in) const;\n') - fileMapperPodofoCpp.write('bool SkPdfMapper::map' + name + '(const SkPdfObject* in, SkPdf' + name + '** out) const {\n') - fileMapperPodofoCpp.write(' return map' + name + '((const PoDoFo::PdfObject*)in->data(), (SkPdf' + name + '**)out);\n') - fileMapperPodofoCpp.write('}\n') - fileMapperPodofoCpp.write('\n') - - fileMapperPodofo.write(' bool map' + name + '(const PoDoFo::PdfObject* podofoObj, SkPdf' + name + '** out) const ;\n') - fileMapperPodofoCpp.write('bool SkPdfMapper::map' + name + '(const PoDoFo::PdfObject* podofoObj, SkPdf' + name + '** out) const {\n') - fileMapperPodofoCpp.write(' if (!is' + name + '(podofoObj)) return false;\n') - fileMapperPodofoCpp.write('\n') + fileMapperNativeCpp.write('SkPdfObjectType SkPdfMapper::map' + name + '(const SkPdfObject* in) const {\n') + fileMapperNativeCpp.write(' if (!is' + name + '(in)) return kNone_SkPdfObjectType;\n') + fileMapperNativeCpp.write('\n') + if len(cls.fEnumSubclasses) > 0: + fileMapperNativeCpp.write(' SkPdfObjectType ret;\n') # stream must be last one hasStream = False for sub in cls.fEnumSubclasses: - if cls.fName == 'Object' and enumToCls[sub].fName == 'Stream': - hasStream = True - else: - fileMapperPodofoCpp.write(' if (map' + enumToCls[sub].fName + '(podofoObj, (SkPdf' + enumToCls[sub].fName + '**)out)) return true;\n') - - if hasStream: - fileMapperPodofoCpp.write(' if (mapStream(podofoObj, (SkPdfStream**)out)) return true;\n') + fileMapperNativeCpp.write(' if (kNone_SkPdfObjectType != (ret 
= map' + enumToCls[sub].fName + '(in))) return ret;\n') - - fileMapperPodofoCpp.write('\n') + fileMapperNativeCpp.write('\n') - fileMapperPodofoCpp.write(' *out = new SkPdf' + name + '(fParsedDoc, podofoObj);\n') - fileMapperPodofoCpp.write(' return true;\n') - fileMapperPodofoCpp.write('}\n') - fileMapperPodofoCpp.write('\n') + fileMapperNativeCpp.write(' return k' + name + '_SkPdfObjectType;\n') + fileMapperNativeCpp.write('}\n') + fileMapperNativeCpp.write('\n') for name in self.fClassesNamesInOrder: cls = self.fClasses[name] - fileMapperPodofo.write(' bool is' + name + '(const PoDoFo::PdfObject* podofoObj) const ;\n') - fileMapperPodofoCpp.write('bool SkPdfMapper::is' + name + '(const PoDoFo::PdfObject* podofoObj) const {\n') + fileMapperNative.write(' bool is' + name + '(const SkPdfObject* nativeObj) const ;\n') + fileMapperNativeCpp.write('bool SkPdfMapper::is' + name + '(const SkPdfObject* nativeObj) const {\n') if cls.fCheck != '': - fileMapperPodofoCpp.write(' return ' + cls.fCheck + ';\n') + fileMapperNativeCpp.write(' return ' + cls.fCheck + ';\n') else: cntMust = 0 + emitedRet = False for field in cls.fFields: prop = field.fProp if prop.fHasMust: + if emitedRet == False: + fileMapperNativeCpp.write(' const SkPdfObject* ret = NULL;\n') + emitedRet = True cntMust = cntMust + 1 - fileMapperPodofoCpp.write(' ' + knowTypes[prop.fTypes.strip()][0] + ' ' + prop.fCppName + ';\n') - fileMapperPodofoCpp.write(' if (!podofoObj->IsDictionary()) return false;\n') - fileMapperPodofoCpp.write(' if (!' 
+ knowTypes[prop.fTypes.strip()][1] + '(fParsedDoc, podofoObj->GetDictionary(), \"' + prop.fName + '\", \"' + prop.fAbr + '\", &' + prop.fCppName + ')) return false;\n') + fileMapperNativeCpp.write(' if (!nativeObj->isDictionary()) return false;\n') + fileMapperNativeCpp.write(' ret = nativeObj->get(\"' + prop.fName + '\", \"' + prop.fAbr + '\");\n') + fileMapperNativeCpp.write(' if (ret == NULL) return false;\n') eval = ''; # TODO(edisonn): this could get out of hand, and could have poor performance if continued on this path @@ -560,111 +528,65 @@ class PdfClassManager: if len(mustBe) > 0: for cnd in mustBe: if eval == '': - eval = '(' + prop.fCppName + ' != ' + cnd.toCpp() + ')' + eval = '(' + knowTypes[prop.fTypes.strip()][1] + ' != ' + cnd.toCpp() + ')' else: - eval = eval + ' && ' + '(' + prop.fCppName + ' != ' + cnd.toCpp() + ')' - fileMapperPodofoCpp.write(' if (' + eval + ') return false;\n') - fileMapperPodofoCpp.write('\n') + eval = eval + ' && ' + '(' + knowTypes[prop.fTypes.strip()][1] + ' != ' + cnd.toCpp() + ')' + fileMapperNativeCpp.write(' if (' + eval + ') return false;\n') + fileMapperNativeCpp.write('\n') - fileMapperPodofoCpp.write(' return true;\n') + fileMapperNativeCpp.write(' return true;\n') - fileMapperPodofoCpp.write('}\n') - fileMapperPodofoCpp.write('\n') + fileMapperNativeCpp.write('}\n') + fileMapperNativeCpp.write('\n') - fileMapperPodofo.write(' bool SkPdf' + name + 'FromDictionary(const PoDoFo::PdfDictionary& dict, const char* key, SkPdf' + name + '** data) const ;\n') - fileMapperPodofoCpp.write('bool SkPdfMapper::SkPdf' + name + 'FromDictionary(const PoDoFo::PdfDictionary& dict, const char* key, SkPdf' + name + '** data) const {\n') - fileMapperPodofoCpp.write(' const PoDoFo::PdfObject* value = resolveReferenceObject(fParsedDoc, dict.GetKey(PoDoFo::PdfName(key)), true);\n') - fileMapperPodofoCpp.write(' if (value == NULL) { return false; }\n') - fileMapperPodofoCpp.write(' if (data == NULL) { return true; }\n') - 
fileMapperPodofoCpp.write(' return map' + name + '(value, (SkPdf' + name + '**)data);\n') - fileMapperPodofoCpp.write('}\n') - fileMapperPodofoCpp.write('\n') - - fileMapperPodofo.write(' bool SkPdf' + name + 'FromDictionary(const PoDoFo::PdfDictionary& dict, const char* key, const char* abr, SkPdf' + name + '** data) const ;\n') - fileMapperPodofoCpp.write('bool SkPdfMapper::SkPdf' + name + 'FromDictionary(const PoDoFo::PdfDictionary& dict, const char* key, const char* abr, SkPdf' + name + '** data) const {\n') - fileMapperPodofoCpp.write(' if (SkPdf' + name + 'FromDictionary(dict, key, data)) return true;\n') - fileMapperPodofoCpp.write(' if (abr == NULL || *abr == \'\\0\') return false;\n') - fileMapperPodofoCpp.write(' return SkPdf' + name + 'FromDictionary(dict, abr, data);\n') - fileMapperPodofoCpp.write('}\n') - fileMapperPodofoCpp.write('\n') + # TODO(edisonn): dict should be a SkPdfDictionary ? + fileMapperNative.write(' bool SkPdf' + name + 'FromDictionary(const SkPdfObject* dict, const char* key, SkPdf' + name + '** data) const ;\n') + fileMapperNativeCpp.write('bool SkPdfMapper::SkPdf' + name + 'FromDictionary(const SkPdfObject* dict, const char* key, SkPdf' + name + '** data) const {\n') + fileMapperNativeCpp.write(' const SkPdfObject* value = dict->get(key);\n') + fileMapperNativeCpp.write(' if (value == NULL) { return false; }\n') + fileMapperNativeCpp.write(' if (data == NULL) { return true; }\n') + fileMapperNativeCpp.write(' if (kNone_SkPdfObjectType == map' + name + '(value)) return false;\n') + fileMapperNativeCpp.write(' *data = (SkPdf' + name + '*)value;\n') + fileMapperNativeCpp.write(' return true;\n'); + fileMapperNativeCpp.write('}\n') + fileMapperNativeCpp.write('\n') + + fileMapperNative.write(' bool SkPdf' + name + 'FromDictionary(const SkPdfObject* dict, const char* key, const char* abr, SkPdf' + name + '** data) const ;\n') + fileMapperNativeCpp.write('bool SkPdfMapper::SkPdf' + name + 'FromDictionary(const SkPdfObject* dict, const 
char* key, const char* abr, SkPdf' + name + '** data) const {\n') + fileMapperNativeCpp.write(' if (SkPdf' + name + 'FromDictionary(dict, key, data)) return true;\n') + fileMapperNativeCpp.write(' if (abr == NULL || *abr == \'\\0\') return false;\n') + fileMapperNativeCpp.write(' return SkPdf' + name + 'FromDictionary(dict, abr, data);\n') + fileMapperNativeCpp.write('}\n') + fileMapperNativeCpp.write('\n') - fileMapperPodofo.write('};\n') - fileMapperPodofo.write('\n') + fileMapperNative.write('};\n') + fileMapperNative.write('\n') - fileMapperPodofo.write('#endif // __DEFINED__SkPdfMapper\n') + fileMapperNative.write('#endif // __DEFINED__SkPdfMapper\n') - fileMapperPodofo.close() - fileMapperPodofoCpp.close() + fileMapperNative.close() + fileMapperNativeCpp.close() return def generateCode(): - global fileHeadersPodofo - global fileHeadersPodofoCpp + global fileHeadersNative + global fileHeadersNativeCpp global knowTypes - fileHeadersPodofo = open(sys.argv[1] + 'podofo/autogen/SkPdfHeaders_autogen.h', 'w') - fileHeadersPodofoCpp = open(sys.argv[1] + 'podofo/autogen/SkPdfHeaders_autogen.cpp', 'w') + fileHeadersNative = open(sys.argv[1] + 'native/autogen/SkPdfHeaders_autogen.h', 'w') + fileHeadersNativeCpp = open(sys.argv[1] + 'native/autogen/SkPdfHeaders_autogen.cpp', 'w') - fileHeadersPodofo.write('#ifndef __DEFINED__SkPdfHeaders\n') - fileHeadersPodofo.write('#define __DEFINED__SkPdfHeaders\n') - fileHeadersPodofo.write('\n') + fileHeadersNative.write('#ifndef __DEFINED__SkPdfHeaders\n') + fileHeadersNative.write('#define __DEFINED__SkPdfHeaders\n') + fileHeadersNative.write('\n') - fileHeadersPodofoCpp.write('#include "SkPdfHeaders_autogen.h"\n') + fileHeadersNativeCpp.write('#include "SkPdfHeaders_autogen.h"\n') manager = PdfClassManager() - manager.addClass('Object') - - # TODO(edisonn): perf, instead of virtual functions, store data in field and reurn it. 
- # maybe in constructor load it, or laizy load it - - manager.addClass('Null').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_Null') - manager.addClass('Boolean').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_Bool')\ - .carbonCopyPublicPodofo('bool value() const;')\ - .carbonCopyPublicPodofoCpp('bool SkPdfBoolean::value() const {return podofo()->GetBool();}') - - manager.addClass('Integer').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_Number || podofoObj->GetDataType() == PoDoFo::ePdfDataType_Real')\ - .carbonCopyPublicPodofo('long value() const;')\ - .carbonCopyPublicPodofoCpp('long SkPdfInteger::value() const {return podofo()->GetNumber();}') - - manager.addClass('Number', 'Integer').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_Number || podofoObj->GetDataType() == PoDoFo::ePdfDataType_Real')\ - .carbonCopyPublicPodofo('double value() const;')\ - .carbonCopyPublicPodofoCpp('double SkPdfNumber::value() const {return podofo()->GetReal();}')\ - - manager.addClass('Name').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_Name')\ - .carbonCopyPublicPodofo('const std::string& value() const;')\ - .carbonCopyPublicPodofoCpp('const std::string& SkPdfName::value() const {return podofo()->GetName().GetName();}') - - manager.addClass('Reference').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_Reference') - - manager.addClass('Array').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_Array')\ - .carbonCopyPublicPodofo('int size() const;')\ - .carbonCopyPublicPodofoCpp('int SkPdfArray::size() const {return podofo()->GetArray().GetSize();}')\ - .carbonCopyPublicPodofo('SkPdfObject* operator[](int i) const;')\ - .carbonCopyPublicPodofoCpp('SkPdfObject* SkPdfArray::operator[](int i) const { SkPdfObject* ret = NULL; fParsedDoc->mapper()->mapObject(&podofo()->GetArray()[i], &ret); return ret; }') - - manager.addClass('String').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_String || podofoObj->GetDataType() == 
PoDoFo::ePdfDataType_HexString')\ - .carbonCopyPublicPodofo('const std::string& value() const;')\ - .carbonCopyPublicPodofoCpp('const std::string& SkPdfString::value() const {return podofo()->GetString().GetStringUtf8();}')\ - .carbonCopyPublicPodofo('const char* c_str() const;')\ - .carbonCopyPublicPodofoCpp('const char* SkPdfString::c_str() const {return podofo()->GetString().GetString();}')\ - .carbonCopyPublicPodofo('size_t len() const;')\ - .carbonCopyPublicPodofoCpp('size_t SkPdfString::len() const {return podofo()->GetString().GetLength();}') - - manager.addClass('HexString', 'String').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_HexString')\ - - manager.addClass('Dictionary').check('podofoObj->GetDataType() == PoDoFo::ePdfDataType_Dictionary')\ - .carbonCopyPublicPodofo('SkPdfObject* get(const char* dictionaryKeyName) const;')\ - .carbonCopyPublicPodofoCpp('SkPdfObject* SkPdfDictionary::get(const char* dictionaryKeyName) const {SkPdfObject* ret = NULL; fParsedDoc->mapper()->mapObject(resolveReferenceObject(fParsedDoc, podofo()->GetDictionary().GetKey(PoDoFo::PdfName(dictionaryKeyName))), &ret); return ret;}')\ - - # attached to a dictionary in podofo - manager.addClass('Stream')\ - .carbonCopyPublicPodofo('bool GetFilteredCopy(char** buffer, long* len) const;')\ - .carbonCopyPublicPodofoCpp('bool SkPdfStream::GetFilteredCopy(char** buffer, long* len) const {try {PoDoFo::pdf_long podofoLen = 0; *buffer = NULL; *len = 0;podofo()->GetStream()->GetFilteredCopy(buffer, &podofoLen); *len = (long)podofoLen;} catch (PoDoFo::PdfError& e) { return false; } return true;}') - - # these classes are not explicitely backed by a table in the pdf spec + manager.addClass('Dictionary') manager.addClass('XObjectDictionary', 'Dictionary') manager.addClass('FontDictionary', 'Dictionary') @@ -678,7 +600,6 @@ def generateCode(): .must([datatypes.PdfName('TrueType')])\ .done().done()\ - addDictionaryTypesTo(knowTypes) buildPdfSpec(manager) @@ -694,10 +615,10 @@ def 
generateCode(): manager.write() - fileHeadersPodofo.write('#endif // __DEFINED__SkPdfHeaders\n') + fileHeadersNative.write('#endif // __DEFINED__SkPdfHeaders\n') - fileHeadersPodofo.close() - fileHeadersPodofoCpp.close() + fileHeadersNative.close() + fileHeadersNativeCpp.close() if '__main__' == __name__: #print sys.argv diff --git a/experimental/PdfViewer/pdfparser/SkPdfNYI.h b/experimental/PdfViewer/pdfparser/SkPdfNYI.h index 5f1ee82691..4d4b2e80b0 100644 --- a/experimental/PdfViewer/pdfparser/SkPdfNYI.h +++ b/experimental/PdfViewer/pdfparser/SkPdfNYI.h @@ -2,6 +2,9 @@ #define EXPERIMENTAL_PDFVIEWER_PDFPARSER_SKPDFNYI_H_ struct SkPdfFileSpec {}; + +// TODO(edisonn): date is actually a string! + struct SkPdfDate {}; struct SkPdfTree {}; struct SkPdfFunction {}; diff --git a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp index 5d8683899b..04a1c50caf 100644 --- a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp +++ b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp @@ -1,11 +1,469 @@ - #include "SkNativeParsedPDF.h" +#include "SkPdfNativeTokenizer.h" +#include "SkPdfBasics.h" +#include "SkPdfParser.h" +#include "SkPdfObject.h" + +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "SkPdfFileTrailerDictionary_autogen.h" +#include "SkPdfCatalogDictionary_autogen.h" +#include "SkPdfPageObjectDictionary_autogen.h" +#include "SkPdfPageTreeNodeDictionary_autogen.h" +#include "SkPdfMapper_autogen.h" + + + +long getFileSize(const char* filename) +{ + struct stat stat_buf; + int rc = stat(filename, &stat_buf); + return rc == 0 ? 
stat_buf.st_size : -1; +} + +unsigned char* lineHome(unsigned char* start, unsigned char* current) { + while (current > start && !isPdfEOL(*(current - 1))) { + current--; + } + return current; +} + +unsigned char* previousLineHome(unsigned char* start, unsigned char* current) { + if (current > start && isPdfEOL(*(current - 1))) { + current--; + } + + // allows CR+LF, LF+CR but not two CR+CR or LF+LF + if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) { + current--; + } + + while (current > start && !isPdfEOL(*(current - 1))) { + current--; + } + + return current; +} + +unsigned char* ignoreLine(unsigned char* current, unsigned char* end) { + while (current < end && !isPdfEOL(*current)) { + current++; + } + current++; + if (current < end && isPdfEOL(*current) && *current != *(current - 1)) { + current++; + } + return current; +} + + +// TODO(edisonn): NYI +// TODO(edisonn): 3 constructuctors from URL, from stream, from file ... +// TODO(edisonn): write one that accepts errors in the file and ignores/fixis them +// TODO(edisonn): testing: +// 1) run on a lot of file +// 2) recoverable corupt file: remove endobj, endsteam, remove other keywords, use other white spaces, insert comments randomly, ... 
+// 3) irrecoverable corrupt file +SkNativeParsedPDF::SkNativeParsedPDF(const char* path) : fAllocator(new SkPdfAllocator()) { + FILE* file = fopen(path, "r"); + fContentLength = getFileSize(path); + fFileContent = new unsigned char[fContentLength]; + fread(fFileContent, fContentLength, 1, file); + fclose(file); + file = NULL; + + unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1); + unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine); + unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine); + + if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) { + // TODO(edisonn): report/issue + } + + long xrefByteOffset = atol((const char*)xrefByteOffsetLine); + + bool storeCatalog = true; + while (xrefByteOffset >= 0) { + unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine); + xrefByteOffset = readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog); + storeCatalog = false; + } + + // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration + // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper + // load catalog + fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); + SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); -SkNativeParsedPDF::SkNativeParsedPDF() { - // TODO(edisonn): Auto-generated constructor stub + fillPages(tree); + // now actually read all objects if we want, or do it lazyly + // and resolve references?... or not ... 
} +// TODO(edisonn): NYI SkNativeParsedPDF::~SkNativeParsedPDF() { - // TODO(edisonn): Auto-generated destructor stub + delete[] fFileContent; + delete fAllocator; +} + +unsigned char* SkNativeParsedPDF::readCrossReferenceSection(unsigned char* xrefStart, unsigned char* trailerEnd) { + unsigned char* current = ignoreLine(xrefStart, trailerEnd); // TODO(edisonn): verify next keyord is "xref", use nextObject here + + SkPdfObject token; + while (current < trailerEnd) { + token.reset(); + unsigned char* previous = current; + current = nextObject(current, trailerEnd, &token, NULL); + if (!token.isInteger()) { + return previous; + } + + int startId = token.intValue(); + token.reset(); + current = nextObject(current, trailerEnd, &token, NULL); + + if (!token.isInteger()) { + // TODO(edisonn): report/warning + return current; + } + + int entries = token.intValue(); + + for (int i = 0; i < entries; i++) { + token.reset(); + current = nextObject(current, trailerEnd, &token, NULL); + if (!token.isInteger()) { + // TODO(edisonn): report/warning + return current; + } + int offset = token.intValue(); + + token.reset(); + current = nextObject(current, trailerEnd, &token, NULL); + if (!token.isInteger()) { + // TODO(edisonn): report/warning + return current; + } + int generation = token.intValue(); + + token.reset(); + current = nextObject(current, trailerEnd, &token, NULL); + if (!token.isKeyword() || token.len() != 1 || (*token.c_str() != 'f' && *token.c_str() != 'n')) { + // TODO(edisonn): report/warning + return current; + } + + addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f'); + } + } + // TODO(edisonn): it should never get here? there is no trailer? 
+ return current; +} + +long SkNativeParsedPDF::readTrailer(unsigned char* trailerStart, unsigned char* trailerEnd, bool storeCatalog) { + unsigned char* current = ignoreLine(trailerStart, trailerEnd); // TODO(edisonn): verify next keyord is "trailer" use nextObject here + + SkPdfObject token; + current = nextObject(current, trailerEnd, &token, fAllocator); + SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token; + + if (storeCatalog) { + const SkPdfObject* ref = trailer->Root(NULL); + if (ref == NULL || !ref->isReference()) { + // TODO(edisonn): oops, we have to fix the corrup pdf file + return -1; + } + fRootCatalogRef = ref; + } + + if (trailer->has_Prev()) { + return trailer->Prev(NULL); + } + + return -1; +} + +void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) { + // TODO(edisonn): security here + while (fObjects.count() < id + 1) { + reset(fObjects.append()); + } + + fObjects[id].fOffset = offset; + fObjects[id].fObj = NULL; +} + +SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) const { + long startOffset = fObjects[id].fOffset; + //long endOffset = fObjects[id].fOffsetEnd; + // TODO(edisonn): use hinted endOffset + // TODO(edisonn): current implementation will result in a lot of memory usage + // to decrease memory usage, we wither need to be smart and know where objects end, and we will + // alocate only the chancks needed, or the tokenizer will not make copies, but then it needs to + // cache the results so it does not go twice on the same buffer + unsigned char* current = fFileContent + startOffset; + unsigned char* end = fFileContent + fContentLength; + + SkPdfNativeTokenizer tokenizer(current, end - current, fMapper, fAllocator); + + SkPdfObject idObj; + SkPdfObject generationObj; + SkPdfObject objKeyword; + SkPdfObject* dict = fAllocator->allocObject(); + + current = nextObject(current, end, &idObj, NULL); + if (current >= end) { + // TODO(edisonn): report 
warning/error + return NULL; + } + + current = nextObject(current, end, &generationObj, NULL); + if (current >= end) { + // TODO(edisonn): report warning/error + return NULL; + } + + current = nextObject(current, end, &objKeyword, NULL); + if (current >= end) { + // TODO(edisonn): report warning/error + return NULL; + } + + if (!idObj.isInteger() || !generationObj.isInteger() || id != idObj.intValue()/* || generation != generationObj.intValue()*/) { + // TODO(edisonn): report warning/error + } + + if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) { + // TODO(edisonn): report warning/error + } + + current = nextObject(current, end, dict, fAllocator); + + // TODO(edisonn): report warning/error - verify last token is endobj + + return dict; +} + +void SkNativeParsedPDF::fillPages(SkPdfPageTreeNodeDictionary* tree) { + const SkPdfArray* kids = tree->Kids(this); + if (kids == NULL) { + *fPages.append() = (SkPdfPageObjectDictionary*)tree; + return; + } + + int cnt = kids->size(); + for (int i = 0; i < cnt; i++) { + const SkPdfObject* obj = resolveReference(kids->objAtAIndex(i)); + if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfObjectType) { + *fPages.append() = (SkPdfPageObjectDictionary*)obj; + } else { + // TODO(edisonn): verify that it is a page tree indeed + fillPages((SkPdfPageTreeNodeDictionary*)obj); + } + } +} + +int SkNativeParsedPDF::pages() const { + return fPages.count(); +} + +SkPdfResourceDictionary* SkNativeParsedPDF::pageResources(int page) { + return fPages[page]->Resources(this); +} + +// TODO(edisonn): Partial implemented. Move the logics directly in the code generator for inheritable and default value? 
+SkRect SkNativeParsedPDF::MediaBox(int page) const { + SkPdfPageObjectDictionary* current = fPages[page]; + while (!current->has_MediaBox() && current->has_Parent()) { + current = (SkPdfPageObjectDictionary*)current->Parent(this); + } + if (current) { + return current->MediaBox(this); + } + return SkRect::MakeEmpty(); +} + +// TODO(edisonn): stream or array ... ? for now only array +SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfPage(int page) const { + if (fPages[page]->isContentsAStream(this)) { + return tokenizerOfStream(fPages[page]->getContentsAsStream(this)); + } else { + // TODO(edisonn): NYI, we need to concatenate all streams in the array or make the tokenizer smart + // so we don't allocate new memory + return NULL; + } +} + +SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfStream(SkPdfObject* stream) const { + if (stream == NULL) { + return NULL; + } + + return new SkPdfNativeTokenizer(stream, fMapper, fAllocator); +} + +// TODO(edisonn): NYI +SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfBuffer(unsigned char* buffer, size_t len) const { + // warning does not track two calls in the same buffer! the buffer is updated! + // make a clean copy if needed! 
+ return new SkPdfNativeTokenizer(buffer, len, fMapper, fAllocator); +} + +size_t SkNativeParsedPDF::objects() const { + return fObjects.count(); +} + +SkPdfObject* SkNativeParsedPDF::object(int i) { + SkASSERT(!(i < 0 || i > fObjects.count())); + + if (i < 0 || i > fObjects.count()) { + return NULL; + } + + if (fObjects[i].fObj == NULL) { + // TODO(edisonn): when we read the cross reference sections, store the start of the next object + // and fill fOffsetEnd + fObjects[i].fObj = readObject(i); + } + + return fObjects[i].fObj; +} + +const SkPdfMapper* SkNativeParsedPDF::mapper() const { + return fMapper; +} + +SkPdfReal* SkNativeParsedPDF::createReal(double value) const { + SkPdfObject* obj = fAllocator->allocObject(); + SkPdfObject::makeReal(value, obj); + return (SkPdfReal*)obj; +} + +SkPdfInteger* SkNativeParsedPDF::createInteger(int value) const { + SkPdfObject* obj = fAllocator->allocObject(); + SkPdfObject::makeInteger(value, obj); + return (SkPdfInteger*)obj; +} + +SkPdfString* SkNativeParsedPDF::createString(unsigned char* sz, size_t len) const { + SkPdfObject* obj = fAllocator->allocObject(); + SkPdfObject::makeString(sz, len, obj); + return (SkPdfString*)obj; +} + +PdfContext* gPdfContext = NULL; + +void SkNativeParsedPDF::drawPage(int page, SkCanvas* canvas) { + SkPdfNativeTokenizer* tokenizer = tokenizerOfPage(page); + + PdfContext pdfContext(this); + pdfContext.fOriginalMatrix = SkMatrix::I(); + pdfContext.fGraphicsState.fResources = pageResources(page); + + gPdfContext = &pdfContext; + + // TODO(edisonn): get matrix stuff right. + // TODO(edisonn): add DPI/scale/zoom. 
+ SkScalar z = SkIntToScalar(0); + SkRect rect = MediaBox(page); + SkScalar w = rect.width(); + SkScalar h = rect.height(); + + SkPoint pdfSpace[4] = {SkPoint::Make(z, z), SkPoint::Make(w, z), SkPoint::Make(w, h), SkPoint::Make(z, h)}; +// SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::Make(w, z), SkPoint::Make(z, z)}; + + // TODO(edisonn): add flag for this app to create sourunding buffer zone + // TODO(edisonn): add flagg for no clipping. + // Use larger image to make sure we do not draw anything outside of page + // could be used in tests. + +#ifdef PDF_DEBUG_3X + SkPoint skiaSpace[4] = {SkPoint::Make(w+z, h+h), SkPoint::Make(w+w, h+h), SkPoint::Make(w+w, h+z), SkPoint::Make(w+z, h+z)}; +#else + SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::Make(w, z), SkPoint::Make(z, z)}; +#endif + //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(w, h)}; + //SkPoint skiaSpace[2] = {SkPoint::Make(w, z), SkPoint::Make(z, h)}; + + //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(z, h)}; + //SkPoint skiaSpace[2] = {SkPoint::Make(z, h), SkPoint::Make(z, z)}; + + //SkPoint pdfSpace[3] = {SkPoint::Make(z, z), SkPoint::Make(z, h), SkPoint::Make(w, h)}; + //SkPoint skiaSpace[3] = {SkPoint::Make(z, h), SkPoint::Make(z, z), SkPoint::Make(w, 0)}; + + SkAssertResult(pdfContext.fOriginalMatrix.setPolyToPoly(pdfSpace, skiaSpace, 4)); + SkTraceMatrix(pdfContext.fOriginalMatrix, "Original matrix"); + + + pdfContext.fGraphicsState.fMatrix = pdfContext.fOriginalMatrix; + pdfContext.fGraphicsState.fMatrixTm = pdfContext.fGraphicsState.fMatrix; + pdfContext.fGraphicsState.fMatrixTlm = pdfContext.fGraphicsState.fMatrix; + + canvas->setMatrix(pdfContext.fOriginalMatrix); + +#ifndef PDF_DEBUG_NO_PAGE_CLIPING + canvas->clipRect(SkRect::MakeXYWH(z, z, w, h), SkRegion::kIntersect_Op, true); +#endif + +// erase with red before? 
+// SkPaint paint; +// paint.setColor(SK_ColorRED); +// canvas->drawRect(rect, paint); + + PdfMainLooper looper(NULL, tokenizer, &pdfContext, canvas); + looper.loop(); + + delete tokenizer; + + canvas->flush(); +} + +SkPdfAllocator* SkNativeParsedPDF::allocator() const { + return fAllocator; +} + +SkPdfObject* SkNativeParsedPDF::resolveReference(SkPdfObject* ref) const { + return (SkPdfObject*)resolveReference((const SkPdfObject*)ref); +} + +// TODO(edisonn): fix infinite loop if ref to itself! +// TODO(edisonn): perf, fix refs at load, and resolve will simply return fResolvedReference? +SkPdfObject* SkNativeParsedPDF::resolveReference(const SkPdfObject* ref) const { + if (ref && ref->isReference()) { + int id = ref->referenceId(); + // TODO(edisonn): generation/updates not supported now + //int gen = ref->referenceGeneration(); + + SkASSERT(!(id < 0 || id > fObjects.count())); + + if (id < 0 || id > fObjects.count()) { + return NULL; + } + + // TODO(edisonn): verify id and gen expected + + if (fObjects[id].fResolvedReference != NULL) { + return fObjects[id].fResolvedReference; + } + + if (fObjects[id].fObj == NULL) { + fObjects[id].fObj = readObject(id); + } + + if (fObjects[id].fResolvedReference == NULL) { + if (!fObjects[id].fObj->isReference()) { + fObjects[id].fResolvedReference = fObjects[id].fObj; + } else { + fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj); + } + } + + return fObjects[id].fResolvedReference; + } + // TODO(edisonn): fix the mess with const, probably we need to remove it pretty much everywhere + return (SkPdfObject*)ref; } diff --git a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h index 38c72b089b..245bdfb133 100644 --- a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h +++ b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h @@ -1,12 +1,96 @@ #ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKNATIVEPARSEDPDF_H_ #define 
EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKNATIVEPARSEDPDF_H_ -#include "base/macros.h" +#include "SkRect.h" +#include "SkTDArray.h" + +class SkCanvas; + +class SkPdfAllocator; +class SkPdfMapper; +class SkPdfObject; +class SkPdfReal; +class SkPdfInteger; +class SkPdfString; +class SkPdfResourceDictionary; +class SkPdfCatalogDictionary; +class SkPdfPageObjectDictionary; +class SkPdfPageTreeNodeDictionary; + + + +class SkPdfNativeTokenizer; + +class SkNativeParsedPDF { +private: + struct PublicObjectEntry { + long fOffset; + // long endOffset; // TODO(edisonn): determine the end of the object, to be used when the doc is corrupted + SkPdfObject* fObj; + // TODO(edisonn): perf ... probably it does not make sense to cache the ref. test it! + SkPdfObject* fResolvedReference; + }; -class SkNativeParsedPDF : public SkParsedPDF { public: - SkNativeParsedPDF(); - virtual ~SkNativeParsedPDF(); + // TODO(edisonn): read methods: file, stream, http(s)://url, url with seek? + // TODO(edisonn): read first page asap, linearized + // TODO(edisonn): read page N asap, read all file + // TODO(edisonn): allow corruptions of file (e.g. missing endobj, missing stream length, ...) 
+ // TODO(edisonn): encryption + SkNativeParsedPDF(const char* path); + ~SkNativeParsedPDF(); + + int pages() const; + SkPdfResourceDictionary* pageResources(int page); + SkRect MediaBox(int page) const; + SkPdfNativeTokenizer* tokenizerOfPage(int n) const; + + SkPdfNativeTokenizer* tokenizerOfStream(SkPdfObject* stream) const; + SkPdfNativeTokenizer* tokenizerOfBuffer(unsigned char* buffer, size_t len) const; + + size_t objects() const; + SkPdfObject* object(int i); + + const SkPdfMapper* mapper() const; + SkPdfAllocator* allocator() const; + + SkPdfReal* createReal(double value) const; + SkPdfInteger* createInteger(int value) const; + // the string does not own the char* + SkPdfString* createString(unsigned char* sz, size_t len) const; + + void drawPage(int page, SkCanvas* canvas); + + SkPdfObject* resolveReference(SkPdfObject* ref) const; + SkPdfObject* resolveReference(const SkPdfObject* ref) const; + +private: + + unsigned char* readCrossReferenceSection(unsigned char* xrefStart, unsigned char* trailerEnd); + long readTrailer(unsigned char* trailerStart, unsigned char* trailerEnd, bool storeCatalog); + + // TODO(edisonn): updates not supported right now, generation ignored + void addCrossSectionInfo(int id, int generation, int offset, bool isFreed); + static void reset(PublicObjectEntry* obj) { + obj->fObj = NULL; + obj->fResolvedReference = NULL; + obj->fOffset = -1; + } + + SkPdfObject* readObject(int id/*, int generation*/) const; + + void fillPages(SkPdfPageTreeNodeDictionary* tree); + + // private fields + SkPdfAllocator* fAllocator; + SkPdfMapper* fMapper; + unsigned char* fFileContent; + size_t fContentLength; + const SkPdfObject* fRootCatalogRef; + SkPdfCatalogDictionary* fRootCatalog; + + mutable SkTDArray<PublicObjectEntry> fObjects; + SkTDArray<SkPdfPageObjectDictionary*> fPages; }; #endif // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKNATIVEPARSEDPDF_H_ diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp 
b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp index ba3a34e372..dd72acd2f4 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp +++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp @@ -1,11 +1,772 @@ #include "SkPdfNativeTokenizer.h" +#include "SkPdfObject.h" +#include "SkPdfConfig.h" -SkPdfNativeTokenizer::SkPdfNativeTokenizer() { - // TODO(edisonn): Auto-generated constructor stub +#include "SkPdfStreamCommonDictionary_autogen.h" +unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) { + while (start < end && isPdfWhiteSpace(*start)) { + if (*start == kComment_PdfDelimiter) { + // skip the comment until end of line + while (start < end && !isPdfEOL(*start)) { + *start = '\0'; + start++; + } + } else { + *start = '\0'; + start++; + } + } + return start; +} + +// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ? +unsigned char* endOfPdfToken(unsigned char* start, unsigned char* end) { + //int opened brackets + //TODO(edisonn): what out for special chars, like \n, \032 + + SkASSERT(!isPdfWhiteSpace(*start)); + + if (start < end && isPdfDelimiter(*start)) { + start++; + return start; + } + + while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) { + start++; + } + return start; +} + +unsigned char* skipPdfComment(unsigned char* start, unsigned char* end) { + SkASSERT(start == end || *start == kComment_PdfDelimiter); + while (start < end && isPdfEOL(*start)) { + *start = '\0'; + start++; + } + return start; +} + +// last elem has to be ] +unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator) { + while (start < end) { + // skip white spaces + start = skipPdfWhiteSpaces(start, end); + + unsigned char* endOfToken = endOfPdfToken(start, end); + + if (endOfToken == start) { + // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray + return start; + } + + if 
(endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) { + return endOfToken; + } + + SkPdfObject* newObj = allocator->allocObject(); + start = nextObject(start, end, newObj, allocator); + // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when + // we are sure they are not references! + if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) { + SkPdfObject* gen = array->removeLastInArray(); + SkPdfObject* id = array->removeLastInArray(); + newObj->reset(); + SkPdfObject::makeReference(id->intValue(), gen->intValue(), newObj); + } + array->appendInArray(newObj); + } + // TODO(edisonn): report not reached, we should never get here + SkASSERT(false); + return start; +} + +// When we read strings we will rewrite the string so we will reuse the memory +// when we start to read the string, we already consumed the opened bracket +unsigned char* readString(unsigned char* start, unsigned char* end, SkPdfObject* str) { + unsigned char* out = start; + unsigned char* in = start; + + int openRoundBrackets = 0; + while (in < end && (*in != kClosedRoundBracket_PdfDelimiter || openRoundBrackets > 0)) { + openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter); + openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter); + if (*in == kEscape_PdfSpecial) { + if (in + 1 < end) { + switch (in[1]) { + case 'n': + *out = kLF_PdfWhiteSpace; + out++; + in += 2; + break; + + case 'r': + *out = kCR_PdfWhiteSpace; + out++; + in += 2; + break; + + case 't': + *out = kHT_PdfWhiteSpace; + out++; + in += 2; + break; + + case 'b': + // TODO(edisonn): any special meaning to backspace? 
+ *out = kBackspace_PdfSpecial; + out++; + in += 2; + break; + + case 'f': + *out = kFF_PdfWhiteSpace; + out++; + in += 2; + break; + + case kOpenedRoundBracket_PdfDelimiter: + *out = kOpenedRoundBracket_PdfDelimiter; + out++; + in += 2; + break; + + case kClosedRoundBracket_PdfDelimiter: + *out = kClosedRoundBracket_PdfDelimiter; + out++; + in += 2; + break; + + case kEscape_PdfSpecial: + *out = kEscape_PdfSpecial; + out++; + in += 2; + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + //read octals + in++; // consume backslash + + int code = 0; + int i = 0; + while (in < end && *in >= '0' && *in < '8') { + code = (code << 3) + ((*in) - '0'); // code * 8 + d + i++; + in++; + if (i == 3) { + *out = code & 0xff; + out++; + i = 0; + } + } + if (i > 0) { + *out = code & 0xff; + out++; + } + } + break; + + default: + // Per spec, backslash is ignored is escaped ch is unknown + in++; + break; + } + } + } else { + // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ? 
+ // we could have one look that first just inc current, and when we find the backslash + // we go to this loop + *in = *out; + in++; + out++; + } + } + + + SkPdfObject::makeString(start, out, str); + return in + 1; // consume ) at the end of the string +} + +unsigned char* readHexString(unsigned char* start, unsigned char* end, SkPdfObject* str) { + unsigned char* out = start; + unsigned char* in = start; + + unsigned char code = 0; + + while (in < end) { + while (in < end && isPdfWhiteSpace(*in)) { + in++; + } + + if (*in == kClosedInequityBracket_PdfDelimiter) { + *in = '\0'; + in++; + // normal exit + break; + } + + if (in >= end) { + // end too soon + break; + } + + switch (*in) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + code = (*in - '0') << 4; + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + code = (*in - 'a' + 10) << 4; + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + code = (*in - 'A' + 10) << 4; + break; + + // TODO(edisonn): spec does not say how to handle this error + default: + break; + } + + in++; // advance + + while (in < end && isPdfWhiteSpace(*in)) { + in++; + } + + // TODO(edisonn): report error + if (in >= end) { + *out = code; + out++; + break; + } + + if (*in == kClosedInequityBracket_PdfDelimiter) { + *out = code; + out++; + break; + } + + switch (*in) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + code += (*in - '0'); + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + code += (*in - 'a' + 10); + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + code += (*in - 'A' + 10); + break; + + // TODO(edisonn): spec does not say how to handle this error + default: + break; + } + + *out = code; + out++; + in++; + } + + if (out < in) { + *out = '\0'; + } + + 
SkPdfObject::makeHexString(start, out, str); + return in; // consume > at the end of the string +} + +// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter +unsigned char* readName(unsigned char* start, unsigned char* end, SkPdfObject* name) { + unsigned char* out = start; + unsigned char* in = start; + + unsigned char code = 0; + + while (in < end) { + if (isPdfWhiteSpaceOrPdfDelimiter(*in)) { + break; + } + + if (*in == '#' && in + 2 < end) { + in++; + switch (*in) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + code = (*in - '0') << 4; + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + code = (*in - 'a' + 10) << 4; + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + code = (*in - 'A' + 10) << 4; + break; + + // TODO(edisonn): spec does not say how to handle this error + default: + break; + } + + in++; // advance + + switch (*in) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + code += (*in - '0'); + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + code += (*in - 'a' + 10); + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + code += (*in - 'A' + 10); + break; + + // TODO(edisonn): spec does not say how to handle this error + default: + break; + } + + *out = code; + out++; + in++; + } else { + *out = *in; + out++; + in++; + } + } + + SkPdfObject::makeName(start, out, name); + return in; +} + +// TODO(edisonn): pdf spec let Length to be an indirect object define after the stream +// that makes for an interesting scenario, where the stream itself contains endstream, together +// with a reference object with the length, but the real length object would be somewhere else +// it could confuse the parser +/*example: + +7 0 obj +<< /length 8 0 R>> 
+stream +............... +endstream +8 0 obj #we are in stream actually, not a real object +<< 10 >> #we are in stream actually, not a real object +endobj +endstream +8 0 obj #real obj +<< 100 >> #real obj +endobj +and it could get worse, with multiple object like this +*/ + +// right now implement the silly algorithm that assumes endstream is finishing the stream + + +unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdfObject* dict) { + start = skipPdfWhiteSpaces(start, end); + if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) { + // no stream. return. + return start; + } + + start += 6; // strlen("stream") + if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { + start += 2; + } else if (start[0] == kLF_PdfWhiteSpace) { + start += 1; + } + + SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; + // TODO(edisonn): load Length + int length = -1; + + // TODO(edisonn): very basic implementation + if (stream->has_Length() && stream->Length(NULL) > 0) { + length = stream->Length(NULL); + } + + // TODO(edisonn): laod external streams + // TODO(edisonn): look at the last filter, to determione how to deal with possible issue + + if (length < 0) { + // scan the buffer, until we find first endstream + // TODO(edisonn): all buffers must have a 0 at the end now, + // TODO(edisonn): hack (mark end of content with 0) + unsigned char lastCh = *end; + *end = '\0'; + //SkASSERT(*end == '\0'); + unsigned char* endstream = (unsigned char*)strstr((const char*)start, "endstream"); + *end = lastCh; + + if (endstream) { + length = endstream - start; + if (*(endstream-1) == kLF_PdfWhiteSpace) length--; + if (*(endstream-1) == kCR_PdfWhiteSpace) length--; + } + } + if (length >= 0) { + unsigned char* endstream = start + length; + + if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) { + endstream += 2; + } else if (endstream[0] == 
kLF_PdfWhiteSpace) { + endstream += 1; + } + + // TODO(edisonn): verify the next bytes are "endstream" + + endstream += strlen("endstream"); + // TODO(edisonn): Assert? report error/warning? + dict->addStream(start, length); + return endstream; + } + return start; +} + +unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator) { + SkPdfObject::makeEmptyDictionary(dict); + + start = skipPdfWhiteSpaces(start, end); + + while (start < end && *start == kNamed_PdfDelimiter) { + SkPdfObject key; + *start = '\0'; + start++; + start = readName(start, end, &key); + start = skipPdfWhiteSpaces(start, end); + + if (start < end) { + SkPdfObject* value = allocator->allocObject(); + start = nextObject(start, end, value, allocator); + + start = skipPdfWhiteSpaces(start, end); + + if (start < end) { + // seems we have an indirect reference + if (isPdfDigit(*start)) { + SkPdfObject generation; + start = nextObject(start, end, &generation, allocator); + + SkPdfObject keywordR; + start = nextObject(start, end, &keywordR, allocator); + + if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) { + int64_t id = value->intValue(); + value->reset(); + SkPdfObject::makeReference(id, generation.intValue(), value); + dict->set(&key, value); + } else { + // error, ignore + dict->set(&key, value); + } + } else { + // next elem is not a digit, but it might not be / either! + dict->set(&key, value); + } + } else { + // /key >> + dict->set(&key, value); + return end; + } + start = skipPdfWhiteSpaces(start, end); + } else { + dict->set(&key, &SkPdfObject::kNull); + return end; + } + } + + // TODO(edisonn): options to ignore these errors + + // now we should expect >> + start = skipPdfWhiteSpaces(start, end); + start = endOfPdfToken(start, end); // > + start = endOfPdfToken(start, end); // > + + // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ... 
+ // or alocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ? + + start = readStream(start, end, dict); + + return start; +} + +unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator) { + unsigned char* current; + + // skip white spaces + start = skipPdfWhiteSpaces(start, end); + + current = endOfPdfToken(start, end); + + // no token, len would be 0 + if (current == start) { + return NULL; + } + + int tokenLen = current - start; + + if (tokenLen == 1) { + // start array + switch (*start) { + case kOpenedSquareBracket_PdfDelimiter: + *start = '\0'; + SkPdfObject::makeEmptyArray(token); + return readArray(current, end, token, allocator); + + case kOpenedRoundBracket_PdfDelimiter: + *start = '\0'; + return readString(start, end, token); + + case kOpenedInequityBracket_PdfDelimiter: + *start = '\0'; + if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) { + // TODO(edisonn): pass here the length somehow? + return readDictionary(start + 2, end, token, allocator); // skip << + } else { + return readHexString(start + 1, end, token); // skip < + } + + case kNamed_PdfDelimiter: + *start = '\0'; + return readName(start + 1, end, token); + + // TODO(edisonn): what to do curly brackets? read spec! + case kOpenedCurlyBracket_PdfDelimiter: + default: + break; + } + + SkASSERT(!isPdfWhiteSpace(*start)); + if (isPdfDelimiter(*start)) { + // TODO(edisonn): how stream ] } > ) will be handled? 
+ // for now ignore, and it will become a keyword to be ignored + } + } + + if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') { + SkPdfObject::makeNull(token); + return current; + } + + if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') { + SkPdfObject::makeBoolean(true, token); + return current; + } + + if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') { + SkPdfObject::makeBoolean(false, token); + return current; + } + + if (isPdfNumeric(*start)) { + SkPdfObject::makeNumeric(start, current, token); + } else { + SkPdfObject::makeKeyword(start, current, token); + } + return current; +} + +SkPdfObject* SkPdfAllocator::allocBlock() { + return new SkPdfObject[BUFFER_SIZE]; +} + +SkPdfAllocator::~SkPdfAllocator() { + for (int i = 0 ; i < fHandles.count(); i++) { + free(fHandles[i]); + } + for (int i = 0 ; i < fHistory.count(); i++) { + delete[] fHistory[i]; + } + delete[] fCurrent; +} + +SkPdfObject* SkPdfAllocator::allocObject() { + if (fCurrentUsed >= BUFFER_SIZE) { + fHistory.push(fCurrent); + fCurrent = allocBlock(); + fCurrentUsed = 0; + } + + fCurrentUsed++; + return &fCurrent[fCurrentUsed - 1]; +} + +// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass +SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator) : fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) { + unsigned char* buffer = NULL; + size_t len = 0; + objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator); + fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len); + fUncompressedStreamEnd = fUncompressedStream + len; + memcpy(fUncompressedStream, buffer, len);} + 
// The tokenizer does not release its buffer.
//
// The buffer was obtained with fAllocator->alloc(), which records the pointer in the
// allocator's handle list and frees it when the allocator is destroyed. Objects and keywords
// returned by the tokenizer point into that buffer, so it has to outlive the tokenizer.
//
// The buffer is deliberately not shrunk with realloc(): realloc() may move the block (or free
// it when the new size is 0), while the allocator would keep, and later free, the old pointer.
// TODO(edisonn): to give back the unparsed tail, add a shrink operation to SkPdfAllocator so
// the recorded handle can be updated.
SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
}
std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str()); +#endif + + return true; +} + +void SkPdfNativeTokenizer::PutBack(PdfToken token) { + SkASSERT(!fHasPutBack); + fHasPutBack = true; + fPutBack = token; +#ifdef PDF_TRACE + printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str()); +#endif +} + +bool SkPdfNativeTokenizer::readToken(PdfToken* token) { + if (fHasPutBack) { + *token = fPutBack; + fHasPutBack = false; +#ifdef PDF_TRACE + printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str()); +#endif + return true; + } + + if (fEmpty) { +#ifdef PDF_TRACE + printf("EMPTY TOKENIZER\n"); +#endif + return false; + } + + return readTokenCore(token); } diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h index c8a2f46c73..1c2336e356 100644 --- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h +++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h @@ -1,10 +1,145 @@ #ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ #define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_ -class SkPdfNativeTokenizer : public SkPdfPodofoTokenizer { +#include "SkTDArray.h" +#include "SkTDict.h" +#include <math.h> +#include <string.h> + +class SkPdfMapper; +class SkPdfDictionary; + +// White Spaces +#define kNUL_PdfWhiteSpace '\x00' +#define kHT_PdfWhiteSpace '\x09' +#define kLF_PdfWhiteSpace '\x0A' +#define kFF_PdfWhiteSpace '\x0C' +#define kCR_PdfWhiteSpace '\x0D' +#define kSP_PdfWhiteSpace '\x20' + +// PdfDelimiters +#define kOpenedRoundBracket_PdfDelimiter '(' +#define kClosedRoundBracket_PdfDelimiter ')' +#define 
kOpenedInequityBracket_PdfDelimiter '<' +#define kClosedInequityBracket_PdfDelimiter '>' +#define kOpenedSquareBracket_PdfDelimiter '[' +#define kClosedSquareBracket_PdfDelimiter ']' +#define kOpenedCurlyBracket_PdfDelimiter '{' +#define kClosedCurlyBracket_PdfDelimiter '}' +#define kNamed_PdfDelimiter '/' +#define kComment_PdfDelimiter '%' + +#define kEscape_PdfSpecial '\\' +#define kBackspace_PdfSpecial '\x08' + +// TODO(edisonn): what is the faster way for compiler/machine type to evaluate this expressions? +// we should evaluate all options. might be even different from one machine to another +// 1) expand expression, let compiler optimize it +// 2) binary search +// 3) linear search in array +// 4) vector (e.f. T type[256] .. return type[ch] ... +// 5) manually build the expression with least number of operators, e.g. for consecutive +// chars, we can use an binary equal ignoring last bit +#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)||((ch)==kHT_PdfWhiteSpace)||((ch)==kLF_PdfWhiteSpace)||((ch)==kFF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)||((ch)==kSP_PdfWhiteSpace)) + +#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)) + + +#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\ + ((ch)==kClosedRoundBracket_PdfDelimiter)||\ + ((ch)==kOpenedInequityBracket_PdfDelimiter)||\ + ((ch)==kClosedInequityBracket_PdfDelimiter)||\ + ((ch)==kOpenedSquareBracket_PdfDelimiter)||\ + ((ch)==kClosedSquareBracket_PdfDelimiter)||\ + ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\ + ((ch)==kClosedCurlyBracket_PdfDelimiter)||\ + ((ch)==kNamed_PdfDelimiter)||\ + ((ch)==kComment_PdfDelimiter)) + +#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch)) + +#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9') +#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-') + +unsigned char* skipPdfWhiteSpaces(unsigned char* buffer, size_t len); +unsigned char* endOfPdfToken(unsigned char* start, size_t len); 
+unsigned char* skipPdfComment(unsigned char* start, size_t len);
+
+// TODO(edisonn): typedef real and integer types? it would make the code less readable...
+//typedef double SkPdfReal;
+//typedef int64_t SkPdfInteger;
+
+// An allocator only allocates memory, and it deletes it all when the allocator is destroyed.
+// This allows us not to do any garbage collection while we parse or draw a pdf, and to defer it
+// until the user is looking at the image.
+
+class SkPdfObject;
+
+class SkPdfAllocator {
+#define BUFFER_SIZE 1024
+    SkTDArray<SkPdfObject*> fHistory;
+    SkTDArray<void*> fHandles;
+    SkPdfObject* fCurrent;
+    int fCurrentUsed;
+
+    SkPdfObject* allocBlock();
+
 public:
-    SkPdfNativeTokenizer();
+    SkPdfAllocator() {
+        fCurrent = allocBlock();
+        fCurrentUsed = 0;
+    }
+
+    ~SkPdfAllocator();
+
+    SkPdfObject* allocObject();
+
+    // Allocates a raw buffer with malloc() and records the handle in fHandles.
+    // The result is whatever malloc() returned, so it can be NULL; callers must check.
+    // TODO(edisonn): free this memory in destructor, track the usage?
+    void* alloc(size_t bytes) {
+        void* data = malloc(bytes);
+        fHandles.push(data);
+        return data;
+    }
+};
+
+unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator);
+
+enum SkPdfTokenType {
+    kKeyword_TokenType,
+    kObject_TokenType,
+};
+
+// A token is either a keyword (fKeyword/fKeywordLength) or an object (fObject); fType tells
+// which of the two is meaningful.
+struct PdfToken {
+    const char* fKeyword;
+    size_t fKeywordLength;
+    SkPdfObject* fObject;
+    SkPdfTokenType fType;
+
+    // fType is initialized as well, so a default constructed token never exposes an
+    // indeterminate enum value.
+    PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL), fType(kKeyword_TokenType) {}
+};
+
+class SkPdfNativeTokenizer {
+public:
+    SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator);
+    SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator);
+    virtual ~SkPdfNativeTokenizer();
+
+    bool readToken(PdfToken* token);
+    bool readTokenCore(PdfToken* token);
+    void PutBack(PdfToken token);
+
+private:
+    const SkPdfMapper* fMapper;
+    SkPdfAllocator* fAllocator;
+
+    unsigned char* fUncompressedStreamStart;
+    unsigned char* fUncompressedStream;
+    unsigned char* fUncompressedStreamEnd;
+
+    bool fEmpty;
+    bool fHasPutBack;
+    PdfToken fPutBack;
 };
 #endif  // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfObject.cpp b/experimental/PdfViewer/pdfparser/native/SkPdfObject.cpp
new file mode 100644
index 0000000000..57e9abb368
--- /dev/null
+++ b/experimental/PdfViewer/pdfparser/native/SkPdfObject.cpp
@@ -0,0 +1,85 @@
+
+#include "SkPdfObject.h"
+#include "SkPdfStreamCommonDictionary_autogen.h"
+
+#include "SkFlate.h"
+#include "SkStream.h"
+#include "SkPdfNativeTokenizer.h"
+
+SkPdfObject SkPdfObject::kNull = SkPdfObject::makeNull();
+
+// Inflates the stream bytes held in fStr into a buffer obtained from the allocator.
+// On success fStr points at the inflated bytes (still flagged kUnfilteredStreamBit, the caller
+// decides when the whole filter chain is done). On failure fStr is left untouched.
+bool SkPdfObject::applyFlateDecodeFilter(SkPdfAllocator* allocator) {
+    if (!SkFlate::HaveFlate()) {
+        // TODO(edisonn): warn, make callers handle it
+        return false;
+    }
+
+    // The low bit of fStr.fBytes is the filtered/unfiltered flag, the length is fBytes >> 1.
+    SkMemoryStream skstream(fStr.fBuffer, fStr.fBytes >> 1, false);
+    SkDynamicMemoryWStream uncompressedData;
+
+    if (!SkFlate::Inflate(&skstream, &uncompressedData)) {
+        // TODO(edisonn): warn, make callers handle it
+        return false;
+    }
+
+    size_t bytes = uncompressedData.bytesWritten();
+    unsigned char* buffer = (unsigned char*)allocator->alloc(bytes);
+    if (buffer == NULL && bytes > 0) {
+        // Out of memory: do not touch fStr, and never copy into a NULL buffer.
+        return false;
+    }
+
+    uncompressedData.copyTo(buffer);
+    fStr.fBuffer = buffer;
+    fStr.fBytes = (bytes << 1) + kUnfilteredStreamBit;
+    return true;
+}
+
+bool SkPdfObject::applyDCTDecodeFilter(SkPdfAllocator* allocator) {
+    // this would fail, and it won't allow any more filters.
+    // technically, it would be possible, but not a real world scenario
+    // TODO(edisonn): or get the image here and store it for fast retrieval?
+    return false;
+}
+
+// Dispatches on the filter name; returns false for filters that failed or are not supported.
+bool SkPdfObject::applyFilter(const char* name, SkPdfAllocator* allocator) {
+    if (strcmp(name, "FlateDecode") == 0) {
+        return applyFlateDecodeFilter(allocator);
+    } else if (strcmp(name, "DCTDecode") == 0) {
+        return applyDCTDecodeFilter(allocator);
+    }
+    // TODO(edisonn): alert, not supported, but should be implemented asap
+    return false;
+}
+
+// Runs the /Filter chain of the attached stream (best effort) and marks the stream as filtered.
+// Returns false only when the object has no stream. A filter that fails stops the chain, and the
+// bytes decoded so far are kept.
+bool SkPdfObject::filterStream(SkPdfAllocator* allocator) {
+    if (!hasStream()) {
+        return false;
+    }
+
+    if (isStreamFiltered()) {
+        return true;
+    }
+
+    SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*)this;
+
+    if (!stream->has_Filter()) {
+        fStr.fBytes = ((fStr.fBytes >> 1) << 1) + kFilteredStreamBit;
+        return true;
+    }
+
+    if (stream->isFilterAName(NULL)) {
+        std::string filterName = stream->getFilterAsName(NULL);
+        applyFilter(filterName.c_str(), allocator);
+    } else if (stream->isFilterAArray(NULL)) {
+        const SkPdfArray* filters = stream->getFilterAsArray(NULL);
+        int cnt = filters != NULL ? (int)filters->size() : 0;
+        // The pdf spec lists multiple filters in the order in which they are applied when
+        // decoding, so walk the array from the first entry to the last one.
+        for (int i = 0; i < cnt; i++) {
+            const SkPdfObject* filterName = filters->objAtAIndex(i);
+            if (filterName != NULL && filterName->isName()) {
+                if (!applyFilter(filterName->nameValue(), allocator)) {
+                    break;
+                }
+            } else {
+                // TODO(edisonn): report warning
+            }
+        }
+    }
+
+    fStr.fBytes = ((fStr.fBytes >> 1) << 1) + kFilteredStreamBit;
+
+    return true;
+}
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfObject.h b/experimental/PdfViewer/pdfparser/native/SkPdfObject.h
new file mode 100644
index 0000000000..86963b0398
--- /dev/null
+++ b/experimental/PdfViewer/pdfparser/native/SkPdfObject.h
@@ -0,0 +1,866 @@
+#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFOBJECT_H_
+#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFOBJECT_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <string>
+#include "SkTDArray.h"
+#include "SkTDict.h"
+#include "SkRect.h"
+#include "SkMatrix.h"
+#include "SkString.h"
+
+#include "SkPdfNYI.h"
+#include "SkPdfConfig.h"
+
+class SkPdfDictionary;
+class SkPdfStream;
+class SkPdfAllocator;
+
+// TODO(edisonn): macro it and move it to utils
+SkMatrix SkMatrixFromPdfMatrix(double array[6]);
+
+
+// Values of the low bit of fStr.fBytes when fStr describes the stream attached to a dictionary.
+#define kFilteredStreamBit 0
+#define kUnfilteredStreamBit 1
+
+
+// A tagged value that can hold any pdf object. Strings, names and keywords do not own their
+// bytes (see NotOwnedString); arrays and dictionaries own their container, but not the
+// SkPdfObject pointers stored in it. The destructor deletes the owned container, so do not copy
+// an object that holds an array or a dictionary: the implicit copy would share the container.
+class SkPdfObject {
+public:
+    enum ObjectType {
+        kInvalid_PdfObjectType,
+
+        kBoolean_PdfObjectType,
+        kInteger_PdfObjectType,
+        kReal_PdfObjectType,
+        kString_PdfObjectType,
+        kHexString_PdfObjectType,
+        kName_PdfObjectType,
+        kKeyword_PdfObjectType,
+        //kStream_PdfObjectType,  //  attached to a Dictionary
+        kArray_PdfObjectType,
+        kDictionary_PdfObjectType,
+        kNull_PdfObjectType,
+
+        // TODO(edisonn): after the pdf has been loaded completely, resolve all references
+        // try the same thing with delayed loaded ...
+        kReference_PdfObjectType,
+
+        kUndefined_PdfObjectType,  // per 1.4 spec, if the same key appear twice in the dictionary, the value is undefined
+    };
+
+private:
+    struct NotOwnedString {
+        unsigned char* fBuffer;
+        size_t fBytes;
+    };
+
+    struct Reference {
+        unsigned int fId;
+        unsigned int fGen;
+    };
+
+    // TODO(edisonn): add stream start, stream end, where stream is either the file
+    // or decoded/filtered pdf stream
+
+    // TODO(edisonn): add warning/report per object
+    // TODO(edisonn): add flag fUsed, to be used once the parsing is complete,
+    // so we could show what parts have been processed, ignored, or generated errors
+
+    ObjectType fObjectType;
+
+    union {
+        bool fBooleanValue;
+        int64_t fIntegerValue;
+        // TODO(edisonn): double, float? typedefed
+        double fRealValue;
+        NotOwnedString fStr;
+
+        // TODO(edisonn): make sure the footprint of fArray and fMap is small, otherwise, use pointers, or classes with up to 8 bytes in footprint
+        SkTDArray<SkPdfObject*>* fArray;
+        Reference fRef;
+    };
+    // fMap lives outside the union because a dictionary also uses fStr for its attached stream.
+    SkTDict<SkPdfObject*>* fMap;
+    void* fData;
+
+
+public:
+
+    SkPdfObject() : fObjectType(kInvalid_PdfObjectType), fMap(NULL), fData(NULL) {}
+
+    inline void* data() {
+        return fData;
+    }
+
+    inline void setData(void* data) {
+        fData = data;
+    }
+
+    ~SkPdfObject() {
+        reset();
+    }
+
+    // Deletes the owned container (if any) and turns the object back into an invalid one.
+    void reset() {
+        switch (fObjectType) {
+            case kArray_PdfObjectType:
+                delete fArray;
+                break;
+
+            case kDictionary_PdfObjectType:
+                delete fMap;
+                break;
+
+            default:
+                break;
+        }
+        fObjectType = kInvalid_PdfObjectType;
+    }
+
+    ObjectType type() { return fObjectType; }
+
+    // Bytes of a string, hex string or keyword; NULL for any other type (names included).
+    const char* c_str() const {
+        switch (fObjectType) {
+            case kString_PdfObjectType:
+            case kHexString_PdfObjectType:
+            case kKeyword_PdfObjectType:
+                return (const char*)fStr.fBuffer;
+
+            default:
+                // TODO(edisonn): report/warning
+                return NULL;
+        }
+    }
+
+    // Length in bytes of a string, hex string or keyword; 0 for any other type.
+    size_t len() const {
+        switch (fObjectType) {
+            case kString_PdfObjectType:
+            case kHexString_PdfObjectType:
+            case kKeyword_PdfObjectType:
+                return fStr.fBytes;
+
+            default:
+                // TODO(edisonn): report/warning
+                return 0;
+        }
+    }
+
+
+    // TODO(edisonn): NYI
+    SkPdfDate& dateValue() const {
+        static SkPdfDate nyi;
+        return nyi;
+    }
+
+    // TODO(edisonn): NYI
+    SkPdfFunction& functionValue() const {
+        static SkPdfFunction nyi;
+        return nyi;
+    }
+
+    // TODO(edisonn): NYI
+    SkPdfFileSpec& fileSpecValue() const {
+        static SkPdfFileSpec nyi;
+        return nyi;
+    }
+
+    // TODO(edisonn): NYI
+    SkPdfTree& treeValue() const {
+        static SkPdfTree nyi;
+        return nyi;
+    }
+
+
+    // The make* functions that take an SkPdfObject* expect an invalid (fresh or reset) object.
+    static void makeBoolean(bool value, SkPdfObject* obj) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        obj->fObjectType = kBoolean_PdfObjectType;
+        obj->fBooleanValue = value;
+    }
+
+    static SkPdfObject makeBoolean(bool value) {
+        SkPdfObject obj;
+        obj.fObjectType = kBoolean_PdfObjectType;
+        obj.fBooleanValue = value;
+        return obj;
+    }
+
+    static void makeInteger(int64_t value, SkPdfObject* obj) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        obj->fObjectType = kInteger_PdfObjectType;
+        obj->fIntegerValue = value;
+    }
+
+    static void makeReal(double value, SkPdfObject* obj) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        obj->fObjectType = kReal_PdfObjectType;
+        obj->fRealValue = value;
+    }
+
+    static void makeNull(SkPdfObject* obj) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        obj->fObjectType = kNull_PdfObjectType;
+    }
+
+    static SkPdfObject makeNull() {
+        SkPdfObject obj;
+        obj.fObjectType = kNull_PdfObjectType;
+        return obj;
+    }
+
+    static SkPdfObject kNull;
+
+    // Makes an integer when [start, end) has no '.', a real otherwise.
+    // NOTE: atol/atof stop at the first non numeric char, not at end, so the token must be
+    // followed by a non numeric byte. atol also limits integers to the range of long.
+    static void makeNumeric(unsigned char* start, unsigned char* end, SkPdfObject* obj) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        // TODO(edisonn): NYI properly
+        // if has dot (impl), or exceeds max int, is real, otherwise is int
+        bool isInt = true;
+        for (unsigned char* current = start; current < end; current++) {
+            if (*current == '.') {
+                isInt = false;
+                break;
+            }
+            // TODO(edisonn): report parse issue with numbers like "24asdasd123"
+        }
+        if (isInt) {
+            makeInteger(atol((const char*)start), obj);
+        } else {
+            makeReal(atof((const char*)start), obj);
+        }
+    }
+
+    static void makeReference(unsigned int id, unsigned int gen, SkPdfObject* obj) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        obj->fObjectType = kReference_PdfObjectType;
+        obj->fRef.fId = id;
+        obj->fRef.fGen = gen;
+    }
+
+
+    // String-like factories. Each kind comes in three flavors: NUL terminated buffer,
+    // [start, end) range, and start + byte count. The bytes are referenced, not copied.
+    static void makeString(unsigned char* start, SkPdfObject* obj) {
+        makeStringCore(start, strlen((const char*)start), obj, kString_PdfObjectType);
+    }
+
+    static void makeString(unsigned char* start, unsigned char* end, SkPdfObject* obj) {
+        makeStringCore(start, end - start, obj, kString_PdfObjectType);
+    }
+
+    static void makeString(unsigned char* start, size_t bytes, SkPdfObject* obj) {
+        makeStringCore(start, bytes, obj, kString_PdfObjectType);
+    }
+
+
+    static void makeHexString(unsigned char* start, SkPdfObject* obj) {
+        makeStringCore(start, strlen((const char*)start), obj, kHexString_PdfObjectType);
+    }
+
+    static void makeHexString(unsigned char* start, unsigned char* end, SkPdfObject* obj) {
+        makeStringCore(start, end - start, obj, kHexString_PdfObjectType);
+    }
+
+    static void makeHexString(unsigned char* start, size_t bytes, SkPdfObject* obj) {
+        makeStringCore(start, bytes, obj, kHexString_PdfObjectType);
+    }
+
+
+    static void makeName(unsigned char* start, SkPdfObject* obj) {
+        makeStringCore(start, strlen((const char*)start), obj, kName_PdfObjectType);
+    }
+
+    static void makeName(unsigned char* start, unsigned char* end, SkPdfObject* obj) {
+        makeStringCore(start, end - start, obj, kName_PdfObjectType);
+    }
+
+    static void makeName(unsigned char* start, size_t bytes, SkPdfObject* obj) {
+        makeStringCore(start, bytes, obj, kName_PdfObjectType);
+    }
+
+
+    static void makeKeyword(unsigned char* start, SkPdfObject* obj) {
+        makeStringCore(start, strlen((const char*)start), obj, kKeyword_PdfObjectType);
+    }
+
+    static void makeKeyword(unsigned char* start, unsigned char* end, SkPdfObject* obj) {
+        makeStringCore(start, end - start, obj, kKeyword_PdfObjectType);
+    }
+
+    static void makeKeyword(unsigned char* start, size_t bytes, SkPdfObject* obj) {
+        makeStringCore(start, bytes, obj, kKeyword_PdfObjectType);
+    }
+
+
+
+    // TODO(edisonn): make the functions to return SkPdfArray, move these functions in SkPdfArray
+    static void makeEmptyArray(SkPdfObject* obj) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        obj->fObjectType = kArray_PdfObjectType;
+        obj->fArray = new SkTDArray<SkPdfObject*>();
+        // return (SkPdfArray*)obj;
+    }
+
+    bool appendInArray(SkPdfObject* obj) {
+        SkASSERT(fObjectType == kArray_PdfObjectType);
+        if (fObjectType != kArray_PdfObjectType) {
+            // TODO(edisonn): report err
+            return false;
+        }
+
+        fArray->push(obj);
+        return true;
+    }
+
+    // The array accessors below only assert the type (and do not check the index), so they
+    // must be called on arrays only.
+    size_t size() const {
+        SkASSERT(fObjectType == kArray_PdfObjectType);
+
+        return fArray->count();
+    }
+
+    SkPdfObject* objAtAIndex(int i) {
+        SkASSERT(fObjectType == kArray_PdfObjectType);
+
+        return (*fArray)[i];
+    }
+
+    SkPdfObject* removeLastInArray() {
+        SkASSERT(fObjectType == kArray_PdfObjectType);
+
+        SkPdfObject* ret = NULL;
+        fArray->pop(&ret);
+
+        return ret;
+    }
+
+
+    const SkPdfObject* objAtAIndex(int i) const {
+        SkASSERT(fObjectType == kArray_PdfObjectType);
+
+        return (*fArray)[i];
+    }
+
+    SkPdfObject* operator[](int i) {
+        SkASSERT(fObjectType == kArray_PdfObjectType);
+
+        return (*fArray)[i];
+    }
+
+    const SkPdfObject* operator[](int i) const {
+        SkASSERT(fObjectType == kArray_PdfObjectType);
+
+        return (*fArray)[i];
+    }
+
+
+    // TODO(edisonn): make the functions to return SkPdfDictionary, move these functions in SkPdfDictionary
+    static void makeEmptyDictionary(SkPdfObject* obj) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        obj->fObjectType = kDictionary_PdfObjectType;
+        obj->fMap = new SkTDict<SkPdfObject*>(1);
+        // no stream attached yet
+        obj->fStr.fBuffer = NULL;
+        obj->fStr.fBytes = 0;
+    }
+
+    // TODO(edisonn): get all the possible names from spec, and compute a hash function
+    // that would create no overlaps in the same dictionary
+    // or build a tree of chars that when followed goes to a unique id/index/hash
+    // TODO(edisonn): generate constants like kDictFoo, kNameDict_name
+    // which will be used in code
+    // add function SkPdfFastNameKey key(const char* key);
+    // TODO(edisonn): setting the same key twice, will make the value undefined!
+    // Dictionary accessors. Keys are names; the name bytes must be NUL terminated.
+    bool set(SkPdfObject* key, SkPdfObject* value) {
+        SkASSERT(fObjectType == kDictionary_PdfObjectType);
+        SkASSERT(key->fObjectType == kName_PdfObjectType);
+
+        if (key->fObjectType != kName_PdfObjectType || fObjectType != kDictionary_PdfObjectType) {
+            // TODO(edisonn): report err
+            return false;
+        }
+
+        // we rewrite all delimiters and white spaces with '\0', so we expect the end of name to be '\0'
+        SkASSERT(key->fStr.fBuffer[key->fStr.fBytes] == '\0');
+
+        return set((char*)key->fStr.fBuffer, value);
+    }
+
+    bool set(const char* key, SkPdfObject* value) {
+        SkASSERT(fObjectType == kDictionary_PdfObjectType);
+
+        if (fObjectType != kDictionary_PdfObjectType) {
+            // TODO(edisonn): report err
+            return false;
+        }
+
+        return fMap->set(key, value);
+    }
+
+    // Returns the value stored under key, or NULL when the key is missing, when key is not a
+    // name, or when this object is not a dictionary.
+    SkPdfObject* get(SkPdfObject* key) {
+        SkASSERT(fObjectType == kDictionary_PdfObjectType);
+        SkASSERT(key->fObjectType == kName_PdfObjectType);
+
+        if (key->fObjectType != kName_PdfObjectType || fObjectType != kDictionary_PdfObjectType) {
+            // TODO(edisonn): report err
+            return NULL;
+        }
+
+        SkASSERT(key->fStr.fBuffer[key->fStr.fBytes] == '\0');
+
+        return get((char*)key->fStr.fBuffer);
+    }
+
+    SkPdfObject* get(const char* key) {
+        SkASSERT(fObjectType == kDictionary_PdfObjectType);
+        SkASSERT(key);
+        if (fObjectType != kDictionary_PdfObjectType) {
+            // TODO(edisonn): report err
+            return NULL;
+        }
+        SkPdfObject* ret = NULL;
+        fMap->find(key, &ret);
+        return ret;
+    }
+
+    const SkPdfObject* get(SkPdfObject* key) const {
+        SkASSERT(fObjectType == kDictionary_PdfObjectType);
+        SkASSERT(key->fObjectType == kName_PdfObjectType);
+
+        if (key->fObjectType != kName_PdfObjectType || fObjectType != kDictionary_PdfObjectType) {
+            // TODO(edisonn): report err
+            return NULL;
+        }
+
+        SkASSERT(key->fStr.fBuffer[key->fStr.fBytes] == '\0');
+
+        return get((char*)key->fStr.fBuffer);
+    }
+
+
+    const SkPdfObject* get(const char* key) const {
+        SkASSERT(fObjectType == kDictionary_PdfObjectType);
+        SkASSERT(key);
+        if (fObjectType != kDictionary_PdfObjectType) {
+            // TODO(edisonn): report err
+            return NULL;
+        }
+        SkPdfObject* ret = NULL;
+        fMap->find(key, &ret);
+        return ret;
+    }
+
+    // Looks up key first and, only when it is missing, the abbreviated name abr.
+    const SkPdfObject* get(const char* key, const char* abr) const {
+        const SkPdfObject* ret = get(key);
+        // TODO(edisonn): / is a valid name, and it might be an abbreviation, so "" should not be like NULL
+        // make this distinction in generator, and remove "" from condition
+        if (ret != NULL || abr == NULL || *abr == '\0') {
+            return ret;
+        }
+        return get(abr);
+    }
+
+    SkPdfObject* get(const char* key, const char* abr) {
+        SkPdfObject* ret = get(key);
+        // TODO(edisonn): / is a valid name, and it might be an abbreviation, so "" should not be like NULL
+        // make this distinction in generator, and remove "" from condition
+        if (ret != NULL || abr == NULL || *abr == '\0') {
+            return ret;
+        }
+        return get(abr);
+    }
+
+    SkPdfDictionary* asDictionary() {
+        SkASSERT(isDictionary());
+        if (!isDictionary()) {
+            return NULL;
+        }
+        return (SkPdfDictionary*) this;
+    }
+
+    const SkPdfDictionary* asDictionary() const {
+        SkASSERT(isDictionary());
+        if (!isDictionary()) {
+            return NULL;
+        }
+        return (SkPdfDictionary*) this;
+    }
+
+
+    bool isReference() const {
+        return fObjectType == kReference_PdfObjectType;
+    }
+
+    bool isBoolean() const {
+        return fObjectType == kBoolean_PdfObjectType;
+    }
+
+    bool isInteger() const {
+        return fObjectType == kInteger_PdfObjectType;
+    }
+private:
+    bool isReal() const {
+        return fObjectType == kReal_PdfObjectType;
+    }
+public:
+    bool isNumber() const {
+        return fObjectType == kInteger_PdfObjectType || fObjectType == kReal_PdfObjectType;
+    }
+
+    // True for the keyword R, the one that ends an indirect reference ("id gen R").
+    bool isKeywordReference() const {
+        return fObjectType == kKeyword_PdfObjectType && fStr.fBytes == 1 && fStr.fBuffer[0] == 'R';
+    }
+
+    bool isKeyword() const {
+        return fObjectType == kKeyword_PdfObjectType;
+    }
+
+    bool isName() const {
+        return fObjectType == kName_PdfObjectType;
+    }
+
+    bool isArray() const {
+        return fObjectType == kArray_PdfObjectType;
+    }
+
+    bool isDate() const {
+        return fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType;
+    }
+
+    bool isDictionary() const {
+        return fObjectType == kDictionary_PdfObjectType;
+    }
+
+    bool isFunction() const {
+        return false;  // NYI
+    }
+
+    bool isRectangle() const {
+        return fObjectType == kArray_PdfObjectType && fArray->count() == 4; // NYI + and elems are numbers
+    }
+
+    // TODO(edisonn): has stream .. or is stream ... TBD
+    bool hasStream() const {
+        return isDictionary() && fStr.fBuffer != NULL;
+    }
+
+    // TODO(edisonn): has stream .. or is stream ... TBD
+    const SkPdfStream* getStream() const {
+        return hasStream() ? (const SkPdfStream*)this : NULL;
+    }
+
+    SkPdfStream* getStream() {
+        return hasStream() ? (SkPdfStream*)this : NULL;
+    }
+
+    bool isAnyString() const {
+        return fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType;
+    }
+
+    bool isMatrix() const {
+        return fObjectType == kArray_PdfObjectType && fArray->count() == 6; // NYI + and elems are numbers
+    }
+
+    // Value getters: each one asserts the type in debug builds and falls back to a neutral
+    // value (0, "", false, empty rect, identity matrix) when the type does not match.
+    inline int64_t intValue() const {
+        SkASSERT(fObjectType == kInteger_PdfObjectType);
+
+        if (fObjectType != kInteger_PdfObjectType) {
+            // TODO(edisonn): log err
+            return 0;
+        }
+        return fIntegerValue;
+    }
+private:
+    inline double realValue() const {
+        SkASSERT(fObjectType == kReal_PdfObjectType);
+
+        if (fObjectType != kReal_PdfObjectType) {
+            // TODO(edisonn): log err
+            return 0;
+        }
+        return fRealValue;
+    }
+public:
+    inline double numberValue() const {
+        SkASSERT(isNumber());
+
+        if (!isNumber()) {
+            // TODO(edisonn): log err
+            return 0;
+        }
+        return fObjectType == kReal_PdfObjectType ? fRealValue : fIntegerValue;
+    }
+
+    int referenceId() const {
+        SkASSERT(fObjectType == kReference_PdfObjectType);
+        return fRef.fId;
+    }
+
+    int referenceGeneration() const {
+        SkASSERT(fObjectType == kReference_PdfObjectType);
+        return fRef.fGen;
+    }
+
+    inline const char* nameValue() const {
+        SkASSERT(fObjectType == kName_PdfObjectType);
+
+        if (fObjectType != kName_PdfObjectType) {
+            // TODO(edisonn): log err
+            return "";
+        }
+        return (const char*)fStr.fBuffer;
+    }
+
+    inline const char* stringValue() const {
+        SkASSERT(fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType);
+
+        if (fObjectType != kString_PdfObjectType && fObjectType != kHexString_PdfObjectType) {
+            // TODO(edisonn): log err
+            return "";
+        }
+        return (const char*)fStr.fBuffer;
+    }
+
+    // TODO(edisonn): nameValue2 and stringValue2 are used to make code generation easy,
+    // but it is not a performant way to do it, since it will create an extra copy
+    // remove these functions and make code generated faster
+    inline std::string nameValue2() const {
+        SkASSERT(fObjectType == kName_PdfObjectType);
+
+        if (fObjectType != kName_PdfObjectType) {
+            // TODO(edisonn): log err
+            return "";
+        }
+        return (const char*)fStr.fBuffer;
+    }
+
+    inline std::string stringValue2() const {
+        SkASSERT(fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType);
+
+        if (fObjectType != kString_PdfObjectType && fObjectType != kHexString_PdfObjectType) {
+            // TODO(edisonn): log err
+            return "";
+        }
+        return (const char*)fStr.fBuffer;
+    }
+
+    inline bool boolValue() const {
+        SkASSERT(fObjectType == kBoolean_PdfObjectType);
+
+        // Bail out only when the object is NOT a boolean (the check used to be inverted, which
+        // made every real boolean read as false).
+        if (fObjectType != kBoolean_PdfObjectType) {
+            // TODO(edisonn): log err
+            return false;
+        }
+        return fBooleanValue;
+    }
+
+    // Reads an array of 4 numbers as left, top, right, bottom.
+    SkRect rectangleValue() const {
+        SkASSERT(isRectangle());
+        if (!isRectangle()) {
+            return SkRect::MakeEmpty();
+        }
+
+        double array[4];
+        for (int i = 0; i < 4; i++) {
+            // TODO(edisonn): version where we could resolve references?
+            const SkPdfObject* elem = objAtAIndex(i);
+            if (elem == NULL || !elem->isNumber()) {
+                // TODO(edisonn): report error
+                return SkRect::MakeEmpty();
+            }
+            array[i] = elem->numberValue();
+        }
+
+        return SkRect::MakeLTRB(SkDoubleToScalar(array[0]),
+                                SkDoubleToScalar(array[1]),
+                                SkDoubleToScalar(array[2]),
+                                SkDoubleToScalar(array[3]));
+    }
+
+    // Reads an array of 6 numbers and converts it with SkMatrixFromPdfMatrix.
+    SkMatrix matrixValue() const {
+        SkASSERT(isMatrix());
+        if (!isMatrix()) {
+            return SkMatrix::I();
+        }
+
+        double array[6];
+        for (int i = 0; i < 6; i++) {
+            // TODO(edisonn): version where we could resolve references?
+            const SkPdfObject* elem = objAtAIndex(i);
+            if (elem == NULL || !elem->isNumber()) {
+                // TODO(edisonn): report error
+                return SkMatrix::I();
+            }
+            array[i] = elem->numberValue();
+        }
+
+        return SkMatrixFromPdfMatrix(array);
+    }
+
+    bool filterStream(SkPdfAllocator* allocator);
+
+
+    // Stream storage: for a dictionary with a stream, fStr.fBuffer points at the stream bytes
+    // and fStr.fBytes holds (length << 1) + flag, where the flag is kFilteredStreamBit or
+    // kUnfilteredStreamBit.
+    bool GetFilteredStreamRef(unsigned char** buffer, size_t* len, SkPdfAllocator* allocator) {
+        // TODO(edisonn): add params that could let the last filter in place if it is jpeg or png to fast load images
+        if (!hasStream()) {
+            return false;
+        }
+
+        filterStream(allocator);
+
+        if (buffer) {
+            *buffer = fStr.fBuffer;
+        }
+
+        if (len) {
+            *len = fStr.fBytes >> 1;  // last bit
+        }
+
+        return true;
+    }
+
+    bool isStreamFiltered() const {
+        return hasStream() && ((fStr.fBytes & 1) == kFilteredStreamBit);
+    }
+
+    bool GetUnfilteredStreamRef(unsigned char** buffer, size_t* len) const {
+        if (isStreamFiltered()) {
+            return false;
+        }
+
+        if (!hasStream()) {
+            return false;
+        }
+
+        if (buffer) {
+            *buffer = fStr.fBuffer;
+        }
+
+        if (len) {
+            *len = fStr.fBytes >> 1;  // remove last bit
+        }
+
+        return true;
+    }
+
+    // Attaches the raw (unfiltered) stream bytes to a dictionary that has no stream yet.
+    // The buffer is referenced, not copied.
+    bool addStream(unsigned char* buffer, size_t len) {
+        SkASSERT(!hasStream());
+        SkASSERT(isDictionary());
+
+        if (!isDictionary() || hasStream()) {
+            return false;
+        }
+
+        fStr.fBuffer = buffer;
+        // Shift by one bit only: all the readers recover the length with fBytes >> 1.
+        fStr.fBytes = (len << 1) + kUnfilteredStreamBit;
+
+        return true;
+    }
+
+    // Human readable dump of the object, for debugging.
+    SkString toString() {
+        SkString str;
+        switch (fObjectType) {
+            case kInvalid_PdfObjectType:
+                str.append("Invalid");
+                break;
+
+            case kBoolean_PdfObjectType:
+                str.appendf("Boolean: %s", fBooleanValue ? "true" : "false");
+                break;
+
+            case kInteger_PdfObjectType:
+                str.appendf("Integer: %i", (int)fIntegerValue);
+                break;
+
+            case kReal_PdfObjectType:
+                str.appendf("Real: %f", fRealValue);
+                break;
+
+            case kString_PdfObjectType:
+                str.appendf("String, len() = %u: ", (unsigned int)fStr.fBytes);
+                str.append((const char*)fStr.fBuffer, fStr.fBytes);
+                break;
+
+            case kHexString_PdfObjectType:
+                str.appendf("HexString, len() = %u: ", (unsigned int)fStr.fBytes);
+                str.append((const char*)fStr.fBuffer, fStr.fBytes);
+                break;
+
+            case kName_PdfObjectType:
+                str.appendf("Name, len() = %u: ", (unsigned int)fStr.fBytes);
+                str.append((const char*)fStr.fBuffer, fStr.fBytes);
+                break;
+
+            case kKeyword_PdfObjectType:
+                str.appendf("Keyword, len() = %u: ", (unsigned int)fStr.fBytes);
+                str.append((const char*)fStr.fBuffer, fStr.fBytes);
+                break;
+
+            case kArray_PdfObjectType:
+                // appendf, not append: append(text, n) would take size() as a byte count and
+                // copy that many bytes of the format string.
+                str.appendf("Array, size() = %i [", (int)size());
+                for (unsigned int i = 0; i < size(); i++) {
+                    str.append(objAtAIndex(i)->toString());
+                }
+                str.append("]");
+                break;
+
+            case kDictionary_PdfObjectType:
+                // TODO(edisonn): NYI
+                str.append("Dictionary: NYI");
+                if (hasStream()) {
+                    str.append(" HAS_STREAM");
+                }
+                break;
+
+            case kNull_PdfObjectType:
+                str = "NULL";
+                break;
+
+            case kReference_PdfObjectType:
+                str.appendf("Reference: %i %i", fRef.fId, fRef.fGen);
+                break;
+
+            case kUndefined_PdfObjectType:
+                str = "Undefined";
+                break;
+
+            default:
+                str = "Internal Error Object Type";
+                break;
+        }
+
+        return str;
+    }
+
+private:
+    static void makeStringCore(unsigned char* start, SkPdfObject* obj, ObjectType type) {
+        makeStringCore(start, strlen((const char*)start), obj, type);
+    }
+
+    static void makeStringCore(unsigned char* start, unsigned char* end, SkPdfObject* obj, ObjectType type) {
+        makeStringCore(start, end - start, obj, type);
+    }
+
+    // Shared worker of the string-like factories: points obj at the caller's bytes (no copy is
+    // made) and tags it with the requested type. obj must be an invalid (fresh or reset) object.
+    static void makeStringCore(unsigned char* start, size_t bytes, SkPdfObject* obj, ObjectType type) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        obj->fStr.fBytes = bytes;
+        obj->fStr.fBuffer = start;
+        obj->fObjectType = type;
+    }
+
+    // Stream filter helpers, defined out of line.
+    bool applyFilter(const char* name, SkPdfAllocator* allocator);
+    bool applyFlateDecodeFilter(SkPdfAllocator* allocator);
+    bool applyDCTDecodeFilter(SkPdfAllocator* allocator);
+};
+
+// Tag types: they add no members, they only name the kind of object a pointer refers to.
+class SkPdfStream : public SkPdfObject {};
+class SkPdfArray : public SkPdfObject {};
+class SkPdfString : public SkPdfObject {};
+class SkPdfHexString : public SkPdfObject {};
+class SkPdfInteger : public SkPdfObject {};
+class SkPdfReal : public SkPdfObject {};
+class SkPdfNumber : public SkPdfObject {};
+
+#endif  // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFOBJECT_H_
diff --git a/experimental/PdfViewer/spec2def.py b/experimental/PdfViewer/spec2def.py index 8b77e728a6..ff64671330 100644 --- a/experimental/PdfViewer/spec2def.py +++ b/experimental/PdfViewer/spec2def.py @@ -627,11 +627,13 @@ def generateDef(): fspecPy.write('def addDictionaryTypesTo(knowTypes):\n') for e in tableToClassName: - fspecPy.write(' knowTypes[\'' + tableToClassName[e][0] + '\'] = [\'SkPdf' + tableToClassName[e][0] + '*\', \'SkPdf' + tableToClassName[e][0] + 'FromDictionary\', datatypes.CppNull(), \'ret->podofo()->GetDataType() == PoDoFo::ePdfDataType_Dictionary\', \'A_DICTIONARY\']\n') + #TODO(edisonn): build this map + + fspecPy.write(' knowTypes[\'' + tableToClassName[e][0] + '\'] = [\'SkPdf' + tableToClassName[e][0] + '*\', \'(SkPdf' + tableToClassName[e][0] + '*)ret\', datatypes.CppNull(), \'ret->isDictionary() && ((SkPdf' + tableToClassName[e][0] + '*)ret)->valid()\', \'A_DICTIONARY\']\n') fspecPy.write('\n') #print lines #fnewspec.close() if '__main__' == __name__: - sys.exit(generateDef())
\ No newline at end of file + sys.exit(generateDef()) |