/* * Copyright 2013 Google Inc. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #ifndef SkPdfNativeTokenizer_DEFINED #define SkPdfNativeTokenizer_DEFINED #include #include #include "SkPdfConfig.h" #include "SkTDArray.h" #include "SkTDict.h" // All these constants are defined by the PDF 1.4 Spec. class SkPdfDictionary; class SkPdfImageDictionary; class SkPdfNativeDoc; class SkPdfNativeObject; // White Spaces #define kNUL_PdfWhiteSpace '\x00' #define kHT_PdfWhiteSpace '\x09' #define kLF_PdfWhiteSpace '\x0A' #define kFF_PdfWhiteSpace '\x0C' #define kCR_PdfWhiteSpace '\x0D' #define kSP_PdfWhiteSpace '\x20' // PdfDelimiters #define kOpenedRoundBracket_PdfDelimiter '(' #define kClosedRoundBracket_PdfDelimiter ')' #define kOpenedInequityBracket_PdfDelimiter '<' #define kClosedInequityBracket_PdfDelimiter '>' #define kOpenedSquareBracket_PdfDelimiter '[' #define kClosedSquareBracket_PdfDelimiter ']' #define kOpenedCurlyBracket_PdfDelimiter '{' #define kClosedCurlyBracket_PdfDelimiter '}' #define kNamed_PdfDelimiter '/' #define kComment_PdfDelimiter '%' #define kEscape_PdfSpecial '\\' #define kBackspace_PdfSpecial '\x08' // TODO(edisonn): what is the faster way for compiler/machine type to evaluate this expressions? // we should evaluate all options. might be even different from one machine to another // 1) expand expression, let compiler optimize it // 2) binary search // 3) linear search in array // 4) vector (e.f. T type[256] .. return type[ch] ... // 5) manually build the expression with least number of operators, e.g. for consecutive // chars, we can use an binary equal ignoring last bit #define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)|| \ ((ch)==kHT_PdfWhiteSpace)|| \ ((ch)==kLF_PdfWhiteSpace)|| \ ((ch)==kFF_PdfWhiteSpace)|| \ ((ch)==kCR_PdfWhiteSpace)|| \ ((ch)==kSP_PdfWhiteSpace)) #define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)) #define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\ ((ch)==kClosedRoundBracket_PdfDelimiter)||\ ((ch)==kOpenedInequityBracket_PdfDelimiter)||\ ((ch)==kClosedInequityBracket_PdfDelimiter)||\ ((ch)==kOpenedSquareBracket_PdfDelimiter)||\ ((ch)==kClosedSquareBracket_PdfDelimiter)||\ ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\ ((ch)==kClosedCurlyBracket_PdfDelimiter)||\ ((ch)==kNamed_PdfDelimiter)||\ ((ch)==kComment_PdfDelimiter)) #define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch)) #define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9') #define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.') const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer, const unsigned char* end); const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end); #define BUFFER_SIZE 1024 /** \class SkPdfAllocator * * An allocator only allocates memory, and it deletes it all when the allocator is destroyed. * This strategy would allow us not to do any garbage collection while we parse and/or render * a pdf. * */ class SkPdfAllocator { public: SkPdfAllocator() { fSizeInBytes = sizeof(*this); fCurrent = allocBlock(); fCurrentUsed = 0; } ~SkPdfAllocator(); // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called. SkPdfNativeObject* allocObject(); // Allocates a buffer. It will be freed automatically when ~SkPdfAllocator() is called. void* alloc(size_t bytes) { void* data = malloc(bytes); fHandles.push(data); fSizeInBytes += bytes; return data; } // Returns the number of bytes used in this allocator. size_t bytesUsed() const { return fSizeInBytes; } private: SkTDArray fHistory; SkTDArray fHandles; SkPdfNativeObject* fCurrent; int fCurrentUsed; SkPdfNativeObject* allocBlock(); size_t fSizeInBytes; }; // Type of a parsed token. enum SkPdfTokenType { kKeyword_TokenType, kObject_TokenType, }; /** \struct PdfToken * * Stores the result of the parsing - a keyword or an object. * */ struct PdfToken { const char* fKeyword; size_t fKeywordLength; SkPdfNativeObject* fObject; SkPdfTokenType fType; PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {} }; /** \class SkPdfNativeTokenizer * * Responsible to tokenize a stream in small tokens, eityh a keyword or an object. * A renderer can feed on the tokens and render a pdf. * */ class SkPdfNativeTokenizer { public: SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc); SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc); virtual ~SkPdfNativeTokenizer(); // Reads one token. Returns false if there are no more tokens. // If writeDiff is true, and a token was read, create a PNG highlighting // the difference caused by this command in /tmp/log_step_by_step. // If PDF_TRACE_DIFF_IN_PNG is not defined, writeDiff does nothing. bool readToken(PdfToken* token, bool writeDiff = false); // Put back a token to be read in the nextToken read. Only one token is allowed to be put // back. Must not necesaarely be the last token read. void PutBack(PdfToken token); // Reads the inline image that is present in the stream. At this point we just consumed the ID // token already. SkPdfImageDictionary* readInlineImage(); private: bool readTokenCore(PdfToken* token); SkPdfNativeDoc* fDoc; SkPdfAllocator* fAllocator; const unsigned char* fUncompressedStreamStart; const unsigned char* fUncompressedStream; const unsigned char* fUncompressedStreamEnd; bool fEmpty; bool fHasPutBack; PdfToken fPutBack; }; const unsigned char* nextObject(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc); #endif // SkPdfNativeTokenizer_DEFINED