Native PDF parser implementation - don't try it on PDFs that were not generated by Skia, Chrome Print Preview or Chrome Save As PDF - it will crash, as missing xref tables, PDFs with updates, and other features are not supported yet.
Review URL: https://codereview.chromium.org/18323019
git-svn-id: http://skia.googlecode.com/svn/trunk@9962 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp
index 5d86838..04a1c50 100644
--- a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp
+++ b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp
@@ -1,11 +1,469 @@
-
#include "SkNativeParsedPDF.h"
+#include "SkPdfNativeTokenizer.h"
+#include "SkPdfBasics.h"
+#include "SkPdfParser.h"
+#include "SkPdfObject.h"
-SkNativeParsedPDF::SkNativeParsedPDF() {
- // TODO(edisonn): Auto-generated constructor stub
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "SkPdfFileTrailerDictionary_autogen.h"
+#include "SkPdfCatalogDictionary_autogen.h"
+#include "SkPdfPageObjectDictionary_autogen.h"
+#include "SkPdfPageTreeNodeDictionary_autogen.h"
+#include "SkPdfMapper_autogen.h"
+
+
+
+// Returns the size, in bytes, that stat() reports for |filename|,
+// or -1 when stat() fails.
+long getFileSize(const char* filename)
+{
+    struct stat fileInfo;
+    if (stat(filename, &fileInfo) != 0) {
+        return -1;
+    }
+    return fileInfo.st_size;
}
+// Walks backwards from |current| and returns the first position of the line
+// that contains it: the position just after the nearest preceding EOL
+// character, or |start| when there is none.
+unsigned char* lineHome(unsigned char* start, unsigned char* current) {
+    for (; current > start; --current) {
+        if (isPdfEOL(current[-1])) {
+            break;
+        }
+    }
+    return current;
+}
+
+// Given |current| at the beginning of a line, returns the beginning of the
+// line before it (never earlier than |start|).
+unsigned char* previousLineHome(unsigned char* start, unsigned char* current) {
+    // Step over the EOL character that ends the previous line, if any.
+    if (current > start && isPdfEOL(current[-1])) {
+        --current;
+    }
+
+    // A two-character EOL is accepted as CR+LF or LF+CR; CR+CR or LF+LF
+    // would be an empty line in between and is not merged.
+    if (current > start && isPdfEOL(current[-1]) && current[0] != current[-1]) {
+        --current;
+    }
+
+    // What remains is the same backwards scan lineHome() performs.
+    return lineHome(start, current);
+}
+
+// Skips the rest of the line that |current| is on and returns the first
+// position of the next line. A two-character EOL (CR+LF or LF+CR) is consumed
+// as a whole. If the buffer ends before an EOL is found, the scan stops there
+// instead of stepping past |end|.
+unsigned char* ignoreLine(unsigned char* current, unsigned char* end) {
+    while (current < end && !isPdfEOL(*current)) {
+        current++;
+    }
+    if (current >= end) {
+        // Last line has no EOL: there is nothing left to consume.
+        return current;
+    }
+    current++;  // first EOL character
+    if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
+        current++;  // second character of a CR+LF / LF+CR pair
+    }
+    return current;
+}
+
+
+// TODO(edisonn): NYI
+// TODO(edisonn): 3 constructors: from URL, from stream, from file ...
+// TODO(edisonn): write one that accepts errors in the file and ignores/fixes them
+// TODO(edisonn): testing:
+// 1) run on a lot of files
+// 2) recoverable corrupt file: remove endobj, endstream, remove other keywords, use other white spaces, insert comments randomly, ...
+// 3) irrecoverable corrupt file
+//
+// Loads the whole file at |path| into memory, walks the cross reference
+// sections (following trailer /Prev links), resolves the root catalog and
+// collects the page objects into fPages.
+//
+// On any failure (file missing/empty/unreadable, no catalog, no page tree)
+// the constructor returns early and the document reports zero pages.
+//
+// NOTE(review): nothing in this file ever creates fMapper, yet fillPages() and
+// the tokenizer factories use it. It is set to NULL here so the failure is at
+// least deterministic - confirm where the mapper is supposed to come from.
+SkNativeParsedPDF::SkNativeParsedPDF(const char* path)
+        : fAllocator(new SkPdfAllocator())
+        , fMapper(NULL)
+        , fFileContent(NULL)
+        , fContentLength(0)
+        , fRootCatalogRef(NULL)
+        , fRootCatalog(NULL) {
+    long fileSize = getFileSize(path);
+    if (fileSize <= 0) {
+        // TODO(edisonn): report/issue
+        return;
+    }
+
+    // Binary mode: PDF is a binary format and text mode may translate EOLs.
+    FILE* file = fopen(path, "rb");
+    if (file == NULL) {
+        // TODO(edisonn): report/issue
+        return;
+    }
+
+    // One extra byte holds a '\0' sentinel: atol() below and readStream()
+    // (which temporarily writes at *end) both touch the byte after the content.
+    fFileContent = new unsigned char[fileSize + 1];
+    fContentLength = fread(fFileContent, 1, fileSize, file);
+    fclose(file);
+    file = NULL;
+    fFileContent[fContentLength] = '\0';
+
+    if (fContentLength == 0) {
+        // TODO(edisonn): report/issue
+        return;
+    }
+
+    // The file is expected to end with:
+    //   startxref
+    //   <byte offset of the last cross reference section>
+    //   %%EOF
+    unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1);
+    unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
+    unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);
+
+    // The keyword is followed by an EOL, not by '\0', so compare a prefix only.
+    if (strncmp((const char*)xrefstartKeywordLine, "startxref", strlen("startxref")) != 0) {
+        // TODO(edisonn): report/issue
+    }
+
+    long xrefByteOffset = atol((const char*)xrefByteOffsetLine);
+
+    // Only the first (most recent) trailer provides the catalog reference.
+    // TODO(edisonn): a cycle in the /Prev chain would loop forever.
+    bool storeCatalog = true;
+    while (xrefByteOffset >= 0 && (size_t)xrefByteOffset < fContentLength) {
+        unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine);
+        if (trailerStart == NULL) {
+            // The tokenizer ran out of input inside the xref table.
+            break;
+        }
+        xrefByteOffset = readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog);
+        storeCatalog = false;
+    }
+
+    // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
+    // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
+    // load catalog
+    fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
+    if (fRootCatalog == NULL) {
+        // TODO(edisonn): report/issue
+        return;
+    }
+
+    SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
+    if (tree == NULL) {
+        // TODO(edisonn): report/issue
+        return;
+    }
+
+    fillPages(tree);
+
+    // now actually read all objects if we want, or do it lazily
+    // and resolve references?... or not ...
+}
+
+// TODO(edisonn): NYI
+// Releases the file buffer and the allocator that the constructor created.
+// NOTE(review): fMapper is not released here; no code in this file allocates
+// it either - confirm who owns the mapper.
SkNativeParsedPDF::~SkNativeParsedPDF() {
- // TODO(edisonn): Auto-generated destructor stub
+ delete[] fFileContent;
+ delete fAllocator;
+}
+
+// Reads one cross reference section that starts at |xrefStart| and registers
+// every entry through addCrossSectionInfo().
+//
+// The first line (presumably the "xref" keyword - it is not verified) is
+// skipped. After it the function expects repeated subsections of the form
+//   <first object id> <entry count>
+//   <offset> <generation> <f|n>      (one line per entry)
+//
+// Returns the position of the first token that is not an integer where a
+// subsection header was expected (presumably the "trailer" keyword). On a
+// malformed entry it returns whatever nextObject() returned for the offending
+// token, which is NULL when nextObject() finds no token at all.
+unsigned char* SkNativeParsedPDF::readCrossReferenceSection(unsigned char* xrefStart, unsigned char* trailerEnd) {
+ unsigned char* current = ignoreLine(xrefStart, trailerEnd); // TODO(edisonn): verify next keyword is "xref", use nextObject here
+
+ SkPdfObject token;
+ while (current < trailerEnd) {
+ token.reset();
+ // remember where this token starts, so it can be handed back unread
+ unsigned char* previous = current;
+ current = nextObject(current, trailerEnd, &token, NULL);
+ if (!token.isInteger()) {
+ // not a subsection header: the xref table is over
+ return previous;
+ }
+
+ // subsection header: id of the first object, then the number of entries
+ int startId = token.intValue();
+ token.reset();
+ current = nextObject(current, trailerEnd, &token, NULL);
+
+ if (!token.isInteger()) {
+ // TODO(edisonn): report/warning
+ return current;
+ }
+
+ int entries = token.intValue();
+
+ for (int i = 0; i < entries; i++) {
+ // first field of the entry, stored as the object's offset
+ token.reset();
+ current = nextObject(current, trailerEnd, &token, NULL);
+ if (!token.isInteger()) {
+ // TODO(edisonn): report/warning
+ return current;
+ }
+ int offset = token.intValue();
+
+ // second field: generation number
+ token.reset();
+ current = nextObject(current, trailerEnd, &token, NULL);
+ if (!token.isInteger()) {
+ // TODO(edisonn): report/warning
+ return current;
+ }
+ int generation = token.intValue();
+
+ // third field: single-letter keyword, 'f' or 'n'
+ token.reset();
+ current = nextObject(current, trailerEnd, &token, NULL);
+ if (!token.isKeyword() || token.len() != 1 || (*token.c_str() != 'f' && *token.c_str() != 'n')) {
+ // TODO(edisonn): report/warning
+ return current;
+ }
+
+ // 'f' is passed on as the isFreed flag
+ addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
+ }
+ }
+ // TODO(edisonn): it should never get here? there is no trailer?
+ return current;
+}
+
+// Reads the trailer dictionary that follows a cross reference section.
+//
+// trailerStart: position of the trailer line (its first line is skipped
+//               without being verified); may be NULL when the xref reader
+//               ran out of input.
+// trailerEnd:   end of the region that may be tokenized.
+// storeCatalog: when true, the trailer's /Root reference is remembered in
+//               fRootCatalogRef.
+//
+// Returns the byte offset stored in /Prev (the previous cross reference
+// section), or -1 when there is none or the trailer could not be read.
+long SkNativeParsedPDF::readTrailer(unsigned char* trailerStart, unsigned char* trailerEnd, bool storeCatalog) {
+    if (trailerStart == NULL || trailerStart >= trailerEnd) {
+        // TODO(edisonn): report/warning - there is no trailer to read
+        return -1;
+    }
+
+    unsigned char* current = ignoreLine(trailerStart, trailerEnd);  // TODO(edisonn): verify next keyword is "trailer" use nextObject here
+
+    SkPdfObject token;
+    current = nextObject(current, trailerEnd, &token, fAllocator);
+    if (current == NULL) {
+        // No token after the trailer line, so |token| was never filled in.
+        return -1;
+    }
+    SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
+
+    if (storeCatalog) {
+        const SkPdfObject* ref = trailer->Root(NULL);
+        if (ref == NULL || !ref->isReference()) {
+            // TODO(edisonn): oops, we have to fix the corrupt pdf file
+            return -1;
+        }
+        // NOTE(review): |ref| must outlive the local |token|; this presumably
+        // holds because dictionary values come from fAllocator - confirm.
+        fRootCatalogRef = ref;
+    }
+
+    if (trailer->has_Prev()) {
+        return trailer->Prev(NULL);
+    }
+
+    return -1;
+}
+
+// Records where object |id| lives in the file. fObjects grows as needed so
+// that |id| is a valid index; the new slots in between are reset to "unknown".
+// Any previously cached parse of the object is dropped.
+//
+// |generation| and |isFreed| are currently ignored (updates are not supported).
+// Entries with a negative id are rejected, since they would index before the
+// start of fObjects.
+void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
+    // TODO(edisonn): security here - a huge id still makes the table huge
+    if (id < 0) {
+        // TODO(edisonn): report/warning
+        return;
+    }
+
+    while (fObjects.count() < id + 1) {
+        reset(fObjects.append());
+    }
+
+    fObjects[id].fOffset = offset;
+    fObjects[id].fObj = NULL;
+}
+
+// Parses the indirect object |id| from the file buffer, starting at the offset
+// recorded for it in fObjects. The expected layout is
+//   <id> <generation> obj <value> ...
+// Returns the parsed value (allocated from fAllocator), or NULL when the id or
+// its offset is out of range or the buffer ends inside the object header.
+// A header that does not match (wrong id, missing "obj") is tolerated for now.
+SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) const {
+    if (id < 0 || id >= fObjects.count()) {
+        // TODO(edisonn): report warning/error
+        return NULL;
+    }
+
+    long startOffset = fObjects[id].fOffset;
+    if (startOffset < 0 || (size_t)startOffset >= fContentLength) {
+        // Unknown object (offset -1) or offset pointing outside of the file.
+        // TODO(edisonn): report warning/error
+        return NULL;
+    }
+
+    //long endOffset = fObjects[id].fOffsetEnd;
+    // TODO(edisonn): use hinted endOffset
+    unsigned char* current = fFileContent + startOffset;
+    unsigned char* end = fFileContent + fContentLength;
+
+    SkPdfObject idObj;
+    SkPdfObject generationObj;
+    SkPdfObject objKeyword;
+
+    // nextObject() returns NULL when it finds no token, so check for that as
+    // well as for running off the end of the buffer.
+    current = nextObject(current, end, &idObj, NULL);
+    if (current == NULL || current >= end) {
+        // TODO(edisonn): report warning/error
+        return NULL;
+    }
+
+    current = nextObject(current, end, &generationObj, NULL);
+    if (current == NULL || current >= end) {
+        // TODO(edisonn): report warning/error
+        return NULL;
+    }
+
+    current = nextObject(current, end, &objKeyword, NULL);
+    if (current == NULL || current >= end) {
+        // TODO(edisonn): report warning/error
+        return NULL;
+    }
+
+    if (!idObj.isInteger() || !generationObj.isInteger() || id != idObj.intValue()/* || generation != generationObj.intValue()*/) {
+        // TODO(edisonn): report warning/error
+    }
+
+    if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
+        // TODO(edisonn): report warning/error
+    }
+
+    // Allocate the result only once the header has been read, so nothing is
+    // taken from the allocator on the error paths above.
+    SkPdfObject* dict = fAllocator->allocObject();
+    nextObject(current, end, dict, fAllocator);
+
+    // TODO(edisonn): report warning/error - verify last token is endobj
+
+    return dict;
+}
+
+// Collects, in document order, all pages below |tree| into fPages.
+// A node without /Kids is treated as a page. Kids that map to a page object
+// dictionary are appended directly; every other kid is assumed to be a nested
+// page tree node and is descended into.
+// TODO(edisonn): a kid that refers back to an ancestor would recurse forever.
+void SkNativeParsedPDF::fillPages(SkPdfPageTreeNodeDictionary* tree) {
+    if (tree == NULL) {
+        return;
+    }
+
+    const SkPdfArray* kids = tree->Kids(this);
+    if (kids == NULL) {
+        *fPages.append() = (SkPdfPageObjectDictionary*)tree;
+        return;
+    }
+
+    int cnt = kids->size();
+    for (int i = 0; i < cnt; i++) {
+        const SkPdfObject* obj = resolveReference(kids->objAtAIndex(i));
+        if (obj == NULL) {
+            // Dangling reference in /Kids.
+            // TODO(edisonn): report warning/error
+            continue;
+        }
+        if (fMapper->mapPageObjectDictionary(obj) == kPageObjectDictionary_SkPdfObjectType) {
+            *fPages.append() = (SkPdfPageObjectDictionary*)obj;
+        } else {
+            // TODO(edisonn): verify that it is a page tree indeed
+            fillPages((SkPdfPageTreeNodeDictionary*)obj);
+        }
+    }
+}
+
+// Number of pages collected from the page tree.
+int SkNativeParsedPDF::pages() const {
+    const int pageCount = fPages.count();
+    return pageCount;
+}
+
+// Returns the /Resources dictionary of page |page| (0-based), or NULL when
+// the index is out of range.
+SkPdfResourceDictionary* SkNativeParsedPDF::pageResources(int page) {
+    if (page < 0 || page >= fPages.count() || fPages[page] == NULL) {
+        return NULL;
+    }
+    return fPages[page]->Resources(this);
+}
+
+// TODO(edisonn): Partial implemented. Move the logics directly in the code generator for inheritable and default value?
+// Returns the /MediaBox of page |page| (0-based). MediaBox is inheritable, so
+// when the page itself has none the /Parent chain is searched. Returns an
+// empty rect when the index is out of range or no MediaBox is found.
+// TODO(edisonn): a cycle in the /Parent chain would loop forever.
+SkRect SkNativeParsedPDF::MediaBox(int page) const {
+    if (page < 0 || page >= fPages.count()) {
+        return SkRect::MakeEmpty();
+    }
+
+    SkPdfPageObjectDictionary* current = fPages[page];
+    // Parent() may resolve to NULL, so test the pointer on every step.
+    while (current != NULL && !current->has_MediaBox() && current->has_Parent()) {
+        current = (SkPdfPageObjectDictionary*)current->Parent(this);
+    }
+    if (current != NULL && current->has_MediaBox()) {
+        return current->MediaBox(this);
+    }
+    return SkRect::MakeEmpty();
+}
+
+// TODO(edisonn): stream or array ... ? for now only stream
+// Returns a new tokenizer over the content stream of page |page| (0-based);
+// the caller deletes it. Returns NULL when the index is out of range or when
+// /Contents is not a single stream (arrays of streams are not implemented).
+SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfPage(int page) const {
+    if (page < 0 || page >= fPages.count() || fPages[page] == NULL) {
+        return NULL;
+    }
+
+    if (fPages[page]->isContentsAStream(this)) {
+        return tokenizerOfStream(fPages[page]->getContentsAsStream(this));
+    }
+
+    // TODO(edisonn): NYI, we need to concatenate all streams in the array or make the tokenizer smart
+    // so we don't allocate new memory
+    return NULL;
+}
+
+// Creates a tokenizer over |stream|; the caller owns the returned object.
+// A NULL stream yields NULL.
+SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfStream(SkPdfObject* stream) const {
+    return stream != NULL ? new SkPdfNativeTokenizer(stream, fMapper, fAllocator)
+                          : NULL;
+}
+
+// TODO(edisonn): NYI
+// Creates a tokenizer over |len| bytes at |buffer|; the caller owns the result.
+SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfBuffer(unsigned char* buffer, size_t len) const {
+    // warning does not track two calls in the same buffer! the buffer is updated!
+    // make a clean copy if needed!
+    SkPdfNativeTokenizer* const tokenizer =
+            new SkPdfNativeTokenizer(buffer, len, fMapper, fAllocator);
+    return tokenizer;
+}
+
+// Number of slots in the object table (the highest registered id plus one).
+size_t SkNativeParsedPDF::objects() const {
+    const size_t slotCount = fObjects.count();
+    return slotCount;
+}
+
+// Returns object number |i|, parsing it from the file on first use and caching
+// the result. Valid indices are 0 .. objects() - 1; anything else asserts in
+// debug builds and returns NULL.
+SkPdfObject* SkNativeParsedPDF::object(int i) {
+    SkASSERT(!(i < 0 || i >= fObjects.count()));
+
+    if (i < 0 || i >= fObjects.count()) {
+        return NULL;
+    }
+
+    if (fObjects[i].fObj == NULL) {
+        // TODO(edisonn): when we read the cross reference sections, store the start of the next object
+        // and fill fOffsetEnd
+        fObjects[i].fObj = readObject(i);
+    }
+
+    return fObjects[i].fObj;
+}
+
+// Returns the mapper stored in fMapper.
+// NOTE(review): no code in this file assigns fMapper - confirm it is set up elsewhere.
+const SkPdfMapper* SkNativeParsedPDF::mapper() const {
+ return fMapper;
+}
+
+// Allocates an object from the document allocator and initializes it as a
+// real number holding |value|.
+SkPdfReal* SkNativeParsedPDF::createReal(double value) const {
+    SkPdfObject* const fresh = fAllocator->allocObject();
+    SkPdfObject::makeReal(value, fresh);
+    return (SkPdfReal*)fresh;
+}
+
+// Allocates an object from the document allocator and initializes it as an
+// integer holding |value|.
+SkPdfInteger* SkNativeParsedPDF::createInteger(int value) const {
+    SkPdfObject* const fresh = fAllocator->allocObject();
+    SkPdfObject::makeInteger(value, fresh);
+    return (SkPdfInteger*)fresh;
+}
+
+// Allocates an object from the document allocator and initializes it as a
+// string over the |len| bytes at |sz|.
+SkPdfString* SkNativeParsedPDF::createString(unsigned char* sz, size_t len) const {
+    SkPdfObject* const fresh = fAllocator->allocObject();
+    SkPdfObject::makeString(sz, len, fresh);
+    return (SkPdfString*)fresh;
+}
+
+PdfContext* gPdfContext = NULL;
+
+// Renders page |page| (0-based) onto |canvas|.
+//
+// The canvas matrix is set so that PDF space (origin bottom-left, y up) maps
+// onto Skia space (origin top-left, y down) for a page of MediaBox size, and
+// drawing is clipped to the page unless PDF_DEBUG_NO_PAGE_CLIPING is defined.
+// While the page is being drawn the global gPdfContext points at the local
+// drawing context; it is cleared again before returning.
+// An out-of-range page or a NULL canvas is ignored.
+void SkNativeParsedPDF::drawPage(int page, SkCanvas* canvas) {
+    if (canvas == NULL || page < 0 || page >= fPages.count()) {
+        return;
+    }
+
+    // NULL when the page contents are not a single stream (NYI).
+    SkPdfNativeTokenizer* tokenizer = tokenizerOfPage(page);
+
+    PdfContext pdfContext(this);
+    pdfContext.fOriginalMatrix = SkMatrix::I();
+    pdfContext.fGraphicsState.fResources = pageResources(page);
+
+    gPdfContext = &pdfContext;
+
+    // TODO(edisonn): get matrix stuff right.
+    // TODO(edisonn): add DPI/scale/zoom.
+    SkScalar z = SkIntToScalar(0);
+    SkRect rect = MediaBox(page);
+    SkScalar w = rect.width();
+    SkScalar h = rect.height();
+
+    SkPoint pdfSpace[4] = {SkPoint::Make(z, z), SkPoint::Make(w, z), SkPoint::Make(w, h), SkPoint::Make(z, h)};
+//    SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::Make(w, z), SkPoint::Make(z, z)};
+
+    // TODO(edisonn): add flag for this app to create surrounding buffer zone
+    // TODO(edisonn): add flag for no clipping.
+    // Use larger image to make sure we do not draw anything outside of page
+    // could be used in tests.
+
+#ifdef PDF_DEBUG_3X
+    SkPoint skiaSpace[4] = {SkPoint::Make(w+z, h+h), SkPoint::Make(w+w, h+h), SkPoint::Make(w+w, h+z), SkPoint::Make(w+z, h+z)};
+#else
+    SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::Make(w, z), SkPoint::Make(z, z)};
+#endif
+    //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(w, h)};
+    //SkPoint skiaSpace[2] = {SkPoint::Make(w, z), SkPoint::Make(z, h)};
+
+    //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(z, h)};
+    //SkPoint skiaSpace[2] = {SkPoint::Make(z, h), SkPoint::Make(z, z)};
+
+    //SkPoint pdfSpace[3] = {SkPoint::Make(z, z), SkPoint::Make(z, h), SkPoint::Make(w, h)};
+    //SkPoint skiaSpace[3] = {SkPoint::Make(z, h), SkPoint::Make(z, z), SkPoint::Make(w, 0)};
+
+    SkAssertResult(pdfContext.fOriginalMatrix.setPolyToPoly(pdfSpace, skiaSpace, 4));
+    SkTraceMatrix(pdfContext.fOriginalMatrix, "Original matrix");
+
+    // The text matrices start out equal to the page matrix.
+    pdfContext.fGraphicsState.fMatrix = pdfContext.fOriginalMatrix;
+    pdfContext.fGraphicsState.fMatrixTm = pdfContext.fGraphicsState.fMatrix;
+    pdfContext.fGraphicsState.fMatrixTlm = pdfContext.fGraphicsState.fMatrix;
+
+    canvas->setMatrix(pdfContext.fOriginalMatrix);
+
+#ifndef PDF_DEBUG_NO_PAGE_CLIPING
+    canvas->clipRect(SkRect::MakeXYWH(z, z, w, h), SkRegion::kIntersect_Op, true);
+#endif
+
+// erase with red before?
+//    SkPaint paint;
+//    paint.setColor(SK_ColorRED);
+//    canvas->drawRect(rect, paint);
+
+    // Without a tokenizer there are no operators to run; the canvas setup
+    // above is still applied, exactly as for a page with empty contents.
+    if (tokenizer != NULL) {
+        PdfMainLooper looper(NULL, tokenizer, &pdfContext, canvas);
+        looper.loop();
+
+        delete tokenizer;
+    }
+
+    canvas->flush();
+
+    // |pdfContext| dies with this frame; do not leave a dangling global.
+    gPdfContext = NULL;
+}
+
+// Returns the allocator created by the constructor; the document keeps
+// ownership and deletes it in its destructor.
+SkPdfAllocator* SkNativeParsedPDF::allocator() const {
+ return fAllocator;
+}
+
+// Non-const convenience overload: forwards to the const SkPdfObject* overload.
+SkPdfObject* SkNativeParsedPDF::resolveReference(SkPdfObject* ref) const {
+    const SkPdfObject* constRef = ref;
+    SkPdfObject* resolved = resolveReference(constRef);
+    return resolved;
+}
+
+// TODO(edisonn): fix infinite loop if ref to itself!
+// TODO(edisonn): perf, fix refs at load, and resolve will simply return fResolvedReference?
+// Follows |ref| to the object it designates.
+//  - NULL and non-reference objects are returned unchanged.
+//  - A reference is looked up in fObjects (parsing the object on first use);
+//    chains of references are followed and the final target is cached.
+//  - Returns NULL when the id is out of range, the object cannot be read, or
+//    the object is a reference to itself. Longer reference cycles are still
+//    not detected.
+SkPdfObject* SkNativeParsedPDF::resolveReference(const SkPdfObject* ref) const {
+    if (ref == NULL || !ref->isReference()) {
+        // TODO(edisonn): fix the mess with const, probably we need to remove it pretty much everywhere
+        return (SkPdfObject*)ref;
+    }
+
+    int id = ref->referenceId();
+    // TODO(edisonn): generation/updates not supported now
+    //int gen = ref->referenceGeneration();
+
+    // Valid ids are 0 .. count - 1.
+    SkASSERT(!(id < 0 || id >= fObjects.count()));
+
+    if (id < 0 || id >= fObjects.count()) {
+        return NULL;
+    }
+
+    // TODO(edisonn): verify id and gen expected
+
+    if (fObjects[id].fResolvedReference != NULL) {
+        return fObjects[id].fResolvedReference;
+    }
+
+    if (fObjects[id].fObj == NULL) {
+        fObjects[id].fObj = readObject(id);
+    }
+
+    SkPdfObject* obj = fObjects[id].fObj;
+    if (obj == NULL) {
+        // readObject() failed; leave the cache empty.
+        return NULL;
+    }
+
+    if (!obj->isReference()) {
+        fObjects[id].fResolvedReference = obj;
+    } else if (obj->referenceId() == id) {
+        // "N 0 obj N 0 R": resolving it would recurse forever.
+        return NULL;
+    } else {
+        fObjects[id].fResolvedReference = resolveReference(obj);
+    }
+
+    return fObjects[id].fResolvedReference;
}
diff --git a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h
index 38c72b0..245bdfb 100644
--- a/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h
+++ b/experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.h
@@ -1,12 +1,96 @@
#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKNATIVEPARSEDPDF_H_
#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKNATIVEPARSEDPDF_H_
-#include "base/macros.h"
+#include "SkRect.h"
+#include "SkTDArray.h"
-class SkNativeParsedPDF : public SkParsedPDF {
+class SkCanvas;
+
+class SkPdfAllocator;
+class SkPdfMapper;
+class SkPdfObject;
+class SkPdfReal;
+class SkPdfInteger;
+class SkPdfString;
+class SkPdfResourceDictionary;
+class SkPdfCatalogDictionary;
+class SkPdfPageObjectDictionary;
+class SkPdfPageTreeNodeDictionary;
+
+
+
+class SkPdfNativeTokenizer;
+
+// In-memory representation of a PDF document parsed by the native parser.
+// The constructor reads the file and the cross reference tables; individual
+// objects are parsed on first use and cached.
+// NOTE(review): the class owns raw pointers (fFileContent, fAllocator) that the
+// destructor deletes, but copying is not disabled - copying an instance would
+// presumably lead to a double delete.
+class SkNativeParsedPDF {
+private:
+ // One slot of the object table, indexed by object id.
+ struct PublicObjectEntry {
+ // offset of the object from the start of the file content; -1 after reset()
+ long fOffset;
+ // long endOffset; // TODO(edisonn): determine the end of the object, to be used when the doc is corrupted
+ // parsed object, NULL until it is read
+ SkPdfObject* fObj;
+ // TODO(edisonn): perf ... probably it does not make sense to cache the ref. test it!
+ // cached result of resolveReference() for this id, NULL until resolved
+ SkPdfObject* fResolvedReference;
+ };
+
public:
- SkNativeParsedPDF();
- virtual ~SkNativeParsedPDF();
+ // TODO(edisonn): read methods: file, stream, http(s)://url, url with seek?
+ // TODO(edisonn): read first page asap, linearized
+ // TODO(edisonn): read page N asap, read all file
+ // TODO(edisonn): allow corruptions of file (e.g. missing endobj, missing stream length, ...)
+ // TODO(edisonn): encryption
+ // Loads and parses the file at |path|.
+ SkNativeParsedPDF(const char* path);
+ ~SkNativeParsedPDF();
+
+ // Number of pages found in the page tree.
+ int pages() const;
+ // Resources dictionary of the given page.
+ SkPdfResourceDictionary* pageResources(int page);
+ // MediaBox of the given page, searching the parent chain when the page has none.
+ SkRect MediaBox(int page) const;
+ // New tokenizer over the page's content stream; NULL when the contents are not a single stream.
+ SkPdfNativeTokenizer* tokenizerOfPage(int n) const;
+
+ // New tokenizers over a stream object / a raw buffer; NULL for a NULL stream.
+ SkPdfNativeTokenizer* tokenizerOfStream(SkPdfObject* stream) const;
+ SkPdfNativeTokenizer* tokenizerOfBuffer(unsigned char* buffer, size_t len) const;
+
+ // Size of the object table, and access to object |i| (parsed on first use).
+ size_t objects() const;
+ SkPdfObject* object(int i);
+
+ const SkPdfMapper* mapper() const;
+ SkPdfAllocator* allocator() const;
+
+ // Factory helpers: the returned objects are allocated from allocator().
+ SkPdfReal* createReal(double value) const;
+ SkPdfInteger* createInteger(int value) const;
+ // the string does not own the char*
+ SkPdfString* createString(unsigned char* sz, size_t len) const;
+
+ // Renders the given page onto |canvas|.
+ void drawPage(int page, SkCanvas* canvas);
+
+ // Follows an indirect reference to its target; non-references are returned as they are.
+ SkPdfObject* resolveReference(SkPdfObject* ref) const;
+ SkPdfObject* resolveReference(const SkPdfObject* ref) const;
+
+private:
+
+ // Parse one cross reference section / the trailer that follows it.
+ unsigned char* readCrossReferenceSection(unsigned char* xrefStart, unsigned char* trailerEnd);
+ long readTrailer(unsigned char* trailerStart, unsigned char* trailerEnd, bool storeCatalog);
+
+ // TODO(edisonn): updates not supported right now, generation ignored
+ void addCrossSectionInfo(int id, int generation, int offset, bool isFreed);
+ // Puts an object table slot into its "nothing known yet" state.
+ static void reset(PublicObjectEntry* obj) {
+ obj->fObj = NULL;
+ obj->fResolvedReference = NULL;
+ obj->fOffset = -1;
+ }
+
+ // Parses object |id| from the file content at its recorded offset.
+ SkPdfObject* readObject(int id/*, int generation*/) const;
+
+ // Appends the pages found under |tree| to fPages.
+ void fillPages(SkPdfPageTreeNodeDictionary* tree);
+
+ // private fields
+ SkPdfAllocator* fAllocator;
+ SkPdfMapper* fMapper;
+ // whole file loaded in memory, and its length in bytes
+ unsigned char* fFileContent;
+ size_t fContentLength;
+ // /Root reference from the trailer, and the catalog it resolves to
+ const SkPdfObject* fRootCatalogRef;
+ SkPdfCatalogDictionary* fRootCatalog;
+
+ // mutable: const methods fill the per-object caches lazily
+ mutable SkTDArray<PublicObjectEntry> fObjects;
+ SkTDArray<SkPdfPageObjectDictionary*> fPages;
};
#endif // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKNATIVEPARSEDPDF_H_
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp
index ba3a34e..dd72acd 100644
--- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp
+++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp
@@ -1,11 +1,772 @@
#include "SkPdfNativeTokenizer.h"
+#include "SkPdfObject.h"
+#include "SkPdfConfig.h"
-SkPdfNativeTokenizer::SkPdfNativeTokenizer() {
- // TODO(edisonn): Auto-generated constructor stub
+#include "SkPdfStreamCommonDictionary_autogen.h"
+// Advances |start| over white space and over comments (from '%' to the end of
+// the line) and returns the first position that holds neither, or |end|.
+// Every skipped byte is overwritten with '\0', which terminates the token that
+// precedes it in the buffer.
+unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) {
+    // The comment delimiter must be part of the loop condition, otherwise the
+    // comment branch below can never be reached.
+    while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
+        if (*start == kComment_PdfDelimiter) {
+            // skip the comment until end of line
+            while (start < end && !isPdfEOL(*start)) {
+                *start = '\0';
+                start++;
+            }
+        } else {
+            *start = '\0';
+            start++;
+        }
+    }
+    return start;
+}
+
+// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
+// Returns the position just after the token that begins at |start|:
+//  - a delimiter is a token of its own (one character);
+//  - otherwise the token runs until the next white space or delimiter.
+// Returns |start| unchanged when start >= end.
+unsigned char* endOfPdfToken(unsigned char* start, unsigned char* end) {
+    //int opened brackets
+    //TODO(edisonn): watch out for special chars, like \n, \032
+
+    // Only look at *start when it is inside the buffer.
+    SkASSERT(start >= end || !isPdfWhiteSpace(*start));
+
+    if (start < end && isPdfDelimiter(*start)) {
+        start++;
+        return start;
+    }
+
+    while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
+        start++;
+    }
+    return start;
+}
+
+// Skips a comment: |start| must be at the '%' that opens it (or at |end|).
+// Everything up to, but not including, the end-of-line is overwritten with
+// '\0' and the position of the EOL (or |end|) is returned.
+unsigned char* skipPdfComment(unsigned char* start, unsigned char* end) {
+    SkASSERT(start == end || *start == kComment_PdfDelimiter);
+    // A comment lasts until the end of the line, so advance while the
+    // current character is NOT an EOL.
+    while (start < end && !isPdfEOL(*start)) {
+        *start = '\0';
+        start++;
+    }
+    return start;
+}
+
+// last elem has to be ]
+// Reads array elements into |array| until the closing ']' and returns the
+// position after it. An "<int> <int> R" triple is folded into one reference
+// element. If the buffer ends before ']' the current position is returned.
+unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator) {
+    while (start < end) {
+        start = skipPdfWhiteSpaces(start, end);
+
+        unsigned char* tokenEnd = endOfPdfToken(start, end);
+
+        // Nothing left to read.
+        if (tokenEnd == start) {
+            // TODO(edisonn): report error in pdf file (end of stream with ] for end of array
+            return start;
+        }
+
+        // Closing bracket: the array is complete.
+        if (tokenEnd == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
+            return tokenEnd;
+        }
+
+        SkPdfObject* element = allocator->allocObject();
+        start = nextObject(start, end, element, allocator);
+
+        // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
+        // we are sure they are not references!
+        // An 'R' keyword that follows two integers turns them into a reference.
+        bool closesReference = false;
+        if (element->isKeywordReference() && array->size() >= 2) {
+            closesReference = array->objAtAIndex(array->size() - 1)->isInteger() &&
+                              array->objAtAIndex(array->size() - 2)->isInteger();
+        }
+        if (closesReference) {
+            SkPdfObject* generation = array->removeLastInArray();
+            SkPdfObject* objectId = array->removeLastInArray();
+            element->reset();
+            SkPdfObject::makeReference(objectId->intValue(), generation->intValue(), element);
+        }
+        array->appendInArray(element);
+    }
+    // TODO(edisonn): report not reached, we should never get here
+    SkASSERT(false);
+    return start;
+}
+
+// When we read strings we will rewrite the string so we will reuse the memory
+// when we start to read the string, we already consumed the opened bracket
+//
+// Decodes a literal string in place: escape sequences are replaced by the
+// bytes they stand for, so the decoded text is never longer than the source.
+// Balanced, unescaped parentheses are part of the string. |str| is set to the
+// decoded bytes [start, out).
+// Returns the position after the closing ')', or |end| when the string is not
+// terminated.
+unsigned char* readString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
+    unsigned char* out = start;
+    unsigned char* in = start;
+
+    int openRoundBrackets = 0;
+    while (in < end && (*in != kClosedRoundBracket_PdfDelimiter || openRoundBrackets > 0)) {
+        openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
+        openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
+
+        if (*in != kEscape_PdfSpecial) {
+            // Plain character: move it down to the write position.
+            *out++ = *in++;
+            continue;
+        }
+
+        if (in + 1 >= end) {
+            // Backslash is the last byte of the buffer: drop it, otherwise
+            // the loop would never advance.
+            in++;
+            break;
+        }
+
+        switch (in[1]) {
+            case 'n':
+                *out++ = kLF_PdfWhiteSpace;
+                in += 2;
+                break;
+
+            case 'r':
+                *out++ = kCR_PdfWhiteSpace;
+                in += 2;
+                break;
+
+            case 't':
+                *out++ = kHT_PdfWhiteSpace;
+                in += 2;
+                break;
+
+            case 'b':
+                // TODO(edisonn): any special meaning to backspace?
+                *out++ = kBackspace_PdfSpecial;
+                in += 2;
+                break;
+
+            case 'f':
+                *out++ = kFF_PdfWhiteSpace;
+                in += 2;
+                break;
+
+            case kOpenedRoundBracket_PdfDelimiter:
+                *out++ = kOpenedRoundBracket_PdfDelimiter;
+                in += 2;
+                break;
+
+            case kClosedRoundBracket_PdfDelimiter:
+                *out++ = kClosedRoundBracket_PdfDelimiter;
+                in += 2;
+                break;
+
+            case kEscape_PdfSpecial:
+                *out++ = kEscape_PdfSpecial;
+                in += 2;
+                break;
+
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7': {
+                // Octal escape: one to three digits, high-order overflow ignored.
+                in++;  // consume backslash
+
+                int code = 0;
+                for (int digits = 0; digits < 3 && in < end && *in >= '0' && *in < '8'; digits++) {
+                    code = (code << 3) + ((*in) - '0');  // code * 8 + d
+                    in++;
+                }
+                *out++ = code & 0xff;
+                break;
+            }
+
+            default:
+                // Per spec, backslash is ignored if escaped ch is unknown
+                in++;
+                break;
+        }
+    }
+
+    SkPdfObject::makeString(start, out, str);
+    // consume ) at the end of the string, if there is one
+    return in < end ? in + 1 : in;
+}
+
+// Value of the hexadecimal digit |ch| (0..15), or -1 when it is not one.
+static int hexStringNibble(unsigned char ch) {
+    if (ch >= '0' && ch <= '9') {
+        return ch - '0';
+    }
+    if (ch >= 'a' && ch <= 'f') {
+        return ch - 'a' + 10;
+    }
+    if (ch >= 'A' && ch <= 'F') {
+        return ch - 'A' + 10;
+    }
+    return -1;
+}
+
+// Decodes a hexadecimal string in place; |start| is just after the opening
+// '<'. White space between digits is ignored, and a missing last digit counts
+// as 0. |str| is set to the decoded bytes [start, out).
+// Returns the position after the closing '>', or |end| when the string is not
+// terminated.
+unsigned char* readHexString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
+    unsigned char* out = start;
+    unsigned char* in = start;
+
+    while (in < end) {
+        // --- high nibble ---
+        while (in < end && isPdfWhiteSpace(*in)) {
+            in++;
+        }
+
+        if (in >= end) {
+            // end too soon
+            break;
+        }
+
+        if (*in == kClosedInequityBracket_PdfDelimiter) {
+            // normal exit
+            *in = '\0';
+            in++;
+            break;
+        }
+
+        // TODO(edisonn): spec does not say how to handle a non-hex character;
+        // it is treated as 0 here.
+        int high = hexStringNibble(*in);
+        if (high < 0) {
+            high = 0;
+        }
+        in++;
+
+        // --- low nibble ---
+        while (in < end && isPdfWhiteSpace(*in)) {
+            in++;
+        }
+
+        // TODO(edisonn): report error
+        if (in >= end) {
+            *out++ = (unsigned char)(high << 4);
+            break;
+        }
+
+        if (*in == kClosedInequityBracket_PdfDelimiter) {
+            // Odd number of digits: the last one is the high nibble.
+            *out++ = (unsigned char)(high << 4);
+            *in = '\0';
+            in++;  // consume '>' here too, like the normal exit above
+            break;
+        }
+
+        int low = hexStringNibble(*in);
+        if (low < 0) {
+            low = 0;
+        }
+
+        *out++ = (unsigned char)((high << 4) + low);
+        in++;
+    }
+
+    if (out < in) {
+        *out = '\0';
+    }
+
+    SkPdfObject::makeHexString(start, out, str);
+    return in;  // '>' at the end of the string has been consumed
+}
+
+// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
+
+// Value of the hexadecimal digit |ch| (0..15), or -1 when it is not one.
+static int nameHexNibble(unsigned char ch) {
+    if (ch >= '0' && ch <= '9') {
+        return ch - '0';
+    }
+    if (ch >= 'a' && ch <= 'f') {
+        return ch - 'a' + 10;
+    }
+    if (ch >= 'A' && ch <= 'F') {
+        return ch - 'A' + 10;
+    }
+    return -1;
+}
+
+// Decodes a name in place; |start| is just after the leading '/'. The name
+// ends at the first white space or delimiter. "#xx" sequences are replaced by
+// the byte with hexadecimal code xx. |name| is set to the decoded bytes
+// [start, out). Returns the position of the character that ended the name.
+unsigned char* readName(unsigned char* start, unsigned char* end, SkPdfObject* name) {
+    unsigned char* out = start;
+    unsigned char* in = start;
+
+    while (in < end && !isPdfWhiteSpaceOrPdfDelimiter(*in)) {
+        if (*in == '#' && in + 2 < end) {
+            // TODO(edisonn): spec does not say how to handle a non-hex
+            // character after '#'; each bad digit is treated as 0 here.
+            int high = nameHexNibble(in[1]);
+            int low = nameHexNibble(in[2]);
+            if (high < 0) {
+                high = 0;
+            }
+            if (low < 0) {
+                low = 0;
+            }
+
+            *out++ = (unsigned char)((high << 4) + low);
+            in += 3;  // '#' and the two digits
+        } else {
+            *out++ = *in++;
+        }
+    }
+
+    SkPdfObject::makeName(start, out, name);
+    return in;
+}
+
+// TODO(edisonn): pdf spec let Length to be an indirect object define after the stream
+// that makes for an interesting scenario, where the stream itself contains endstream, together
+// with a reference object with the length, but the real length object would be somewhere else
+// it could confuse the parser
+/*example:
+
+7 0 obj
+<< /length 8 0 R>>
+stream
+...............
+endstream
+8 0 obj #we are in stream actually, not a real object
+<< 10 >> #we are in stream actually, not a real object
+endobj
+endstream
+8 0 obj #real obj
+<< 100 >> #real obj
+endobj
+and it could get worse, with multiple object like this
+*/
+
+// right now implement the silly algorithm that assumes endstream is finishing the stream
+
+
+// If the "stream" keyword follows at |start| (after optional white space),
+// attaches the stream data to |dict| via addStream() and returns the position
+// after the "endstream" keyword. Otherwise returns the position after the
+// skipped white space.
+//
+// The data length comes from the dictionary's /Length when it is positive and
+// fits into the buffer; otherwise the buffer is scanned for the first
+// "endstream". The scan is bounded by |end| and works on binary data (it does
+// not stop at '\0' bytes and never writes to *end).
+unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdfObject* dict) {
+    const int kStreamLen = 6;     // strlen("stream")
+    const int kEndStreamLen = 9;  // strlen("endstream")
+
+    start = skipPdfWhiteSpaces(start, end);
+    if (end - start < kStreamLen || memcmp(start, "stream", kStreamLen) != 0) {
+        // no stream. return.
+        return start;
+    }
+
+    start += kStreamLen;
+    // The keyword is followed by CR+LF or LF, which is not part of the data.
+    if (end - start >= 2 && start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
+        start += 2;
+    } else if (start < end && start[0] == kLF_PdfWhiteSpace) {
+        start += 1;
+    }
+
+    SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
+    // TODO(edisonn): load Length
+    int length = -1;
+
+    // TODO(edisonn): very basic implementation
+    if (stream->has_Length() && stream->Length(NULL) > 0) {
+        length = stream->Length(NULL);
+    }
+
+    if (length > end - start) {
+        // The declared length runs past the buffer: do not trust it.
+        length = -1;
+    }
+
+    // TODO(edisonn): load external streams
+    // TODO(edisonn): look at the last filter, to determine how to deal with possible issue
+
+    if (length < 0) {
+        // scan the buffer, until we find first endstream
+        for (unsigned char* candidate = start; end - candidate >= kEndStreamLen; candidate++) {
+            if (memcmp(candidate, "endstream", kEndStreamLen) != 0) {
+                continue;
+            }
+            length = candidate - start;
+            // The EOL in front of the keyword (CR+LF, LF or CR) is not data.
+            if (length > 0 && candidate[-1] == kLF_PdfWhiteSpace) {
+                length--;
+                if (length > 0 && candidate[-2] == kCR_PdfWhiteSpace) {
+                    length--;
+                }
+            } else if (length > 0 && candidate[-1] == kCR_PdfWhiteSpace) {
+                length--;
+            }
+            break;
+        }
+    }
+
+    if (length >= 0) {
+        unsigned char* endstream = start + length;
+
+        // Step over the EOL between the data and the keyword.
+        if (end - endstream >= 2 && endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
+            endstream += 2;
+        } else if (endstream < end && (endstream[0] == kLF_PdfWhiteSpace || endstream[0] == kCR_PdfWhiteSpace)) {
+            endstream += 1;
+        }
+
+        // TODO(edisonn): verify the next bytes are "endstream"
+
+        if (end - endstream >= kEndStreamLen) {
+            endstream += kEndStreamLen;
+        } else {
+            endstream = end;
+        }
+        // TODO(edisonn): Assert? report error/warning?
+        dict->addStream(start, length);
+        return endstream;
+    }
+    return start;
+}
+
+// Reads the entries of a dictionary into |dict|; |start| is just after the
+// opening "<<". Each entry is "/Key value", where "<int> <int> R" is stored as
+// a single reference value. After the entries the closing ">>" is skipped and
+// a stream that follows the dictionary, if any, is attached to it.
+// Returns the position after the dictionary (and its stream), or |end| when
+// the buffer ends inside an entry.
+unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator) {
+    SkPdfObject::makeEmptyDictionary(dict);
+
+    start = skipPdfWhiteSpaces(start, end);
+
+    while (start < end && *start == kNamed_PdfDelimiter) {
+        SkPdfObject key;
+        *start = '\0';
+        start++;
+        start = readName(start, end, &key);
+        start = skipPdfWhiteSpaces(start, end);
+
+        if (start >= end) {
+            // key without a value
+            dict->set(&key, &SkPdfObject::kNull);
+            return end;
+        }
+
+        SkPdfObject* value = allocator->allocObject();
+        start = nextObject(start, end, value, allocator);
+        if (start == NULL) {
+            // nextObject() found no token; keep what we have and stop.
+            dict->set(&key, value);
+            return end;
+        }
+
+        start = skipPdfWhiteSpaces(start, end);
+
+        if (start >= end) {
+            // /key value, and then the buffer ends
+            dict->set(&key, value);
+            return end;
+        }
+
+        if (isPdfDigit(*start)) {
+            // seems we have an indirect reference: <id> <generation> R
+            SkPdfObject generation;
+            start = nextObject(start, end, &generation, allocator);
+
+            SkPdfObject keywordR;
+            if (start != NULL) {
+                start = nextObject(start, end, &keywordR, allocator);
+            }
+
+            if (start != NULL && value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
+                int64_t id = value->intValue();
+                value->reset();
+                SkPdfObject::makeReference(id, generation.intValue(), value);
+            }
+            // on error the value is stored as it was read
+            dict->set(&key, value);
+
+            if (start == NULL) {
+                // The buffer ended inside the reference.
+                return end;
+            }
+        } else {
+            // next elem is not a digit, but it might not be / either!
+            dict->set(&key, value);
+        }
+
+        start = skipPdfWhiteSpaces(start, end);
+    }
+
+    // TODO(edisonn): options to ignore these errors
+
+    // now we should expect >>
+    start = skipPdfWhiteSpaces(start, end);
+    start = endOfPdfToken(start, end);  // >
+    start = endOfPdfToken(start, end);  // >
+
+    // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ...
+    // or allocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ?
+
+    start = readStream(start, end, dict);
+
+    return start;
+}
+
+// Reads the next PDF object that starts at or after |start| into |token|.
+//
+// A one-character opening delimiter ('[', '(', '<', '/') is overwritten with '\0' and the rest
+// of the object is delegated to the matching read* helper. Otherwise the token "null" becomes
+// a null object, "true"/"false" become booleans, a token that starts with a digit or a sign
+// becomes a number, and anything else is stored as a keyword.
+//
+// Returns the position after the object, or NULL when no token was found.
+unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator) {
+    // skip white spaces
+    start = skipPdfWhiteSpaces(start, end);
+
+    unsigned char* current = endOfPdfToken(start, end);
+
+    // no token, len would be 0
+    if (current == start) {
+        return NULL;
+    }
+
+    int tokenLen = current - start;
+
+    if (tokenLen == 1) {
+        // start array
+        switch (*start) {
+            case kOpenedSquareBracket_PdfDelimiter:
+                *start = '\0';
+                SkPdfObject::makeEmptyArray(token);
+                return readArray(current, end, token, allocator);
+
+            case kOpenedRoundBracket_PdfDelimiter:
+                *start = '\0';
+                // NOTE(review): unlike the other branches this passes the position of the
+                // (now zeroed) '(' itself - confirm that readString expects that.
+                return readString(start, end, token);
+
+            case kOpenedInequityBracket_PdfDelimiter:
+                *start = '\0';
+                if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
+                    // TODO(edisonn): pass here the length somehow?
+                    return readDictionary(start + 2, end, token, allocator);  // skip <<
+                } else {
+                    return readHexString(start + 1, end, token);  // skip <
+                }
+
+            case kNamed_PdfDelimiter:
+                *start = '\0';
+                return readName(start + 1, end, token);
+
+            // TODO(edisonn): what to do curly brackets? read spec!
+            case kOpenedCurlyBracket_PdfDelimiter:
+            default:
+                break;
+        }
+
+        SkASSERT(!isPdfWhiteSpace(*start));
+        if (isPdfDelimiter(*start)) {
+            // TODO(edisonn): how stream ] } > ) will be handled?
+            // for now ignore, and it will become a keyword to be ignored
+        }
+    }
+
+    if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
+        SkPdfObject::makeNull(token);
+        return current;
+    }
+
+    if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
+        SkPdfObject::makeBoolean(true, token);
+        return current;
+    }
+
+    // The fifth character is start[4]; testing start[3] against both 's' and 'e' could never
+    // succeed, which turned every "false" into a keyword.
+    if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[4] == 'e') {
+        SkPdfObject::makeBoolean(false, token);
+        return current;
+    }
+
+    if (isPdfNumeric(*start)) {
+        SkPdfObject::makeNumeric(start, current, token);
+    } else {
+        SkPdfObject::makeKeyword(start, current, token);
+    }
+    return current;
+}
+
+// Creates one new block of BUFFER_SIZE default-constructed objects.
+SkPdfObject* SkPdfAllocator::allocBlock() {
+    SkPdfObject* block = new SkPdfObject[BUFFER_SIZE];
+    return block;
+}
+
+// Releases everything the allocator handed out: the raw buffers from alloc(), every block
+// that was filled up, and the block currently in use.
+SkPdfAllocator::~SkPdfAllocator() {
+    const int handleCount = fHandles.count();
+    for (int h = 0; h < handleCount; ++h) {
+        free(fHandles[h]);
+    }
+
+    const int blockCount = fHistory.count();
+    for (int b = 0; b < blockCount; ++b) {
+        delete[] fHistory[b];
+    }
+
+    delete[] fCurrent;
+}
+
+// Returns the next unused object slot, starting a new block when the current one is full.
+SkPdfObject* SkPdfAllocator::allocObject() {
+    const bool blockIsFull = fCurrentUsed >= BUFFER_SIZE;
+    if (blockIsFull) {
+        // Keep the full block alive; it is only deleted by the destructor.
+        fHistory.push(fCurrent);
+        fCurrent = allocBlock();
+        fCurrentUsed = 0;
+    }
+
+    SkPdfObject* slot = &fCurrent[fCurrentUsed];
+    fCurrentUsed++;
+    return slot;
+}
+
+// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
+// Creates a tokenizer over a private copy of the filtered stream of |objWithStream|. The copy
+// is obtained from |allocator|.
+SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator)
+    : fMapper(mapper)
+    , fAllocator(allocator)
+    , fUncompressedStream(NULL)
+    , fUncompressedStreamEnd(NULL)
+    , fEmpty(false)
+    , fHasPutBack(false) {
+    unsigned char* filtered = NULL;
+    size_t filteredBytes = 0;
+    objWithStream->GetFilteredStreamRef(&filtered, &filteredBytes, fAllocator);
+
+    unsigned char* copy = (unsigned char*)fAllocator->alloc(filteredBytes);
+    fUncompressedStreamStart = copy;
+    fUncompressedStream = copy;
+    fUncompressedStreamEnd = copy + filteredBytes;
+    memcpy(copy, filtered, filteredBytes);
+}
+
+// Creates a tokenizer over a private copy of the |len| bytes at |buffer|. The copy is obtained
+// from |allocator|.
+SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator)
+    : fMapper(mapper)
+    , fAllocator(allocator)
+    , fEmpty(false)
+    , fHasPutBack(false) {
+    unsigned char* copy = (unsigned char*)fAllocator->alloc(len);
+    fUncompressedStreamStart = copy;
+    fUncompressedStream = copy;
+    fUncompressedStreamEnd = copy + len;
+    memcpy(copy, buffer, len);
}
SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
- // TODO(edisonn): Auto-generated destructor stub
+    // The stream copy came from fAllocator->alloc(), which records the pointer and frees it
+    // when the allocator is destroyed. It must not be passed to realloc() here: realloc() may
+    // move the block (or release it for a size of 0), which would leave the allocator with a
+    // stale pointer that it later passes to free().
+    // The parsed part has to stay alive anyway, as it contains the strings and keywords
+    // referenced in objects.
+    // TODO(edisonn): give the allocator a way to shrink a buffer, to release the unparsed tail.
+    SkASSERT(fUncompressedStream);
+}
+
+// Reads the next token straight from the stream, ignoring any token that was put back.
+//
+// On success |token| describes either a keyword (fKeyword/fKeywordLength point into the stream
+// buffer) or an object (fObject, stored in the allocator). Returns false when the stream is
+// exhausted or no token could be read.
+bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
+    token->fKeyword = NULL;
+    token->fObject = NULL;
+
+    fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
+    if (fUncompressedStream >= fUncompressedStreamEnd) {
+        return false;
+    }
+
+    // Parse directly into allocator-owned storage. Parsing into a local SkPdfObject and then
+    // copying it would be a shallow copy: the destructor of the local deletes the array or
+    // map it owns, and the copy would be left pointing at freed memory.
+    SkPdfObject* pobj = fAllocator->allocObject();
+    unsigned char* next = nextObject(fUncompressedStream, fUncompressedStreamEnd, pobj, fAllocator);
+    if (next == NULL) {
+        // No token could be read; treat it as the end of the stream rather than keeping a
+        // NULL cursor around.
+        fUncompressedStream = fUncompressedStreamEnd;
+        return false;
+    }
+    fUncompressedStream = next;
+
+    // If it is a keyword, we will only get the pointer of the string
+    if (pobj->type() == SkPdfObject::kKeyword_PdfObjectType) {
+        token->fKeyword = pobj->c_str();
+        token->fKeywordLength = pobj->len();
+        token->fType = kKeyword_TokenType;
+    } else {
+        token->fObject = pobj;
+        token->fType = kObject_TokenType;
+    }
+
+#ifdef PDF_TRACE
+    static int read_op = 0;
+    read_op++;
+    if (182749 == read_op) {
+        printf("break;\n");
+    }
+    printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
+#endif
+
+    return true;
+}
+
+// Stores |token| so that the next readToken() call returns it again. Only one token can be
+// pending at a time.
+void SkPdfNativeTokenizer::PutBack(PdfToken token) {
+    SkASSERT(!fHasPutBack);
+    fPutBack = token;
+    fHasPutBack = true;
+#ifdef PDF_TRACE
+    printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
+#endif
+}
+
+// Returns the next token: the token that was put back, if there is one, otherwise the next
+// token of the stream. Returns false when the tokenizer is empty or the stream is exhausted.
+bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
+    if (!fHasPutBack) {
+        if (fEmpty) {
+#ifdef PDF_TRACE
+            printf("EMPTY TOKENIZER\n");
+#endif
+            return false;
+        }
+        return readTokenCore(token);
+    }
+
+    // Hand the pending token back exactly once.
+    fHasPutBack = false;
+    *token = fPutBack;
+#ifdef PDF_TRACE
+    printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
+#endif
+    return true;
}
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h
index c8a2f46..1c2336e 100644
--- a/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h
+++ b/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h
@@ -1,10 +1,145 @@
#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
-class SkPdfNativeTokenizer : public SkPdfPodofoTokenizer {
+#include "SkTDArray.h"
+#include "SkTDict.h"
+#include <math.h>
+#include <string.h>
+
+class SkPdfMapper;
+class SkPdfDictionary;
+
+// White Spaces
+#define kNUL_PdfWhiteSpace '\x00'
+#define kHT_PdfWhiteSpace '\x09'
+#define kLF_PdfWhiteSpace '\x0A'
+#define kFF_PdfWhiteSpace '\x0C'
+#define kCR_PdfWhiteSpace '\x0D'
+#define kSP_PdfWhiteSpace '\x20'
+
+// PdfDelimiters
+#define kOpenedRoundBracket_PdfDelimiter '('
+#define kClosedRoundBracket_PdfDelimiter ')'
+#define kOpenedInequityBracket_PdfDelimiter '<'
+#define kClosedInequityBracket_PdfDelimiter '>'
+#define kOpenedSquareBracket_PdfDelimiter '['
+#define kClosedSquareBracket_PdfDelimiter ']'
+#define kOpenedCurlyBracket_PdfDelimiter '{'
+#define kClosedCurlyBracket_PdfDelimiter '}'
+#define kNamed_PdfDelimiter '/'
+#define kComment_PdfDelimiter '%'
+
+#define kEscape_PdfSpecial '\\'
+#define kBackspace_PdfSpecial '\x08'
+
+// TODO(edisonn): what is the fastest way for the compiler/machine to evaluate these expressions?
+// we should evaluate all options. it might even differ from one machine to another
+// 1) expand expression, let compiler optimize it
+// 2) binary search
+// 3) linear search in array
+// 4) vector (e.g. T type[256] .. return type[ch] ...
+// 5) manually build the expression with least number of operators, e.g. for consecutive
+// chars, we can use a binary equal ignoring last bit
+#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)||((ch)==kHT_PdfWhiteSpace)||((ch)==kLF_PdfWhiteSpace)||((ch)==kFF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)||((ch)==kSP_PdfWhiteSpace))
+
+#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace))
+
+
+#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\
+ ((ch)==kClosedRoundBracket_PdfDelimiter)||\
+ ((ch)==kOpenedInequityBracket_PdfDelimiter)||\
+ ((ch)==kClosedInequityBracket_PdfDelimiter)||\
+ ((ch)==kOpenedSquareBracket_PdfDelimiter)||\
+ ((ch)==kClosedSquareBracket_PdfDelimiter)||\
+ ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\
+ ((ch)==kClosedCurlyBracket_PdfDelimiter)||\
+ ((ch)==kNamed_PdfDelimiter)||\
+ ((ch)==kComment_PdfDelimiter))
+
+#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch))
+
+#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9')
+#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-')
+
+unsigned char* skipPdfWhiteSpaces(unsigned char* buffer, size_t len); // NOTE(review): callers in the tokenizer pass (start, end) pointer pairs, which cannot bind to this (pointer, size_t) signature - confirm whether these declarations are still needed
+unsigned char* endOfPdfToken(unsigned char* start, size_t len); // NOTE(review): same mismatch with the (start, end) calls - confirm
+unsigned char* skipPdfComment(unsigned char* start, size_t len); // NOTE(review): no caller is visible in this chunk - confirm the intended signature
+
+// TODO(edisonn): typedef real and integer types? might make the code less readable...
+//typedef double SkPdfReal;
+//typedef int64_t SkPdfInteger;
+
+// an allocator only allocates memory, and it deletes it all when the allocator is destroyed
+// this allows us not to do any garbage collection while we parse or draw a pdf, and to defer it
+// until the user is looking at the image
+
+class SkPdfObject;
+
+// Arena used by the parser: hands out SkPdfObject slots (in blocks of BUFFER_SIZE) and raw
+// byte buffers, and releases all of them only when the allocator itself is destroyed.
+class SkPdfAllocator {
+#define BUFFER_SIZE 1024
+    SkTDArray<SkPdfObject*> fHistory;  // blocks that have been used up
+    SkTDArray<void*> fHandles;         // buffers returned by alloc()
+    SkPdfObject* fCurrent;             // block the next slots are taken from
+    int fCurrentUsed;                  // number of slots of fCurrent already handed out
+
+    SkPdfObject* allocBlock();
+
+    // The allocator owns raw memory that its destructor releases, so a copy would release the
+    // same memory a second time. Declared but not defined, to forbid copying.
+    SkPdfAllocator(const SkPdfAllocator&);
+    SkPdfAllocator& operator=(const SkPdfAllocator&);
+
public:
- SkPdfNativeTokenizer();
+    SkPdfAllocator() {
+        fCurrent = allocBlock();
+        fCurrentUsed = 0;
+    }
+
+    ~SkPdfAllocator();
+
+    SkPdfObject* allocObject();
+
+    // Returns |bytes| of malloc'ed memory. The buffer is recorded in fHandles and free'd by
+    // the destructor, so callers must neither free nor realloc it.
+    // TODO(edisonn): track the usage?
+    void* alloc(size_t bytes) {
+        void* data = malloc(bytes);
+        fHandles.push(data);
+        return data;
+    }
+};
+
+unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator);
+
+enum SkPdfTokenType { // kind of a PdfToken, set by SkPdfNativeTokenizer::readTokenCore
+ kKeyword_TokenType, // the token is a keyword: PdfToken::fKeyword / fKeywordLength are set
+ kObject_TokenType, // the token is any other object: PdfToken::fObject is set
+};
+
+// One token produced by SkPdfNativeTokenizer: either a keyword (fKeyword/fKeywordLength) or an
+// object (fObject); fType tells which.
+struct PdfToken {
+    const char* fKeyword;
+    size_t fKeywordLength;
+    SkPdfObject* fObject;
+    SkPdfTokenType fType;
+
+    // fType is initialized as well, so that copying a default-constructed token (e.g. the
+    // tokenizer's put-back slot) never reads an indeterminate value.
+    PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL), fType(kKeyword_TokenType) {}
+};
+
+class SkPdfNativeTokenizer { // splits a pdf stream into keyword and object tokens
+public:
+ SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator); // tokenizes a copy of the filtered stream of objWithStream
+ SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator); // tokenizes a copy of the len bytes at buffer
+
virtual ~SkPdfNativeTokenizer();
+
+ bool readToken(PdfToken* token); // returns the put-back token if there is one, otherwise reads from the stream
+ bool readTokenCore(PdfToken* token); // reads from the stream, ignoring any put-back token
+ void PutBack(PdfToken token); // makes the next readToken() return token; only one token can be pending
+
+private:
+ const SkPdfMapper* fMapper;
+ SkPdfAllocator* fAllocator; // provides the stream copy and the objects the tokens point to
+
+ unsigned char* fUncompressedStreamStart; // first byte of the stream copy
+ unsigned char* fUncompressedStream; // read cursor inside the stream copy
+ unsigned char* fUncompressedStreamEnd; // one past the last byte of the stream copy
+
+ bool fEmpty; // when true, readToken() returns false unless a token was put back
+ bool fHasPutBack; // true while fPutBack holds a pending token
+ PdfToken fPutBack;
};
#endif // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfObject.cpp b/experimental/PdfViewer/pdfparser/native/SkPdfObject.cpp
new file mode 100644
index 0000000..57e9abb
--- /dev/null
+++ b/experimental/PdfViewer/pdfparser/native/SkPdfObject.cpp
@@ -0,0 +1,85 @@
+
+#include "SkPdfObject.h"
+#include "SkPdfStreamCommonDictionary_autogen.h"
+
+#include "SkFlate.h"
+#include "SkStream.h"
+#include "SkPdfNativeTokenizer.h"
+
+SkPdfObject SkPdfObject::kNull = SkPdfObject::makeNull(); // shared object of null type, e.g. stored by readDictionary for a key that has no value
+
+// Inflates the stream bytes referenced by fStr and replaces them with the decoded data, which
+// is stored in a buffer obtained from |allocator|. The result is tagged with
+// kUnfilteredStreamBit. Returns false, leaving the stream untouched, when flate support is
+// missing or inflating fails.
+bool SkPdfObject::applyFlateDecodeFilter(SkPdfAllocator* allocator) {
+    if (!SkFlate::HaveFlate()) {
+        // TODO(edisonn): warn, make callers handle it
+        return false;
+    }
+
+    // The lowest bit of fBytes is the filtered/unfiltered flag, the remaining bits the length.
+    SkMemoryStream compressed(fStr.fBuffer, fStr.fBytes >> 1, false);
+    SkDynamicMemoryWStream inflated;
+
+    if (!SkFlate::Inflate(&compressed, &inflated)) {
+        // TODO(edisonn): warn, make callers handle it
+        return false;
+    }
+
+    fStr.fBytes = (inflated.bytesWritten() << 1) + kUnfilteredStreamBit;
+    fStr.fBuffer = (unsigned char*)allocator->alloc(inflated.bytesWritten());
+    inflated.copyTo(fStr.fBuffer);
+    return true;
+}
+
+bool SkPdfObject::applyDCTDecodeFilter(SkPdfAllocator* allocator) { // DCT data is not decoded here; allocator is unused
+ // this always reports failure, which makes filterStream() stop applying the filters of an array.
+ // technically, further filters would be possible, but that is not a real world scenario
+ // TODO(edisonn): or get the image here and store it for fast retrieval?
+ return false;
+}
+
+// Applies the filter called |name| to the stream of this object. Returns the result of the
+// filter, or false when the filter is not supported.
+bool SkPdfObject::applyFilter(const char* name, SkPdfAllocator* allocator) {
+    const bool isFlate = strcmp(name, "FlateDecode") == 0;
+    if (isFlate) {
+        return applyFlateDecodeFilter(allocator);
+    }
+
+    const bool isDCT = strcmp(name, "DCTDecode") == 0;
+    if (isDCT) {
+        return applyDCTDecodeFilter(allocator);
+    }
+
+    // TODO(edisonn): allert, not supported, but should be implemented asap
+    return false;
+}
+
+// Decodes the stream attached to this dictionary according to its Filter entry and marks the
+// stream as filtered.
+//
+// Returns false only when the object has no stream. Decoding is best effort: when a filter
+// fails or is not supported the remaining filters are skipped, the data is left as it is at
+// that point, and the stream is still marked as filtered.
+bool SkPdfObject::filterStream(SkPdfAllocator* allocator) {
+    if (!hasStream()) {
+        return false;
+    }
+
+    if (isStreamFiltered()) {
+        return true;
+    }
+
+    SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*)this;
+
+    if (!stream->has_Filter()) {
+        fStr.fBytes = ((fStr.fBytes >> 1) << 1) + kFilteredStreamBit;
+        return true;
+    }
+
+    if (stream->isFilterAName(NULL)) {
+        std::string filterName = stream->getFilterAsName(NULL);
+        applyFilter(filterName.c_str(), allocator);
+    } else if (stream->isFilterAArray(NULL)) {
+        const SkPdfArray* filters = stream->getFilterAsArray(NULL);
+        int cnt = filters->size();
+        // The pdf spec lists multiple filters in the order in which they are to be applied
+        // when decoding, so the array is walked from its first element to its last.
+        for (int i = 0; i < cnt; i++) {
+            const SkPdfObject* filterName = filters->objAtAIndex(i);
+            if (filterName != NULL && filterName->isName()) {
+                if (!applyFilter(filterName->nameValue(), allocator)) {
+                    break;
+                }
+            } else {
+                // TODO(edisonn): report warning
+            }
+        }
+    }
+
+    fStr.fBytes = ((fStr.fBytes >> 1) << 1) + kFilteredStreamBit;
+
+    return true;
+}
diff --git a/experimental/PdfViewer/pdfparser/native/SkPdfObject.h b/experimental/PdfViewer/pdfparser/native/SkPdfObject.h
new file mode 100644
index 0000000..86963b0
--- /dev/null
+++ b/experimental/PdfViewer/pdfparser/native/SkPdfObject.h
@@ -0,0 +1,866 @@
+#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFOBJECT_H_
+#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFOBJECT_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <string>
+#include "SkTDArray.h"
+#include "SkTDict.h"
+#include "SkRect.h"
+#include "SkMatrix.h"
+#include "SkString.h"
+
+#include "SkPdfNYI.h"
+#include "SkPdfConfig.h"
+
+class SkPdfDictionary;
+class SkPdfStream;
+class SkPdfAllocator;
+
+// TODO(edisonn): macro it and move it to utils
+SkMatrix SkMatrixFromPdfMatrix(double array[6]);
+
+
+#define kFilteredStreamBit 0
+#define kUnfilteredStreamBit 1
+
+
+class SkPdfObject {
+ public:
+ enum ObjectType { // kind of value stored in a SkPdfObject
+ kInvalid_PdfObjectType, // state of a default-constructed object and of an object after reset()
+
+ kBoolean_PdfObjectType,
+ kInteger_PdfObjectType,
+ kReal_PdfObjectType,
+ kString_PdfObjectType,
+ kHexString_PdfObjectType,
+ kName_PdfObjectType,
+ kKeyword_PdfObjectType,
+ //kStream_PdfObjectType, // attached to a Dictionary
+ kArray_PdfObjectType,
+ kDictionary_PdfObjectType,
+ kNull_PdfObjectType,
+
+ // TODO(edisonn): after the pdf has been loaded completely, resolve all references
+ // try the same thing with delayed loaded ...
+ kReference_PdfObjectType,
+
+ kUndefined_PdfObjectType, // per 1.4 spec, if the same key appears twice in the dictionary, the value is undefined
+ };
+
+private:
+ struct NotOwnedString { // bytes that reset() does not release
+ unsigned char* fBuffer;
+ size_t fBytes; // for a dictionary's stream the lowest bit is the filtered/unfiltered flag and the length is fBytes >> 1
+ };
+
+ struct Reference { // indirect reference: object id and generation
+ unsigned int fId;
+ unsigned int fGen;
+ };
+
+ // TODO(edisonn): add stream start, stream end, where stream is either the file
+ // or decoded/filtered pdf stream
+
+ // TODO(edisonn): add warning/report per object
+ // TODO(edisonn): add flag fUsed, to be used once the parsing is complete,
+ // so we could show what parts have been processed, ignored, or generated errors
+
+ ObjectType fObjectType; // selects which of the members below is meaningful
+
+ union {
+ bool fBooleanValue;
+ int64_t fIntegerValue;
+ // TODO(edisonn): double, float? typedefed
+ double fRealValue;
+ NotOwnedString fStr; // strings, hex strings, names, keywords; for a dictionary, its stream
+
+ // TODO(edisonn): make sure the footprint of fArray and fMap is small, otherwise, use pointers, or classes with up to 8 bytes in footprint
+ SkTDArray<SkPdfObject*>* fArray; // deleted by reset() when the object is an array
+ Reference fRef;
+ };
+ SkTDict<SkPdfObject*>* fMap; // outside the union because a dictionary also uses fStr for its stream; deleted by reset()
+ void* fData; // opaque pointer, see data() / setData()
+
+
+public:
+
+ SkPdfObject() : fObjectType(kInvalid_PdfObjectType), fData(NULL) {} // starts as an invalid object; the make* functions give it a type
+
+ inline void* data() { // returns the pointer stored with setData(), NULL by default
+ return fData;
+ }
+
+ inline void setData(void* data) { // stores an opaque pointer; it is never released by this class
+ fData = data;
+ }
+
+ ~SkPdfObject() { // NOTE(review): no copy constructor/assignment is visible in this chunk; if copies are the implicit shallow ones, a copied array or dictionary would be deleted twice - confirm
+ reset();
+ }
+
+    // Deletes the container owned by this object (the array of an array object, the map of a
+    // dictionary object) and turns the object back into an invalid one.
+    void reset() {
+        if (fObjectType == kArray_PdfObjectType) {
+            delete fArray;
+        } else if (fObjectType == kDictionary_PdfObjectType) {
+            delete fMap;
+        }
+        fObjectType = kInvalid_PdfObjectType;
+    }
+
+ ObjectType type() { return fObjectType; } // kind of value currently stored in this object
+
+    // Returns the bytes of a string, hex string or keyword object, NULL for any other type.
+    const char* c_str() const {
+        const bool hasBytes = fObjectType == kString_PdfObjectType ||
+                              fObjectType == kHexString_PdfObjectType ||
+                              fObjectType == kKeyword_PdfObjectType;
+        if (!hasBytes) {
+            // TODO(edisonn): report/warning
+            return NULL;
+        }
+        return (const char*)fStr.fBuffer;
+    }
+
+    // Returns the number of bytes of a string, hex string or keyword object, 0 for any other
+    // type.
+    size_t len() const {
+        const bool hasBytes = fObjectType == kString_PdfObjectType ||
+                              fObjectType == kHexString_PdfObjectType ||
+                              fObjectType == kKeyword_PdfObjectType;
+        if (!hasBytes) {
+            // TODO(edisonn): report/warning
+            return 0;
+        }
+        return fStr.fBytes;
+    }
+
+
+ // TODO(edisonn): NYI - ignores the content of this object and returns a function-local static placeholder
+ SkPdfDate& dateValue() const {
+ static SkPdfDate nyi;
+ return nyi;
+ }
+
+ // TODO(edisonn): NYI - ignores the content of this object and returns a function-local static placeholder
+ SkPdfFunction& functionValue() const {
+ static SkPdfFunction nyi;
+ return nyi;
+ }
+
+ // TODO(edisonn): NYI - ignores the content of this object and returns a function-local static placeholder
+ SkPdfFileSpec& fileSpecValue() const {
+ static SkPdfFileSpec nyi;
+ return nyi;
+ }
+
+ // TODO(edisonn): NYI - ignores the content of this object and returns a function-local static placeholder
+ SkPdfTree& treeValue() const {
+ static SkPdfTree nyi;
+ return nyi;
+ }
+
+
+ static void makeBoolean(bool value, SkPdfObject* obj) { // turns the invalid object obj into a boolean
+ SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+ // obj must not hold a value yet: nothing is released here
+ obj->fObjectType = kBoolean_PdfObjectType;
+ obj->fBooleanValue = value;
+ }
+
+ static SkPdfObject makeBoolean(bool value) { // returns a new boolean object by value
+ SkPdfObject obj;
+ obj.fObjectType = kBoolean_PdfObjectType;
+ obj.fBooleanValue = value;
+ return obj;
+ }
+
+ static void makeInteger(int64_t value, SkPdfObject* obj) { // turns the invalid object obj into an integer
+ SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+ obj->fObjectType = kInteger_PdfObjectType;
+ obj->fIntegerValue = value;
+ }
+
+ static void makeReal(double value, SkPdfObject* obj) { // turns the invalid object obj into a real
+ SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+ obj->fObjectType = kReal_PdfObjectType;
+ obj->fRealValue = value;
+ }
+
+ static void makeNull(SkPdfObject* obj) { // turns the invalid object obj into a null object
+ SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+ obj->fObjectType = kNull_PdfObjectType;
+ }
+
+ static SkPdfObject makeNull() { // returns a new null object by value
+ SkPdfObject obj;
+ obj.fObjectType = kNull_PdfObjectType;
+ return obj;
+ }
+
+ static SkPdfObject kNull; // shared null object, defined in SkPdfObject.cpp
+
+    // Turns the invalid object |obj| into a number parsed from the text at |start|: a real if
+    // the token [start, end) contains a '.', an integer otherwise.
+    static void makeNumeric(unsigned char* start, unsigned char* end, SkPdfObject* obj) {
+        SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+        // TODO(edisonn): NYI properly
+        // if has dot (impl), or exceeds max int, is real, otherwise is int
+        bool hasDot = false;
+        unsigned char* cursor = start;
+        while (cursor < end && !hasDot) {
+            // TODO(edisonn): report parse issue with numbers like "24asdasd123"
+            hasDot = (*cursor == '.');
+            cursor++;
+        }
+
+        if (hasDot) {
+            makeReal(atof((const char*)start), obj);
+        } else {
+            makeInteger(atol((const char*)start), obj);
+        }
+    }
+
+ static void makeReference(unsigned int id, unsigned int gen, SkPdfObject* obj) { // turns the invalid object obj into an indirect reference to object id, generation gen
+ SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+ obj->fObjectType = kReference_PdfObjectType;
+ obj->fRef.fId = id;
+ obj->fRef.fGen = gen;
+ }
+
+
+ static void makeString(unsigned char* start, SkPdfObject* obj) { // start must be '\0' terminated: the length is taken with strlen
+ makeStringCore(start, strlen((const char*)start), obj, kString_PdfObjectType);
+ }
+
+ static void makeString(unsigned char* start, unsigned char* end, SkPdfObject* obj) { // the bytes are [start, end)
+ makeStringCore(start, end - start, obj, kString_PdfObjectType);
+ }
+
+ static void makeString(unsigned char* start, size_t bytes, SkPdfObject* obj) { // the bytes are [start, start + bytes)
+ makeStringCore(start, bytes, obj, kString_PdfObjectType);
+ }
+
+
+ static void makeHexString(unsigned char* start, SkPdfObject* obj) { // start must be '\0' terminated: the length is taken with strlen
+ makeStringCore(start, strlen((const char*)start), obj, kHexString_PdfObjectType);
+ }
+
+ static void makeHexString(unsigned char* start, unsigned char* end, SkPdfObject* obj) { // the bytes are [start, end)
+ makeStringCore(start, end - start, obj, kHexString_PdfObjectType);
+ }
+
+ static void makeHexString(unsigned char* start, size_t bytes, SkPdfObject* obj) { // the bytes are [start, start + bytes)
+ makeStringCore(start, bytes, obj, kHexString_PdfObjectType);
+ }
+
+
+ static void makeName(unsigned char* start, SkPdfObject* obj) { // start must be '\0' terminated: the length is taken with strlen
+ makeStringCore(start, strlen((const char*)start), obj, kName_PdfObjectType);
+ }
+
+ static void makeName(unsigned char* start, unsigned char* end, SkPdfObject* obj) { // the bytes are [start, end)
+ makeStringCore(start, end - start, obj, kName_PdfObjectType);
+ }
+
+ static void makeName(unsigned char* start, size_t bytes, SkPdfObject* obj) { // the bytes are [start, start + bytes)
+ makeStringCore(start, bytes, obj, kName_PdfObjectType);
+ }
+
+
+ static void makeKeyword(unsigned char* start, SkPdfObject* obj) { // start must be '\0' terminated: the length is taken with strlen
+ makeStringCore(start, strlen((const char*)start), obj, kKeyword_PdfObjectType);
+ }
+
+ static void makeKeyword(unsigned char* start, unsigned char* end, SkPdfObject* obj) { // the bytes are [start, end)
+ makeStringCore(start, end - start, obj, kKeyword_PdfObjectType);
+ }
+
+ static void makeKeyword(unsigned char* start, size_t bytes, SkPdfObject* obj) { // the bytes are [start, start + bytes)
+ makeStringCore(start, bytes, obj, kKeyword_PdfObjectType);
+ }
+
+
+
+ // TODO(edisonn): make the functions to return SkPdfArray, move these functions in SkPdfArray
+ static void makeEmptyArray(SkPdfObject* obj) { // turns the invalid object obj into an empty array
+ SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+ obj->fObjectType = kArray_PdfObjectType;
+ obj->fArray = new SkTDArray<SkPdfObject*>();
+ // return (SkPdfArray*)obj;
+ }
+
+ bool appendInArray(SkPdfObject* obj) { // appends the pointer obj; returns false if this object is not an array
+ SkASSERT(fObjectType == kArray_PdfObjectType);
+ if (fObjectType != kArray_PdfObjectType) {
+ // TODO(edisonn): report err
+ return false;
+ }
+
+ fArray->push(obj);
+ return true;
+ }
+
+ size_t size() const { // number of elements; only asserts (does not check) that this is an array
+ SkASSERT(fObjectType == kArray_PdfObjectType);
+
+ return fArray->count();
+ }
+
+ SkPdfObject* objAtAIndex(int i) { // element i; only asserts (does not check) that this is an array
+ SkASSERT(fObjectType == kArray_PdfObjectType);
+
+ return (*fArray)[i];
+ }
+
+ SkPdfObject* removeLastInArray() { // pops the last element into ret and returns it
+ SkASSERT(fObjectType == kArray_PdfObjectType);
+
+ SkPdfObject* ret = NULL;
+ fArray->pop(&ret);
+
+ return ret;
+ }
+
+
+ const SkPdfObject* objAtAIndex(int i) const { // const version of objAtAIndex
+ SkASSERT(fObjectType == kArray_PdfObjectType);
+
+ return (*fArray)[i];
+ }
+
+ SkPdfObject* operator[](int i) { // same as objAtAIndex(i)
+ SkASSERT(fObjectType == kArray_PdfObjectType);
+
+ return (*fArray)[i];
+ }
+
+ const SkPdfObject* operator[](int i) const { // same as objAtAIndex(i) const
+ SkASSERT(fObjectType == kArray_PdfObjectType);
+
+ return (*fArray)[i];
+ }
+
+
+ // TODO(edisonn): make the functions to return SkPdfDictionary, move these functions in SkPdfDictionary
+ static void makeEmptyDictionary(SkPdfObject* obj) { // turns the invalid object obj into an empty dictionary without a stream
+ SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+ obj->fObjectType = kDictionary_PdfObjectType;
+ obj->fMap = new SkTDict<SkPdfObject*>(1);
+ obj->fStr.fBuffer = NULL; // a NULL buffer means "no stream attached", see hasStream()
+ obj->fStr.fBytes = 0;
+ }
+
+ // TODO(edisonn): get all the possible names from spec, and compute a hash function
+ // that would create no overlaps in the same dictionary
+ // or build a tree of chars that when followed goes to a unique id/index/hash
+ // TODO(edisonn): generate constants like kDictFoo, kNameDict_name
+ // which will be used in code
+ // add function SkPdfFastNameKey key(const char* key);
+ // TODO(edisonn): setting the same key twice, will make the value undefined!
+ bool set(SkPdfObject* key, SkPdfObject* value) { // stores value under the name held by key; returns false if key is not a name or this is not a dictionary
+ SkASSERT(fObjectType == kDictionary_PdfObjectType);
+ SkASSERT(key->fObjectType == kName_PdfObjectType);
+
+ if (key->fObjectType != kName_PdfObjectType || fObjectType != kDictionary_PdfObjectType) {
+ // TODO(edisonn): report err
+ return false;
+ }
+
+ // we rewrite all delimiters and white spaces with '\0', so we expect the end of name to be '\0'
+ SkASSERT(key->fStr.fBuffer[key->fStr.fBytes] == '\0');
+
+ return set((char*)key->fStr.fBuffer, value);
+ }
+
+ bool set(const char* key, SkPdfObject* value) { // stores value under key; returns false if this is not a dictionary, otherwise the result of fMap->set
+ SkASSERT(fObjectType == kDictionary_PdfObjectType);
+
+ if (fObjectType != kDictionary_PdfObjectType) {
+ // TODO(edisonn): report err
+ return false;
+ }
+
+ return fMap->set(key, value);
+ }
+
+    // Looks up the value stored under the name held by |key|. Returns NULL when this object is
+    // not a dictionary, |key| is not a name, or the key is not present.
+    SkPdfObject* get(SkPdfObject* key) {
+        SkASSERT(fObjectType == kDictionary_PdfObjectType);
+        SkASSERT(key->fObjectType == kName_PdfObjectType);
+
+        if (key->fObjectType != kName_PdfObjectType || fObjectType != kDictionary_PdfObjectType) {
+            // TODO(edisonn): report err
+            // The function returns a pointer, so the error value is NULL, not false.
+            return NULL;
+        }
+
+        // names are expected to be '\0' terminated in the buffer
+        SkASSERT(key->fStr.fBuffer[key->fStr.fBytes] == '\0');
+
+        return get((char*)key->fStr.fBuffer);
+    }
+
+ SkPdfObject* get(const char* key) { // returns the value stored under key, or NULL if absent or if this is not a dictionary
+ SkASSERT(fObjectType == kDictionary_PdfObjectType);
+ SkASSERT(key);
+ if (fObjectType != kDictionary_PdfObjectType) {
+ // TODO(edisonn): report err
+ return NULL;
+ }
+ SkPdfObject* ret = NULL; // returned unchanged unless find() stores a value in it
+ fMap->find(key, &ret);
+ return ret;
+ }
+
+    // Const version of the lookup by name object. Returns NULL when this object is not a
+    // dictionary, |key| is not a name, or the key is not present.
+    const SkPdfObject* get(SkPdfObject* key) const {
+        SkASSERT(fObjectType == kDictionary_PdfObjectType);
+        SkASSERT(key->fObjectType == kName_PdfObjectType);
+
+        if (key->fObjectType != kName_PdfObjectType || fObjectType != kDictionary_PdfObjectType) {
+            // TODO(edisonn): report err
+            // The function returns a pointer, so the error value is NULL, not false.
+            return NULL;
+        }
+
+        // names are expected to be '\0' terminated in the buffer
+        SkASSERT(key->fStr.fBuffer[key->fStr.fBytes] == '\0');
+
+        return get((char*)key->fStr.fBuffer);
+    }
+
+
+ const SkPdfObject* get(const char* key) const { // returns the value stored under key, or NULL if absent or if this is not a dictionary
+ SkASSERT(fObjectType == kDictionary_PdfObjectType);
+ SkASSERT(key);
+ if (fObjectType != kDictionary_PdfObjectType) {
+ // TODO(edisonn): report err
+ return NULL;
+ }
+ SkPdfObject* ret = NULL;
+ fMap->find(key, &ret);
+ return ret;
+ }
+
+ const SkPdfObject* get(const char* key, const char* abr) const { // looks up key first, then the abbreviation abr if key is absent
+ const SkPdfObject* ret = get(key);
+ // TODO(edisonn): / is a valid name, and it might be an abbreviation, so "" should not be like NULL
+ // make this distinction in generator, and remove "" from condition
+ if (ret != NULL || abr == NULL || *abr == '\0') {
+ return ret;
+ }
+ return get(abr);
+ }
+
+ SkPdfObject* get(const char* key, const char* abr) { // looks up key first, then the abbreviation abr if key is absent
+ SkPdfObject* ret = get(key);
+ // TODO(edisonn): / is a valid name, and it might be an abbreviation, so "" should not be like NULL
+ // make this distinction in generator, and remove "" from condition
+ if (ret != NULL || abr == NULL || *abr == '\0') {
+ return ret;
+ }
+ return get(abr);
+ }
+
+ SkPdfDictionary* asDictionary() { // returns this object cast to SkPdfDictionary, or NULL if it is not a dictionary
+ SkASSERT(isDictionary());
+ if (!isDictionary()) {
+ return NULL;
+ }
+ return (SkPdfDictionary*) this;
+ }
+
+ const SkPdfDictionary* asDictionary() const { // const version of asDictionary
+ SkASSERT(isDictionary());
+ if (!isDictionary()) {
+ return NULL;
+ }
+ return (SkPdfDictionary*) this;
+ }
+
+
+ bool isReference() const {
+ return fObjectType == kReference_PdfObjectType;
+ }
+
+ bool isBoolean() const {
+ return fObjectType == kBoolean_PdfObjectType;
+ }
+
+ bool isInteger() const {
+ return fObjectType == kInteger_PdfObjectType;
+ }
+private:
+ bool isReal() const { // private; the public numeric check is isNumber()
+ return fObjectType == kReal_PdfObjectType;
+ }
+public:
+ bool isNumber() const { // true for integers and reals
+ return fObjectType == kInteger_PdfObjectType || fObjectType == kReal_PdfObjectType;
+ }
+
+ bool isKeywordReference() const { // true for the one-character keyword "R" that ends an indirect reference
+ return fObjectType == kKeyword_PdfObjectType && fStr.fBytes == 1 && fStr.fBuffer[0] == 'R';
+ }
+
+ bool isKeyword() const {
+ return fObjectType == kKeyword_PdfObjectType;
+ }
+
+ bool isName() const {
+ return fObjectType == kName_PdfObjectType;
+ }
+
+ bool isArray() const {
+ return fObjectType == kArray_PdfObjectType;
+ }
+
+ bool isDate() const { // only checks that the object is a string or hex string; the content is not validated
+ return fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType;
+ }
+
+ bool isDictionary() const {
+ return fObjectType == kDictionary_PdfObjectType;
+ }
+
+ bool isFunction() const {
+ return false; // NYI
+ }
+
+ bool isRectangle() const { // an array of exactly 4 elements
+ return fObjectType == kArray_PdfObjectType && fArray->count() == 4; // NYI + and elems are numbers
+ }
+
+ // TODO(edisonn): has stream .. or is stream ... TBD
+ bool hasStream() const { // true for a dictionary that had a stream attached with addStream()
+ return isDictionary() && fStr.fBuffer != NULL;
+ }
+
+ // TODO(edisonn): has stream .. or is stream ... TBD
+ const SkPdfStream* getStream() const { // this object cast to SkPdfStream, or NULL if it has no stream
+ return hasStream() ? (const SkPdfStream*)this : NULL;
+ }
+
+ SkPdfStream* getStream() { // this object cast to SkPdfStream, or NULL if it has no stream
+ return hasStream() ? (SkPdfStream*)this : NULL;
+ }
+
+ bool isAnyString() const { // true for strings and hex strings
+ return fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType;
+ }
+
+ bool isMatrix() const { // an array of exactly 6 elements
+ return fObjectType == kArray_PdfObjectType && fArray->count() == 6; // NYI + and elems are numbers
+ }
+
+ inline int64_t intValue() const { // value of an integer object; 0 if this object is not an integer
+ SkASSERT(fObjectType == kInteger_PdfObjectType);
+
+ if (fObjectType != kInteger_PdfObjectType) {
+ // TODO(edisonn): log err
+ return 0;
+ }
+ return fIntegerValue;
+ }
+private:
+ inline double realValue() const { // value of a real object; 0 if this object is not a real
+ SkASSERT(fObjectType == kReal_PdfObjectType);
+
+ if (fObjectType != kReal_PdfObjectType) {
+ // TODO(edisonn): log err
+ return 0;
+ }
+ return fRealValue;
+ }
+public:
+ inline double numberValue() const { // value of an integer or real object as a double; 0 if this object is not a number
+ SkASSERT(isNumber());
+
+ if (!isNumber()) {
+ // TODO(edisonn): log err
+ return 0;
+ }
+ return fObjectType == kReal_PdfObjectType ? fRealValue : fIntegerValue;
+ }
+
+ int referenceId() const { // object id of a reference; the type is only asserted, not checked
+ SkASSERT(fObjectType == kReference_PdfObjectType);
+ return fRef.fId;
+ }
+
+ int referenceGeneration() const { // generation of a reference; the type is only asserted, not checked
+ SkASSERT(fObjectType == kReference_PdfObjectType);
+ return fRef.fGen;
+ }
+
+ inline const char* nameValue() const { // bytes of a name object; "" if this object is not a name
+ SkASSERT(fObjectType == kName_PdfObjectType);
+
+ if (fObjectType != kName_PdfObjectType) {
+ // TODO(edisonn): log err
+ return "";
+ }
+ return (const char*)fStr.fBuffer;
+ }
+
+ inline const char* stringValue() const { // bytes of a string or hex string object; "" for any other type
+ SkASSERT(fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType);
+
+ if (fObjectType != kString_PdfObjectType && fObjectType != kHexString_PdfObjectType) {
+ // TODO(edisonn): log err
+ return "";
+ }
+ return (const char*)fStr.fBuffer;
+ }
+
+ // TODO(edisonn): nameValue2 and stringValue2 are used to make code generation easy,
+ // but it is not a performant way to do it, since it will create an extra copy
+ // remove these functions and make code generated faster
+ inline std::string nameValue2() const { // copy of the name up to the first '\0'; "" if this object is not a name
+ SkASSERT(fObjectType == kName_PdfObjectType);
+
+ if (fObjectType != kName_PdfObjectType) {
+ // TODO(edisonn): log err
+ return "";
+ }
+ return (const char*)fStr.fBuffer;
+ }
+
+ inline std::string stringValue2() const { // copy of the string up to the first '\0'; "" for any other type
+ SkASSERT(fObjectType == kString_PdfObjectType || fObjectType == kHexString_PdfObjectType);
+
+ if (fObjectType != kString_PdfObjectType && fObjectType != kHexString_PdfObjectType) {
+ // TODO(edisonn): log err
+ return "";
+ }
+ return (const char*)fStr.fBuffer;
+ }
+
+    // Returns the value of a boolean object; false if this object is not a boolean.
+    inline bool boolValue() const {
+        SkASSERT(fObjectType == kBoolean_PdfObjectType);
+
+        // The guard must reject everything that is NOT a boolean. Testing with == returned
+        // false for every real boolean and read the union for every other type.
+        if (fObjectType != kBoolean_PdfObjectType) {
+            // TODO(edisonn): log err
+            return false;
+        }
+        return fBooleanValue;
+    }
+
+    // Converts a 4-element array of numbers into a rectangle (left, top, right, bottom).
+    // Returns an empty rectangle when this object is not such an array.
+    SkRect rectangleValue() const {
+        SkASSERT(isRectangle());
+        if (!isRectangle()) {
+            return SkRect::MakeEmpty();
+        }
+
+        double ltrb[4];
+        int idx = 0;
+        while (idx < 4) {
+            // TODO(edisonn): version where we could resolve references?
+            const SkPdfObject* elem = objAtAIndex(idx);
+            const bool usable = elem != NULL && elem->isNumber();
+            if (!usable) {
+                // TODO(edisonn): report error
+                return SkRect::MakeEmpty();
+            }
+            ltrb[idx] = elem->numberValue();
+            idx++;
+        }
+
+        return SkRect::MakeLTRB(SkDoubleToScalar(ltrb[0]),
+                                SkDoubleToScalar(ltrb[1]),
+                                SkDoubleToScalar(ltrb[2]),
+                                SkDoubleToScalar(ltrb[3]));
+    }
+
+ // Builds an SkMatrix from elements 0..5 of this object by handing them, as
+ // doubles, to SkMatrixFromPdfMatrix.
+ // Returns the identity matrix when isMatrix() is false, or when any of the
+ // six elements is missing or is not a number.
+ SkMatrix matrixValue() const {
+     const bool matrixLike = isMatrix();
+     SkASSERT(matrixLike);
+     if (!matrixLike) {
+         return SkMatrix::I();
+     }
+
+     double coeffs[6];
+     int idx = 0;
+     while (idx < 6) {
+         // TODO(edisonn): version where we could resolve references?
+         const SkPdfObject* item = objAtAIndex(idx);
+         const bool usable = (item != NULL) && item->isNumber();
+         if (!usable) {
+             // TODO(edisonn): report error
+             return SkMatrix::I();
+         }
+         coeffs[idx] = item->numberValue();
+         ++idx;
+     }
+
+     return SkMatrixFromPdfMatrix(coeffs);
+ }
+
+ // Declared here and defined out of line. GetFilteredStreamRef() calls it
+ // (ignoring the result) before handing out the stream buffer.
+ // NOTE(review): presumably decodes the stream data according to the
+ // dictionary's filters and marks it as filtered - confirm against the definition.
+ bool filterStream(SkPdfAllocator* allocator);
+
+
+ // Runs filterStream(allocator) and then exposes the stream storage.
+ //   buffer - optional out-param; receives fStr.fBuffer.
+ //   len    - optional out-param; receives fStr.fBytes with its lowest bit
+ //            (the filtered/unfiltered flag) shifted out.
+ // Returns false, touching neither out-param, when the object has no stream;
+ // returns true otherwise. The result of filterStream() is not inspected.
+ bool GetFilteredStreamRef(unsigned char** buffer, size_t* len, SkPdfAllocator* allocator) {
+     // TODO(edisonn): add params that could leave the last filter in place if it is jpeg or png,
+     // to load images faster
+     if (hasStream()) {
+         filterStream(allocator);
+
+         if (buffer) {
+             *buffer = fStr.fBuffer;
+         }
+         if (len) {
+             *len = fStr.fBytes >> 1;  // drop the flag bit
+         }
+         return true;
+     }
+
+     return false;
+ }
+
+ // True when the object has a stream and the lowest bit of fStr.fBytes
+ // equals kFilteredStreamBit.
+ bool isStreamFiltered() const {
+     if (!hasStream()) {
+         return false;
+     }
+     return (fStr.fBytes & 1) == kFilteredStreamBit;
+ }
+
+ // Exposes the stream storage only while it is still unfiltered.
+ //   buffer - optional out-param; receives fStr.fBuffer.
+ //   len    - optional out-param; receives fStr.fBytes with its lowest bit
+ //            (the filtered/unfiltered flag) shifted out.
+ // Returns false, touching neither out-param, when the stream has already
+ // been filtered or when the object has no stream; returns true otherwise.
+ bool GetUnfilteredStreamRef(unsigned char** buffer, size_t* len) const {
+     if (isStreamFiltered() || !hasStream()) {
+         return false;
+     }
+
+     if (len) {
+         *len = fStr.fBytes >> 1;  // drop the flag bit
+     }
+     if (buffer) {
+         *buffer = fStr.fBuffer;
+     }
+
+     return true;
+ }
+
+ // Attaches an unfiltered stream to a dictionary object.
+ //   buffer - start of the stream bytes; stored as-is in fStr.fBuffer (no copy).
+ //   len    - number of stream bytes.
+ // SkASSERTs that the object is a dictionary without a stream; if execution
+ // continues past the asserts in that situation, nothing is modified and false
+ // is returned. Returns true once the stream has been recorded.
+ bool addStream(unsigned char* buffer, size_t len) {
+     SkASSERT(!hasStream());
+     SkASSERT(isDictionary());
+
+     if (!isDictionary() || hasStream()) {
+         return false;
+     }
+
+     fStr.fBuffer = buffer;
+     // fBytes packs the length above a single flag bit. The shift must be 1 to
+     // match GetFilteredStreamRef/GetUnfilteredStreamRef (fBytes >> 1) and
+     // isStreamFiltered (fBytes & 1); shifting by 2 made the getters report
+     // twice the real length.
+     fStr.fBytes = (len << 1) + kUnfilteredStreamBit;
+
+     return true;
+ }
+
+ // Produces a human-readable description of this object.
+ // Scalars print their value; string-like objects print fStr.fBytes followed by
+ // the raw bytes; arrays print their size and then each element's toString()
+ // back to back; dictionaries are not expanded yet (only a stream marker is
+ // added). Unknown type tags yield "Internal Error Object Type".
+ SkString toString() {
+     SkString str;
+     switch (fObjectType) {
+         case kInvalid_PdfObjectType:
+             str.append("Invalid");
+             break;
+
+         case kBoolean_PdfObjectType:
+             str.appendf("Boolean: %s", fBooleanValue ? "true" : "false");
+             break;
+
+         case kInteger_PdfObjectType:
+             str.appendf("Integer: %i", (int)fIntegerValue);
+             break;
+
+         case kReal_PdfObjectType:
+             str.appendf("Real: %f", fRealValue);
+             break;
+
+         case kString_PdfObjectType:
+             str.appendf("String, len() = %u: ", (unsigned int)fStr.fBytes);
+             str.append((const char*)fStr.fBuffer, fStr.fBytes);
+             break;
+
+         case kHexString_PdfObjectType:
+             str.appendf("HexString, len() = %u: ", (unsigned int)fStr.fBytes);
+             str.append((const char*)fStr.fBuffer, fStr.fBytes);
+             break;
+
+         case kName_PdfObjectType:
+             str.appendf("Name, len() = %u: ", (unsigned int)fStr.fBytes);
+             str.append((const char*)fStr.fBuffer, fStr.fBytes);
+             break;
+
+         case kKeyword_PdfObjectType:
+             str.appendf("Keyword, len() = %u: ", (unsigned int)fStr.fBytes);
+             str.append((const char*)fStr.fBuffer, fStr.fBytes);
+             break;
+
+         case kArray_PdfObjectType:
+             // Must be appendf: append(text, n) would treat size() as a byte
+             // count and copy that many raw bytes of the format literal.
+             // The cast keeps the vararg in step with %i.
+             str.appendf("Array, size() = %i [", (int)size());
+             for (unsigned int i = 0; i < size(); i++) {
+                 str.append(objAtAIndex(i)->toString());
+             }
+             str.append("]");
+             break;
+
+         case kDictionary_PdfObjectType:
+             // TODO(edisonn): NYI
+             str.append("Dictionary: NYI");
+             if (hasStream()) {
+                 str.append(" HAS_STREAM");
+             }
+             break;
+
+         case kNull_PdfObjectType:
+             str = "NULL";
+             break;
+
+         case kReference_PdfObjectType:
+             str.appendf("Reference: %i %i", fRef.fId, fRef.fGen);
+             break;
+
+         case kUndefined_PdfObjectType:
+             str = "Undefined";
+             break;
+
+         default:
+             str = "Internal Error Object Type";
+             break;
+     }
+
+     return str;
+ }
+
+private:
+ // Overload for NUL-terminated input: measures the text with strlen and
+ // forwards to the (start, bytes) overload.
+ static void makeStringCore(unsigned char* start, SkPdfObject* obj, ObjectType type) {
+     const size_t length = strlen((const char*)start);
+     makeStringCore(start, length, obj, type);
+ }
+
+ // Overload for a [start, end) byte range: the length is end - start,
+ // forwarded to the (start, bytes) overload.
+ static void makeStringCore(unsigned char* start, unsigned char* end, SkPdfObject* obj, ObjectType type) {
+     const size_t length = end - start;
+     makeStringCore(start, length, obj, type);
+ }
+
+ // Core initializer shared by the string-like object kinds: stamps 'type' on
+ // 'obj' and points its fStr at the caller's bytes (the pointer is stored, the
+ // data is not copied). SkASSERTs that 'obj' is still of the invalid type.
+ static void makeStringCore(unsigned char* start, size_t bytes, SkPdfObject* obj, ObjectType type) {
+     SkASSERT(obj->fObjectType == kInvalid_PdfObjectType);
+
+     obj->fStr.fBytes = bytes;
+     obj->fStr.fBuffer = start;
+     obj->fObjectType = type;
+ }
+
+ // Private stream-filter helpers; declared here and defined out of line.
+ // NOTE(review): presumably applyFilter dispatches on the filter name to the
+ // Flate/DCT variants, and each returns whether decoding succeeded - confirm
+ // against the definitions.
+ bool applyFilter(const char* name, SkPdfAllocator* allocator);
+ bool applyFlateDecodeFilter(SkPdfAllocator* allocator);
+ bool applyDCTDecodeFilter(SkPdfAllocator* allocator);
+};
+
+// Empty subclasses of SkPdfObject: they add no members or behavior here.
+// NOTE(review): presumably they exist so other code can refer to these object
+// kinds by distinct static types - confirm against the users.
+class SkPdfStream : public SkPdfObject {};
+class SkPdfArray : public SkPdfObject {};
+class SkPdfString : public SkPdfObject {};
+class SkPdfHexString : public SkPdfObject {};
+class SkPdfInteger : public SkPdfObject {};
+class SkPdfReal : public SkPdfObject {};
+class SkPdfNumber : public SkPdfObject {};
+
+#endif // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFOBJECT_H_