aboutsummaryrefslogtreecommitdiff
path: root/xpdf/XRef.cc
diff options
context:
space:
mode:
authorCalvin Morrison <calvin@pobox.com>2023-04-05 14:13:39 -0400
committerCalvin Morrison <calvin@pobox.com>2023-04-05 14:13:39 -0400
commit835e373b3eeaabcd0621ed6798ab500f37982fae (patch)
treedfa16b0e2e1b4956b38f693220eac4e607802133 /xpdf/XRef.cc
xpdf-no-select-disableHEADmaster
Diffstat (limited to 'xpdf/XRef.cc')
-rw-r--r--xpdf/XRef.cc1431
1 files changed, 1431 insertions, 0 deletions
diff --git a/xpdf/XRef.cc b/xpdf/XRef.cc
new file mode 100644
index 0000000..51af86d
--- /dev/null
+++ b/xpdf/XRef.cc
@@ -0,0 +1,1431 @@
+//========================================================================
+//
+// XRef.cc
+//
+// Copyright 1996-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include <aconf.h>
+
+#ifdef USE_GCC_PRAGMAS
+#pragma implementation
+#endif
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include "gmem.h"
+#include "gmempp.h"
+#include "gfile.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Lexer.h"
+#include "Parser.h"
+#include "Dict.h"
+#include "Error.h"
+#include "ErrorCodes.h"
+#include "XRef.h"
+
+//------------------------------------------------------------------------
+
+#define xrefSearchSize 1024 // read this many bytes at end of file
+ // to look for 'startxref'
+
+//------------------------------------------------------------------------
+// Permission bits
+//------------------------------------------------------------------------
+
+#define permPrint (1<<2)
+#define permChange (1<<3)
+#define permCopy (1<<4)
+#define permNotes (1<<5)
+#define defPermFlags 0xfffc
+
+//------------------------------------------------------------------------
+// XRefPosSet
+//------------------------------------------------------------------------
+
+class XRefPosSet {
+public:
+
+ XRefPosSet();
+ ~XRefPosSet();
+ void add(GFileOffset pos);
+ GBool check(GFileOffset pos);
+ int getLength() { return len; }
+ GFileOffset get(int idx) { return tab[idx]; }
+
+private:
+
+ int find(GFileOffset pos);
+
+ GFileOffset *tab;
+ int size;
+ int len;
+};
+
+XRefPosSet::XRefPosSet() {
+ size = 16;
+ len = 0;
+ tab = (GFileOffset *)gmallocn(size, sizeof(GFileOffset));
+}
+
+XRefPosSet::~XRefPosSet() {
+ gfree(tab);
+}
+
+void XRefPosSet::add(GFileOffset pos) {
+ int i;
+
+ i = find(pos);
+ if (i < len && tab[i] == pos) {
+ return;
+ }
+ if (len == size) {
+ if (size > INT_MAX / 2) {
+ gMemError("Integer overflow in XRefPosSet::add()");
+ }
+ size *= 2;
+ tab = (GFileOffset *)greallocn(tab, size, sizeof(GFileOffset));
+ }
+ if (i < len) {
+ memmove(&tab[i + 1], &tab[i], (len - i) * sizeof(GFileOffset));
+ }
+ tab[i] = pos;
+ ++len;
+}
+
+GBool XRefPosSet::check(GFileOffset pos) {
+ int i;
+
+ i = find(pos);
+ return i < len && tab[i] == pos;
+}
+
+int XRefPosSet::find(GFileOffset pos) {
+ int a, b, m;
+
+ a = - 1;
+ b = len;
+ // invariant: tab[a] < pos < tab[b]
+ while (b - a > 1) {
+ m = (a + b) / 2;
+ if (tab[m] < pos) {
+ a = m;
+ } else if (tab[m] > pos) {
+ b = m;
+ } else {
+ return m;
+ }
+ }
+ return b;
+}
+
+//------------------------------------------------------------------------
+// ObjectStream
+//------------------------------------------------------------------------
+
+class ObjectStream {
+public:
+
+ // Create an object stream, using object number <objStrNum>,
+ // generation 0.
+ ObjectStream(XRef *xref, int objStrNumA);
+
+ GBool isOk() { return ok; }
+
+ ~ObjectStream();
+
+ // Return the object number of this object stream.
+ int getObjStrNum() { return objStrNum; }
+
+ // Get the <objIdx>th object from this stream, which should be
+ // object number <objNum>, generation 0.
+ Object *getObject(int objIdx, int objNum, Object *obj);
+
+private:
+
+ int objStrNum; // object number of the object stream
+ int nObjects; // number of objects in the stream
+ Object *objs; // the objects (length = nObjects)
+ int *objNums; // the object numbers (length = nObjects)
+ GBool ok;
+};
+
+ObjectStream::ObjectStream(XRef *xref, int objStrNumA) {
+ Stream *str;
+ Lexer *lexer;
+ Parser *parser;
+ int *offsets;
+ Object objStr, obj1, obj2;
+ int first, i;
+
+ objStrNum = objStrNumA;
+ nObjects = 0;
+ objs = NULL;
+ objNums = NULL;
+ ok = gFalse;
+
+ if (!xref->fetch(objStrNum, 0, &objStr)->isStream()) {
+ goto err1;
+ }
+
+ if (!objStr.streamGetDict()->lookup("N", &obj1)->isInt()) {
+ obj1.free();
+ goto err1;
+ }
+ nObjects = obj1.getInt();
+ obj1.free();
+ if (nObjects <= 0) {
+ goto err1;
+ }
+
+ if (!objStr.streamGetDict()->lookup("First", &obj1)->isInt()) {
+ obj1.free();
+ goto err1;
+ }
+ first = obj1.getInt();
+ obj1.free();
+ if (first < 0) {
+ goto err1;
+ }
+
+ // this is an arbitrary limit to avoid integer overflow problems
+ // in the 'new Object[nObjects]' call (Acrobat apparently limits
+ // object streams to 100-200 objects)
+ if (nObjects > 1000000) {
+ error(errSyntaxError, -1, "Too many objects in an object stream");
+ goto err1;
+ }
+ objs = new Object[nObjects];
+ objNums = (int *)gmallocn(nObjects, sizeof(int));
+ offsets = (int *)gmallocn(nObjects, sizeof(int));
+
+ // parse the header: object numbers and offsets
+ objStr.streamReset();
+ obj1.initNull();
+ str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first);
+ lexer = new Lexer(xref, str);
+ parser = new Parser(xref, lexer, gFalse);
+ for (i = 0; i < nObjects; ++i) {
+ parser->getObj(&obj1, gTrue);
+ parser->getObj(&obj2, gTrue);
+ if (!obj1.isInt() || !obj2.isInt()) {
+ obj1.free();
+ obj2.free();
+ delete parser;
+ gfree(offsets);
+ goto err2;
+ }
+ objNums[i] = obj1.getInt();
+ offsets[i] = obj2.getInt();
+ obj1.free();
+ obj2.free();
+ if (objNums[i] < 0 || offsets[i] < 0 ||
+ (i > 0 && offsets[i] < offsets[i-1])) {
+ delete parser;
+ gfree(offsets);
+ goto err2;
+ }
+ }
+ lexer->skipToEOF();
+ delete parser;
+
+ // skip to the first object - this shouldn't be necessary because
+ // the First key is supposed to be equal to offsets[0], but just in
+ // case...
+ if (first < offsets[0]) {
+ objStr.getStream()->discardChars(offsets[0] - first);
+ }
+
+ // parse the objects
+ for (i = 0; i < nObjects; ++i) {
+ obj1.initNull();
+ if (i == nObjects - 1) {
+ str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0);
+ } else {
+ str = new EmbedStream(objStr.getStream(), &obj1, gTrue,
+ offsets[i+1] - offsets[i]);
+ }
+ lexer = new Lexer(xref, str);
+ parser = new Parser(xref, lexer, gFalse);
+ parser->getObj(&objs[i]);
+ lexer->skipToEOF();
+ delete parser;
+ }
+
+ gfree(offsets);
+ ok = gTrue;
+
+ err2:
+ objStr.streamClose();
+ err1:
+ objStr.free();
+}
+
+ObjectStream::~ObjectStream() {
+ int i;
+
+ if (objs) {
+ for (i = 0; i < nObjects; ++i) {
+ objs[i].free();
+ }
+ delete[] objs;
+ }
+ gfree(objNums);
+}
+
+Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) {
+ if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) {
+ obj->initNull();
+ } else {
+ objs[objIdx].copy(obj);
+ }
+ return obj;
+}
+
+//------------------------------------------------------------------------
+// XRef
+//------------------------------------------------------------------------
+
+XRef::XRef(BaseStream *strA, GBool repair) {
+ GFileOffset pos;
+ Object obj;
+ XRefPosSet *posSet;
+ int i;
+
+ ok = gTrue;
+ errCode = errNone;
+ repaired = gFalse;
+ size = 0;
+ last = -1;
+ entries = NULL;
+ lastStartxrefPos = 0;
+ xrefTablePos = NULL;
+ xrefTablePosLen = 0;
+ streamEnds = NULL;
+ streamEndsLen = 0;
+ for (i = 0; i < objStrCacheSize; ++i) {
+ objStrs[i] = NULL;
+ objStrLastUse[i] = 0;
+ }
+ objStrCacheLength = 0;
+ objStrTime = 0;
+
+ encrypted = gFalse;
+ permFlags = defPermFlags;
+ ownerPasswordOk = gFalse;
+
+ for (i = 0; i < xrefCacheSize; ++i) {
+ cache[i].num = -1;
+ }
+
+#if MULTITHREADED
+ gInitMutex(&objStrsMutex);
+ gInitMutex(&cacheMutex);
+#endif
+
+ str = strA;
+ start = str->getStart();
+
+ // if the 'repair' flag is set, try to reconstruct the xref table
+ if (repair) {
+ if (!(ok = constructXRef())) {
+ errCode = errDamaged;
+ return;
+ }
+ repaired = gTrue;
+
+ // if the 'repair' flag is not set, read the xref table
+ } else {
+
+ // read the trailer
+ pos = getStartXref();
+ if (pos == 0) {
+ errCode = errDamaged;
+ ok = gFalse;
+ return;
+ }
+
+ // read the xref table
+ posSet = new XRefPosSet();
+ while (readXRef(&pos, posSet, gFalse)) ;
+ xrefTablePosLen = posSet->getLength();
+ xrefTablePos = (GFileOffset *)gmallocn(xrefTablePosLen,
+ sizeof(GFileOffset));
+ for (i = 0; i < xrefTablePosLen; ++i) {
+ xrefTablePos[i] = posSet->get(i);
+ }
+ delete posSet;
+ if (!ok) {
+ errCode = errDamaged;
+ return;
+ }
+ }
+
+ // get the root dictionary (catalog) object
+ trailerDict.dictLookupNF("Root", &obj);
+ if (obj.isRef()) {
+ rootNum = obj.getRefNum();
+ rootGen = obj.getRefGen();
+ obj.free();
+ } else {
+ obj.free();
+ if (!(ok = constructXRef())) {
+ errCode = errDamaged;
+ return;
+ }
+ }
+
+ // now set the trailer dictionary's xref pointer so we can fetch
+ // indirect objects from it
+ trailerDict.getDict()->setXRef(this);
+}
+
+XRef::~XRef() {
+ int i;
+
+ for (i = 0; i < xrefCacheSize; ++i) {
+ if (cache[i].num >= 0) {
+ cache[i].obj.free();
+ }
+ }
+ gfree(entries);
+ trailerDict.free();
+ if (xrefTablePos) {
+ gfree(xrefTablePos);
+ }
+ if (streamEnds) {
+ gfree(streamEnds);
+ }
+ for (i = 0; i < objStrCacheSize; ++i) {
+ if (objStrs[i]) {
+ delete objStrs[i];
+ }
+ }
+#if MULTITHREADED
+ gDestroyMutex(&objStrsMutex);
+ gDestroyMutex(&cacheMutex);
+#endif
+}
+
+// Read the 'startxref' position.
+GFileOffset XRef::getStartXref() {
+ char buf[xrefSearchSize+1];
+ char *p;
+ int n, i;
+
+ // read last xrefSearchSize bytes
+ str->setPos(xrefSearchSize, -1);
+ n = str->getBlock(buf, xrefSearchSize);
+ buf[n] = '\0';
+
+ // find startxref
+ for (i = n - 9; i >= 0; --i) {
+ if (!strncmp(&buf[i], "startxref", 9)) {
+ break;
+ }
+ }
+ if (i < 0) {
+ return 0;
+ }
+ for (p = &buf[i+9]; isspace(*p & 0xff); ++p) ;
+ lastXRefPos = strToFileOffset(p);
+ lastStartxrefPos = str->getPos() - n + i;
+
+ return lastXRefPos;
+}
+
+// Read one xref table section. Also reads the associated trailer
+// dictionary, and returns the prev pointer (if any). The [hybrid]
+// flag is true when following the XRefStm link in a hybrid-reference
+// file.
+GBool XRef::readXRef(GFileOffset *pos, XRefPosSet *posSet, GBool hybrid) {
+ Parser *parser;
+ Object obj;
+ GBool more;
+ char buf[100];
+ int n, i;
+
+ // check for a loop in the xref tables
+ if (posSet->check(*pos)) {
+ error(errSyntaxWarning, -1, "Infinite loop in xref table");
+ return gFalse;
+ }
+ posSet->add(*pos);
+
+ // the xref data should either be "xref ..." (for an xref table) or
+ // "nn gg obj << ... >> stream ..." (for an xref stream); possibly
+ // preceded by whitespace
+ str->setPos(start + *pos);
+ n = str->getBlock(buf, 100);
+ for (i = 0; i < n && Lexer::isSpace(buf[i]); ++i) ;
+
+ // parse an old-style xref table
+ if (!hybrid &&
+ i + 4 < n &&
+ buf[i] == 'x' && buf[i+1] == 'r' && buf[i+2] == 'e' && buf[i+3] == 'f' &&
+ Lexer::isSpace(buf[i+4])) {
+ more = readXRefTable(pos, i + 5, posSet);
+
+ // parse an xref stream
+ } else {
+ obj.initNull();
+ parser = new Parser(NULL,
+ new Lexer(NULL,
+ str->makeSubStream(start + *pos, gFalse, 0, &obj)),
+ gTrue);
+ if (!parser->getObj(&obj, gTrue)->isInt()) {
+ goto err;
+ }
+ obj.free();
+ if (!parser->getObj(&obj, gTrue)->isInt()) {
+ goto err;
+ }
+ obj.free();
+ if (!parser->getObj(&obj, gTrue)->isCmd("obj")) {
+ goto err;
+ }
+ obj.free();
+ if (!parser->getObj(&obj)->isStream()) {
+ goto err;
+ }
+ more = readXRefStream(obj.getStream(), pos, hybrid);
+ obj.free();
+ delete parser;
+ }
+
+ return more;
+
+ err:
+ obj.free();
+ delete parser;
+ ok = gFalse;
+ return gFalse;
+}
+
+GBool XRef::readXRefTable(GFileOffset *pos, int offset, XRefPosSet *posSet) {
+ XRefEntry entry;
+ Parser *parser;
+ Object obj, obj2;
+ char buf[6];
+ GFileOffset off, pos2;
+ GBool more;
+ int first, n, digit, newSize, gen, i, c;
+
+ str->setPos(start + *pos + offset);
+
+ while (1) {
+ do {
+ c = str->getChar();
+ } while (Lexer::isSpace(c));
+ if (c == 't') {
+ if (str->getBlock(buf, 6) != 6 || memcmp(buf, "railer", 6)) {
+ goto err1;
+ }
+ break;
+ }
+ if (c < '0' || c > '9') {
+ goto err1;
+ }
+ first = 0;
+ do {
+ digit = c - '0';
+ if (first > (INT_MAX - digit) / 10) {
+ goto err1;
+ }
+ first = (first * 10) + digit;
+ c = str->getChar();
+ } while (c >= '0' && c <= '9');
+ if (!Lexer::isSpace(c)) {
+ goto err1;
+ }
+ do {
+ c = str->getChar();
+ } while (Lexer::isSpace(c));
+ n = 0;
+ do {
+ digit = c - '0';
+ if (n > (INT_MAX - digit) / 10) {
+ goto err1;
+ }
+ n = (n * 10) + digit;
+ c = str->getChar();
+ } while (c >= '0' && c <= '9');
+ if (!Lexer::isSpace(c)) {
+ goto err1;
+ }
+ if (first > INT_MAX - n) {
+ goto err1;
+ }
+ if (first + n > size) {
+ for (newSize = size ? 2 * size : 1024;
+ first + n > newSize && newSize > 0;
+ newSize <<= 1) ;
+ if (newSize < 0) {
+ goto err1;
+ }
+ entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
+ for (i = size; i < newSize; ++i) {
+ entries[i].offset = (GFileOffset)-1;
+ entries[i].type = xrefEntryFree;
+ }
+ size = newSize;
+ }
+ for (i = first; i < first + n; ++i) {
+ do {
+ c = str->getChar();
+ } while (Lexer::isSpace(c));
+ off = 0;
+ do {
+ off = (off * 10) + (c - '0');
+ c = str->getChar();
+ } while (c >= '0' && c <= '9');
+ if (!Lexer::isSpace(c)) {
+ goto err1;
+ }
+ entry.offset = off;
+ do {
+ c = str->getChar();
+ } while (Lexer::isSpace(c));
+ gen = 0;
+ do {
+ gen = (gen * 10) + (c - '0');
+ c = str->getChar();
+ } while (c >= '0' && c <= '9');
+ if (!Lexer::isSpace(c)) {
+ goto err1;
+ }
+ entry.gen = gen;
+ do {
+ c = str->getChar();
+ } while (Lexer::isSpace(c));
+ if (c == 'n') {
+ entry.type = xrefEntryUncompressed;
+ } else if (c == 'f') {
+ entry.type = xrefEntryFree;
+ } else {
+ goto err1;
+ }
+ c = str->getChar();
+ if (!Lexer::isSpace(c)) {
+ goto err1;
+ }
+ if (entries[i].offset == (GFileOffset)-1) {
+ entries[i] = entry;
+ // PDF files of patents from the IBM Intellectual Property
+ // Network have a bug: the xref table claims to start at 1
+ // instead of 0.
+ if (i == 1 && first == 1 &&
+ entries[1].offset == 0 && entries[1].gen == 65535 &&
+ entries[1].type == xrefEntryFree) {
+ i = first = 0;
+ entries[0] = entries[1];
+ entries[1].offset = (GFileOffset)-1;
+ }
+ if (i > last) {
+ last = i;
+ }
+ }
+ }
+ }
+
+ // read the trailer dictionary
+ obj.initNull();
+ parser = new Parser(NULL,
+ new Lexer(NULL,
+ str->makeSubStream(str->getPos(), gFalse, 0, &obj)),
+ gTrue);
+ parser->getObj(&obj);
+ delete parser;
+ if (!obj.isDict()) {
+ obj.free();
+ goto err1;
+ }
+
+ // get the 'Prev' pointer
+ //~ this can be a 64-bit int (?)
+ obj.getDict()->lookupNF("Prev", &obj2);
+ if (obj2.isInt()) {
+ *pos = (GFileOffset)(Guint)obj2.getInt();
+ more = gTrue;
+ } else if (obj2.isRef()) {
+ // certain buggy PDF generators generate "/Prev NNN 0 R" instead
+ // of "/Prev NNN"
+ *pos = (GFileOffset)(Guint)obj2.getRefNum();
+ more = gTrue;
+ } else {
+ more = gFalse;
+ }
+ obj2.free();
+
+ // save the first trailer dictionary
+ if (trailerDict.isNone()) {
+ obj.copy(&trailerDict);
+ }
+
+ // check for an 'XRefStm' key
+ //~ this can be a 64-bit int (?)
+ if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) {
+ pos2 = (GFileOffset)(Guint)obj2.getInt();
+ readXRef(&pos2, posSet, gTrue);
+ if (!ok) {
+ obj2.free();
+ obj.free();
+ goto err1;
+ }
+ }
+ obj2.free();
+
+ obj.free();
+ return more;
+
+ err1:
+ ok = gFalse;
+ return gFalse;
+}
+
+GBool XRef::readXRefStream(Stream *xrefStr, GFileOffset *pos, GBool hybrid) {
+ Dict *dict;
+ int w[3];
+ GBool more;
+ Object obj, obj2, idx;
+ int newSize, first, n, i;
+
+ dict = xrefStr->getDict();
+
+ if (!dict->lookupNF("Size", &obj)->isInt()) {
+ goto err1;
+ }
+ newSize = obj.getInt();
+ obj.free();
+ if (newSize < 0) {
+ goto err1;
+ }
+ if (newSize > size) {
+ entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
+ for (i = size; i < newSize; ++i) {
+ entries[i].offset = (GFileOffset)-1;
+ entries[i].type = xrefEntryFree;
+ }
+ size = newSize;
+ }
+
+ if (!dict->lookupNF("W", &obj)->isArray() ||
+ obj.arrayGetLength() < 3) {
+ goto err1;
+ }
+ for (i = 0; i < 3; ++i) {
+ if (!obj.arrayGet(i, &obj2)->isInt()) {
+ obj2.free();
+ goto err1;
+ }
+ w[i] = obj2.getInt();
+ obj2.free();
+ }
+ obj.free();
+ if (w[0] < 0 || w[0] > 8 ||
+ w[1] < 0 || w[1] > 8 ||
+ w[2] < 0 || w[2] > 8) {
+ goto err0;
+ }
+
+ xrefStr->reset();
+ dict->lookupNF("Index", &idx);
+ if (idx.isArray()) {
+ for (i = 0; i+1 < idx.arrayGetLength(); i += 2) {
+ if (!idx.arrayGet(i, &obj)->isInt()) {
+ idx.free();
+ goto err1;
+ }
+ first = obj.getInt();
+ obj.free();
+ if (!idx.arrayGet(i+1, &obj)->isInt()) {
+ idx.free();
+ goto err1;
+ }
+ n = obj.getInt();
+ obj.free();
+ if (first < 0 || n < 0 ||
+ !readXRefStreamSection(xrefStr, w, first, n)) {
+ idx.free();
+ goto err0;
+ }
+ }
+ } else {
+ if (!readXRefStreamSection(xrefStr, w, 0, newSize)) {
+ idx.free();
+ goto err0;
+ }
+ }
+ idx.free();
+
+ //~ this can be a 64-bit int (?)
+ dict->lookupNF("Prev", &obj);
+ if (obj.isInt()) {
+ *pos = (GFileOffset)(Guint)obj.getInt();
+ more = gTrue;
+ } else {
+ more = gFalse;
+ }
+ obj.free();
+ if (trailerDict.isNone()) {
+ trailerDict.initDict(dict);
+ }
+
+ return more;
+
+ err1:
+ obj.free();
+ err0:
+ ok = gFalse;
+ return gFalse;
+}
+
+GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
+ long long type, gen, offset;
+ int c, newSize, i, j;
+
+ if (first + n < 0) {
+ return gFalse;
+ }
+ if (first + n > size) {
+ for (newSize = size ? 2 * size : 1024;
+ first + n > newSize && newSize > 0;
+ newSize <<= 1) ;
+ if (newSize < 0) {
+ return gFalse;
+ }
+ entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
+ for (i = size; i < newSize; ++i) {
+ entries[i].offset = (GFileOffset)-1;
+ entries[i].type = xrefEntryFree;
+ }
+ size = newSize;
+ }
+ for (i = first; i < first + n; ++i) {
+ if (w[0] == 0) {
+ type = 1;
+ } else {
+ for (type = 0, j = 0; j < w[0]; ++j) {
+ if ((c = xrefStr->getChar()) == EOF) {
+ return gFalse;
+ }
+ type = (type << 8) + c;
+ }
+ }
+ for (offset = 0, j = 0; j < w[1]; ++j) {
+ if ((c = xrefStr->getChar()) == EOF) {
+ return gFalse;
+ }
+ offset = (offset << 8) + c;
+ }
+ if (offset < 0 || offset > GFILEOFFSET_MAX) {
+ return gFalse;
+ }
+ for (gen = 0, j = 0; j < w[2]; ++j) {
+ if ((c = xrefStr->getChar()) == EOF) {
+ return gFalse;
+ }
+ gen = (gen << 8) + c;
+ }
+ // some PDF generators include a free entry with gen=0xffffffff
+ if ((gen < 0 || gen > INT_MAX) && type != 0) {
+ return gFalse;
+ }
+ if (entries[i].offset == (GFileOffset)-1) {
+ switch (type) {
+ case 0:
+ entries[i].offset = (GFileOffset)offset;
+ entries[i].gen = (int)gen;
+ entries[i].type = xrefEntryFree;
+ break;
+ case 1:
+ entries[i].offset = (GFileOffset)offset;
+ entries[i].gen = (int)gen;
+ entries[i].type = xrefEntryUncompressed;
+ break;
+ case 2:
+ entries[i].offset = (GFileOffset)offset;
+ entries[i].gen = (int)gen;
+ entries[i].type = xrefEntryCompressed;
+ break;
+ default:
+ return gFalse;
+ }
+ if (i > last) {
+ last = i;
+ }
+ }
+ }
+
+ return gTrue;
+}
+
+// Attempt to construct an xref table for a damaged file.
+GBool XRef::constructXRef() {
+ int *streamObjNums = NULL;
+ int streamObjNumsLen = 0;
+ int streamObjNumsSize = 0;
+ int lastObjNum = -1;
+ rootNum = -1;
+ int streamEndsSize = 0;
+ streamEndsLen = 0;
+ char buf[4096 + 1];
+ str->reset();
+ GFileOffset bufPos = start;
+ char *p = buf;
+ char *end = buf;
+ GBool startOfLine = gTrue;
+ GBool eof = gFalse;
+ while (1) {
+ if (end - p < 256 && !eof) {
+ memcpy(buf, p, end - p);
+ bufPos += p - buf;
+ p = buf + (end - p);
+ int n = (int)(buf + 4096 - p);
+ int m = str->getBlock(p, n);
+ end = p + m;
+ *end = '\0';
+ p = buf;
+ eof = m < n;
+ }
+ if (p == end && eof) {
+ break;
+ }
+ if (startOfLine && !strncmp(p, "trailer", 7)) {
+ constructTrailerDict((GFileOffset)(bufPos + (p + 7 - buf)));
+ p += 7;
+ startOfLine = gFalse;
+ } else if (startOfLine && !strncmp(p, "endstream", 9)) {
+ if (streamEndsLen == streamEndsSize) {
+ streamEndsSize += 64;
+ streamEnds = (GFileOffset *)greallocn(streamEnds, streamEndsSize,
+ sizeof(GFileOffset));
+ }
+ streamEnds[streamEndsLen++] = (GFileOffset)(bufPos + (p - buf));
+ p += 9;
+ startOfLine = gFalse;
+ } else if (startOfLine && *p >= '0' && *p <= '9') {
+ p = constructObjectEntry(p, (GFileOffset)(bufPos + (p - buf)),
+ &lastObjNum);
+ startOfLine = gFalse;
+ } else if (p[0] == '>' && p[1] == '>') {
+ p += 2;
+ startOfLine = gFalse;
+ // skip any PDF whitespace except for '\0'
+ while (*p == '\t' || *p == '\n' || *p == '\x0c' ||
+ *p == '\r' || *p == ' ') {
+ if (*p == '\n' || *p == '\r') {
+ startOfLine = gTrue;
+ }
+ ++p;
+ }
+ if (!strncmp(p, "stream", 6)) {
+ if (lastObjNum >= 0) {
+ if (streamObjNumsLen == streamObjNumsSize) {
+ streamObjNumsSize += 64;
+ streamObjNums = (int *)greallocn(streamObjNums, streamObjNumsSize,
+ sizeof(int));
+ }
+ streamObjNums[streamObjNumsLen++] = lastObjNum;
+ }
+ p += 6;
+ startOfLine = gFalse;
+ }
+ } else {
+ if (*p == '\n' || *p == '\r') {
+ startOfLine = gTrue;
+ } else if (!Lexer::isSpace(*p & 0xff)) {
+ startOfLine = gFalse;
+ }
+ ++p;
+ }
+ }
+
+ // read each stream object, check for xref or object stream
+ for (int i = 0; i < streamObjNumsLen; ++i) {
+ Object obj;
+ fetch(streamObjNums[i], entries[streamObjNums[i]].gen, &obj);
+ if (obj.isStream()) {
+ Dict *dict = obj.streamGetDict();
+ Object type;
+ dict->lookup("Type", &type);
+ if (type.isName("XRef")) {
+ saveTrailerDict(dict, gTrue);
+ } else if (type.isName("ObjStm")) {
+ constructObjectStreamEntries(&obj, streamObjNums[i]);
+ }
+ type.free();
+ }
+ obj.free();
+ }
+
+ gfree(streamObjNums);
+
+ // if the file is encrypted, then any objects fetched here will be
+ // incorrect (because decryption is not yet enabled), so clear the
+ // cache to avoid that problem
+ for (int i = 0; i < xrefCacheSize; ++i) {
+ if (cache[i].num >= 0) {
+ cache[i].obj.free();
+ cache[i].num = -1;
+ }
+ }
+
+ if (rootNum < 0) {
+ error(errSyntaxError, -1, "Couldn't find trailer dictionary");
+ return gFalse;
+ }
+ return gTrue;
+}
+
+// Attempt to construct a trailer dict at [pos] in the stream.
+void XRef::constructTrailerDict(GFileOffset pos) {
+ Object newTrailerDict, obj;
+ obj.initNull();
+ Parser *parser =
+ new Parser(NULL,
+ new Lexer(NULL,
+ str->makeSubStream(pos, gFalse, 0, &obj)),
+ gFalse);
+ parser->getObj(&newTrailerDict);
+ if (newTrailerDict.isDict()) {
+ saveTrailerDict(newTrailerDict.getDict(), gFalse);
+ }
+ newTrailerDict.free();
+ delete parser;
+}
+
+// If [dict] "looks like" a trailer dict (i.e., has a Root entry),
+// save it as the trailer dict.
+void XRef::saveTrailerDict(Dict *dict, GBool isXRefStream) {
+ Object obj;
+ dict->lookupNF("Root", &obj);
+ if (obj.isRef()) {
+ int newRootNum = obj.getRefNum();
+ // the xref stream scanning code runs after all objects are found,
+ // so we can check for a valid root object number at that point
+ if (!isXRefStream || newRootNum <= last) {
+ rootNum = newRootNum;
+ rootGen = obj.getRefGen();
+ if (!trailerDict.isNone()) {
+ trailerDict.free();
+ }
+ trailerDict.initDict(dict);
+ }
+ }
+ obj.free();
+}
+
+// Look for an object header ("nnn ggg obj") at [p]. The first
+// character at *[p] is a digit. [pos] is the position of *[p].
+char *XRef::constructObjectEntry(char *p, GFileOffset pos, int *objNum) {
+ // we look for non-end-of-line space characters here, to deal with
+ // situations like:
+ // nnn <-- garbage digits on a line
+ // nnn nnn obj <-- actual object
+ // and we also ignore '\0' (because it's used to terminate the
+ // buffer in this damage-scanning code)
+ int num = 0;
+ do {
+ num = (num * 10) + (*p - '0');
+ ++p;
+ } while (*p >= '0' && *p <= '9' && num < 100000000);
+ if (*p != '\t' && *p != '\x0c' && *p != ' ') {
+ return p;
+ }
+ do {
+ ++p;
+ } while (*p == '\t' || *p == '\x0c' || *p == ' ');
+ if (!(*p >= '0' && *p <= '9')) {
+ return p;
+ }
+ int gen = 0;
+ do {
+ gen = (gen * 10) + (*p - '0');
+ ++p;
+ } while (*p >= '0' && *p <= '9' && gen < 100000000);
+ if (*p != '\t' && *p != '\x0c' && *p != ' ') {
+ return p;
+ }
+ do {
+ ++p;
+ } while (*p == '\t' || *p == '\x0c' || *p == ' ');
+ if (strncmp(p, "obj", 3)) {
+ return p;
+ }
+
+ if (constructXRefEntry(num, gen, pos - start, xrefEntryUncompressed)) {
+ *objNum = num;
+ }
+
+ return p;
+}
+
+// Read the header from an object stream, and add xref entries for all
+// of its objects.
+void XRef::constructObjectStreamEntries(Object *objStr, int objStrObjNum) {
+ Object obj1, obj2;
+
+ // get the object count
+ if (!objStr->streamGetDict()->lookup("N", &obj1)->isInt()) {
+ obj1.free();
+ return;
+ }
+ int nObjects = obj1.getInt();
+ obj1.free();
+ if (nObjects <= 0 || nObjects > 1000000) {
+ return;
+ }
+
+ // parse the header: object numbers and offsets
+ Parser *parser = new Parser(NULL,
+ new Lexer(NULL, objStr->getStream()->copy()),
+ gFalse);
+ for (int i = 0; i < nObjects; ++i) {
+ parser->getObj(&obj1, gTrue);
+ parser->getObj(&obj2, gTrue);
+ if (obj1.isInt() && obj2.isInt()) {
+ int num = obj1.getInt();
+ if (num >= 0 && num < 1000000) {
+ constructXRefEntry(num, i, objStrObjNum, xrefEntryCompressed);
+ }
+ }
+ obj2.free();
+ obj1.free();
+ }
+ delete parser;
+}
+
+GBool XRef::constructXRefEntry(int num, int gen, GFileOffset pos,
+ XRefEntryType type) {
+ if (num >= size) {
+ int newSize = (num + 1 + 255) & ~255;
+ if (newSize < 0) {
+ return gFalse;
+ }
+ entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
+ for (int i = size; i < newSize; ++i) {
+ entries[i].offset = (GFileOffset)-1;
+ entries[i].type = xrefEntryFree;
+ }
+ size = newSize;
+ }
+
+ if (entries[num].type == xrefEntryFree ||
+ gen >= entries[num].gen) {
+ entries[num].offset = pos;
+ entries[num].gen = gen;
+ entries[num].type = type;
+ if (num > last) {
+ last = num;
+ }
+ }
+
+ return gTrue;
+}
+
+void XRef::setEncryption(int permFlagsA, GBool ownerPasswordOkA,
+ Guchar *fileKeyA, int keyLengthA, int encVersionA,
+ CryptAlgorithm encAlgorithmA) {
+ int i;
+
+ encrypted = gTrue;
+ permFlags = permFlagsA;
+ ownerPasswordOk = ownerPasswordOkA;
+ if (keyLengthA <= 32) {
+ keyLength = keyLengthA;
+ } else {
+ keyLength = 32;
+ }
+ for (i = 0; i < keyLength; ++i) {
+ fileKey[i] = fileKeyA[i];
+ }
+ encVersion = encVersionA;
+ encAlgorithm = encAlgorithmA;
+}
+
+GBool XRef::getEncryption(int *permFlagsA, GBool *ownerPasswordOkA,
+ int *keyLengthA, int *encVersionA,
+ CryptAlgorithm *encAlgorithmA) {
+ if (!encrypted) {
+ return gFalse;
+ }
+ *permFlagsA = permFlags;
+ *ownerPasswordOkA = ownerPasswordOk;
+ *keyLengthA = keyLength;
+ *encVersionA = encVersion;
+ *encAlgorithmA = encAlgorithm;
+ return gTrue;
+}
+
+GBool XRef::okToPrint(GBool ignoreOwnerPW) {
+ return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint);
+}
+
+GBool XRef::okToChange(GBool ignoreOwnerPW) {
+ return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange);
+}
+
+GBool XRef::okToCopy(GBool ignoreOwnerPW) {
+ return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy);
+}
+
+GBool XRef::okToAddNotes(GBool ignoreOwnerPW) {
+ return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes);
+}
+
+Object *XRef::fetch(int num, int gen, Object *obj, int recursion) {
+ XRefEntry *e;
+ Parser *parser;
+ Object obj1, obj2, obj3;
+ XRefCacheEntry tmp;
+ int i, j;
+
+ // check for bogus ref - this can happen in corrupted PDF files
+ if (num < 0 || num >= size) {
+ goto err;
+ }
+
+ // check the cache
+#if MULTITHREADED
+ gLockMutex(&cacheMutex);
+#endif
+ if (cache[0].num == num && cache[0].gen == gen) {
+ cache[0].obj.copy(obj);
+#if MULTITHREADED
+ gUnlockMutex(&cacheMutex);
+#endif
+ return obj;
+ }
+ for (i = 1; i < xrefCacheSize; ++i) {
+ if (cache[i].num == num && cache[i].gen == gen) {
+ tmp = cache[i];
+ for (j = i; j > 0; --j) {
+ cache[j] = cache[j - 1];
+ }
+ cache[0] = tmp;
+ cache[0].obj.copy(obj);
+#if MULTITHREADED
+ gUnlockMutex(&cacheMutex);
+#endif
+ return obj;
+ }
+ }
+#if MULTITHREADED
+ gUnlockMutex(&cacheMutex);
+#endif
+
+ e = &entries[num];
+ switch (e->type) {
+
+ case xrefEntryUncompressed:
+ if (e->gen != gen) {
+ goto err;
+ }
+ obj1.initNull();
+ parser = new Parser(this,
+ new Lexer(this,
+ str->makeSubStream(start + e->offset, gFalse, 0, &obj1)),
+ gTrue);
+ parser->getObj(&obj1, gTrue);
+ parser->getObj(&obj2, gTrue);
+ parser->getObj(&obj3, gTrue);
+ if (!obj1.isInt() || obj1.getInt() != num ||
+ !obj2.isInt() || obj2.getInt() != gen ||
+ !obj3.isCmd("obj")) {
+ obj1.free();
+ obj2.free();
+ obj3.free();
+ delete parser;
+ goto err;
+ }
+ parser->getObj(obj, gFalse, encrypted ? fileKey : (Guchar *)NULL,
+ encAlgorithm, keyLength, num, gen, recursion);
+ obj1.free();
+ obj2.free();
+ obj3.free();
+ delete parser;
+ break;
+
+ case xrefEntryCompressed:
+#if 0 // Adobe apparently ignores the generation number on compressed objects
+ if (gen != 0) {
+ goto err;
+ }
+#endif
+ if (e->offset >= (GFileOffset)size ||
+ entries[e->offset].type != xrefEntryUncompressed) {
+ error(errSyntaxError, -1, "Invalid object stream");
+ goto err;
+ }
+ if (!getObjectStreamObject((int)e->offset, e->gen, num, obj)) {
+ goto err;
+ }
+ break;
+
+ default:
+ goto err;
+ }
+
+ // put the new object in the cache, throwing away the oldest object
+ // currently in the cache
+#if MULTITHREADED
+ gLockMutex(&cacheMutex);
+#endif
+ if (cache[xrefCacheSize - 1].num >= 0) {
+ cache[xrefCacheSize - 1].obj.free();
+ }
+ for (i = xrefCacheSize - 1; i > 0; --i) {
+ cache[i] = cache[i - 1];
+ }
+ cache[0].num = num;
+ cache[0].gen = gen;
+ obj->copy(&cache[0].obj);
+#if MULTITHREADED
+ gUnlockMutex(&cacheMutex);
+#endif
+
+ return obj;
+
+ err:
+ return obj->initNull();
+}
+
+GBool XRef::getObjectStreamObject(int objStrNum, int objIdx,
+ int objNum, Object *obj) {
+ ObjectStream *objStr;
+
+#if MULTITHREADED
+ gLockMutex(&objStrsMutex);
+#endif
+ if (!(objStr = getObjectStream(objStrNum))) {
+#if MULTITHREADED
+ gUnlockMutex(&objStrsMutex);
+#endif
+ return gFalse;
+ }
+ cleanObjectStreamCache();
+ objStr->getObject(objIdx, objNum, obj);
+#if MULTITHREADED
+ gUnlockMutex(&objStrsMutex);
+#endif
+ return gTrue;
+}
+
+// NB: objStrsMutex must be locked when calling this function.
+ObjectStream *XRef::getObjectStream(int objStrNum) {
+ ObjectStream *objStr;
+ int i, j;
+
+ // check the MRU entry in the cache
+ if (objStrs[0] && objStrs[0]->getObjStrNum() == objStrNum) {
+ objStr = objStrs[0];
+ objStrLastUse[0] = objStrTime++;
+ return objStr;
+ }
+
+ // check the rest of the cache
+ for (i = 1; i < objStrCacheLength; ++i) {
+ if (objStrs[i] && objStrs[i]->getObjStrNum() == objStrNum) {
+ objStr = objStrs[i];
+ for (j = i; j > 0; --j) {
+ objStrs[j] = objStrs[j - 1];
+ objStrLastUse[j] = objStrLastUse[j - 1];
+ }
+ objStrs[0] = objStr;
+ objStrLastUse[0] = objStrTime++;
+ return objStr;
+ }
+ }
+
+ // load a new ObjectStream
+ objStr = new ObjectStream(this, objStrNum);
+ if (!objStr->isOk()) {
+ delete objStr;
+ return NULL;
+ }
+
+ // add to the cache
+ if (objStrCacheLength == objStrCacheSize) {
+ delete objStrs[objStrCacheSize - 1];
+ --objStrCacheLength;
+ }
+ for (j = objStrCacheLength; j > 0; --j) {
+ objStrs[j] = objStrs[j - 1];
+ objStrLastUse[j] = objStrLastUse[j - 1];
+ }
+ ++objStrCacheLength;
+ objStrs[0] = objStr;
+ objStrLastUse[0] = objStrTime++;
+
+ return objStr;
+}
+
+// If the oldest (least recently used) entry in the object stream
+// cache is more than objStrCacheTimeout accesses old (hasn't been
+// used in the last objStrCacheTimeout accesses), eject it from the
+// cache.
+void XRef::cleanObjectStreamCache() {
+ // NB: objStrTime and objStrLastUse[] are unsigned ints, so the
+ // mod-2^32 arithmetic makes the subtraction work out, even if the
+ // time wraps around.
+ if (objStrCacheLength > 1 &&
+ objStrTime - objStrLastUse[objStrCacheLength - 1]
+ > objStrCacheTimeout) {
+ delete objStrs[objStrCacheLength - 1];
+ objStrs[objStrCacheLength - 1] = NULL;
+ --objStrCacheLength;
+ }
+}
+
+Object *XRef::getDocInfo(Object *obj) {
+ return trailerDict.dictLookup("Info", obj);
+}
+
+// Added for the pdftex project.
+Object *XRef::getDocInfoNF(Object *obj) {
+ return trailerDict.dictLookupNF("Info", obj);
+}
+
+GBool XRef::getStreamEnd(GFileOffset streamStart, GFileOffset *streamEnd) {
+ int a, b, m;
+
+ if (streamEndsLen == 0 ||
+ streamStart > streamEnds[streamEndsLen - 1]) {
+ return gFalse;
+ }
+
+ a = -1;
+ b = streamEndsLen - 1;
+ // invariant: streamEnds[a] < streamStart <= streamEnds[b]
+ while (b - a > 1) {
+ m = (a + b) / 2;
+ if (streamStart <= streamEnds[m]) {
+ b = m;
+ } else {
+ a = m;
+ }
+ }
+ *streamEnd = streamEnds[b];
+ return gTrue;
+}
+
+GFileOffset XRef::strToFileOffset(char *s) {
+ GFileOffset x, d;
+ char *p;
+
+ x = 0;
+ for (p = s; *p && isdigit(*p & 0xff); ++p) {
+ d = *p - '0';
+ if (x > (GFILEOFFSET_MAX - d) / 10) {
+ break;
+ }
+ x = 10 * x + d;
+ }
+ return x;
+}