xpdf-no-select-disableHEAD master

author: Calvin Morrison <calvin@pobox.com> 2023-04-05 14:13:39 -0400
committer: Calvin Morrison <calvin@pobox.com> 2023-04-05 14:13:39 -0400
commit: 835e373b3eeaabcd0621ed6798ab500f37982fae (patch)
tree: dfa16b0e2e1b4956b38f693220eac4e607802133 /xpdf/XRef.cc
1 files changed, 1431 insertions, 0 deletions
diff --git a/xpdf/XRef.cc b/xpdf/XRef.cc
new file mode 100644
index 0000000..51af86d
--- /dev/null
+++ b/xpdf/XRef.cc
@@ -0,0 +1,1431 @@
+//========================================================================
+//
+// XRef.cc
+//
+// Copyright 1996-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include <aconf.h>
+
+#ifdef USE_GCC_PRAGMAS
+#pragma implementation
+#endif
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include "gmem.h"
+#include "gmempp.h"
+#include "gfile.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Lexer.h"
+#include "Parser.h"
+#include "Dict.h"
+#include "Error.h"
+#include "ErrorCodes.h"
+#include "XRef.h"
+
+//------------------------------------------------------------------------
+
+// Size of the window at the end of the file that XRef::getStartXref()
+// reads and scans.
+#define xrefSearchSize 1024	// read this many bytes at end of file
+				//   to look for 'startxref'
+
+//------------------------------------------------------------------------
+// Permission bits
+//------------------------------------------------------------------------
+
+// Bit masks tested against XRef::permFlags by okToPrint(),
+// okToChange(), okToCopy() and okToAddNotes() respectively.
+#define permPrint    (1<<2)
+#define permChange   (1<<3)
+#define permCopy     (1<<4)
+#define permNotes    (1<<5)
+// Initial value of permFlags set by the XRef constructor.  Bits 2-15
+// are set, so all four permission bits above are enabled.
+#define defPermFlags 0xfffc
+
+//------------------------------------------------------------------------
+// XRefPosSet
+//------------------------------------------------------------------------
+
+// A growable set of file offsets, stored as a sorted array.
+// XRef::readXRef() records each xref section position here so that a
+// loop in the chain of sections can be detected.
+class XRefPosSet {
+public:
+
+  XRefPosSet();
+  ~XRefPosSet();
+  // Insert <pos> at its sorted position; does nothing if <pos> is
+  // already present.
+  void add(GFileOffset pos);
+  // Return true if <pos> is in the set.
+  GBool check(GFileOffset pos);
+  // Return the number of offsets stored.
+  int getLength() { return len; }
+  // Return the <idx>th offset (ascending order).  <idx> is not
+  // range-checked.
+  GFileOffset get(int idx) { return tab[idx]; }
+
+private:
+
+  // Binary search: returns the index of <pos> if present, otherwise
+  // the index at which it would be inserted.
+  int find(GFileOffset pos);
+
+  // sorted array of offsets
+  GFileOffset *tab;
+  // allocated capacity of tab, in elements
+  int size;
+  // number of elements of tab in use
+  int len;
+};
+
+// Create an empty set with room for 16 offsets.
+XRefPosSet::XRefPosSet() {
+  len = 0;
+  size = 16;
+  tab = (GFileOffset *)gmallocn(size, sizeof(GFileOffset));
+}
+
+// Release the offset array.
+XRefPosSet::~XRefPosSet() {
+  gfree(tab);
+}
+
+// Insert <pos> into the sorted array, doubling the capacity when it
+// is full.  Inserting a value that is already present is a no-op.
+void XRefPosSet::add(GFileOffset pos) {
+  const int slot = find(pos);
+  const GBool present = slot < len && tab[slot] == pos;
+
+  if (!present) {
+    if (len == size) {
+      // doubling must not overflow int
+      if (size > INT_MAX / 2) {
+        gMemError("Integer overflow in XRefPosSet::add()");
+      }
+      size *= 2;
+      tab = (GFileOffset *)greallocn(tab, size, sizeof(GFileOffset));
+    }
+    // open a gap at <slot> by shifting the tail up one element
+    if (slot < len) {
+      memmove(&tab[slot + 1], &tab[slot], (len - slot) * sizeof(GFileOffset));
+    }
+    tab[slot] = pos;
+    ++len;
+  }
+}
+
+// Return true if <pos> has been added to the set.
+GBool XRefPosSet::check(GFileOffset pos) {
+  const int slot = find(pos);
+
+  if (slot >= len) {
+    return gFalse;
+  }
+  return tab[slot] == pos;
+}
+
+// Binary search of the sorted array.  Returns the index holding <pos>
+// if it is present; otherwise returns the index of the first element
+// greater than <pos> (which is len if there is none).
+int XRefPosSet::find(GFileOffset pos) {
+  int lo = -1;
+  int hi = len;
+
+  // invariant: tab[lo] < pos < tab[hi], with lo == -1 and hi == len
+  // acting as sentinels
+  while (hi - lo > 1) {
+    const int mid = (lo + hi) / 2;
+    if (tab[mid] == pos) {
+      return mid;
+    }
+    if (tab[mid] < pos) {
+      lo = mid;
+    } else {
+      hi = mid;
+    }
+  }
+  return hi;
+}
+
+//------------------------------------------------------------------------
+// ObjectStream
+//------------------------------------------------------------------------
+
+// An object stream whose contained objects have all been parsed into
+// memory by the constructor.  XRef keeps a small cache of these.
+class ObjectStream {
+public:
+
+  // Create an object stream, using object number <objStrNum>,
+  // generation 0.
+  ObjectStream(XRef *xref, int objStrNumA);
+
+  // Returns false if the constructor failed to load or parse the
+  // stream.
+  GBool isOk() { return ok; }
+
+  ~ObjectStream();
+
+  // Return the object number of this object stream.
+  int getObjStrNum() { return objStrNum; }
+
+  // Get the <objIdx>th object from this stream, which should be
+  // object number <objNum>, generation 0.  If <objIdx> is out of range
+  // or the stored object number differs from <objNum>, <obj> is set
+  // to a null object.  Returns <obj>.
+  Object *getObject(int objIdx, int objNum, Object *obj);
+
+private:
+
+  int objStrNum;		// object number of the object stream
+  int nObjects;			// number of objects in the stream
+  Object *objs;			// the objects (length = nObjects)
+  int *objNums;			// the object numbers (length = nObjects)
+  GBool ok;
+};
+
+// Fetch object <objStrNumA> (generation 0) from <xref>, which must be
+// a stream, and parse it as an object stream: first the header of
+// (object number, offset) pairs, then every contained object.  On any
+// failure ok is left false; whatever was allocated is released by the
+// destructor.
+ObjectStream::ObjectStream(XRef *xref, int objStrNumA) {
+  Stream *str;
+  Lexer *lexer;
+  Parser *parser;
+  int *offsets;
+  Object objStr, obj1, obj2;
+  int first, i;
+
+  objStrNum = objStrNumA;
+  nObjects = 0;
+  objs = NULL;
+  objNums = NULL;
+  ok = gFalse;
+
+  if (!xref->fetch(objStrNum, 0, &objStr)->isStream()) {
+    goto err1;
+  }
+
+  // N: number of objects in the stream (must be positive)
+  if (!objStr.streamGetDict()->lookup("N", &obj1)->isInt()) {
+    obj1.free();
+    goto err1;
+  }
+  nObjects = obj1.getInt();
+  obj1.free();
+  if (nObjects <= 0) {
+    goto err1;
+  }
+
+  // First: used below as the byte length of the header section
+  if (!objStr.streamGetDict()->lookup("First", &obj1)->isInt()) {
+    obj1.free();
+    goto err1;
+  }
+  first = obj1.getInt();
+  obj1.free();
+  if (first < 0) {
+    goto err1;
+  }
+
+  // this is an arbitrary limit to avoid integer overflow problems
+  // in the 'new Object[nObjects]' call (Acrobat apparently limits
+  // object streams to 100-200 objects)
+  if (nObjects > 1000000) {
+    error(errSyntaxError, -1, "Too many objects in an object stream");
+    goto err1;
+  }
+  objs = new Object[nObjects];
+  objNums = (int *)gmallocn(nObjects, sizeof(int));
+  offsets = (int *)gmallocn(nObjects, sizeof(int));
+
+  // parse the header: object numbers and offsets
+  objStr.streamReset();
+  obj1.initNull();
+  // the header parser is limited to the first <first> bytes
+  str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first);
+  lexer = new Lexer(xref, str);
+  parser = new Parser(xref, lexer, gFalse);
+  for (i = 0; i < nObjects; ++i) {
+    parser->getObj(&obj1, gTrue);
+    parser->getObj(&obj2, gTrue);
+    if (!obj1.isInt() || !obj2.isInt()) {
+      obj1.free();
+      obj2.free();
+      delete parser;
+      gfree(offsets);
+      goto err2;
+    }
+    objNums[i] = obj1.getInt();
+    offsets[i] = obj2.getInt();
+    obj1.free();
+    obj2.free();
+    // reject negative values and offsets that decrease; the object
+    // lengths computed below rely on offsets being non-decreasing
+    if (objNums[i] < 0 || offsets[i] < 0 ||
+	(i > 0 && offsets[i] < offsets[i-1])) {
+      delete parser;
+      gfree(offsets);
+      goto err2;
+    }
+  }
+  lexer->skipToEOF();
+  delete parser;
+
+  // skip to the first object - this shouldn't be necessary because
+  // the First key is supposed to be equal to offsets[0], but just in
+  // case...
+  if (first < offsets[0]) {
+    objStr.getStream()->discardChars(offsets[0] - first);
+  }
+
+  // parse the objects
+  for (i = 0; i < nObjects; ++i) {
+    obj1.initNull();
+    // every object but the last is limited to the distance to the
+    // next offset; the last one is given no length limit
+    if (i == nObjects - 1) {
+      str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0);
+    } else {
+      str = new EmbedStream(objStr.getStream(), &obj1, gTrue,
+			    offsets[i+1] - offsets[i]);
+    }
+    lexer = new Lexer(xref, str);
+    parser = new Parser(xref, lexer, gFalse);
+    parser->getObj(&objs[i]);
+    lexer->skipToEOF();
+    delete parser;
+  }
+
+  gfree(offsets);
+  ok = gTrue;
+
+  // err2: the stream was reset, so close it; err1: only objStr needs
+  // freeing.  The success path falls through both labels.
+ err2:
+  objStr.streamClose();
+ err1:
+  objStr.free();
+}
+
+// Free every parsed object, then the object array and the
+// object-number array.
+ObjectStream::~ObjectStream() {
+  if (objs != NULL) {
+    int k = 0;
+    while (k < nObjects) {
+      objs[k].free();
+      ++k;
+    }
+    delete[] objs;
+  }
+  gfree(objNums);
+}
+
+// Copy the <objIdx>th object into <obj> if the index is valid and the
+// stored object number equals <objNum>; otherwise set <obj> to a null
+// object.  Returns <obj>.
+Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) {
+  const GBool inRange = objIdx >= 0 && objIdx < nObjects;
+
+  if (inRange && objNums[objIdx] == objNum) {
+    objs[objIdx].copy(obj);
+  } else {
+    obj->initNull();
+  }
+  return obj;
+}
+
+//------------------------------------------------------------------------
+// XRef
+//------------------------------------------------------------------------
+
+// Build the cross-reference data for the PDF file in <strA>.  If
+// <repair> is set, the table is reconstructed by scanning the file
+// (constructXRef); otherwise the 'startxref' pointer is located and
+// the chain of xref sections is read.  On failure ok is set to false
+// and errCode to errDamaged; callers must check ok after construction.
+XRef::XRef(BaseStream *strA, GBool repair) {
+  GFileOffset pos;
+  Object obj;
+  XRefPosSet *posSet;
+  int i;
+
+  ok = gTrue;
+  errCode = errNone;
+  repaired = gFalse;
+  size = 0;
+  last = -1;
+  entries = NULL;
+  lastStartxrefPos = 0;
+  xrefTablePos = NULL;
+  xrefTablePosLen = 0;
+  streamEnds = NULL;
+  streamEndsLen = 0;
+  for (i = 0; i < objStrCacheSize; ++i) {
+    objStrs[i] = NULL;
+    objStrLastUse[i] = 0;
+  }
+  objStrCacheLength = 0;
+  objStrTime = 0;
+
+  encrypted = gFalse;
+  permFlags = defPermFlags;
+  ownerPasswordOk = gFalse;
+
+  // num == -1 marks an unused object-cache slot
+  for (i = 0; i < xrefCacheSize; ++i) {
+    cache[i].num = -1;
+  }
+
+#if MULTITHREADED
+  gInitMutex(&objStrsMutex);
+  gInitMutex(&cacheMutex);
+#endif
+
+  str = strA;
+  start = str->getStart();
+
+  // if the 'repair' flag is set, try to reconstruct the xref table
+  if (repair) {
+    if (!(ok = constructXRef())) {
+      errCode = errDamaged;
+      return;
+    }
+    repaired = gTrue;
+
+  // if the 'repair' flag is not set, read the xref table
+  } else {
+
+    // read the trailer
+    pos = getStartXref();
+    if (pos == 0) {
+      errCode = errDamaged;
+      ok = gFalse;
+      return;
+    }
+
+    // read the xref table
+    // (readXRef updates pos to the next section and returns false
+    // when there is nothing more to read or on error; posSet records
+    // the visited positions)
+    posSet = new XRefPosSet();
+    while (readXRef(&pos, posSet, gFalse)) ;
+    // keep a copy of the visited section positions
+    xrefTablePosLen = posSet->getLength();
+    xrefTablePos = (GFileOffset *)gmallocn(xrefTablePosLen,
+					   sizeof(GFileOffset));
+    for (i = 0; i < xrefTablePosLen; ++i)  {
+      xrefTablePos[i] = posSet->get(i);
+    }
+    delete posSet;
+    if (!ok) {
+      errCode = errDamaged;
+      return;
+    }
+  }
+
+  // get the root dictionary (catalog) object
+  trailerDict.dictLookupNF("Root", &obj);
+  if (obj.isRef()) {
+    rootNum = obj.getRefNum();
+    rootGen = obj.getRefGen();
+    obj.free();
+  } else {
+    // no usable Root reference: fall back to reconstructing the table
+    // (note that 'repaired' is not set on this path)
+    obj.free();
+    if (!(ok = constructXRef())) {
+      errCode = errDamaged;
+      return;
+    }
+  }
+
+  // now set the trailer dictionary's xref pointer so we can fetch
+  // indirect objects from it
+  trailerDict.getDict()->setXRef(this);
+}
+
+// Release the cached objects, the entry table, the trailer
+// dictionary, the saved position arrays and the cached object
+// streams.
+XRef::~XRef() {
+  // only slots with a non-negative object number hold an object
+  for (int slot = 0; slot < xrefCacheSize; ++slot) {
+    if (cache[slot].num < 0) {
+      continue;
+    }
+    cache[slot].obj.free();
+  }
+  gfree(entries);
+  trailerDict.free();
+  if (xrefTablePos != NULL) {
+    gfree(xrefTablePos);
+  }
+  if (streamEnds != NULL) {
+    gfree(streamEnds);
+  }
+  for (int slot = 0; slot < objStrCacheSize; ++slot) {
+    if (objStrs[slot] != NULL) {
+      delete objStrs[slot];
+    }
+  }
+#if MULTITHREADED
+  gDestroyMutex(&objStrsMutex);
+  gDestroyMutex(&cacheMutex);
+#endif
+}
+
+// Locate the last 'startxref' keyword within the final
+// xrefSearchSize bytes of the file and parse the number following it.
+// Stores the result in lastXRefPos and the keyword's position in
+// lastStartxrefPos.  Returns the parsed offset, or 0 if the keyword
+// was not found.
+GFileOffset XRef::getStartXref() {
+  char tail[xrefSearchSize+1];
+
+  // load the tail of the file and NUL-terminate it
+  str->setPos(xrefSearchSize, -1);
+  const int got = str->getBlock(tail, xrefSearchSize);
+  tail[got] = '\0';
+
+  // scan backwards for the keyword
+  int at = got - 9;
+  while (at >= 0 && strncmp(&tail[at], "startxref", 9) != 0) {
+    --at;
+  }
+  if (at < 0) {
+    return 0;
+  }
+
+  // skip the whitespace between the keyword and the number
+  char *num = &tail[at + 9];
+  while (isspace(*num & 0xff)) {
+    ++num;
+  }
+  lastXRefPos = strToFileOffset(num);
+  lastStartxrefPos = str->getPos() - got + at;
+
+  return lastXRefPos;
+}
+
+// Read one xref table section.  Also reads the associated trailer
+// dictionary, and returns the prev pointer (if any).  The [hybrid]
+// flag is true when following the XRefStm link in a hybrid-reference
+// file.
+//
+// On entry *pos is the section's position (relative to start); when
+// true is returned, *pos has been updated to the previous section and
+// the caller should call again.  Returns false when there is no
+// further section, when *pos has already been visited (recorded in
+// posSet), or on a parse error; only in the error case is ok cleared.
+GBool XRef::readXRef(GFileOffset *pos, XRefPosSet *posSet, GBool hybrid) {
+  Parser *parser;
+  Object obj;
+  GBool more;
+  char buf[100];
+  int n, i;
+
+  // check for a loop in the xref tables
+  if (posSet->check(*pos)) {
+    error(errSyntaxWarning, -1, "Infinite loop in xref table");
+    return gFalse;
+  }
+  posSet->add(*pos);
+
+  // the xref data should either be "xref ..." (for an xref table) or
+  // "nn gg obj << ... >> stream ..." (for an xref stream); possibly
+  // preceded by whitespace
+  str->setPos(start + *pos);
+  n = str->getBlock(buf, 100);
+  for (i = 0; i < n && Lexer::isSpace(buf[i]); ++i) ;
+
+  // parse an old-style xref table
+  // (never taken in hybrid mode, where only an xref stream is accepted)
+  if (!hybrid &&
+      i + 4 < n &&
+      buf[i] == 'x' && buf[i+1] == 'r' && buf[i+2] == 'e' && buf[i+3] == 'f' &&
+      Lexer::isSpace(buf[i+4])) {
+    more = readXRefTable(pos, i + 5, posSet);
+
+  // parse an xref stream
+  } else {
+    obj.initNull();
+    parser = new Parser(NULL,
+	       new Lexer(NULL,
+		 str->makeSubStream(start + *pos, gFalse, 0, &obj)),
+	       gTrue);
+    // expect "<int> <int> obj" followed by a stream object
+    if (!parser->getObj(&obj, gTrue)->isInt()) {
+      goto err;
+    }
+    obj.free();
+    if (!parser->getObj(&obj, gTrue)->isInt()) {
+      goto err;
+    }
+    obj.free();
+    if (!parser->getObj(&obj, gTrue)->isCmd("obj")) {
+      goto err;
+    }
+    obj.free();
+    if (!parser->getObj(&obj)->isStream()) {
+      goto err;
+    }
+    more = readXRefStream(obj.getStream(), pos, hybrid);
+    obj.free();
+    delete parser;
+  }
+
+  return more;
+
+  // only reached from the xref-stream branch, after parser was created
+ err:
+  obj.free();
+  delete parser;
+  ok = gFalse;
+  return gFalse;
+}
+
+// Read an old-style (text) xref table whose subsections start <offset>
+// bytes past *pos, followed by its trailer dictionary.
+//
+// Each subsection is "<first> <count>" followed by <count> entries of
+// the form "<offset> <generation> n|f".  An entry is stored only if
+// its slot has not been filled already, so the section read first
+// takes precedence.
+//
+// On success, the first trailer dictionary seen is saved in
+// trailerDict, an XRefStm entry (hybrid-reference file) is followed,
+// and if the trailer has a Prev entry *pos is set to it and true is
+// returned; otherwise false is returned with ok unchanged.  On a
+// syntax error ok is set to false and false is returned.
+//
+// Every numeric field must begin with a decimal digit and must not
+// overflow its type; input that violates this is rejected as a syntax
+// error rather than being accumulated into an undefined value.
+GBool XRef::readXRefTable(GFileOffset *pos, int offset, XRefPosSet *posSet) {
+  XRefEntry entry;
+  Parser *parser;
+  Object obj, obj2;
+  char buf[6];
+  GFileOffset off, pos2;
+  GBool more;
+  int first, n, digit, newSize, gen, i, c;
+
+  str->setPos(start + *pos + offset);
+
+  while (1) {
+    // each pass reads one subsection, or stops at the 'trailer' keyword
+    do {
+      c = str->getChar();
+    } while (Lexer::isSpace(c));
+    if (c == 't') {
+      if (str->getBlock(buf, 6) != 6 || memcmp(buf, "railer", 6)) {
+        goto err1;
+      }
+      break;
+    }
+
+    // subsection header: first object number
+    if (c < '0' || c > '9') {
+      goto err1;
+    }
+    first = 0;
+    do {
+      digit = c - '0';
+      if (first > (INT_MAX - digit) / 10) {
+        goto err1;
+      }
+      first = (first * 10) + digit;
+      c = str->getChar();
+    } while (c >= '0' && c <= '9');
+    if (!Lexer::isSpace(c)) {
+      goto err1;
+    }
+
+    // subsection header: entry count
+    do {
+      c = str->getChar();
+    } while (Lexer::isSpace(c));
+    // a non-digit here would yield a garbage (possibly negative) count
+    if (c < '0' || c > '9') {
+      goto err1;
+    }
+    n = 0;
+    do {
+      digit = c - '0';
+      if (n > (INT_MAX - digit) / 10) {
+        goto err1;
+      }
+      n = (n * 10) + digit;
+      c = str->getChar();
+    } while (c >= '0' && c <= '9');
+    if (!Lexer::isSpace(c)) {
+      goto err1;
+    }
+    // n is known to be non-negative here, so this cannot overflow
+    if (first > INT_MAX - n) {
+      goto err1;
+    }
+
+    // grow the entry table (by doubling) to hold first+n entries
+    if (first + n > size) {
+      for (newSize = size ? 2 * size : 1024;
+           first + n > newSize && newSize > 0;
+           newSize <<= 1) ;
+      if (newSize < 0) {
+        goto err1;
+      }
+      entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
+      for (i = size; i < newSize; ++i) {
+        // offset -1 marks a slot that has not been filled yet
+        entries[i].offset = (GFileOffset)-1;
+        entries[i].type = xrefEntryFree;
+      }
+      size = newSize;
+    }
+
+    // read the entries of this subsection
+    for (i = first; i < first + n; ++i) {
+
+      // byte offset
+      do {
+        c = str->getChar();
+      } while (Lexer::isSpace(c));
+      if (c < '0' || c > '9') {
+        goto err1;
+      }
+      off = 0;
+      do {
+        digit = c - '0';
+        if (off > (GFILEOFFSET_MAX - digit) / 10) {
+          goto err1;
+        }
+        off = (off * 10) + digit;
+        c = str->getChar();
+      } while (c >= '0' && c <= '9');
+      if (!Lexer::isSpace(c)) {
+        goto err1;
+      }
+      entry.offset = off;
+
+      // generation number
+      do {
+        c = str->getChar();
+      } while (Lexer::isSpace(c));
+      if (c < '0' || c > '9') {
+        goto err1;
+      }
+      gen = 0;
+      do {
+        digit = c - '0';
+        if (gen > (INT_MAX - digit) / 10) {
+          goto err1;
+        }
+        gen = (gen * 10) + digit;
+        c = str->getChar();
+      } while (c >= '0' && c <= '9');
+      if (!Lexer::isSpace(c)) {
+        goto err1;
+      }
+      entry.gen = gen;
+
+      // entry type: 'n' (in use) or 'f' (free)
+      do {
+        c = str->getChar();
+      } while (Lexer::isSpace(c));
+      if (c == 'n') {
+        entry.type = xrefEntryUncompressed;
+      } else if (c == 'f') {
+        entry.type = xrefEntryFree;
+      } else {
+        goto err1;
+      }
+      c = str->getChar();
+      if (!Lexer::isSpace(c)) {
+        goto err1;
+      }
+
+      // only fill slots that are still empty
+      if (entries[i].offset == (GFileOffset)-1) {
+        entries[i] = entry;
+        // PDF files of patents from the IBM Intellectual Property
+        // Network have a bug: the xref table claims to start at 1
+        // instead of 0.
+        if (i == 1 && first == 1 &&
+            entries[1].offset == 0 && entries[1].gen == 65535 &&
+            entries[1].type == xrefEntryFree) {
+          i = first = 0;
+          entries[0] = entries[1];
+          entries[1].offset = (GFileOffset)-1;
+        }
+        if (i > last) {
+          last = i;
+        }
+      }
+    }
+  }
+
+  // read the trailer dictionary
+  obj.initNull();
+  parser = new Parser(NULL,
+             new Lexer(NULL,
+               str->makeSubStream(str->getPos(), gFalse, 0, &obj)),
+             gTrue);
+  parser->getObj(&obj);
+  delete parser;
+  if (!obj.isDict()) {
+    obj.free();
+    goto err1;
+  }
+
+  // get the 'Prev' pointer
+  //~ this can be a 64-bit int (?)
+  obj.getDict()->lookupNF("Prev", &obj2);
+  if (obj2.isInt()) {
+    *pos = (GFileOffset)(Guint)obj2.getInt();
+    more = gTrue;
+  } else if (obj2.isRef()) {
+    // certain buggy PDF generators generate "/Prev NNN 0 R" instead
+    // of "/Prev NNN"
+    *pos = (GFileOffset)(Guint)obj2.getRefNum();
+    more = gTrue;
+  } else {
+    more = gFalse;
+  }
+  obj2.free();
+
+  // save the first trailer dictionary
+  if (trailerDict.isNone()) {
+    obj.copy(&trailerDict);
+  }
+
+  // check for an 'XRefStm' key
+  //~ this can be a 64-bit int (?)
+  if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) {
+    pos2 = (GFileOffset)(Guint)obj2.getInt();
+    readXRef(&pos2, posSet, gTrue);
+    if (!ok) {
+      obj2.free();
+      obj.free();
+      goto err1;
+    }
+  }
+  obj2.free();
+
+  obj.free();
+  return more;
+
+ err1:
+  ok = gFalse;
+  return gFalse;
+}
+
+// Read an xref stream.  The stream dictionary supplies Size (number
+// of entries), W (byte widths of the three fields of each entry) and
+// optionally Index (pairs of first-object-number / count; when it is
+// absent a single section 0..Size-1 is read).  If the dictionary has
+// an integer Prev entry, *pos is set to it and true is returned;
+// otherwise false is returned.  The dictionary is saved as the
+// trailer dictionary if none has been saved yet.  On error ok is set
+// to false and false is returned.  The [hybrid] argument is not
+// referenced in this function's body.
+GBool XRef::readXRefStream(Stream *xrefStr, GFileOffset *pos, GBool hybrid) {
+  Dict *dict;
+  int w[3];
+  GBool more;
+  Object obj, obj2, idx;
+  int newSize, first, n, i;
+
+  dict = xrefStr->getDict();
+
+  if (!dict->lookupNF("Size", &obj)->isInt()) {
+    goto err1;
+  }
+  newSize = obj.getInt();
+  obj.free();
+  if (newSize < 0) {
+    goto err1;
+  }
+  // grow the entry table to Size entries, marking the new slots empty
+  if (newSize > size) {
+    entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
+    for (i = size; i < newSize; ++i) {
+      entries[i].offset = (GFileOffset)-1;
+      entries[i].type = xrefEntryFree;
+    }
+    size = newSize;
+  }
+
+  if (!dict->lookupNF("W", &obj)->isArray() ||
+      obj.arrayGetLength() < 3) {
+    goto err1;
+  }
+  for (i = 0; i < 3; ++i) {
+    if (!obj.arrayGet(i, &obj2)->isInt()) {
+      obj2.free();
+      goto err1;
+    }
+    w[i] = obj2.getInt();
+    obj2.free();
+  }
+  obj.free();
+  // each field is accumulated into a long long by
+  // readXRefStreamSection, so widths above 8 bytes are rejected
+  if (w[0] < 0 || w[0] > 8 ||
+      w[1] < 0 || w[1] > 8 ||
+      w[2] < 0 || w[2] > 8) {
+    goto err0;
+  }
+
+  xrefStr->reset();
+  dict->lookupNF("Index", &idx);
+  if (idx.isArray()) {
+    // an odd trailing element of the Index array is ignored
+    for (i = 0; i+1 < idx.arrayGetLength(); i += 2) {
+      if (!idx.arrayGet(i, &obj)->isInt()) {
+	idx.free();
+	goto err1;
+      }
+      first = obj.getInt();
+      obj.free();
+      if (!idx.arrayGet(i+1, &obj)->isInt()) {
+	idx.free();
+	goto err1;
+      }
+      n = obj.getInt();
+      obj.free();
+      if (first < 0 || n < 0 ||
+	  !readXRefStreamSection(xrefStr, w, first, n)) {
+	idx.free();
+	goto err0;
+      }
+    }
+  } else {
+    if (!readXRefStreamSection(xrefStr, w, 0, newSize)) {
+      idx.free();
+      goto err0;
+    }
+  }
+  idx.free();
+
+  //~ this can be a 64-bit int (?)
+  dict->lookupNF("Prev", &obj);
+  if (obj.isInt()) {
+    *pos = (GFileOffset)(Guint)obj.getInt();
+    more = gTrue;
+  } else {
+    more = gFalse;
+  }
+  obj.free();
+  if (trailerDict.isNone()) {
+    trailerDict.initDict(dict);
+  }
+
+  return more;
+
+  // err1: obj may still hold a value and is freed; err0: obj has
+  // already been freed
+ err1:
+  obj.free();
+ err0:
+  ok = gFalse;
+  return gFalse;
+}
+
+// Read <n> entries of an xref stream, for object numbers <first> to
+// <first>+<n>-1.  w[0..2] are the byte widths of the type, offset and
+// generation fields; each field is a big-endian integer.  A type
+// field of width zero defaults to type 1.  An entry is stored only if
+// its slot has not been filled already.
+//
+// Returns false, without touching ok, if the range is invalid, the
+// stream ends early, a field is out of range, or the type is not 0,
+// 1 or 2.
+GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
+  long long type, gen, offset;
+  int c, newSize, i, j;
+
+  // reject negative values, and check that first + n fits in an int
+  // before computing it (signed overflow is undefined behavior)
+  if (first < 0 || n < 0 || first > INT_MAX - n) {
+    return gFalse;
+  }
+
+  // grow the entry table (by doubling) to hold first+n entries
+  if (first + n > size) {
+    for (newSize = size ? 2 * size : 1024;
+         first + n > newSize && newSize > 0;
+         newSize <<= 1) ;
+    if (newSize < 0) {
+      return gFalse;
+    }
+    entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
+    for (i = size; i < newSize; ++i) {
+      // offset -1 marks a slot that has not been filled yet
+      entries[i].offset = (GFileOffset)-1;
+      entries[i].type = xrefEntryFree;
+    }
+    size = newSize;
+  }
+
+  for (i = first; i < first + n; ++i) {
+
+    // field 1: entry type
+    if (w[0] == 0) {
+      type = 1;
+    } else {
+      for (type = 0, j = 0; j < w[0]; ++j) {
+        if ((c = xrefStr->getChar()) == EOF) {
+          return gFalse;
+        }
+        type = (type << 8) + c;
+      }
+    }
+
+    // field 2: stored in the entry's offset member
+    for (offset = 0, j = 0; j < w[1]; ++j) {
+      if ((c = xrefStr->getChar()) == EOF) {
+        return gFalse;
+      }
+      offset = (offset << 8) + c;
+    }
+    if (offset < 0 || offset > GFILEOFFSET_MAX) {
+      return gFalse;
+    }
+
+    // field 3: stored in the entry's gen member
+    for (gen = 0, j = 0; j < w[2]; ++j) {
+      if ((c = xrefStr->getChar()) == EOF) {
+        return gFalse;
+      }
+      gen = (gen << 8) + c;
+    }
+    // some PDF generators include a free entry with gen=0xffffffff
+    if ((gen < 0 || gen > INT_MAX) && type != 0) {
+      return gFalse;
+    }
+
+    // only fill slots that are still empty
+    if (entries[i].offset == (GFileOffset)-1) {
+      switch (type) {
+      case 0:
+        entries[i].offset = (GFileOffset)offset;
+        entries[i].gen = (int)gen;
+        entries[i].type = xrefEntryFree;
+        break;
+      case 1:
+        entries[i].offset = (GFileOffset)offset;
+        entries[i].gen = (int)gen;
+        entries[i].type = xrefEntryUncompressed;
+        break;
+      case 2:
+        entries[i].offset = (GFileOffset)offset;
+        entries[i].gen = (int)gen;
+        entries[i].type = xrefEntryCompressed;
+        break;
+      default:
+        return gFalse;
+      }
+      if (i > last) {
+        last = i;
+      }
+    }
+  }
+
+  return gTrue;
+}
+
+// Attempt to construct an xref table for a damaged file.
+//
+// The whole file is scanned through a 4096-byte window for 'trailer'
+// keywords, 'endstream' keywords (whose positions are collected in
+// streamEnds), object headers ("nnn ggg obj") and '>>' followed by
+// 'stream'.  Afterwards each object that was followed by a stream is
+// fetched to look for xref streams and object streams.  Returns false
+// if no dictionary with a Root reference was found (rootNum stays
+// negative).
+GBool XRef::constructXRef() {
+  int *streamObjNums = NULL;
+  int streamObjNumsLen = 0;
+  int streamObjNumsSize = 0;
+  int lastObjNum = -1;
+  rootNum = -1;
+  int streamEndsSize = 0;
+  streamEndsLen = 0;
+  char buf[4096 + 1];
+  str->reset();
+  // bufPos is the stream position corresponding to buf[0]
+  GFileOffset bufPos = start;
+  char *p = buf;
+  char *end = buf;
+  GBool startOfLine = gTrue;
+  GBool eof = gFalse;
+  while (1) {
+    // refill when fewer than 256 unread bytes remain: move the unread
+    // tail to the front of buf, read more data after it, and
+    // NUL-terminate so the string comparisons below stop at the end
+    if (end - p < 256 && !eof) {
+      memcpy(buf, p, end - p);
+      bufPos += p - buf;
+      p = buf + (end - p);
+      int n = (int)(buf + 4096 - p);
+      int m = str->getBlock(p, n);
+      end = p + m;
+      *end = '\0';
+      p = buf;
+      eof = m < n;
+    }
+    if (p == end && eof) {
+      break;
+    }
+    if (startOfLine && !strncmp(p, "trailer", 7)) {
+      constructTrailerDict((GFileOffset)(bufPos + (p + 7 - buf)));
+      p += 7;
+      startOfLine = gFalse;
+    } else if (startOfLine && !strncmp(p, "endstream", 9)) {
+      // record the position of this 'endstream' keyword
+      if (streamEndsLen == streamEndsSize) {
+	streamEndsSize += 64;
+	streamEnds = (GFileOffset *)greallocn(streamEnds, streamEndsSize,
+					      sizeof(GFileOffset));
+      }
+      streamEnds[streamEndsLen++] = (GFileOffset)(bufPos + (p - buf));
+      p += 9;
+      startOfLine = gFalse;
+    } else if (startOfLine && *p >= '0' && *p <= '9') {
+      // possible object header; lastObjNum is updated if one is found
+      p = constructObjectEntry(p, (GFileOffset)(bufPos + (p - buf)),
+			       &lastObjNum);
+      startOfLine = gFalse;
+    } else if (p[0] == '>' && p[1] == '>') {
+      p += 2;
+      startOfLine = gFalse;
+      // skip any PDF whitespace except for '\0'
+      while (*p == '\t' || *p == '\n' || *p == '\x0c' ||
+	     *p == '\r' || *p == ' ') {
+	if (*p == '\n' || *p == '\r') {
+	  startOfLine = gTrue;
+	}
+	++p;
+      }
+      // '>>' followed by 'stream': remember the most recent object
+      // number so it can be examined after the scan
+      if (!strncmp(p, "stream", 6)) {
+	if (lastObjNum >= 0) {
+	  if (streamObjNumsLen == streamObjNumsSize) {
+	    streamObjNumsSize += 64;
+	    streamObjNums = (int *)greallocn(streamObjNums, streamObjNumsSize,
+					     sizeof(int));
+	  }
+	  streamObjNums[streamObjNumsLen++] = lastObjNum;
+	}
+	p += 6;
+	startOfLine = gFalse;
+      }
+    } else {
+      // any other byte: track whether we are at the start of a line
+      if (*p == '\n' || *p == '\r') {
+	startOfLine = gTrue;
+      } else if (!Lexer::isSpace(*p & 0xff)) {
+	startOfLine = gFalse;
+      }
+      ++p;
+    }
+  }
+
+  // read each stream object, check for xref or object stream
+  for (int i = 0; i < streamObjNumsLen; ++i) {
+    Object obj;
+    fetch(streamObjNums[i], entries[streamObjNums[i]].gen, &obj);
+    if (obj.isStream()) {
+      Dict *dict = obj.streamGetDict();
+      Object type;
+      dict->lookup("Type", &type);
+      if (type.isName("XRef")) {
+	saveTrailerDict(dict, gTrue);
+      } else if (type.isName("ObjStm")) {
+	constructObjectStreamEntries(&obj, streamObjNums[i]);
+      }
+      type.free();
+    }
+    obj.free();
+  }
+
+  gfree(streamObjNums);
+
+  // if the file is encrypted, then any objects fetched here will be
+  // incorrect (because decryption is not yet enabled), so clear the
+  // cache to avoid that problem
+  for (int i = 0; i < xrefCacheSize; ++i) {
+    if (cache[i].num >= 0) {
+      cache[i].obj.free();
+      cache[i].num = -1;
+    }
+  }
+
+  if (rootNum < 0) {
+    error(errSyntaxError, -1, "Couldn't find trailer dictionary");
+    return gFalse;
+  }
+  return gTrue;
+}
+
+// Parse one object starting at [pos] in the stream; if it is a
+// dictionary, offer it to saveTrailerDict() as a candidate trailer
+// dictionary.
+void XRef::constructTrailerDict(GFileOffset pos) {
+  Object dummy;
+  dummy.initNull();
+  Lexer *lexer = new Lexer(NULL, str->makeSubStream(pos, gFalse, 0, &dummy));
+  Parser *parser = new Parser(NULL, lexer, gFalse);
+
+  Object candidate;
+  parser->getObj(&candidate);
+  if (candidate.isDict()) {
+    saveTrailerDict(candidate.getDict(), gFalse);
+  }
+  candidate.free();
+
+  delete parser;
+}
+
+// Adopt [dict] as the trailer dictionary if it has a Root entry that
+// is an indirect reference.  For a dictionary taken from an xref
+// stream, the referenced object number must also be no greater than
+// the highest object number found so far.
+void XRef::saveTrailerDict(Dict *dict, GBool isXRefStream) {
+  Object rootRef;
+  dict->lookupNF("Root", &rootRef);
+
+  if (!rootRef.isRef()) {
+    rootRef.free();
+    return;
+  }
+
+  const int candidate = rootRef.getRefNum();
+  // the xref stream scanning code runs after all objects are found,
+  // so the root object number can be validated against 'last' then
+  if (isXRefStream && candidate > last) {
+    rootRef.free();
+    return;
+  }
+
+  rootNum = candidate;
+  rootGen = rootRef.getRefGen();
+  if (!trailerDict.isNone()) {
+    trailerDict.free();
+  }
+  trailerDict.initDict(dict);
+  rootRef.free();
+}
+
+// Look for an object header ("nnn ggg obj") at [p].  The first
+// character at *[p] is a digit.  [pos] is the position of *[p].
+//
+// If a complete header is found, an uncompressed xref entry is added
+// for it and *[objNum] is set to the object number.  Returns the
+// pointer to where scanning stopped, which the caller uses to resume;
+// when a header matched, this points at the "obj" keyword.
+char *XRef::constructObjectEntry(char *p, GFileOffset pos, int *objNum) {
+  // we look for non-end-of-line space characters here, to deal with
+  // situations like:
+  //    nnn          <-- garbage digits on a line
+  //    nnn nnn obj  <-- actual object
+  // and we also ignore '\0' (because it's used to terminate the
+  // buffer in this damage-scanning code)
+
+  // object number; the loop stops accumulating once the value
+  // reaches 100000000
+  int num = 0;
+  do {
+    num = (num * 10) + (*p - '0');
+    ++p;
+  } while (*p >= '0' && *p <= '9' && num < 100000000);
+  if (*p != '\t' && *p != '\x0c' && *p != ' ') {
+    return p;
+  }
+  do {
+    ++p;
+  } while (*p == '\t' || *p == '\x0c' || *p == ' ');
+  if (!(*p >= '0' && *p <= '9')) {
+    return p;
+  }
+  // generation number, bounded the same way
+  int gen = 0;
+  do {
+    gen = (gen * 10) + (*p - '0');
+    ++p;
+  } while (*p >= '0' && *p <= '9' && gen < 100000000);
+  if (*p != '\t' && *p != '\x0c' && *p != ' ') {
+    return p;
+  }
+  do {
+    ++p;
+  } while (*p == '\t' || *p == '\x0c' || *p == ' ');
+  if (strncmp(p, "obj", 3)) {
+    return p;
+  }
+
+  // entry offsets are stored relative to 'start'
+  if (constructXRefEntry(num, gen, pos - start, xrefEntryUncompressed)) {
+    *objNum = num;
+  }
+
+  return p;
+}
+
+// Parse the header of object stream <objStr> (object number
+// <objStrObjNum>) and add a compressed xref entry for each object it
+// lists.  Does nothing if the stream's N entry is not an integer in
+// the range 1..1000000.
+void XRef::constructObjectStreamEntries(Object *objStr, int objStrObjNum) {
+  Object numObj, offObj;
+
+  // get the object count
+  const GBool haveCount =
+      objStr->streamGetDict()->lookup("N", &numObj)->isInt();
+  const int count = haveCount ? numObj.getInt() : 0;
+  numObj.free();
+  if (!haveCount || count <= 0 || count > 1000000) {
+    return;
+  }
+
+  // parse the header: pairs of (object number, offset)
+  Lexer *lexer = new Lexer(NULL, objStr->getStream()->copy());
+  Parser *parser = new Parser(NULL, lexer, gFalse);
+  int idx = 0;
+  while (idx < count) {
+    parser->getObj(&numObj, gTrue);
+    parser->getObj(&offObj, gTrue);
+    if (numObj.isInt() && offObj.isInt()) {
+      const int num = numObj.getInt();
+      // for a compressed entry, the index within the stream is passed
+      // as the generation and the stream's object number as the offset
+      if (num >= 0 && num < 1000000) {
+        constructXRefEntry(num, idx, objStrObjNum, xrefEntryCompressed);
+      }
+    }
+    offObj.free();
+    numObj.free();
+    ++idx;
+  }
+  delete parser;
+}
+
+// Record an xref entry for object <num>, growing the entry table (to
+// a multiple of 256 entries) if needed.  An existing entry is
+// replaced only if it is free or its generation is not greater than
+// <gen>.  Returns false only if the new table size overflows.
+GBool XRef::constructXRefEntry(int num, int gen, GFileOffset pos,
+                               XRefEntryType type) {
+  if (num >= size) {
+    const int grown = (num + 1 + 255) & ~255;
+    if (grown < 0) {
+      return gFalse;
+    }
+    entries = (XRefEntry *)greallocn(entries, grown, sizeof(XRefEntry));
+    int k = size;
+    while (k < grown) {
+      entries[k].offset = (GFileOffset)-1;
+      entries[k].type = xrefEntryFree;
+      ++k;
+    }
+    size = grown;
+  }
+
+  XRefEntry &slot = entries[num];
+  // the type test must come first: the gen of a never-filled slot is
+  // not initialized
+  if (slot.type == xrefEntryFree || gen >= slot.gen) {
+    slot.offset = pos;
+    slot.gen = gen;
+    slot.type = type;
+    if (last < num) {
+      last = num;
+    }
+  }
+
+  return gTrue;
+}
+
+// Mark the document as encrypted and store the decryption
+// parameters.  The key length is capped at 32 bytes and that many
+// bytes of <fileKeyA> are copied into fileKey.
+void XRef::setEncryption(int permFlagsA, GBool ownerPasswordOkA,
+                         Guchar *fileKeyA, int keyLengthA, int encVersionA,
+                         CryptAlgorithm encAlgorithmA) {
+  encrypted = gTrue;
+  permFlags = permFlagsA;
+  ownerPasswordOk = ownerPasswordOkA;
+  encVersion = encVersionA;
+  encAlgorithm = encAlgorithmA;
+
+  keyLength = (keyLengthA <= 32) ? keyLengthA : 32;
+  for (int k = 0; k < keyLength; ++k) {
+    fileKey[k] = fileKeyA[k];
+  }
+}
+
+// If the document is encrypted, copy the stored encryption
+// parameters to the output arguments and return true; otherwise
+// return false and leave the outputs untouched.
+GBool XRef::getEncryption(int *permFlagsA, GBool *ownerPasswordOkA,
+                          int *keyLengthA, int *encVersionA,
+                          CryptAlgorithm *encAlgorithmA) {
+  if (encrypted) {
+    *permFlagsA = permFlags;
+    *ownerPasswordOkA = ownerPasswordOk;
+    *keyLengthA = keyLength;
+    *encVersionA = encVersion;
+    *encAlgorithmA = encAlgorithm;
+    return gTrue;
+  }
+  return gFalse;
+}
+
+// True if the owner password was accepted (unless <ignoreOwnerPW> is
+// set) or the print permission bit is set.
+GBool XRef::okToPrint(GBool ignoreOwnerPW) {
+  if (!ignoreOwnerPW && ownerPasswordOk) {
+    return gTrue;
+  }
+  return (permFlags & permPrint) != 0;
+}
+
+// True if the owner password was accepted (unless <ignoreOwnerPW> is
+// set) or the change permission bit is set.
+GBool XRef::okToChange(GBool ignoreOwnerPW) {
+  if (!ignoreOwnerPW && ownerPasswordOk) {
+    return gTrue;
+  }
+  return (permFlags & permChange) != 0;
+}
+
+// True if the owner password was accepted (unless <ignoreOwnerPW> is
+// set) or the copy permission bit is set.
+GBool XRef::okToCopy(GBool ignoreOwnerPW) {
+  if (!ignoreOwnerPW && ownerPasswordOk) {
+    return gTrue;
+  }
+  return (permFlags & permCopy) != 0;
+}
+
+// True if the owner password was accepted (unless <ignoreOwnerPW> is
+// set) or the notes permission bit is set.
+GBool XRef::okToAddNotes(GBool ignoreOwnerPW) {
+  if (!ignoreOwnerPW && ownerPasswordOk) {
+    return gTrue;
+  }
+  return (permFlags & permNotes) != 0;
+}
+
+// Fetch indirect object <num> <gen> into <obj> and return <obj>.
+// The small object cache is consulted first (a hit is moved to the
+// front); otherwise the object is parsed from the file (uncompressed
+// entry) or taken from an object stream (compressed entry) and then
+// inserted at the front of the cache.  On any failure -- bad object
+// number, generation mismatch, free entry, or malformed object header
+// -- <obj> is set to a null object.  <recursion> is passed through to
+// the parser.
+Object *XRef::fetch(int num, int gen, Object *obj, int recursion) {
+  XRefEntry *e;
+  Parser *parser;
+  Object obj1, obj2, obj3;
+  XRefCacheEntry tmp;
+  int i, j;
+
+  // check for bogus ref - this can happen in corrupted PDF files
+  if (num < 0 || num >= size) {
+    goto err;
+  }
+
+  // check the cache
+#if MULTITHREADED
+  gLockMutex(&cacheMutex);
+#endif
+  if (cache[0].num == num && cache[0].gen == gen) {
+    cache[0].obj.copy(obj);
+#if MULTITHREADED
+    gUnlockMutex(&cacheMutex);
+#endif
+    return obj;
+  }
+  for (i = 1; i < xrefCacheSize; ++i) {
+    if (cache[i].num == num && cache[i].gen == gen) {
+      // move the hit to the front, shifting the earlier entries down
+      tmp = cache[i];
+      for (j = i; j > 0; --j) {
+	cache[j] = cache[j - 1];
+      }
+      cache[0] = tmp;
+      cache[0].obj.copy(obj);
+#if MULTITHREADED
+      gUnlockMutex(&cacheMutex);
+#endif
+      return obj;
+    }
+  }
+#if MULTITHREADED
+  gUnlockMutex(&cacheMutex);
+#endif
+
+  e = &entries[num];
+  switch (e->type) {
+
+  case xrefEntryUncompressed:
+    if (e->gen != gen) {
+      goto err;
+    }
+    obj1.initNull();
+    parser = new Parser(this,
+	       new Lexer(this,
+		 str->makeSubStream(start + e->offset, gFalse, 0, &obj1)),
+	       gTrue);
+    // the object must begin with "<num> <gen> obj"
+    parser->getObj(&obj1, gTrue);
+    parser->getObj(&obj2, gTrue);
+    parser->getObj(&obj3, gTrue);
+    if (!obj1.isInt() || obj1.getInt() != num ||
+	!obj2.isInt() || obj2.getInt() != gen ||
+	!obj3.isCmd("obj")) {
+      obj1.free();
+      obj2.free();
+      obj3.free();
+      delete parser;
+      goto err;
+    }
+    // the file key is passed only when the document is encrypted
+    parser->getObj(obj, gFalse, encrypted ? fileKey : (Guchar *)NULL,
+		   encAlgorithm, keyLength, num, gen, recursion);
+    obj1.free();
+    obj2.free();
+    obj3.free();
+    delete parser;
+    break;
+
+  case xrefEntryCompressed:
+#if 0 // Adobe apparently ignores the generation number on compressed objects
+    if (gen != 0) {
+      goto err;
+    }
+#endif
+    // for a compressed entry, offset is the object number of the
+    // containing object stream (which must itself be an uncompressed
+    // object) and gen is the index within that stream
+    if (e->offset >= (GFileOffset)size ||
+	entries[e->offset].type != xrefEntryUncompressed) {
+      error(errSyntaxError, -1, "Invalid object stream");
+      goto err;
+    }
+    if (!getObjectStreamObject((int)e->offset, e->gen, num, obj)) {
+      goto err;
+    }
+    break;
+
+  default:
+    goto err;
+  }
+
+  // put the new object in the cache, throwing away the oldest object
+  // currently in the cache
+#if MULTITHREADED
+  gLockMutex(&cacheMutex);
+#endif
+  if (cache[xrefCacheSize - 1].num >= 0) {
+    cache[xrefCacheSize - 1].obj.free();
+  }
+  for (i = xrefCacheSize - 1; i > 0; --i) {
+    cache[i] = cache[i - 1];
+  }
+  cache[0].num = num;
+  cache[0].gen = gen;
+  obj->copy(&cache[0].obj);
+#if MULTITHREADED
+  gUnlockMutex(&cacheMutex);
+#endif
+
+  return obj;
+
+ err:
+  return obj->initNull();
+}
+
+// Get object <objNum>, stored at index <objIdx> of object stream
+// <objStrNum>, into <obj>.  Returns false if the object stream could
+// not be loaded.  When MULTITHREADED is enabled, objStrsMutex is held
+// for the duration of the lookup.
+GBool XRef::getObjectStreamObject(int objStrNum, int objIdx,
+                                  int objNum, Object *obj) {
+  GBool found = gFalse;
+
+#if MULTITHREADED
+  gLockMutex(&objStrsMutex);
+#endif
+  ObjectStream *objStr = getObjectStream(objStrNum);
+  if (objStr) {
+    cleanObjectStreamCache();
+    objStr->getObject(objIdx, objNum, obj);
+    found = gTrue;
+  }
+#if MULTITHREADED
+  gUnlockMutex(&objStrsMutex);
+#endif
+  return found;
+}
+
+// Return the ObjectStream for object number <objStrNum>, loading it
+// if it is not in the cache.  The cache is kept in most-recently-used
+// order: a hit or a newly loaded stream is moved to index 0, and the
+// last entry is deleted when the cache is full.  Returns NULL if the
+// stream cannot be loaded.
+// NB: objStrsMutex must be locked when calling this function.
+ObjectStream *XRef::getObjectStream(int objStrNum) {
+  ObjectStream *objStr;
+  int i, j;
+
+  // check the MRU entry in the cache
+  if (objStrs[0] && objStrs[0]->getObjStrNum() == objStrNum) {
+    objStr = objStrs[0];
+    objStrLastUse[0] = objStrTime++;
+    return objStr;
+  }
+
+  // check the rest of the cache
+  for (i = 1; i < objStrCacheLength; ++i) {
+    if (objStrs[i] && objStrs[i]->getObjStrNum() == objStrNum) {
+      objStr = objStrs[i];
+      // shift entries 0..i-1 down one slot to make room at the front
+      for (j = i; j > 0; --j) {
+	objStrs[j] = objStrs[j - 1];
+	objStrLastUse[j] = objStrLastUse[j - 1];
+      }
+      objStrs[0] = objStr;
+      objStrLastUse[0] = objStrTime++;
+      return objStr;
+    }
+  }
+
+  // load a new ObjectStream
+  objStr = new ObjectStream(this, objStrNum);
+  if (!objStr->isOk()) {
+    delete objStr;
+    return NULL;
+  }
+
+  // add to the cache
+  // (if the cache is full, the last entry is deleted first)
+  if (objStrCacheLength == objStrCacheSize) {
+    delete objStrs[objStrCacheSize - 1];
+    --objStrCacheLength;
+  }
+  for (j = objStrCacheLength; j > 0; --j) {
+    objStrs[j] = objStrs[j - 1];
+    objStrLastUse[j] = objStrLastUse[j - 1];
+  }
+  ++objStrCacheLength;
+  objStrs[0] = objStr;
+  objStrLastUse[0] = objStrTime++;
+
+  return objStr;
+}
+
+// Eject the last (least recently used) entry of the object stream
+// cache if it has not been used within the last objStrCacheTimeout
+// accesses.  The cache is never reduced below one entry.
+void XRef::cleanObjectStreamCache() {
+  if (objStrCacheLength <= 1) {
+    return;
+  }
+
+  const int oldest = objStrCacheLength - 1;
+  // NB: objStrTime and objStrLastUse[] are unsigned ints, so the
+  // mod-2^32 arithmetic makes the subtraction work out, even if the
+  // time wraps around.
+  if (objStrTime - objStrLastUse[oldest] > objStrCacheTimeout) {
+    delete objStrs[oldest];
+    objStrs[oldest] = NULL;
+    --objStrCacheLength;
+  }
+}
+
+// Look up the Info entry of the trailer dictionary into <obj> and
+// return the lookup's result pointer.
+Object *XRef::getDocInfo(Object *obj) {
+  Object *info = trailerDict.dictLookup("Info", obj);
+  return info;
+}
+
+// Added for the pdftex project.  Same as getDocInfo(), but uses the
+// dictionary's NF lookup.
+Object *XRef::getDocInfoNF(Object *obj) {
+  Object *info = trailerDict.dictLookupNF("Info", obj);
+  return info;
+}
+
+// Find the first recorded 'endstream' position that is not before
+// <streamStart> and store it in *<streamEnd>.  Returns false if no
+// positions were recorded or all of them lie before <streamStart>.
+GBool XRef::getStreamEnd(GFileOffset streamStart, GFileOffset *streamEnd) {
+  if (streamEndsLen == 0) {
+    return gFalse;
+  }
+  if (streamStart > streamEnds[streamEndsLen - 1]) {
+    return gFalse;
+  }
+
+  int lo = -1;
+  int hi = streamEndsLen - 1;
+  // invariant: streamEnds[lo] < streamStart <= streamEnds[hi]
+  while (hi - lo > 1) {
+    const int mid = (lo + hi) / 2;
+    if (streamEnds[mid] < streamStart) {
+      lo = mid;
+    } else {
+      hi = mid;
+    }
+  }
+  *streamEnd = streamEnds[hi];
+  return gTrue;
+}
+
+// Parse the leading run of decimal digits in <s> as a file offset.
+// Parsing stops at the first non-digit, or just before a digit that
+// would push the value past GFILEOFFSET_MAX.  Returns 0 if <s> does
+// not start with a digit.
+GFileOffset XRef::strToFileOffset(char *s) {
+  GFileOffset value = 0;
+  char *cur = s;
+
+  while (*cur && isdigit(*cur & 0xff)) {
+    const GFileOffset digit = *cur - '0';
+    if (value > (GFILEOFFSET_MAX - digit) / 10) {
+      break;
+    }
+    value = 10 * value + digit;
+    ++cur;
+  }
+  return value;
+}
author	Calvin Morrison <calvin@pobox.com>	2023-04-05 14:13:39 -0400
committer	Calvin Morrison <calvin@pobox.com>	2023-04-05 14:13:39 -0400
commit	835e373b3eeaabcd0621ed6798ab500f37982fae (patch)
tree	dfa16b0e2e1b4956b38f693220eac4e607802133 /xpdf/XRef.cc