aboutsummaryrefslogtreecommitdiff
path: root/xpdf/PDFDoc.cc
diff options
context:
space:
mode:
author Calvin Morrison <calvin@pobox.com> 2023-04-05 14:13:39 -0400
committer Calvin Morrison <calvin@pobox.com> 2023-04-05 14:13:39 -0400
commit 835e373b3eeaabcd0621ed6798ab500f37982fae (patch)
tree dfa16b0e2e1b4956b38f693220eac4e607802133 /xpdf/PDFDoc.cc
xpdf-no-select-disableHEADmaster
Diffstat (limited to 'xpdf/PDFDoc.cc')
-rw-r--r-- xpdf/PDFDoc.cc 679
1 files changed, 679 insertions, 0 deletions
diff --git a/xpdf/PDFDoc.cc b/xpdf/PDFDoc.cc
new file mode 100644
index 0000000..3e9307d
--- /dev/null
+++ b/xpdf/PDFDoc.cc
@@ -0,0 +1,679 @@
+//========================================================================
+//
+// PDFDoc.cc
+//
+// Copyright 1996-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include <aconf.h>
+
+#ifdef USE_GCC_PRAGMAS
+#pragma implementation
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#ifdef _WIN32
+# include <windows.h>
+#endif
+#include "gmempp.h"
+#include "GString.h"
+#include "gfile.h"
+#include "config.h"
+#include "GlobalParams.h"
+#include "Page.h"
+#include "Catalog.h"
+#include "Stream.h"
+#include "XRef.h"
+#include "Link.h"
+#include "OutputDev.h"
+#include "Error.h"
+#include "ErrorCodes.h"
+#include "Lexer.h"
+#include "Parser.h"
+#include "SecurityHandler.h"
+#include "UTF8.h"
+#ifndef DISABLE_OUTLINE
+#include "Outline.h"
+#endif
+#include "OptionalContent.h"
+#include "PDFDoc.h"
+
+//------------------------------------------------------------------------
+
+// Number of bytes at the start of the stream that checkHeader() reads
+// and scans for the '%PDF-' marker.
+#define headerSearchSize 1024 // read this many bytes at beginning of
+ // file to look for '%PDF'
+
+// Avoid sharing files with child processes on Windows, where sharing
+// can cause problems.
+// fopenReadMode is the mode string this file passes to fopen() when
+// opening a PDF for reading; wfopenReadMode is the wide-character
+// equivalent passed to _wfopen() and is only defined on Windows.
+#ifdef _WIN32
+# define fopenReadMode "rbN"
+# define wfopenReadMode L"rbN"
+#else
+# define fopenReadMode "rb"
+#endif
+
+//------------------------------------------------------------------------
+// PDFDoc
+//------------------------------------------------------------------------
+
+// Open and parse the PDF file named by fileNameA.  The PDFDoc keeps
+// the fileNameA pointer itself (no copy is made) and deletes it in the
+// destructor.  If the file cannot be opened, errCode is set to
+// errOpenFile and ok stays false; otherwise ok receives the result of
+// setup().
+PDFDoc::PDFDoc(GString *fileNameA, GString *ownerPassword,
+               GString *userPassword, PDFCore *coreA) {
+  Object nullDict;
+
+  init(coreA);
+
+  fileName = fileNameA;
+#ifdef _WIN32
+  // build the wide-character name by widening each 8-bit character
+  int nameLen = fileName->getLength();
+  fileNameU = (wchar_t *)gmallocn(nameLen + 1, sizeof(wchar_t));
+  for (int k = 0; k < nameLen; ++k) {
+    fileNameU[k] = (wchar_t)(fileName->getChar(k) & 0xff);
+  }
+  fileNameU[nameLen] = L'\0';
+#endif
+
+  // try to open file
+#ifdef VMS
+  file = fopen(fileName->getCString(), fopenReadMode, "ctx=stm");
+  if (!file) {
+    error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
+    errCode = errOpenFile;
+    return;
+  }
+#else
+  file = fopen(fileName->getCString(), fopenReadMode);
+  if (!file) {
+    // the name as given failed: retry with an all-lower-case and then
+    // an all-upper-case copy of the name
+    GString *altName = fileName->copy();
+    altName->lowerCase();
+    file = fopen(altName->getCString(), fopenReadMode);
+    if (!file) {
+      altName->upperCase();
+      file = fopen(altName->getCString(), fopenReadMode);
+    }
+    if (!file) {
+      error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
+      delete altName;
+      errCode = errOpenFile;
+      return;
+    }
+    delete altName;
+  }
+#endif
+
+  // create stream
+  nullDict.initNull();
+  str = new FileStream(file, 0, gFalse, 0, &nullDict);
+
+  ok = setup(ownerPassword, userPassword);
+}
+
+#ifdef _WIN32
+// Open and parse the PDF file named by the wide-character string
+// fileNameA, which holds fileNameLen characters and is not required to
+// be NUL-terminated (it is copied with an explicit length below).
+// Names longer than winMaxLongPath characters are truncated.  Both a
+// wide copy (fileNameU) and an 8-bit copy (fileName) of the resolved
+// path are stored.  If the file cannot be opened, errCode is set to
+// errOpenFile and ok stays false; otherwise ok receives the result of
+// setup().
+PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GString *ownerPassword,
+               GString *userPassword, PDFCore *coreA) {
+  OSVERSIONINFO version;
+  Object obj;
+  int i;
+
+  init(coreA);
+
+  // handle a Windows shortcut
+  // NB: readWindowsShortcut() is handed the buffer and its capacity, and
+  // the length is re-measured afterwards, so wPath may differ from
+  // fileNameA from here on.
+  wchar_t wPath[winMaxLongPath + 1];
+  int n = fileNameLen < winMaxLongPath ? fileNameLen : winMaxLongPath;
+  if (n < 0) {
+    // guard against a negative length reaching memcpy
+    n = 0;
+  }
+  memcpy(wPath, fileNameA, n * sizeof(wchar_t));
+  wPath[n] = L'\0';
+  readWindowsShortcut(wPath, winMaxLongPath + 1);
+  int wPathLen = (int)wcslen(wPath);
+
+  // save both Unicode and 8-bit copies of the file name
+  fileName = new GString();
+  fileNameU = (wchar_t *)gmallocn(wPathLen + 1, sizeof(wchar_t));
+  memcpy(fileNameU, wPath, (wPathLen + 1) * sizeof(wchar_t));
+  for (i = 0; i < wPathLen; ++i) {
+    // read from wPath, not fileNameA: wPathLen is the length of wPath,
+    // and fileNameA is only guaranteed to hold fileNameLen characters
+    fileName->append((char)wPath[i]);
+  }
+
+  // try to open file
+  // NB: _wfopen is only available in NT
+  version.dwOSVersionInfoSize = sizeof(version);
+  GetVersionEx(&version);
+  if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
+    file = _wfopen(fileNameU, wfopenReadMode);
+  } else {
+    file = fopen(fileName->getCString(), fopenReadMode);
+  }
+  if (!file) {
+    error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
+    errCode = errOpenFile;
+    return;
+  }
+
+  // create stream
+  obj.initNull();
+  str = new FileStream(file, 0, gFalse, 0, &obj);
+
+  ok = setup(ownerPassword, userPassword);
+}
+#endif
+
+// Open and parse the PDF file named by the C string fileNameA.  The
+// name is copied into a new GString.  On Windows the name is decoded
+// with getUTF8() into a wide-character path (truncated to
+// winMaxLongPath characters), passed through readWindowsShortcut(),
+// and stored in fileNameU.  If the file cannot be opened, errCode is
+// set to errOpenFile and ok stays false; otherwise ok receives the
+// result of setup().
+PDFDoc::PDFDoc(char *fileNameA, GString *ownerPassword,
+               GString *userPassword, PDFCore *coreA) {
+  Object nullDict;
+
+  init(coreA);
+
+  fileName = new GString(fileNameA);
+
+#if defined(_WIN32)
+  // decode the 8-bit name into a wide-character buffer
+  wchar_t widePath[winMaxLongPath + 1];
+  Unicode ch;
+  int srcPos = 0;
+  int dstLen = 0;
+  while (dstLen < winMaxLongPath && getUTF8(fileName, &srcPos, &ch)) {
+    widePath[dstLen++] = (wchar_t)ch;
+  }
+  widePath[dstLen] = L'\0';
+  readWindowsShortcut(widePath, winMaxLongPath + 1);
+  int wideLen = (int)wcslen(widePath);
+
+  // keep a heap copy of the (possibly rewritten) wide path,
+  // including its terminating NUL
+  fileNameU = (wchar_t *)gmallocn(wideLen + 1, sizeof(wchar_t));
+  memcpy(fileNameU, widePath, (wideLen + 1) * sizeof(wchar_t));
+
+  // NB: _wfopen is only available in NT
+  OSVERSIONINFO osInfo;
+  osInfo.dwOSVersionInfoSize = sizeof(osInfo);
+  GetVersionEx(&osInfo);
+  if (osInfo.dwPlatformId != VER_PLATFORM_WIN32_NT) {
+    file = fopen(fileName->getCString(), fopenReadMode);
+  } else {
+    file = _wfopen(fileNameU, wfopenReadMode);
+  }
+#elif defined(VMS)
+  file = fopen(fileName->getCString(), fopenReadMode, "ctx=stm");
+#else
+  file = fopen(fileName->getCString(), fopenReadMode);
+#endif
+
+  if (!file) {
+    error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
+    errCode = errOpenFile;
+    return;
+  }
+
+  // create stream
+  nullDict.initNull();
+  str = new FileStream(file, 0, gFalse, 0, &nullDict);
+
+  ok = setup(ownerPassword, userPassword);
+}
+
+// Parse a PDF document from the already-open stream strA.  The
+// PDFDoc stores strA and deletes it in the destructor.  If the stream
+// reports a file name, a copy of it is stored (plus, on Windows, a
+// wide-character copy made by widening each 8-bit character);
+// otherwise the stored names are NULL.  ok receives the result of
+// setup().
+PDFDoc::PDFDoc(BaseStream *strA, GString *ownerPassword,
+               GString *userPassword, PDFCore *coreA) {
+  init(coreA);
+
+  if (!strA->getFileName()) {
+    // anonymous stream: no file name to record
+    fileName = NULL;
+#ifdef _WIN32
+    fileNameU = NULL;
+#endif
+  } else {
+    fileName = strA->getFileName()->copy();
+#ifdef _WIN32
+    int nameLen = fileName->getLength();
+    fileNameU = (wchar_t *)gmallocn(nameLen + 1, sizeof(wchar_t));
+    for (int k = 0; k < nameLen; ++k) {
+      fileNameU[k] = (wchar_t)(fileName->getChar(k) & 0xff);
+    }
+    fileNameU[nameLen] = L'\0';
+#endif
+  }
+
+  str = strA;
+  ok = setup(ownerPassword, userPassword);
+}
+
+// Put the object into its initial "nothing loaded" state: not ok, no
+// error, and every pointer the destructor later inspects (other than
+// the file name members, which each constructor sets itself) cleared
+// to NULL.  Called first by every constructor.
+void PDFDoc::init(PDFCore *coreA) {
+  // back-pointer supplied by the caller
+  core = coreA;
+
+  // status
+  ok = gFalse;
+  errCode = errNone;
+
+  // objects created later by the constructors / setup()
+  file = NULL;
+  str = NULL;
+  xref = NULL;
+  catalog = NULL;
+#ifndef DISABLE_OUTLINE
+  outline = NULL;
+#endif
+  optContent = NULL;
+}
+
+// Parse the document from str: check the header, then read the xref
+// table and catalog via setup2().  If the first setup2() attempt fails
+// with errDamaged or errBadCatalog, one more attempt is made with xref
+// repair enabled.  On success the outline (unless DISABLE_OUTLINE is
+// defined) and the optional content info are created and gTrue is
+// returned; on any other failure gFalse is returned.
+GBool PDFDoc::setup(GString *ownerPassword, GString *userPassword) {
+  str->reset();
+
+  // check header
+  checkHeader();
+
+  // read the xref and catalog
+  if (!PDFDoc::setup2(ownerPassword, userPassword, gFalse)) {
+    // only a damaged file or a bad catalog is worth a repair attempt
+    if (errCode != errDamaged && errCode != errBadCatalog) {
+      return gFalse;
+    }
+    // try repairing the xref table
+    error(errSyntaxWarning, -1,
+          "PDF file is damaged - attempting to reconstruct xref table...");
+    if (!PDFDoc::setup2(ownerPassword, userPassword, gTrue)) {
+      return gFalse;
+    }
+  }
+
+#ifndef DISABLE_OUTLINE
+  // read outline
+  outline = new Outline(catalog->getOutline(), xref);
+#endif
+
+  // read the optional content info
+  optContent = new OptionalContent(this);
+
+  // done
+  return gTrue;
+}
+
+// One attempt at loading the document structure: create the XRef
+// (with repair if repairXRef is set), check encryption, and create the
+// Catalog.  Returns gTrue if all three steps succeed.  On failure,
+// errCode is set (to the xref's own error code, errEncrypted, or
+// errBadCatalog respectively), everything created here is deleted and
+// reset to NULL, and gFalse is returned.
+GBool PDFDoc::setup2(GString *ownerPassword, GString *userPassword,
+                     GBool repairXRef) {
+  // read xref table
+  xref = new XRef(str, repairXRef);
+  if (!xref->isOk()) {
+    error(errSyntaxError, -1, "Couldn't read xref table");
+    // fetch the error code before the xref object goes away
+    errCode = xref->getErrorCode();
+    delete xref;
+    xref = NULL;
+    return gFalse;
+  }
+
+  // check for encryption
+  if (!checkEncryption(ownerPassword, userPassword)) {
+    errCode = errEncrypted;
+    delete xref;
+    xref = NULL;
+    return gFalse;
+  }
+
+  // read catalog
+  catalog = new Catalog(this);
+  if (catalog->isOk()) {
+    return gTrue;
+  }
+
+  // bad catalog: drop both the catalog and the xref
+  error(errSyntaxError, -1, "Couldn't read page catalog");
+  errCode = errBadCatalog;
+  delete catalog;
+  catalog = NULL;
+  delete xref;
+  xref = NULL;
+  return gFalse;
+}
+
+// Release everything the document holds, in the reverse of the order
+// in which setup() builds it: optional content, outline, catalog,
+// xref, the stream, the underlying FILE, and finally the stored file
+// names.
+PDFDoc::~PDFDoc() {
+  // a delete-expression on a null pointer does nothing, so the
+  // pointers cleared by init() need no explicit test here
+  delete optContent;
+#ifndef DISABLE_OUTLINE
+  delete outline;
+#endif
+  delete catalog;
+  delete xref;
+  delete str;
+
+  // fclose() must not be handed a null pointer
+  if (file != NULL) {
+    fclose(file);
+  }
+
+  delete fileName;
+#ifdef _WIN32
+  if (fileNameU != NULL) {
+    gfree(fileNameU);
+  }
+#endif
+}
+
+// Check for a PDF header on this stream.  Skip past some garbage
+// if necessary.
+//
+// Reads the first headerSearchSize bytes, looks for "%PDF-", and if
+// found moves the stream start to that offset and parses the version
+// number that follows into pdfVersion.  pdfVersion is left at 0 when
+// no header (or no version token) is found.  All problems are reported
+// as warnings only; this function never fails the load.
+void PDFDoc::checkHeader() {
+  // characters that terminate the version token
+  const char *versionDelims = " \t\n\r";
+  char hdrBuf[headerSearchSize+1];
+  char *p;
+  int i;
+
+  pdfVersion = 0;
+  // zero-fill so the buffer is always NUL-terminated, even after a
+  // short read
+  memset(hdrBuf, 0, headerSearchSize + 1);
+  str->getBlock(hdrBuf, headerSearchSize);
+  for (i = 0; i < headerSearchSize - 5; ++i) {
+    if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
+      break;
+    }
+  }
+  if (i >= headerSearchSize - 5) {
+    error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
+    return;
+  }
+  str->moveStart(i);
+
+  // Isolate the version token: skip leading delimiters, then cut the
+  // buffer at the next delimiter.  This is what strtok() did here, but
+  // without touching strtok's hidden static state.
+  p = &hdrBuf[i+5];
+  p += strspn(p, versionDelims);
+  if (!*p) {
+    error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
+    return;
+  }
+  p[strcspn(p, versionDelims)] = '\0';
+
+  pdfVersion = atof(p);
+  if (!(hdrBuf[i+5] >= '0' && hdrBuf[i+5] <= '9') ||
+      pdfVersion > supportedPDFVersionNum + 0.0001) {
+    error(errSyntaxWarning, -1,
+          "PDF version {0:s} -- xpdf supports version {1:s} (continuing anyway)",
+          p, supportedPDFVersionStr);
+  }
+}
+
+// Look up the "Encrypt" entry in the trailer dictionary and decide
+// whether the document may be read.  Returns gTrue if there is no
+// Encrypt dictionary, if the security handler reports the document as
+// unencrypted, or if the handler accepts the supplied passwords (in
+// which case the decryption parameters are installed on xref).
+// Returns gFalse if no security handler could be made or if
+// authorization failed.
+GBool PDFDoc::checkEncryption(GString *ownerPassword, GString *userPassword) {
+  Object encrypt;
+  GBool allowed;
+
+  xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
+  if (!encrypt.isDict()) {
+    // document is not encrypted
+    allowed = gTrue;
+  } else {
+    SecurityHandler *handler = SecurityHandler::make(this, &encrypt);
+    if (!handler) {
+      // couldn't find the matching security handler
+      allowed = gFalse;
+    } else {
+      if (handler->isUnencrypted()) {
+        // no encryption
+        allowed = gTrue;
+      } else if (handler->checkEncryption(ownerPassword, userPassword)) {
+        // authorization succeeded
+        xref->setEncryption(handler->getPermissionFlags(),
+                            handler->getOwnerPasswordOk(),
+                            handler->getFileKey(),
+                            handler->getFileKeyLength(),
+                            handler->getEncVersion(),
+                            handler->getEncAlgorithm());
+        allowed = gTrue;
+      } else {
+        // authorization failed
+        allowed = gFalse;
+      }
+      delete handler;
+    }
+  }
+  encrypt.free();
+  return allowed;
+}
+
+// Render one page to the output device by forwarding all rendering
+// parameters to the Page object fetched from the catalog.  When
+// globalParams->getPrintCommands() is set, a page banner is first
+// written to stdout.
+void PDFDoc::displayPage(OutputDev *out, int page,
+                         double hDPI, double vDPI, int rotate,
+                         GBool useMediaBox, GBool crop, GBool printing,
+                         GBool (*abortCheckCbk)(void *data),
+                         void *abortCheckCbkData) {
+  if (globalParams->getPrintCommands()) {
+    printf("***** page %d *****\n", page);
+  }
+
+  Page *target = catalog->getPage(page);
+  target->display(out, hDPI, vDPI, rotate, useMediaBox, crop, printing,
+                  abortCheckCbk, abortCheckCbkData);
+}
+
+// Render the inclusive page range firstPage..lastPage by calling
+// displayPage() for each page and then telling the catalog it is done
+// with that page.  When globalParams->getPrintStatusInfo() is set, a
+// progress line is written to stdout before each page.
+void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
+                          double hDPI, double vDPI, int rotate,
+                          GBool useMediaBox, GBool crop, GBool printing,
+                          GBool (*abortCheckCbk)(void *data),
+                          void *abortCheckCbkData) {
+  int current = firstPage;
+
+  while (current <= lastPage) {
+    if (globalParams->getPrintStatusInfo()) {
+      // flush stderr before printing and stdout right after it
+      fflush(stderr);
+      printf("[processing page %d]\n", current);
+      fflush(stdout);
+    }
+    displayPage(out, current, hDPI, vDPI, rotate, useMediaBox, crop,
+                printing, abortCheckCbk, abortCheckCbkData);
+    catalog->doneWithPage(current);
+    ++current;
+  }
+}
+
+// Render a rectangular slice (sliceX, sliceY, sliceW, sliceH) of one
+// page by forwarding to Page::displaySlice().  Note that displaySlice()
+// takes the slice rectangle before the printing flag, which differs
+// from the parameter order of this function.
+void PDFDoc::displayPageSlice(OutputDev *out, int page,
+                              double hDPI, double vDPI, int rotate,
+                              GBool useMediaBox, GBool crop, GBool printing,
+                              int sliceX, int sliceY, int sliceW, int sliceH,
+                              GBool (*abortCheckCbk)(void *data),
+                              void *abortCheckCbkData) {
+  Page *target = catalog->getPage(page);
+  target->displaySlice(out, hDPI, vDPI, rotate, useMediaBox, crop,
+                       sliceX, sliceY, sliceW, sliceH,
+                       printing,
+                       abortCheckCbk, abortCheckCbkData);
+}
+
+// Return the Links object produced by the given page's getLinks().
+Links *PDFDoc::getLinks(int page) {
+  Page *target = catalog->getPage(page);
+  return target->getLinks();
+}
+
+// Forward to the given page's processLinks() with the output device.
+void PDFDoc::processLinks(OutputDev *out, int page) {
+  Page *target = catalog->getPage(page);
+  target->processLinks(out);
+}
+
+#ifndef DISABLE_OUTLINE
+// Return the page number an outline item points to, caching the
+// result in outlineItem->pageNum.  A non-negative cached value is
+// returned as is.  The result is 0 when the item has no action, when
+// the action is not a GoTo action, or when no destination can be
+// resolved.
+int PDFDoc::getOutlineTargetPage(OutlineItem *outlineItem) {
+  // already resolved on an earlier call
+  if (outlineItem->pageNum >= 0) {
+    return outlineItem->pageNum;
+  }
+
+  int targetPage = 0;
+  LinkAction *act = outlineItem->getAction();
+  if (act && act->getKind() == actionGoTo) {
+    LinkGoTo *goTo = (LinkGoTo *)act;
+
+    // Prefer the explicit destination (copied, so that it can be
+    // deleted below just like one returned by findDest()); otherwise
+    // look up the named destination.
+    LinkDest *target = goTo->getDest();
+    if (target) {
+      target = target->copy();
+    } else {
+      GString *destName = goTo->getNamedDest();
+      if (destName) {
+        target = findDest(destName);
+      }
+    }
+
+    if (target) {
+      if (target->isPageRef()) {
+        Ref ref = target->getPageRef();
+        targetPage = findPage(ref.num, ref.gen);
+      } else {
+        targetPage = target->getPageNum();
+      }
+      delete target;
+    }
+  }
+
+  outlineItem->pageNum = targetPage;
+  return targetPage;
+}
+#endif
+
+// Report whether the first object in the file is a linearization
+// dictionary.  Parses four objects from the start of the stream and
+// returns gTrue only if they are: int, int, the "obj" command, and a
+// dictionary whose "Linearized" entry is a number greater than zero.
+GBool PDFDoc::isLinearized() {
+  Object numObj, genObj, cmdObj, dictObj, linObj;
+  GBool linearized = gFalse;
+
+  // numObj doubles as the (null) dictionary handed to the substream
+  // before it is overwritten by the first parsed object
+  numObj.initNull();
+  Lexer *lexer =
+      new Lexer(xref,
+                str->makeSubStream(str->getStart(), gFalse, 0, &numObj));
+  Parser *parser = new Parser(xref, lexer, gTrue);
+
+  parser->getObj(&numObj);
+  parser->getObj(&genObj);
+  parser->getObj(&cmdObj);
+  parser->getObj(&dictObj);
+
+  GBool looksLikeObjHeader =
+      numObj.isInt() && genObj.isInt() && cmdObj.isCmd("obj") &&
+      dictObj.isDict();
+  if (looksLikeObjHeader) {
+    dictObj.dictLookup("Linearized", &linObj);
+    if (linObj.isNum() && linObj.getNum() > 0) {
+      linearized = gTrue;
+    }
+    linObj.free();
+  }
+
+  dictObj.free();
+  cmdObj.free();
+  genObj.free();
+  numObj.free();
+  delete parser;
+  return linearized;
+}
+
+// Copy the document's stream, from its beginning, into a new file
+// called name (opened in "wb" mode, so an existing file is
+// overwritten).  Returns gTrue only if the file could be opened, every
+// block was written in full, and the file closed without error;
+// otherwise an errIO error is reported and gFalse is returned.
+GBool PDFDoc::saveAs(GString *name) {
+  FILE *f;
+  char buf[4096];
+  int n;
+  GBool written;
+
+  if (!(f = fopen(name->getCString(), "wb"))) {
+    error(errIO, -1, "Couldn't open file '{0:t}'", name);
+    return gFalse;
+  }
+  written = gTrue;
+  str->reset();
+  while ((n = str->getBlock(buf, sizeof(buf))) > 0) {
+    // a short write (e.g. disk full) means the copy is incomplete
+    if (fwrite(buf, 1, n, f) != (size_t)n) {
+      written = gFalse;
+      break;
+    }
+  }
+  str->close();
+  // fclose() flushes buffered data, so it can also report a write error
+  if (fclose(f) != 0) {
+    written = gFalse;
+  }
+  if (!written) {
+    error(errIO, -1, "Error writing to file '{0:t}'", name);
+  }
+  return written;
+}
+
+// Write embedded file number idx to a new file at path, opened with
+// fopen() in "wb" mode.  Returns gFalse if the file cannot be opened;
+// otherwise returns whatever saveEmbeddedFile2() returns.
+GBool PDFDoc::saveEmbeddedFile(int idx, const char *path) {
+  FILE *out = fopen(path, "wb");
+  if (!out) {
+    return gFalse;
+  }
+
+  GBool saved = saveEmbeddedFile2(idx, out);
+  fclose(out);
+  return saved;
+}
+
+// Same as saveEmbeddedFile(int, const char *), except that the output
+// file is opened through the openFile() helper instead of fopen().
+// NOTE(review): the "U" suffix suggests path is expected to be UTF-8;
+// confirm against openFile().
+GBool PDFDoc::saveEmbeddedFileU(int idx, const char *path) {
+  FILE *out = openFile(path, "wb");
+  if (!out) {
+    return gFalse;
+  }
+
+  GBool saved = saveEmbeddedFile2(idx, out);
+  fclose(out);
+  return saved;
+}
+
+#ifdef _WIN32
+// Write embedded file number idx to a new file whose name is the
+// wide-character string path (pathLen characters).  On NT platforms
+// the name is copied (truncated to winMaxLongPath characters) and
+// opened with _wfopen(); elsewhere each character is narrowed to char
+// (truncated to MAX_PATH characters) and opened with fopen().  Returns
+// gFalse if the file cannot be opened; otherwise returns whatever
+// saveEmbeddedFile2() returns.
+GBool PDFDoc::saveEmbeddedFile(int idx, const wchar_t *path, int pathLen) {
+  FILE *out;
+  OSVERSIONINFO osInfo;
+  int k;
+
+  // NB: _wfopen is only available in NT
+  osInfo.dwOSVersionInfoSize = sizeof(osInfo);
+  GetVersionEx(&osInfo);
+  if (osInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) {
+    // make a NUL-terminated wide copy of the name
+    wchar_t widePath[winMaxLongPath + 1];
+    k = 0;
+    while (k < pathLen && k < winMaxLongPath) {
+      widePath[k] = path[k];
+      ++k;
+    }
+    widePath[k] = 0;
+    out = _wfopen(widePath, L"wb");
+  } else {
+    // make a NUL-terminated 8-bit copy of the name
+    char narrowPath[MAX_PATH + 1];
+    k = 0;
+    while (k < pathLen && k < MAX_PATH) {
+      narrowPath[k] = (char)path[k];
+      ++k;
+    }
+    narrowPath[k] = 0;
+    out = fopen(narrowPath, "wb");
+  }
+  if (!out) {
+    return gFalse;
+  }
+
+  GBool saved = saveEmbeddedFile2(idx, out);
+  fclose(out);
+  return saved;
+}
+#endif
+
+// Copy the contents of embedded file number idx to the already-open
+// FILE f.  The caller keeps ownership of f and is responsible for
+// closing it.  Returns gFalse if the catalog cannot provide the
+// embedded file's stream object or if a block could not be written in
+// full; returns gTrue otherwise.
+GBool PDFDoc::saveEmbeddedFile2(int idx, FILE *f) {
+  Object strObj;
+  char buf[4096];
+  int n;
+  GBool written;
+
+  if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
+    return gFalse;
+  }
+  written = gTrue;
+  strObj.streamReset();
+  while ((n = strObj.streamGetBlock(buf, sizeof(buf))) > 0) {
+    // a short write (e.g. disk full) means the saved file is truncated
+    if (fwrite(buf, 1, n, f) != (size_t)n) {
+      written = gFalse;
+      break;
+    }
+  }
+  // the stream object is released on the failure path as well
+  strObj.streamClose();
+  strObj.free();
+  return written;
+}
+
+// Read embedded file number idx into a newly allocated buffer.
+// On success, returns the buffer (allocated with grealloc(), so the
+// caller presumably releases it with gfree() -- confirm against
+// callers) and stores the number of bytes read in *size.  On failure
+// -- the catalog cannot provide the stream object, or the data would
+// exceed INT_MAX bytes -- returns NULL and stores 0 in *size.
+char *PDFDoc::getEmbeddedFileMem(int idx, int *size) {
+  Object strObj;
+  char *buf;
+  int bufSize, sizeInc, n;
+
+  if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
+    *size = 0;
+    return NULL;
+  }
+  strObj.streamReset();
+  bufSize = 0;
+  buf = NULL;
+  do {
+    // grow by 1024 bytes the first time, then double the buffer
+    sizeInc = bufSize ? bufSize : 1024;
+    if (bufSize > INT_MAX - sizeInc) {
+      error(errIO, -1, "embedded file is too large");
+      // release what was read so far and the stream object, which
+      // would otherwise leak on this path
+      gfree(buf);
+      strObj.streamClose();
+      strObj.free();
+      *size = 0;
+      return NULL;
+    }
+    buf = (char *)grealloc(buf, bufSize + sizeInc);
+    n = strObj.streamGetBlock(buf + bufSize, sizeInc);
+    bufSize += n;
+    // a block shorter than requested marks the end of the stream
+  } while (n == sizeInc);
+  strObj.streamClose();
+  strObj.free();
+  *size = bufSize;
+  return buf;
+}
+