aboutsummaryrefslogtreecommitdiff
path: root/xpdf/pdffonts.cc
diff options
context:
space:
mode:
author Calvin Morrison <calvin@pobox.com> 2023-04-05 14:13:39 -0400
committer Calvin Morrison <calvin@pobox.com> 2023-04-05 14:13:39 -0400
commit 835e373b3eeaabcd0621ed6798ab500f37982fae (patch)
tree dfa16b0e2e1b4956b38f693220eac4e607802133 /xpdf/pdffonts.cc
xpdf-no-select-disableHEADmaster
Diffstat (limited to 'xpdf/pdffonts.cc')
-rw-r--r--xpdf/pdffonts.cc455
1 files changed, 455 insertions, 0 deletions
diff --git a/xpdf/pdffonts.cc b/xpdf/pdffonts.cc
new file mode 100644
index 0000000..bde5755
--- /dev/null
+++ b/xpdf/pdffonts.cc
@@ -0,0 +1,455 @@
+//========================================================================
+//
+// pdffonts.cc
+//
+// Copyright 2001-2007 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include <aconf.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <math.h>
+#include <limits.h>
+#include "gmem.h"
+#include "gmempp.h"
+#include "parseargs.h"
+#include "GString.h"
+#include "GlobalParams.h"
+#include "Error.h"
+#include "Object.h"
+#include "Dict.h"
+#include "GfxFont.h"
+#include "Annot.h"
+#include "AcroForm.h"
+#include "PDFDoc.h"
+#include "config.h"
+
+// Human-readable names for the "type" column of the report.  The table is
+// indexed directly with the value returned by GfxFont::getType() (see
+// scanFont), so the entries must stay in the same order as the enumerators.
+// NB: this must match the definition of GfxFontType in GfxFont.h.
+static const char *fontTypeNames[] = {
+ "unknown",
+ "Type 1",
+ "Type 1C",
+ "Type 1C (OT)",
+ "Type 3",
+ "TrueType",
+ "TrueType (OT)",
+ "CID Type 0",
+ "CID Type 0C",
+ "CID Type 0C (OT)",
+ "CID TrueType",
+ "CID TrueType (OT)"
+};
+
+// Forward declarations.  The two scanFonts overloads call each other
+// recursively: the Object version resolves a (possibly indirect) resource
+// dictionary, the Dict version walks its contents.
+static void scanFonts(Object *obj, PDFDoc *doc);
+static void scanFonts(Dict *resDict, PDFDoc *doc);
+// Prints the report line for a single font.
+static void scanFont(GfxFont *font, PDFDoc *doc);
+// Resolves <in> into <out>; returns gFalse only for an indirect object
+// that has already been visited.
+static GBool checkObject(Object *in, Object *out);
+
+// Command-line option values, filled in through the argDesc table below.
+static int firstPage = 1;
+// 0 (or any value < 1) means "through the last page"; see main().
+static int lastPage = 0;
+static GBool showFontLoc = gFalse;
+static GBool showFontLocPS = gFalse;
+// A leading '\001' byte marks "password not supplied": main() only passes a
+// password to PDFDoc when the first byte differs from '\001'.
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static char cfgFileName[256] = "";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+// Option table handed to parseArgs() and printUsage().  Each entry lists
+// the option name, its kind (argInt / argFlag / argString), the variable
+// that receives the value, a size (sizeof the buffer for string options,
+// 0 otherwise) and the usage text.  The final {NULL} entry presumably
+// terminates the table -- confirm against parseargs.h.
+static ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to examine"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to examine"},
+ {"-loc", argFlag, &showFontLoc, 0,
+ "print extended info on font location"},
+ {"-locPS", argFlag, &showFontLocPS, 0,
+ "print extended info on font location for PostScript conversion"},
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+ {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
+ "configuration file to use in place of .xpdfrc"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ // all four spellings below set the same printHelp flag
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+// The document being examined; created and deleted in main(), and also
+// read by checkObject() to fetch indirect objects.
+static PDFDoc *doc;
+
+// Growable list of the font IDs that have already been printed, used by
+// scanFont() to suppress duplicates.  fontsLen is the number of entries in
+// use, fontsSize the allocated capacity.
+static Ref *fonts;
+static int fontsLen;
+static int fontsSize;
+
+// One flag byte per object number (numObjects entries, taken from the
+// xref table in main()).  checkObject() sets a flag the first time an
+// indirect object is resolved so that it is not followed a second time.
+static char *seenObjs;
+static int numObjects;
+
+// Entry point: parses the command line, opens the PDF named by the single
+// positional argument, and prints one table row per distinct font found in
+// the resources of the selected pages, in the appearance streams of their
+// annotations, and in the resources of the document's form fields.
+//
+// Exit code: 0 on success, 1 if the document could not be opened, 99 if
+// the command line was invalid or only usage/version output was requested.
+int main(int argc, char *argv[]) {
+ char *fileName;
+ GString *ownerPW, *userPW;
+ GBool ok;
+ Page *page;
+ Dict *resDict;
+ Annots *annots;
+ AcroForm *form;
+ Object obj1, obj2;
+ int pg, i, j;
+ int exitCode;
+
+ // default exit code; it is what gets returned on the usage/version path
+ exitCode = 99;
+
+ // parse args
+ fixCommandLine(&argc, &argv);
+ ok = parseArgs(argDesc, &argc, argv);
+ // exactly one positional argument (the PDF file) must remain
+ if (!ok || argc != 2 || printVersion || printHelp) {
+ fprintf(stderr, "pdffonts version %s [www.xpdfreader.com]\n", xpdfVersion);
+ fprintf(stderr, "%s\n", xpdfCopyright);
+ if (!printVersion) {
+ printUsage("pdffonts", "<PDF-file>", argDesc);
+ }
+ goto err0;
+ }
+ fileName = argv[1];
+
+ // read config file
+ // (a missing config file is reported through error(); there is no early
+ // exit here, GlobalParams is still constructed with the given name)
+ if (cfgFileName[0] && !pathIsFile(cfgFileName)) {
+ error(errConfig, -1, "Config file '{0:s}' doesn't exist or isn't a file",
+ cfgFileName);
+ }
+ globalParams = new GlobalParams(cfgFileName);
+ globalParams->setupBaseFonts(NULL);
+
+ // open PDF file
+ // (a first byte of '\001' means the password option was not given)
+ if (ownerPassword[0] != '\001') {
+ ownerPW = new GString(ownerPassword);
+ } else {
+ ownerPW = NULL;
+ }
+ if (userPassword[0] != '\001') {
+ userPW = new GString(userPassword);
+ } else {
+ userPW = NULL;
+ }
+ doc = new PDFDoc(fileName, ownerPW, userPW);
+ // the password strings are released right after the PDFDoc is constructed
+ if (userPW) {
+ delete userPW;
+ }
+ if (ownerPW) {
+ delete ownerPW;
+ }
+ if (!doc->isOk()) {
+ exitCode = 1;
+ goto err1;
+ }
+
+ // get page range
+ // (values below 1 are clamped; lastPage < 1, including the default 0,
+ // selects the last page of the document)
+ if (firstPage < 1) {
+ firstPage = 1;
+ }
+ if (lastPage < 1 || lastPage > doc->getNumPages()) {
+ lastPage = doc->getNumPages();
+ }
+
+ // scan the fonts
+ // (the table header gains a "location" column when -loc or -locPS is set)
+ if (showFontLoc || showFontLocPS) {
+ printf("name type emb sub uni prob object ID location\n");
+ printf("---------------------------------------------- ----------------- --- --- --- ---- --------- --------\n");
+ } else {
+ printf("name type emb sub uni prob object ID\n");
+ printf("---------------------------------------------- ----------------- --- --- --- ---- ---------\n");
+ }
+ fonts = NULL;
+ fontsLen = fontsSize = 0;
+ // one zero-initialised "already visited" flag per object number
+ numObjects = doc->getXRef()->getNumObjects();
+ seenObjs = (char *)gmalloc(numObjects);
+ memset(seenObjs, 0, numObjects);
+ for (pg = firstPage; pg <= lastPage; ++pg) {
+ page = doc->getCatalog()->getPage(pg);
+ // fonts referenced from the page's own resource dictionary
+ if ((resDict = page->getResourceDict())) {
+ scanFonts(resDict, doc);
+ }
+ // fonts referenced from the appearance streams of the page's annotations
+ annots = new Annots(doc, page->getAnnots(&obj1));
+ obj1.free();
+ for (i = 0; i < annots->getNumAnnots(); ++i) {
+ if (annots->getAnnot(i)->getAppearance(&obj1)->isStream()) {
+ obj1.streamGetDict()->lookupNF("Resources", &obj2);
+ scanFonts(&obj2, doc);
+ obj2.free();
+ }
+ obj1.free();
+ }
+ delete annots;
+ }
+ // fonts referenced from form fields; a field's resources may come back
+ // either as an array of resource dictionaries or as a single dictionary
+ if ((form = doc->getCatalog()->getForm())) {
+ for (i = 0; i < form->getNumFields(); ++i) {
+ form->getField(i)->getResources(&obj1);
+ if (obj1.isArray()) {
+ for (j = 0; j < obj1.arrayGetLength(); ++j) {
+ obj1.arrayGetNF(j, &obj2);
+ scanFonts(&obj2, doc);
+ obj2.free();
+ }
+ } else if (obj1.isDict()) {
+ scanFonts(obj1.getDict(), doc);
+ }
+ obj1.free();
+ }
+ }
+
+ exitCode = 0;
+
+ // clean up
+ gfree(fonts);
+ gfree(seenObjs);
+ // err1: also reached when the document failed to open, i.e. before fonts
+ // and seenObjs were allocated
+ err1:
+ delete doc;
+ delete globalParams;
+ // err0: also reached on the usage/version path, before globalParams and
+ // doc were created
+ err0:
+
+ // check for memory leaks
+ Object::memCheck(stderr);
+ gMemReport(stderr);
+
+ return exitCode;
+}
+
+// Resolves a resource-dictionary object, which may be direct or an
+// indirect reference, and scans it for fonts when it turns out to be a
+// dictionary.  An indirect object that checkObject() reports as already
+// visited is skipped.
+static void scanFonts(Object *obj, PDFDoc *doc) {
+  Object resolved;
+  const GBool firstVisit = checkObject(obj, &resolved);
+
+  if (firstVisit) {
+    if (resolved.isDict()) {
+      scanFonts(resolved.getDict(), doc);
+    }
+  }
+  resolved.free();
+}
+
+// Scans the "Resources" entry of a stream for fonts.  <streamNF> is the
+// unresolved (possibly indirect) object; it is ignored unless it resolves
+// to a stream that checkObject() has not reported as already visited.
+// The caller keeps ownership of <streamNF>.
+static void scanStreamResources(Object *streamNF, PDFDoc *doc) {
+  Object stream, resources;
+
+  if (checkObject(streamNF, &stream) && stream.isStream()) {
+    stream.streamGetDict()->lookupNF("Resources", &resources);
+    scanFonts(&resources, doc);
+    resources.free();
+  }
+  stream.free();
+}
+
+// Resolves <dictNF> and, if it is a dictionary, scans the resources of
+// every stream stored as a value in it.  The caller keeps ownership of
+// <dictNF>.
+static void scanStreamDict(Object *dictNF, PDFDoc *doc) {
+  Object dict, entryNF;
+  int idx;
+
+  if (checkObject(dictNF, &dict) && dict.isDict()) {
+    for (idx = 0; idx < dict.dictGetLength(); ++idx) {
+      dict.dictGetValNF(idx, &entryNF);
+      scanStreamResources(&entryNF, doc);
+      entryNF.free();
+    }
+  }
+  dict.free();
+}
+
+// Reports every font reachable from the resource dictionary <resDict>:
+// the fonts listed under "Font", plus, recursively, the fonts used by the
+// resources of its XObjects, its Patterns and the soft-mask groups of its
+// ExtGStates.
+static void scanFonts(Dict *resDict, PDFDoc *doc) {
+  Object fontDictNF, fontDict, xObjDictNF, patternDictNF;
+  Object gsDictNF, gsDict, gsNF, gs, smaskNF, smask, groupNF;
+  Ref fontDictRef;
+  Ref *fontDictRefPtr;
+  GfxFontDict *fontSet;
+  GfxFont *font;
+  int idx;
+
+  // fonts listed directly in this resource dictionary
+  resDict->lookupNF("Font", &fontDictNF);
+  if (checkObject(&fontDictNF, &fontDict) && fontDict.isDict()) {
+    // pass the font dictionary's own reference along when it has one
+    fontDictRefPtr = NULL;
+    if (fontDictNF.isRef()) {
+      fontDictRef = fontDictNF.getRef();
+      fontDictRefPtr = &fontDictRef;
+    }
+    fontSet = new GfxFontDict(doc->getXRef(), fontDictRefPtr,
+                              fontDict.getDict());
+    if (fontSet) {
+      for (idx = 0; idx < fontSet->getNumFonts(); ++idx) {
+        font = fontSet->getFont(idx);
+        if (font) {
+          scanFont(font, doc);
+        }
+      }
+      delete fontSet;
+    }
+  }
+  fontDict.free();
+  fontDictNF.free();
+
+  // resources of the XObjects in this resource dictionary
+  resDict->lookupNF("XObject", &xObjDictNF);
+  scanStreamDict(&xObjDictNF, doc);
+  xObjDictNF.free();
+
+  // resources of the Patterns in this resource dictionary
+  resDict->lookupNF("Pattern", &patternDictNF);
+  scanStreamDict(&patternDictNF, doc);
+  patternDictNF.free();
+
+  // resources of the soft-mask group (ExtGState -> SMask -> G) of every
+  // ExtGState in this resource dictionary
+  resDict->lookupNF("ExtGState", &gsDictNF);
+  if (checkObject(&gsDictNF, &gsDict) && gsDict.isDict()) {
+    for (idx = 0; idx < gsDict.dictGetLength(); ++idx) {
+      gsDict.dictGetValNF(idx, &gsNF);
+      if (checkObject(&gsNF, &gs) && gs.isDict()) {
+        gs.dictLookupNF("SMask", &smaskNF);
+        if (checkObject(&smaskNF, &smask) && smask.isDict()) {
+          smask.dictLookupNF("G", &groupNF);
+          scanStreamResources(&groupNF, doc);
+          groupNF.free();
+        }
+        smask.free();
+        smaskNF.free();
+      }
+      gs.free();
+      gsNF.free();
+    }
+  }
+  gsDict.free();
+  gsDictNF.free();
+}
+
+// Prints one report line for <font> (name, type, embedded / subset /
+// ToUnicode flags, "problematic for Unicode" marker, object ID and,
+// with -loc / -locPS, the font location), then records the font's ID in
+// the global <fonts> list.  Does nothing if a font with the same ID has
+// already been recorded.
+static void scanFont(GfxFont *font, PDFDoc *doc) {
+  Ref id, embeddedID;
+  Object fontDictObj, toUnicode;
+  GString *fontName;
+  GBool isEmbedded, isSubset, hasToUnicode;
+  GfxFontLoc *where;
+  int k, prefixLen;
+
+  id = *font->getID();
+
+  // skip fonts that were already reported
+  k = 0;
+  while (k < fontsLen) {
+    if (fonts[k].num == id.num && fonts[k].gen == id.gen) {
+      return;
+    }
+    ++k;
+  }
+
+  fontName = font->getName();
+
+  // Type 3 fonts are always reported as embedded; every other type is
+  // asked for its embedded font ID
+  isEmbedded = gTrue;
+  if (font->getType() != fontType3) {
+    isEmbedded = font->getEmbeddedFontID(&embeddedID);
+  }
+
+  // a ToUnicode map counts only if the font object is a dictionary whose
+  // "ToUnicode" entry is a stream
+  hasToUnicode = gFalse;
+  doc->getXRef()->fetch(id.num, id.gen, &fontDictObj);
+  if (fontDictObj.isDict()) {
+    fontDictObj.dictLookup("ToUnicode", &toUnicode);
+    hasToUnicode = toUnicode.isStream();
+    toUnicode.free();
+  }
+  fontDictObj.free();
+
+  // a subset name is a non-empty run of capital letters immediately
+  // followed by a '+' sign
+  isSubset = gFalse;
+  if (fontName) {
+    prefixLen = 0;
+    while (prefixLen < fontName->getLength() &&
+           fontName->getChar(prefixLen) >= 'A' &&
+           fontName->getChar(prefixLen) <= 'Z') {
+      ++prefixLen;
+    }
+    isSubset = prefixLen > 0 &&
+               prefixLen < fontName->getLength() &&
+               fontName->getChar(prefixLen) == '+';
+  }
+
+  // fixed-width columns of the report line
+  printf("%-46s %-17s %-3s %-3s %-3s %-4s",
+         fontName ? fontName->getCString() : "[none]",
+         fontTypeNames[font->getType()],
+         isEmbedded ? "yes" : "no",
+         isSubset ? "yes" : "no",
+         hasToUnicode ? "yes" : "no",
+         font->problematicForUnicode() ? " X" : "");
+
+  // object ID column; generation numbers of 100000 and above are printed
+  // as "[none]" instead of an ID
+  if (id.gen < 100000) {
+    printf(" %6d %2d", id.num, id.gen);
+  } else {
+    printf(" [none]");
+  }
+
+  // optional location column
+  if (showFontLoc || showFontLocPS) {
+    if (font->getType() == fontType3) {
+      printf(" embedded");
+    } else {
+      where = font->locateFont(doc->getXRef(), showFontLocPS);
+      if (!where) {
+        printf(" unknown");
+      } else {
+        switch (where->locType) {
+        case gfxFontLocEmbedded:
+          printf(" embedded");
+          break;
+        case gfxFontLocExternal:
+          if (where->path) {
+            printf(" external: %s", where->path->getCString());
+          } else {
+            printf(" unavailable");
+          }
+          break;
+        case gfxFontLocResident:
+          if (where->path) {
+            printf(" resident: %s", where->path->getCString());
+          } else {
+            printf(" unavailable");
+          }
+          break;
+        default:
+          // any other location type prints nothing
+          break;
+        }
+      }
+      delete where;
+    }
+  }
+  printf("\n");
+
+  // remember this font, growing the list by 32 entries when it is full;
+  // if that would overflow an int, a size of -1 is passed on so that
+  // greallocn reports the failure
+  if (fontsLen == fontsSize) {
+    fontsSize = (fontsSize <= INT_MAX - 32) ? fontsSize + 32 : -1;
+    fonts = (Ref *)greallocn(fonts, fontsSize, sizeof(Ref));
+  }
+  fonts[fontsLen] = *font->getID();
+  ++fontsLen;
+}
+
+// Resolves <in> into <out>, following an indirect reference at most once
+// per object number.
+//
+//  - <in> is not a reference: <out> receives a copy; returns gTrue.
+//  - reference with an object number outside [0, numObjects): <out> is
+//    set to null; returns gTrue.
+//  - reference to an object already marked in seenObjs: <out> is set to
+//    null; returns gFalse.
+//  - otherwise the object is marked as seen and fetched into <out>;
+//    returns gTrue.
+//
+// <out> is initialised on every path, so the caller can always free it.
+static GBool checkObject(Object *in, Object *out) {
+  if (in->isRef()) {
+    const int refNum = in->getRefNum();
+
+    if (refNum >= 0 && refNum < numObjects) {
+      if (seenObjs[refNum]) {
+        out->initNull();
+        return gFalse;
+      }
+      seenObjs[refNum] = (char)1;
+      in->fetch(doc->getXRef(), out);
+    } else {
+      out->initNull();
+    }
+    return gTrue;
+  }
+
+  in->copy(out);
+  return gTrue;
+}