aboutsummaryrefslogtreecommitdiff
path: root/xpdf/TextOutputDev.h
diff options
context:
space:
mode:
authorCalvin Morrison <calvin@pobox.com>2023-04-05 14:13:39 -0400
committerCalvin Morrison <calvin@pobox.com>2023-04-05 14:13:39 -0400
commit835e373b3eeaabcd0621ed6798ab500f37982fae (patch)
treedfa16b0e2e1b4956b38f693220eac4e607802133 /xpdf/TextOutputDev.h
xpdf-no-select-disableHEADmaster
Diffstat (limited to 'xpdf/TextOutputDev.h')
-rw-r--r--xpdf/TextOutputDev.h800
1 files changed, 800 insertions, 0 deletions
diff --git a/xpdf/TextOutputDev.h b/xpdf/TextOutputDev.h
new file mode 100644
index 0000000..302975c
--- /dev/null
+++ b/xpdf/TextOutputDev.h
@@ -0,0 +1,800 @@
+//========================================================================
+//
+// TextOutputDev.h
+//
+// Copyright 1997-2012 Glyph & Cog, LLC
+//
+//========================================================================
+
+#ifndef TEXTOUTPUTDEV_H
+#define TEXTOUTPUTDEV_H
+
+#include <aconf.h>
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include <stdio.h>
+#include "gtypes.h"
+#include "GfxFont.h"
+#include "OutputDev.h"
+
+class GList;
+class UnicodeMap;
+class UnicodeRemapping;
+
+class TextBlock;
+class TextChar;
+class TextGaps;
+class TextLink;
+class TextPage;
+
+//------------------------------------------------------------------------
+// Output callback type: receives <len> chars of <text> plus a caller <stream>.
+typedef void (*TextOutputFunc)(void *stream, const char *text, int len);
+
+//------------------------------------------------------------------------
+// TextOutputControl
+//------------------------------------------------------------------------
+
+enum TextOutputMode { // value type of TextOutputControl::mode
+ textOutReadingOrder, // format into reading order
+ textOutPhysLayout, // maintain original physical layout
+ textOutSimpleLayout, // simple one-column physical layout
+ textOutSimple2Layout, // second one-column variant (writeSimple2Layout)
+ textOutTableLayout, // similar to PhysLayout, but optimized
+ // for tables
+ textOutLinePrinter, // strict fixed-pitch/height layout
+ textOutRawOrder // keep text in content stream order
+};
+
+enum TextOutputOverlapHandling { // TextOutputControl::overlapHandling
+ textOutIgnoreOverlaps, // no special handling for overlaps
+ textOutAppendOverlaps, // append overlapping text to main text
+ textOutDiscardOverlaps // discard overlapping text
+};
+
+class TextOutputControl { // formatting parameters (held by value elsewhere)
+public:
+
+ TextOutputControl(); // presumably sets defaults (body not in this header)
+ ~TextOutputControl() {}
+
+ TextOutputMode mode; // formatting mode
+ double fixedPitch; // if this is non-zero, assume fixed-pitch
+ // characters with this width
+ // (only relevant for PhysLayout, Table,
+ // and LinePrinter modes)
+ double fixedLineSpacing; // fixed line spacing (only relevant for
+ // LinePrinter mode)
+ GBool html; // enable extra processing for HTML
+ GBool clipText; // separate clipped text and add it back
+ // in after forming columns
+ GBool discardDiagonalText; // discard all text that's not close to
+ // 0/90/180/270 degrees
+ GBool discardRotatedText; // discard all text that's not horizontal
+ // (0 degrees)
+ GBool discardInvisibleText; // discard all invisible characters
+ GBool discardClippedText; // discard all clipped characters
+ GBool splitRotatedWords; // do not combine horizontal and
+ // non-horizontal chars in a single
+ // word
+ TextOutputOverlapHandling // how to handle overlapping text
+ overlapHandling;
+ GBool separateLargeChars; // separate "large" characters from
+ // "regular" characters
+ GBool insertBOM; // insert a Unicode BOM at the start of
+ // the text output
+ double marginLeft, // characters outside the margins are
+ marginRight, // discarded (units are not stated here;
+ marginTop, // NOTE(review): presumably page
+ marginBottom; // coordinates -- confirm)
+};
+
+//------------------------------------------------------------------------
+// TextFontInfo
+//------------------------------------------------------------------------
+
+class TextFontInfo { // font ID, name, descriptor flags and metrics
+public:
+
+ // Create a TextFontInfo for the current font in [state].
+ TextFontInfo(GfxState *state);
+
+ // Create a dummy TextFontInfo.
+ TextFontInfo();
+
+ ~TextFontInfo();
+
+ GBool matches(GfxState *state); // presumably: is this the font in [state]?
+
+ // Get the font name (which may be NULL).
+ GString *getFontName() { return fontName; }
+
+ // Get font descriptor flags as 0/1 (not the raw masked bit value).
+ GBool isFixedWidth() { return (flags & fontFixedWidth) != 0; }
+ GBool isSerif() { return (flags & fontSerif) != 0; }
+ GBool isSymbolic() { return (flags & fontSymbolic) != 0; }
+ GBool isItalic() { return (flags & fontItalic) != 0; }
+ GBool isBold() { return (flags & fontBold) != 0; }
+
+ // Get the width of the 'm' character, if available.
+ double getMWidth() { return mWidth; }
+
+ Ref getFontID() { return fontID; }
+
+private:
+
+ Ref fontID; // returned by getFontID()
+ GString *fontName; // may be NULL (see getFontName)
+ int flags; // bit set tested against the font* masks above
+ double mWidth; // width of 'm', if available (see getMWidth)
+ double ascent, descent;
+
+ friend class TextLine;
+ friend class TextPage;
+ friend class TextWord;
+};
+
+//------------------------------------------------------------------------
+// TextWord
+//------------------------------------------------------------------------
+
+class TextWord { // one word: Unicode text, per-char positions, font, color
+public:
+
+ TextWord(GList *chars, int start, int lenA,
+ int rotA, GBool rotatedA, int dirA, GBool spaceAfterA);
+ ~TextWord();
+ TextWord *copy() { return new TextWord(this); } // new heap-allocated copy
+
+ // Get the TextFontInfo object associated with this word.
+ TextFontInfo *getFontInfo() { return font; }
+
+ int getLength() { return len; }
+ Unicode getChar(int idx) { return text[idx]; } // idx is not range-checked
+ GString *getText();
+ GString *getFontName() { return font->fontName; } // name may be NULL
+ void getColor(double *r, double *g, double *b)
+ { *r = colorR; *g = colorG; *b = colorB; }
+ GBool isInvisible() { return invisible; }
+ void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA)
+ { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; }
+ void getCharBBox(int charIdx, double *xMinA, double *yMinA,
+ double *xMaxA, double *yMaxA);
+ double getFontSize() { return fontSize; }
+ int getRotation() { return rot; }
+ GBool isRotated() { return (GBool)rotated; }
+ int getCharPos() { return charPos[0]; } // stream position of first char
+ int getCharLen() { return charPos[len] - charPos[0]; } // uses extra entry
+ int getDirection() { return dir; }
+ GBool getSpaceAfter() { return spaceAfter; }
+ double getBaseline();
+ GBool isUnderlined() { return underlined; }
+ GString *getLinkURI();
+
+private:
+
+ TextWord(TextWord *word); // used by copy()
+ static int cmpYX(const void *p1, const void *p2);
+ static int cmpCharPos(const void *p1, const void *p2);
+
+ double xMin, xMax; // bounding box x coordinates
+ double yMin, yMax; // bounding box y coordinates
+ Unicode *text; // the text
+ int *charPos; // character position (within content stream)
+ // of each char (plus one extra entry for
+ // the last char)
+ double *edge; // "near" edge x or y coord of each char
+ // (plus one extra entry for the last char)
+ int len; // number of characters
+ TextFontInfo *font; // font information
+ double fontSize; // font size
+ TextLink *link; // presumably the link behind getLinkURI()
+ double colorR, // word color
+ colorG,
+ colorB;
+ GBool invisible; // set for invisible text (render mode 3)
+
+ // group the byte-size fields to minimize object size
+ Guchar rot; // rotation, multiple of 90 degrees
+ // (0, 1, 2, or 3)
+ char rotated; // set if this word is non-horizontal
+ char dir; // character direction (+1 = left-to-right;
+ // -1 = right-to-left; 0 = neither)
+ char spaceAfter; // set if there is a space between this
+ // word and the next word on the line
+ char underlined; // returned by isUnderlined()
+
+ friend class TextBlock;
+ friend class TextLine;
+ friend class TextPage;
+};
+
+//------------------------------------------------------------------------
+// TextLine
+//------------------------------------------------------------------------
+
+class TextLine { // one line: its TextWords plus the line's Unicode text
+public:
+
+ TextLine(GList *wordsA, double xMinA, double yMinA,
+ double xMaxA, double yMaxA, double fontSizeA);
+ ~TextLine();
+
+ double getXMin() { return xMin; }
+ double getYMin() { return yMin; }
+ double getXMax() { return xMax; }
+ double getYMax() { return yMax; }
+ double getBaseline();
+ int getRotation() { return rot; }
+ GList *getWords() { return words; } // list of TextWord
+ Unicode *getUnicode() { return text; } // getLength() chars
+ int getLength() { return len; }
+ double getEdge(int idx) { return edge[idx]; } // idx is not range-checked
+ GBool getHyphenated() { return hyphenated; }
+
+private:
+
+ static int cmpX(const void *p1, const void *p2);
+
+ GList *words; // [TextWord]
+ int rot; // rotation, multiple of 90 degrees
+ // (0, 1, 2, or 3)
+ double xMin, xMax; // bounding box x coordinates
+ double yMin, yMax; // bounding box y coordinates
+ double fontSize; // main (max) font size for this line
+ Unicode *text; // Unicode text of the line, including
+ // spaces between words
+ double *edge; // "near" edge x or y coord of each char
+ // (plus one extra entry for the last char)
+ int len; // number of Unicode chars
+ GBool hyphenated; // set if last char is a hyphen
+ int px; // x offset (in characters, relative to
+ // containing column) in physical layout mode
+ int pw; // line width (in characters) in physical
+ // layout mode
+
+ friend class TextSuperLine;
+ friend class TextPage;
+ friend class TextParagraph;
+};
+
+//------------------------------------------------------------------------
+// TextParagraph
+//------------------------------------------------------------------------
+
+class TextParagraph { // a list of TextLine objects plus a bounding box
+public:
+
+ TextParagraph(GList *linesA, GBool dropCapA);
+ ~TextParagraph();
+
+ // Get the list of TextLine objects.
+ GList *getLines() { return lines; }
+
+ GBool hasDropCap() { return dropCap; } // starts with a drop capital?
+
+ double getXMin() { return xMin; }
+ double getYMin() { return yMin; }
+ double getXMax() { return xMax; }
+ double getYMax() { return yMax; }
+
+private:
+
+ GList *lines; // [TextLine]
+ GBool dropCap; // paragraph starts with a drop capital
+ double xMin, xMax; // bounding box x coordinates
+ double yMin, yMax; // bounding box y coordinates
+
+ friend class TextPage;
+};
+
+//------------------------------------------------------------------------
+// TextColumn
+//------------------------------------------------------------------------
+
+class TextColumn { // a list of TextParagraph objects plus a bounding box
+public:
+
+ TextColumn(GList *paragraphsA, double xMinA, double yMinA,
+ double xMaxA, double yMaxA);
+ ~TextColumn();
+
+ // Get the list of TextParagraph objects.
+ GList *getParagraphs() { return paragraphs; }
+
+ double getXMin() { return xMin; }
+ double getYMin() { return yMin; }
+ double getXMax() { return xMax; }
+ double getYMax() { return yMax; }
+
+ int getRotation(); // defined out of line; no rotation field is stored here
+
+private:
+
+ static int cmpX(const void *p1, const void *p2); // comparators, presumably
+ static int cmpY(const void *p1, const void *p2); // for sorting columns by
+ static int cmpPX(const void *p1, const void *p2); // x, y and px -- confirm
+
+ GList *paragraphs; // [TextParagraph]
+ double xMin, xMax; // bounding box x coordinates
+ double yMin, yMax; // bounding box y coordinates
+ int px, py; // x, y position (in characters) in physical
+ // layout mode
+ int pw, ph; // column width, height (in characters) in
+ // physical layout mode
+
+ friend class TextPage;
+};
+
+//------------------------------------------------------------------------
+// TextWordList
+//------------------------------------------------------------------------
+
+class TextWordList { // a flat list of TextWord plus the primary direction
+public:
+
+ TextWordList(GList *wordsA, GBool primaryLRA);
+
+ ~TextWordList();
+
+ // Return the number of words on the list.
+ int getLength();
+
+ // Return the <idx>th word from the list.
+ TextWord *get(int idx);
+
+ // Returns true if primary direction is left-to-right, or false if
+ // right-to-left.
+ GBool getPrimaryLR() { return primaryLR; }
+
+private:
+
+ GList *words; // [TextWord]
+ GBool primaryLR; // true = left-to-right (see getPrimaryLR)
+};
+
+//------------------------------------------------------------------------
+// TextPosition
+//------------------------------------------------------------------------
+
+// Position within a TextColumn tree. The position is in column
+// [colIdx], paragraph [parIdx], line [lineIdx], before character
+// [charIdx].
+class TextPosition { // plain tuple of four indexes; all fields are public
+public:
+
+ TextPosition(): colIdx(0), parIdx(0), lineIdx(0), charIdx(0) {}
+ TextPosition(int colIdxA, int parIdxA, int lineIdxA, int charIdxA):
+ colIdx(colIdxA), parIdx(parIdxA), lineIdx(lineIdxA), charIdx(charIdxA) {}
+
+ int operator==(TextPosition pos); // comparisons are only declared here;
+ int operator!=(TextPosition pos); // they take <pos> by value and
+ int operator<(TextPosition pos); // return int (presumably 0/1 --
+ int operator>(TextPosition pos); // confirm in the .cc file)
+
+ int colIdx, parIdx, lineIdx, charIdx;
+};
+
+//------------------------------------------------------------------------
+// TextPage
+//------------------------------------------------------------------------
+
+class TextPage { // text of one page: chars, fonts, underlines, links
+public:
+
+ TextPage(TextOutputControl *controlA); // see the <control> member
+ ~TextPage();
+
+ // Write contents of page to a stream.
+ void write(void *outputStream, TextOutputFunc outputFunc);
+
+ // Find a string. If <startAtTop> is true, starts looking at the
+ // top of the page; else if <startAtLast> is true, starts looking
+ // immediately after the last find result; else starts looking at
+ // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
+ // bottom of the page; else if <stopAtLast> is true, stops looking
+ // just before the last find result; else stops looking at
+ // <xMax>,<yMax>.
+ GBool findText(Unicode *s, int len,
+ GBool startAtTop, GBool stopAtBottom,
+ GBool startAtLast, GBool stopAtLast,
+ GBool caseSensitive, GBool backward,
+ GBool wholeWord,
+ double *xMin, double *yMin,
+ double *xMax, double *yMax);
+
+ // Get the text which is inside the specified rectangle. Multi-line
+ // text always includes end-of-line markers at the end of each line.
+ // If <forceEOL> is true, an end-of-line marker will be appended to
+ // single-line text as well.
+ GString *getText(double xMin, double yMin,
+ double xMax, double yMax,
+ GBool forceEOL = gFalse);
+
+ // Find a string by character position and length. If found, sets
+ // the text bounding rectangle and returns true; otherwise returns
+ // false.
+ GBool findCharRange(int pos, int length,
+ double *xMin, double *yMin,
+ double *xMax, double *yMax);
+
+ // Returns true if x,y falls inside a column.
+ GBool checkPointInside(double x, double y);
+
+ // Find a point inside a column. Returns false if x,y fall outside
+ // all columns.
+ GBool findPointInside(double x, double y, TextPosition *pos);
+
+ // Find a point in the nearest column. Returns false only if there
+ // are no columns.
+ GBool findPointNear(double x, double y, TextPosition *pos);
+
+ // Find the start and end of a word inside a column. Returns false
+ // if x,y fall outside all columns.
+ GBool findWordPoints(double x, double y,
+ TextPosition *startPos, TextPosition *endPos);
+
+ // Find the start and end of a line inside a column. Returns false
+ // if x,y fall outside all columns.
+ GBool findLinePoints(double x, double y,
+ TextPosition *startPos, TextPosition *endPos);
+
+ // Get the upper point of a TextPosition.
+ void convertPosToPointUpper(TextPosition *pos, double *x, double *y);
+
+ // Get the lower point of a TextPosition.
+ void convertPosToPointLower(TextPosition *pos, double *x, double *y);
+
+ // Get the upper left corner of the line containing a TextPosition.
+ void convertPosToPointLeftEdge(TextPosition *pos, double *x, double *y);
+
+ // Get the lower right corner of the line containing a TextPosition.
+ void convertPosToPointRightEdge(TextPosition *pos, double *x, double *y);
+
+ // Get the upper right corner of a column.
+ void getColumnUpperRight(int colIdx, double *x, double *y);
+
+ // Get the lower left corner of a column.
+ void getColumnLowerLeft(int colIdx, double *x, double *y);
+
+ // Create and return a list of TextColumn objects.
+ GList *makeColumns();
+
+ // Get the list of all TextFontInfo objects used on this page.
+ GList *getFonts() { return fonts; }
+
+ // Build a flat word list (ordering presumably follows control.mode).
+ TextWordList *makeWordList();
+
+ // Build a word list containing only words inside the specified
+ // rectangle.
+ TextWordList *makeWordListForRect(double xMin, double yMin,
+ double xMax, double yMax);
+
+ // Returns true if the primary character direction is left-to-right,
+ // false if it is right-to-left.
+ GBool primaryDirectionIsLR();
+
+ // Returns true if any of the fonts used on this page are likely to
+ // be problematic when converting text to Unicode.
+ GBool problematicForUnicode() { return problematic; }
+
+ // Add a 'special' character to this TextPage. This is currently
+ // used by pdftohtml to insert markers for form fields.
+ void addSpecialChar(double xMin, double yMin, double xMax, double yMax,
+ int rot, TextFontInfo *font, double fontSize,
+ Unicode u);
+
+ // Remove characters that fall inside a region.
+ void removeChars(double xMin, double yMin, double xMax, double yMax,
+ double xOverlapThresh, double yOverlapThresh);
+
+private:
+ // page-building entry points; TextOutputDev (a friend) has matching methods
+ void startPage(GfxState *state);
+ void clear();
+ void updateFont(GfxState *state);
+ void addChar(GfxState *state, double x, double y,
+ double dx, double dy,
+ CharCode c, int nBytes, Unicode *u, int uLen);
+ void incCharCount(int nChars);
+ void beginActualText(GfxState *state, Unicode *u, int uLen);
+ void endActualText(GfxState *state);
+ void addUnderline(double x0, double y0, double x1, double y1);
+ void addLink(double xMin, double yMin, double xMax, double yMax,
+ Link *link);
+
+ // output: one writer per TextOutputMode, all with the same parameters
+ void writeReadingOrder(void *outputStream,
+ TextOutputFunc outputFunc,
+ UnicodeMap *uMap,
+ char *space, int spaceLen,
+ char *eol, int eolLen);
+ void writePhysLayout(void *outputStream,
+ TextOutputFunc outputFunc,
+ UnicodeMap *uMap,
+ char *space, int spaceLen,
+ char *eol, int eolLen);
+ void writeSimpleLayout(void *outputStream,
+ TextOutputFunc outputFunc,
+ UnicodeMap *uMap,
+ char *space, int spaceLen,
+ char *eol, int eolLen);
+ void writeSimple2Layout(void *outputStream,
+ TextOutputFunc outputFunc,
+ UnicodeMap *uMap,
+ char *space, int spaceLen,
+ char *eol, int eolLen);
+ void writeLinePrinter(void *outputStream,
+ TextOutputFunc outputFunc,
+ UnicodeMap *uMap,
+ char *space, int spaceLen,
+ char *eol, int eolLen);
+ void writeRaw(void *outputStream,
+ TextOutputFunc outputFunc,
+ UnicodeMap *uMap,
+ char *space, int spaceLen,
+ char *eol, int eolLen);
+ void encodeFragment(Unicode *text, int len, UnicodeMap *uMap,
+ GBool primaryLR, GString *s);
+ GBool unicodeEffectiveTypeLOrNum(Unicode u, Unicode left, Unicode right);
+ GBool unicodeEffectiveTypeR(Unicode u, Unicode left, Unicode right);
+
+ // analysis
+ int rotateChars(GList *charsA);
+ void rotateCharsToZero(GList *charsA);
+ void rotateUnderlinesAndLinks(int rot);
+ void unrotateChars(GList *charsA, int rot);
+ void unrotateCharsFromZero(GList *charsA);
+ void unrotateColumnsFromZero(GList *columns);
+ void unrotateColumns(GList *columns, int rot);
+ void unrotateWords(GList *words, int rot);
+ GBool checkPrimaryLR(GList *charsA);
+ void removeDuplicates(GList *charsA, int rot);
+ GList *separateOverlappingText(GList *charsA);
+ TextColumn *buildOverlappingTextColumn(GList *overlappingChars);
+ TextBlock *splitChars(GList *charsA);
+ TextBlock *split(GList *charsA, int rot, GBool vertOnly);
+ GList *getChars(GList *charsA, double xMin, double yMin,
+ double xMax, double yMax);
+ void findGaps(GList *charsA, int rot,
+ double *xMinOut, double *yMinOut,
+ double *xMaxOut, double *yMaxOut,
+ double *avgFontSizeOut, double *minFontSizeOut,
+ GList *splitLines,
+ TextGaps *horizGaps, TextGaps *vertGaps);
+ void mergeSplitLines(GList *charsA, int rot, GList *splitLines);
+ void tagBlock(TextBlock *blk);
+ void insertLargeChars(GList *largeChars, TextBlock *blk);
+ void insertLargeCharsInFirstLeaf(GList *largeChars, TextBlock *blk);
+ void insertLargeCharInLeaf(TextChar *ch, TextBlock *blk);
+ void insertIntoTree(TextBlock *subtree, TextBlock *primaryTree);
+ void insertColumnIntoTree(TextBlock *column, TextBlock *tree);
+ void insertClippedChars(GList *clippedChars, TextBlock *tree);
+ TextBlock *findClippedCharLeaf(TextChar *ch, TextBlock *tree);
+ GList *buildColumns(TextBlock *tree, GBool primaryLR);
+ void buildColumns2(TextBlock *blk, GList *columns, GBool primaryLR);
+ TextColumn *buildColumn(TextBlock *tree);
+ double getLineIndent(TextLine *line, TextBlock *blk);
+ double getAverageLineSpacing(GList *lines);
+ double getLineSpacing(TextLine *line0, TextLine *line1);
+ void buildLines(TextBlock *blk, GList *lines, GBool splitSuperLines);
+ GList *buildSimple2Columns(GList *charsA);
+ GList *buildSimple2Lines(GList *charsA, int rot);
+ TextLine *buildLine(TextBlock *blk);
+ TextLine *buildLine(GList *charsA, int rot,
+ double xMin, double yMin, double xMax, double yMax);
+ void getLineChars(TextBlock *blk, GList *charsA);
+ double computeWordSpacingThreshold(GList *charsA, int rot);
+ int getCharDirection(TextChar *ch);
+ int getCharDirection(TextChar *ch, TextChar *left, TextChar *right);
+ int assignPhysLayoutPositions(GList *columns);
+ void assignLinePhysPositions(GList *columns);
+ void computeLinePhysWidth(TextLine *line, UnicodeMap *uMap);
+ int assignColumnPhysPositions(GList *columns);
+ void buildSuperLines(TextBlock *blk, GList *superLines);
+ void assignSimpleLayoutPositions(GList *superLines, UnicodeMap *uMap);
+ void generateUnderlinesAndLinks(GList *columns);
+ void findPointInColumn(TextColumn *col, double x, double y,
+ TextPosition *pos);
+ void buildFindCols();
+
+ // debug (compiled out by the "#if 0" below)
+#if 0 //~debug
+ void dumpChars(GList *charsA);
+ void dumpTree(TextBlock *tree, int indent = 0);
+ void dumpColumns(GList *columns, GBool dumpWords = gFalse);
+ void dumpUnderlines();
+#endif
+
+ // word list
+ TextWordList *makeWordListForChars(GList *charList);
+
+ TextOutputControl control; // formatting parameters
+
+ UnicodeRemapping *remapping;
+ Unicode *uBuf; // presumably a scratch buffer of
+ int uBufSize; // uBufSize entries -- confirm in the .cc
+
+ double pageWidth, pageHeight; // width and height of current page
+ int charPos; // next character position (within content
+ // stream)
+ TextFontInfo *curFont; // current font
+ double curFontSize; // current font size
+ int curRot; // current rotation
+ GBool diagonal; // set if rotation is not close to
+ // 0/90/180/270 degrees
+ GBool rotated; // set if text is not horizontal (0 degrees)
+ int nTinyChars; // number of "tiny" chars seen so far
+ Unicode *actualText; // current "ActualText" span
+ int actualTextLen;
+ double actualTextX0,
+ actualTextY0,
+ actualTextX1,
+ actualTextY1;
+ int actualTextNBytes;
+
+ GList *chars; // [TextChar]
+ GList *fonts; // all font info objects used on this
+ // page [TextFontInfo]
+
+ GList *underlines; // [TextUnderline]
+ GList *links; // [TextLink]
+
+ GList *findCols; // text used by the findText/findPoint*
+ // functions [TextColumn]
+ double lastFindXMin, // coordinates of the last "find" result
+ lastFindYMin;
+ GBool haveLastFind; // presumably: lastFind* above are valid
+
+ GBool problematic; // true if any of the fonts used on this
+ // page are marked as problematic for
+ // Unicode conversion
+
+ friend class TextOutputDev;
+};
+
+//------------------------------------------------------------------------
+// TextOutputDev
+//------------------------------------------------------------------------
+
+class TextOutputDev: public OutputDev { // OutputDev that collects page text
+public:
+
+ // Open a text output file. If <fileName> is NULL, no file is
+ // written (this is useful, e.g., for searching text). Formatting
+ // is selected by <controlA> (see TextOutputControl). <append> and
+ // <fileNameIsUTF8> presumably control how the file is opened --
+ // confirm against the constructor definition.
+ TextOutputDev(char *fileName, TextOutputControl *controlA,
+ GBool append, GBool fileNameIsUTF8 = gFalse);
+
+ // Create a TextOutputDev which will write to a generic stream:
+ // output goes to <func>, together with the caller's <stream>
+ // pointer (see TextOutputFunc). Formatting is selected by
+ // <controlA> (see TextOutputControl).
+ TextOutputDev(TextOutputFunc func, void *stream,
+ TextOutputControl *controlA);
+
+ // Destructor.
+ virtual ~TextOutputDev();
+
+ // Check if file was successfully created.
+ virtual GBool isOk() { return ok; }
+
+ //---- get info about output device
+
+ // Does this device use upside-down coordinates?
+ // (Upside-down means (0,0) is the top left corner of the page.)
+ virtual GBool upsideDown() { return gTrue; }
+
+ // Does this device use drawChar() or drawString()?
+ virtual GBool useDrawChar() { return gTrue; }
+
+ // Does this device use beginType3Char/endType3Char? Otherwise,
+ // text in Type 3 fonts will be drawn with drawChar/drawString.
+ virtual GBool interpretType3Chars() { return gFalse; }
+
+ // Does this device need non-text content?
+ virtual GBool needNonText() { return gFalse; }
+
+ // Does this device require incCharCount to be called for text on
+ // non-shown layers?
+ virtual GBool needCharCount() { return gTrue; }
+
+ //----- initialization and control
+
+ // Start a page.
+ virtual void startPage(int pageNum, GfxState *state);
+
+ // End a page.
+ virtual void endPage();
+
+ //----- save/restore graphics state
+ virtual void restoreState(GfxState *state);
+
+ //----- update text state
+ virtual void updateFont(GfxState *state);
+
+ //----- text drawing
+ virtual void beginString(GfxState *state, GString *s);
+ virtual void endString(GfxState *state);
+ virtual void drawChar(GfxState *state, double x, double y,
+ double dx, double dy,
+ double originX, double originY,
+ CharCode c, int nBytes, Unicode *u, int uLen);
+ virtual void incCharCount(int nChars);
+ virtual void beginActualText(GfxState *state, Unicode *u, int uLen);
+ virtual void endActualText(GfxState *state);
+
+ //----- path painting
+ virtual void stroke(GfxState *state);
+ virtual void fill(GfxState *state);
+ virtual void eoFill(GfxState *state);
+
+ //----- link borders
+ virtual void processLink(Link *link);
+
+ //----- special access
+
+ // Find a string. If <startAtTop> is true, starts looking at the
+ // top of the page; else if <startAtLast> is true, starts looking
+ // immediately after the last find result; else starts looking at
+ // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
+ // bottom of the page; else if <stopAtLast> is true, stops looking
+ // just before the last find result; else stops looking at
+ // <xMax>,<yMax>.
+ GBool findText(Unicode *s, int len,
+ GBool startAtTop, GBool stopAtBottom,
+ GBool startAtLast, GBool stopAtLast,
+ GBool caseSensitive, GBool backward,
+ GBool wholeWord,
+ double *xMin, double *yMin,
+ double *xMax, double *yMax);
+
+ // Get the text which is inside the specified rectangle.
+ GString *getText(double xMin, double yMin,
+ double xMax, double yMax);
+
+ // Find a string by character position and length. If found, sets
+ // the text bounding rectangle and returns true; otherwise returns
+ // false.
+ GBool findCharRange(int pos, int length,
+ double *xMin, double *yMin,
+ double *xMax, double *yMax);
+
+ // Build a flat word list. NOTE(review): this comment used to refer
+ // to this->rawOrder / this->physLayout, which are not members of
+ // this class; the ordering presumably follows control.mode --
+ // confirm against TextPage::makeWordList().
+ TextWordList *makeWordList();
+
+ // Build a word list containing only words inside the specified
+ // rectangle.
+ TextWordList *makeWordListForRect(double xMin, double yMin,
+ double xMax, double yMax);
+
+ // Returns the TextPage object for the last page that was processed,
+ // transferring ownership to the caller.
+ TextPage *takeText();
+
+ // Turn extra processing for HTML conversion on or off (control.html).
+ void enableHTMLExtras(GBool html) { control.html = html; }
+
+private:
+
+ void generateBOM();
+
+ TextOutputFunc outputFunc; // output function
+ void *outputStream; // output stream
+ GBool needClose; // need to close the output file?
+ // (only if outputStream is a FILE*)
+ TextPage *text; // text for the current page
+ TextOutputControl control; // formatting parameters
+ GBool ok; // set up ok?
+};
+
+#endif