diff options
Diffstat (limited to 'xpdf/TextOutputDev.h')
| -rw-r--r-- | xpdf/TextOutputDev.h | 800 | 
1 files changed, 800 insertions, 0 deletions
| diff --git a/xpdf/TextOutputDev.h b/xpdf/TextOutputDev.h new file mode 100644 index 0000000..302975c --- /dev/null +++ b/xpdf/TextOutputDev.h @@ -0,0 +1,800 @@ +//======================================================================== +// +// TextOutputDev.h +// +// Copyright 1997-2012 Glyph & Cog, LLC +// +//======================================================================== + +#ifndef TEXTOUTPUTDEV_H +#define TEXTOUTPUTDEV_H + +#include <aconf.h> + +#ifdef USE_GCC_PRAGMAS +#pragma interface +#endif + +#include <stdio.h> +#include "gtypes.h" +#include "GfxFont.h" +#include "OutputDev.h" + +class GList; +class UnicodeMap; +class UnicodeRemapping; + +class TextBlock; +class TextChar; +class TextGaps; +class TextLink; +class TextPage; + +//------------------------------------------------------------------------ + +typedef void (*TextOutputFunc)(void *stream, const char *text, int len); + +//------------------------------------------------------------------------ +// TextOutputControl +//------------------------------------------------------------------------ + +enum TextOutputMode { +  textOutReadingOrder,		// format into reading order +  textOutPhysLayout,		// maintain original physical layout +  textOutSimpleLayout,		// simple one-column physical layout +  textOutSimple2Layout,		// simple one-column physical layout +  textOutTableLayout,		// similar to PhysLayout, but optimized +				//   for tables +  textOutLinePrinter,		// strict fixed-pitch/height layout +  textOutRawOrder		// keep text in content stream order +}; + +enum TextOutputOverlapHandling { +  textOutIgnoreOverlaps,	// no special handling for overlaps +  textOutAppendOverlaps,	// append overlapping text to main text +  textOutDiscardOverlaps	// discard overlapping text +}; + +class TextOutputControl { +public: + +  TextOutputControl(); +  ~TextOutputControl() {} + +  TextOutputMode mode;		// formatting mode +  double fixedPitch;		// if this is non-zero, assume fixed-pitch +				//   characters with this width +				//   (only relevant for PhysLayout, Table, +				//   and LinePrinter modes) +  double fixedLineSpacing;	// fixed line spacing (only relevant for +				//   LinePrinter mode) +  GBool html;			// enable extra processing for HTML +  GBool clipText;		// separate clipped text and add it back +				//   in after forming columns +  GBool discardDiagonalText;	// discard all text that's not close to +				//   0/90/180/270 degrees +  GBool discardRotatedText;	// discard all text that's not horizontal +				//   (0 degrees) +  GBool discardInvisibleText;	// discard all invisible characters +  GBool discardClippedText;	// discard all clipped characters +  GBool splitRotatedWords;	// do not combine horizontal and +				//   non-horizontal chars in a single +				//   word +  TextOutputOverlapHandling	// how to handle overlapping text +               overlapHandling; +  GBool separateLargeChars;	// separate "large" characters from +				//   "regular" characters +  GBool insertBOM;		// insert a Unicode BOM at the start of +				//   the text output +  double marginLeft,		// characters outside the margins are +         marginRight,		//   discarded +         marginTop, +         marginBottom; +}; + +//------------------------------------------------------------------------ +// TextFontInfo +//------------------------------------------------------------------------ + +class TextFontInfo { +public: + +  // Create a TextFontInfo for the current font in [state]. +  TextFontInfo(GfxState *state); + +  // Create a dummy TextFontInfo. +  TextFontInfo(); + +  ~TextFontInfo(); + +  GBool matches(GfxState *state); + +  // Get the font name (which may be NULL). +  GString *getFontName() { return fontName; } + +  // Get font descriptor flags. +  GBool isFixedWidth() { return flags & fontFixedWidth; } +  GBool isSerif() { return flags & fontSerif; } +  GBool isSymbolic() { return flags & fontSymbolic; } +  GBool isItalic() { return flags & fontItalic; } +  GBool isBold() { return flags & fontBold; } + +  // Get the width of the 'm' character, if available. +  double getMWidth() { return mWidth; } + +  Ref getFontID() { return fontID; } + +private: + +  Ref fontID; +  GString *fontName; +  int flags; +  double mWidth; +  double ascent, descent; + +  friend class TextLine; +  friend class TextPage; +  friend class TextWord; +}; + +//------------------------------------------------------------------------ +// TextWord +//------------------------------------------------------------------------ + +class TextWord { +public: + +  TextWord(GList *chars, int start, int lenA, +	   int rotA, GBool rotatedA, int dirA, GBool spaceAfterA); +  ~TextWord(); +  TextWord *copy() { return new TextWord(this); } + +  // Get the TextFontInfo object associated with this word. +  TextFontInfo *getFontInfo() { return font; } + +  int getLength() { return len; } +  Unicode getChar(int idx) { return text[idx]; } +  GString *getText(); +  GString *getFontName() { return font->fontName; } +  void getColor(double *r, double *g, double *b) +    { *r = colorR; *g = colorG; *b = colorB; } +  GBool isInvisible() { return invisible; } +  void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) +    { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; } +  void getCharBBox(int charIdx, double *xMinA, double *yMinA, +		   double *xMaxA, double *yMaxA); +  double getFontSize() { return fontSize; } +  int getRotation() { return rot; } +  GBool isRotated() { return (GBool)rotated; } +  int getCharPos() { return charPos[0]; } +  int getCharLen() { return charPos[len] - charPos[0]; } +  int getDirection() { return dir; } +  GBool getSpaceAfter() { return spaceAfter; } +  double getBaseline(); +  GBool isUnderlined() { return underlined; } +  GString *getLinkURI(); + +private: + +  TextWord(TextWord *word); +  static int cmpYX(const void *p1, const void *p2); +  static int cmpCharPos(const void *p1, const void *p2); + +  double xMin, xMax;		// bounding box x coordinates +  double yMin, yMax;		// bounding box y coordinates +  Unicode *text;		// the text +  int *charPos;			// character position (within content stream) +				//   of each char (plus one extra entry for +				//   the last char) +  double *edge;			// "near" edge x or y coord of each char +				//   (plus one extra entry for the last char) +  int len;			// number of characters +  TextFontInfo *font;		// font information +  double fontSize;		// font size +  TextLink *link; +  double colorR,		// word color +         colorG, +         colorB; +  GBool invisible;		// set for invisible text (render mode 3) + +  // group the byte-size fields to minimize object size +  Guchar rot;			// rotation, multiple of 90 degrees +				//   (0, 1, 2, or 3) +  char rotated;			// set if this word is non-horizontal +  char dir;			// character direction (+1 = left-to-right; +				//   -1 = right-to-left; 0 = neither) +  char spaceAfter;		// set if there is a space between this +				//   word and the next word on the line +  char underlined; + +  friend class TextBlock; +  friend class TextLine; +  friend class TextPage; +}; + +//------------------------------------------------------------------------ +// TextLine +//------------------------------------------------------------------------ + +class TextLine { +public: + +  TextLine(GList *wordsA, double xMinA, double yMinA, +	   double xMaxA, double yMaxA, double fontSizeA); +  ~TextLine(); + +  double getXMin() { return xMin; } +  double getYMin() { return yMin; } +  double getXMax() { return xMax; } +  double getYMax() { return yMax; } +  double getBaseline(); +  int getRotation() { return rot; } +  GList *getWords() { return words; } +  Unicode *getUnicode() { return text; } +  int getLength() { return len; } +  double getEdge(int idx) { return edge[idx]; } +  GBool getHyphenated() { return hyphenated; } + +private: + +  static int cmpX(const void *p1, const void *p2); + +  GList *words;			// [TextWord] +  int rot;			// rotation, multiple of 90 degrees +				//   (0, 1, 2, or 3) +  double xMin, xMax;		// bounding box x coordinates +  double yMin, yMax;		// bounding box y coordinates +  double fontSize;		// main (max) font size for this line +  Unicode *text;		// Unicode text of the line, including +				//   spaces between words +  double *edge;			// "near" edge x or y coord of each char +				//   (plus one extra entry for the last char) +  int len;			// number of Unicode chars +  GBool hyphenated;		// set if last char is a hyphen +  int px;			// x offset (in characters, relative to +				//   containing column) in physical layout mode +  int pw;			// line width (in characters) in physical +				//   layout mode + +  friend class TextSuperLine; +  friend class TextPage; +  friend class TextParagraph; +}; + +//------------------------------------------------------------------------ +// TextParagraph +//------------------------------------------------------------------------ + +class TextParagraph { +public: + +  TextParagraph(GList *linesA, GBool dropCapA); +  ~TextParagraph(); + +  // Get the list of TextLine objects. +  GList *getLines() { return lines; } + +  GBool hasDropCap() { return dropCap; } + +  double getXMin() { return xMin; } +  double getYMin() { return yMin; } +  double getXMax() { return xMax; } +  double getYMax() { return yMax; } + +private: + +  GList *lines;			// [TextLine] +  GBool dropCap;		// paragraph starts with a drop capital +  double xMin, xMax;		// bounding box x coordinates +  double yMin, yMax;		// bounding box y coordinates + +  friend class TextPage; +}; + +//------------------------------------------------------------------------ +// TextColumn +//------------------------------------------------------------------------ + +class TextColumn { +public: + +  TextColumn(GList *paragraphsA, double xMinA, double yMinA, +	     double xMaxA, double yMaxA); +  ~TextColumn(); + +  // Get the list of TextParagraph objects. +  GList *getParagraphs() { return paragraphs; } + +  double getXMin() { return xMin; } +  double getYMin() { return yMin; } +  double getXMax() { return xMax; } +  double getYMax() { return yMax; } + +  int getRotation(); + +private: + +  static int cmpX(const void *p1, const void *p2); +  static int cmpY(const void *p1, const void *p2); +  static int cmpPX(const void *p1, const void *p2); + +  GList *paragraphs;		// [TextParagraph] +  double xMin, xMax;		// bounding box x coordinates +  double yMin, yMax;		// bounding box y coordinates +  int px, py;			// x, y position (in characters) in physical +				//   layout mode +  int pw, ph;			// column width, height (in characters) in +				//   physical layout mode + +  friend class TextPage; +}; + +//------------------------------------------------------------------------ +// TextWordList +//------------------------------------------------------------------------ + +class TextWordList { +public: + +  TextWordList(GList *wordsA, GBool primaryLRA); + +  ~TextWordList(); + +  // Return the number of words on the list. +  int getLength(); + +  // Return the <idx>th word from the list. +  TextWord *get(int idx); + +  // Returns true if primary direction is left-to-right, or false if +  // right-to-left. +  GBool getPrimaryLR() { return primaryLR; } + +private: + +  GList *words;			// [TextWord] +  GBool primaryLR; +}; + +//------------------------------------------------------------------------ +// TextPosition +//------------------------------------------------------------------------ + +// Position within a TextColumn tree.  The position is in column +// [colIdx], paragraph [parIdx], line [lineIdx], before character +// [charIdx]. +class TextPosition { +public: + +  TextPosition(): colIdx(0), parIdx(0), lineIdx(0), charIdx(0) {} +  TextPosition(int colIdxA, int parIdxA, int lineIdxA, int charIdxA): +    colIdx(colIdxA), parIdx(parIdxA), lineIdx(lineIdxA), charIdx(charIdxA) {} + +  int operator==(TextPosition pos); +  int operator!=(TextPosition pos); +  int operator<(TextPosition pos); +  int operator>(TextPosition pos); + +  int colIdx, parIdx, lineIdx, charIdx; +}; + +//------------------------------------------------------------------------ +// TextPage +//------------------------------------------------------------------------ + +class TextPage { +public: + +  TextPage(TextOutputControl *controlA); +  ~TextPage(); + +  // Write contents of page to a stream. +  void write(void *outputStream, TextOutputFunc outputFunc); + +  // Find a string.  If <startAtTop> is true, starts looking at the +  // top of the page; else if <startAtLast> is true, starts looking +  // immediately after the last find result; else starts looking at +  // <xMin>,<yMin>.  If <stopAtBottom> is true, stops looking at the +  // bottom of the page; else if <stopAtLast> is true, stops looking +  // just before the last find result; else stops looking at +  // <xMax>,<yMax>. +  GBool findText(Unicode *s, int len, +		 GBool startAtTop, GBool stopAtBottom, +		 GBool startAtLast, GBool stopAtLast, +		 GBool caseSensitive, GBool backward, +		 GBool wholeWord, +		 double *xMin, double *yMin, +		 double *xMax, double *yMax); + +  // Get the text which is inside the specified rectangle.  Multi-line +  // text always includes end-of-line markers at the end of each line. +  // If <forceEOL> is true, an end-of-line marker will be appended to +  // single-line text as well. +  GString *getText(double xMin, double yMin, +		   double xMax, double yMax, +		   GBool forceEOL = gFalse); + +  // Find a string by character position and length.  If found, sets +  // the text bounding rectangle and returns true; otherwise returns +  // false. +  GBool findCharRange(int pos, int length, +		      double *xMin, double *yMin, +		      double *xMax, double *yMax); + +  // Returns true if x,y falls inside a column. +  GBool checkPointInside(double x, double y); + +  // Find a point inside a column.  Returns false if x,y fall outside +  // all columns. +  GBool findPointInside(double x, double y, TextPosition *pos); + +  // Find a point in the nearest column.  Returns false only if there +  // are no columns. +  GBool findPointNear(double x, double y, TextPosition *pos); + +  // Find the start and end of a word inside a column.  Returns false +  // if x,y fall outside all columns. +  GBool findWordPoints(double x, double y, +		       TextPosition *startPos, TextPosition *endPos); + +  // Find the start and end of a line inside a column.  Returns false +  // if x,y fall outside all columns. +  GBool findLinePoints(double x, double y, +		       TextPosition *startPos, TextPosition *endPos); + +  // Get the upper point of a TextPosition. +  void convertPosToPointUpper(TextPosition *pos, double *x, double *y); + +  // Get the lower point of a TextPosition. +  void convertPosToPointLower(TextPosition *pos, double *x, double *y); + +  // Get the upper left corner of the line containing a TextPosition. +  void convertPosToPointLeftEdge(TextPosition *pos, double *x, double *y); + +  // Get the lower right corner of the line containing a TextPosition. +  void convertPosToPointRightEdge(TextPosition *pos, double *x, double *y); + +  // Get the upper right corner of a column. +  void getColumnUpperRight(int colIdx, double *x, double *y); + +  // Get the lower left corner of a column. +  void getColumnLowerLeft(int colIdx, double *x, double *y); + +  // Create and return a list of TextColumn objects. +  GList *makeColumns(); + +  // Get the list of all TextFontInfo objects used on this page. +  GList *getFonts() { return fonts; } + +  // Build a flat word list, in the specified ordering. +  TextWordList *makeWordList(); + +  // Build a word list containing only words inside the specified +  // rectangle. +  TextWordList *makeWordListForRect(double xMin, double yMin, +				    double xMax, double yMax); + +  // Returns true if the primary character direction is left-to-right, +  // false if it is right-to-left. +  GBool primaryDirectionIsLR(); + +  // Returns true if any of the fonts used on this page are likely to +  // be problematic when converting text to Unicode. +  GBool problematicForUnicode() { return problematic; } + +  // Add a 'special' character to this TextPage.  This is currently +  // used by pdftohtml to insert markers for form fields. +  void addSpecialChar(double xMin, double yMin, double xMax, double yMax, +		      int rot, TextFontInfo *font, double fontSize, +		      Unicode u); + +  // Remove characters that fall inside a region. +  void removeChars(double xMin, double yMin, double xMax, double yMax, +		   double xOverlapThresh, double yOverlapThresh); + +private: + +  void startPage(GfxState *state); +  void clear(); +  void updateFont(GfxState *state); +  void addChar(GfxState *state, double x, double y, +	       double dx, double dy, +	       CharCode c, int nBytes, Unicode *u, int uLen); +  void incCharCount(int nChars); +  void beginActualText(GfxState *state, Unicode *u, int uLen); +  void endActualText(GfxState *state); +  void addUnderline(double x0, double y0, double x1, double y1); +  void addLink(double xMin, double yMin, double xMax, double yMax, +	       Link *link); + +  // output +  void writeReadingOrder(void *outputStream, +			 TextOutputFunc outputFunc, +			 UnicodeMap *uMap, +			 char *space, int spaceLen, +			 char *eol, int eolLen); +  void writePhysLayout(void *outputStream, +		       TextOutputFunc outputFunc, +		       UnicodeMap *uMap, +		       char *space, int spaceLen, +		       char *eol, int eolLen); +  void writeSimpleLayout(void *outputStream, +			 TextOutputFunc outputFunc, +			 UnicodeMap *uMap, +			 char *space, int spaceLen, +			 char *eol, int eolLen); +  void writeSimple2Layout(void *outputStream, +			  TextOutputFunc outputFunc, +			  UnicodeMap *uMap, +			  char *space, int spaceLen, +			  char *eol, int eolLen); +  void writeLinePrinter(void *outputStream, +			TextOutputFunc outputFunc, +			UnicodeMap *uMap, +			char *space, int spaceLen, +			char *eol, int eolLen); +  void writeRaw(void *outputStream, +		TextOutputFunc outputFunc, +		UnicodeMap *uMap, +		char *space, int spaceLen, +		char *eol, int eolLen); +  void encodeFragment(Unicode *text, int len, UnicodeMap *uMap, +		      GBool primaryLR, GString *s); +  GBool unicodeEffectiveTypeLOrNum(Unicode u, Unicode left, Unicode right); +  GBool unicodeEffectiveTypeR(Unicode u, Unicode left, Unicode right); + +  // analysis +  int rotateChars(GList *charsA); +  void rotateCharsToZero(GList *charsA); +  void rotateUnderlinesAndLinks(int rot); +  void unrotateChars(GList *charsA, int rot); +  void unrotateCharsFromZero(GList *charsA); +  void unrotateColumnsFromZero(GList *columns); +  void unrotateColumns(GList *columns, int rot); +  void unrotateWords(GList *words, int rot); +  GBool checkPrimaryLR(GList *charsA); +  void removeDuplicates(GList *charsA, int rot); +  GList *separateOverlappingText(GList *charsA); +  TextColumn *buildOverlappingTextColumn(GList *overlappingChars); +  TextBlock *splitChars(GList *charsA); +  TextBlock *split(GList *charsA, int rot, GBool vertOnly); +  GList *getChars(GList *charsA, double xMin, double yMin, +		  double xMax, double yMax); +  void findGaps(GList *charsA, int rot, +		double *xMinOut, double *yMinOut, +		double *xMaxOut, double *yMaxOut, +		double *avgFontSizeOut, double *minFontSizeOut, +		GList *splitLines, +		TextGaps *horizGaps, TextGaps *vertGaps); +  void mergeSplitLines(GList *charsA, int rot, GList *splitLines); +  void tagBlock(TextBlock *blk); +  void insertLargeChars(GList *largeChars, TextBlock *blk); +  void insertLargeCharsInFirstLeaf(GList *largeChars, TextBlock *blk); +  void insertLargeCharInLeaf(TextChar *ch, TextBlock *blk); +  void insertIntoTree(TextBlock *subtree, TextBlock *primaryTree); +  void insertColumnIntoTree(TextBlock *column, TextBlock *tree); +  void insertClippedChars(GList *clippedChars, TextBlock *tree); +  TextBlock *findClippedCharLeaf(TextChar *ch, TextBlock *tree); +  GList *buildColumns(TextBlock *tree, GBool primaryLR); +  void buildColumns2(TextBlock *blk, GList *columns, GBool primaryLR); +  TextColumn *buildColumn(TextBlock *tree); +  double getLineIndent(TextLine *line, TextBlock *blk); +  double getAverageLineSpacing(GList *lines); +  double getLineSpacing(TextLine *line0, TextLine *line1); +  void buildLines(TextBlock *blk, GList *lines, GBool splitSuperLines); +  GList *buildSimple2Columns(GList *charsA); +  GList *buildSimple2Lines(GList *charsA, int rot); +  TextLine *buildLine(TextBlock *blk); +  TextLine *buildLine(GList *charsA, int rot, +		      double xMin, double yMin, double xMax, double yMax); +  void getLineChars(TextBlock *blk, GList *charsA); +  double computeWordSpacingThreshold(GList *charsA, int rot); +  int getCharDirection(TextChar *ch); +  int getCharDirection(TextChar *ch, TextChar *left, TextChar *right); +  int assignPhysLayoutPositions(GList *columns); +  void assignLinePhysPositions(GList *columns); +  void computeLinePhysWidth(TextLine *line, UnicodeMap *uMap); +  int assignColumnPhysPositions(GList *columns); +  void buildSuperLines(TextBlock *blk, GList *superLines); +  void assignSimpleLayoutPositions(GList *superLines, UnicodeMap *uMap); +  void generateUnderlinesAndLinks(GList *columns); +  void findPointInColumn(TextColumn *col, double x, double y, +			 TextPosition *pos); +  void buildFindCols(); + +  // debug +#if 0 //~debug +  void dumpChars(GList *charsA); +  void dumpTree(TextBlock *tree, int indent = 0); +  void dumpColumns(GList *columns, GBool dumpWords = gFalse); +  void dumpUnderlines(); +#endif + +  // word list +  TextWordList *makeWordListForChars(GList *charList); + +  TextOutputControl control;	// formatting parameters + +  UnicodeRemapping *remapping; +  Unicode *uBuf; +  int uBufSize; + +  double pageWidth, pageHeight;	// width and height of current page +  int charPos;			// next character position (within content +				//   stream) +  TextFontInfo *curFont;	// current font +  double curFontSize;		// current font size +  int curRot;			// current rotation +  GBool diagonal;		// set if rotation is not close to +				//   0/90/180/270 degrees +  GBool rotated;		// set if text is not horizontal (0 degrees) +  int nTinyChars;		// number of "tiny" chars seen so far +  Unicode *actualText;		// current "ActualText" span +  int actualTextLen; +  double actualTextX0, +         actualTextY0, +         actualTextX1, +         actualTextY1; +  int actualTextNBytes; + +  GList *chars;			// [TextChar] +  GList *fonts;			// all font info objects used on this +				//   page [TextFontInfo] + +  GList *underlines;		// [TextUnderline] +  GList *links;			// [TextLink] + +  GList *findCols;		// text used by the findText**/findPoint** +				//   functions [TextColumn] +  double lastFindXMin,		// coordinates of the last "find" result +         lastFindYMin; +  GBool haveLastFind; + +  GBool problematic;		// true if any of the fonts used on this +				//   page are marked as problematic for +				//   Unicode conversion + +  friend class TextOutputDev; +}; + +//------------------------------------------------------------------------ +// TextOutputDev +//------------------------------------------------------------------------ + +class TextOutputDev: public OutputDev { +public: + +  // Open a text output file.  If <fileName> is NULL, no file is +  // written (this is useful, e.g., for searching text).  If +  // <physLayoutA> is true, the original physical layout of the text +  // is maintained.  If <rawOrder> is true, the text is kept in +  // content stream order. +  TextOutputDev(char *fileName, TextOutputControl *controlA, +		GBool append, GBool fileNameIsUTF8 = gFalse); + +  // Create a TextOutputDev which will write to a generic stream.  If +  // <physLayoutA> is true, the original physical layout of the text +  // is maintained.  If <rawOrder> is true, the text is kept in +  // content stream order. +  TextOutputDev(TextOutputFunc func, void *stream, +		TextOutputControl *controlA); + +  // Destructor. +  virtual ~TextOutputDev(); + +  // Check if file was successfully created. +  virtual GBool isOk() { return ok; } + +  //---- get info about output device + +  // Does this device use upside-down coordinates? +  // (Upside-down means (0,0) is the top left corner of the page.) +  virtual GBool upsideDown() { return gTrue; } + +  // Does this device use drawChar() or drawString()? +  virtual GBool useDrawChar() { return gTrue; } + +  // Does this device use beginType3Char/endType3Char?  Otherwise, +  // text in Type 3 fonts will be drawn with drawChar/drawString. +  virtual GBool interpretType3Chars() { return gFalse; } + +  // Does this device need non-text content? +  virtual GBool needNonText() { return gFalse; } + +  // Does this device require incCharCount to be called for text on +  // non-shown layers? +  virtual GBool needCharCount() { return gTrue; } + +  //----- initialization and control + +  // Start a page. +  virtual void startPage(int pageNum, GfxState *state); + +  // End a page. +  virtual void endPage(); + +  //----- save/restore graphics state +  virtual void restoreState(GfxState *state); + +  //----- update text state +  virtual void updateFont(GfxState *state); + +  //----- text drawing +  virtual void beginString(GfxState *state, GString *s); +  virtual void endString(GfxState *state); +  virtual void drawChar(GfxState *state, double x, double y, +			double dx, double dy, +			double originX, double originY, +			CharCode c, int nBytes, Unicode *u, int uLen); +  virtual void incCharCount(int nChars); +  virtual void beginActualText(GfxState *state, Unicode *u, int uLen); +  virtual void endActualText(GfxState *state); + +  //----- path painting +  virtual void stroke(GfxState *state); +  virtual void fill(GfxState *state); +  virtual void eoFill(GfxState *state); + +  //----- link borders +  virtual void processLink(Link *link); + +  //----- special access + +  // Find a string.  If <startAtTop> is true, starts looking at the +  // top of the page; else if <startAtLast> is true, starts looking +  // immediately after the last find result; else starts looking at +  // <xMin>,<yMin>.  If <stopAtBottom> is true, stops looking at the +  // bottom of the page; else if <stopAtLast> is true, stops looking +  // just before the last find result; else stops looking at +  // <xMax>,<yMax>. +  GBool findText(Unicode *s, int len, +		 GBool startAtTop, GBool stopAtBottom, +		 GBool startAtLast, GBool stopAtLast, +		 GBool caseSensitive, GBool backward, +		 GBool wholeWord, +		 double *xMin, double *yMin, +		 double *xMax, double *yMax); + +  // Get the text which is inside the specified rectangle. +  GString *getText(double xMin, double yMin, +		   double xMax, double yMax); + +  // Find a string by character position and length.  If found, sets +  // the text bounding rectangle and returns true; otherwise returns +  // false. +  GBool findCharRange(int pos, int length, +		      double *xMin, double *yMin, +		      double *xMax, double *yMax); + +  // Build a flat word list, in content stream order (if +  // this->rawOrder is true), physical layout order (if +  // this->physLayout is true and this->rawOrder is false), or reading +  // order (if both flags are false). +  TextWordList *makeWordList(); + +  // Build a word list containing only words inside the specified +  // rectangle. +  TextWordList *makeWordListForRect(double xMin, double yMin, +				    double xMax, double yMax); + +  // Returns the TextPage object for the last rasterized page, +  // transferring ownership to the caller. +  TextPage *takeText(); + +  // Turn extra processing for HTML conversion on or off. +  void enableHTMLExtras(GBool html) { control.html = html; } + +private: + +  void generateBOM(); + +  TextOutputFunc outputFunc;	// output function +  void *outputStream;		// output stream +  GBool needClose;		// need to close the output file? +				//   (only if outputStream is a FILE*) +  TextPage *text;		// text for the current page +  TextOutputControl control;	// formatting parameters +  GBool ok;			// set up ok? +}; + +#endif | 
