source: trunk/poppler/mypoppler/poppler/TextOutputDev.h @ 461

Last change on this file since 461 was 461, checked in by Silvan Scherrer, 11 years ago

poppler update to 0.14.2

File size: 24.3 KB
Line 
1//========================================================================
2//
3// TextOutputDev.h
4//
5// Copyright 1997-2003 Glyph & Cog, LLC
6//
7//========================================================================
8
9//========================================================================
10//
11// Modified under the Poppler project - http://poppler.freedesktop.org
12//
13// Copyright (C) 2005-2007 Kristian Høgsberg <krh@redhat.com>
14// Copyright (C) 2006 Ed Catmur <ed@catmur.co.uk>
15// Copyright (C) 2007-2008 Carlos Garcia Campos <carlosgc@gnome.org>
16// Copyright (C) 2007 Adrian Johnson <ajohnson@redneon.com>
17// Copyright (C) 2008 Albert Astals Cid <aacid@kde.org>
18// Copyright (C) 2010 Brian Ewins <brian.ewins@gmail.com>
19//
20// To see a description of the changes please see the Changelog file that
21// came with your tarball or type make ChangeLog if you are building from git
22//
23//========================================================================
24
25#ifndef TEXTOUTPUTDEV_H
26#define TEXTOUTPUTDEV_H
27
28#ifdef USE_GCC_PRAGMAS
29#pragma interface
30#endif
31
32#include "poppler-config.h"
33#include <stdio.h>
34#include "goo/gtypes.h"
35#include "GfxFont.h"
36#include "GfxState.h"
37#include "OutputDev.h"
38
// Forward declarations.  Within this header these types are used only
// through pointers; Gfx is declared here but not referenced below.
39class GooString;
40class GooList;
41class Gfx;
42class GfxFont;
43class GfxState;
44class UnicodeMap;
45class Link;
46
// Text-extraction classes declared later in this header, plus
// TextLineFrag and TextSelectionVisitor, which this header names but
// does not define.
47class TextWord;
48class TextPool;
49class TextLine;
50class TextLineFrag;
51class TextBlock;
52class TextFlow;
53class TextWordList;
54class TextPage;
55class TextSelectionVisitor;
56
57//------------------------------------------------------------------------
58
// Output callback type accepted by TextPage::dump() and by the
// stream-based TextOutputDev constructor.  It is handed the caller's
// opaque <stream> pointer together with a <text> buffer and <len>.
// NOTE(review): <len> is presumably the number of chars in <text> and
// <text> presumably is not NUL-terminated -- confirm in the .cc.
59typedef void (*TextOutputFunc)(void *stream, char *text, int len);
60
// Style argument taken by the visitSelection(), drawSelection(),
// getSelectionRegion() and getSelectionText() methods in this header.
// NOTE(review): the enumerator names suggest the selection is snapped
// to glyph, word or line granularity -- confirm in the .cc.
61enum SelectionStyle {
62  selectionStyleGlyph,
63  selectionStyleWord,
64  selectionStyleLine
65};
66
67//------------------------------------------------------------------------
68// TextFontInfo
69//------------------------------------------------------------------------
70
// Per-font record used by the text extraction classes.  It stores a
// GfxFont pointer and, when TEXTOUT_WORD_LIST is enabled, a font name
// and a set of descriptor flags.  TextWord, TextPage and
// TextSelectionPainter are friends and access the private members
// directly.
71class TextFontInfo {
72public:
73
  // NOTE(review): presumably records the current font of <state>;
  // the definition is not in this header -- confirm in the .cc.
74  TextFontInfo(GfxState *state);
75  ~TextFontInfo();
76
  // NOTE(review): presumably reports whether <state>'s current font is
  // the one recorded in this object -- confirm in the .cc.
77  GBool matches(GfxState *state);
78
79#if TEXTOUT_WORD_LIST
80  // Get the font name (which may be NULL).
81  GooString *getFontName() { return fontName; }
82
83  // Get font descriptor flags.
  // Each getter returns <flags> masked with the matching font*
  // constant; those constants are declared outside this header.
84  GBool isFixedWidth() { return flags & fontFixedWidth; }
85  GBool isSerif() { return flags & fontSerif; }
86  GBool isSymbolic() { return flags & fontSymbolic; }
87  GBool isItalic() { return flags & fontItalic; }
88  GBool isBold() { return flags & fontBold; }
89#endif
90
91private:
92
93  GfxFont *gfxFont;             // the font this record describes
94#if TEXTOUT_WORD_LIST
95  GooString *fontName;          // font name (may be NULL)
96  int flags;                    // font descriptor flag bits
97#endif
98
99  friend class TextWord;
100  friend class TextPage;
101  friend class TextSelectionPainter;
102};
103
104//------------------------------------------------------------------------
105// TextWord
106//------------------------------------------------------------------------
107
// A single word of extracted text: its Unicode characters, glyph
// codes, per-character edge coordinates, bounding box, baseline, font
// and position within the content stream.  Words are chained into a
// linked list through <next>.
108class TextWord {
109public:
110
111  // Constructor.
  // NOTE(review): parameter meanings are inferred from their names
  // (rotA -> rot, charPosA -> charPos, fontA -> font); the definition
  // is not in this header -- confirm in the .cc.
112  TextWord(GfxState *state, int rotA, double x0, double y0,
113           int charPosA, TextFontInfo *fontA, double fontSize);
114
115  // Destructor.
116  ~TextWord();
117
118  // Add a character to the word.
119  void addChar(GfxState *state, double x, double y,
120               double dx, double dy, CharCode c, Unicode u);
121
122  // Merge <word> onto the end of <this>.
123  void merge(TextWord *word);
124
125  // Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>),
126  // based on a primary-axis comparison, e.g., x ordering if rot=0.
127  int primaryCmp(TextWord *word);
128
129  // Return the distance along the primary axis between <this> and
130  // <word>.
131  double primaryDelta(TextWord *word);
132
  // Comparison callback taking two untyped pointers.
  // NOTE(review): presumably a qsort()-style comparator ordering words
  // by y and then x -- confirm in the .cc.
133  static int cmpYX(const void *p1, const void *p2);
134
  // NOTE(review): presumably reports the part of this word covered by
  // <selection> to <visitor>, at the granularity given by <style> --
  // confirm in the .cc.
135  void visitSelection(TextSelectionVisitor *visitor,
136                      PDFRectangle *selection,
137                      SelectionStyle style);
138
139  // Get the TextFontInfo object associated with this word.
140  TextFontInfo *getFontInfo() { return font; }
141
142  // Get the next TextWord on the linked list.
143  TextWord *getNext() { return next; }
144
145#if TEXTOUT_WORD_LIST
  // Accessors compiled only when TEXTOUT_WORD_LIST is enabled.
  // getChar() returns the address of text[idx] without checking <idx>
  // against <len>; getFontName() dereferences <font> without a NULL
  // check.
146  int getLength() { return len; }
147  const Unicode *getChar(int idx) { return &text[idx]; }
148  GooString *getText();
149  GooString *getFontName() { return font->fontName; }
  // Copy the word color into the three output pointers.
150  void getColor(double *r, double *g, double *b)
151    { *r = colorR; *g = colorG; *b = colorB; }
  // Copy the word bounding box into the four output pointers.
152  void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA)
153    { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; }
  // NOTE(review): presumably the bounding box of character <charIdx>
  // -- confirm in the .cc.
154  void getCharBBox(int charIdx, double *xMinA, double *yMinA,
155                   double *xMaxA, double *yMaxA);
156  double getFontSize() { return fontSize; }
157  int getRotation() { return rot; }
158  int getCharPos() { return charPos; }
159  int getCharLen() { return charLen; }
160  GBool getSpaceAfter() { return spaceAfter; }
161#endif
  // Accessors available regardless of TEXTOUT_WORD_LIST.  Note that
  // hasSpaceAfter() returns the same member as getSpaceAfter() and
  // nextWord() the same member as getNext().  getEdge() indexes <edge>
  // without a range check.
162  GBool isUnderlined() { return underlined; }
163  Link *getLink() { return link; }
164  double getEdge(int i) { return edge[i]; }
165  double getBaseline () { return base; }
166  GBool hasSpaceAfter  () { return spaceAfter; }
167  TextWord* nextWord () { return next; };
168private:
169
170  int rot;                      // rotation, multiple of 90 degrees
171                                //   (0, 1, 2, or 3)
172  double xMin, xMax;            // bounding box x coordinates
173  double yMin, yMax;            // bounding box y coordinates
174  double base;                  // baseline x or y coordinate
175  Unicode *text;                // the text
176  CharCode *charcode;           // glyph indices
177  double *edge;                 // "near" edge x or y coord of each char
178                                //   (plus one extra entry for the last char)
179  int len;                      // length of text and edge arrays
180  int size;                     // size of text and edge arrays
181  int charPos;                  // character position (within content stream)
182  int charLen;                  // number of content stream characters in
183                                //   this word
184  TextFontInfo *font;           // font information
185  double fontSize;              // font size
186  GBool spaceAfter;             // set if there is a space between this
187                                //   word and the next word on the line
188  TextWord *next;               // next word in line
189
190#if TEXTOUT_WORD_LIST
191  double colorR,                // word color
192         colorG,
193         colorB;
194#endif
195
196  GBool underlined;             // value returned by isUnderlined()
197  Link *link;                   // value returned by getLink()
198
199  friend class TextPool;
200  friend class TextLine;
201  friend class TextBlock;
202  friend class TextFlow;
203  friend class TextWordList;
204  friend class TextPage;
205
206  friend class TextSelectionPainter;
207  friend class TextSelectionDumper;
208};
209
210//------------------------------------------------------------------------
211// TextPool
212//------------------------------------------------------------------------
213
// A collection of TextWords bucketed by baseline: <pool> is an array
// of linked lists, addressed by a baseline bucket index offset by
// <minBaseIdx>.  TextBlock and TextPage are friends.
214class TextPool {
215public:
216
217  TextPool();
218  ~TextPool();
219
  // Read / replace the list head stored for bucket <baseIdx>.  Both
  // index <pool> with (baseIdx - minBaseIdx) and perform no range
  // check.
  // NOTE(review): callers presumably keep <baseIdx> within
  // [minBaseIdx, maxBaseIdx] -- confirm in the .cc.
220  TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; }
221  void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; }
222
  // NOTE(review): presumably maps a baseline coordinate to its bucket
  // index -- confirm in the .cc.
223  int getBaseIdx(double base);
224
  // NOTE(review): presumably inserts <word> into the bucket for its
  // baseline -- confirm in the .cc.
225  void addWord(TextWord *word);
226
227private:
228
229  int minBaseIdx;               // min baseline bucket index
230  int maxBaseIdx;               // max baseline bucket index
231  TextWord **pool;              // array of linked lists, one for each
232                                //   baseline value (multiple of 4 pts)
233  TextWord *cursor;             // pointer to last-accessed word
234  int cursorBaseIdx;            // baseline bucket index of last-accessed word
235
236  friend class TextBlock;
237  friend class TextPage;
238};
239
// Forward declaration only; nothing else in this header refers to
// TextFlowData.
240struct TextFlowData;
241
242//------------------------------------------------------------------------
243// TextLine
244//------------------------------------------------------------------------
245
// One line of text inside a TextBlock: a linked list of TextWords plus
// the line's Unicode text, per-character edges and column numbers, and
// a normalized copy of the text.  Lines are chained through <next>.
246class TextLine {
247public:
248
  // NOTE(review): argument meanings are inferred from their names
  // (blkA -> blk, rotA -> rot, baseA -> base) -- confirm in the .cc.
249  TextLine(TextBlock *blkA, int rotA, double baseA);
250  ~TextLine();
251
  // NOTE(review): presumably appends <word> to this line's word list
  // -- confirm in the .cc.
252  void addWord(TextWord *word);
253
254  // Return the distance along the primary axis between <this> and
255  // <line>.
256  double primaryDelta(TextLine *line);
257
258  // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
259  // based on a primary-axis comparison, e.g., x ordering if rot=0.
260  int primaryCmp(TextLine *line);
261
262  // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
263  // based on a secondary-axis comparison of the baselines, e.g., y
264  // ordering if rot=0.
265  int secondaryCmp(TextLine *line);
266
  // NOTE(review): presumably orders lines by y and then x -- confirm
  // in the .cc.
267  int cmpYX(TextLine *line);
268
  // Comparison callback taking two untyped pointers.
  // NOTE(review): presumably a qsort()-style comparator ordering by x
  // and then y -- confirm in the .cc.
269  static int cmpXY(const void *p1, const void *p2);
270
  // NOTE(review): presumably builds the line-level text/edge/col data
  // from the words, using <uMap> for character conversion -- confirm
  // in the .cc.
271  void coalesce(UnicodeMap *uMap);
272
  // NOTE(review): presumably reports the part of this line covered by
  // <selection> to <visitor> -- confirm in the .cc.
273  void visitSelection(TextSelectionVisitor *visitor,
274                      PDFRectangle *selection,
275                      SelectionStyle style);
276
277  // Get the head of the linked list of TextWords.
278  TextWord *getWords() { return words; }
279
280  // Get the next TextLine on the linked list.
281  TextLine *getNext() { return next; }
282
283  // Returns true if the last char of the line is a hyphen.
284  GBool isHyphenated() { return hyphenated; }
285
286private:
287
288  TextBlock *blk;               // parent block
289  int rot;                      // text rotation
290  double xMin, xMax;            // bounding box x coordinates
291  double yMin, yMax;            // bounding box y coordinates
292  double base;                  // baseline x or y coordinate
293  TextWord *words;              // words in this line
294  TextWord *lastWord;           // last word in this line
295  Unicode *text;                // Unicode text of the line, including
296                                //   spaces between words
297  double *edge;                 // "near" edge x or y coord of each char
298                                //   (plus one extra entry for the last char)
299  int *col;                     // starting column number of each Unicode char
300  int len;                      // number of Unicode chars
301  int convertedLen;             // total number of converted characters
302  GBool hyphenated;             // set if last char is a hyphen
303  TextLine *next;               // next line in block
304  Unicode *normalized;          // normalized form of Unicode text
305  int normalized_len;           // number of normalized Unicode chars
306  int *normalized_idx;          // indices of normalized chars into Unicode text
307
308  friend class TextLineFrag;
309  friend class TextBlock;
310  friend class TextFlow;
311  friend class TextWordList;
312  friend class TextPage;
313
314  friend class TextSelectionPainter;
315  friend class TextSelectionSizer;
316  friend class TextSelectionDumper;
317};
318
319//------------------------------------------------------------------------
320// TextBlock
321//------------------------------------------------------------------------
322
// A block of text on a page: a linked list of TextLines with a
// bounding box, an extended bounding box, table bookkeeping and column
// information.  Until the lines are built the words live in <pool>.
// Blocks are chained through <next>.
323class TextBlock {
324public:
325
  // NOTE(review): argument meanings are inferred from their names
  // (pageA -> page, rotA -> rot) -- confirm in the .cc.
326  TextBlock(TextPage *pageA, int rotA);
327  ~TextBlock();
328
  // NOTE(review): presumably adds <word> to this block's word pool --
  // confirm in the .cc.
329  void addWord(TextWord *word);
330
  // NOTE(review): presumably turns the pooled words into lines, using
  // <uMap> for character conversion -- confirm in the .cc.
331  void coalesce(UnicodeMap *uMap);
332
333  // Update this block's priMin and priMax values, looking at <blk>.
334  void updatePriMinMax(TextBlock *blk);
335
  // Comparison callbacks taking two untyped pointers.
  // NOTE(review): presumably qsort()-style comparators that order
  // blocks by x-then-y / y-then-x relative to the page's primary
  // rotation -- confirm in the .cc.
336  static int cmpXYPrimaryRot(const void *p1, const void *p2);
337
338  static int cmpYXPrimaryRot(const void *p1, const void *p2);
339
  // NOTE(review): presumably a -1/0/+1 comparison along the primary
  // axis, like the primaryCmp() methods of the other classes in this
  // header -- confirm in the .cc.
340  int primaryCmp(TextBlock *blk);
341
  // NOTE(review): presumably the distance between <this> and <blk>
  // along the secondary axis -- confirm in the .cc.
342  double secondaryDelta(TextBlock *blk);
343
344  // Returns true if <this> is below <blk>, relative to the page's
345  // primary rotation.
346  GBool isBelow(TextBlock *blk);
347
  // NOTE(review): presumably reports the part of this block covered by
  // <selection> to <visitor> -- confirm in the .cc.
348  void visitSelection(TextSelectionVisitor *visitor,
349                      PDFRectangle *selection,
350                      SelectionStyle style);
351
352  // Get the head of the linked list of TextLines.
353  TextLine *getLines() { return lines; }
354
355  // Get the next TextBlock on the linked list.
356  TextBlock *getNext() { return next; }
357
  // Copy the block bounding box into the four output pointers.
358  void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA)
359    { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; }
360
  // Return the number of lines in this block.
361  int getLineCount() { return nLines; }
362
363private:
364
  // NOTE(review): block-ordering predicates; the rules themselves are
  // defined in the .cc and are not described in this header.
365  GBool isBeforeByRule1(TextBlock *blk1);
366  GBool isBeforeByRepeatedRule1(TextBlock *blkList, TextBlock *blk1);
367  GBool isBeforeByRule2(TextBlock *blk1);
368
  // NOTE(review): presumably a depth-first traversal that writes
  // blocks into <sorted> starting at <sortPos>, tracking progress in
  // <visited>, and returns the updated position -- confirm in the .cc.
369  int visitDepthFirst(TextBlock *blkList, int pos1,
370                      TextBlock **sorted, int sortPos,
371                      GBool* visited);
372
373  TextPage *page;               // the parent page
374  int rot;                      // text rotation
375  double xMin, xMax;            // bounding box x coordinates
376  double yMin, yMax;            // bounding box y coordinates
377  double priMin, priMax;        // whitespace bounding box along primary axis
378  double ExMin, ExMax;          // extended bounding box x coordinates
379  double EyMin, EyMax;          // extended bounding box y coordinates
380  int tableId;                  // id of table to which this block belongs
381  GBool tableEnd;               // is this block at end of line of actual table
382
383  TextPool *pool;               // pool of words (used only until lines
384                                //   are built)
385  TextLine *lines;              // linked list of lines
386  TextLine *curLine;            // most recently added line
387  int nLines;                   // number of lines
388  int charCount;                // number of characters in the block
389  int col;                      // starting column
390  int nColumns;                 // number of columns in the block
391
392  TextBlock *next;              // next block on the linked list
  // NOTE(review): presumably a link used by the block-sorting code --
  // confirm in the .cc.
393  TextBlock *stackNext;
394
395  friend class TextLine;
396  friend class TextLineFrag;
397  friend class TextFlow;
398  friend class TextWordList;
399  friend class TextPage;
400  friend class TextSelectionPainter;
401  friend class TextSelectionDumper;
402};
403
404//------------------------------------------------------------------------
405// TextFlow
406//------------------------------------------------------------------------
407
// A flow: a linked list of TextBlocks with an overall bounding box
// and a [priMin, priMax] extent along the primary axis.  Flows are
// chained through <next>.
408class TextFlow {
409public:
410
  // NOTE(review): presumably starts the flow with <blk> as its first
  // block on page <pageA> -- confirm in the .cc.
411  TextFlow(TextPage *pageA, TextBlock *blk);
412  ~TextFlow();
413
414  // Add a block to the end of this flow.
415  void addBlock(TextBlock *blk);
416
417  // Returns true if <blk> fits below <prevBlk> in the flow, i.e., (1)
418  // it uses a font no larger than the last block added to the flow,
419  // and (2) it fits within the flow's [priMin, priMax] along the
420  // primary axis.
421  GBool blockFits(TextBlock *blk, TextBlock *prevBlk);
422
423  // Get the head of the linked list of TextBlocks.
424  TextBlock *getBlocks() { return blocks; }
425
426  // Get the next TextFlow on the linked list.
427  TextFlow *getNext() { return next; }
428
429private:
430
431  TextPage *page;               // the parent page
432  double xMin, xMax;            // bounding box x coordinates
433  double yMin, yMax;            // bounding box y coordinates
434  double priMin, priMax;        // whitespace bounding box along primary axis
435  TextBlock *blocks;            // blocks in flow
436  TextBlock *lastBlk;           // last block in this flow
437  TextFlow *next;               // next flow on the linked list
438
439  friend class TextWordList;
440  friend class TextPage;
441};
442
443#if TEXTOUT_WORD_LIST
444
445//------------------------------------------------------------------------
446// TextWordList
447//------------------------------------------------------------------------
448
// A flat list of the TextWords of a TextPage, stored in a GooList.
// This class is only compiled when TEXTOUT_WORD_LIST is enabled.
449class TextWordList {
450public:
451
452  // Build a flat word list, in content stream order (if
453  // text->rawOrder is true), physical layout order (if <physLayout>
454  // is true and text->rawOrder is false), or reading order (if both
455  // flags are false).
456  TextWordList(TextPage *text, GBool physLayout);
457
458  ~TextWordList();
459
460  // Return the number of words on the list.
461  int getLength();
462
463  // Return the <idx>th word from the list.
  // NOTE(review): behavior for an out-of-range <idx> is not visible
  // here -- confirm in the .cc.
464  TextWord *get(int idx);
465
466private:
467
468  GooList *words;                       // [TextWord]
469};
470
471#endif // TEXTOUT_WORD_LIST
472
473//------------------------------------------------------------------------
474// TextPage
475//------------------------------------------------------------------------
476
// The text content of one page: words are collected as they are
// drawn (beginWord/addChar/endWord), then organized into blocks and
// flows by coalesce(), after which the page can be searched, selected
// from and dumped.
//
// The destructor is private and the class exposes incRefCnt() /
// decRefCnt() together with a <refCnt> member.
// NOTE(review): presumably decRefCnt() destroys the page when the
// count drops to zero, so instances must be heap-allocated and never
// deleted directly -- confirm in the .cc.
477class TextPage {
478public:
479
480  // Constructor.
  // <rawOrderA> selects content stream order (see <rawOrder> below).
481  TextPage(GBool rawOrderA);
482
  // Adjust the reference count held in <refCnt>.
483  void incRefCnt();
484  void decRefCnt();
485
486  // Start a new page.
487  void startPage(GfxState *state);
488
489  // End the current page.
490  void endPage();
491
492  // Update the current font.
493  void updateFont(GfxState *state);
494
495  // Begin a new word.
496  void beginWord(GfxState *state, double x0, double y0);
497
498  // Add a character to the current word.
499  void addChar(GfxState *state, double x, double y,
500               double dx, double dy,
501               CharCode c, int nBytes, Unicode *u, int uLen);
502
503  // End the current word, sorting it into the list of words.
504  void endWord();
505
506  // Add a word, sorting it into the list of words.
507  void addWord(TextWord *word);
508
509  // Add a (potential) underline.
510  void addUnderline(double x0, double y0, double x1, double y1);
511
512  // Add a hyperlink.
  // Note that the rectangle is given as ints here, whereas
  // addUnderline() takes doubles.
513  void addLink(int xMin, int yMin, int xMax, int yMax, Link *link);
514
515  // Coalesce strings that look like parts of the same line.
516  void coalesce(GBool physLayout, GBool doHTML);
517
518  // Find a string.  If <startAtTop> is true, starts looking at the
519  // top of the page; else if <startAtLast> is true, starts looking
520  // immediately after the last find result; else starts looking at
521  // <xMin>,<yMin>.  If <stopAtBottom> is true, stops looking at the
522  // bottom of the page; else if <stopAtLast> is true, stops looking
523  // just before the last find result; else stops looking at
524  // <xMax>,<yMax>.
  // <xMin>,<yMin>,<xMax>,<yMax> are pointers, so they serve as inputs
  // as described above.
  // NOTE(review): presumably they also receive the bounding box of the
  // match on success -- confirm in the .cc.
525  GBool findText(Unicode *s, int len,
526                 GBool startAtTop, GBool stopAtBottom,
527                 GBool startAtLast, GBool stopAtLast,
528                 GBool caseSensitive, GBool backward,
529                 double *xMin, double *yMin,
530                 double *xMax, double *yMax);
531
532  // Get the text which is inside the specified rectangle.
  // NOTE(review): the returned GooString is presumably owned by the
  // caller -- confirm in the .cc.
533  GooString *getText(double xMin, double yMin,
534                     double xMax, double yMax);
535
  // NOTE(review): presumably walks the page content covered by
  // <selection> and reports it to <visitor> -- confirm in the .cc.
536  void visitSelection(TextSelectionVisitor *visitor,
537                      PDFRectangle *selection,
538                      SelectionStyle style);
539
  // NOTE(review): presumably renders the selection onto <out> using
  // the two given colors -- confirm in the .cc.
540  void drawSelection(OutputDev *out,
541                     double scale,
542                     int rotation,
543                     PDFRectangle *selection,
544                     SelectionStyle style,
545                     GfxColor *glyph_color, GfxColor *box_color);
546
  // NOTE(review): element type and ownership of the returned GooList
  // are not visible in this header -- confirm in the .cc.
547  GooList *getSelectionRegion(PDFRectangle *selection,
548                              SelectionStyle style,
549                              double scale);
550
  // NOTE(review): presumably returns the text covered by <selection>
  // -- confirm in the .cc.
551  GooString *getSelectionText(PDFRectangle *selection,
552                              SelectionStyle style);
553
554  // Find a string by character position and length.  If found, sets
555  // the text bounding rectangle and returns true; otherwise returns
556  // false.
557  GBool findCharRange(int pos, int length,
558                      double *xMin, double *yMin,
559                      double *xMax, double *yMax);
560
561  // Dump contents of page to a file.
  // Output goes through <outputFunc>, which receives <outputStream>
  // as its first argument type (see the TextOutputFunc typedef).
562  void dump(void *outputStream, TextOutputFunc outputFunc,
563            GBool physLayout);
564
565  // Get the head of the linked list of TextFlows.
566  TextFlow *getFlows() { return flows; }
567
568#if TEXTOUT_WORD_LIST
569  // Build a flat word list, in content stream order (if
570  // this->rawOrder is true), physical layout order (if <physLayout>
571  // is true and this->rawOrder is false), or reading order (if both
572  // flags are false).
573  TextWordList *makeWordList(GBool physLayout);
574#endif
575
576private:
577 
578  // Destructor.
  // Private: code outside this class and its friends cannot delete a
  // TextPage directly.
579  ~TextPage();
580 
  // NOTE(review): internal helpers defined in the .cc; their exact
  // contracts are not visible in this header.
581  void clear();
582  void assignColumns(TextLineFrag *frags, int nFrags, int rot);
583  int dumpFragment(Unicode *text, int len, UnicodeMap *uMap, GooString *s);
584
585  GBool rawOrder;               // keep text in content stream order
586
587  double pageWidth, pageHeight; // width and height of current page
588  TextWord *curWord;            // currently active string
589  int charPos;                  // next character position (within content
590                                //   stream)
591  TextFontInfo *curFont;        // current font
592  double curFontSize;           // current font size
593  int nest;                     // current nesting level (for Type 3 fonts)
594  int nTinyChars;               // number of "tiny" chars seen so far
595  GBool lastCharOverlap;        // set if the last added char overlapped the
596                                //   previous char
597
598  TextPool *pools[4];           // a "pool" of TextWords for each rotation
599  TextFlow *flows;              // linked list of flows
600  TextBlock **blocks;           // array of blocks, in yx order
601  int nBlocks;                  // number of blocks
602  int primaryRot;               // primary rotation
603  GBool primaryLR;              // primary direction (true means L-to-R,
604                                //   false means R-to-L)
605  TextWord *rawWords;           // list of words, in raw order (only if
606                                //   rawOrder is set)
607  TextWord *rawLastWord;        // last word on rawWords list
608
609  GooList *fonts;                       // all font info objects used on this
610                                //   page [TextFontInfo]
611
612  double lastFindXMin,          // coordinates of the last "find" result
613         lastFindYMin;
614  GBool haveLastFind;           // NOTE(review): presumably set once the
                                //   lastFind* values are valid -- confirm
615
616  GooList *underlines;          // [TextUnderline]
617  GooList *links;               // [TextLink]
618
619  int refCnt;                   // count adjusted by incRefCnt/decRefCnt
620
621  friend class TextLine;
622  friend class TextLineFrag;
623  friend class TextBlock;
624  friend class TextFlow;
625  friend class TextWordList;
626  friend class TextSelectionPainter;
627  friend class TextSelectionDumper;
628};
629
630//------------------------------------------------------------------------
631// ActualText
632//------------------------------------------------------------------------
633
// Tracks an ActualText span in marked content: it holds the
// replacement text for the span, the nesting level of marked-content
// sections inside it, and the extent of the text drawn inside it.
634class ActualText {
635public:
636  // Create an ActualText
  // <out> is stored in <text>.
  // NOTE(review): presumably the page that receives the characters;
  // whether a reference is taken on it is not visible here -- confirm
  // in the .cc.
637  ActualText(TextPage *out);
638  ~ActualText();
639
  // Same parameter list as TextPage::addChar().
  // NOTE(review): presumably accumulates the extent while inside a
  // span and forwards to the page otherwise -- confirm in the .cc.
640  void addChar(GfxState *state, double x, double y,
641               double dx, double dy,
642               CharCode c, int nBytes, Unicode *u, int uLen);
  // Marked-content begin/end notifications.
  // NOTE(review): beginMC() presumably looks for an ActualText entry
  // in <properties>, and endMC() presumably emits the replacement text
  // when the span closes -- confirm in the .cc.
643  void beginMC(Dict *properties);
644  void endMC(GfxState *state);
645
646private:
647  TextPage *text;               // page passed to the constructor
648  int actualTextBMCLevel;       // > 0 when inside ActualText span. Incremented
649                                // for each nested BMC inside the span.
650  GooString *actualText;        // replacement text for the span
651  GBool newActualTextSpan;      // true at start of span. used to init the extent
652  double actualText_x, actualText_y; // extent of the text inside the span
653  double actualText_dx, actualText_dy;
654};
655 
656
657//------------------------------------------------------------------------
658// TextOutputDev
659//------------------------------------------------------------------------
660
// An OutputDev that collects the text of a page into a TextPage and
// can write it to a file or to a caller-supplied output function.  It
// also exposes search, selection and word-list queries whose
// signatures mirror those of TextPage.
661class TextOutputDev: public OutputDev {
662public:
663
664  // Open a text output file.  If <fileName> is NULL, no file is
665  // written (this is useful, e.g., for searching text).  If
666  // <physLayoutA> is true, the original physical layout of the text
667  // is maintained.  If <rawOrderA> is true, the text is kept in
668  // content stream order.
  // NOTE(review): <append> presumably opens the file in append mode
  // instead of truncating it -- confirm in the .cc.
669  TextOutputDev(char *fileName, GBool physLayoutA,
670                GBool rawOrderA, GBool append);
671
672  // Create a TextOutputDev which will write to a generic stream.  If
673  // <physLayoutA> is true, the original physical layout of the text
674  // is maintained.  If <rawOrderA> is true, the text is kept in
675  // content stream order.
676  TextOutputDev(TextOutputFunc func, void *stream,
677                GBool physLayoutA, GBool rawOrderA);
678
679  // Destructor.
680  virtual ~TextOutputDev();
681
682  // Check if file was successfully created.
683  virtual GBool isOk() { return ok; }
684
685  //---- get info about output device
686
687  // Does this device use upside-down coordinates?
688  // (Upside-down means (0,0) is the top left corner of the page.)
689  virtual GBool upsideDown() { return gTrue; }
690
691  // Does this device use drawChar() or drawString()?
692  virtual GBool useDrawChar() { return gTrue; }
693
694  // Does this device use beginType3Char/endType3Char?  Otherwise,
695  // text in Type 3 fonts will be drawn with drawChar/drawString.
696  virtual GBool interpretType3Chars() { return gFalse; }
697
698  // Does this device need non-text content?
699  virtual GBool needNonText() { return gFalse; }
700
701  //----- initialization and control
702
703  // Start a page.
704  virtual void startPage(int pageNum, GfxState *state);
705
706  // End a page.
707  virtual void endPage();
708
709  //----- update text state
710  virtual void updateFont(GfxState *state);
711
712  //----- text drawing
713  virtual void beginString(GfxState *state, GooString *s);
714  virtual void endString(GfxState *state);
715  virtual void drawChar(GfxState *state, double x, double y,
716                        double dx, double dy,
717                        double originX, double originY,
718                        CharCode c, int nBytes, Unicode *u, int uLen);
719
720  //----- grouping operators
721  virtual void beginMarkedContent(char *name, Dict *properties);
722  virtual void endMarkedContent(GfxState *state);
723
724  //----- path painting
  // NOTE(review): a text device overriding the path-painting hooks is
  // presumably how potential underlines are detected (compare
  // TextPage::addUnderline) -- confirm in the .cc.
725  virtual void stroke(GfxState *state);
726  virtual void fill(GfxState *state);
727  virtual void eoFill(GfxState *state);
728
729  //----- link borders
730  virtual void processLink(Link *link, Catalog *catalog);
731
732  //----- special access
733
734  // Find a string.  If <startAtTop> is true, starts looking at the
735  // top of the page; else if <startAtLast> is true, starts looking
736  // immediately after the last find result; else starts looking at
737  // <xMin>,<yMin>.  If <stopAtBottom> is true, stops looking at the
738  // bottom of the page; else if <stopAtLast> is true, stops looking
739  // just before the last find result; else stops looking at
740  // <xMax>,<yMax>.
741  GBool findText(Unicode *s, int len,
742                 GBool startAtTop, GBool stopAtBottom,
743                 GBool startAtLast, GBool stopAtLast,
744                 GBool caseSensitive, GBool backward,
745                 double *xMin, double *yMin,
746                 double *xMax, double *yMax);
747
748  // Get the text which is inside the specified rectangle.
749  GooString *getText(double xMin, double yMin,
750                   double xMax, double yMax);
751
752  // Find a string by character position and length.  If found, sets
753  // the text bounding rectangle and returns true; otherwise returns
754  // false.
755  GBool findCharRange(int pos, int length,
756                      double *xMin, double *yMin,
757                      double *xMax, double *yMax);
758
  // Selection queries; the parameter lists match the TextPage methods
  // of the same names.
  // NOTE(review): presumably these forward to <text> -- confirm in the
  // .cc.
759  void drawSelection(OutputDev *out, double scale, int rotation,
760                     PDFRectangle *selection,
761                     SelectionStyle style,
762                     GfxColor *glyph_color, GfxColor *box_color);
763
764  GooList *getSelectionRegion(PDFRectangle *selection,
765                              SelectionStyle style,
766                              double scale);
767
768  GooString *getSelectionText(PDFRectangle *selection,
769                              SelectionStyle style);
770
771#if TEXTOUT_WORD_LIST
772  // Build a flat word list, in content stream order (if
773  // this->rawOrder is true), physical layout order (if
774  // this->physLayout is true and this->rawOrder is false), or reading
775  // order (if both flags are false).
776  TextWordList *makeWordList();
777#endif
778
779  // Returns the TextPage object for the last rasterized page,
780  // transferring ownership to the caller.
  // NOTE(review): TextPage has a private destructor and
  // incRefCnt()/decRefCnt(), so the caller presumably releases the
  // page with decRefCnt() -- confirm in the .cc.
781  TextPage *takeText();
782
783  // Turn extra processing for HTML conversion on or off.
784  void enableHTMLExtras(GBool doHTMLA) { doHTML = doHTMLA; }
785
786private:
787
788  TextOutputFunc outputFunc;    // output function
789  void *outputStream;           // output stream
790  GBool needClose;              // need to close the output file?
791                                //   (only if outputStream is a FILE*)
792  TextPage *text;               // text for the current page
793  GBool physLayout;             // maintain original physical layout when
794                                //   dumping text
795  GBool rawOrder;               // keep text in content stream order
796  GBool doHTML;                 // extra processing for HTML conversion
797  GBool ok;                     // set up ok?
798
799  ActualText *actualText;       // NOTE(review): presumably the handler for
                                //   ActualText marked-content spans -- confirm
800};
801
802#endif
Note: See TracBrowser for help on using the repository browser.