source: trunk/libdjvu/DjVuDocument.h @ 206

Last change on this file since 206 was 206, checked in by Eugene Romanenko, 14 years ago

DJVU plugin: djvulibre updated to version 3.5.19

File size: 43.8 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, either Version 2 of the license,
9//C- or (at your option) any later version. The license should have
10//C- accompanied the software or you may obtain a copy of the license
11//C- from the Free Software Foundation at http://www.fsf.org .
12//C-
13//C- This program is distributed in the hope that it will be useful,
14//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16//C- GNU General Public License for more details.
17//C-
18//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19//C- Lizardtech Software.  Lizardtech Software has authorized us to
20//C- replace the original DjVu(r) Reference Library notice by the following
21//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22//C-
23//C-  ------------------------------------------------------------------
24//C- | DjVu (r) Reference Library (v. 3.5)
25//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26//C- | The DjVu Reference Library is protected by U.S. Pat. No.
27//C- | 6,058,214 and patents pending.
28//C- |
29//C- | This software is subject to, and may be distributed under, the
30//C- | GNU General Public License, either Version 2 of the license,
31//C- | or (at your option) any later version. The license should have
32//C- | accompanied the software or you may obtain a copy of the license
33//C- | from the Free Software Foundation at http://www.fsf.org .
34//C- |
35//C- | The computer code originally released by LizardTech under this
36//C- | license and unmodified by other parties is deemed "the LIZARDTECH
37//C- | ORIGINAL CODE."  Subject to any third party intellectual property
38//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39//C- | non-exclusive license to make, use, sell, or otherwise dispose of
40//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42//C- | General Public License.   This grant only confers the right to
43//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44//C- | the extent such infringement is reasonably necessary to enable
45//C- | recipient to make, have made, practice, sell, or otherwise dispose
46//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47//C- | any greater extent that may be necessary to utilize further
48//C- | modifications or combinations.
49//C- |
50//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54//C- +------------------------------------------------------------------
55//
56// $Id: DjVuDocument.h,v 1.11 2007/03/25 20:48:30 leonb Exp $
57// $Name: release_3_5_19 $
58
59#ifndef _DJVUDOCUMENT_H
60#define _DJVUDOCUMENT_H
61#ifdef HAVE_CONFIG_H
62#include "config.h"
63#endif
64#if NEED_GNUG_PRAGMAS
65# pragma interface
66#endif
67
68
69#include "DjVuPort.h"
70
71#ifdef HAVE_NAMESPACES
72namespace DJVU {
73# ifdef NOT_DEFINED // Just to fool emacs c++ mode
74}
75#endif
76#endif
77
78class DjVmDoc;
79class DjVmDir;
80class DjVmDir0;
81class DjVmNav;
82class DjVuImage;
83class DjVuFile;
84class DjVuFileCache;
85class DjVuNavDir;
86class ByteStream;
87
88/** @name DjVuDocument.h
89    Files #"DjVuDocument.h"# and #"DjVuDocument.cpp"# contain implementation
90    of the \Ref{DjVuDocument} class - the ideal tool for opening, decoding
91    and saving DjVu single page and multi page documents.
92
93    @memo DjVu document class.
94    @author Andrei Erofeev <eaf@geocities.com>
95    @version #$Id: DjVuDocument.h,v 1.11 2007/03/25 20:48:30 leonb Exp $#
96*/
97
98//@{
99
100/** #DjVuDocument# provides convenient interface for opening, decoding
101    and saving back DjVu documents in single page and multi page formats.
102
103    {\bf Input formats}
104    It can read multi page DjVu documents in any of the 4 formats: two
105    obsolete ({\em old bundled} and {\em old indexed}) and two new
106    ({\em new bundled} and {\em new indirect}).
107
108    {\bf Output formats}
109    To encourage users to switch to the new formats, the #DjVuDocument# can
110    save documents back only in the new formats: {\em bundled} and
111    {\em indirect}.
112
113    {\bf Conversion.} Since #DjVuDocument# can open DjVu documents in
114    an obsolete format and save it in any of the two new formats
115    ({\em new bundled} and {\em new indirect}), this class can be used for
116    conversion from obsolete formats to the new ones. Although it can also
117    do conversion between the new two formats, it's not the best way to
118    do it. Please refer to \Ref{DjVmDoc} for details.
119
120    {\bf Decoding.} #DjVuDocument# provides convenient interface for obtaining
121    \Ref{DjVuImage} corresponding to any page of the document. It uses
122    \Ref{DjVuFileCache} to do caching thus avoiding unnecessary multiple decoding of
123    the same page. The real decoding though is accomplished by \Ref{DjVuFile}.
124
125    {\bf Messaging.} Being derived from \Ref{DjVuPort}, #DjVuDocument#
126    takes an active part in exchanging messages (requests and notifications)
127    between different parties involved in decoding. It reports (relays)
128    errors, progress information and even handles some requests for data (when
129    these requests deal with local files).
130
131    Typical usage of #DjVuDocument# class in a threadless command line
132    program would be the following:
133    \begin{verbatim}
134    static const char file_name[]="/tmp/document.djvu";
135    GP<DjVuDocument> doc=DjVuDocument::create_wait(file_name);
136    const int pages=doc->get_pages_num();
137    for(int page=0;page<pages;page++)
138    {
139       GP<DjVuImage> dimg=doc->get_page(page);
140       // Do something
141    };
142    \end{verbatim}
143   
144    {\bf Comments for the code above}
145    \begin{enumerate}
146       \item Since the document is assumed to be stored on the hard drive,
147             we don't have to cope with \Ref{DjVuPort}s and can pass
148             #ZERO# pointer to the \Ref{init}() function. #DjVuDocument#
149             can access local data itself. In the case of a plugin though,
150             one would have to implement his own \Ref{DjVuPort}, which
151             would handle requests for data arising when the document
152             is being decoded.
153       \item In a threaded program instead of calling the \Ref{init}()
154             function one can call \Ref{start_init}() and \Ref{stop_init}()
155             to initiate and interrupt initialization carried out in
156             another thread. This possibility of initializing the document
157             in another thread has been added specially for the plugin
158             because the initialization itself requires data, which is
159             not immediately available in the plugin. Thus, to prevent the
160             main thread from blocking, we perform initialization in a
161             separate thread. To check if the class is completely and
162             successfully initialized, use \Ref{is_init_ok}(). To see if
163             there was an error, use \Ref{is_init_failed}(). To
164             know when initialization is over (whether successfully or not),
165             use \Ref{is_init_complete}(). To wait for this to happen use
166             \Ref{wait_for_complete_init}(). Once again, all these things are
167             not required for single-threaded program.
168
169             Another difference between single-threaded and multi-threaded
170             environments is that in a single-threaded program, the image is
171             fully decoded before it's returned. In a multi-threaded
172             application decoding starts in a separate thread, and the pointer
173             to the \Ref{DjVuImage} being decoded is returned immediately.
174             This has been done to enable progressive redisplay
175             in the DjVu plugin. Use communication mechanism provided by
176             \Ref{DjVuPort} and \Ref{DjVuPortcaster} to learn about progress
177             of decoding.  Or try #dimg->wait_for_complete_decode()# to wait
178             until the decoding ends.
179       \item See Also: \Ref{DjVuFile}, \Ref{DjVuImage}, \Ref{GOS}.
180    \end{enumerate}
181
182    {\bf Initialization}
183    As mentioned above, the #DjVuDocument# can go through several stages
184    of initialization. The functionality is gradually added while it passes
185    one stage after another:
186    \begin{enumerate}
187       \item First of all, immediately after the object is created \Ref{init}()
188             or \Ref{start_init}() functions must be called. {\bf Nothing}
189             will work until this is done. \Ref{init}() function will not
190             return until the initialization is complete. You need to make
191             sure, that enough data is available. {\bf Do not call \Ref{init}()
192             in the plugin}. \Ref{start_init}() will start initialization
193             in another thread. Use \Ref{stop_init}() to interrupt it.
194             Use \Ref{is_init_complete}() to check the initialization progress.
195             Use \Ref{wait_for_complete_init}() to wait for init to finish.
196       \item The first thing the initializing code learns about the document
197             is its type (#BUNDLED#, #INDIRECT#, #OLD_BUNDLED# or #OLD_INDEXED#).
198             As soon as it happens, document flags are changed and
199             #notify_doc_flags_changed()# request is sent through the
200             communication mechanism provided by \Ref{DjVuPortcaster}.
201       \item After the document type becomes known, the initializing code
202             proceeds with learning the document structure. Gradually the
203             flags are updated with values:
204             \begin{itemize}
205                \item #DOC_DIR_KNOWN#: Contents of the document became known.
206                      This is meaningful for #BUNDLED#, #OLD_BUNDLED# and
207                      #INDIRECT# documents only.
208                \item #DOC_NDIR_KNOWN#: Contents of the document navigation
209                      directory became known. This is meaningful for old-style
210                      documents (#OLD_BUNDLED# and #OLD_INDEXED#) only
211                \item #DOC_INIT_OK# or #DOC_INIT_FAILED#:
212                      The initializing code finished.
213             \end{itemize}
214    \end{enumerate} */
215   
216class DjVuDocument : public DjVuPort
217{
218public:
219      /** Flags describing the document initialization state.
220          \begin{itemize}
221             \item #DOC_TYPE_KNOWN#: The type of the document has been learnt.
222             \item #DOC_DIR_KNOWN#: Contents of the document became known.
223                   This is meaningful for #BUNDLED#, #OLD_BUNDLED# and
224                   #INDIRECT# documents only.
225             \item #DOC_NDIR_KNOWN#: Contents of the document navigation
226                   directory became known. This is meaningful for old-style
227                   documents (#OLD_BUNDLED# and #OLD_INDEXED#) only
228             \item #DOC_INIT_OK#: The initialization has completed successfully.
229             \item #DOC_INIT_FAILED#: The initialization failed.
230          \end{itemize} */
231   enum DOC_FLAGS { DOC_TYPE_KNOWN=1, DOC_DIR_KNOWN=2,
232                    DOC_NDIR_KNOWN=4, DOC_INIT_OK=8,
233                    DOC_INIT_FAILED=16 }; // Each flag is a distinct bit (1,2,4,8,16); presumably OR-ed together in the value returned by get_doc_flags() -- confirm.
234      /** Specifies the format of #DjVuDocument#. There are currently 4 DjVu
235          multipage formats recognized by the library. Two of them are obsolete
236          and should not be used.
237          \begin{enumerate}
238             \item #OLD_BUNDLED# - Obsolete bundled format
239             \item #OLD_INDEXED# - Obsolete multipage format where every page
240                   is stored in a separate file and "includes" (by means
241                   of an #INCL# chunk) the file with the document directory.
242             \item #SINGLE_PAGE# - Single page document. Basically a file
243                   with either #FORM:DJVU# or #FORM:IW44# and no multipage
244                   information. For example, #OLD_INDEXED# documents with
245                   document directory do not qualify even if they contain only
246                   one page.
247             \item #BUNDLED# - Currently supported bundled format
248             \item #INDIRECT# - Currently supported "expanded" format, where
249                   every page and component is stored in a separate file. There
250                   is also a {\em top-level} file with the document directory.
251          \end{enumerate} */
252   enum DOC_TYPE { OLD_BUNDLED=1, OLD_INDEXED, BUNDLED, INDIRECT, // Numeric values: OLD_BUNDLED=1, OLD_INDEXED=2, BUNDLED=3, INDIRECT=4,
253                   SINGLE_PAGE, UNKNOWN_TYPE }; // SINGLE_PAGE=5, UNKNOWN_TYPE=6 (UNKNOWN_TYPE is what get_doc_type() reports while the type is not known yet).
254   enum THREAD_FLAGS { STARTED=1, FINISHED=2 }; // NOTE(review): presumably state bits of the initialization thread -- confirm against DjVuDocument.cpp.
255
256protected:
257      /** Default creator. Please call functions \Ref{init}() or
258          \Ref{start_init}() before you start working with the #DjVuDocument#.
259        */
260   DjVuDocument(void);
261public:
262
263     /// Virtual Destructor
264   virtual ~DjVuDocument(void);
265
266      /** Initializes the #DjVuDocument# object using an existing document.
267          This function should be called once after creating the object.
268          The #url# should point to the real data, and the creator of the
269          document should be ready to return this data to the document
270          if it's not stored locally (in which case #DjVuDocument# can
271          access it itself).
272
273          {\bf Initializing thread}
274          In a single-threaded application, the #start_init()# function performs
275          the complete initialization of the #DjVuDocument# before it returns.
276          In a multi-threaded application, though, it initializes some internal
277          variables, requests data for the document and starts a new
278          {\em initializing} thread, which is responsible for determining the
279          document type and structure and completing the initialization
280          process. This additional complication is justified in the case of
281          the DjVu plugin because performing initialization requires data and
282          in the plugin the data can be supplied by the main thread only.
283          Thus, if the initialization was completed by the main thread, the
284          plugin would run out of data and block.
285
286          {\bf Stages of initialization}
287          Immediately after the #start_init()# function terminates, the
288          #DjVuDocument# object is ready for use. Its functionality will
289          not be complete (until the initializing thread finishes), but
290          the object is still very useful. Such functions as \Ref{get_page}()
291          or \Ref{get_djvu_file}() or \Ref{id_to_url}() may be called
292          before the initializing thread completes. This allows the DjVu
293          plugin to start decoding as soon as possible without waiting for
294          all data to arrive.
295
296          To query the current stage of initialization you can use
297          \Ref{get_doc_flags}() function or listen to the
298          #notify_doc_flags_changed()# notifications distributed with the help
299          of \Ref{DjVuPortcaster}. To wait for the initialization to
300          complete use \Ref{wait_for_complete_init}(). To stop initialization
301          call \Ref{stop_init}().
302
303          {\bf Querying data}
304          The query for data is done using the communication mechanism
305          provided by \Ref{DjVuPort} and \Ref{DjVuPortcaster}. If #port#
306          is not #ZERO#, then the request for data will be forwarded to it.
307          If it {\bf is} #ZERO# then #DjVuDocument# will create an internal
308          instance of \Ref{DjVuSimplePort} and will use it to access local
309          files and report errors to #stderr#. In short, if the document
310          file is stored on the local hard disk, and you're OK about reporting
311          errors to #stderr#, you may pass #ZERO# pointer to \Ref{DjVuPort}
312          as #DjVuDocument# can take care of this situation by itself.
313
314          {\bf The URL}
315          Depending on the document type the #url# should point to:
316          \begin{itemize}
317             \item {\bf Old bundled} and {\bf New bundled} formats: to the
318                   document itself.
319             \item {\bf Old indexed} format: to any page of the document.
320             \item {\bf New indirect} format: to the top-level file of the
321                   document. If (like in the {\em old indexed} format) you
322                   point the #url# to a page, the page {\em will} be decoded,
323                   but it will {\em not} be recognized to be part of the
324                   document.
325          \end{itemize}
326
327          @param url The URL pointing to the document. If the document is
328                 in a {\em bundled} format then the URL should point to it.
329                 If the document is in the {\em old indexed} format then
330                 URL may point to any page of this document. For {\em new
331                 indirect} format the URL should point to the top-level
332                 file of the document.
333          @param port If not #ZERO#, all requests and notifications will
334                 be sent to it. Otherwise #DjVuDocument# will create an internal
335                 instance of \Ref{DjVuSimplePort} for these purposes.
336                 It's OK to make it #ZERO# if you're writing a command line
337                 tool, which should work with files on the hard disk only
338                 because #DjVuDocument# can access such files itself.
339          @param cache It's used to cache decoded \Ref{DjVuFile}s and
340                 is actually useful in the plugin only.  */
341   void         start_init(const GURL & url, GP<DjVuPort> port=0,
342                           DjVuFileCache * cache=0);
343
344   /** Creates a #DjVuDocument# without initializing it; \Ref{init}() or \Ref{start_init}() must be called on the returned object before it is used. */
345   static GP<DjVuDocument> create_noinit(void) {return new DjVuDocument;}
346
347   /** Create a version of DjVuDocument which has finished initializing. */
348   static GP<DjVuDocument> create_wait(
349     const GURL &url, GP<DjVuPort> xport=0, DjVuFileCache * const xcache=0);
350
351   /** Create a version of DjVuDocument which has begun initializing. */
352   static GP<DjVuDocument> create(
353     const GURL &url, GP<DjVuPort> xport=0, DjVuFileCache * const xcache=0);
354
355   /** Create a version of DjVuDocument which has begun initializing. */
356   static GP<DjVuDocument> create(
357     GP<DataPool> pool, GP<DjVuPort> xport=0, DjVuFileCache * const xcache=0);
358
359   /** Create a version of DjVuDocument which has begun initializing. */
360   static GP<DjVuDocument> create(
361     const GP<ByteStream> &bs, GP<DjVuPort> xport=0,
362     DjVuFileCache * const xcache=0);
363
364      /** Call this function when you don't need the #DjVuDocument# any more.
365          In a multi-threaded environment it will stop initialization
366          thread, if it is currently running. {\bf You will not be able
367          to start the initialization again. Thus, after calling this
368          function the document should not be used any more}. */
369   void         stop_init(void);
370
371      /** Initializes the document.
372
373          Contrary to \Ref{start_init}(), which just starts the initialization
374          thread in a multi-threaded environment, this function does not
375          return until the initialization completes (either successfully or
376          not). Basically, it calls \Ref{start_init}() and then
377          \Ref{wait_for_complete_init}().
378          */
379   void         init(const GURL & url, GP<DjVuPort> port=0,
380                     DjVuFileCache * cache=0);
381
382      /** Returns #TRUE# if the initialization thread finished (does not
383          matter successfully or not). As soon as it happens, the document
384          becomes completely initialized and its every function should work
385          properly. Please refer to the description of \Ref{init}() function
386          and of the #DjVuDocument# class to learn about the initializing
387          stages.
388
389          To wait for the initialization to complete use
390          \Ref{wait_for_complete_init}() function.
391
392          To query the initialization stage use \Ref{get_doc_flags}() function.
393
394          To learn whether initialization was successful or not,
395          use \Ref{is_init_ok}() and \Ref{is_init_failed}().
396
397          {\bf Note:} In a single threaded application the initialization
398          completes before the \Ref{init}() function returns. */
399   bool         is_init_complete(void) const;
400
401      /** Returns #TRUE# if the initialization thread finished successfully.
402
403          See \Ref{is_init_complete}() and \Ref{wait_for_complete_init}()
404          for more details. */
405   bool         is_init_ok(void) const;
406      /** Forces compression with the next save_as function. */
407   void         set_needs_compression(void);
408      /** Returns #TRUE# if there are uncompressed pages in this document. */
409   bool         needs_compression(void) const;
410      /** Returns #TRUE# if this file must be renamed before saving. */
411   bool         needs_rename(void) const;
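412      /** NOTE(review): the previous text here duplicated the comment of \Ref{needs_rename}(). Presumably returns #TRUE# if this document can be compressed -- confirm against DjVuDocument.cpp. */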
413   bool         can_compress(void) const;
414
415      /** Returns #TRUE# if the initialization thread failed.
416
417          See \Ref{is_init_complete}() and \Ref{wait_for_complete_init}()
418          for more details. */
419   bool         is_init_failed(void) const;
420
421      /** If the document has already learnt its type, the function will
422          return it: #DjVuDocument::OLD_BUNDLED# or
423          #DjVuDocument::OLD_INDEXED# or #DjVuDocument::SINGLE_PAGE# or
424          #DjVuDocument::BUNDLED# or #DjVuDocument::INDIRECT#. The first
425          two formats are obsolete. Otherwise (if the type is unknown yet),
426          #UNKNOWN_TYPE# will be returned.
427
428          {\bf Note:} To check the stage of the document initialization
429          use \Ref{get_doc_flags}() or \Ref{is_init_complete}() functions. To
430          wait for the initialization to complete use \Ref{wait_for_complete_init}().
431          For single threaded applications the initialization completes
432          before the \Ref{init}() function returns. */
433   int          get_doc_type(void) const;
434
435      /** Returns the document flags. The flags describe the degree in which
436          the #DjVuDocument# object is initialized. Every time the flags
437          are changed, a #notify_doc_flags_changed()# notification is
438          distributed using the \Ref{DjVuPortcaster} communication
439          mechanism.
440
441          {\bf Note:} To wait for the initialization to complete use
442          \Ref{wait_for_complete_init}(). For single threaded applications
443          the initialization completes before the \Ref{init}() function
444          returns. */
445   long         get_doc_flags(void) const;
446
447      /** Returns #TRUE# if the document is in bundled format (either in
448          #DjVuDocument::OLD_BUNDLED# or #DjVuDocument::BUNDLED# formats). */
449   bool         is_bundled(void) const;
450
451      /// Returns the URL passed to the \Ref{init}() function
452   GURL         get_init_url(void) const;
453
454      /// Returns a listing of id's used by this document.
455   GList<GUTF8String> get_id_list(void);
456
457      /// Fill the id's into a GMap.
458   void map_ids( GMap<GUTF8String,void *> &map);
459
460      /** Returns data corresponding to the URL passed to the \Ref{init}()
461          function.
462
463          {\bf Note:} The pointer returned is guaranteed to be non-#ZERO#
464          only after the #DjVuDocument# learns its type (passes through
465          the first stage of initialization process). Please refer to
466          \Ref{init}() for details. */
467   GP<DataPool> get_init_data_pool(void) const;
468
469      /** @name Accessing pages */
470      //@{
471      /** Returns the number of pages in the document. If there is still
472          insufficient information about the document structure (initialization
473          has not finished yet), #1# will be returned. Please refer to
474          \Ref{init}() for details. */
475   int          get_pages_num(void) const;
476
477      /** Translates the page number to the full URL of the page. This URL
478          is "artificial" for the {\em bundled} formats and is obtained
479          by appending the page name to the document's URL honoring possible
480          #;# and #?# in it. Negative page number has a special meaning for
481          #OLD_INDEXED# documents: it points to the URL, which the
482          #DjVuDocument# has been initialized with. For other formats this
483          is the same as page #0#.
484
485          The function tries its best to map the page number to the URL.
486          However, if the document structure has not been fully discovered
487          yet, an empty URL will be returned. Use \Ref{wait_for_complete_init}()
488          to wait until the document initialization completes. Refer to
489          \Ref{init}() for details.
490
491          Depending on the document format, the function assumes, that there
492          is enough information to complete the request when:
493          \begin{itemize}
494             \item #OLD_INDEXED#: If #page_num<0#, #DOC_TYPE_KNOWN# flag must
495                   be set. Otherwise #DOC_NDIR_KNOWN# must be set.
496             \item #OLD_BUNDLED#: If #page_num=0#, #DOC_DIR_KNOWN# flag must
497                   be set. Otherwise #DOC_NDIR_KNOWN# flag must be set.
498             \item #INDIRECT# and #BUNDLED#: #DOC_DIR_KNOWN# flag must be set.
499          \end{itemize} */
500   GURL         page_to_url(int page_num) const;
501   /// Translates the page number to an id: the page URL from \Ref{page_to_url}() is passed through \Ref{url_to_id}().
502   GUTF8String page_to_id(int page_num) const
503   { return url_to_id(page_to_url(page_num)); }
504      /** Translates the page URL back to page number. Returns #-1# if the
505          page is not in the document or the document's structure
506          has not been learnt yet.
507
508          Depending on the document format, the function starts working
509          properly as soon as:
510          \begin{itemize}
511             \item #OLD_INDEXED# and #OLD_BUNDLED# and #SINGLE_PAGE#:
512                   #DOC_NDIR_KNOWN# is set
513             \item #INDIRECT# and #BUNDLED#: #DOC_DIR_KNOWN# is set.
514          \end{itemize} */
515   int          url_to_page(const GURL & url) const;
516   /// Maps the specified URL to its id, which is the value returned by #url.fname()#.
517   GUTF8String  url_to_id(const GURL &url) const
518   { return url.fname(); }
519
520      /** Translates the textual ID to the complete URL if possible.
521         
522          Depending on the document format the translation is done in the
523          following way:
524          \begin{itemize}
525             \item For #BUNDLED# and #INDIRECT# documents the function
526                   scans the \Ref{DjVmDir} (the document directory) and
527                   matches the ID against:
528                   \begin{enumerate}
529                      \item File ID from the \Ref{DjVmDir}
530                      \item File name from the \Ref{DjVmDir}
531                      \item File title from the \Ref{DjVmDir}
532                   \end{enumerate}
533                   Then for #BUNDLED# document the URL is obtained by
534                   appending the #name# of the found file to the document's
535                   URL.
536
537                   For #INDIRECT# documents the URL is obtained by
538                   appending the #name# of the found file to the URL of
539                   the directory containing the document.
540             \item For #OLD_BUNDLED# documents the function compares the ID
541                   with internal name of every file inside the bundle and
542                   composes an artificial URL by appending the file name to
543                   the document's URL.
544             \item For #OLD_INDEXED# or #SINGLE_PAGE# documents the function
545                   composes the URL by appending the ID to the URL of the
546                   directory containing the document.
547          \end{itemize}
548
549          If information obtained by the initialization thread is not
550          sufficient yet, the #id_to_url()# may return an empty URL.
551          Depending on the document type, the information is sufficient when
552          \begin{itemize}
553             \item #BUNDLED# and #INDIRECT#: #DOC_DIR_KNOWN# flag is set.
554             \item #OLD_BUNDLED# and #OLD_INDEXED# and #SINGLE_PAGE#:
555                   #DOC_TYPE_KNOWN# flag is set.
556          \end{itemize} */
557   GURL         id_to_url(const GUTF8String &id) const;
558   /// Translates the id to a page number: the id is resolved with \Ref{id_to_url}() and the result is passed to \Ref{url_to_page}().
559   int          id_to_page(const GUTF8String &id) const
560   {  return url_to_page(id_to_url(id)); }
561
562      /** Returns \Ref{GP} pointer to \Ref{DjVuImage} corresponding to page
563          #page_num#. If caching is enabled, and there is a {\em fully decoded}
564          \Ref{DjVuFile} in the cache, the image will be reused and will
565          be returned fully decoded. Otherwise, if multi-threaded behavior
566          is allowed, and #sync# is set to #FALSE#, the decoding will be
567          started in a separate thread, which enables progressive
568          redisplay. Thus, in this case the image returned may be partially
569          decoded.
570
571          Negative #page_num# has a special meaning for the {\em old indexed}
572          multipage documents: the #DjVuDocument# will start decoding of the
573          URL with which it has been initialized. For other formats page
574          #-1# is the same as page #0#.
575
576          #DjVuDocument# can also connect the created page to the specified
577          #port# {\em before starting decoding}. This option will allow
578          the future owner of \Ref{DjVuImage} to receive all messages and
579          requests generated during its decoding.
580
581          If this function is called before the document's structure becomes
582          known (the initialization process completes), the \Ref{DjVuFile},
583          which the returned image will be attached to, will be assigned a
584          temporary artificial URL, which will be corrected as soon as enough
585          information becomes available. The trick prevents the main thread
586          from blocking and in some cases helps to start decoding earlier.
587          The URL is corrected and decoding will start as soon as
588          #DjVuDocument# passes some given stages of initialization and
589          \Ref{page_to_url}(), \Ref{id_to_url}() functions start working
590          properly. Please look through their description for details.
591
592          {\bf Note:} To wait for the initialization to complete use
593          \Ref{wait_for_complete_init}(). For single threaded applications
594          the initialization completes before the \Ref{init}() function
595          returns.
596
597          @param page_num Number of the page to be decoded
598          @param sync When set to #TRUE# the function will not return
599                      until the page is completely decoded. Otherwise,
600                      in a multi-threaded program, this function will
601                      start decoding in a new thread and will return
602                      a partially decoded image. Refer to
603                      \Ref{DjVuImage::wait_for_complete_decode}() and
604                      \Ref{DjVuFile::is_decode_ok}().
605          @param port A pointer to \Ref{DjVuPort}, that the created image
606                      will be connected to. */
607   GP<DjVuImage> get_page(int page_num, bool sync=true, DjVuPort * port=0) const;
608   GP<DjVuImage> get_page(int page_num, bool sync=true, DjVuPort * port=0)
609   { return const_cast<const DjVuDocument *>(this)->get_page(page_num,sync,port); }
610
611      /** Returns \Ref{GP} pointer to \Ref{DjVuImage} corresponding to the
612          specified ID. This function behaves exactly as the #get_page()#
613          function above. The only thing worth mentioning here is how the #ID#
614          parameter is treated.
615
616          First of all the function checks, if the ID contains a number.
617          If so, it just calls the #get_page()# function above. If ID is
618          #ZERO# or just empty, page number #-1# is assumed. Otherwise
619          the ID is translated to the URL using \Ref{id_to_url}(). */
620   GP<DjVuImage> get_page(const GUTF8String &id, bool sync=true, DjVuPort * port=0);
621   
622      /** Returns \Ref{DjVuFile} corresponding to the specified page.
623          Normally it translates the page number to the URL using
624          \Ref{page_to_url}() and then creates \Ref{DjVuFile} initializing
625          it with data from the URL.
626
627          The behavior becomes different, though in the case when the
628          document structure is unknown at the moment this function is called.
          In this situation it invents a temporary URL, creates a
630          \Ref{DjVuFile}, initializes it with this URL and returns
631          immediately. The caller may start decoding the file right away
632          (if necessary). The decoding will block but will automatically
633          continue as soon as enough information is collected about the
634          document. This trick should be quite transparent to the user and
635          helps to prevent the main thread from blocking. The decoding will
636          unblock and this function will stop using this "trick" as soon
637          as #DjVuDocument# passes some given stages of initialization and
638          \Ref{page_to_url}(), \Ref{id_to_url}() functions start working
639          properly.
640
          If #dont_create# is #TRUE# the function will return the file
          only if it already exists.
643
644          {\bf Note:} To wait for the initialization to complete use
645          \Ref{wait_for_complete_init}(). For single threaded applications
646          the initialization completes before the \Ref{init}() function
647          returns. */
648   GP<DjVuFile> get_djvu_file(int page_num, bool dont_create=false) const;
649   GP<DjVuFile> get_djvu_file(int page_num, bool dont_create=false)
650   { return const_cast<const DjVuDocument *>(this)->get_djvu_file(page_num,dont_create); }
651
652
653      /** Returns \Ref{DjVuFile} corresponding to the specified ID.
654          This function behaves exactly as the #get_djvu_file()# function
655          above. The only thing worth mentioning here is how the #ID#
656          parameter is treated.
657
          First off, \Ref{id_to_url}() is called.  If not successful,
659          the function checks, if the ID contains a number.
660          If so, it just calls the #get_djvu_file()# function above. If ID is
661          #ZERO# or just empty, page number #-1# is assumed.
662
          If #dont_create# is #TRUE# the function will return the file
          only if it already exists. */
665   GP<DjVuFile> get_djvu_file(const GUTF8String &id, bool dont_create=false);
666   GP<DjVuFile> get_djvu_file(const GURL &url, bool dont_create=false);
667      /** Returns a \Ref{DataPool} containing one chunk #TH44# with
668          the encoded thumbnail for the specified page. The function
669          first looks for thumbnails enclosed into the document and if
670          it fails to find one, it decodes the required page and creates
671          the thumbnail on the fly (unless #dont_decode# is true).
672
673          {\bf Note:} It may happen that the returned \Ref{DataPool} will
674          not contain all the data you need. In this case you will need
675          to install a trigger into the \Ref{DataPool} to learn when the
676          data actually arrives. */
677   virtual GP<DataPool> get_thumbnail(int page_num, bool dont_decode);
      /** Returns the gamma correction that was used when creating
          thumbnail images. If you need a different gamma correction, you
          will need to correct the thumbnails again. */
681   float        get_thumbnails_gamma(void) const;
682      //@}
683
684      /** Waits until the document initialization process finishes.
685          It can finish either successfully or not. Use \Ref{is_init_ok}()
686          and \Ref{is_init_failed}() to learn the result code.
687         
688          As described in \Ref{start_init}(), for multi-threaded applications the
689          initialization is carried out in parallel with the main thread.
690          This function blocks the calling thread until the initializing
691          thread reads enough data, receives information about the document
692          format and exits.  This function returns #true# if the
693          initialization is successful. You can use \Ref{get_flags}() or
694          \Ref{is_init_complete}() to check more precisely the degree of
695          initialization. Use \Ref{stop_init}() to interrupt initialization. */
696   bool            wait_for_complete_init(void);
697
          /** Wait until we know the number of pages and return. */
699   int wait_get_pages_num(void) const;
700   
701      /// Returns cache being used.
702   DjVuFileCache * get_cache(void) const;
703
704      /** @name Saving document to disk */
705      //@{
706      /** Returns pointer to the \Ref{DjVmDoc} class, which can save the
707          document contents on the hard disk in one of the two new formats:
708          {\em bundled} and {\em indirect}. You may also want to look
709          at \Ref{write}() and \Ref{expand}() if you are interested in
710          how to save the document.
711
712          {\bf Plugin Warning}. This function will read contents of the whole
713          document. Thus, if you call it from the main thread (the thread,
714          which transfers data from Netscape), the plugin will block. */
715   GP<DjVmDoc>          get_djvm_doc(void);
716      /** Saves the document in the {\em new bundled} format. All the data
717          is "bundled" into one file and this file is written into the
718          passed stream.
719
720          If #force_djvm# is #TRUE# then even one page documents will be
721          saved in the #DJVM BUNDLED# format (inside a #FORM:DJVM#);
722
723          {\bf Plugin Warning}. This function will read contents of the whole
724          document. Thus, if you call it from the main thread (the thread,
725          which transfers data from Netscape), the plugin will block. */
726   virtual void write(const GP<ByteStream> &str, bool force_djvm=false);
     /** Always save as bundled, renaming any files conflicting with the
         names in the supplied GMap. */
729   virtual void write(const GP<ByteStream> &str,
730     const GMap<GUTF8String,void *> &reserved);
731      /** Saves the document in the {\em new indirect} format when every
732          page and component are stored in separate files. This format
733          is ideal for web publishing because it allows direct access to
734          any page and component. In addition to it, a top-level file
735          containing the list of all components will be created. To view
736          the document later in the plugin or in the viewer one should
737          load the top-level file.
738
739          {\bf Plugin Warning}. This function will read contents of the whole
740          document. Thus, if you call it from the main thread (the thread,
741          which transfers data from Netscape), the plugin will block.
742         
743          @param codebase - Name of the directory which the document should
744                 be expanded into.
745          @param idx_name - Name of the top-level file containing the document
746                 directory (basically, list of all files composing the document).
747      */
748   void                 expand(const GURL &codebase, const GUTF8String &idx_name);
749      /** This function can be used instead of \Ref{write}() and \Ref{expand}().
750          It allows to save the document either in the new #BUNDLED# format
751          or in the new #INDIRECT# format depending on the value of parameter
752          #bundled#.
753
754          Depending on the document's type, the meaning of #where# is:
755          \begin{itemize}
756             \item For #BUNDLED# documents this is the name of the file
             \item For #INDIRECT# documents this is the name of the top-level
                   index file. All document files will be saved into the
                   same directory where the index file will reside.
          \end{itemize} */
760   virtual void         save_as(const GURL &where, const bool bundled=0);
761      //@}
      /** Returns pointer to the internal directory of the document, if it
          is in one of the new formats: #BUNDLED# or #INDIRECT#.
          Otherwise (if the document is #SINGLE_PAGE# or its format is
          obsolete), an exception is thrown.
766
767          #ZERO# will also be returned if the initializing thread has not
768          learnt enough information about the document (#DOC_DIR_KNOWN# has
769          not been set yet). Check \Ref{is_init_complete}() and \Ref{init}()
770          for details. */
771   GP<DjVmDir>          get_djvm_dir(void) const;
772      /** Returns pointer to the document bookmarks.
773          This applies to #BUNDLED# and #INDIRECT# documents.
774
775          #ZERO# will also be returned if the initializing thread has not
776          learnt enough information about the document (#DOC_DIR_KNOWN# has
777          not been set yet). Check \Ref{is_init_complete}() and \Ref{init}()
778          for details. */
779   GP<DjVmNav>          get_djvm_nav(void) const;
      /** Returns pointer to the internal directory of the document, if it
          is in obsolete #OLD_BUNDLED# format. For any other document type
          an exception is thrown.
782
783          #ZERO# will also be returned if the initializing thread has not
784          learnt enough information about the document (#DOC_DIR_KNOWN# has
785          not been set yet). Check \Ref{is_init_complete}() and \Ref{init}()
786          for details. */
787   GP<DjVmDir0>         get_djvm_dir0(void) const;
788      /** Returns pointer to {\em navigation directory} of the document.
789          The navigation directory is a DjVu file containing only one
790          chunk #NDIR# inside a #FORM:DJVI# with the list of all
791          document pages. */
792   GP<DjVuNavDir>       get_nav_dir(void) const;
793
794   /// Create a complete DjVuXML file.
795   void writeDjVuXML(const GP<ByteStream> &gstr_out,int flags) const;
796
797      /// Returns TRUE if #class_name# is #"DjVuDocument"# or #"DjVuPort"#
798   virtual bool         inherits(const GUTF8String &class_name) const;
799
800      /// Converts the specified id to a URL.
801   virtual GURL         id_to_url(const DjVuPort * source, const GUTF8String &id);
802   virtual GP<DjVuFile> id_to_file(const DjVuPort * source, const GUTF8String &id);
803   virtual GP<DataPool> request_data(const DjVuPort * source, const GURL & url);
804   virtual void         notify_file_flags_changed(const DjVuFile * source,
805                        long set_mask, long clr_mask);
806
807   virtual GList<GURL>  get_url_names(void);
808   virtual void         set_recover_errors(ErrorRecoveryAction=ABORT);
809   virtual void         set_verbose_eof(bool=true);
810
811   static void set_compress_codec(
812     void (*codec)(GP<ByteStream> &, const GURL &where, bool bundled));
813
814   static void set_import_codec(
815     void (*codec)(GP<DataPool> &,const GURL &url,bool &, bool &));
816
817protected:
818   static void (*djvu_import_codec) (
819     GP<DataPool> &pool, const GURL &url,bool &needs_compression, bool &needs_rename );
820   static void (*djvu_compress_codec) (
821     GP<ByteStream> &bs, const GURL &where, bool bundled);
822   virtual GP<DjVuFile> url_to_file(const GURL & url, bool dont_create=false) const;
823   GURL                 init_url;
824   GP<DataPool>         init_data_pool;
825   GP<DjVmDir>          djvm_dir;       // New-style DjVm directory
826   GP<DjVmNav>          djvm_nav;
827   int  doc_type;
828   bool needs_compression_flag;
829   bool can_compress_flag;
830   bool needs_rename_flag;
831
832   
833
834   bool                 has_url_names;
835   GCriticalSection     url_names_lock;
836   GList<GURL>  url_names;
837   ErrorRecoveryAction  recover_errors;
838   bool                 verbose_eof;
839public:
840   class UnnamedFile; // This really should be protected ...
841   class ThumbReq; // This really should be protected ...
842protected:
843   bool                 init_started;
844   GSafeFlags           flags;
845   GSafeFlags           init_thread_flags;
846   DjVuFileCache        * cache;
847   GP<DjVuSimplePort>   simple_port;
848
849   GP<DjVmDir0>         djvm_dir0;      // Old-style DjVm directory
850   GP<DjVuNavDir>       ndir;           // Old-style navigation directory
851   GUTF8String          first_page_name;// For OLD_BUNDLED docs only
852
853      // The following is used in init() and destructor to query NDIR
854      // DO NOT USE FOR ANYTHING ELSE. THE FILE IS ZEROED IMMEDIATELY
855      // AFTER IT'S NO LONGER NEEDED. If you don't zero it, ~DjVuDocument()
856      // will kill it, which is a BAD thing if the file's already in cache.
857   GP<DjVuFile>         ndir_file;
858   
859   GPList<UnnamedFile>  ufiles_list;
860   GCriticalSection     ufiles_lock;
861
862   GPList<ThumbReq>     threqs_list;
863   GCriticalSection     threqs_lock;
864
865   GP<DjVuDocument>     init_life_saver;
866
867   static const float   thumb_gamma;
868
869      // Reads document contents in another thread trying to determine
870      // its type and structure
871   GThread              init_thr;
872   static void          static_init_thread(void *);
873   void                 init_thread(void);
874
875   void                 check() const;
876
877   void                 process_threqs(void);
878   GP<ThumbReq>         add_thumb_req(const GP<ThumbReq> & thumb_req);
879     
880   void                 add_to_cache(const GP<DjVuFile> & f);
881   void                 check_unnamed_files(void);
882   GUTF8String          get_int_prefix(void) const;
883   void                 set_file_aliases(const DjVuFile * file);
884   GURL                 invent_url(const GUTF8String &name) const;
885};
886
887class DjVuDocument::UnnamedFile : public GPEnabled
888{
889public:
890   enum { ID, PAGE_NUM };
891   int          id_type;
892   GUTF8String          id;
893   int          page_num;
894   GURL         url;
895   GP<DjVuFile> file;
896   GP<DataPool> data_pool;
897protected:
898   UnnamedFile(int xid_type, const GUTF8String &xid, int xpage_num, const GURL & xurl,
899                  const GP<DjVuFile> & xfile) :
900      id_type(xid_type), id(xid), page_num(xpage_num), url(xurl), file(xfile) {}
901   friend class DjVuDocument;
902};
903
904class DjVuDocument::ThumbReq : public GPEnabled
905{
906public:
907   int          page_num;
908   GP<DataPool> data_pool;
909
910         // Either of the next two blocks should present
911   GP<DjVuFile> image_file;
912
913   int          thumb_chunk;
914   GP<DjVuFile> thumb_file;
915protected:
916   ThumbReq(int xpage_num, const GP<DataPool> & xdata_pool) :
917      page_num(xpage_num), data_pool(xdata_pool) {}
918   friend class DjVuDocument;
919};
920
921inline void
922DjVuDocument::init(const GURL &url, GP<DjVuPort> port, DjVuFileCache *cache)
923{
924  start_init(url,port,cache);
925  wait_for_complete_init();
926}
927
928inline GP<DjVuDocument>
929DjVuDocument::create(
930  const GURL &url, GP<DjVuPort> xport, DjVuFileCache * const xcache)
931{
932  DjVuDocument *doc=new DjVuDocument;
933  GP<DjVuDocument> retval=doc;
934  doc->start_init(url,xport,xcache);
935  return retval;
936}
937
938inline bool
939DjVuDocument::is_init_complete(void) const
940{
941   return (flags & (DOC_INIT_OK | DOC_INIT_FAILED))!=0;
942}
943
944inline bool
945DjVuDocument::is_init_ok(void) const
946{
947   return (flags & DOC_INIT_OK)!=0;
948}
949
950inline void
951DjVuDocument::set_needs_compression(void)
952{
953   needs_compression_flag=true;
954}
955
956inline bool
957DjVuDocument::needs_compression(void) const
958{
959   return needs_compression_flag;
960}
961
962inline bool
963DjVuDocument::needs_rename(void) const
964{
965   return needs_rename_flag;
966}
967
968inline bool
969DjVuDocument::can_compress(void) const
970{
971   return can_compress_flag;
972}
973
974inline bool
975DjVuDocument::is_init_failed(void) const
976{
977   return (flags & DOC_INIT_FAILED)!=0;
978}
979
980inline int
981DjVuDocument::get_doc_type(void) const { return doc_type; }
982
983inline long
984DjVuDocument::get_doc_flags(void) const { return flags; }
985
986inline bool
987DjVuDocument::is_bundled(void) const
988{
989   return doc_type==BUNDLED || doc_type==OLD_BUNDLED;
990}
991
992inline GURL
993DjVuDocument::get_init_url(void) const { return init_url; }
994
995inline GP<DataPool>
996DjVuDocument::get_init_data_pool(void) const { return init_data_pool; }
997
998inline bool
999DjVuDocument::inherits(const GUTF8String &class_name) const
1000{
1001   return
1002      (GUTF8String("DjVuDocument") == class_name) ||
1003      DjVuPort::inherits(class_name);
1004//      !strcmp("DjVuDocument", class_name) ||
1005//      DjVuPort::inherits(class_name);
1006}
1007
1008inline float
1009DjVuDocument::get_thumbnails_gamma(void) const
1010{
1011   return thumb_gamma;
1012}
1013
1014inline DjVuFileCache *
1015DjVuDocument::get_cache(void) const
1016{
1017   return cache;
1018}
1019
1020inline GP<DjVmDir>
1021DjVuDocument::get_djvm_dir(void) const
1022{
1023   if (doc_type==SINGLE_PAGE)
1024      G_THROW( ERR_MSG("DjVuDocument.no_dir") );
1025   if (doc_type!=BUNDLED && doc_type!=INDIRECT)
1026      G_THROW( ERR_MSG("DjVuDocument.obsolete") );
1027   return djvm_dir;
1028}
1029
1030inline GP<DjVmNav>
1031DjVuDocument::get_djvm_nav(void) const
1032{
1033  if (doc_type==BUNDLED || doc_type==INDIRECT)
1034    return djvm_nav;
1035  return 0;
1036}
1037
1038inline GP<DjVmDir0>
1039DjVuDocument::get_djvm_dir0(void) const
1040{
1041   if (doc_type!=OLD_BUNDLED)
1042      G_THROW( ERR_MSG("DjVuDocument.old_bundle") );
1043   return djvm_dir0;
1044}
1045
1046inline GP<DjVuNavDir>
1047DjVuDocument::get_nav_dir(void) const
1048{
1049   return ndir;
1050}
1051
1052inline void
1053DjVuDocument::set_recover_errors(ErrorRecoveryAction recover)
1054{
1055  recover_errors=recover;
1056}
1057
1058inline void
1059DjVuDocument::set_verbose_eof(bool verbose)
1060{
1061  verbose_eof=verbose;
1062}
1063
1064//@}
1065
1066
1067#ifdef HAVE_NAMESPACES
1068}
1069# ifndef NOT_USING_DJVU_NAMESPACE
1070using namespace DJVU;
1071# endif
1072#endif
1073#endif
Note: See TracBrowser for help on using the repository browser.