source: trunk/libdjvu/DjVuDocument.h @ 15

Last change on this file since 15 was 15, checked in by Eugene Romanenko, 15 years ago

needed libs update

File size: 43.7 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, Version 2. The license should have
9//C- accompanied the software or you may obtain a copy of the license
10//C- from the Free Software Foundation at http://www.fsf.org .
11//C-
12//C- This program is distributed in the hope that it will be useful,
13//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
14//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15//C- GNU General Public License for more details.
16//C-
17//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
18//C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech
19//C- Software authorized us to replace the original DjVu(r) Reference
20//C- Library notice by the following text (see doc/lizard2002.djvu):
21//C-
22//C-  ------------------------------------------------------------------
23//C- | DjVu (r) Reference Library (v. 3.5)
24//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
25//C- | The DjVu Reference Library is protected by U.S. Pat. No.
26//C- | 6,058,214 and patents pending.
27//C- |
28//C- | This software is subject to, and may be distributed under, the
29//C- | GNU General Public License, Version 2. The license should have
30//C- | accompanied the software or you may obtain a copy of the license
31//C- | from the Free Software Foundation at http://www.fsf.org .
32//C- |
33//C- | The computer code originally released by LizardTech under this
34//C- | license and unmodified by other parties is deemed "the LIZARDTECH
35//C- | ORIGINAL CODE."  Subject to any third party intellectual property
36//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
37//C- | non-exclusive license to make, use, sell, or otherwise dispose of
38//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
39//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
40//C- | General Public License.   This grant only confers the right to
41//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
42//C- | the extent such infringement is reasonably necessary to enable
43//C- | recipient to make, have made, practice, sell, or otherwise dispose
44//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
45//C- | any greater extent that may be necessary to utilize further
46//C- | modifications or combinations.
47//C- |
48//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
49//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
50//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
51//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
52//C- +------------------------------------------------------------------
53//
54// $Id: DjVuDocument.h,v 1.10 2005/05/25 20:24:52 leonb Exp $
55// $Name: release_3_5_16 $
56
57#ifndef _DJVUDOCUMENT_H
58#define _DJVUDOCUMENT_H
59#ifdef HAVE_CONFIG_H
60#include "config.h"
61#endif
62#if NEED_GNUG_PRAGMAS
63# pragma interface
64#endif
65
66
67#include "DjVuPort.h"
68
69#ifdef HAVE_NAMESPACES
70namespace DJVU {
71# ifdef NOT_DEFINED // Just to fool emacs c++ mode
72}
73#endif
74#endif
75
76class DjVmDoc;
77class DjVmDir;
78class DjVmDir0;
79class DjVmNav;
80class DjVuImage;
81class DjVuFile;
82class DjVuFileCache;
83class DjVuNavDir;
84class ByteStream;
85
86/** @name DjVuDocument.h
87    Files #"DjVuDocument.h"# and #"DjVuDocument.cpp"# contain implementation
88    of the \Ref{DjVuDocument} class - the ideal tool for opening, decoding
89    and saving DjVu single page and multi page documents.
90
91    @memo DjVu document class.
92    @author Andrei Erofeev <eaf@geocities.com>
93    @version #$Id: DjVuDocument.h,v 1.10 2005/05/25 20:24:52 leonb Exp $#
94*/
95
96//@{
97
98/** #DjVuDocument# provides convenient interface for opening, decoding
99    and saving back DjVu documents in single page and multi page formats.
100
101    {\bf Input formats}
102    It can read multi page DjVu documents in any of the 4 formats: two
103    obsolete ({\em old bundled} and {\em old indexed}) and two new
104    ({\em new bundled} and {\em new indirect}).
105
106    {\bf Output formats}
107    To encourage users to switch to the new formats, the #DjVuDocument# can
108    save documents back only in the new formats: {\em bundled} and
109    {\em indirect}.
110
111    {\bf Conversion.} Since #DjVuDocument# can open DjVu documents in
112    an obsolete format and save it in any of the two new formats
113    ({\em new bundled} and {\em new indirect}), this class can be used for
114    conversion from obsolete formats to the new ones. Although it can also
115    do conversion between the new two formats, it's not the best way to
116    do it. Please refer to \Ref{DjVmDoc} for details.
117
118    {\bf Decoding.} #DjVuDocument# provides convenient interface for obtaining
119    \Ref{DjVuImage} corresponding to any page of the document. It uses
120    \Ref{DjVuFileCache} to do caching thus avoiding unnecessary multiple decoding of
121    the same page. The real decoding though is accomplished by \Ref{DjVuFile}.
122
123    {\bf Messaging.} Being derived from \Ref{DjVuPort}, #DjVuDocument#
124    takes an active part in exchanging messages (requests and notifications)
125    between different parties involved in decoding. It reports (relays)
126    errors, progress information and even handles some requests for data (when
127    these requests deal with local files).
128
129    Typical usage of #DjVuDocument# class in a threadless command line
130    program would be the following:
131    \begin{verbatim}
132    static const char file_name[]="/tmp/document.djvu";
133    GP<DjVuDocument> doc=DjVuDocument::create_wait(file_name);
134    const int pages=doc->get_pages_num();
135    for(int page=0;page<pages;page++)
136    {
137       GP<DjVuImage> dimg=doc->get_page(page);
138       // Do something
139    };
140    \end{verbatim}
141   
142    {\bf Comments for the code above}
143    \begin{enumerate}
144       \item Since the document is assumed to be stored on the hard drive,
145             we don't have to cope with \Ref{DjVuPort}s and can pass
146             #ZERO# pointer to the \Ref{init}() function. #DjVuDocument#
147             can access local data itself. In the case of a plugin though,
148             one would have to implement his own \Ref{DjVuPort}, which
149             would handle requests for data arising when the document
150             is being decoded.
151       \item In a threaded program instead of calling the \Ref{init}()
152             function one can call \Ref{start_init}() and \Ref{stop_init}()
153             to initiate and interrupt initialization carried out in
154             another thread. This possibility of initializing the document
155             in another thread has been added specially for the plugin
156             because the initialization itself requires data, which is
157             not immediately available in the plugin. Thus, to prevent the
158             main thread from blocking, we perform initialization in a
159             separate thread. To check if the class is completely and
160             successfully initialized, use \Ref{is_init_ok}(). To see if
161             there was an error, use \Ref{is_init_failed}(). To
162             know when initialization is over (whether successfully or not),
163             use \Ref{is_init_complete}(). To wait for this to happen use
164             \Ref{wait_for_complete_init}(). Once again, all these things are
165             not required for single-threaded program.
166
167             Another difference between single-threaded and multi-threaded
168             environments is that in a single-threaded program, the image is
169             fully decoded before it's returned. In a multi-threaded
170             application decoding starts in a separate thread, and the pointer
171             to the \Ref{DjVuImage} being decoded is returned immediately.
172             This has been done to enable progressive redisplay
173             in the DjVu plugin. Use communication mechanism provided by
174             \Ref{DjVuPort} and \Ref{DjVuPortcaster} to learn about progress
175             of decoding.  Or try #dimg->wait_for_complete_decode()# to wait
176             until the decoding ends.
177       \item See Also: \Ref{DjVuFile}, \Ref{DjVuImage}, \Ref{GOS}.
178    \end{enumerate}
179
180    {\bf Initialization}
181    As mentioned above, the #DjVuDocument# can go through several stages
182    of initialization. The functionality is gradually added while it passes
183    one stage after another:
184    \begin{enumerate}
185       \item First of all, immediately after the object is created \Ref{init}()
186             or \Ref{start_init}() functions must be called. {\bf Nothing}
187             will work until this is done. \Ref{init}() function will not
188             return until the initialization is complete. You need to make
189             sure, that enough data is available. {\bf Do not call \Ref{init}()
190             in the plugin}. \Ref{start_init}() will start initialization
191             in another thread. Use \Ref{stop_init}() to interrupt it.
192             Use \Ref{is_init_complete}() to check the initialization progress.
193             Use \Ref{wait_for_complete_init}() to wait for init to finish.
194       \item The first thing the initializing code learns about the document
195             is its type (#BUNDLED#, #INDIRECT#, #OLD_BUNDLED# or #OLD_INDEXED#).
196             As soon as it happens, document flags are changed and
197             #notify_doc_flags_changed()# request is sent through the
198             communication mechanism provided by \Ref{DjVuPortcaster}.
199       \item After the document type becomes known, the initializing code
200             proceeds with learning the document structure. Gradually the
201             flags are updated with values:
202             \begin{itemize}
203                \item #DOC_DIR_KNOWN#: Contents of the document became known.
204                      This is meaningful for #BUNDLED#, #OLD_BUNDLED# and
205                      #INDIRECT# documents only.
206                \item #DOC_NDIR_KNOWN#: Contents of the document navigation
207                      directory became known. This is meaningful for old-style
208                      documents (#OLD_BUNDLED# and #OLD_INDEXED#) only
209                \item #DOC_INIT_OK# or #DOC_INIT_FAILED#:
210                      The initializing code finished.
211             \end{itemize}
212    \end{enumerate} */
213   
214class DjVuDocument : public DjVuPort
215{
216public:
217      /** Flags describing the document initialization state.
218          \begin{itemize}
219             \item #DOC_TYPE_KNOWN#: The type of the document has been learnt.
220             \item #DOC_DIR_KNOWN#: Contents of the document became known.
221                   This is meaningful for #BUNDLED#, #OLD_BUNDLED# and
222                   #INDIRECT# documents only.
223             \item #DOC_NDIR_KNOWN#: Contents of the document navigation
224                   directory became known. This is meaningful for old-style
225                   documents (#OLD_BUNDLED# and #OLD_INDEXED#) only
226             \item #DOC_INIT_OK#: The initialization has completed successfully.
227             \item #DOC_INIT_FAILED#: The initialization failed.
228          \end{itemize} */
229   enum DOC_FLAGS { DOC_TYPE_KNOWN=1, DOC_DIR_KNOWN=2,
230                    DOC_NDIR_KNOWN=4, DOC_INIT_OK=8,
231                    DOC_INIT_FAILED=16 };   // each flag value is a distinct bit (power of two)
232      /** Specifies the format of #DjVuDocument#. There are currently 4 DjVu
233          multipage formats recognized by the library. Two of them are obsolete
234          and should not be used.
235          \begin{enumerate}
236             \item #OLD_BUNDLED# - Obsolete bundled format
237             \item #OLD_INDEXED# - Obsolete multipage format where every page
238                   is stored in a separate file and "includes" (by means
239                   of an #INCL# chunk) the file with the document directory.
240             \item #SINGLE_PAGE# - Single page document. Basically a file
241                   with either #FORM:DJVU# or #FORM:IW44# and no multipage
242                   information. For example, #OLD_INDEXED# documents with
243                   document directory do not qualify even if they contain only
244                   one page.
245             \item #BUNDLED# - Currently supported bundled format
246             \item #INDIRECT# - Currently supported "expanded" format, where
247                   every page and component is stored in a separate file. There
248                   is also a {\em top-level} file with the document directory.
249          \end{enumerate} */
250   enum DOC_TYPE { OLD_BUNDLED=1, OLD_INDEXED, BUNDLED, INDIRECT,
251                   SINGLE_PAGE, UNKNOWN_TYPE };   // UNKNOWN_TYPE: the document type is not known yet (see get_doc_type())
252   enum THREAD_FLAGS { STARTED=1, FINISHED=2 };   // NOTE(review): presumably states of the initialization thread -- confirm against DjVuDocument.cpp
253
254protected:
255      /** Default creator. Please call functions \Ref{init}() or
256          \Ref{start_init}() before you start working with the #DjVuDocument#.
257        */
258   DjVuDocument(void);
259public:
260
261     /// Virtual Destructor
262   virtual ~DjVuDocument(void);
263
264      /** Initializes the #DjVuDocument# object using an existing document.
265          This function should be called once after creating the object.
266          The #url# should point to the real data, and the creator of the
267          document should be ready to return this data to the document
268          if it's not stored locally (in which case #DjVuDocument# can
269          access it itself).
270
271          {\bf Initializing thread}
272          In a single-threaded application, the #start_init()# function performs
273          the complete initialization of the #DjVuDocument# before it returns.
274          In a multi-threaded application, though, it initializes some internal
275          variables, requests data for the document and starts a new
276          {\em initializing} thread, which is responsible for determining the
277          document type and structure and completing the initialization
278          process. This additional complication is justified in the case of
279          the DjVu plugin because performing initialization requires data and
280          in the plugin the data can be supplied by the main thread only.
281          Thus, if the initialization was completed by the main thread, the
282          plugin would run out of data and block.
283
284          {\bf Stages of initialization}
285          Immediately after the #start_init()# function terminates, the
286          #DjVuDocument# object is ready for use. Its functionality will
287          not be complete (until the initializing thread finishes), but
288          the object is still very useful. Such functions as \Ref{get_page}()
289          or \Ref{get_djvu_file}() or \Ref{id_to_url}() may be called
290          before the initializing thread completes. This allows the DjVu
291          plugin to start decoding as soon as possible without waiting for
292          all data to arrive.
293
294          To query the current stage of initialization you can use
295          \Ref{get_doc_flags}() function or listen to the
296          #notify_doc_flags_changed()# notifications distributed with the help
297          of \Ref{DjVuPortcaster}. To wait for the initialization to
298          complete use \Ref{wait_for_complete_init}(). To stop initialization
299          call \Ref{stop_init}().
300
301          {\bf Querying data}
302          The query for data is done using the communication mechanism
303          provided by \Ref{DjVuPort} and \Ref{DjVuPortcaster}. If #port#
304          is not #ZERO#, then the request for data will be forwarded to it.
305          If it {\bf is} #ZERO# then #DjVuDocument# will create an internal
306          instance of \Ref{DjVuSimplePort} and will use it to access local
307          files and report errors to #stderr#. In short, if the document
308          file is stored on the local hard disk, and you're OK about reporting
309          errors to #stderr#, you may pass #ZERO# pointer to \Ref{DjVuPort}
310          as #DjVuDocument# can take care of this situation by itself.
311
312          {\bf The URL}
313          Depending on the document type the #url# should point to:
314          \begin{itemize}
315             \item {\bf Old bundled} and {\bf New bundled} formats: to the
316                   document itself.
317             \item {\bf Old indexed} format: to any page of the document.
318             \item {\bf New indirect} format: to the top-level file of the
319                   document. If (like in the {\em old indexed} format) you
320                   point the #url# to a page, the page {\em will} be decoded,
321                   but it will {\em not} be recognized to be part of the
322                   document.
323          \end{itemize}
324
325          @param url The URL pointing to the document. If the document is
326                 in a {\em bundled} format then the URL should point to it.
327                 If the document is in the {\em old indexed} format then
328                 URL may point to any page of this document. For {\em new
329                 indirect} format the URL should point to the top-level
330                 file of the document.
331          @param port If not #ZERO#, all requests and notifications will
332                 be sent to it. Otherwise #DjVuDocument# will create an internal
333                 instance of \Ref{DjVuSimplePort} for these purposes.
334                 It's OK to make it #ZERO# if you're writing a command line
335                 tool, which should work with files on the hard disk only
336                 because #DjVuDocument# can access such files itself.
337          @param cache It's used to cache decoded \Ref{DjVuFile}s and
338                 is actually useful in the plugin only.  */
339   void         start_init(const GURL & url, GP<DjVuPort> port=0,
340                           DjVuFileCache * cache=0);
341
342   /** Creates a DjVuDocument without initializing it; call \Ref{init}() or \Ref{start_init}() on it before use. */
343   static GP<DjVuDocument> create_noinit(void) {return new DjVuDocument;}
344
345   /** Create a version of DjVuDocument which has finished initializing. */
346   static GP<DjVuDocument> create_wait(
347     const GURL &url, GP<DjVuPort> xport=0, DjVuFileCache * const xcache=0);
348
349   /** Create a version of DjVuDocument which has begun initializing. */
350   static GP<DjVuDocument> create(
351     const GURL &url, GP<DjVuPort> xport=0, DjVuFileCache * const xcache=0);
352
353   /** Create a version of DjVuDocument which has begun initializing. */
354   static GP<DjVuDocument> create(
355     GP<DataPool> pool, GP<DjVuPort> xport=0, DjVuFileCache * const xcache=0);
356
357   /** Create a version of DjVuDocument which has begun initializing. */
358   static GP<DjVuDocument> create(
359     const GP<ByteStream> &bs, GP<DjVuPort> xport=0,
360     DjVuFileCache * const xcache=0);
361
362      /** Call this function when you don't need the #DjVuDocument# any more.
363          In a multi-threaded environment it will stop initialization
364          thread, if it is currently running. {\bf You will not be able
365          to start the initialization again. Thus, after calling this
366          function the document should not be used any more}. */
367   void         stop_init(void);
368
369      /** Initializes the document.
370
371          Contrary to \Ref{start_init}(), which just starts the initialization
372          thread in a multi-threaded environment, this function does not
373          return until the initialization completes (either successfully or
374          not). Basically, it calls \Ref{start_init}() and then
375          \Ref{wait_for_complete_init}().
376          */
377   void         init(const GURL & url, GP<DjVuPort> port=0,
378                     DjVuFileCache * cache=0);
379
380      /** Returns #TRUE# if the initialization thread finished (does not
381          matter successfully or not). As soon as it happens, the document
382          becomes completely initialized and its every function should work
383          properly. Please refer to the description of \Ref{init}() function
384          and of the #DjVuDocument# class to learn about the initializing
385          stages.
386
387          To wait for the initialization to complete use
388          \Ref{wait_for_complete_init}() function.
389
390          To query the initialization stage use \Ref{get_doc_flags}() function.
391
392          To learn whether initialization was successful or not,
393          use \Ref{is_init_ok}() and \Ref{is_init_failed}().
394
395          {\bf Note:} In a single threaded application the initialization
396          completes before the \Ref{init}() function returns. */
397   bool         is_init_complete(void) const;
398
399      /** Returns #TRUE# if the initialization thread finished successfully.
400
401          See \Ref{is_init_complete}() and \Ref{wait_for_complete_init}()
402          for more details. */
403   bool         is_init_ok(void) const;
404      /** Forces compression with the next save_as function. */
405   void         set_needs_compression(void);
406      /** Returns #TRUE# if there are uncompressed pages in this document. */
407   bool         needs_compression(void) const;
408      /** Returns #TRUE# if this file must be renamed before saving. */
409   bool         needs_rename(void) const;
410      /** Returns #TRUE# if this document can be compressed. */
411   bool         can_compress(void) const;
412
413      /** Returns #TRUE# if the initialization thread failed.
414
415          See \Ref{is_init_complete}() and \Ref{wait_for_complete_init}()
416          for more details. */
417   bool         is_init_failed(void) const;
418
419      /** If the document has already learnt its type, the function will
420          return it: #DjVuDocument::OLD_BUNDLED# or
421          #DjVuDocument::OLD_INDEXED# or #DjVuDocument::SINGLE_PAGE# or
422          #DjVuDocument::BUNDLED# or #DjVuDocument::INDIRECT#. The first
423          two formats are obsolete. Otherwise (if the type is unknown yet),
424          #UNKNOWN_TYPE# will be returned.
425
426          {\bf Note:} To check the stage of the document initialization
427          use \Ref{get_doc_flags}() or \Ref{is_init_complete}() functions. To
428          wait for the initialization to complete use \Ref{wait_for_complete_init}().
429          For single threaded applications the initialization completes
430          before the \Ref{init}() function returns. */
431   int          get_doc_type(void) const;
432
433      /** Returns the document flags. The flags describe the degree in which
434          the #DjVuDocument# object is initialized. Every time the flags
435          are changed, a #notify_doc_flags_changed()# notification is
436          distributed using the \Ref{DjVuPortcaster} communication
437          mechanism.
438
439          {\bf Note:} To wait for the initialization to complete use
440          \Ref{wait_for_complete_init}(). For single threaded applications
441          the initialization completes before the \Ref{init}() function
442          returns. */
443   long         get_doc_flags(void) const;
444
445      /** Returns #TRUE# if the document is in bundled format (either in
446          #DjVuDocument::OLD_BUNDLED# or #DjVuDocument::BUNDLED# formats). */
447   bool         is_bundled(void) const;
448
449      /// Returns the URL passed to the \Ref{init}() function
450   GURL         get_init_url(void) const;
451
452      /// Returns a listing of id's used by this document.
453   GList<GUTF8String> get_id_list(void);
454
455      /// Fill the id's into a GMap.
456   void map_ids( GMap<GUTF8String,void *> &map);
457
458      /** Returns data corresponding to the URL passed to the \Ref{init}()
459          function.
460
461          {\bf Note:} The pointer returned is guaranteed to be non-#ZERO#
462          only after the #DjVuDocument# learns its type (passes through
463          the first stage of initialization process). Please refer to
464          \Ref{init}() for details. */
465   GP<DataPool> get_init_data_pool(void) const;
466
467      /** @name Accessing pages */
468      //@{
469      /** Returns the number of pages in the document. If there is still
470          insufficient information about the document structure (initialization
471          has not finished yet), #1# will be returned. Please refer to
472          \Ref{init}() for details. */
473   int          get_pages_num(void) const;
474
475      /** Translates the page number to the full URL of the page. This URL
476          is "artificial" for the {\em bundled} formats and is obtained
477          by appending the page name to the document's URL honoring possible
478          #;# and #?# in it. Negative page number has a special meaning for
479          #OLD_INDEXED# documents: it points to the URL, which the
480          #DjVuDocument# has been initialized with. For other formats this
481          is the same as page #0#.
482
483          The function tries its best to map the page number to the URL.
484          Although, if the document structure has not been fully discovered
485          yet, an empty URL will be returned. Use \Ref{wait_for_complete_init}()
486          to wait until the document initialization completes. Refer to
487          \Ref{init}() for details.
488
489          Depending on the document format, the function assumes, that there
490          is enough information to complete the request when:
491          \begin{itemize}
492             \item #OLD_INDEXED#: If #page_num<0#, #DOC_TYPE_KNOWN# flag must
493                   be set. Otherwise #DOC_NDIR_KNOWN# must be set.
494             \item #OLD_BUNDLED#: If #page_num=0#, #DOC_DIR_KNOWN# flag must
495                   be set. Otherwise #DOC_NDIR_KNOWN# flag must be set.
496             \item #INDIRECT# and #BUNDLED#: #DOC_DIR_KNOWN# flag must be set.
497          \end{itemize} */
498   GURL         page_to_url(int page_num) const;
499   /// Translates the page number to its id by passing \Ref{page_to_url}(page_num) to \Ref{url_to_id}().
500   GUTF8String page_to_id(int page_num) const
501   { return url_to_id(page_to_url(page_num)); }
502      /** Translates the page URL back to page number. Returns #-1# if the
503          page is not in the document or the document's structure
504          has not been learnt yet.
505
506          Depending on the document format, the function starts working
507          properly as soon as:
508          \begin{itemize}
509             \item #OLD_INDEXED# and #OLD_BUNDLED# and #SINGLE_PAGE#:
510                   #DOC_NDIR_KNOWN# is set
511             \item #INDIRECT# and #BUNDLED#: #DOC_DIR_KNOWN# is set.
512          \end{itemize} */
513   int          url_to_page(const GURL & url) const;
514   /// Maps the specified url to its id, which is the value returned by #url.fname()#.
515   GUTF8String  url_to_id(const GURL &url) const
516   { return url.fname(); }
517
518      /** Translates the textual ID to the complete URL if possible.
519         
520          Depending on the document format the translation is done in the
521          following way:
522          \begin{itemize}
523             \item For #BUNDLED# and #INDIRECT# documents the function
524                   scans the \Ref{DjVmDir} (the document directory) and
525                   matches the ID against:
526                   \begin{enumerate}
527                      \item File ID from the \Ref{DjVmDir}
528                      \item File name from the \Ref{DjVmDir}
529                      \item File title from the \Ref{DjVmDir}
530                   \end{enumerate}
531                   Then for #BUNDLED# document the URL is obtained by
532                   appending the #name# of the found file to the document's
533                   URL.
534
535                   For #INDIRECT# documents the URL is obtained by
536                   appending the #name# of the found file to the URL of
537                   the directory containing the document.
538             \item For #OLD_BUNDLED# documents the function compares the ID
539                   with internal name of every file inside the bundle and
540                   composes an artificial URL by appending the file name to
541                   the document's URL.
542             \item For #OLD_INDEXED# or #SINGLE_PAGE# documents the function
543                   composes the URL by appending the ID to the URL of the
544                   directory containing the document.
545          \end{itemize}
546
547          If information obtained by the initialization thread is not
548          sufficient yet, the #id_to_url()# may return an empty URL.
549          Depending on the document type, the information is sufficient when
550          \begin{itemize}
551             \item #BUNDLED# and #INDIRECT#: #DOC_DIR_KNOWN# flag is set.
552             \item #OLD_BUNDLED# and #OLD_INDEXED# and #SINGLE_PAGE#:
553                   #DOC_TYPE_KNOWN# flag is set.
554          \end{itemize} */
555   GURL         id_to_url(const GUTF8String &id) const;
556   /// Returns the page number for #id# by passing \Ref{id_to_url}(id) to \Ref{url_to_page}(), which documents a #-1# result for unknown pages.
557   int          id_to_page(const GUTF8String &id) const
558   {  return url_to_page(id_to_url(id)); }
559
560      /** Returns \Ref{GP} pointer to \Ref{DjVuImage} corresponding to page
561          #page_num#. If caching is enabled, and there is a {\em fully decoded}
562          \Ref{DjVuFile} in the cache, the image will be reused and will
563          be returned fully decoded. Otherwise, if multi-threaded behavior
564          is allowed, and #sync# is set to #FALSE#, the decoding will be
565          started in a separate thread, which enables progressive
566          redisplay. Thus, in this case the image returned may be partially
567          decoded.
568
569          Negative #page_num# has a special meaning for the {\em old indexed}
570          multipage documents: the #DjVuDocument# will start decoding of the
571          URL with which it has been initialized. For other formats page
572          #-1# is the same as page #0#.
573
574          #DjVuDocument# can also connect the created page to the specified
575          #port# {\em before starting decoding}. This option will allow
576          the future owner of \Ref{DjVuImage} to receive all messages and
577          requests generated during its decoding.
578
579          If this function is called before the document's structure becomes
580          known (the initialization process completes), the \Ref{DjVuFile},
581          which the returned image will be attached to, will be assigned a
582          temporary artificial URL, which will be corrected as soon as enough
583          information becomes available. The trick prevents the main thread
584          from blocking and in some cases helps to start decoding earlier.
585          The URL is corrected and decoding will start as soon as
586          #DjVuDocument# passes some given stages of initialization and
587          \Ref{page_to_url}(), \Ref{id_to_url}() functions start working
588          properly. Please look through their description for details.
589
590          {\bf Note:} To wait for the initialization to complete use
591          \Ref{wait_for_complete_init}(). For single threaded applications
592          the initialization completes before the \Ref{init}() function
593          returns.
594
595          @param page_num Number of the page to be decoded
596          @param sync When set to #TRUE# the function will not return
597                      until the page is completely decoded. Otherwise,
598                      in a multi-threaded program, this function will
599                      start decoding in a new thread and will return
600                      a partially decoded image. Refer to
601                      \Ref{DjVuImage::wait_for_complete_decode}() and
602                      \Ref{DjVuFile::is_decode_ok}().
603          @param port A pointer to \Ref{DjVuPort}, that the created image
604                      will be connected to. */
605   GP<DjVuImage> get_page(int page_num, bool sync=true, DjVuPort * port=0) const;
606   GP<DjVuImage> get_page(int page_num, bool sync=true, DjVuPort * port=0)
607   { return const_cast<const DjVuDocument *>(this)->get_page(page_num,sync,port); }
608
609      /** Returns \Ref{GP} pointer to \Ref{DjVuImage} corresponding to the
610          specified ID. This function behaves exactly as the #get_page()#
611          function above. The only thing worth mentioning here is how the #ID#
612          parameter is treated.
613
614          First of all the function checks, if the ID contains a number.
615          If so, it just calls the #get_page()# function above. If ID is
616          #ZERO# or just empty, page number #-1# is assumed. Otherwise
617          the ID is translated to the URL using \Ref{id_to_url}(). */
618   GP<DjVuImage> get_page(const GUTF8String &id, bool sync=true, DjVuPort * port=0);
619   
620      /** Returns \Ref{DjVuFile} corresponding to the specified page.
621          Normally it translates the page number to the URL using
622          \Ref{page_to_url}() and then creates \Ref{DjVuFile} initializing
623          it with data from the URL.
624
625          The behavior becomes different, though in the case when the
626          document structure is unknown at the moment this function is called.
627          In this situation it invents a temporary URL, creates a
628          \Ref{DjVuFile}, initializes it with this URL and returns
629          immediately. The caller may start decoding the file right away
630          (if necessary). The decoding will block but will automatically
631          continue as soon as enough information is collected about the
632          document. This trick should be quite transparent to the user and
633          helps to prevent the main thread from blocking. The decoding will
634          unblock and this function will stop using this "trick" as soon
635          as #DjVuDocument# passes some given stages of initialization and
636          \Ref{page_to_url}(), \Ref{id_to_url}() functions start working
637          properly.
638
639          If #dont_create# is #TRUE# the function will return the file
640          only if it already exists.
641
642          {\bf Note:} To wait for the initialization to complete use
643          \Ref{wait_for_complete_init}(). For single threaded applications
644          the initialization completes before the \Ref{init}() function
645          returns. */
646   GP<DjVuFile> get_djvu_file(int page_num, bool dont_create=false) const;
647   GP<DjVuFile> get_djvu_file(int page_num, bool dont_create=false)
648   { return const_cast<const DjVuDocument *>(this)->get_djvu_file(page_num,dont_create); }
649
650
651      /** Returns \Ref{DjVuFile} corresponding to the specified ID.
652          This function behaves exactly as the #get_djvu_file()# function
653          above. The only thing worth mentioning here is how the #ID#
654          parameter is treated.
655
656          First off, \Ref{id_to_url}() is called.  If not successful,
657          the function checks, if the ID contains a number.
658          If so, it just calls the #get_djvu_file()# function above. If ID is
659          #ZERO# or just empty, page number #-1# is assumed.
660
661          If #dont_create# is #TRUE# the function will return the file
662          only if it already exists. */
663   GP<DjVuFile> get_djvu_file(const GUTF8String &id, bool dont_create=false);
664   GP<DjVuFile> get_djvu_file(const GURL &url, bool dont_create=false);
665      /** Returns a \Ref{DataPool} containing one chunk #TH44# with
666          the encoded thumbnail for the specified page. The function
667          first looks for thumbnails enclosed into the document and if
668          it fails to find one, it decodes the required page and creates
669          the thumbnail on the fly (unless #dont_decode# is true).
670
671          {\bf Note:} It may happen that the returned \Ref{DataPool} will
672          not contain all the data you need. In this case you will need
673          to install a trigger into the \Ref{DataPool} to learn when the
674          data actually arrives. */
675   virtual GP<DataPool> get_thumbnail(int page_num, bool dont_decode);
676      /** Returns the gamma correction that was used when creating
677          thumbnail images. If you need a different gamma correction, you
678          will need to correct the thumbnails again. */
679   float        get_thumbnails_gamma(void) const;
680      //@}
681
682      /** Waits until the document initialization process finishes.
683          It can finish either successfully or not. Use \Ref{is_init_ok}()
684          and \Ref{is_init_failed}() to learn the result code.
685         
686          As described in \Ref{start_init}(), for multi-threaded applications the
687          initialization is carried out in parallel with the main thread.
688          This function blocks the calling thread until the initializing
689          thread reads enough data, receives information about the document
690          format and exits.  This function returns #true# if the
691          initialization is successful. You can use \Ref{get_flags}() or
692          \Ref{is_init_complete}() to check more precisely the degree of
693          initialization. Use \Ref{stop_init}() to interrupt initialization. */
694   bool            wait_for_complete_init(void);
695
696          /** Wait until we know the number of pages and return. */
697   int wait_get_pages_num(void) const;
698   
699      /// Returns cache being used.
700   DjVuFileCache * get_cache(void) const;
701
702      /** @name Saving document to disk */
703      //@{
704      /** Returns pointer to the \Ref{DjVmDoc} class, which can save the
705          document contents on the hard disk in one of the two new formats:
706          {\em bundled} and {\em indirect}. You may also want to look
707          at \Ref{write}() and \Ref{expand}() if you are interested in
708          how to save the document.
709
710          {\bf Plugin Warning}. This function will read contents of the whole
711          document. Thus, if you call it from the main thread (the thread,
712          which transfers data from Netscape), the plugin will block. */
713   GP<DjVmDoc>          get_djvm_doc(void);
714      /** Saves the document in the {\em new bundled} format. All the data
715          is "bundled" into one file and this file is written into the
716          passed stream.
717
718          If #force_djvm# is #TRUE# then even one page documents will be
719          saved in the #DJVM BUNDLED# format (inside a #FORM:DJVM#);
720
721          {\bf Plugin Warning}. This function will read contents of the whole
722          document. Thus, if you call it from the main thread (the thread,
723          which transfers data from Netscape), the plugin will block. */
724   virtual void write(const GP<ByteStream> &str, bool force_djvm=false);
725     /** Always save as bundled, renaming any files conflicting with
726         the names in the supplied GMap. */
727   virtual void write(const GP<ByteStream> &str,
728     const GMap<GUTF8String,void *> &reserved);
729      /** Saves the document in the {\em new indirect} format when every
730          page and component are stored in separate files. This format
731          is ideal for web publishing because it allows direct access to
732          any page and component. In addition to it, a top-level file
733          containing the list of all components will be created. To view
734          the document later in the plugin or in the viewer one should
735          load the top-level file.
736
737          {\bf Plugin Warning}. This function will read contents of the whole
738          document. Thus, if you call it from the main thread (the thread,
739          which transfers data from Netscape), the plugin will block.
740         
741          @param codebase - Name of the directory which the document should
742                 be expanded into.
743          @param idx_name - Name of the top-level file containing the document
744                 directory (basically, list of all files composing the document).
745      */
746   void                 expand(const GURL &codebase, const GUTF8String &idx_name);
747      /** This function can be used instead of \Ref{write}() and \Ref{expand}().
748          It allows to save the document either in the new #BUNDLED# format
749          or in the new #INDIRECT# format depending on the value of parameter
750          #bundled#.
751
752          Depending on the document's type, the meaning of #where# is:
753          \begin{itemize}
754             \item For #BUNDLED# documents this is the name of the file
755             \item For #INDIRECT# documents this is the name of top-level
756                   index file. All document files will be saved into the
757                   same directory where the index file will reside. \end{itemize} */
758   virtual void         save_as(const GURL &where, const bool bundled=0);
759      //@}
760      /** Returns pointer to the internal directory of the document, if it
761          is in one of the new formats: #BUNDLED# or #INDIRECT#.
762          Otherwise (single-page or obsolete input format) an exception
763          is thrown instead of returning a pointer.
764
765          #ZERO# will be returned if the initializing thread has not
766          learnt enough information about the document (#DOC_DIR_KNOWN# has
767          not been set yet). Check \Ref{is_init_complete}() and \Ref{init}()
768          for details. */
769   GP<DjVmDir>          get_djvm_dir(void) const;
770      /** Returns pointer to the document bookmarks.
771          This applies to #BUNDLED# and #INDIRECT# documents.
772
773          #ZERO# will also be returned if the initializing thread has not
774          learnt enough information about the document (#DOC_DIR_KNOWN# has
775          not been set yet). Check \Ref{is_init_complete}() and \Ref{init}()
776          for details. */
777   GP<DjVmNav>          get_djvm_nav(void) const;
778      /** Returns pointer to the internal directory of the document, if it
779          is in obsolete #OLD_BUNDLED# format (any other format throws).
780
781          #ZERO# will be returned if the initializing thread has not
782          learnt enough information about the document (#DOC_DIR_KNOWN# has
783          not been set yet). Check \Ref{is_init_complete}() and \Ref{init}()
784          for details. */
785   GP<DjVmDir0>         get_djvm_dir0(void) const;
786      /** Returns pointer to {\em navigation directory} of the document.
787          The navigation directory is a DjVu file containing only one
788          chunk #NDIR# inside a #FORM:DJVI# with the list of all
789          document pages. */
790   GP<DjVuNavDir>       get_nav_dir(void) const;
791
792   /// Create a complete DjVuXML file.
793   void writeDjVuXML(const GP<ByteStream> &gstr_out,int flags) const;
794
795      /// Returns TRUE if #class_name# is #"DjVuDocument"# or #"DjVuPort"#
796   virtual bool         inherits(const GUTF8String &class_name) const;
797
798      /// Converts the specified id to a URL.
799   virtual GURL         id_to_url(const DjVuPort * source, const GUTF8String &id);
800   virtual GP<DjVuFile> id_to_file(const DjVuPort * source, const GUTF8String &id);
801   virtual GP<DataPool> request_data(const DjVuPort * source, const GURL & url);
802   virtual void         notify_file_flags_changed(const DjVuFile * source,
803                        long set_mask, long clr_mask);
804
805   virtual GList<GURL>  get_url_names(void);
806   virtual void         set_recover_errors(ErrorRecoveryAction=ABORT);
807   virtual void         set_verbose_eof(bool=true);
808
809   static void set_compress_codec(
810     void (*codec)(GP<ByteStream> &, const GURL &where, bool bundled));
811
812   static void set_import_codec(
813     void (*codec)(GP<DataPool> &,const GURL &url,bool &, bool &));
814
815protected:
816   static void (*djvu_import_codec) (
817     GP<DataPool> &pool, const GURL &url,bool &needs_compression, bool &needs_rename );
818   static void (*djvu_compress_codec) (
819     GP<ByteStream> &bs, const GURL &where, bool bundled);
820   virtual GP<DjVuFile> url_to_file(const GURL & url, bool dont_create=false) const;
821   GURL                 init_url;
822   GP<DataPool>         init_data_pool;
823   GP<DjVmDir>          djvm_dir;       // New-style DjVm directory
824   GP<DjVmNav>          djvm_nav;
825   int  doc_type;
826   bool needs_compression_flag;
827   bool can_compress_flag;
828   bool needs_rename_flag;
829
830   
831
832   bool                 has_url_names;
833   GCriticalSection     url_names_lock;
834   GList<GURL>  url_names;
835   ErrorRecoveryAction  recover_errors;
836   bool                 verbose_eof;
837public:
838   class UnnamedFile; // This really should be protected ...
839   class ThumbReq; // This really should be protected ...
840protected:
841   bool                 init_started;
842   GSafeFlags           flags;
843   GSafeFlags           init_thread_flags;
844   DjVuFileCache        * cache;
845   GP<DjVuSimplePort>   simple_port;
846
847   GP<DjVmDir0>         djvm_dir0;      // Old-style DjVm directory
848   GP<DjVuNavDir>       ndir;           // Old-style navigation directory
849   GUTF8String          first_page_name;// For OLD_BUNDLED docs only
850
851      // The following is used in init() and destructor to query NDIR
852      // DO NOT USE FOR ANYTHING ELSE. THE FILE IS ZEROED IMMEDIATELY
853      // AFTER IT'S NO LONGER NEEDED. If you don't zero it, ~DjVuDocument()
854      // will kill it, which is a BAD thing if the file's already in cache.
855   GP<DjVuFile>         ndir_file;
856   
857   GPList<UnnamedFile>  ufiles_list;
858   GCriticalSection     ufiles_lock;
859
860   GPList<ThumbReq>     threqs_list;
861   GCriticalSection     threqs_lock;
862
863   GP<DjVuDocument>     init_life_saver;
864
865   static const float   thumb_gamma;
866
867      // Reads document contents in another thread trying to determine
868      // its type and structure
869   GThread              init_thr;
870   static void          static_init_thread(void *);
871   void                 init_thread(void);
872
873   void                 check() const;
874
875   void                 process_threqs(void);
876   GP<ThumbReq>         add_thumb_req(const GP<ThumbReq> & thumb_req);
877     
878   void                 add_to_cache(const GP<DjVuFile> & f);
879   void                 check_unnamed_files(void);
880   GUTF8String          get_int_prefix(void) const;
881   void                 set_file_aliases(const DjVuFile * file);
882   GURL                 invent_url(const GUTF8String &name) const;
883};
884
885class DjVuDocument::UnnamedFile : public GPEnabled
886{
887public:
888   enum { ID, PAGE_NUM };
889   int          id_type;
890   GUTF8String          id;
891   int          page_num;
892   GURL         url;
893   GP<DjVuFile> file;
894   GP<DataPool> data_pool;
895protected:
896   UnnamedFile(int xid_type, const GUTF8String &xid, int xpage_num, const GURL & xurl,
897                  const GP<DjVuFile> & xfile) :
898      id_type(xid_type), id(xid), page_num(xpage_num), url(xurl), file(xfile) {}
899   friend class DjVuDocument;
900};
901
902class DjVuDocument::ThumbReq : public GPEnabled
903{
904public:
905   int          page_num;
906   GP<DataPool> data_pool;
907
908         // Either of the next two blocks should present
909   GP<DjVuFile> image_file;
910
911   int          thumb_chunk;
912   GP<DjVuFile> thumb_file;
913protected:
914   ThumbReq(int xpage_num, const GP<DataPool> & xdata_pool) :
915      page_num(xpage_num), data_pool(xdata_pool) {}
916   friend class DjVuDocument;
917};
918
919inline void
920DjVuDocument::init(const GURL &url, GP<DjVuPort> port, DjVuFileCache *cache)
921{
922  start_init(url,port,cache);
923  wait_for_complete_init();
924}
925
926inline GP<DjVuDocument>
927DjVuDocument::create(
928  const GURL &url, GP<DjVuPort> xport, DjVuFileCache * const xcache)
929{
930  DjVuDocument *doc=new DjVuDocument;
931  GP<DjVuDocument> retval=doc;
932  doc->start_init(url,xport,xcache);
933  return retval;
934}
935
936inline bool
937DjVuDocument::is_init_complete(void) const
938{
939   return (flags & (DOC_INIT_OK | DOC_INIT_FAILED))!=0;
940}
941
942inline bool
943DjVuDocument::is_init_ok(void) const
944{
945   return (flags & DOC_INIT_OK)!=0;
946}
947
948inline void
949DjVuDocument::set_needs_compression(void)
950{
951   needs_compression_flag=true;
952}
953
954inline bool
955DjVuDocument::needs_compression(void) const
956{
957   return needs_compression_flag;
958}
959
960inline bool
961DjVuDocument::needs_rename(void) const
962{
963   return needs_rename_flag;
964}
965
966inline bool
967DjVuDocument::can_compress(void) const
968{
969   return can_compress_flag;
970}
971
972inline bool
973DjVuDocument::is_init_failed(void) const
974{
975   return (flags & DOC_INIT_FAILED)!=0;
976}
977
978inline int
979DjVuDocument::get_doc_type(void) const { return doc_type; }
980
981inline long
982DjVuDocument::get_doc_flags(void) const { return flags; }
983
984inline bool
985DjVuDocument::is_bundled(void) const
986{
987   return doc_type==BUNDLED || doc_type==OLD_BUNDLED;
988}
989
990inline GURL
991DjVuDocument::get_init_url(void) const { return init_url; }
992
993inline GP<DataPool>
994DjVuDocument::get_init_data_pool(void) const { return init_data_pool; }
995
996inline bool
997DjVuDocument::inherits(const GUTF8String &class_name) const
998{
999   return
1000      (GUTF8String("DjVuDocument") == class_name) ||
1001      DjVuPort::inherits(class_name);
1002//      !strcmp("DjVuDocument", class_name) ||
1003//      DjVuPort::inherits(class_name);
1004}
1005
1006inline float
1007DjVuDocument::get_thumbnails_gamma(void) const
1008{
1009   return thumb_gamma;
1010}
1011
1012inline DjVuFileCache *
1013DjVuDocument::get_cache(void) const
1014{
1015   return cache;
1016}
1017
1018inline GP<DjVmDir>
1019DjVuDocument::get_djvm_dir(void) const
1020{
1021   if (doc_type==SINGLE_PAGE)
1022      G_THROW( ERR_MSG("DjVuDocument.no_dir") );
1023   if (doc_type!=BUNDLED && doc_type!=INDIRECT)
1024      G_THROW( ERR_MSG("DjVuDocument.obsolete") );
1025   return djvm_dir;
1026}
1027
1028inline GP<DjVmNav>
1029DjVuDocument::get_djvm_nav(void) const
1030{
1031  if (doc_type==BUNDLED || doc_type==INDIRECT)
1032    return djvm_nav;
1033  return 0;
1034}
1035
1036inline GP<DjVmDir0>
1037DjVuDocument::get_djvm_dir0(void) const
1038{
1039   if (doc_type!=OLD_BUNDLED)
1040      G_THROW( ERR_MSG("DjVuDocument.old_bundle") );
1041   return djvm_dir0;
1042}
1043
1044inline GP<DjVuNavDir>
1045DjVuDocument::get_nav_dir(void) const
1046{
1047   return ndir;
1048}
1049
1050inline void
1051DjVuDocument::set_recover_errors(ErrorRecoveryAction recover)
1052{
1053  recover_errors=recover;
1054}
1055
1056inline void
1057DjVuDocument::set_verbose_eof(bool verbose)
1058{
1059  verbose_eof=verbose;
1060}
1061
1062//@}
1063
1064
1065#ifdef HAVE_NAMESPACES
1066}
1067# ifndef NOT_USING_DJVU_NAMESPACE
1068using namespace DJVU;
1069# endif
1070#endif
1071#endif
Note: See TracBrowser for help on using the repository browser.