source: trunk/libdjvu/DjVmDir.h @ 15

Last change on this file since 15 was 15, checked in by Eugene Romanenko, 15 years ago

needed libs update

File size: 19.0 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, Version 2. The license should have
9//C- accompanied the software or you may obtain a copy of the license
10//C- from the Free Software Foundation at http://www.fsf.org .
11//C-
12//C- This program is distributed in the hope that it will be useful,
13//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
14//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15//C- GNU General Public License for more details.
16//C-
17//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
18//C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech
19//C- Software authorized us to replace the original DjVu(r) Reference
20//C- Library notice by the following text (see doc/lizard2002.djvu):
21//C-
22//C-  ------------------------------------------------------------------
23//C- | DjVu (r) Reference Library (v. 3.5)
24//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
25//C- | The DjVu Reference Library is protected by U.S. Pat. No.
26//C- | 6,058,214 and patents pending.
27//C- |
28//C- | This software is subject to, and may be distributed under, the
29//C- | GNU General Public License, Version 2. The license should have
30//C- | accompanied the software or you may obtain a copy of the license
31//C- | from the Free Software Foundation at http://www.fsf.org .
32//C- |
33//C- | The computer code originally released by LizardTech under this
34//C- | license and unmodified by other parties is deemed "the LIZARDTECH
35//C- | ORIGINAL CODE."  Subject to any third party intellectual property
36//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
37//C- | non-exclusive license to make, use, sell, or otherwise dispose of
38//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
39//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
40//C- | General Public License.   This grant only confers the right to
41//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
42//C- | the extent such infringement is reasonably necessary to enable
43//C- | recipient to make, have made, practice, sell, or otherwise dispose
44//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
45//C- | any greater extent that may be necessary to utilize further
46//C- | modifications or combinations.
47//C- |
48//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
49//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
50//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
51//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
52//C- +------------------------------------------------------------------
53//
54// $Id: DjVmDir.h,v 1.10 2003/11/07 22:08:20 leonb Exp $
55// $Name: release_3_5_16 $
56
57#ifndef _DJVMDIR_H
58#define _DJVMDIR_H
59#ifdef HAVE_CONFIG_H
60#include "config.h"
61#endif
62#if NEED_GNUG_PRAGMAS
63# pragma interface
64#endif
65
66
67/** @name DjVmDir.h
68    Files #"DjVmDir.h"# and #"DjVmDir.cpp"# implement class \Ref{DjVmDir} for
69    representing the directory of a DjVu multipage document.
70
71    {\bf Bundled vs. Indirect format} --- There are currently two multipage
72    DjVu formats supported: {\em bundled} and {\em indirect}.  In the first
73    format all component files composing a given document are packaged (or
74    bundled) into one file, in the second one every page and component is
75    stored in a separate file and there is one more file, which contains the
76    list of all others.
77
78    {\bf Multipage DjVu format} --- Multipage DjVu documents follow the EA
79    IFF85 format (cf. \Ref{IFFByteStream.h}.)  A document is composed of a
80    #"FORM:DJVM"# whose first chunk is a #"DIRM"# chunk containing the {\em
81    document directory}.  This directory lists all component files composing
82    the given document, helps to access every component file and identify the
83    pages of the document.
84    \begin{itemize}
85    \item In a {\em bundled} multipage file, the component files
86         are stored immediately after the #"DIRM"# chunk,
87         within the #"FORM:DJVM"# composite chunk.
88    \item In an {\em indirect} multipage file, the component files are
89          stored in different files whose URLs are composed using information
90          stored in the #"DIRM"# chunk.
91    \end{itemize}
92    Most of the component files represent pages of a document.  Some files
93    however represent data shared by several pages.  The pages refer to these
94    supporting files by means of an inclusion chunk (#"INCL"# chunks)
95    identifying the supporting file.
96
97    {\bf Document Directory} --- Every directory record describes a component
98    file.  Each component file is identified by a small string named the
99    identifier (ID).  Each component file also contains a file name and a
100    title.  The format of the #"DIRM"# chunk is described in section
101    \Ref{Format of the DIRM chunk.}.
102
103    Theoretically, IDs are used to uniquely identify each component file in
104    #"INCL"# chunks, names are used to compose the URLs of the component
105    files in an indirect multipage DjVu file, and titles are cosmetic names
106    possibly displayed when viewing a page of a document.  There are however
107    many problems with this scheme, and we {\em strongly suggest}, with the
108    current implementation to always make the file ID, the file name and the
109    file title identical.
110
111    @memo Implements DjVu multipage document directory
112    @author Andrei Erofeev <eaf@geocities.com>
113    @version
114    #$Id: DjVmDir.h,v 1.10 2003/11/07 22:08:20 leonb Exp $# */
115//@{
116
117
118
119#include "GString.h"
120#include "GThreads.h"
121
122#ifdef HAVE_NAMESPACES
123namespace DJVU {
124# ifdef NOT_DEFINED // Just to fool emacs c++ mode
125}
126#endif
127#endif
128
129class ByteStream;
130
131/** Implements DjVu multipage document directory.  There are currently two
132    multipage DjVu formats supported: {\em bundled} and {\em indirect}.  In
133    the first format all component files composing a given document are
134    packaged (or bundled) into one file, in the second one every page and
135    component is stored in a separate file and there is one more file, which
136    contains the list of all others.
137
138    The multipage document directory lists all component files composing the
139    given document, helps to access every file, identify pages and maintain
140    user-specified shortcuts.  Every directory record describes a file
141    composing the document.  Each file is identified by a small string named
142    the identifier (ID).  Each file may also contain a file name and a title.
143
144    The #DjVmDir# class represents a multipage document directory.  Its main
145    purpose is to encode and decode the document directory when writing or
146    reading the #DIRM# chunk.  Normally you don't have to create this class
147    yourself. It's done automatically when \Ref{DjVmDoc} class initializes
148    itself. It may be useful though to be able to access records in the
149    directory because some classes (like \Ref{DjVuDocument} and \Ref{DjVmDoc})
150    return a pointer to #DjVmDir# in some cases. */
151
152class DjVmDir : public GPEnabled
153{
154protected:
155      /** Class \Ref{DjVmDir::File} represents the directory records
156          managed by class \Ref{DjVmDir}. */
157   DjVmDir(void) { } ;
158public:
159   class File;
160
161   static const int version;
162
163      /** Class \Ref{DjVmDir::File} represents the directory records
164          managed by class \Ref{DjVmDir}. */
165   static GP<DjVmDir> create(void) {return new DjVmDir; } ;
166
167      /** Decodes the directory from the specified stream. */
168   void decode(const GP<ByteStream> &stream);
169      /** Encodes the directory into the specified stream. */
170   void encode(const GP<ByteStream> &stream, const bool do_rename=false) const;
171      /** Encodes the directory into the specified stream, explicitely as bundled or indirect. */
172   void encode(const GP<ByteStream> &stream, const bool bundled, const bool do_rename) const;
173      /** Tests if directory defines an {\em indirect} document. */
174   bool is_indirect(void) const;
175      /** Tests if the directory defines a {\em bundled} document. */
176   bool is_bundled(void) const;
177      /** Translates page numbers to file records. */
178   GP<File> page_to_file(int page_num) const;
179      /** Translates file names to file records. */
180   GP<File> name_to_file(const GUTF8String & name) const;
181      /** Translates file IDs to file records. */
182   GP<File> id_to_file(const GUTF8String &id) const;
183      /** Translates file shortcuts to file records. */
184   GP<File> title_to_file(const GUTF8String &title) const;
185      /** Returns position of the file in the directory. */
186   int get_file_pos(const File * f) const;
187      /** Returns position of the given page in the directory. */
188   int get_page_pos(int page_num) const;
189      /** Check for duplicate names, and resolve them. */
190   GPList<File> resolve_duplicates(const bool save_as_bundled);
191      /** Returns a copy of the list of file records. */
192   GPList<File> get_files_list(void) const;
193      /** Returns the number of file records. */
194   int get_files_num(void) const;
195      /** Returns the number of file records representing pages. */
196   int get_pages_num(void) const;
197      /** Returns back pointer to the file with #SHARED_ANNO# flag.
198        Note that there may be only one file with shared annotations
199        in any multipage DjVu document. */
200   GP<File> get_shared_anno_file(void) const;
201      /** Changes the title of the file with ID #id#. */
202   void set_file_title(const GUTF8String &id, const GUTF8String &title);
203      /** Changes the name of the file with ID #id#. */
204   void set_file_name(const GUTF8String &id, const GUTF8String &name);
205      /** Inserts the specified file record at the specified position.
206        Specifying #pos# equal to #-1# means to append.  The actual position
207        inserted is returned. */
208   int insert_file(const GP<File> & file, int pos=-1);
209      /** Removes a file record with ID #id#. */
210   void delete_file(const GUTF8String &id);
211private:
212   GCriticalSection class_lock;
213   GPList<File> files_list;
214   GPArray<File> page2file;
215   GPMap<GUTF8String, File> name2file;
216   GPMap<GUTF8String, File> id2file;
217   GPMap<GUTF8String, File> title2file;
218private: //dummy stuff
219   static void decode(ByteStream *);
220   static void encode(ByteStream *);
221};
222
223class DjVmDir::File : public GPEnabled
224{
225public:
226  // Out of the record: INCLUDE below must be zero and PAGE must be one.
227  // This is to avoid problems with the File constructor, which now takes
228  // 'int file_type' as the last argument instead of 'bool is_page'
229 
230  /** File type. Possible file types are:
231     \begin{description}
232       \item[PAGE] This is a top level page file. It may include other
233         #INCLUDE#d files, which may in turn be shared between
234         different pages.
235       \item[INCLUDE] This file is included into some other file inside
236         this document.
237       \item[THUMBNAILS] This file contains thumbnails for the document
238         pages.
239       \item[SHARED_ANNO] This file contains annotations shared by
240         all the pages. It's supposed to be included into every page
241         for the annotations to take effect. There may be only one
242         file with shared annotations in a document.
243     \end{description} */
244  enum FILE_TYPE { INCLUDE=0, PAGE=1, THUMBNAILS=2, SHARED_ANNO=3 };
245protected:
246  /** Default constructor. */
247  File(void);
248
249public:
250  static GP<File> create(void) { return new File(); }
251  static GP<File> create(const GUTF8String &load_name,
252     const GUTF8String &save_name, const GUTF8String &title,
253     const FILE_TYPE file_type);
254
255  /** Check for filenames that are not valid for the native encoding,
256      and change them. */
257  const GUTF8String &check_save_name(const bool as_bundled);
258
259  /** File name.  The optional file name must be unique and is the name
260      that will be used when the document is saved to an indirect file.
261      If not assigned, the value of #id# will be used for this purpose.
262      By keeping the name in {\em bundled} document we guarantee, that it
263      can be expanded later into {\em indirect} document and files will
264      still have the same names, if the name is legal on a given filesystem.
265    */
266  const GUTF8String &get_save_name(void) const;
267
268  /** File identifier.  The encoder assigns a unique identifier to each file
269      in a multipage document. This is the name used when loading files.
270      Indirection chunks in other files (#"INCL"# chunks) may refer to another
271      file using its identifier. */
272  const GUTF8String &get_load_name(void) const;
273  void set_load_name(const GUTF8String &id);
274
275  /** File title.  The file title is assigned by the user and may be used as
276      a shortcut for viewing a particular page.  Names like #"chapter1"# or
277      #"appendix"# are appropriate. */
278  const GUTF8String &get_title() const;
279  void set_title(const GUTF8String &id);
280
281  /** Reports an ascii string indicating file type. */
282  GUTF8String get_str_type(void) const;
283
284  /** Offset of the file data in a bundled DJVM file.  This number is
285      relevant in the {\em bundled} case only when everything is packed into
286      one single file. */
287  int offset;
288
289  /** Size of the file data in a bundled DJVM file.  This number is
290      relevant in the {\em bundled} case only when everything is
291      packed into one single file. */
292  int size;
293
294  /** Have we checked the saved file name, to see if it is valid on the
295      local disk? */
296  bool valid_name;
297
298  /** Tests if this file represents a page of the document. */
299  bool is_page(void) const 
300  {
301    return (flags & TYPE_MASK)==PAGE;
302  }
303
304  /** Returns #TRUE# if this file is included into some other files of
305      this document. */
306  bool is_include(void) const
307  {
308    return (flags & TYPE_MASK)==INCLUDE;
309  }
310
311  /** Returns #TRUE# if this file contains thumbnails for the document pages. */
312  bool is_thumbnails(void) const
313  {
314    return (flags & TYPE_MASK)==THUMBNAILS;
315  }
316
317  /** Returns the page number of this file. This function returns
318      #-1# if this file does not represent a page of the document. */
319  bool is_shared_anno(void) const
320  { return (flags & TYPE_MASK)==SHARED_ANNO; }
321
322  int get_page_num(void) const 
323  { return page_num; } 
324protected:
325  GUTF8String name;
326  GUTF8String oldname;
327  GUTF8String id;
328  GUTF8String title; 
329  void set_save_name(const GUTF8String &name);
330private:
331      friend class DjVmDir;
332      enum FLAGS_0 { IS_PAGE_0=1, HAS_NAME_0=2, HAS_TITLE_0=4 };
333      enum FLAGS_1 { HAS_NAME=0x80, HAS_TITLE=0x40, TYPE_MASK=0x3f };
334      unsigned char flags;
335      int page_num;
336};
337
338inline const GUTF8String &
339DjVmDir::File::get_load_name(void) const
340{ return id; }
341
342inline const GUTF8String &
343DjVmDir::File::get_title() const
344{ return *(title.length()?&title:&id); }
345
346inline void
347DjVmDir::File::set_title(const GUTF8String &xtitle) { title=xtitle; }
348
349/** @name Format of the DIRM chunk.
350
351    {\bf Variants} --- There are two versions of the #"DIRM"# chunk format.
352    The version number is identified by the seven low bits of the first byte
353    of the chunk.  Version {\bf 0} is obsolete and should never be used.  This
354    section describes version {\bf 1}.  There are two major multipage DjVu
355    formats supported: {\em bundled} and {\em indirect}.  The #"DIRM"# chunk
356    indicates which format is used in the most significant bit of the first
357    byte of the chunk.  The document is bundled when this bit is set.
358    Otherwise the document is indirect.
359
360    {\bf Unencoded data} --- The #"DIRM"# chunk is composed of some unencoded
361    data followed by \Ref{bzz} encoded data.  The unencoded data starts with
362    the version byte and a 16 bit integer representing the number of component
363    files.  All integers are encoded with the most significant byte first.
364    \begin{verbatim}
365          BYTE:             Flags/Version:  0b<bundled>0000001
366          INT16:            Number of component files.
367    \end{verbatim}
368    When the document is a bundled document (i.e. the flag #bundled# is set),
369    this header is followed by the offsets of each of the component files within
370    the #"FORM:DJVM"#.  These offsets allow for random component file access.
371    \begin{verbatim}
372          INT32:            Offset of first component file.
373          INT32:            Offset of second component file.
374          ...
375          INT32:            Offset of last component file.
376    \end{verbatim}
377
378    {\bf BZZ encoded data} --- The rest of the chunk is entirely compressed
379    with the BZZ general purpose compressor.  We describe now the data fed
380    into (or retrieved from) the BZZ codec (cf. \Ref{BSByteStream}.)  First
381    come the sizes and the flags associated with each component file.
382    \begin{verbatim}
383          INT24:             Size of the first component file.
384          INT24:             Size of the second component file.
385          ...
386          INT24:             Size of the last component file.
387          BYTE:              Flag byte for the first component file.
388          BYTE:              Flag byte for the second component file.
389          ...
390          BYTE:              Flag byte for the last component file.
391    \end{verbatim}
392    The flag bytes have the following format:
393    \begin{verbatim}
394          0b<hasname><hastitle>000000     for a file included by other files.
395          0b<hasname><hastitle>000001     for a file representing a page.
396          0b<hasname><hastitle>000010     for a file containing thumbnails.
397    \end{verbatim}
398    Flag #hasname# is set when the name of the file is different from the file
399    ID.  Flag #hastitle# is set when the title of the file is different from
400    the file ID.  These flags are used to avoid encoding the same string three
401    times.  Then come a sequence of zero terminated strings.  There are one to
402    three such strings per component file.  The first string contains the ID
403    of the component file.  The second string contains the name of the
404    component file.  It is only present when the flag #hasname# is set. The third
405    one contains the title of the component file. It is only present when the
406    flag #hastitle# is set. The \Ref{bzz} encoding system makes sure that
407    all these strings will be encoded efficiently despite their possible
408    redundancies.
409    \begin{verbatim}
410          ZSTR:     ID of the first component file.
411          ZSTR:     Name of the first component file (only if #hasname# is set.)
412          ZSTR:     Title of the first component file (only if #hastitle# is set.)
413          ...
414          ZSTR:     ID of the last component file.
415          ZSTR:     Name of the last component file (only if #hasname# is set.)
416          ZSTR:     Title of the last component file (only if #hastitle# is set.)
417    \end{verbatim}
418
419    @memo Description of the format of the DIRM chunk.  */
420//@}
421
422
423
424// -------------- IMPLEMENTATION
425
426
427inline bool
428DjVmDir::is_bundled(void) const
429{
430  return ! is_indirect();
431}
432
433inline bool
434DjVmDir::is_indirect(void) const
435{
436  GCriticalSectionLock lock((GCriticalSection *) &class_lock);
437  return ( files_list.size() && files_list[files_list] != 0 &&
438           files_list[files_list]->offset==0 );
439}
440
441
442
443// ----- THE END
444
445#ifdef HAVE_NAMESPACES
446}
447# ifndef NOT_USING_DJVU_NAMESPACE
448using namespace DJVU;
449# endif
450#endif
451#endif
Note: See TracBrowser for help on using the repository browser.