source: trunk/libdjvu/DjVmDoc.cpp @ 15

Last change on this file since 15 was 15, checked in by Eugene Romanenko, 15 years ago

needed libs update

File size: 18.5 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, Version 2. The license should have
9//C- accompanied the software or you may obtain a copy of the license
10//C- from the Free Software Foundation at http://www.fsf.org .
11//C-
12//C- This program is distributed in the hope that it will be useful,
13//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
14//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15//C- GNU General Public License for more details.
16//C-
17//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
18//C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech
19//C- Software authorized us to replace the original DjVu(r) Reference
20//C- Library notice by the following text (see doc/lizard2002.djvu):
21//C-
22//C-  ------------------------------------------------------------------
23//C- | DjVu (r) Reference Library (v. 3.5)
24//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
25//C- | The DjVu Reference Library is protected by U.S. Pat. No.
26//C- | 6,058,214 and patents pending.
27//C- |
28//C- | This software is subject to, and may be distributed under, the
29//C- | GNU General Public License, Version 2. The license should have
30//C- | accompanied the software or you may obtain a copy of the license
31//C- | from the Free Software Foundation at http://www.fsf.org .
32//C- |
33//C- | The computer code originally released by LizardTech under this
34//C- | license and unmodified by other parties is deemed "the LIZARDTECH
35//C- | ORIGINAL CODE."  Subject to any third party intellectual property
36//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
37//C- | non-exclusive license to make, use, sell, or otherwise dispose of
38//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
39//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
40//C- | General Public License.   This grant only confers the right to
41//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
42//C- | the extent such infringement is reasonably necessary to enable
43//C- | recipient to make, have made, practice, sell, or otherwise dispose
44//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
45//C- | any greater extent that may be necessary to utilize further
46//C- | modifications or combinations.
47//C- |
48//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
49//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
50//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
51//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
52//C- +------------------------------------------------------------------
53//
54// $Id: DjVmDoc.cpp,v 1.10 2005/05/25 20:24:52 leonb Exp $
55// $Name: release_3_5_16 $
56
57#ifdef HAVE_CONFIG_H
58# include "config.h"
59#endif
60#if NEED_GNUG_PRAGMAS
61# pragma implementation
62#endif
63
64#include "DjVmDoc.h"
65#include "DjVmNav.h"
66#include "DataPool.h"
67#include "IFFByteStream.h"
68#include "GOS.h"
69#include "debug.h"
70
71
72#ifdef HAVE_NAMESPACES
73namespace DJVU {
74# ifdef NOT_DEFINED // Just to fool emacs c++ mode
75}
76#endif
77#endif
78
// The four bytes 'A','T','&','T'.  insert_file() compares the first four
// bytes of incoming data against this signature and skips them on a match.
static const char octets[4]={'A','T','&','T'};
80
81// Save the file to disk, remapping INCL chunks while saving.
82static void
83save_file(
84  IFFByteStream &iff_in, IFFByteStream &iff_out, const DjVmDir &dir,
85  GMap<GUTF8String,GUTF8String> &incl)
86{
87  GUTF8String chkid;
88  if (iff_in.get_chunk(chkid))
89  {
90    iff_out.put_chunk(chkid,true);
91    if(!chkid.cmp("FORM:",5))
92    {
93      for(;iff_in.get_chunk(chkid);iff_in.close_chunk())
94      {
95        iff_out.put_chunk(chkid);
96        if(chkid == "INCL")
97        {
98          GUTF8String incl_str;
99          char buffer[1024];
100          int length;
101          while((length=iff_in.read(buffer, 1024)))
102            incl_str+=GUTF8String(buffer, length);
103          // Eat '\n' in the beginning and at the end
104          while(incl_str.length() && incl_str[0]=='\n')
105          {
106            incl_str=incl_str.substr(1,(unsigned int)(-1));
107          }
108          while(incl_str.length()>0 && incl_str[(int)incl_str.length()-1]=='\n')
109          {
110            incl_str.setat(incl_str.length()-1, 0);
111          }
112          GPosition pos=incl.contains(incl_str);
113          if(pos)
114          { 
115            iff_out.get_bytestream()->writestring(incl[pos]);
116          }else
117          {
118            GP<DjVmDir::File> incl_file=dir.id_to_file(incl_str); 
119            if(incl_file)
120            {
121              DEBUG_MSG("INCL '"<<(const char *)incl_file->get_save_name()<<"'\n");
122              const GUTF8String incl_name=incl_file->get_save_name();
123              incl[incl_str]=incl_name;
124              iff_out.get_bytestream()->writestring(incl_name);
125            }else
126            {
127              DEBUG_MSG("BOGUS INCL '"<<(const char *)incl_str<<"'\n");
128              iff_out.copy(*iff_in.get_bytestream());
129            }
130          }
131        }else
132        {
133          iff_out.copy(*iff_in.get_bytestream());
134        }
135        iff_out.close_chunk();
136      }
137    }else
138    {
139      iff_out.copy(*iff_in.get_bytestream());
140    }
141    iff_out.close_chunk();
142    iff_in.close_chunk();
143  }
144}
145
146DjVmDoc::DjVmDoc(void)
147{
148   DEBUG_MSG("DjVmDoc::DjVmDoc(): Constructing empty DjVm document.\n");
149   DEBUG_MAKE_INDENT(3);
150}
151
152void
153DjVmDoc::init(void)
154{
155  dir=DjVmDir::create();
156}
157
158GP<DjVmDoc>
159DjVmDoc::create(void)
160{
161  DjVmDoc *doc=new DjVmDoc();
162  GP<DjVmDoc> retval=doc;
163  doc->init();
164  return retval;
165}
166
167void
168DjVmDoc::insert_file(const GP<DjVmDir::File> & f,
169                     GP<DataPool> data_pool, int pos)
170{
171   DEBUG_MSG("DjVmDoc::insert_file(): inserting file '" << f->get_load_name() <<
172             "' at pos " << pos << "\n");
173   DEBUG_MAKE_INDENT(3);
174
175   if (!f)
176     G_THROW( ERR_MSG("DjVmDoc.no_zero_file") );
177   if (data.contains(f->get_load_name()))
178     G_THROW( ERR_MSG("DjVmDoc.no_duplicate") );
179
180   char buffer[4];
181   if (data_pool->get_data(buffer, 0, 4)==4 && !memcmp(buffer, octets, 4))
182   {
183      data_pool=DataPool::create(data_pool, 4, -1);
184   } 
185   data[f->get_load_name()]=data_pool;
186   dir->insert_file(f, pos);
187}
188
189void
190DjVmDoc::insert_file(
191  ByteStream &data, DjVmDir::File::FILE_TYPE file_type,
192  const GUTF8String &name, const GUTF8String &id, const GUTF8String &title,
193  int pos)
194{
195   const GP<DjVmDir::File> file(
196     DjVmDir::File::create(name, id, title, file_type));
197   const GP<DataPool> pool(DataPool::create());
198      // Cannot connect to a bytestream.
199      // Must copy data into the datapool.
200   int nbytes;
201   char buffer[1024];
202   while ((nbytes = data.read(buffer, sizeof(buffer))))
203      pool->add_data(buffer, nbytes);
204   pool->set_eof();
205      // Call low level insert
206   insert_file(file, pool, pos);
207}
208
209void
210DjVmDoc::insert_file(
211  const GP<DataPool> &pool, DjVmDir::File::FILE_TYPE file_type,
212  const GUTF8String &name, const GUTF8String &id, const GUTF8String &title,
213  int pos)
214{
215   const GP<DjVmDir::File> file(
216     DjVmDir::File::create(name, id, title, file_type));
217      // Call low level insert
218   insert_file(file, pool, pos);
219}
220
221void
222DjVmDoc::delete_file(const GUTF8String &id)
223{
224   DEBUG_MSG("DjVmDoc::delete_file(): deleting file '" << id << "'\n");
225   DEBUG_MAKE_INDENT(3);
226   
227   if (!data.contains(id))
228      G_THROW(GUTF8String( ERR_MSG("DjVmDoc.cant_delete") "\t") + id);
229   
230   data.del(id);
231   dir->delete_file(id);
232}
233
234void 
235DjVmDoc::set_djvm_nav(GP<DjVmNav> n)
236{
237  if (n && ! n->isValidBookmark())
238    G_THROW("Invalid bookmark data");
239  nav = n;
240}
241
242GP<DataPool>
243DjVmDoc::get_data(const GUTF8String &id) const
244{
245  GPosition pos;
246  if (!data.contains(id, pos))
247    G_THROW(GUTF8String( ERR_MSG("DjVmDoc.cant_find") "\t") + id);
248  const GP<DataPool> pool(data[pos]);
249   // First check that the file is in IFF format
250  G_TRY
251  {
252    const GP<ByteStream> str_in(pool->get_stream());
253    const GP<IFFByteStream> giff_in=IFFByteStream::create(str_in);
254    IFFByteStream &iff_in=*giff_in;
255    GUTF8String chkid;
256    int size=iff_in.get_chunk(chkid);
257    if (size<0 || size>0x7fffffff)
258      G_THROW( ERR_MSG("DjVmDoc.not_IFF") "\t" + id);
259  }
260  G_CATCH_ALL
261  {
262    G_THROW( ERR_MSG("DjVmDoc.not_IFF") "\t" + id);
263  }
264  G_ENDCATCH;
265  return pool;
266}
267
268void
269DjVmDoc::write(const GP<ByteStream> &gstr)
270{
271  const GMap<GUTF8String,void *> reserved;
272  write(gstr,reserved);
273}
274
275static inline GUTF8String
276get_name(const DjVmDir::File &file)
277{
278  const GUTF8String save_name(file.get_save_name());
279  return save_name.length()?save_name:(file.get_load_name());
280}
281
282void
283DjVmDoc::write(const GP<ByteStream> &gstr,
284               const GMap<GUTF8String,void *> &reserved)
285{
286  DEBUG_MSG("DjVmDoc::write(): Storing document into the byte stream.\n");
287  DEBUG_MAKE_INDENT(3);
288
289  GPList<DjVmDir::File> files_list=dir->resolve_duplicates(true);
290  bool do_rename=false;
291  GPosition pos(reserved);
292
293  GMap<GUTF8String,GUTF8String> incl;
294  DEBUG_MSG("pass 1: looking for reserved names.");
295  if(pos)
296  {
297      // Check if there are any conflicting file names.
298    for(pos=files_list;pos;++pos)
299    {
300      GP<DjVmDir::File> file=files_list[pos];
301      if((do_rename=(reserved.contains(file->get_load_name())?true:false))
302                  ||(do_rename=(reserved.contains(file->get_save_name())?true:false)))
303      {
304        break;
305      }
306    }
307    // If there are conflicting file names, check if the save names
308    // are OK.  If not, generate new save names.
309    if(do_rename)
310    {
311      DEBUG_MSG("pass 1: renaming reserved names.");
312      for(;;files_list=dir->resolve_duplicates(true))
313      {
314        GMap<GUTF8String,void *> this_doc;
315        for(pos=files_list;pos;++pos)
316        {
317          GP<DjVmDir::File> file=files_list[pos];
318          this_doc[::get_name(*file)]=0;
319        }
320        bool need_new_list=false;
321        for(pos=files_list;pos;++pos)
322        {
323          GP<DjVmDir::File> file=files_list[pos];
324          const GUTF8String name(::get_name(*file));
325          if(reserved.contains(name))
326          {
327            GUTF8String new_name;
328            int series=0;
329            do
330            {
331              int dot=name.rsearch('.');
332              if(dot>0)
333              {
334                new_name=name.substr(0,dot)+
335                  "_"+GUTF8String(++series)+name.substr(dot,-1);
336              }else
337              {
338                new_name=name+"_"+GUTF8String(++series);
339              }
340            } while(reserved.contains(new_name)||this_doc.contains(new_name));
341            dir->set_file_name(file->get_load_name(),new_name);
342            need_new_list=true;
343          }
344        }
345        if(!need_new_list)
346          break;
347      }
348    }
349  }
350
351  DEBUG_MSG("pass 2: create dummy DIRM chunk and calculate offsets...\n");
352  for(pos=files_list;pos;++pos)
353  {
354    GP<DjVmDir::File> file=files_list[pos];
355    file->offset=0xffffffff;
356    GPosition data_pos=data.contains(file->get_load_name());
357    if (!data_pos)
358      G_THROW( ERR_MSG("DjVmDoc.no_data") "\t" + file->get_load_name());
359    if(do_rename)
360    {
361      GP<ByteStream> gout(ByteStream::create());
362      {
363        const GP<IFFByteStream> giff_in(
364          IFFByteStream::create(data[data_pos]->get_stream()));
365        const GP<IFFByteStream> giff_out(IFFByteStream::create(gout));
366        ::save_file(*giff_in,*giff_out,*dir,incl);
367      }
368      gout->seek(0L);
369      data[data_pos]=DataPool::create(gout);
370    }
371    file->size=data[data_pos]->get_length();
372    if (!file->size)
373      G_THROW( ERR_MSG("DjVmDoc.zero_file") );
374  }
375   
376  const GP<ByteStream> tmp_str(ByteStream::create());
377  const GP<IFFByteStream> gtmp_iff(IFFByteStream::create(tmp_str));
378  IFFByteStream &tmp_iff=*gtmp_iff;
379  tmp_iff.put_chunk("FORM:DJVM", 1);
380  tmp_iff.put_chunk("DIRM");
381  dir->encode(tmp_iff.get_bytestream(),do_rename);
382  tmp_iff.close_chunk();
383  if (nav)
384    {
385      tmp_iff.put_chunk("NAVM");
386      nav->encode(tmp_iff.get_bytestream());
387      tmp_iff.close_chunk();
388    }
389  tmp_iff.close_chunk();
390  int offset=tmp_iff.tell();
391
392  for(pos=files_list;pos;++pos)
393  {
394    if ((offset & 1)!=0)
395      offset++;
396     
397    GP<DjVmDir::File> & file=files_list[pos];
398    file->offset=offset;
399    offset+=file->size; // file->size has been set in the first pass
400  }
401
402  DEBUG_MSG("pass 3: store the file contents.\n");
403
404  GP<IFFByteStream> giff=IFFByteStream::create(gstr);
405  IFFByteStream &iff=*giff;
406  iff.put_chunk("FORM:DJVM", 1);
407  iff.put_chunk("DIRM");
408  dir->encode(iff.get_bytestream(),do_rename);
409  iff.close_chunk();
410  if (nav)
411    {
412      iff.put_chunk("NAVM");
413      nav->encode(iff.get_bytestream());
414      iff.close_chunk();
415    }
416
417  for(pos=files_list;pos;++pos)
418  {
419    GP<DjVmDir::File> & file=files_list[pos];
420
421    const GP<DataPool> pool=get_data(file->get_load_name());
422    const GP<ByteStream> str_in(pool->get_stream());
423    if ((iff.tell() & 1)!=0)
424    {
425      iff.get_bytestream()->write8(0);
426    }
427    iff.copy(*str_in);
428  }
429
430  iff.close_chunk();
431  iff.flush();
432
433  DEBUG_MSG("done storing DjVm file.\n");
434}
435
436void
437DjVmDoc::read(const GP<DataPool> & pool)
438{
439   DEBUG_MSG("DjVmDoc::read(): reading the BUNDLED doc contents from the pool\n");
440   DEBUG_MAKE_INDENT(3);
441   
442   const GP<ByteStream> str(pool->get_stream());
443   
444   GP<IFFByteStream> giff=IFFByteStream::create(str);
445   IFFByteStream &iff=*giff;
446   GUTF8String chkid;
447   iff.get_chunk(chkid);
448   if (chkid!="FORM:DJVM")
449      G_THROW( ERR_MSG("DjVmDoc.no_form_djvm") );
450
451   iff.get_chunk(chkid);
452   if (chkid!="DIRM")
453      G_THROW( ERR_MSG("DjVmDoc.no_dirm_chunk") );
454   dir->decode(iff.get_bytestream());
455   iff.close_chunk();
456
457   data.empty();
458
459   if (dir->is_indirect())
460      G_THROW( ERR_MSG("DjVmDoc.cant_read_indr") );
461
462   GPList<DjVmDir::File> files_list=dir->get_files_list();
463   for(GPosition pos=files_list;pos;++pos)
464   {
465      DjVmDir::File * f=files_list[pos];
466     
467      DEBUG_MSG("reading contents of file '" << f->get_load_name() << "'\n");
468      data[f->get_load_name()]=DataPool::create(pool, f->offset, f->size);
469   }
470}
471
472void
473DjVmDoc::read(ByteStream & str_in)
474{
475   DEBUG_MSG("DjVmDoc::read(): reading the BUNDLED doc contents from the stream\n");
476   DEBUG_MAKE_INDENT(3);
477
478   GP<DataPool> pool=DataPool::create();
479   char buffer[1024];
480   int length;
481   while((length=str_in.read(buffer, 1024)))
482      pool->add_data(buffer, length);
483   pool->set_eof();
484
485   read(pool);
486}
487
488void
489DjVmDoc::read(const GURL &url)
490{
491   DEBUG_MSG("DjVmDoc::read(): reading the doc contents from the HDD\n");
492   DEBUG_MAKE_INDENT(3);
493
494   GP<DataPool> pool=DataPool::create(url);
495   const GP<ByteStream> str(pool->get_stream());
496   GP<IFFByteStream> giff=IFFByteStream::create(str);
497   IFFByteStream &iff=*giff;
498   GUTF8String chkid;
499   iff.get_chunk(chkid);
500   if (chkid!="FORM:DJVM")
501      G_THROW( ERR_MSG("DjVmDoc.no_form_djvm2") );
502
503   iff.get_chunk(chkid);
504   if (chkid!="DIRM")
505      G_THROW( ERR_MSG("DjVmDoc.no_dirm_chunk") );
506   dir->decode(iff.get_bytestream());
507   iff.close_chunk();
508
509   if (dir->is_bundled())
510     read(pool);
511   else
512   {
513//      GUTF8String full_name=GOS::expand_name(name);
514//      GUTF8String dir_name=GOS::dirname(GOS::url_to_filename(url.base()));
515      GURL dirbase=url.base();
516
517      data.empty();
518
519      GPList<DjVmDir::File> files_list=dir->get_files_list();
520      for(GPosition pos=files_list;pos;++pos)
521      {
522         DjVmDir::File * f=files_list[pos];
523     
524         DEBUG_MSG("reading contents of file '" << f->get_load_name() << "'\n");
525
526         const GURL::UTF8 url(f->get_load_name(),dirbase);
527         data[f->get_load_name()]=DataPool::create(url);
528      }
529   }
530}
531
532void
533DjVmDoc::write_index(const GP<ByteStream> &str)
534{
535   DEBUG_MSG("DjVmDoc::write_index(): Storing DjVm index file\n");
536   DEBUG_MAKE_INDENT(3);
537
538   GPList<DjVmDir::File> files_list=dir->get_files_list();
539   for(GPosition pos=files_list;pos;++pos)
540   {
541      GP<DjVmDir::File> file=files_list[pos];
542      file->offset=0;
543
544      GPosition data_pos=data.contains(file->get_load_name());
545      if (!data_pos)
546        G_THROW( ERR_MSG("DjVmDoc.no_data") "\t" + file->get_load_name());
547      file->size=data[data_pos]->get_length();
548      if (!file->size)
549        G_THROW( ERR_MSG("DjVmDoc.zero_file") );
550   }
551
552   GP<IFFByteStream> giff=IFFByteStream::create(str);
553   IFFByteStream &iff=*giff;
554   iff.put_chunk("FORM:DJVM", 1);
555   iff.put_chunk("DIRM");
556   dir->encode(iff.get_bytestream());
557   iff.close_chunk();
558   if (nav)
559     {
560       iff.put_chunk("NAVM");
561       nav->encode(iff.get_bytestream());
562       iff.close_chunk();
563     }
564   iff.close_chunk();
565   iff.flush();
566}
567
568void
569DjVmDoc::save_page(
570  const GURL &codebase, const DjVmDir::File &file) const
571{
572  GMap<GUTF8String,GUTF8String> incl;
573  save_file(codebase,file,&incl);
574}
575
576void
577DjVmDoc::save_page(
578  const GURL &codebase, const DjVmDir::File &file,
579  GMap<GUTF8String,GUTF8String> &incl ) const
580{
581  save_file(codebase,file,&incl);
582}
583
584void
585DjVmDoc::save_file(
586  const GURL &codebase, const DjVmDir::File &file) const
587{
588  save_file(codebase,file,0);
589}
590
591GUTF8String
592DjVmDoc::save_file(const GURL &codebase, const DjVmDir::File &file,
593  GMap<GUTF8String,GUTF8String> &incl, const GP<DataPool> &pool) const
594{
595  const GUTF8String save_name(file.get_save_name());
596  const GURL::UTF8 new_url(save_name,codebase);
597  DEBUG_MSG("storing file '"<<new_url<<"'\n");
598  DataPool::load_file(new_url);
599  const GP<ByteStream> str_in(pool->get_stream());
600  const GP<ByteStream> str_out(ByteStream::create(new_url, "wb"));
601  ::save_file( *IFFByteStream::create(str_in),
602      *IFFByteStream::create(str_out), *dir, incl);
603  return save_name;
604}
605
606void
607DjVmDoc::save_file(
608  const GURL &codebase, const DjVmDir::File &file,
609  GMap<GUTF8String,GUTF8String> *incl) const
610{
611  const GUTF8String load_name=file.get_load_name();
612  if(!incl || !incl->contains(load_name))
613  {
614    GMap<GUTF8String,GUTF8String> new_incl;
615    const GUTF8String save_name(
616      save_file(codebase,file,new_incl,get_data(load_name)));
617
618    if(incl)
619    {
620      (*incl)[load_name]=save_name;
621      for(GPosition pos=new_incl;pos;++pos)
622      {
623        save_file(codebase,file,incl);
624      }
625    }
626  }
627}
628
629void
630DjVmDoc::expand(const GURL &codebase, const GUTF8String &idx_name)
631{
632   DEBUG_MSG("DjVmDoc::expand(): Expanding into '" << codebase << "'\n");
633   DEBUG_MAKE_INDENT(3);
634
635   // Resolve any name conflicts
636   // Find the list of all files.
637   GPList<DjVmDir::File> files_list=dir->resolve_duplicates(false);
638
639      // store each file
640   for(GPosition pos=files_list;pos;++pos)
641   {
642     save_file(codebase,*files_list[pos]);
643   }
644
645   if (idx_name.length())
646   {
647      const GURL::UTF8 idx_url(idx_name, codebase);
648   
649      DEBUG_MSG("storing index file '" << idx_url << "'\n");
650
651      DataPool::load_file(idx_url);
652      GP<ByteStream> str=ByteStream::create(idx_url, "wb");
653      write_index(str);
654   }
655}
656
657
658#ifdef HAVE_NAMESPACES
659}
660# ifndef NOT_USING_DJVU_NAMESPACE
661using namespace DJVU;
662# endif
663#endif
Note: See TracBrowser for help on using the repository browser.