source: trunk/libdjvu/DjVmDoc.cpp @ 280

Last change on this file since 280 was 280, checked in by rbri, 11 years ago

DJVU plugin: djvulibre updated to version 3.5.22

File size: 18.6 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, either Version 2 of the license,
9//C- or (at your option) any later version. The license should have
10//C- accompanied the software or you may obtain a copy of the license
11//C- from the Free Software Foundation at http://www.fsf.org .
12//C-
13//C- This program is distributed in the hope that it will be useful,
14//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16//C- GNU General Public License for more details.
17//C-
18//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19//C- Lizardtech Software.  Lizardtech Software has authorized us to
20//C- replace the original DjVu(r) Reference Library notice by the following
21//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22//C-
23//C-  ------------------------------------------------------------------
24//C- | DjVu (r) Reference Library (v. 3.5)
25//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26//C- | The DjVu Reference Library is protected by U.S. Pat. No.
27//C- | 6,058,214 and patents pending.
28//C- |
29//C- | This software is subject to, and may be distributed under, the
30//C- | GNU General Public License, either Version 2 of the license,
31//C- | or (at your option) any later version. The license should have
32//C- | accompanied the software or you may obtain a copy of the license
33//C- | from the Free Software Foundation at http://www.fsf.org .
34//C- |
35//C- | The computer code originally released by LizardTech under this
36//C- | license and unmodified by other parties is deemed "the LIZARDTECH
37//C- | ORIGINAL CODE."  Subject to any third party intellectual property
38//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39//C- | non-exclusive license to make, use, sell, or otherwise dispose of
40//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42//C- | General Public License.   This grant only confers the right to
43//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44//C- | the extent such infringement is reasonably necessary to enable
45//C- | recipient to make, have made, practice, sell, or otherwise dispose
46//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47//C- | any greater extent that may be necessary to utilize further
48//C- | modifications or combinations.
49//C- |
50//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54//C- +------------------------------------------------------------------
55//
56// $Id: DjVmDoc.cpp,v 1.11 2007/03/25 20:48:29 leonb Exp $
57// $Name: release_3_5_22 $
58
59#ifdef HAVE_CONFIG_H
60# include "config.h"
61#endif
62#if NEED_GNUG_PRAGMAS
63# pragma implementation
64#endif
65
66#include "DjVmDoc.h"
67#include "DjVmNav.h"
68#include "DataPool.h"
69#include "IFFByteStream.h"
70#include "GOS.h"
71#include "debug.h"
72
73
74#ifdef HAVE_NAMESPACES
75namespace DJVU {
76# ifdef NOT_DEFINED // Just to fool emacs c++ mode
77}
78#endif
79#endif
80
// Four-byte "AT&T" prefix (0x41 0x54 0x26 0x54).  insert_file() compares the
// first four bytes of incoming data against it and skips them when they match.
static const char octets[4]={'A','T','&','T'};
82
83// Save the file to disk, remapping INCL chunks while saving.
84static void
85save_file(
86  IFFByteStream &iff_in, IFFByteStream &iff_out, const DjVmDir &dir,
87  GMap<GUTF8String,GUTF8String> &incl)
88{
89  GUTF8String chkid;
90  if (iff_in.get_chunk(chkid))
91  {
92    iff_out.put_chunk(chkid,true);
93    if(!chkid.cmp("FORM:",5))
94    {
95      for(;iff_in.get_chunk(chkid);iff_in.close_chunk())
96      {
97        iff_out.put_chunk(chkid);
98        if(chkid == "INCL")
99        {
100          GUTF8String incl_str;
101          char buffer[1024];
102          int length;
103          while((length=iff_in.read(buffer, 1024)))
104            incl_str+=GUTF8String(buffer, length);
105          // Eat '\n' in the beginning and at the end
106          while(incl_str.length() && incl_str[0]=='\n')
107          {
108            incl_str=incl_str.substr(1,(unsigned int)(-1));
109          }
110          while(incl_str.length()>0 && incl_str[(int)incl_str.length()-1]=='\n')
111          {
112            incl_str.setat(incl_str.length()-1, 0);
113          }
114          GPosition pos=incl.contains(incl_str);
115          if(pos)
116          { 
117            iff_out.get_bytestream()->writestring(incl[pos]);
118          }else
119          {
120            GP<DjVmDir::File> incl_file=dir.id_to_file(incl_str); 
121            if(incl_file)
122            {
123              DEBUG_MSG("INCL '"<<(const char *)incl_file->get_save_name()<<"'\n");
124              const GUTF8String incl_name=incl_file->get_save_name();
125              incl[incl_str]=incl_name;
126              iff_out.get_bytestream()->writestring(incl_name);
127            }else
128            {
129              DEBUG_MSG("BOGUS INCL '"<<(const char *)incl_str<<"'\n");
130              iff_out.copy(*iff_in.get_bytestream());
131            }
132          }
133        }else
134        {
135          iff_out.copy(*iff_in.get_bytestream());
136        }
137        iff_out.close_chunk();
138      }
139    }else
140    {
141      iff_out.copy(*iff_in.get_bytestream());
142    }
143    iff_out.close_chunk();
144    iff_in.close_chunk();
145  }
146}
147
148DjVmDoc::DjVmDoc(void)
149{
150   DEBUG_MSG("DjVmDoc::DjVmDoc(): Constructing empty DjVm document.\n");
151   DEBUG_MAKE_INDENT(3);
152}
153
154void
155DjVmDoc::init(void)
156{
157  dir=DjVmDir::create();
158}
159
160GP<DjVmDoc>
161DjVmDoc::create(void)
162{
163  DjVmDoc *doc=new DjVmDoc();
164  GP<DjVmDoc> retval=doc;
165  doc->init();
166  return retval;
167}
168
169void
170DjVmDoc::insert_file(const GP<DjVmDir::File> & f,
171                     GP<DataPool> data_pool, int pos)
172{
173   DEBUG_MSG("DjVmDoc::insert_file(): inserting file '" << f->get_load_name() <<
174             "' at pos " << pos << "\n");
175   DEBUG_MAKE_INDENT(3);
176
177   if (!f)
178     G_THROW( ERR_MSG("DjVmDoc.no_zero_file") );
179   if (data.contains(f->get_load_name()))
180     G_THROW( ERR_MSG("DjVmDoc.no_duplicate") );
181
182   char buffer[4];
183   if (data_pool->get_data(buffer, 0, 4)==4 && !memcmp(buffer, octets, 4))
184   {
185      data_pool=DataPool::create(data_pool, 4, -1);
186   } 
187   data[f->get_load_name()]=data_pool;
188   dir->insert_file(f, pos);
189}
190
191void
192DjVmDoc::insert_file(
193  ByteStream &data, DjVmDir::File::FILE_TYPE file_type,
194  const GUTF8String &name, const GUTF8String &id, const GUTF8String &title,
195  int pos)
196{
197   const GP<DjVmDir::File> file(
198     DjVmDir::File::create(name, id, title, file_type));
199   const GP<DataPool> pool(DataPool::create());
200      // Cannot connect to a bytestream.
201      // Must copy data into the datapool.
202   int nbytes;
203   char buffer[1024];
204   while ((nbytes = data.read(buffer, sizeof(buffer))))
205      pool->add_data(buffer, nbytes);
206   pool->set_eof();
207      // Call low level insert
208   insert_file(file, pool, pos);
209}
210
211void
212DjVmDoc::insert_file(
213  const GP<DataPool> &pool, DjVmDir::File::FILE_TYPE file_type,
214  const GUTF8String &name, const GUTF8String &id, const GUTF8String &title,
215  int pos)
216{
217   const GP<DjVmDir::File> file(
218     DjVmDir::File::create(name, id, title, file_type));
219      // Call low level insert
220   insert_file(file, pool, pos);
221}
222
223void
224DjVmDoc::delete_file(const GUTF8String &id)
225{
226   DEBUG_MSG("DjVmDoc::delete_file(): deleting file '" << id << "'\n");
227   DEBUG_MAKE_INDENT(3);
228   
229   if (!data.contains(id))
230      G_THROW(GUTF8String( ERR_MSG("DjVmDoc.cant_delete") "\t") + id);
231   
232   data.del(id);
233   dir->delete_file(id);
234}
235
236void 
237DjVmDoc::set_djvm_nav(GP<DjVmNav> n)
238{
239  if (n && ! n->isValidBookmark())
240    G_THROW("Invalid bookmark data");
241  nav = n;
242}
243
244GP<DataPool>
245DjVmDoc::get_data(const GUTF8String &id) const
246{
247  GPosition pos;
248  if (!data.contains(id, pos))
249    G_THROW(GUTF8String( ERR_MSG("DjVmDoc.cant_find") "\t") + id);
250  const GP<DataPool> pool(data[pos]);
251   // First check that the file is in IFF format
252  G_TRY
253  {
254    const GP<ByteStream> str_in(pool->get_stream());
255    const GP<IFFByteStream> giff_in=IFFByteStream::create(str_in);
256    IFFByteStream &iff_in=*giff_in;
257    GUTF8String chkid;
258    int size=iff_in.get_chunk(chkid);
259    if (size<0 || size>0x7fffffff)
260      G_THROW( ERR_MSG("DjVmDoc.not_IFF") "\t" + id);
261  }
262  G_CATCH_ALL
263  {
264    G_THROW( ERR_MSG("DjVmDoc.not_IFF") "\t" + id);
265  }
266  G_ENDCATCH;
267  return pool;
268}
269
270void
271DjVmDoc::write(const GP<ByteStream> &gstr)
272{
273  const GMap<GUTF8String,void *> reserved;
274  write(gstr,reserved);
275}
276
277static inline GUTF8String
278get_name(const DjVmDir::File &file)
279{
280  const GUTF8String save_name(file.get_save_name());
281  return save_name.length()?save_name:(file.get_load_name());
282}
283
284void
285DjVmDoc::write(const GP<ByteStream> &gstr,
286               const GMap<GUTF8String,void *> &reserved)
287{
288  DEBUG_MSG("DjVmDoc::write(): Storing document into the byte stream.\n");
289  DEBUG_MAKE_INDENT(3);
290
291  GPList<DjVmDir::File> files_list=dir->resolve_duplicates(true);
292  bool do_rename=false;
293  GPosition pos(reserved);
294
295  GMap<GUTF8String,GUTF8String> incl;
296  DEBUG_MSG("pass 1: looking for reserved names.");
297  if(pos)
298  {
299      // Check if there are any conflicting file names.
300    for(pos=files_list;pos;++pos)
301    {
302      GP<DjVmDir::File> file=files_list[pos];
303      if((do_rename=(reserved.contains(file->get_load_name())?true:false))
304                  ||(do_rename=(reserved.contains(file->get_save_name())?true:false)))
305      {
306        break;
307      }
308    }
309    // If there are conflicting file names, check if the save names
310    // are OK.  If not, generate new save names.
311    if(do_rename)
312    {
313      DEBUG_MSG("pass 1: renaming reserved names.");
314      for(;;files_list=dir->resolve_duplicates(true))
315      {
316        GMap<GUTF8String,void *> this_doc;
317        for(pos=files_list;pos;++pos)
318        {
319          GP<DjVmDir::File> file=files_list[pos];
320          this_doc[::get_name(*file)]=0;
321        }
322        bool need_new_list=false;
323        for(pos=files_list;pos;++pos)
324        {
325          GP<DjVmDir::File> file=files_list[pos];
326          const GUTF8String name(::get_name(*file));
327          if(reserved.contains(name))
328          {
329            GUTF8String new_name;
330            int series=0;
331            do
332            {
333              int dot=name.rsearch('.');
334              if(dot>0)
335              {
336                new_name=name.substr(0,dot)+
337                  "_"+GUTF8String(++series)+name.substr(dot,-1);
338              }else
339              {
340                new_name=name+"_"+GUTF8String(++series);
341              }
342            } while(reserved.contains(new_name)||this_doc.contains(new_name));
343            dir->set_file_name(file->get_load_name(),new_name);
344            need_new_list=true;
345          }
346        }
347        if(!need_new_list)
348          break;
349      }
350    }
351  }
352
353  DEBUG_MSG("pass 2: create dummy DIRM chunk and calculate offsets...\n");
354  for(pos=files_list;pos;++pos)
355  {
356    GP<DjVmDir::File> file=files_list[pos];
357    file->offset=0xffffffff;
358    GPosition data_pos=data.contains(file->get_load_name());
359    if (!data_pos)
360      G_THROW( ERR_MSG("DjVmDoc.no_data") "\t" + file->get_load_name());
361    if(do_rename)
362    {
363      GP<ByteStream> gout(ByteStream::create());
364      {
365        const GP<IFFByteStream> giff_in(
366          IFFByteStream::create(data[data_pos]->get_stream()));
367        const GP<IFFByteStream> giff_out(IFFByteStream::create(gout));
368        ::save_file(*giff_in,*giff_out,*dir,incl);
369      }
370      gout->seek(0L);
371      data[data_pos]=DataPool::create(gout);
372    }
373    file->size=data[data_pos]->get_length();
374    if (!file->size)
375      G_THROW( ERR_MSG("DjVmDoc.zero_file") );
376  }
377   
378  const GP<ByteStream> tmp_str(ByteStream::create());
379  const GP<IFFByteStream> gtmp_iff(IFFByteStream::create(tmp_str));
380  IFFByteStream &tmp_iff=*gtmp_iff;
381  tmp_iff.put_chunk("FORM:DJVM", 1);
382  tmp_iff.put_chunk("DIRM");
383  dir->encode(tmp_iff.get_bytestream(),do_rename);
384  tmp_iff.close_chunk();
385  if (nav)
386    {
387      tmp_iff.put_chunk("NAVM");
388      nav->encode(tmp_iff.get_bytestream());
389      tmp_iff.close_chunk();
390    }
391  tmp_iff.close_chunk();
392  int offset=tmp_iff.tell();
393
394  for(pos=files_list;pos;++pos)
395  {
396    if ((offset & 1)!=0)
397      offset++;
398     
399    GP<DjVmDir::File> & file=files_list[pos];
400    file->offset=offset;
401    offset+=file->size; // file->size has been set in the first pass
402  }
403
404  DEBUG_MSG("pass 3: store the file contents.\n");
405
406  GP<IFFByteStream> giff=IFFByteStream::create(gstr);
407  IFFByteStream &iff=*giff;
408  iff.put_chunk("FORM:DJVM", 1);
409  iff.put_chunk("DIRM");
410  dir->encode(iff.get_bytestream(),do_rename);
411  iff.close_chunk();
412  if (nav)
413    {
414      iff.put_chunk("NAVM");
415      nav->encode(iff.get_bytestream());
416      iff.close_chunk();
417    }
418
419  for(pos=files_list;pos;++pos)
420  {
421    GP<DjVmDir::File> & file=files_list[pos];
422
423    const GP<DataPool> pool=get_data(file->get_load_name());
424    const GP<ByteStream> str_in(pool->get_stream());
425    if ((iff.tell() & 1)!=0)
426    {
427      iff.get_bytestream()->write8(0);
428    }
429    iff.copy(*str_in);
430  }
431
432  iff.close_chunk();
433  iff.flush();
434
435  DEBUG_MSG("done storing DjVm file.\n");
436}
437
438void
439DjVmDoc::read(const GP<DataPool> & pool)
440{
441   DEBUG_MSG("DjVmDoc::read(): reading the BUNDLED doc contents from the pool\n");
442   DEBUG_MAKE_INDENT(3);
443   
444   const GP<ByteStream> str(pool->get_stream());
445   
446   GP<IFFByteStream> giff=IFFByteStream::create(str);
447   IFFByteStream &iff=*giff;
448   GUTF8String chkid;
449   iff.get_chunk(chkid);
450   if (chkid!="FORM:DJVM")
451      G_THROW( ERR_MSG("DjVmDoc.no_form_djvm") );
452
453   iff.get_chunk(chkid);
454   if (chkid!="DIRM")
455      G_THROW( ERR_MSG("DjVmDoc.no_dirm_chunk") );
456   dir->decode(iff.get_bytestream());
457   iff.close_chunk();
458
459   data.empty();
460
461   if (dir->is_indirect())
462      G_THROW( ERR_MSG("DjVmDoc.cant_read_indr") );
463
464   GPList<DjVmDir::File> files_list=dir->get_files_list();
465   for(GPosition pos=files_list;pos;++pos)
466   {
467      DjVmDir::File * f=files_list[pos];
468     
469      DEBUG_MSG("reading contents of file '" << f->get_load_name() << "'\n");
470      data[f->get_load_name()]=DataPool::create(pool, f->offset, f->size);
471   }
472}
473
474void
475DjVmDoc::read(ByteStream & str_in)
476{
477   DEBUG_MSG("DjVmDoc::read(): reading the BUNDLED doc contents from the stream\n");
478   DEBUG_MAKE_INDENT(3);
479
480   GP<DataPool> pool=DataPool::create();
481   char buffer[1024];
482   int length;
483   while((length=str_in.read(buffer, 1024)))
484      pool->add_data(buffer, length);
485   pool->set_eof();
486
487   read(pool);
488}
489
490void
491DjVmDoc::read(const GURL &url)
492{
493   DEBUG_MSG("DjVmDoc::read(): reading the doc contents from the HDD\n");
494   DEBUG_MAKE_INDENT(3);
495
496   GP<DataPool> pool=DataPool::create(url);
497   const GP<ByteStream> str(pool->get_stream());
498   GP<IFFByteStream> giff=IFFByteStream::create(str);
499   IFFByteStream &iff=*giff;
500   GUTF8String chkid;
501   iff.get_chunk(chkid);
502   if (chkid!="FORM:DJVM")
503      G_THROW( ERR_MSG("DjVmDoc.no_form_djvm2") );
504
505   iff.get_chunk(chkid);
506   if (chkid!="DIRM")
507      G_THROW( ERR_MSG("DjVmDoc.no_dirm_chunk") );
508   dir->decode(iff.get_bytestream());
509   iff.close_chunk();
510
511   if (dir->is_bundled())
512     read(pool);
513   else
514   {
515//      GUTF8String full_name=GOS::expand_name(name);
516//      GUTF8String dir_name=GOS::dirname(GOS::url_to_filename(url.base()));
517      GURL dirbase=url.base();
518
519      data.empty();
520
521      GPList<DjVmDir::File> files_list=dir->get_files_list();
522      for(GPosition pos=files_list;pos;++pos)
523      {
524         DjVmDir::File * f=files_list[pos];
525     
526         DEBUG_MSG("reading contents of file '" << f->get_load_name() << "'\n");
527
528         const GURL::UTF8 url(f->get_load_name(),dirbase);
529         data[f->get_load_name()]=DataPool::create(url);
530      }
531   }
532}
533
534void
535DjVmDoc::write_index(const GP<ByteStream> &str)
536{
537   DEBUG_MSG("DjVmDoc::write_index(): Storing DjVm index file\n");
538   DEBUG_MAKE_INDENT(3);
539
540   GPList<DjVmDir::File> files_list=dir->get_files_list();
541   for(GPosition pos=files_list;pos;++pos)
542   {
543      GP<DjVmDir::File> file=files_list[pos];
544      file->offset=0;
545
546      GPosition data_pos=data.contains(file->get_load_name());
547      if (!data_pos)
548        G_THROW( ERR_MSG("DjVmDoc.no_data") "\t" + file->get_load_name());
549      file->size=data[data_pos]->get_length();
550      if (!file->size)
551        G_THROW( ERR_MSG("DjVmDoc.zero_file") );
552   }
553
554   GP<IFFByteStream> giff=IFFByteStream::create(str);
555   IFFByteStream &iff=*giff;
556   iff.put_chunk("FORM:DJVM", 1);
557   iff.put_chunk("DIRM");
558   dir->encode(iff.get_bytestream());
559   iff.close_chunk();
560   if (nav)
561     {
562       iff.put_chunk("NAVM");
563       nav->encode(iff.get_bytestream());
564       iff.close_chunk();
565     }
566   iff.close_chunk();
567   iff.flush();
568}
569
570void
571DjVmDoc::save_page(
572  const GURL &codebase, const DjVmDir::File &file) const
573{
574  GMap<GUTF8String,GUTF8String> incl;
575  save_file(codebase,file,&incl);
576}
577
578void
579DjVmDoc::save_page(
580  const GURL &codebase, const DjVmDir::File &file,
581  GMap<GUTF8String,GUTF8String> &incl ) const
582{
583  save_file(codebase,file,&incl);
584}
585
586void
587DjVmDoc::save_file(
588  const GURL &codebase, const DjVmDir::File &file) const
589{
590  save_file(codebase,file,0);
591}
592
593GUTF8String
594DjVmDoc::save_file(const GURL &codebase, const DjVmDir::File &file,
595  GMap<GUTF8String,GUTF8String> &incl, const GP<DataPool> &pool) const
596{
597  const GUTF8String save_name(file.get_save_name());
598  const GURL::UTF8 new_url(save_name,codebase);
599  DEBUG_MSG("storing file '"<<new_url<<"'\n");
600  DataPool::load_file(new_url);
601  const GP<ByteStream> str_in(pool->get_stream());
602  const GP<ByteStream> str_out(ByteStream::create(new_url, "wb"));
603  ::save_file( *IFFByteStream::create(str_in),
604      *IFFByteStream::create(str_out), *dir, incl);
605  return save_name;
606}
607
608void
609DjVmDoc::save_file(
610  const GURL &codebase, const DjVmDir::File &file,
611  GMap<GUTF8String,GUTF8String> *incl) const
612{
613  const GUTF8String load_name=file.get_load_name();
614  if(!incl || !incl->contains(load_name))
615  {
616    GMap<GUTF8String,GUTF8String> new_incl;
617    const GUTF8String save_name(
618      save_file(codebase,file,new_incl,get_data(load_name)));
619
620    if(incl)
621    {
622      (*incl)[load_name]=save_name;
623      for(GPosition pos=new_incl;pos;++pos)
624      {
625        save_file(codebase,file,incl);
626      }
627    }
628  }
629}
630
631void
632DjVmDoc::expand(const GURL &codebase, const GUTF8String &idx_name)
633{
634   DEBUG_MSG("DjVmDoc::expand(): Expanding into '" << codebase << "'\n");
635   DEBUG_MAKE_INDENT(3);
636
637   // Resolve any name conflicts
638   // Find the list of all files.
639   GPList<DjVmDir::File> files_list=dir->resolve_duplicates(false);
640
641      // store each file
642   for(GPosition pos=files_list;pos;++pos)
643   {
644     save_file(codebase,*files_list[pos]);
645   }
646
647   if (idx_name.length())
648   {
649      const GURL::UTF8 idx_url(idx_name, codebase);
650   
651      DEBUG_MSG("storing index file '" << idx_url << "'\n");
652
653      DataPool::load_file(idx_url);
654      GP<ByteStream> str=ByteStream::create(idx_url, "wb");
655      write_index(str);
656   }
657}
658
659
660#ifdef HAVE_NAMESPACES
661}
662# ifndef NOT_USING_DJVU_NAMESPACE
663using namespace DJVU;
664# endif
665#endif
Note: See TracBrowser for help on using the repository browser.