source: trunk/libdjvu/XMLTags.cpp @ 426

Last change on this file since 426 was 280, checked in by rbri, 12 years ago

DJVU plugin: djvulibre updated to version 3.5.22

File size: 10.6 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, either Version 2 of the license,
9//C- or (at your option) any later version. The license should have
10//C- accompanied the software or you may obtain a copy of the license
11//C- from the Free Software Foundation at http://www.fsf.org .
12//C-
13//C- This program is distributed in the hope that it will be useful,
14//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16//C- GNU General Public License for more details.
17//C-
18//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19//C- Lizardtech Software.  Lizardtech Software has authorized us to
20//C- replace the original DjVu(r) Reference Library notice by the following
21//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22//C-
23//C-  ------------------------------------------------------------------
24//C- | DjVu (r) Reference Library (v. 3.5)
25//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26//C- | The DjVu Reference Library is protected by U.S. Pat. No.
27//C- | 6,058,214 and patents pending.
28//C- |
29//C- | This software is subject to, and may be distributed under, the
30//C- | GNU General Public License, either Version 2 of the license,
31//C- | or (at your option) any later version. The license should have
32//C- | accompanied the software or you may obtain a copy of the license
33//C- | from the Free Software Foundation at http://www.fsf.org .
34//C- |
35//C- | The computer code originally released by LizardTech under this
36//C- | license and unmodified by other parties is deemed "the LIZARDTECH
37//C- | ORIGINAL CODE."  Subject to any third party intellectual property
38//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39//C- | non-exclusive license to make, use, sell, or otherwise dispose of
40//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42//C- | General Public License.   This grant only confers the right to
43//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44//C- | the extent such infringement is reasonably necessary to enable
45//C- | recipient to make, have made, practice, sell, or otherwise dispose
46//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47//C- | any greater extent that may be necessary to utilize further
48//C- | modifications or combinations.
49//C- |
50//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54//C- +------------------------------------------------------------------
55//
56// $Id: XMLTags.cpp,v 1.13 2007/03/25 20:48:34 leonb Exp $
57// $Name: release_3_5_22 $
58
59#ifdef HAVE_CONFIG_H
60# include "config.h"
61#endif
62#if NEED_GNUG_PRAGMAS
63# pragma implementation
64#endif
65
66// From: Leon Bottou, 1/31/2002
67// This is purely Lizardtech stuff.
68
69#include "XMLTags.h"
70#include "UnicodeByteStream.h"
71#include <ctype.h>
72#if HAS_WCTYPE
73#include <wctype.h>
74#endif
75
76
77#ifdef HAVE_NAMESPACES
78namespace DJVU {
79# ifdef NOT_DEFINED // Just to fool emacs c++ mode
80}
81#endif
82#endif
83
84lt_XMLContents::lt_XMLContents(void) {}
85
86lt_XMLContents::lt_XMLContents(GP<lt_XMLTags> t)
87{
88  tag=t;
89}
90
91static GUTF8String
92getargn(char const tag[], char const *&t)
93{
94  char const *s;
95  for(s=tag;isspace(*s);s++);
96  for(t=s;(*t)&&((*t)!='/')&&((*t)!='>')&&((*t)!='=')&&!isspace(*t);++t);
97  return GUTF8String(s,t-s);
98}
99
100static GUTF8String
101getargv(char const tag[], char const *&t)
102{
103  GUTF8String retval;
104  if(tag && tag[0] == '=')
105  {
106    char const *s=t=tag+1;
107    if((*t == '"')||(*t == '\47'))
108    {
109      char const q=*(t++);
110      for(s++;(*t)&&((*t)!=q)&&((*t)!='>');++t);
111      retval=GUTF8String(s,t-s);
112      if (t[0] == q)
113      {
114        ++t;
115      }
116    }else
117    {
118      for(t=s;(*t)&&((*t)!='/')&&((*t)!='>')&&!isspace(*t);++t);
119      retval=GUTF8String(s,t-s);
120    }
121  }else
122  {
123    t=tag;
124  }
125  return retval;
126}
127
128static GUTF8String
129tagtoname(char const tag[],char const *&t)
130{
131  char const *s;
132  for(s=tag;isspace(*s);s++);
133  for(t=s;(*t)&&((*t)!='>')&&((*t)!='/')&&!isspace(*t);++t);
134  return GUTF8String(s,t-s);
135}
136
137static inline GUTF8String
138tagtoname(char const tag[])
139{
140  char const *t;
141  return tagtoname(tag,t);
142}
143
144static inline bool
145isspaces(const GUTF8String &raw)
146{
147  return (raw.nextNonSpace() == (int)raw.length());
148}
149
150void
151lt_XMLTags::ParseValues(char const *t, GMap<GUTF8String,GUTF8String> &args,bool downcase)
152{
153  GUTF8String argn;
154  char const *tt;
155  while((argn=getargn(t,tt)).length())
156  {
157    if(downcase)
158      argn=argn.downcase();
159    args[argn]=getargv(tt,t).fromEscaped();
160  }
161}
162
163lt_XMLTags::~lt_XMLTags() {}
164
165lt_XMLTags::lt_XMLTags(void) : startline(0) {}
166
167lt_XMLTags::lt_XMLTags(const char n[]) : startline(0)
168{
169  char const *t;
170  name=tagtoname(n,t);
171  ParseValues(t,args);
172}
173
174void
175lt_XMLTags::init(const GP<ByteStream> &bs)
176{
177  GP<XMLByteStream> gxmlbs=XMLByteStream::create(bs);
178  init(*gxmlbs);
179}
180
181void
182lt_XMLTags::init(const GURL &url)
183{
184  const GP<ByteStream> bs=ByteStream::create(url,"rb");
185  init(bs);
186}
187
188void
189lt_XMLTags::init(XMLByteStream &xmlbs)
190{
191  if(!get_count())
192  {
193    G_THROW( ERR_MSG("XMLTags.no_GP") );
194  }
195  GPList<lt_XMLTags> level;
196  GUTF8String tag,raw(xmlbs.gets(0,'<',false));
197  int linesread=xmlbs.get_lines_read();
198  if(!isspaces(raw))
199  {
200    G_THROW( (ERR_MSG("XMLTags.raw_string") "\t")+raw);
201  }
202  GUTF8String encoding;
203  for(int len;(len=(tag=xmlbs.gets(0,'>',true)).length());)
204  {
205    if(tag[len-1] != '>')
206    {
207      G_THROW((ERR_MSG("XMLTags.bad_tag") "\t")+tag);
208    }
209    switch(tag[1])
210    {
211      case '?':
212      {
213        while(len < 4 || tag.substr(len-2,len) != "?>")
214        {
215          GUTF8String cont(xmlbs.gets(0,'>',true));
216          if(!cont.length())
217          { 
218            G_THROW( (ERR_MSG("XMLTags.bad_PI") "\t")+tag);
219          }
220          len=((tag+=cont).length());
221        }
222        char const *n;
223        GUTF8String xtag = tag.substr(2,-1);
224        GUTF8String xname = tagtoname(xtag,n);
225        if(xname.downcase() == "xml")
226        {
227          ParseValues(n,args);
228          for(GPosition pos=args;pos;++pos)
229          {
230            if(args.key(pos) == "encoding")
231            {
232              const GUTF8String e=args[pos].upcase();
233              if(e != encoding)
234              {
235                xmlbs.set_encoding((encoding=e));
236              }
237            }
238          }
239        }
240        break;
241      }
242      case '!':
243      {
244        if(tag[2] == '-' && tag[3] == '-')
245        {
246          while((len < 7) ||
247            (tag.substr(len-3,-1) != "-->"))
248          {
249            GUTF8String cont(xmlbs.gets(0,'>',true));
250            if(!cont.length())
251            { 
252              GUTF8String mesg;
253              mesg.format( ERR_MSG("XMLTags.bad_comment") "\t%s",(const char *)tag);
254              G_THROW(mesg);
255            }
256            len=((tag+=cont).length());
257          }
258        }
259        break;
260      }
261      case '/':
262      {
263        GUTF8String xname=tagtoname(tag.substr(2,-1));
264        GPosition last=level.lastpos();
265        if(last)
266        {
267          if(level[last]->name != xname)
268          {
269            G_THROW( (ERR_MSG("XMLTags.unmatched_end") "\t")
270              +level[last]->name+("\t"+GUTF8String(level[last]->get_Line()))
271              +("\t"+xname)+("\t"+GUTF8String(linesread+1)));
272          }
273          level.del(last);
274        }else
275        {
276          G_THROW( ERR_MSG("XMLTags.bad_form") );
277        }
278        break;
279      }
280      default:
281      {
282        GPosition last=level.lastpos();
283        GP<lt_XMLTags> t;
284        if(last)
285        {
286          t=new lt_XMLTags(tag.substr(1,len-1));
287          level[last]->addtag(t);
288          if(tag[len-2] != '/')
289          {
290            level.append(t);
291          }
292        }else if(tag[len-2] != '/')
293        {
294          char const *n;
295          GUTF8String xtag = tag.substr(1,-1); 
296          name=tagtoname(xtag, n);
297          ParseValues(n,args);
298          t=this;
299          level.append(t);
300        }else
301        {
302          G_THROW( ERR_MSG("XMLTags.no_body") );
303        }
304        t->set_Line(linesread+1);
305        break;
306      }
307    }
308    if((raw=xmlbs.gets(0,'<',false))[0])
309    { 
310      linesread=xmlbs.get_lines_read();
311      GPosition last=level.lastpos();
312      if(last)
313      {
314        level[last]->addraw(raw);
315      }else if(!isspaces(raw))
316      {
317        G_THROW(( ERR_MSG("XMLTags.raw_string") "\t")+raw);
318      }
319    }
320  }
321}
322
323GPList<lt_XMLTags>
324lt_XMLTags::get_Tags(char const tagname[]) const
325{
326  GPosition pos=allTags.contains(tagname);
327  GPList<lt_XMLTags> retval;
328  return (pos?allTags[pos]:retval);
329}
330
331void
332lt_XMLTags::get_Maps(char const tagname[],
333                     char const argn[],
334                     GPList<lt_XMLTags> list,
335                     GMap<GUTF8String, GP<lt_XMLTags> > &map)
336{
337  for(GPosition pos=list;pos;++pos)
338  {
339    GP<lt_XMLTags> &tag=list[pos];
340    if(tag)
341    {
342      GPosition loc;
343      if((loc=tag->contains(tagname)))
344      {
345        GPList<lt_XMLTags> maps=(GPList<lt_XMLTags> &)((*tag)[loc]);
346        for(GPosition mloc=maps;mloc;++mloc)
347        {
348          GP<lt_XMLTags> gtag=maps[mloc];
349          if(gtag)
350          {
351            GMap<GUTF8String,GUTF8String> &args=gtag->args;
352            GPosition gpos;
353            if((gpos=args.contains(argn)))
354            {
355              map[args[gpos]]=gtag;
356            }
357          }
358        }
359      }
360    }
361  }
362}
363
364void
365lt_XMLTags::write(ByteStream &bs,bool const top) const
366{
367  if(name.length())
368  {
369    GUTF8String tag="<"+name;
370    for(GPosition pos=args;pos;++pos)
371    {
372      tag+=GUTF8String(' ')+args.key(pos)+GUTF8String("=\42")+args[pos].toEscaped()+GUTF8String("\42");
373    }
374    GPosition tags=content;
375    if(tags||raw.length()) 
376    {
377      tag+=">";
378      bs.writall((const char *)tag,tag.length());
379      tag="</"+name+">";
380      if(raw.length())
381      {
382        bs.writestring(raw);
383      }
384      for(;tags;++tags)
385      {
386        content[tags].write(bs);
387      }
388    }else if(!raw.length())
389    {
390      tag+="/>";
391    }
392    bs.writall((const char *)tag,tag.length());
393  }
394  if(top)
395  {
396     bs.writall("\n",1);
397  }
398}
399
400void
401lt_XMLContents::write(ByteStream &bs) const
402{
403  if(tag)
404  {
405    tag->write(bs,false);
406  }
407  if(raw.length())
408  {
409    bs.writestring(raw);
410  } 
411}
412
413
414#ifdef HAVE_NAMESPACES
415}
416# ifndef NOT_USING_DJVU_NAMESPACE
417using namespace DJVU;
418# endif
419#endif
Note: See TracBrowser for help on using the repository browser.