source: trunk/libdjvu/XMLTags.cpp @ 15

Last change on this file since 15 was 15, checked in by Eugene Romanenko, 15 years ago

needed libs update

File size: 10.4 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, Version 2. The license should have
9//C- accompanied the software or you may obtain a copy of the license
10//C- from the Free Software Foundation at http://www.fsf.org .
11//C-
12//C- This program is distributed in the hope that it will be useful,
13//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
14//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15//C- GNU General Public License for more details.
16//C-
17//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
18//C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech
19//C- Software authorized us to replace the original DjVu(r) Reference
20//C- Library notice by the following text (see doc/lizard2002.djvu):
21//C-
22//C-  ------------------------------------------------------------------
23//C- | DjVu (r) Reference Library (v. 3.5)
24//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
25//C- | The DjVu Reference Library is protected by U.S. Pat. No.
26//C- | 6,058,214 and patents pending.
27//C- |
28//C- | This software is subject to, and may be distributed under, the
29//C- | GNU General Public License, Version 2. The license should have
30//C- | accompanied the software or you may obtain a copy of the license
31//C- | from the Free Software Foundation at http://www.fsf.org .
32//C- |
33//C- | The computer code originally released by LizardTech under this
34//C- | license and unmodified by other parties is deemed "the LIZARDTECH
35//C- | ORIGINAL CODE."  Subject to any third party intellectual property
36//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
37//C- | non-exclusive license to make, use, sell, or otherwise dispose of
38//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
39//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
40//C- | General Public License.   This grant only confers the right to
41//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
42//C- | the extent such infringement is reasonably necessary to enable
43//C- | recipient to make, have made, practice, sell, or otherwise dispose
44//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
45//C- | any greater extent that may be necessary to utilize further
46//C- | modifications or combinations.
47//C- |
48//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
49//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
50//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
51//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
52//C- +------------------------------------------------------------------
53//
54// $Id: XMLTags.cpp,v 1.12 2003/11/07 22:08:22 leonb Exp $
55// $Name: release_3_5_16 $
56
57#ifdef HAVE_CONFIG_H
58# include "config.h"
59#endif
60#if NEED_GNUG_PRAGMAS
61# pragma implementation
62#endif
63
64// From: Leon Bottou, 1/31/2002
65// This is purely Lizardtech stuff.
66
67#include "XMLTags.h"
68#include "UnicodeByteStream.h"
69#include <ctype.h>
70#if HAS_WCTYPE
71#include <wctype.h>
72#endif
73
74
75#ifdef HAVE_NAMESPACES
76namespace DJVU {
77# ifdef NOT_DEFINED // Just to fool emacs c++ mode
78}
79#endif
80#endif
81
82lt_XMLContents::lt_XMLContents(void) {}
83
84lt_XMLContents::lt_XMLContents(GP<lt_XMLTags> t)
85{
86  tag=t;
87}
88
89static GUTF8String
90getargn(char const tag[], char const *&t)
91{
92  char const *s;
93  for(s=tag;isspace(*s);s++);
94  for(t=s;(*t)&&((*t)!='/')&&((*t)!='>')&&((*t)!='=')&&!isspace(*t);++t);
95  return GUTF8String(s,t-s);
96}
97
98static GUTF8String
99getargv(char const tag[], char const *&t)
100{
101  GUTF8String retval;
102  if(tag && tag[0] == '=')
103  {
104    char const *s=t=tag+1;
105    if((*t == '"')||(*t == '\47'))
106    {
107      char const q=*(t++);
108      for(s++;(*t)&&((*t)!=q)&&((*t)!='>');++t);
109      retval=GUTF8String(s,t-s);
110      if (t[0] == q)
111      {
112        ++t;
113      }
114    }else
115    {
116      for(t=s;(*t)&&((*t)!='/')&&((*t)!='>')&&!isspace(*t);++t);
117      retval=GUTF8String(s,t-s);
118    }
119  }else
120  {
121    t=tag;
122  }
123  return retval;
124}
125
126static GUTF8String
127tagtoname(char const tag[],char const *&t)
128{
129  char const *s;
130  for(s=tag;isspace(*s);s++);
131  for(t=s;(*t)&&((*t)!='>')&&((*t)!='/')&&!isspace(*t);++t);
132  return GUTF8String(s,t-s);
133}
134
135static inline GUTF8String
136tagtoname(char const tag[])
137{
138  char const *t;
139  return tagtoname(tag,t);
140}
141
142static inline bool
143isspaces(const GUTF8String &raw)
144{
145  return (raw.nextNonSpace() == (int)raw.length());
146}
147
148void
149lt_XMLTags::ParseValues(char const *t, GMap<GUTF8String,GUTF8String> &args,bool downcase)
150{
151  GUTF8String argn;
152  char const *tt;
153  while((argn=getargn(t,tt)).length())
154  {
155    if(downcase)
156      argn=argn.downcase();
157    args[argn]=getargv(tt,t).fromEscaped();
158  }
159}
160
161lt_XMLTags::~lt_XMLTags() {}
162
163lt_XMLTags::lt_XMLTags(void) : startline(0) {}
164
165lt_XMLTags::lt_XMLTags(const char n[]) : startline(0)
166{
167  char const *t;
168  name=tagtoname(n,t);
169  ParseValues(t,args);
170}
171
172void
173lt_XMLTags::init(const GP<ByteStream> &bs)
174{
175  GP<XMLByteStream> gxmlbs=XMLByteStream::create(bs);
176  init(*gxmlbs);
177}
178
179void
180lt_XMLTags::init(const GURL &url)
181{
182  const GP<ByteStream> bs=ByteStream::create(url,"rb");
183  init(bs);
184}
185
186void
187lt_XMLTags::init(XMLByteStream &xmlbs)
188{
189  if(!get_count())
190  {
191    G_THROW( ERR_MSG("XMLTags.no_GP") );
192  }
193  GPList<lt_XMLTags> level;
194  GUTF8String tag,raw(xmlbs.gets(0,'<',false));
195  int linesread=xmlbs.get_lines_read();
196  if(!isspaces(raw))
197  {
198    G_THROW( (ERR_MSG("XMLTags.raw_string") "\t")+raw);
199  }
200  GUTF8String encoding;
201  for(int len;(len=(tag=xmlbs.gets(0,'>',true)).length());)
202  {
203    if(tag[len-1] != '>')
204    {
205      G_THROW((ERR_MSG("XMLTags.bad_tag") "\t")+tag);
206    }
207    switch(tag[1])
208    {
209      case '?':
210      {
211        while(len < 4 || tag.substr(len-2,len) != "?>")
212        {
213          GUTF8String cont(xmlbs.gets(0,'>',true));
214          if(!cont.length())
215          { 
216            G_THROW( (ERR_MSG("XMLTags.bad_PI") "\t")+tag);
217          }
218          len=((tag+=cont).length());
219        }
220        char const *n;
221        GUTF8String xtag = tag.substr(2,-1);
222        GUTF8String xname = tagtoname(xtag,n);
223        if(xname.downcase() == "xml")
224        {
225          ParseValues(n,args);
226          for(GPosition pos=args;pos;++pos)
227          {
228            if(args.key(pos) == "encoding")
229            {
230              const GUTF8String e=args[pos].upcase();
231              if(e != encoding)
232              {
233                xmlbs.set_encoding((encoding=e));
234              }
235            }
236          }
237        }
238        break;
239      }
240      case '!':
241      {
242        if(tag[2] == '-' && tag[3] == '-')
243        {
244          while((len < 7) ||
245            (tag.substr(len-3,-1) != "-->"))
246          {
247            GUTF8String cont(xmlbs.gets(0,'>',true));
248            if(!cont.length())
249            { 
250              GUTF8String mesg;
251              mesg.format( ERR_MSG("XMLTags.bad_comment") "\t%s",(const char *)tag);
252              G_THROW(mesg);
253            }
254            len=((tag+=cont).length());
255          }
256        }
257        break;
258      }
259      case '/':
260      {
261        GUTF8String xname=tagtoname(tag.substr(2,-1));
262        GPosition last=level.lastpos();
263        if(last)
264        {
265          if(level[last]->name != xname)
266          {
267            G_THROW( (ERR_MSG("XMLTags.unmatched_end") "\t")
268              +level[last]->name+("\t"+GUTF8String(level[last]->get_Line()))
269              +("\t"+xname)+("\t"+GUTF8String(linesread+1)));
270          }
271          level.del(last);
272        }else
273        {
274          G_THROW( ERR_MSG("XMLTags.bad_form") );
275        }
276        break;
277      }
278      default:
279      {
280        GPosition last=level.lastpos();
281        GP<lt_XMLTags> t;
282        if(last)
283        {
284          t=new lt_XMLTags(tag.substr(1,len-1));
285          level[last]->addtag(t);
286          if(tag[len-2] != '/')
287          {
288            level.append(t);
289          }
290        }else if(tag[len-2] != '/')
291        {
292          char const *n;
293          GUTF8String xtag = tag.substr(1,-1); 
294          name=tagtoname(xtag, n);
295          ParseValues(n,args);
296          t=this;
297          level.append(t);
298        }else
299        {
300          G_THROW( ERR_MSG("XMLTags.no_body") );
301        }
302        t->set_Line(linesread+1);
303        break;
304      }
305    }
306    if((raw=xmlbs.gets(0,'<',false))[0])
307    { 
308      linesread=xmlbs.get_lines_read();
309      GPosition last=level.lastpos();
310      if(last)
311      {
312        level[last]->addraw(raw);
313      }else if(!isspaces(raw))
314      {
315        G_THROW(( ERR_MSG("XMLTags.raw_string") "\t")+raw);
316      }
317    }
318  }
319}
320
321GPList<lt_XMLTags>
322lt_XMLTags::get_Tags(char const tagname[]) const
323{
324  GPosition pos=allTags.contains(tagname);
325  GPList<lt_XMLTags> retval;
326  return (pos?allTags[pos]:retval);
327}
328
329void
330lt_XMLTags::get_Maps(char const tagname[],
331                     char const argn[],
332                     GPList<lt_XMLTags> list,
333                     GMap<GUTF8String, GP<lt_XMLTags> > &map)
334{
335  for(GPosition pos=list;pos;++pos)
336  {
337    GP<lt_XMLTags> &tag=list[pos];
338    if(tag)
339    {
340      GPosition loc;
341      if((loc=tag->contains(tagname)))
342      {
343        GPList<lt_XMLTags> maps=(GPList<lt_XMLTags> &)((*tag)[loc]);
344        for(GPosition mloc=maps;mloc;++mloc)
345        {
346          GP<lt_XMLTags> gtag=maps[mloc];
347          if(gtag)
348          {
349            GMap<GUTF8String,GUTF8String> &args=gtag->args;
350            GPosition gpos;
351            if((gpos=args.contains(argn)))
352            {
353              map[args[gpos]]=gtag;
354            }
355          }
356        }
357      }
358    }
359  }
360}
361
362void
363lt_XMLTags::write(ByteStream &bs,bool const top) const
364{
365  if(name.length())
366  {
367    GUTF8String tag="<"+name;
368    for(GPosition pos=args;pos;++pos)
369    {
370      tag+=GUTF8String(' ')+args.key(pos)+GUTF8String("=\42")+args[pos].toEscaped()+GUTF8String("\42");
371    }
372    GPosition tags=content;
373    if(tags||raw.length()) 
374    {
375      tag+=">";
376      bs.writall((const char *)tag,tag.length());
377      tag="</"+name+">";
378      if(raw.length())
379      {
380        bs.writestring(raw);
381      }
382      for(;tags;++tags)
383      {
384        content[tags].write(bs);
385      }
386    }else if(!raw.length())
387    {
388      tag+="/>";
389    }
390    bs.writall((const char *)tag,tag.length());
391  }
392  if(top)
393  {
394     bs.writall("\n",1);
395  }
396}
397
398void
399lt_XMLContents::write(ByteStream &bs) const
400{
401  if(tag)
402  {
403    tag->write(bs,false);
404  }
405  if(raw.length())
406  {
407    bs.writestring(raw);
408  } 
409}
410
411
412#ifdef HAVE_NAMESPACES
413}
414# ifndef NOT_USING_DJVU_NAMESPACE
415using namespace DJVU;
416# endif
417#endif
Note: See TracBrowser for help on using the repository browser.