source: trunk/libdjvu/UnicodeByteStream.cpp @ 76

Last change on this file since 76 was 17, checked in by Eugene Romanenko, 16 years ago

update makefiles, remove absolute paths, update djvulibre to version 3.5.17

File size: 9.6 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, Version 2. The license should have
9//C- accompanied the software or you may obtain a copy of the license
10//C- from the Free Software Foundation at http://www.fsf.org .
11//C-
12//C- This program is distributed in the hope that it will be useful,
13//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
14//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15//C- GNU General Public License for more details.
16//C-
17//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
18//C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech
19//C- Software authorized us to replace the original DjVu(r) Reference
20//C- Library notice by the following text (see doc/lizard2002.djvu):
21//C-
22//C-  ------------------------------------------------------------------
23//C- | DjVu (r) Reference Library (v. 3.5)
24//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
25//C- | The DjVu Reference Library is protected by U.S. Pat. No.
26//C- | 6,058,214 and patents pending.
27//C- |
28//C- | This software is subject to, and may be distributed under, the
29//C- | GNU General Public License, Version 2. The license should have
30//C- | accompanied the software or you may obtain a copy of the license
31//C- | from the Free Software Foundation at http://www.fsf.org .
32//C- |
33//C- | The computer code originally released by LizardTech under this
34//C- | license and unmodified by other parties is deemed "the LIZARDTECH
35//C- | ORIGINAL CODE."  Subject to any third party intellectual property
36//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
37//C- | non-exclusive license to make, use, sell, or otherwise dispose of
38//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
39//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
40//C- | General Public License.   This grant only confers the right to
41//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
42//C- | the extent such infringement is reasonably necessary to enable
43//C- | recipient to make, have made, practice, sell, or otherwise dispose
44//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
45//C- | any greater extent that may be necessary to utilize further
46//C- | modifications or combinations.
47//C- |
48//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
49//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
50//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
51//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
52//C- +------------------------------------------------------------------
53//
54// $Id: UnicodeByteStream.cpp,v 1.8 2003/11/07 22:08:22 leonb Exp $
55// $Name:  $
56
57#ifdef HAVE_CONFIG_H
58# include "config.h"
59#endif
60#if NEED_GNUG_PRAGMAS
61# pragma implementation
62#endif
63
64#include "UnicodeByteStream.h"
65#include "ByteStream.h"
66
67
68#ifdef HAVE_NAMESPACES
69namespace DJVU {
70# ifdef NOT_DEFINED // Just to fool emacs c++ mode
71}
72#endif
73#endif
74
75UnicodeByteStream::UnicodeByteStream(const UnicodeByteStream &uni)
76: bs(uni.bs), buffer(uni.buffer), bufferpos(uni.bufferpos), linesread(0)
77{
78  startpos=bs->tell();
79}
80
81UnicodeByteStream::UnicodeByteStream(
82  GP<ByteStream> ibs,const GStringRep::EncodeType et)
83: bs(ibs), bufferpos(0), linesread(0)
84{
85  buffer=GUTF8String::create(0,0,et);
86  startpos=bs->tell();
87}
88
89UnicodeByteStream::~UnicodeByteStream()
90{}
91
92static int
93CountLines(const GUTF8String &str)
94{
95  int retval=0;
96  static const unsigned long lf='\n';
97  for(int pos=0;(pos=str.search(lf,pos)+1)>0;++retval)
98    EMPTY_LOOP;
99  return retval;
100}
101
102void
103UnicodeByteStream::set_encodetype(const GStringRep::EncodeType et)
104{
105  seek(startpos,SEEK_SET);
106  bufferpos=0;
107  buffer=GUTF8String::create(0,0,et);
108}
109
110void
111UnicodeByteStream::set_encoding(const GUTF8String &xencoding)
112{
113  seek(startpos,SEEK_SET);
114  bufferpos=0;
115  buffer=GUTF8String::create(0,0,xencoding);
116}
117
118size_t
119UnicodeByteStream::read(void *buf, size_t size)
120{
121  bufferpos=0;
122  const int retval=bs->read(buf,size);
123  if(retval)
124  {
125    buffer=GUTF8String::create(
126      (unsigned char const *)buf,retval,buffer.get_remainder());
127  }else
128  {
129    buffer=GUTF8String::create(0,0,buffer.get_remainder());
130  }
131  return retval;
132}
133
134size_t
135UnicodeByteStream::write(const void *buf, size_t size)
136{
137  bufferpos=0;
138  buffer=GUTF8String::create(0,0,buffer.get_remainder());
139  return bs->write(buf,size);
140}
141
142long 
143UnicodeByteStream::tell(void) const
144{
145  return bs->tell();
146}
147
148UnicodeByteStream & 
149UnicodeByteStream::operator=(UnicodeByteStream &uni)
150{
151  bs=uni.bs;
152  bufferpos=uni.bufferpos;
153  buffer=uni.buffer;
154  return *this;
155}
156
157int 
158UnicodeByteStream::seek
159(long offset, int whence, bool nothrow)
160{
161  int retval=bs->seek(offset,whence,nothrow);
162  bufferpos=0;
163  buffer=GUTF8String::create(0,0,buffer.get_remainder());
164  return retval;
165}
166
167void 
168UnicodeByteStream::flush(void)
169{
170  bs->flush();
171  bufferpos=0;
172  buffer=GUTF8String::create(0,0,buffer.get_remainder());
173}
174
175
176
177GUTF8String
178UnicodeByteStream::gets(
179  size_t const t,unsigned long const stopat,bool const inclusive)
180{
181  GUTF8String retval;
182  unsigned int len=buffer.length()-bufferpos;
183  if(!len)
184  {
185    int i;
186    char *buf;
187        static const size_t bufsize=327680;
188    GPBuffer<char> gbuf(buf,bufsize);
189    while((i=read(buf,bufsize)>0))
190    {
191      if((len=buffer.length()-bufferpos))
192        break;
193    }
194  }
195  if(len)
196  {
197    int i=buffer.search((char)stopat,bufferpos);
198    if(i>=0)
199    {
200      if(inclusive)
201      {
202        ++i;
203      }
204      if(t&&(i>(int)t+bufferpos))
205      {
206        i=t+bufferpos;
207      }
208      if(i>bufferpos)
209      {
210        retval=buffer.substr(bufferpos,i-bufferpos);
211      }
212      bufferpos=i;
213      linesread+=CountLines(retval);
214    }else
215    {
216      retval=buffer.substr(bufferpos,len);
217      bufferpos=buffer.length();
218      linesread+=CountLines(retval);
219      retval+=gets(t?(t-(i-bufferpos)):0,stopat,inclusive);
220    }
221  }
222  return retval;
223}
224
225XMLByteStream::XMLByteStream(UnicodeByteStream &uni)
226: UnicodeByteStream(uni) {}
227
228XMLByteStream::XMLByteStream(GP<ByteStream> &ibs) 
229: UnicodeByteStream(ibs,GStringRep::XOTHER)
230{}
231
232GP<XMLByteStream>
233XMLByteStream::create(GP<ByteStream> ibs) 
234{
235  XMLByteStream *xml=new XMLByteStream(ibs);
236  GP<XMLByteStream> retval=xml;
237  xml->init();
238  return retval;
239}
240
241void
242XMLByteStream::init(void)
243{
244  unsigned char buf[4];
245  GP<ByteStream> ibs=bs;
246  bufferpos=0;
247  bs->readall(buf,sizeof(buf));
248  const unsigned int i=(buf[0]<<8)+buf[1];
249  switch(i)
250  {
251    case 0x0000:
252    {
253      const unsigned int j=(buf[2]<<8)+buf[3];
254      switch(j)
255      {
256        case 0x003C:
257        {
258          buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUCS4BE);
259          break;
260        }
261        case 0x3C00:
262        {
263          buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUCS4_2143);
264          break;
265        }
266        case 0xFEFF:
267        {
268          buffer=GUTF8String::create(0,0,GStringRep::XUCS4BE);
269          startpos+=sizeof(buf);
270          break;
271        }
272        case 0xFFFE:
273        {
274          buffer=GUTF8String::create(0,0,GStringRep::XUCS4_2143);
275          startpos+=sizeof(buf);
276          break;
277        }
278        default:
279        {
280          buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
281          break;
282        }
283      }
284    }
285    case 0x003C:
286    {
287      const unsigned int j=(buf[2]<<8)+buf[3];
288      switch(j)
289      {
290        case 0x0000:
291          buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUCS4_3412);
292          break;
293        case 0x003F:
294          buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF16BE);
295          break;
296        default:
297          buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
298          break;
299      }
300      break;
301    }
302    case 0x3C00:
303    {
304      const unsigned int j=(buf[2]<<8)+buf[3];
305      switch(j)
306      {
307        case 0x0000:
308          buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUCS4LE);
309          break;
310        case 0x3F00:
311          buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF16LE);
312          break;
313        default:
314          buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
315          break;
316      }
317      break;
318    }
319    case 0x4C6F:
320    {
321      const unsigned int j=(buf[2]<<8)+buf[3];
322      buffer=GUTF8String::create(buf,sizeof(buf),
323         (j == 0xA794)?(GStringRep::XEBCDIC):(GStringRep::XUTF8));
324      break;
325    }
326    case 0xFFFE:
327    {
328      buffer=GUTF8String::create(buf+2,sizeof(buf)-2,GStringRep::XUTF16LE);
329      startpos+=2;
330      break;
331    }
332    case 0xFEFF:
333    {
334      buffer=GUTF8String::create(buf+2,sizeof(buf)-2,GStringRep::XUTF16BE);
335      startpos+=2;
336      break;
337    }
338    case 0xEFBB:
339    {
340      if(buf[2] == 0xBF)
341      {
342        buffer=GUTF8String::create(buf+3,sizeof(buf)-3,GStringRep::XUTF8);
343        startpos+=3;
344      }else
345      {
346        buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
347      }
348      break;
349    }
350    case 0x3C3F:
351    default:
352    {
353      buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
354    }
355  }
356  bs=ibs;
357}
358
359XMLByteStream::~XMLByteStream()
360{}
361
362
363#ifdef HAVE_NAMESPACES
364}
365# ifndef NOT_USING_DJVU_NAMESPACE
366using namespace DJVU;
367# endif
368#endif
Note: See TracBrowser for help on using the repository browser.