source: trunk/libdjvu/ByteStream.h @ 269

Last change on this file since 269 was 206, checked in by Eugene Romanenko, 14 years ago

DJVU plugin: djvulibre updated to version 3.5.19

File size: 18.4 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, either Version 2 of the license,
9//C- or (at your option) any later version. The license should have
10//C- accompanied the software or you may obtain a copy of the license
11//C- from the Free Software Foundation at http://www.fsf.org .
12//C-
13//C- This program is distributed in the hope that it will be useful,
14//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16//C- GNU General Public License for more details.
17//C-
18//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19//C- Lizardtech Software.  Lizardtech Software has authorized us to
20//C- replace the original DjVu(r) Reference Library notice by the following
21//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22//C-
23//C-  ------------------------------------------------------------------
24//C- | DjVu (r) Reference Library (v. 3.5)
25//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26//C- | The DjVu Reference Library is protected by U.S. Pat. No.
27//C- | 6,058,214 and patents pending.
28//C- |
29//C- | This software is subject to, and may be distributed under, the
30//C- | GNU General Public License, either Version 2 of the license,
31//C- | or (at your option) any later version. The license should have
32//C- | accompanied the software or you may obtain a copy of the license
33//C- | from the Free Software Foundation at http://www.fsf.org .
34//C- |
35//C- | The computer code originally released by LizardTech under this
36//C- | license and unmodified by other parties is deemed "the LIZARDTECH
37//C- | ORIGINAL CODE."  Subject to any third party intellectual property
38//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39//C- | non-exclusive license to make, use, sell, or otherwise dispose of
40//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42//C- | General Public License.   This grant only confers the right to
43//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44//C- | the extent such infringement is reasonably necessary to enable
45//C- | recipient to make, have made, practice, sell, or otherwise dispose
46//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47//C- | any greater extent that may be necessary to utilize further
48//C- | modifications or combinations.
49//C- |
50//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54//C- +------------------------------------------------------------------
55//
56// $Id: ByteStream.h,v 1.15 2007/03/25 20:48:29 leonb Exp $
57// $Name: release_3_5_19 $
58
59#ifndef _BYTESTREAM_H
60#define _BYTESTREAM_H
61#ifdef HAVE_CONFIG_H
62#include "config.h"
63#endif
64#if NEED_GNUG_PRAGMAS
65# pragma interface
66#endif
67
68/** @name ByteStream.h
69   
70    Files #"ByteStream.h"# and #"ByteStream.cpp"# define input/output classes
71    similar in spirit to the well known C++ #iostream# classes.  Class
72    \Ref{ByteStream} is an abstract base class for all byte streams.  It
73    defines a virtual interface and also provides useful functions.  These
74    files provide three subclasses. Class \Ref{ByteStream::Stdio} provides a
75    simple interface to the ANSI C buffered input/output functions. Class
76    \Ref{ByteStream::Memory} provides stream-like access to a dynamical array
77    maintained in memory. Class \Ref{ByteStream::Static} provides read-only
78    stream-like access to a user allocated data buffer.
79
80    {\bf Notes} --- These classes were partly written because we did not want to
81    depend on the standard C++ library.  The main reason however is related to
82    the browser interface. We want to have a tight control over the
83    implementation of subclasses because we want to use a byte stream to
84    represent data passed by a web browser to a plugin.  This operation
85    involves multi-threading issues that many implementations of the standard
86    C++ library would squarely ignore.
87
88    @memo
89    Input/output classes
90    @author
91    L\'eon Bottou <leonb@research.att.com> -- initial implementation\\
92    Andrei Erofeev <eaf@geocities.com> --
93
94// From: Leon Bottou, 1/31/2002
95// This file has very little to do with my initial implementation.
96// It has been practically rewritten by Lizardtech for i18n changes.
97// Our original implementation consisted of multiple classes.
98// <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
99
100
101    @version
102    #$Id: ByteStream.h,v 1.15 2007/03/25 20:48:29 leonb Exp $# */
103//@{
104
105
106#include "Arrays.h"
107#include <stdio.h>
108
109#ifdef HAVE_NAMESPACES
110namespace DJVU {
111# ifdef NOT_DEFINED // Just to fool emacs c++ mode
112}
113#endif
114#endif
115
116class GURL;
117class GUTF8String;
118class GNativeString;
119
120/** Abstract class for a stream of bytes.  Class #ByteStream# represents an
121    object from which (resp. to which) bytes can be read (resp. written) as
122    with a regular file.  Virtual functions #read# and #write# must implement
123    these two basic operations.  In addition, function #tell# returns an
124    offset identifying the current position, and function #seek# may be used
125    to change the current position.
126
127    {\bf Note}. Both the copy constructor and the copy operator are declared
128    as private members. It is therefore not possible to make multiple copies
129    of instances of this class, as implied by the class semantics.
130*/
131class ByteStream : public GPEnabled
132{
133public:
  // Forward declarations of the nested stream classes.  Of these, only
  // Wrapper is defined further down in this header.
134  class Stdio;
135  class Static;
136  class Memory;
137  class Wrapper;
  /** Code page tag carried by every stream; the protected constructor
      initializes #cp# to #AUTO#.
      NOTE(review): presumably selects the text encoding used by the
      string helpers -- confirm against ByteStream.cpp. */
138  enum codepage_type {RAW,AUTO,NATIVE,UTF8} cp;
139
140  /** @name Virtual Functions.
141      These functions are usually implemented by each subclass of #ByteStream#.
142  */
143  //@{
144public:
145  /** Virtual destructor. */
146  virtual ~ByteStream();
147  /** Reads data from a ByteStream.  This function {\em must} be implemented
148      by each subclass of #ByteStream#.  At most #size# bytes are read from
149      the ByteStream and stored in the memory area pointed to by #buffer#.
150      Function #read# returns immediately if #size# is zero. The actual number
151      of bytes read is returned.  Function #read# returns a number of bytes
152      smaller than #size# if the end-of-file mark is reached before filling
153      the buffer. Subsequent invocations will always return value #0#.
154      Function #read# may also return a value greater than zero but smaller
155      than #size# for internal reasons. Programs must be ready to handle these
156      cases or use function \Ref{readall}. Exception \Ref{GException} is
157      thrown with a plain text error message whenever an error occurs.
      Note that, unlike #tell#, this function is not declared pure virtual. */
158  virtual size_t read(void *buffer, size_t size);
159  /** Writes data to a ByteStream.  This function {\em must} be implemented by
160      each subclass of #ByteStream#.  At most #size# bytes from buffer
161      #buffer# are written to the ByteStream.  Function #write# returns
162      immediately if #size# is zero.  The actual number of bytes written is
163      returned. Function #write# may also return a value greater than zero but
164      smaller than #size# for internal reasons. Programs must be ready to
165      handle these cases or use function \Ref{writall}. Exception
166      \Ref{GException} is thrown with a plain text error message whenever an
167      error occurs.
      Note that, unlike #tell#, this function is not declared pure virtual. */
168  virtual size_t write(const void *buffer, size_t size);
169  /** Returns the offset of the current position in the ByteStream.  This
170      function {\em must} be implemented by each subclass of #ByteStream#
      (it is the only pure virtual function of the class). */
171  virtual long tell(void) const  = 0;
172  /** Sets the current position for reading or writing the ByteStream.  Class
173      #ByteStream# provides a default implementation able to seek forward by
174      calling function #read# until reaching the desired position.  Subclasses
175      implementing better seek capabilities must override this default
176      implementation.  The new current position is computed by applying
177      displacement #offset# to the position represented by argument
178      #whence#. The following values are recognized for argument #whence#:
179      \begin{description}
180      \item[#SEEK_SET#] Argument #offset# indicates the position relative to
181      the beginning of the ByteStream.
182      \item[#SEEK_CUR#] Argument #offset# is a signed displacement relative to
183      the current position.
184      \item[#SEEK_END#] Argument #offset# is a displacement relative to the end
185      of the file. It is then advisable to provide a negative value for #offset#.
186      \end{description}
187      Results are undefined whenever the new position is greater than the
188      total size of the ByteStream.
189
190      {\bf Error reporting}:
191      If #seek()# succeeds, #0# is returned. Otherwise it either returns
192      #-1# (if #nothrow# is set to #true#) or throws the \Ref{GException}
193      exception (if #nothrow# is #false#, the default). */
194  virtual int seek(long offset, int whence = SEEK_SET, bool nothrow=false);
195  /** Flushes all buffers in the ByteStream.  Calling this function
196      guarantees that pending data have been actually written (i.e. passed to
197      the operating system). Class #ByteStream# provides a default
198      implementation which does nothing. */
199  virtual void flush(void);
200  //@}
201  /** @name Utility Functions. 
202      Class #ByteStream# implements these functions using the virtual
203      interface functions only.  All subclasses of #ByteStream# inherit these
204      functions. */
205  //@{
206public:
207  /** Reads data and blocks until everything has been read.  This function is
208      essentially similar to function #read#.  Unlike function #read# however,
209      function #readall# will never return a value smaller than #size# unless
210      an end-of-file mark is reached.  This is implemented by repeatedly
211      calling function #read# until everything is read or until we reach an
212      end-of-file mark.  Note that #read# and #readall# are equivalent when
213      #size# is one. */
214  size_t readall(void *buffer, size_t size);
215  /** Writes data and blocks until everything has been written.  This function
216      is essentially similar to function #write#.  Unlike function #write#
217      however, function #writall# will only return after all #size# bytes have
218      been written.  This is implemented by repeatedly calling function
219      #write# until everything is written.  Note that #write# and #writall#
220      are equivalent when #size# is one. */
221  size_t writall(const void *buffer, size_t size);
222  /** Copy data from another ByteStream.  A maximum of #size# bytes are read
223      from the ByteStream #bsfrom# and are written to the ByteStream #*this#
224      at the current position.  Less than #size# bytes may be written if an
225      end-of-file mark is reached on #bsfrom#.  This function returns the
226      total number of bytes copied.  Setting argument #size# to zero (the
227      default value) has a special meaning: the copying process will continue
228      until reaching the end-of-file mark on ByteStream #bsfrom#, regardless
229      of the number of bytes transferred.  */
230  size_t copy(ByteStream &bsfrom, size_t size=0);
231  /// Allows printf() type operations to a bytestream.
232  size_t format(const char *fmt, ... );
233  /// Allows scanf() type operations on a bytestream.
234  int scanf(const char *fmt, ... );
235  /** Writes the string as is, to the specified stream. */
236  size_t writestring(const GUTF8String &s);
237  /** Writes the string as is, to the specified stream. */
238  size_t writestring(const GNativeString &s);
239  /** Formats the message string, looks up the external representation
240      and writes it to the specified stream. */
241  void formatmessage( const char *fmt, ... );
242  /** Looks up the message and writes it to the specified stream. */
243  void writemessage( const char *message );
244  /** Writes a one-byte integer to a ByteStream. */
245  void write8 (unsigned int card8);
246  /** Writes a two-bytes integer to a ByteStream.
247      The integer most significant byte is written first,
248      regardless of the processor endianness. */
249  void write16(unsigned int card16);
250  /** Writes a three-bytes integer to a ByteStream.
251      The integer most significant byte is written first,
252      regardless of the processor endianness. */
253  void write24(unsigned int card24);
254  /** Writes a four-bytes integer to a ByteStream.
255      The integer most significant bytes are written first,
256      regardless of the processor endianness. */
257  void write32(unsigned int card32);
258  /** Reads a one-byte integer from a ByteStream. */
259  unsigned int read8 ();
260  /** Reads a two-bytes integer from a ByteStream.
261      The integer most significant byte is read first,
262      regardless of the processor endianness. */
263  unsigned int read16();
264  /** Reads a three-bytes integer from a ByteStream.
265      The integer most significant byte is read first,
266      regardless of the processor endianness. */
267  unsigned int read24();
268  /** Reads a four-bytes integer from a ByteStream.
269      The integer most significant bytes are read first,
270      regardless of the processor endianness. */
271  unsigned int read32();
272  /** Returns the total number of bytes contained in the buffer, file, etc.
273      Valid offsets for function #seek# range from 0 to the value returned
274      by this function. */
275  virtual int size(void) const;
276  /// Use at your own risk, only guaranteed to work for ByteStream::Memory objects.
277  TArray<char> get_data(void);
278  /** Reads data from a random position. This function reads at most #sz#
279      bytes at position #pos# into #buffer# and returns the actual number of
280      bytes read.  The current position is unchanged. */
281  virtual size_t readat(void *buffer, size_t sz, int pos);
282  //@}
283protected:
  /** Default constructor, reserved for subclasses and the #create#
      factories.  Sets the code page tag #cp# to #AUTO#. */
284  ByteStream(void) : cp(AUTO) {};
285private:
286  // Copy construction and assignment are declared private (and not defined
  // in this header) so that ByteStream objects cannot be copied.
287  ByteStream(const ByteStream &);
288  ByteStream & operator=(const ByteStream &);
289public:
290  /** Constructs an empty Memory ByteStream.  The buffer itself is organized
291      as an array of 4096 byte blocks.  The buffer is initially empty. You
292      must first use function #write# to store data into the buffer, use
293      function #seek# to rewind the current position, and function #read# to
294      read the data back. */
295  static GP<ByteStream> create(void);
296  /** Constructs a Memory ByteStream by copying initial data.  The
297      Memory buffer is initialized with #size# bytes copied from the
298      memory area pointed to by #buffer#. */
299  static GP<ByteStream> create(void const * const buffer, const size_t size);
300  /** Constructs a ByteStream for accessing the file named #url#.
301      Arguments #url# and #mode# are similar to the arguments of the well
302      known stdio function #fopen#. In addition a url of #-# will be
303      interpreted as the standard output or the standard input according to
304      #mode#.  This constructor will open a stdio file and construct a
305      ByteStream object accessing this file. Destroying the ByteStream object
306      will flush and close the associated stdio file.  Exception
307      \Ref{GException} is thrown with a plain text error message if the stdio
308      file cannot be opened. */
309  static GP<ByteStream> create(
310    const GURL &url, char const * const mode);
311  /** Same as the above, but uses stdin or stdout */
312  static GP<ByteStream> create( char const * const mode);
313
314  /** Constructs a ByteStream for accessing the file descriptor #fd#.
315      Argument #mode# indicates the type of access, as in the
316      well known stdio function #fopen#.  Destroying the ByteStream
317      object will not close the descriptor #fd# unless #closeme# is true. */
318  static GP<ByteStream> create(
319    const int fd, char const * const mode, const bool closeme);
320
321  /** Constructs a ByteStream for accessing the stdio file #f#.
322      Argument #mode# indicates the type of the stdio file, as in the
323      well known stdio function #fopen#.  Destroying the ByteStream
324      object will not close the stdio file #f# unless #closeme# is true. */
325  static GP<ByteStream> create(
326    FILE * const f, char const * const mode, const bool closeme);
327  /** Creates a ByteStream object for accessing the memory area of
328      length #size# starting at address #buffer#.  This call implements
329      a read-only ByteStream interface for a memory area specified by
330      the user at construction time. Calls to function #read# directly
331      access this memory area.  The user must therefore make sure that its
332      content remains valid long enough.  */
333  static GP<ByteStream> create_static(void const *buffer, size_t size);
334 
335  /** Easy access to preallocated stdin/stdout/stderr bytestreams */
336  static GP<ByteStream> get_stdin(char const * mode=0);
337  static GP<ByteStream> get_stdout(char const * mode=0); 
338  static GP<ByteStream> get_stderr(char const * mode=0);
339
340  /** This is the conventional name for EOF exceptions */
341  static const char *EndOfFile;
342  /** Returns the contents of the file as a GNativeString */
343  GNativeString getAsNative(void);
344  /** Returns the contents of the file as a GUTF8String */
345  GUTF8String getAsUTF8(void);
346};
347
348inline size_t
349ByteStream::readat(void *buffer, size_t sz, int pos)
350{
351  size_t retval;
352  long tpos=tell();
353  seek(pos, SEEK_SET, true);
354  retval=readall(buffer,sz);
355  seek(tpos, SEEK_SET, true);
356  return retval;
357}
358
359inline int
360ByteStream::size(void) const
361{
362  ByteStream *bs=const_cast<ByteStream *>(this);
363  int bsize=(-1);
364  long pos=tell();
365  if(bs->seek(0,SEEK_END,true))
366  {
367    bsize=(int)tell();
368    (void)(bs->seek(pos,SEEK_SET,false));
369  }
370  return bsize;
371}
372
373/** ByteStream::Wrapper implements wrapping bytestream.  This is useful
374    for derived classes that take a GP<ByteStream> as a creation argument,
375    and the backwards compatible bytestreams.  */
376class ByteStream::Wrapper : public ByteStream
377{
378protected:
379  GP<ByteStream> gbs;
380  ByteStream *bs;
381  Wrapper(void) : bs(0) {}
382  Wrapper(const GP<ByteStream> &xbs) : gbs(xbs), bs(xbs) {}
383public:
384  ~Wrapper();
385  ByteStream * operator & () const {return bs;}
386  ByteStream * operator & () {return bs;}
387  virtual size_t read(void *buffer, size_t size)
388    { return bs->read(buffer,size); }
389  virtual size_t write(const void *buffer, size_t size)
390    { return bs->write(buffer,size); }
391  virtual long tell(void) const
392    { return bs->tell(); }
393  virtual int seek(long offset, int whence = SEEK_SET, bool nothrow=false)
394    { return bs->seek(offset,whence,nothrow); }
395  virtual void flush(void)
396    { bs->flush(); }
397};
398
399
400//@}
401
402// ------------ THE END
403
404#ifdef HAVE_NAMESPACES
405}
406# ifndef NOT_USING_DJVU_NAMESPACE
407using namespace DJVU;
408# endif
409#endif
410#endif
411
Note: See TracBrowser for help on using the repository browser.