source: trunk/libdjvu/ByteStream.h @ 17

Last change on this file since 17 was 17, checked in by Eugene Romanenko, 15 years ago

update makefiles, remove absolute paths, update djvulibre to version 3.5.17

File size: 19.0 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, Version 2. The license should have
9//C- accompanied the software or you may obtain a copy of the license
10//C- from the Free Software Foundation at http://www.fsf.org .
11//C-
12//C- This program is distributed in the hope that it will be useful,
13//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
14//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15//C- GNU General Public License for more details.
16//C-
17//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
18//C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech
19//C- Software authorized us to replace the original DjVu(r) Reference
20//C- Library notice by the following text (see doc/lizard2002.djvu):
21//C-
22//C-  ------------------------------------------------------------------
23//C- | DjVu (r) Reference Library (v. 3.5)
24//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
25//C- | The DjVu Reference Library is protected by U.S. Pat. No.
26//C- | 6,058,214 and patents pending.
27//C- |
28//C- | This software is subject to, and may be distributed under, the
29//C- | GNU General Public License, Version 2. The license should have
30//C- | accompanied the software or you may obtain a copy of the license
31//C- | from the Free Software Foundation at http://www.fsf.org .
32//C- |
33//C- | The computer code originally released by LizardTech under this
34//C- | license and unmodified by other parties is deemed "the LIZARDTECH
35//C- | ORIGINAL CODE."  Subject to any third party intellectual property
36//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
37//C- | non-exclusive license to make, use, sell, or otherwise dispose of
38//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
39//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
40//C- | General Public License.   This grant only confers the right to
41//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
42//C- | the extent such infringement is reasonably necessary to enable
43//C- | recipient to make, have made, practice, sell, or otherwise dispose
44//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
45//C- | any greater extent that may be necessary to utilize further
46//C- | modifications or combinations.
47//C- |
48//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
49//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
50//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
51//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
52//C- +------------------------------------------------------------------
53//
54// $Id: ByteStream.h,v 1.13 2006/01/31 15:28:30 leonb Exp $
55// $Name:  $
56
57#ifndef _BYTESTREAM_H
58#define _BYTESTREAM_H
59#ifdef HAVE_CONFIG_H
60#include "config.h"
61#endif
62#if NEED_GNUG_PRAGMAS
63# pragma interface
64#endif
65
66/** @name ByteStream.h
67   
68    Files #"ByteStream.h"# and #"ByteStream.cpp"# define input/output classes
69    similar in spirit to the well known C++ #iostream# classes.  Class
70    \Ref{ByteStream} is an abstract base class for all byte streams.  It
71    defines a virtual interface and also provides useful functions.  These
72    files provide several subclasses. Class \Ref{ByteStream::Stdio} provides a
73    simple interface to the Ansi C buffered input/output functions. Class
74    \Ref{ByteStream::Memory} provides stream-like access to a dynamical array
75    maintained in memory. Class \Ref{ByteStream::Static} provides read-only
76    stream-like access to a user allocated data buffer.
77
78    {\bf Notes} --- These classes were partly written because we did not want to
79    depend on the standard C++ library.  The main reason however is related to
80    the browser interface. We want to have a tight control over the
81    implementation of subclasses because we want to use a byte stream to
82    represent data passed by a web browser to a plugin.  This operation
83    involves multi-threading issues that many implementations of the standard
84    C++ library would squarely ignore.
85
86    @memo
87    Input/output classes
88    @author
89    L\'eon Bottou <leonb@research.att.com> -- initial implementation\\
90    Andrei Erofeev <eaf@geocities.com> --
91
92// From: Leon Bottou, 1/31/2002
93// This file has very little to do with my initial implementation.
94// It has been practically rewritten by Lizardtech for i18n changes.
95// Our original implementation consisted of multiple classes.
96// <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
97
98
99    @version
100    #$Id: ByteStream.h,v 1.13 2006/01/31 15:28:30 leonb Exp $# */
101//@{
102
103
104#include "Arrays.h"
105#include <stdio.h>
106
107#ifdef HAVE_NAMESPACES
108namespace DJVU {
109# ifdef NOT_DEFINED // Just to fool emacs c++ mode
110}
111#endif
112#endif
113
114class GURL;
115class GUTF8String;
116class GNativeString;
117
118/** Abstract class for a stream of bytes.  Class #ByteStream# represent an
119    object from which (resp. to which) bytes can be read (resp. written) as
120    with a regular file.  Virtual functions #read# and #write# must implement
121    these two basic operations.  In addition, function #tell# returns an
122    offset identifying the current position, and function #seek# may be used
123    to change the current position.
124
125    {\bf Note}. Both the copy constructor and the copy operator are declared
126    as private members. It is therefore not possible to make multiple copies
127    of instances of this class, as implied by the class semantic. 
128*/
129class ByteStream : public GPEnabled
130{
131public:
132  class Stdio;
133  class Static;
134  class Memory;
135  class Wrapper;
136  enum codepage_type {RAW,AUTO,NATIVE,UTF8} cp;
137
138  /** @name Virtual Functions.
139      These functions are usually implemented by each subclass of #ByteStream#.
140  */
141  //@{
142public:
143  /** Virtual destructor. */
144  virtual ~ByteStream();
145  /** Reads data from a ByteStream.  This function {\em must} be implemented
146      by each subclass of #ByteStream#.  At most #size# bytes are read from
147      the ByteStream and stored in the memory area pointed to by #buffer#.
148      Function #read# returns immediately if #size# is zero. The actual number
149      of bytes read is returned.  Function #read# returns a number of bytes
150      smaller than #size# if the end-of-file mark is reached before filling
151      the buffer. Subsequent invocations will always return value #0#.
152      Function #read# may also return a value greater than zero but smaller
153      than #size# for internal reasons. Programs must be ready to handle these
154      cases or use function \Ref{readall}. Exception \Ref{GException} is
155      thrown with a plain text error message whenever an error occurs. */
156  virtual size_t read(void *buffer, size_t size);
157  /** Writes data to a ByteStream.  This function {\em must} be implemented by
158      each subclass of #ByteStream#.  At most #size# bytes from buffer
159      #buffer# are written to the ByteStream.  Function #write# returns
160      immediately if #size# is zero.  The actual number of bytes written is
161      returned. Function #write# may also return a value greater than zero but
162      smaller than #size# for internal reasons. Programs must be ready to
163      handle these cases or use function \Ref{writall}. Exception
164      \Ref{GException} is thrown with a plain text error message whenever an
165      error occurs. */
166  virtual size_t write(const void *buffer, size_t size);
167  /** Returns the offset of the current position in the ByteStream.  This
168      function {\em must} be implemented by each subclass of #ByteStream#. */
169  virtual long tell(void) const  = 0;
170  /** Sets the current position for reading or writing the ByteStream.  Class
171      #ByteStream# provides a default implementation able to seek forward by
172      calling function #read# until reaching the desired position.  Subclasses
173      implementing better seek capabilities must override this default
174      implementation.  The new current position is computed by applying
175      displacement #offset# to the position represented by argument
176      #whence#. The following values are recognized for argument #whence#:
177      \begin{description}
178      \item[#SEEK_SET#] Argument #offset# indicates the position relative to
179      the beginning of the ByteStream.
180      \item[#SEEK_CUR#] Argument #offset# is a signed displacement relative to
181      the current position.
182      \item[#SEEK_END#] Argument #offset# is a displacement relative to the end
183      of the file. It is then advisable to provide a negative value for #offset#.
184      \end{description}
185      Results are undefined whenever the new position is greater than the
186      total size of the ByteStream.
187
188      {\bf Error reporting}:
189      If #seek()# succeeds, #0# is returned. Otherwise it either returns
190      #-1# (if #nothrow# is set to #FALSE#) or throws the \Ref{GException}
191      exception. */
192  virtual int seek(long offset, int whence = SEEK_SET, bool nothrow=false);
193  /** Flushes all buffers in the ByteStream.  Calling this function
194      guarantees that pending data have been actually written (i.e. passed to
195      the operating system). Class #ByteStream# provides a default
196      implementation which does nothing. */
197  virtual void flush(void);
198  //@}
199  /** @name Utility Functions. 
200      Class #ByteStream# implements these functions using the virtual
201      interface functions only.  All subclasses of #ByteStream# inherit these
202      functions. */
203  //@{
204public:
205  /** Reads data and blocks until everything has been read.  This function is
206      essentially similar to function #read#.  Unlike function #read# however,
207      function #readall# will never return a value smaller than #size# unless
208      an end-of-file mark is reached.  This is implemented by repeatedly
209      calling function #read# until everything is read or until we reach an
210      end-of-file mark.  Note that #read# and #readall# are equivalent when
211      #size# is one. */
212  size_t readall(void *buffer, size_t size);
213  /** Writes data and blocks until everything has been written.  This function
214      is essentially similar to function #write#.  Unlike function #write#
215      however, function #writall# will only return after all #size# bytes have
216      been written.  This is implemented by repeatedly calling function
217      #write# until everything is written.  Note that #write# and #writall#
218      are equivalent when #size# is one. */
219  size_t writall(const void *buffer, size_t size);
220  /** Copy data from another ByteStream.  A maximum of #size# bytes are read
221      from the ByteStream #bsfrom# and are written to the ByteStream #*this#
222      at the current position.  Less than #size# bytes may be written if an
223      end-of-file mark is reached on #bsfrom#.  This function returns the
224      total number of bytes copied.  Setting argument #size# to zero (the
225      default value) has a special meaning: the copying process will continue
226      until reaching the end-of-file mark on ByteStream #bsfrom#, regardless
227      of the number of bytes transferred.  */
228  size_t copy(ByteStream &bsfrom, size_t size=0);
229  /** Create a new #ByteStream# that copies the data from this #ByteStream#
230      starting from the current position, upto #size# bytes.  Setting the
231      #size# to zero means copy to the end-of-file mark. */
232  GP<ByteStream> duplicate(const size_t size=0) const;
233  /// Allows printf() type operations to a bytestream.
234  size_t format(const char *fmt, ... );
235  /// Allows scanf() type operations on a bytestream.
236  int scanf(const char *fmt, ... );
237  /** Writes the string as is, to the specified stream. */
238  size_t writestring(const GUTF8String &s);
239  /** Writes the string as is, to the specified stream. */
240  size_t writestring(const GNativeString &s);
241  /** Formats the message string, looks up the external representation
242      and writes it to the specified stream. */
243  void formatmessage( const char *fmt, ... );
244  /** Looks up the message and writes it to the specified stream. */
245  void writemessage( const char *message );
246  /** Writes a one-byte integer to a ByteStream. */
247  void write8 (unsigned int card8);
248  /** Writes a two-bytes integer to a ByteStream.
249      The integer most significant byte is written first,
250      regardless of the processor endianness. */
251  void write16(unsigned int card16);
252  /** Writes a three-bytes integer to a ByteStream.
253      The integer most significant byte is written first,
254      regardless of the processor endianness. */
255  void write24(unsigned int card24);
256  /** Writes a four-bytes integer to a ByteStream.
257      The integer most significant bytes are written first,
258      regardless of the processor endianness. */
259  void write32(unsigned int card32);
260  /** Reads a one-byte integer from a ByteStream. */
261  unsigned int read8 ();
262  /** Reads a two-bytes integer from a ByteStream.
263      The integer most significant byte is read first,
264      regardless of the processor endianness. */
265  unsigned int read16();
266  /** Reads a three-bytes integer from a ByteStream.
267      The integer most significant byte is read first,
268      regardless of the processor endianness. */
269  unsigned int read24();
270  /** Reads a four-bytes integer from a ByteStream.
271      The integer most significant bytes are read first,
272      regardless of the processor endianness. */
273  unsigned int read32();
274  /** Returns the total number of bytes contained in the buffer, file, etc.
275      Valid offsets for function #seek# range from 0 to the value returned
276      by this function. */
277  virtual int size(void) const;
278  /// Use at your own risk, only guarenteed to work for ByteStream::Memorys.
279  TArray<char> get_data(void);
280  /** Reads data from a random position. This function reads at most #sz#
281      bytes at position #pos# into #buffer# and returns the actual number of
282      bytes read.  The current position is unchanged. */
283  virtual size_t readat(void *buffer, size_t sz, int pos);
284  /// Returns false, unless a subclass of ByteStream::Static
285  virtual bool is_static(void) const { return false; }
286  //@}
287protected:
288  ByteStream(void) : cp(AUTO) {};
289private:
290  // Cancel C++ default stuff
291  ByteStream(const ByteStream &);
292  ByteStream & operator=(const ByteStream &);
293public:
294  /** Constructs an empty Memory ByteStream.  The buffer itself is organized
295      as an array of 4096 byte blocks.  The buffer is initially empty. You
296      must first use function #write# to store data into the buffer, use
297      function #seek# to rewind the current position, and function #read# to
298      read the data back. */
299  static GP<ByteStream> create(void);
300  /** Constructs a Memory ByteStream by copying initial data.  The
301      Memory buffer is initialized with #size# bytes copied from the
302      memory area pointed to by #buffer#. */
303  static GP<ByteStream> create(void const * const buffer, const size_t size);
304  /** Constructs a ByteStream for accessing the file named #url#.
305      Arguments #url# and #mode# are similar to the arguments of the well
306      known stdio function #fopen#. In addition a url of #-# will be
307      interpreted as the standard output or the standard input according to
308      #mode#.  This constructor will open a stdio file and construct a
309      ByteStream object accessing this file. Destroying the ByteStream object
310      will flush and close the associated stdio file.  Exception
311      \Ref{GException} is thrown with a plain text error message if the stdio
312      file cannot be opened. */
313  static GP<ByteStream> create(
314    const GURL &url, char const * const mode);
315  /** Same as the above, but uses stdin or stdout */
316  static GP<ByteStream> create( char const * const mode);
317
318  /** Constructs a ByteStream for accessing the stdio file #f#.
319      Argument #mode# indicates the type of the stdio file, as in the
320      well known stdio function #fopen#.  Destroying the ByteStream
321      object will not close the stdio file #f# unless closeme is true. */
322  static GP<ByteStream> create(
323    const int fd, char const * const mode, const bool closeme);
324
325  /** Constructs a ByteStream for accessing the stdio file #f#.
326      Argument #mode# indicates the type of the stdio file, as in the
327      well known stdio function #fopen#.  Destroying the ByteStream
328      object will not close the stdio file #f# unless closeme is true. */
329  static GP<ByteStream> create(
330    FILE * const f, char const * const mode, const bool closeme);
331  /** Creates a ByteStream object for allocating the memory area of
332      length #sz# starting at address #buffer#.  This call impliments
333      a read-only ByteStream interface for a memory area specified by
334      the user at construction time. Calls to function #read# directly
335      access this memory area.  The user must therefore make sure that its
336      content remain valid long enough.  */
337  static GP<ByteStream> create_static(void const *buffer, size_t size);
338 
339  /** Easy access to preallocated stdin/stdout/stderr bytestreams */
340  static GP<ByteStream> get_stdin(char const * mode=0);
341  static GP<ByteStream> get_stdout(char const * mode=0); 
342  static GP<ByteStream> get_stderr(char const * mode=0);
343
344  /** This is the conventional name for EOF exceptions */
345  static const char *EndOfFile;
346  /** Returns the contents of the file as a GNativeString */
347  GNativeString getAsNative(void);
348  /** Returns the contents of the file as a GUTF8String */
349  GUTF8String getAsUTF8(void);
350};
351
352inline size_t
353ByteStream::readat(void *buffer, size_t sz, int pos)
354{
355  size_t retval;
356  long tpos=tell();
357  seek(pos, SEEK_SET, true);
358  retval=readall(buffer,sz);
359  seek(tpos, SEEK_SET, true);
360  return retval;
361}
362
363inline int
364ByteStream::size(void) const
365{
366  ByteStream *bs=const_cast<ByteStream *>(this);
367  int bsize=(-1);
368  long pos=tell();
369  if(bs->seek(0,SEEK_END,true))
370  {
371    bsize=(int)tell();
372    (void)(bs->seek(pos,SEEK_SET,false));
373  }
374  return bsize;
375}
376
377/** ByteStream::Wrapper implements wrapping bytestream.  This is useful
378    for derived classes that take a GP<ByteStream> as a creation argument,
379    and the backwards compatible bytestreams.  */
380class ByteStream::Wrapper : public ByteStream
381{
382protected:
383  GP<ByteStream> gbs;
384  ByteStream *bs;
385  Wrapper(void) : bs(0) {}
386  Wrapper(const GP<ByteStream> &xbs) : gbs(xbs), bs(xbs) {}
387public:
388  ~Wrapper();
389  ByteStream * operator & () const {return bs;}
390  ByteStream * operator & () {return bs;}
391  virtual size_t read(void *buffer, size_t size)
392    { return bs->read(buffer,size); }
393  virtual size_t write(const void *buffer, size_t size)
394    { return bs->write(buffer,size); }
395  virtual long tell(void) const
396    { return bs->tell(); }
397  virtual int seek(long offset, int whence = SEEK_SET, bool nothrow=false)
398    { return bs->seek(offset,whence,nothrow); }
399  virtual void flush(void)
400    { bs->flush(); }
401};
402
403
404//@}
405
406// ------------ THE END
407
408#ifdef HAVE_NAMESPACES
409}
410# ifndef NOT_USING_DJVU_NAMESPACE
411using namespace DJVU;
412# endif
413#endif
414#endif
415
Note: See TracBrowser for help on using the repository browser.