source: trunk/libdjvu/BSByteStream.cpp @ 280

Last change on this file since 280 was 280, checked in by rbri, 11 years ago

DJVU plugin: djvulibre updated to version 3.5.22

File size: 13.1 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, either Version 2 of the license,
9//C- or (at your option) any later version. The license should have
10//C- accompanied the software or you may obtain a copy of the license
11//C- from the Free Software Foundation at http://www.fsf.org .
12//C-
13//C- This program is distributed in the hope that it will be useful,
14//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16//C- GNU General Public License for more details.
17//C-
18//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19//C- Lizardtech Software.  Lizardtech Software has authorized us to
20//C- replace the original DjVu(r) Reference Library notice by the following
21//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22//C-
23//C-  ------------------------------------------------------------------
24//C- | DjVu (r) Reference Library (v. 3.5)
25//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26//C- | The DjVu Reference Library is protected by U.S. Pat. No.
27//C- | 6,058,214 and patents pending.
28//C- |
29//C- | This software is subject to, and may be distributed under, the
30//C- | GNU General Public License, either Version 2 of the license,
31//C- | or (at your option) any later version. The license should have
32//C- | accompanied the software or you may obtain a copy of the license
33//C- | from the Free Software Foundation at http://www.fsf.org .
34//C- |
35//C- | The computer code originally released by LizardTech under this
36//C- | license and unmodified by other parties is deemed "the LIZARDTECH
37//C- | ORIGINAL CODE."  Subject to any third party intellectual property
38//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39//C- | non-exclusive license to make, use, sell, or otherwise dispose of
40//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42//C- | General Public License.   This grant only confers the right to
43//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44//C- | the extent such infringement is reasonably necessary to enable
45//C- | recipient to make, have made, practice, sell, or otherwise dispose
46//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47//C- | any greater extent that may be necessary to utilize further
48//C- | modifications or combinations.
49//C- |
50//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54//C- +------------------------------------------------------------------
55//
56// $Id: BSByteStream.cpp,v 1.9 2007/03/25 20:48:29 leonb Exp $
57// $Name: release_3_5_22 $
58
59#ifdef HAVE_CONFIG_H
60# include "config.h"
61#endif
62#if NEED_GNUG_PRAGMAS
63# pragma implementation
64#endif
65
66// - Author: Leon Bottou, 07/1998
67
68#include <stdlib.h>
69#include <stdio.h>
70#include <string.h>
71#include "BSByteStream.h"
72#undef BSORT_TIMER
73#ifdef BSORT_TIMER
74#include "GOS.h"
75#endif
76
77
78#ifdef HAVE_NAMESPACES
79namespace DJVU {
80# ifdef NOT_DEFINED // Just to fool emacs c++ mode
81}
82#endif
83#endif
84
85class BSByteStream::Decode : public BSByteStream
86{
87public:
88  /** Creates a Static object for allocating the memory area of
89      length #sz# starting at address #buffer#. */
90  Decode(GP<ByteStream> bs);
91  ~Decode();
92  void init(void);
93  // Virtual functions
94  virtual size_t read(void *buffer, size_t sz);
95  virtual void flush(void);
96protected:
97  unsigned int decode(void);
98private:
99  bool eof;
100};
101
102// ========================================
103// --- Assertion
104
105#define ASSERT(expr) do{if(!(expr))G_THROW("assertion ("#expr") failed");}while(0)
106
107// ========================================
108// --- Construction
109
110BSByteStream::BSByteStream(GP<ByteStream> xbs)
111: offset(0), bptr(0), blocksize(0), size(0), bs(xbs),
112  gbs(xbs), gdata(data,0)
113{
114  // Initialize context array
115  memset(ctx, 0, sizeof(ctx));
116}
117
118BSByteStream::~BSByteStream() {}
119
120BSByteStream::Decode::Decode(GP<ByteStream> xbs)
121: BSByteStream(xbs), eof(false) {}
122
123void
124BSByteStream::Decode::init(void)
125{
126  gzp=ZPCodec::create(gbs,false,true);
127}
128
129BSByteStream::Decode::~Decode() {}
130
131GP<ByteStream>
132BSByteStream::create(GP<ByteStream> xbs)
133{
134  BSByteStream::Decode *rbs=new BSByteStream::Decode(xbs);
135  GP<ByteStream> retval=rbs;
136  rbs->init();
137  return retval;
138}
139
140void 
141BSByteStream::Decode::flush()
142{
143  size = bptr = 0;
144}
145
146// ========================================
147// -- Decoding
148
149
150static int 
151decode_raw(ZPCodec &zp, int bits)
152{
153  int n = 1;
154  const int m = (1<<bits);
155  while (n < m)
156    {
157      const int b = zp.decoder();
158      n = (n<<1) | b;
159    }
160  return n - m;
161}
162
163static inline int 
164decode_binary(ZPCodec &zp, BitContext *ctx, int bits)
165{
166  int n = 1;
167  int m = (1<<bits);
168  ctx = ctx - 1;
169  while (n < m)
170    {
171      int b = zp.decoder(ctx[n]);
172      n = (n<<1) | b;
173    }
174  return n - m;
175}
176
177
178static inline void
179assignmtf(unsigned char xmtf[256])
180{
181  static const unsigned char mtf[256]={
182    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
183    0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
184    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
185    0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
186    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
187    0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
188    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
189    0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
190    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
191    0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
192    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
193    0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
194    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
195    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
196    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
197    0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
198    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
199    0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
200    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
201    0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
202    0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
203    0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
204    0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
205    0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
206    0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
207    0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
208    0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
209    0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
210    0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
211    0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
212    0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
213    0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF};
214  memcpy(xmtf,mtf,sizeof(mtf));
215}
216 
217unsigned int
218BSByteStream::Decode::decode(void)
219{
220  /////////////////////////////////
221  ////////////  Decode input stream
222 
223  int i;
224  // Decode block size
225  ZPCodec &zp=*gzp;
226  size = decode_raw(zp, 24);
227  if (!size)
228    return 0;
229  if (size>MAXBLOCK*1024)
230    G_THROW( ERR_MSG("ByteStream.corrupt") );
231  // Allocate
232  if ((int)blocksize < size)
233    {
234      blocksize = size;
235      if (data)
236      {
237        gdata.resize(0);
238      }
239    }
240  if (! data) 
241    gdata.resize(blocksize);
242  // Decode Estimation Speed
243  int fshift = 0;
244  if (zp.decoder())
245    {
246      fshift += 1;
247      if (zp.decoder())
248        fshift += 1;
249    }
250  // Prepare Quasi MTF
251  static const unsigned char xmtf[256]={
252    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
253    0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
254    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
255    0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
256    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
257    0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
258    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
259    0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
260    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
261    0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
262    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
263    0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
264    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
265    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
266    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
267    0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
268    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
269    0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
270    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
271    0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
272    0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
273    0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
274    0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
275    0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
276    0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
277    0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
278    0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
279    0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
280    0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
281    0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
282    0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
283    0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF};
284  unsigned char mtf[256];
285  memcpy(mtf,xmtf,sizeof(xmtf));
286  unsigned int freq[FREQMAX];
287  memset(freq,0,sizeof(freq));
288  int fadd = 4;
289  // Decode
290  int mtfno = 3;
291  int markerpos = -1;
292  for (i=0; i<size; i++)
293    {
294      int ctxid = CTXIDS-1;
295      if (ctxid>mtfno) ctxid=mtfno;
296      BitContext *cx = ctx;
297      if (zp.decoder(cx[ctxid]))
298        { mtfno=0; data[i]=mtf[mtfno]; goto rotate; }
299      cx+=CTXIDS;
300      if (zp.decoder(cx[ctxid]))
301        { mtfno=1; data[i]=mtf[mtfno]; goto rotate; } 
302      cx+=CTXIDS;
303      if (zp.decoder(cx[0]))
304        { mtfno=2+decode_binary(zp,cx+1,1); data[i]=mtf[mtfno]; goto rotate; } 
305      cx+=1+1;
306      if (zp.decoder(cx[0]))
307        { mtfno=4+decode_binary(zp,cx+1,2); data[i]=mtf[mtfno]; goto rotate; } 
308      cx+=1+3;
309      if (zp.decoder(cx[0]))
310        { mtfno=8+decode_binary(zp,cx+1,3); data[i]=mtf[mtfno]; goto rotate; } 
311      cx+=1+7;
312      if (zp.decoder(cx[0]))
313        { mtfno=16+decode_binary(zp,cx+1,4); data[i]=mtf[mtfno]; goto rotate; } 
314      cx+=1+15;
315      if (zp.decoder(cx[0]))
316        { mtfno=32+decode_binary(zp,cx+1,5); data[i]=mtf[mtfno]; goto rotate; } 
317      cx+=1+31;
318      if (zp.decoder(cx[0]))
319        { mtfno=64+decode_binary(zp,cx+1,6); data[i]=mtf[mtfno]; goto rotate; } 
320      cx+=1+63;
321      if (zp.decoder(cx[0]))
322        { mtfno=128+decode_binary(zp,cx+1,7); data[i]=mtf[mtfno]; goto rotate; } 
323      mtfno=256;
324      data[i]=0;
325      markerpos=i;
326      continue;
327      // Rotate mtf according to empirical frequencies (new!)
328    rotate:
329      // Adjust frequencies for overflow
330      int k;
331      fadd = fadd + (fadd>>fshift);
332      if (fadd > 0x10000000) 
333        {
334          fadd    >>= 24;
335          freq[0] >>= 24;
336          freq[1] >>= 24;
337          freq[2] >>= 24;
338          freq[3] >>= 24;
339          for (k=4; k<FREQMAX; k++)
340            freq[k] = freq[k]>>24;
341        }
342      // Relocate new char according to new freq
343      unsigned int fc = fadd;
344      if (mtfno < FREQMAX)
345        fc += freq[mtfno];
346      for (k=mtfno; k>=FREQMAX; k--) 
347        mtf[k] = mtf[k-1];
348      for (; k>0 && fc>=freq[k-1]; k--)
349        {
350          mtf[k] = mtf[k-1];
351          freq[k] = freq[k-1];
352        }
353      mtf[k] = data[i];
354      freq[k] = fc;
355    }
356 
357
358  /////////////////////////////////
359  ////////// Reconstruct the string
360 
361  if (markerpos<1 || markerpos>=size)
362    G_THROW( ERR_MSG("ByteStream.corrupt") );
363  // Allocate pointers
364  unsigned int *posn;
365  GPBuffer<unsigned int> gposn(posn,blocksize);
366  memset(posn, 0, sizeof(unsigned int)*size);
367  // Prepare count buffer
368  int count[256];
369  for (i=0; i<256; i++)
370    count[i] = 0;
371  // Fill count buffer
372  for (i=0; i<markerpos; i++) 
373    {
374      unsigned char c = data[i];
375      posn[i] = (c<<24) | (count[c] & 0xffffff);
376      count[c] += 1;
377    }
378  for (i=markerpos+1; i<size; i++)
379    {
380      unsigned char c = data[i];
381      posn[i] = (c<<24) | (count[c] & 0xffffff);
382      count[c] += 1;
383    }
384  // Compute sorted char positions
385  int last = 1;
386  for (i=0; i<256; i++)
387    {
388      int tmp = count[i];
389      count[i] = last;
390      last += tmp;
391    }
392  // Undo the sort transform
393  i = 0;
394  last = size-1;
395  while (last>0)
396    {
397      unsigned int n = posn[i];
398      unsigned char c = (posn[i]>>24);
399      data[--last] = c;
400      i = count[c] + (n & 0xffffff);
401    }
402  // Free and check
403  if (i != markerpos)
404    G_THROW( ERR_MSG("ByteStream.corrupt") );
405  return size;
406}
407
408
409
410// ========================================
411// -- ByteStream interface
412
413
414
415long 
416BSByteStream::tell() const
417{
418  return offset;
419}
420
421size_t 
422BSByteStream::Decode::read(void *buffer, size_t sz)
423{
424  if (eof)
425    return 0;
426  // Loop
427  int copied = 0;
428  while (sz>0 && !eof)
429    {
430      // Decode if needed
431      if (!size)
432        {
433          bptr = 0;
434          if (! decode()) 
435          {
436            size = 1 ;
437            eof = true;
438          }
439          size -= 1;
440        }
441      // Compute remaining
442      int bytes = size;
443      if (bytes > (int)sz)
444        bytes = sz;
445      // Transfer
446      if (buffer && bytes)
447        {
448          memcpy(buffer, data+bptr, bytes);
449          buffer = (void*)((char*)buffer + bytes);
450        }
451      size -= bytes;
452      bptr += bytes;
453      sz -= bytes;
454      copied += bytes;
455      offset += bytes;
456    }
457  // Return copied bytes
458  return copied;
459}
460
461
462#ifdef HAVE_NAMESPACES
463}
464# ifndef NOT_USING_DJVU_NAMESPACE
465using namespace DJVU;
466# endif
467#endif
Note: See TracBrowser for help on using the repository browser.