1 | //C- -*- C++ -*- |
---|
2 | //C- ------------------------------------------------------------------- |
---|
3 | //C- DjVuLibre-3.5 |
---|
4 | //C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. |
---|
5 | //C- Copyright (c) 2001 AT&T |
---|
6 | //C- |
---|
7 | //C- This software is subject to, and may be distributed under, the |
---|
8 | //C- GNU General Public License, either Version 2 of the license, |
---|
9 | //C- or (at your option) any later version. The license should have |
---|
10 | //C- accompanied the software or you may obtain a copy of the license |
---|
11 | //C- from the Free Software Foundation at http://www.fsf.org . |
---|
12 | //C- |
---|
13 | //C- This program is distributed in the hope that it will be useful, |
---|
14 | //C- but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
15 | //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
16 | //C- GNU General Public License for more details. |
---|
17 | //C- |
---|
18 | //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from |
---|
19 | //C- Lizardtech Software. Lizardtech Software has authorized us to |
---|
20 | //C- replace the original DjVu(r) Reference Library notice by the following |
---|
21 | //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu): |
---|
22 | //C- |
---|
23 | //C- ------------------------------------------------------------------ |
---|
24 | //C- | DjVu (r) Reference Library (v. 3.5) |
---|
25 | //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. |
---|
26 | //C- | The DjVu Reference Library is protected by U.S. Pat. No. |
---|
27 | //C- | 6,058,214 and patents pending. |
---|
28 | //C- | |
---|
29 | //C- | This software is subject to, and may be distributed under, the |
---|
30 | //C- | GNU General Public License, either Version 2 of the license, |
---|
31 | //C- | or (at your option) any later version. The license should have |
---|
32 | //C- | accompanied the software or you may obtain a copy of the license |
---|
33 | //C- | from the Free Software Foundation at http://www.fsf.org . |
---|
34 | //C- | |
---|
35 | //C- | The computer code originally released by LizardTech under this |
---|
36 | //C- | license and unmodified by other parties is deemed "the LIZARDTECH |
---|
37 | //C- | ORIGINAL CODE." Subject to any third party intellectual property |
---|
38 | //C- | claims, LizardTech grants recipient a worldwide, royalty-free, |
---|
39 | //C- | non-exclusive license to make, use, sell, or otherwise dispose of |
---|
40 | //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the |
---|
41 | //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU |
---|
42 | //C- | General Public License. This grant only confers the right to |
---|
43 | //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to |
---|
44 | //C- | the extent such infringement is reasonably necessary to enable |
---|
45 | //C- | recipient to make, have made, practice, sell, or otherwise dispose |
---|
46 | //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to |
---|
47 | //C- | any greater extent that may be necessary to utilize further |
---|
48 | //C- | modifications or combinations. |
---|
49 | //C- | |
---|
50 | //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY |
---|
51 | //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED |
---|
52 | //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF |
---|
53 | //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. |
---|
54 | //C- +------------------------------------------------------------------ |
---|
55 | // |
---|
56 | // $Id: ZPCodec.h,v 1.10 2007/03/25 20:48:35 leonb Exp $ |
---|
57 | // $Name: release_3_5_22 $ |
---|
58 | |
---|
59 | #ifndef _ZPCODEC_H |
---|
60 | #define _ZPCODEC_H |
---|
61 | #ifdef HAVE_CONFIG_H |
---|
62 | #include "config.h" |
---|
63 | #endif |
---|
64 | #if NEED_GNUG_PRAGMAS |
---|
65 | # pragma interface |
---|
66 | #endif |
---|
67 | |
---|
68 | // From: Leon Bottou, 1/31/2002 |
---|
69 | // Almost equal to my initial code. |
---|
70 | |
---|
71 | #include "GContainer.h" |
---|
72 | |
---|
73 | #ifdef HAVE_NAMESPACES |
---|
74 | namespace DJVU { |
---|
75 | # ifdef NOT_DEFINED // Just to fool emacs c++ mode |
---|
76 | } |
---|
77 | #endif |
---|
78 | #endif |
---|
79 | |
---|
80 | class ByteStream; |
---|
81 | |
---|
82 | |
---|
83 | |
---|
84 | /** @name ZPCodec.h |
---|
85 | |
---|
86 | Files #"ZPCodec.h"# and #"ZPCodec.cpp"# implement a fast binary adaptive |
---|
87 | quasi-arithmetic coder named ZP-Coder. Because of its speed and |
---|
88 | convenience, the ZP-Coder is used in several parts of the DjVu reference |
---|
89 | library (See \Ref{BSByteStream.h}, \Ref{JB2Image.h}, \Ref{IW44Image.h}). |
---|
90 | The following comments avoid the theory (see the historical remarks for |
---|
91 | useful pointers) and concentrate on the user perspective on the ZP-Coder. |
---|
92 | |
---|
93 | {\bf Introduction} --- |
---|
94 | Encoding consists of transforming a sequence of {\em message bits} into a |
---|
95 | sequence of {\em code bits}. Decoding consists of retrieving the message |
---|
96 | bits using only the code bits. We can make the code smaller than the |
---|
97 | message as soon as we can predict a message bit on the basis of a {\em |
---|
98 | coding context} composed of previously encoded or decoded bits. If the |
---|
99 | prediction is always correct, we do not even need to encode the message |
---|
100 | bit. If the prediction is totally unreliable, we need to generate one code |
---|
101 | bit in order to unambiguously specify the message bit. In other words, |
---|
102 | the more reliable the prediction, the more compression we get. |
---|
103 | |
---|
104 | The ZP-Coder handles prediction by means of {\em context variables} (see |
---|
105 | \Ref{BitContext}). There must be a context variable for each possible |
---|
106 | combination of context bits. Both the encoder and the decoder use same |
---|
107 | context variable for coding each message bit. For instance, we can code a |
---|
108 | binary image by successively coding all the pixels (the message bits) in |
---|
109 | row and column order. It is reasonable to assume that each pixel can be |
---|
110 | reasonably well predicted by looking at a few (say 10) neighboring pixels |
---|
111 | located above and to the left of the current pixel. Since these 10 pixels |
---|
112 | make 1024 combinations, we need 1024 context variables. Each pixel is |
---|
113 | encoded using the context variable corresponding to the values of the 10 |
---|
114 | neighboring pixels. Each pixel will be decoded by specifying the same |
---|
115 | context variable corresponding to the values of these 10 pixels. This is |
---|
116 | possible because these 10 pixels (located above and to the left) have |
---|
117 | already been decoded and therefore are known by the decoder program. |
---|
118 | |
---|
119 | The context variables are initially set to zero, which mean that we do not |
---|
120 | know yet how to predict the current message bit on the basis of the |
---|
121 | context bits. While coding the message bits, the ZP-Coder automatically |
---|
122 | estimates the frequencies of #0#s and #1#s coded using each context |
---|
123 | variable. These frequencies actually provide a prediction (the most |
---|
124 | probable bit value) and an estimation of the prediction reliability (how |
---|
125 | often the prediction was correct in the past). All this statistical |
---|
126 | information is stored into the context variable after coding each bit. In |
---|
127 | other words, the more we code bits within a particular context, the better |
---|
128 | the ZP-Coder adapts its prediction model, and the more compression we can |
---|
129 | obtain. |
---|
130 | |
---|
131 | All this adaptation works indeed because both the encoder program and the |
---|
132 | decoder program are always synchronized. Both the encoder and the decoder |
---|
133 | see the same message bits encoded (or decoded) with the same context |
---|
134 | variables. Both the encoder and the decoder apply the same rules to |
---|
135 | update the context variables and improve the predictors. Both the encoder |
---|
136 | and the decoder programs use the same predictors for any given message |
---|
137 | bit. The decoder could not work if this was not the case. |
---|
138 | |
---|
139 | Just before encoding a message bit, all the context variables in the |
---|
140 | encoder program contain certain values. Just before decoding this message |
---|
141 | bit, all the context variables in the decoder program must contain the same |
---|
142 | values as for the encoder program. This is guaranteed as long as |
---|
143 | each prediction only depends on already coded bits: {\em the coding context, |
---|
144 | on which the each prediction is based, must be composed of message bits which |
---|
145 | have already been coded. } |
---|
146 | |
---|
147 | {\bf Usage} --- |
---|
148 | Once you know how to organize the predictions (i.e. which coding context |
---|
149 | to use, how many context variables to initialize, etc.), using the |
---|
150 | ZP-Coder is straightforward (see \Ref{ZPCodec Examples}): |
---|
151 | \begin{itemize} |
---|
152 | \item The {\em encoder program} allocates context variables and |
---|
153 | initializes them to zero. It then constructs a \Ref{ZPCodec} object for |
---|
154 | encoding. For each message bit, the encoder program retrieves the context |
---|
155 | bits, selects a context variable on the basis of the context bits and |
---|
156 | calls member function \Ref{ZPCodec::encoder} with the message bit and a |
---|
157 | reference to the context variable. |
---|
158 | \item The {\em decoder program} allocates context variables and |
---|
159 | initializes them to zero. It then constructs a \Ref{ZPCodec} object for |
---|
160 | decoding. For each message bit, the decoder program retrieves the context |
---|
161 | bits, selects a context variable on the basis of the context bits and |
---|
162 | calls member function \Ref{ZPCodec::decoder} with a reference to the |
---|
163 | context variable. This function returns the message bit. |
---|
164 | \end{itemize} |
---|
165 | Functions #encoder# and #decoder# only require a few machine cycles to |
---|
166 | perform two essential tasks, namely {\em coding} and {\em context |
---|
167 | adaptation}. Function #decoder# often returns after two arithmetic |
---|
168 | operations only. To make your program fast, you just need to feed message |
---|
169 | bits and context variables fast enough. |
---|
170 | |
---|
171 | {\bf History} --- The ZP-Coder is similar in function and performance to |
---|
172 | the seminal Q-Coder (Pennebaker, Mitchell, Langdon, Arps, IBM J. Res |
---|
173 | Dev. 32, 1988). An improved version of the Q-Coder, named QM-Coder, has |
---|
174 | been described in certain parts of the JPEG standard. Unfortunate patent |
---|
175 | policies have made these coders very difficult to use in general purpose |
---|
176 | applications. The Z-Coder is constructed using a new approach based on an |
---|
177 | extension of the Golomb codes (Bottou, Howard, Bengio, IEEE DCC 98, 1998 |
---|
178 | \URL[DjVu]{http://www.research.att.com/~leonb/DJVU/bottou-howard-bengio/} |
---|
179 | \URL[PostScript]{http://www.research.att.com/~leonb/PS/bottou-howard-bengio.ps.gz}) |
---|
180 | This new approach does not infringe the QM-Coder patents. Unfortunately |
---|
181 | the Z-Coder is dangerously close to the patented Arithmetic MEL Coder. |
---|
182 | Therefore we wrote the ZP-Coder (pronounce Zee-Prime Coder) which we |
---|
183 | believe is clear of legal problems. Needless to say, AT&T has patents |
---|
184 | pending for both the Z-Coder and the ZP-Coder, licenced to LizardTech. |
---|
185 | The good news however is that we can grant a license to use the ZP-Coder |
---|
186 | in ``free software'' without further complication. See the Copyright |
---|
187 | for more information. |
---|
188 | |
---|
189 | @memo |
---|
190 | Binary adaptive quasi-arithmetic coder. |
---|
191 | @version |
---|
192 | #$Id: ZPCodec.h,v 1.10 2007/03/25 20:48:35 leonb Exp $# |
---|
193 | @author |
---|
194 | L\'eon Bottou <leonb@research.att.com> */ |
---|
195 | //@{ |
---|
196 | |
---|
197 | |
---|
198 | /** Context variable. |
---|
199 | Variables of type #BitContext# hold a single byte describing how to encode |
---|
200 | or decode message bits with similar statistical properties. This single |
---|
201 | byte simultaneously represents the current estimate of the bit probability |
---|
202 | distribution (which is determined by the frequencies of #1#s and #0#s |
---|
203 | already coded with this context) and the confidence in this estimate |
---|
204 | (which determines how fast the estimate can change.) |
---|
205 | |
---|
206 | A coding program typically allocates hundreds of context variables. Each |
---|
207 | coding context is initialized to zero before encoding or decoding. Value |
---|
208 | zero represents equal probabilities for #1#s and #0#s with a minimal |
---|
209 | confidence and therefore a maximum adaptation speed. Each message bit is |
---|
210 | encoded using a coding context determined as a function of previously |
---|
211 | encoded message bits. The decoder therefore can examine the previously |
---|
212 | decoded message bits and decode the current bit using the same context as |
---|
213 | the encoder. This is critical for proper decoding. |
---|
214 | */ |
---|
215 | typedef unsigned char BitContext; |
---|
216 | |
---|
217 | |
---|
218 | /** Performs ZP-Coder encoding and decoding. A ZPCodec object must either |
---|
219 | constructed for encoding or for decoding. The ZPCodec object is connected |
---|
220 | with a \Ref{ByteStream} object specified at construction time. A ZPCodec |
---|
221 | object constructed for decoding reads code bits from the ByteStream and |
---|
222 | returns a message bit whenever function \Ref{decoder} is called. A |
---|
223 | ZPCodec constructed for encoding processes the message bits provided by |
---|
224 | function \Ref{encoder} and writes the corresponding code bits to |
---|
225 | ByteStream #bs#. |
---|
226 | |
---|
227 | You should never directly access a ByteStream object connected to a valid |
---|
228 | ZPCodec object. The most direct way to access the ByteStream object |
---|
229 | consists of using the "pass-thru" versions of functions \Ref{encoder} and |
---|
230 | \Ref{decoder}. |
---|
231 | |
---|
232 | The ByteStream object can be accessed again after the destruction of the |
---|
233 | ZPCodec object. Note that the encoder always flushes its internal buffers |
---|
234 | and writes a few final code bytes when the ZPCodec object is destroyed. |
---|
235 | Note also that the decoder often reads a few bytes beyond the last code byte |
---|
236 | written by the encoder. This lag means that you must reposition the |
---|
237 | ByteStream after the destruction of the ZPCodec object and before re-using |
---|
238 | the ByteStream object (see \Ref{IFFByteStream}.) |
---|
239 | |
---|
240 | Please note also that the decoder has no way to reliably indicate the end |
---|
241 | of the message bit sequence. The content of the message must be designed |
---|
242 | in a way which indicates when to stop decoding. Simple ways to achieve |
---|
243 | this consists of announcing the message length at the beginning (like a |
---|
244 | pascal style string), or of defining a termination code (like a null |
---|
245 | terminated string). */ |
---|
246 | |
---|
247 | class ZPCodec : public GPEnabled { |
---|
248 | protected: |
---|
249 | ZPCodec (GP<ByteStream> gbs, const bool encoding, const bool djvucompat=false); |
---|
250 | public: |
---|
251 | class Encode; |
---|
252 | class Decode; |
---|
253 | |
---|
254 | /// Non-virtual destructor. |
---|
255 | ~ZPCodec(); |
---|
256 | /** Constructs a ZP-Coder. If argument #encoding# is zero, the ZP-Coder |
---|
257 | object will read code bits from the ByteStream #bs# and return a message |
---|
258 | bit whenever function #decoder# is called. If flag #encoding# is set |
---|
259 | the ZP-Coder object will process the message bits provided by function |
---|
260 | #encoder# and write code bits to ByteStream #bs#. Optional flag |
---|
261 | #djvucompat# selects a slightly less efficient adaptation table which is |
---|
262 | used by the DjVu project. This is required in order to ensure the |
---|
263 | bitstream compatibility. You should not use this flag unless you want |
---|
264 | to decode JB2, IW44 or BZZ encoded data. */ |
---|
265 | static GP<ZPCodec> create( |
---|
266 | GP<ByteStream> gbs, const bool encoding, const bool djvucompat=false); |
---|
267 | |
---|
268 | /** Encodes bit #bit# using context variable #ctx#. Argument #bit# must be |
---|
269 | #0# or #1#. This function should only be used with ZP-Coder objects |
---|
270 | created for encoding. It may modify the contents of variable #ctx# in |
---|
271 | order to perform context adaptation. */ |
---|
272 | void encoder(int bit, BitContext &ctx); |
---|
273 | |
---|
274 | /** Decodes a bit using context variable #ctx#. This function should only be |
---|
275 | used with ZP-Coder objects created for decoding. It may modify the |
---|
276 | contents of variable #ctx# in order to perform context adaptation. */ |
---|
277 | int decoder(BitContext &ctx); |
---|
278 | |
---|
279 | /** Encodes bit #bit# without compression (pass-thru encoder). Argument |
---|
280 | #bit# must be #0# or #1#. No compression will be applied. Calling this |
---|
281 | function always increases the length of the code bit sequence by one |
---|
282 | bit. */ |
---|
283 | void encoder(int bit); |
---|
284 | |
---|
285 | /** Decodes a bit without compression (pass-thru decoder). This function |
---|
286 | retrieves bits encoded with the pass-thru encoder. */ |
---|
287 | int decoder(void); |
---|
288 | #ifdef ZPCODEC_BITCOUNT |
---|
289 | /** Counter for code bits (requires #-DZPCODEC_BITCOUNT#). This member |
---|
290 | variable is available when the ZP-Coder is compiled with option |
---|
291 | #-DZPCODEC_BITCOUNT#. Variable #bitcount# counts the number of code |
---|
292 | bits processed by the coder since the construction of the object. This |
---|
293 | variable can be used to evaluate how many code bits are spent on various |
---|
294 | components of the message. */ |
---|
295 | int bitcount; |
---|
296 | #endif |
---|
297 | // Table management (advanced stuff) |
---|
298 | struct Table { |
---|
299 | unsigned short p; |
---|
300 | unsigned short m; |
---|
301 | BitContext up; |
---|
302 | BitContext dn; |
---|
303 | }; |
---|
304 | void newtable(ZPCodec::Table *table); |
---|
305 | BitContext state(float prob1); |
---|
306 | // Non-adaptive encoder/decoder |
---|
307 | void encoder_nolearn(int pix, BitContext &ctx); |
---|
308 | int decoder_nolearn(BitContext &ctx); |
---|
309 | inline int IWdecoder(void); |
---|
310 | inline void IWencoder(const bool bit); |
---|
311 | protected: |
---|
312 | // coder status |
---|
313 | GP<ByteStream> gbs; // Where the data goes/comes from |
---|
314 | ByteStream *bs; // Where the data goes/comes from |
---|
315 | const bool encoding; // Direction (0=decoding, 1=encoding) |
---|
316 | unsigned char byte; |
---|
317 | unsigned char scount; |
---|
318 | unsigned char delay; |
---|
319 | unsigned int a; |
---|
320 | unsigned int code; |
---|
321 | unsigned int fence; |
---|
322 | unsigned int subend; |
---|
323 | unsigned int buffer; |
---|
324 | unsigned int nrun; |
---|
325 | // table |
---|
326 | unsigned int p[256]; |
---|
327 | unsigned int m[256]; |
---|
328 | BitContext up[256]; |
---|
329 | BitContext dn[256]; |
---|
330 | // machine independent ffz |
---|
331 | char ffzt[256]; |
---|
332 | // encoder private |
---|
333 | void einit (void); |
---|
334 | void eflush (void); |
---|
335 | void outbit(int bit); |
---|
336 | void zemit(int b); |
---|
337 | void encode_mps(BitContext &ctx, unsigned int z); |
---|
338 | void encode_lps(BitContext &ctx, unsigned int z); |
---|
339 | void encode_mps_simple(unsigned int z); |
---|
340 | void encode_lps_simple(unsigned int z); |
---|
341 | void encode_mps_nolearn(unsigned int z); |
---|
342 | void encode_lps_nolearn(unsigned int z); |
---|
343 | // decoder private |
---|
344 | void dinit(void); |
---|
345 | void preload(void); |
---|
346 | int ffz(unsigned int x); |
---|
347 | int decode_sub(BitContext &ctx, unsigned int z); |
---|
348 | int decode_sub_simple(int mps, unsigned int z); |
---|
349 | int decode_sub_nolearn(int mps, unsigned int z); |
---|
350 | private: |
---|
351 | // no copy allowed (hate c++) |
---|
352 | ZPCodec(const ZPCodec&); |
---|
353 | ZPCodec& operator=(const ZPCodec&); |
---|
354 | #ifdef ZPCODEC_FRIEND |
---|
355 | friend ZPCODEC_FRIEND; |
---|
356 | #endif |
---|
357 | }; |
---|
358 | |
---|
359 | |
---|
360 | |
---|
361 | |
---|
362 | |
---|
363 | |
---|
364 | // INLINE CODE |
---|
365 | |
---|
366 | inline void |
---|
367 | ZPCodec::encoder(int bit, BitContext &ctx) |
---|
368 | { |
---|
369 | unsigned int z = a + p[ctx]; |
---|
370 | if (bit != (ctx & 1)) |
---|
371 | { |
---|
372 | encode_lps(ctx, z); |
---|
373 | }else if (z >= 0x8000) |
---|
374 | { |
---|
375 | encode_mps(ctx, z); |
---|
376 | }else |
---|
377 | { |
---|
378 | a = z; |
---|
379 | } |
---|
380 | } |
---|
381 | |
---|
382 | inline int |
---|
383 | ZPCodec::IWdecoder(void) |
---|
384 | { |
---|
385 | return decode_sub_simple(0,0x8000 + ((a+a+a) >> 3)); |
---|
386 | } |
---|
387 | |
---|
388 | inline int |
---|
389 | ZPCodec::decoder(BitContext &ctx) |
---|
390 | { |
---|
391 | unsigned int z = a + p[ctx]; |
---|
392 | if (z <= fence) |
---|
393 | { a = z; return (ctx&1); } |
---|
394 | return decode_sub(ctx, z); |
---|
395 | } |
---|
396 | |
---|
397 | inline void |
---|
398 | ZPCodec::encoder_nolearn(int bit, BitContext &ctx) |
---|
399 | { |
---|
400 | unsigned int z = a + p[ctx]; |
---|
401 | if (bit != (ctx & 1)) |
---|
402 | encode_lps_nolearn(z); |
---|
403 | else if (z >= 0x8000) |
---|
404 | encode_mps_nolearn(z); |
---|
405 | else |
---|
406 | a = z; |
---|
407 | } |
---|
408 | |
---|
409 | inline int |
---|
410 | ZPCodec::decoder_nolearn(BitContext &ctx) |
---|
411 | { |
---|
412 | unsigned int z = a + p[ctx]; |
---|
413 | if (z <= fence) |
---|
414 | { a = z; return (ctx&1); } |
---|
415 | return decode_sub_nolearn( (ctx&1), z); |
---|
416 | } |
---|
417 | |
---|
418 | inline void |
---|
419 | ZPCodec::encoder(int bit) |
---|
420 | { |
---|
421 | if (bit) |
---|
422 | encode_lps_simple(0x8000 + (a>>1)); |
---|
423 | else |
---|
424 | encode_mps_simple(0x8000 + (a>>1)); |
---|
425 | } |
---|
426 | |
---|
427 | inline int |
---|
428 | ZPCodec::decoder(void) |
---|
429 | { |
---|
430 | return decode_sub_simple(0, 0x8000 + (a>>1)); |
---|
431 | } |
---|
432 | |
---|
433 | inline void |
---|
434 | ZPCodec::IWencoder(const bool bit) |
---|
435 | { |
---|
436 | const int z = 0x8000 + ((a+a+a) >> 3); |
---|
437 | if (bit) |
---|
438 | { |
---|
439 | encode_lps_simple(z); |
---|
440 | }else |
---|
441 | { |
---|
442 | encode_mps_simple(z); |
---|
443 | } |
---|
444 | } |
---|
445 | |
---|
446 | // ------------ ADDITIONAL DOCUMENTATION |
---|
447 | |
---|
448 | /** @name ZPCodec Examples |
---|
449 | |
---|
450 | Binary adaptive coders are efficient and very flexible. Unfortunate |
---|
451 | intellectual property issues however have limited their popularity. As a |
---|
452 | consequence, few programmers have a direct experience of using such a |
---|
453 | coding device. The few examples provided in this section demonstrate how |
---|
454 | we think the ZP-Coder should be used. |
---|
455 | |
---|
456 | {\bf Encoding Multivalued Symbols} --- |
---|
457 | Since the ZP-Coder is a strictly binary coder, every message must be |
---|
458 | reduced to a sequence of bits (#0#s or #1#s). It is often convenient to |
---|
459 | consider that a message is a sequence of symbols taking more than two |
---|
460 | values. For instance, a character string may be a sequence of bytes, and |
---|
461 | each byte can take 256 values. Each byte of course is composed of eight |
---|
462 | bits that we can encode in sequence. The real issue however consists of |
---|
463 | deciding how we will use context variables in order to let the ZP-Coder |
---|
464 | learn the probability distribution of the byte values. |
---|
465 | |
---|
466 | The most significant bit #b0# decides whether the byte is in range 0..127 |
---|
467 | or in range 128..255. We let the ZP-Coder learn how to predict this bit |
---|
468 | by allocating one context variable for it. The second most significant |
---|
469 | byte #b1# has two distinct meanings depending of bit #b0#. If bit #b0# is |
---|
470 | #0#, bit #b1# decides whether the byte is in range 0..63 or 64..127. If |
---|
471 | bit #b0# is #1#, bit #b1# decides whether the byte is in range 128..191 or |
---|
472 | 192..255. The prediction for bit #b1# must therefore depend on the value |
---|
473 | of #b0#. This is why we will allocate two context variables for this bit. |
---|
474 | If bit #b0# is #0#, we will use the first variable; if bit #b0# is #1#, we |
---|
475 | will use the second variable. The next bit #b2# has four meanings and |
---|
476 | therefore we will use four context variables, etc. This analysis leads to |
---|
477 | a total of #1+2+4+...+128# = #255# context variables for encoding one |
---|
478 | byte. This encoding procedure can be understood as a binary decision |
---|
479 | tree with a dedicated context variable for predicting each decision. |
---|
480 | \begin{verbatim} |
---|
481 | [>=128]----n---[>=64?]----n----[>31?] ... |
---|
482 | \ `---y----[>95?] ... |
---|
483 | \ |
---|
484 | `--y---[>=192?]----n---[>=160?] ... |
---|
485 | `---y---[>=224?] ... |
---|
486 | \end{verbatim} |
---|
487 | The following decoding function illustrates a very compact way to |
---|
488 | implement such a decision tree. Argument #ctx# points to an array of 255 |
---|
489 | #BitContext# variables. Macro #REPEAT8# is a shorthand notation for eight |
---|
490 | repetitions of its argument. |
---|
491 | \begin{verbatim} |
---|
492 | int decode_8_bits(ZPCodec &zp, BitContext *ctx ) |
---|
493 | { |
---|
494 | int n = 1; |
---|
495 | REPEAT8( { n = (n<<1) | (zp.decoder(ctx[n-1])); } ); |
---|
496 | return n & 0xff; |
---|
497 | } |
---|
498 | \end{verbatim} |
---|
499 | The binary representation of variable #n# is always composed of a #1# |
---|
500 | followed by whichever bits have been decoded so far. This extra bit #1# in |
---|
501 | fact is a nice trick to flatten out the tree structure and directly |
---|
502 | address the array of context variables. Bit #b0# is decoded using the |
---|
503 | first context variable since #n# is initially #1#. Bit #b1# is decoded |
---|
504 | using one of the next two variables in the array, since #n# is either #2# |
---|
505 | (#10# in binary) or #3# (#11# in binary). Bit #b2# will be decoded using |
---|
506 | one of the next four variables, since #n# ranges from #4# (#100# in |
---|
507 | binary) to #7# (#111# in binary). The final result is given by removing |
---|
508 | the extra #1# in variable #n#. |
---|
509 | |
---|
510 | The corresponding encoding function is almost as compact. Argument #ctx# |
---|
511 | again is an array of 255 #BitContext# variables. Each bit of byte #x# is |
---|
512 | encoded and shifted into variable #n# as in the decoding function. |
---|
513 | Variable #x# in fact contains the bits to be encoded. Variable #n# |
---|
514 | contains a #1# followed by the already encoded bits. |
---|
515 | \begin{verbatim} |
---|
516 | void encode_8_bits(ZPCodec &zp, int x, BitContext *ctx ) |
---|
517 | { |
---|
518 | int n = 1; |
---|
519 | REPEAT8( { int b=((x&0x80)?1:0); x=(x<<1); |
---|
520 | zp.encoder(b,ctx[n-1]); n=(n<<1)|(b); } ); |
---|
521 | } |
---|
522 | \end{verbatim} |
---|
523 | The ZP-Coder automatically adjusts the content of the context variables |
---|
524 | while coding (recall the context variable argument is passed to functions |
---|
525 | #encoder# and #decoder# by reference). The whole array of 255 context |
---|
526 | variables can be understood as a "byte context variable". The estimated |
---|
527 | probability of each byte value is indeed the product of the estimated |
---|
528 | probabilities of the eight binary decisions that lead to that value in the |
---|
529 | decision tree. All these probabilities are adapted by the underlying |
---|
530 | adaptation algorithm of the ZP-Coder. |
---|
531 | |
---|
532 | {\bf Application} --- |
---|
533 | We consider now a simple applications consisting of encoding the |
---|
534 | horizontal and vertical coordinates of a cloud of points. Each coordinate |
---|
535 | requires one byte. The following function illustrates a possible |
---|
536 | implementation: |
---|
537 | \begin{verbatim} |
---|
538 | void encode_points(const char *filename, int n, int *x, int *y) |
---|
539 | { |
---|
540 | StdioByteStream bs(filename, "wb"); |
---|
541 | bs.write32(n); // Write number of points. |
---|
542 | ZPCodec zp(bs, 1); // Construct encoder and context vars. |
---|
543 | BitContext ctxX[255], ctxY[255]; |
---|
544 | memset(ctxX, 0, sizeof(ctxX)); |
---|
545 | memset(ctxY, 0, sizeof(ctxY)); |
---|
546 | for (int i=0; i<n; i++) { // Encode coordinates. |
---|
547 | encode_8_bits(zp, x[i], ctxX); |
---|
548 | encode_8_bits(zp, y[i], ctxY); |
---|
549 | } |
---|
550 | } |
---|
551 | \end{verbatim} |
---|
552 | The decoding function is very similar to the encoding function: |
---|
553 | \begin{verbatim} |
---|
554 | int decode_points(const char *filename, int *x, int *y) |
---|
555 | { |
---|
556 | StdioByteStream bs(filename,"rb"); |
---|
557 | int n = bs.read32(); // Read number of points. |
---|
558 | ZPCodec zp(bs, 0); // Construct decoder and context vars. |
---|
559 | BitContext ctxX[255], ctxY[255]; |
---|
560 | memset(ctxX, 0, sizeof(ctxX)); |
---|
561 | memset(ctxY, 0, sizeof(ctxY)); |
---|
562 | for (int i=0; i<n; i++) { // Decode coordinates. |
---|
563 | x[i] = decode_8_bits(zp, ctxX); |
---|
564 | y[i] = decode_8_bits(zp, ctxY); |
---|
565 | } |
---|
566 | return n; // Return number of points. |
---|
567 | } |
---|
568 | \end{verbatim} |
---|
569 | The ZP-Coder automatically estimates the probability distributions of both |
---|
570 | the horizontal and vertical coordinates. These estimates are used to |
---|
571 | efficiently encode the point coordinates. This particular implementation |
---|
572 | is a good option if we assume that the order of the points is significant |
---|
573 | and that successive points are independent. It would be much smarter |
---|
574 | otherwise to sort the points and encode relative displacements between |
---|
575 | successive points. |
---|
576 | |
---|
577 | |
---|
578 | {\bf Huffman Coding Tricks} --- |
---|
579 | Programmers with experience in Huffman codes can see the similarity in the |
---|
580 | ZP-Coder. Huffman codes also organize the symbol values as a decision |
---|
581 | tree. The tree is balanced in such a way that each decision is as |
---|
582 | unpredictable as possible (i.e. both branches must be equally probable). |
---|
583 | This is very close to the ZP-Coder technique described above. Since we |
---|
584 | allocate one context variable for each decision, our tree need not be |
---|
585 | balanced: the context variable will track the decision statistics and the |
---|
586 | ZP-Coder will compensate optimally. |
---|
587 | |
---|
588 | There are good reasons however to avoid unbalanced trees with the ZP-Coder. |
---|
589 | Frequent symbol values may be located quite deep in a poorly balanced |
---|
590 | tree. This increases the average number of message bits (the number of |
---|
591 | decisions) required to code a symbol. The ZP-Coder will be called more |
---|
592 | often, making the coding program slower. Furthermore, each message |
---|
593 | bit is encoded using an estimated distribution. All these useless message |
---|
594 | bits mean that the ZP-Coder has more distributions to adapt. This |
---|
595 | extra adaptation work will probably increase the file size. |
---|
596 | |
---|
597 | Huffman codes are very fast when the tree structure is fixed beforehand. |
---|
598 | Such {\em static Huffman codes} are unfortunately not very efficient |
---|
599 | because the tree never matches the actual data distribution. This is why |
---|
600 | such programs almost always define a data dependent tree structure. This |
---|
601 | structure must then be encoded in the file since the decoder must know it |
---|
602 | before decoding the symbols. Static Huffman codes however become very |
---|
603 | efficient when decisions are encoded with the ZP-Coder. The tree |
---|
604 | structure represents a priori knowledge about the distribution of the |
---|
605 | symbol values. Small data discrepancies will be addressed transparently |
---|
606 | by the ZP-Coder. |
---|
607 | |
---|
608 | |
---|
609 | {\bf Encoding Numbers} --- |
---|
610 | This technique is illustrated with the following number encoding example. |
---|
611 | The multivalued technique described above is not practical with large |
---|
612 | numbers because the decision tree has too many nodes and requires too many |
---|
613 | context variables. This problem can be solved by using a priori knowledge |
---|
614 | about the probability distribution of our numbers. |
---|
615 | |
---|
616 | Assume for instance that the distribution is symmetrical and that small |
---|
617 | numbers are much more probable than large numbers. We will first group |
---|
618 | our numbers into several sets. Each number is coded by first coding which |
---|
619 | set contains the number and then coding a position within the set. Each |
---|
620 | set contains #2^n# numbers that we consider roughly equiprobable. Since |
---|
621 | the most probable values occur much more often, we want to model their |
---|
622 | probability more precisely. Therefore we use small sets for the most |
---|
623 | probable values and large sets for the least probable values, as |
---|
624 | demonstrated below. |
---|
625 | \begin{verbatim} |
---|
626 | A---------------- {0} (size=1) |
---|
627 | `------B---C---- {1} or {-1} (size=1) |
---|
628 | \ `--- {2,3} or {-2,-3} (size=2) |
---|
629 | D------ {4...131} or {-4...-131} (size=128) |
---|
630 | `----- {132...32899} or {-132...-32899} (size=32768) |
---|
631 | \end{verbatim} |
---|
632 | We then organize a decision tree for coding the set identifier. This |
---|
633 | decision tree is balanced using whatever a priori knowledge we have about |
---|
634 | the probability distribution of the number values, just like a static |
---|
635 | Huffman tree. Each decision (except the sign decision) is then coded |
---|
636 | using a dedicated context variable. |
---|
637 | \begin{verbatim} |
---|
638 | if (! zp.decoder(ctx_A)) { // decision A |
---|
639 | return 0; |
---|
640 | } else { |
---|
641 | if (! zp.decoder(ctx_B)) { // + decision B |
---|
642 | if (! zp.decoder(ctx_C)) { // ++ decision C |
---|
643 | if (! zp.decoder()) // +++ sign decision |
---|
644 | return +1; |
---|
645 | else |
---|
646 | return -1; |
---|
647 | } else { |
---|
648 | if (! zp.decoder()) // +++ sign decision |
---|
649 | return + 2 + zp.decoder(); |
---|
650 | else |
---|
651 | return - 2 - zp.decoder(); |
---|
652 | } |
---|
653 | } else { |
---|
654 | if (! zp.decoder(ctx_D)) { // ++ decision D |
---|
655 | if (! zp.decoder()) // +++ sign decision |
---|
656 | return + 4 + decode_7_bits(zp); |
---|
657 | else |
---|
658 | return - 4 - decode_7_bits(zp); |
---|
659 | } else { |
---|
660 | if (! zp.decoder()) // +++ sign decision |
---|
661 | return + 132 + decode_15_bits(zp); |
---|
662 | else |
---|
663 | return - 132 - decode_15_bits(zp); |
---|
664 | } |
---|
665 | } |
---|
666 | } |
---|
667 | \end{verbatim} |
---|
668 | Note that the call #zp.decoder()# for coding the sign decision does not use |
---|
669 | a context variable. This is a "pass-thru" variant of \Ref{decoder} which |
---|
670 | bypasses the ZP-Coder and just reads a bit from the code sequence. There |
---|
671 | is a corresponding "pass-thru" version of \Ref{encoder} for encoding such |
---|
672 | bits. Similarly, functions #decode_7_bits# and #decode_15_bits# do not |
---|
673 | take an array of context variables because, unlike function #decode_8_bits# |
---|
674 | listed above, they are based on the pass-thru decoder instead of the |
---|
675 | regular decoder. |
---|
676 | |
---|
677 | The ZP-Coder will not learn the probabilities of the numbers within a set |
---|
678 | since no context variables have been allocated for that purpose. This |
---|
679 | could be improved by allocating additional context variables for encoding |
---|
680 | the position within the smaller sets and using the regular decoding |
---|
681 | functions instead of the pass-thru variants. Only experimentation can tell |
---|
682 | what works best for your particular encoding problem. |
---|
683 | |
---|
684 | |
---|
685 | {\bf Understanding Adaptation} --- |
---|
686 | We have so far explained that the ZP-Coder adaptation algorithm is able to |
---|
687 | quickly estimate of the probability distribution of the message bits coded |
---|
688 | using a particular context variable. It is also able to track slow |
---|
689 | variations when the actual probabilities change while coding. |
---|
690 | |
---|
691 | Let us consider the ``cloud of points'' application presented above. |
---|
692 | Suppose that we first code points located towards the left side and then |
---|
693 | slowly move towards points located on the right side. The ZP-Coder will |
---|
694 | first estimate that the X coordinates are rather on the left side. This |
---|
695 | estimation will be progressively revised after seeing more points on the |
---|
696 | right side. Such an ordering of the points obviously violates the point |
---|
697 | independence assumption on which our code is based. Despite our inexact |
---|
698 | assumptions, the tracking mechanism allows for better prediction of the X |
---|
699 | coordinates and therefore better compression. |
---|
700 | |
---|
701 | However, this is not a perfect solution. The ZP-Coder tracks the changes |
---|
702 | because every point seems to be a little bit more on the right side than |
---|
703 | suggested by the previous points. The ZP-Coder coding algorithm is always |
---|
704 | slightly misadjusted and we always lose a little on possible compression |
---|
705 | ratio. This is not much of a problem when the probabilities drift slowly. |
---|
706 | On the other hand, this can be very significant if the probabilities change |
---|
707 | drastically. |
---|
708 | |
---|
709 | Adaptation is always associated with a small loss of efficiency. The |
---|
710 | ZP-Coder updates the probability model whenever it suspects, {\em after |
---|
711 | coding}, that the current settings were not optimal. The model will be |
---|
712 | better next time, but a slight loss in compression has occurred. The |
---|
713 | design of ZP-Coder of course minimizes this effect as much as possible. |
---|
714 | Yet you will pay a price if you ask too much to the adaptation algorithm. |
---|
715 | If you have millions of context variables, it will be difficult to train |
---|
716 | them all. If the probability distributions change drastically while |
---|
717 | coding, it will be difficult to track the changes fast enough. |
---|
718 | |
---|
719 | Adaptation on the other hand is a great simplification. A good data |
---|
720 | compression program must (a) represent the data in order to make its |
---|
721 | predictability apparent, and (b) perform the predictions and generate the |
---|
722 | code bits. The ZP-Coder is an efficient and effortless solution for |
---|
723 | implementing task (b). |
---|
724 | |
---|
725 | |
---|
726 | {\bf Practical Debugging Tricks} --- |
---|
727 | Sometimes you write an encoding program and a decoding program. |
---|
728 | Unfortunately there is a bug: the decoding program decodes half the file |
---|
729 | and then just outputs garbage. There is a simple way to locate the |
---|
730 | problem. In the encoding program, after each call to #encoder#, print the |
---|
731 | encoded bit and the value of the context variable. In the decoding |
---|
732 | program, after each call to #decoder#, print the decoded bit and the value |
---|
733 | of the context variable. Both program should print exactly the same thing. |
---|
734 | When you find the difference, you find the bug. |
---|
735 | |
---|
736 | @memo Suggestions for efficiently using the ZP-Coder. */ |
---|
737 | //@} |
---|
738 | |
---|
739 | // ------------ THE END |
---|
740 | |
---|
741 | #ifdef HAVE_NAMESPACES |
---|
742 | } |
---|
743 | # ifndef NOT_USING_DJVU_NAMESPACE |
---|
744 | using namespace DJVU; |
---|
745 | # endif |
---|
746 | #endif |
---|
747 | #endif |
---|
748 | |
---|
749 | |
---|