source: trunk/libdjvu/miniexp.h @ 209

Last change on this file since 209 was 206, checked in by Eugene Romanenko, 14 years ago

DJVU plugin: djvulibre updated to version 3.5.19

File size: 18.8 KB
Line 
1/* -*- C -*-
2// -------------------------------------------------------------------
3// MiniExp - Library for handling lisp expressions
4// Copyright (c) 2005  Leon Bottou
5//
6// This software is subject to, and may be distributed under, the
7// GNU General Public License, either Version 2 of the license,
8// or (at your option) any later version. The license should have
9// accompanied the software or you may obtain a copy of the license
10// from the Free Software Foundation at http://www.fsf.org .
11//
12// This program is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15// GNU General Public License for more details.
16// -------------------------------------------------------------------
17*/
18/* $Id: miniexp.h,v 1.14 2007/03/25 20:48:35 leonb Exp $ */
19
20#ifndef MINIEXP_H
21#define MINIEXP_H
22
23#ifdef __cplusplus
24extern "C" { 
25# ifndef __cplusplus
26}
27# endif
28#endif
29
30#ifndef MINILISPAPI
31# define MINILISPAPI /**/
32#endif
33 
34
35/* -------------------------------------------------- */
36/* LISP EXPRESSIONS                                   */
37/* -------------------------------------------------- */
38
39/* miniexp_t --
40   Opaque pointer type representing a lisp expression,
41   also known as s-expression.
42   S-expressions can be viewed as a simple and powerful
43   alternative to XML.  DjVu uses s-expressions to handle
44   annotations. Both the decoding api <ddjvuapi.h> and
45   program <djvused> use s-expressions to describe the
46   hidden text information and the navigation
47   information */
48
49
50typedef struct miniexp_s* miniexp_t;
51
52
53/* There are four basic types of lisp expressions,
54   numbers, symbols, pairs, and objects.
55   The latter category can represent any c++ object
56   that inherits class <miniobj_t> defined later in this file.
57   The only such objects defined in this file are strings. */
58
59
60/* -------- NUMBERS -------- */
61
62/* Minilisp numbers can represent any integer
63   in range [-2^29...2^29-1] */
64
65
66/* miniexp_numberp --
67   Tests if an expression is a number. */
68
69static inline int miniexp_numberp(miniexp_t p) {
70  return (((size_t)(p)&3)==3);
71}
72
73/* miniexp_to_int --
74   Returns the integer corresponding to a lisp expression.
75   Assume that the expression is indeed a number. */
76
77static inline int miniexp_to_int(miniexp_t p) {
78  return (((int)(size_t)(p))>>2);
79}
80
81/* miniexp_number --
82   Constructs the expression corresponding to an integer. */
83
84static inline miniexp_t miniexp_number(int x) {
85  return (miniexp_t) (size_t) ((x<<2)|3);
86}
87   
88
89
90/* -------- SYMBOLS -------- */
91
/* The textual representation of a minilisp symbol is a
   sequence of printable characters forming an identifier.
   Each symbol has a unique representation and remains
   permanently allocated. To compare two symbols,
   simply compare the <miniexp_t> pointers. */
97
98
99/* miniexp_symbolp --
100   Tests if an expression is a symbol. */
101
102static inline int miniexp_symbolp(miniexp_t p) {
103  return ((((size_t)p)&3)==2);
104}
105
106/* miniexp_to_name --
107   Returns the symbol name as a string.
108   Returns NULL if the expression is not a symbol. */
109   
110MINILISPAPI const char* miniexp_to_name(miniexp_t p);
111
112/* miniexp_symbol --
113   Returns the unique symbol expression with the specified name. */
114
115MINILISPAPI miniexp_t miniexp_symbol(const char *name);
116
117
118
119/* -------- PAIRS -------- */
120
/* Pairs (also named "cons") are the basic building blocks for
   minilisp lists. Each pair contains two expressions:
   - the <car> represents the first element of a list.
   - the <cdr> usually is a pair representing the rest of the list.
   The empty list is represented by a null pointer. */
126
127
128/* miniexp_nil --
129   The empty list. */
130
131#define miniexp_nil ((miniexp_t)(size_t)0)
132
133/* miniexp_dummy --
134   An invalid expression used to represent
135   various exceptional conditions. */
136
137#define miniexp_dummy ((miniexp_t)(size_t)2)
138
139/* miniexp_listp --
140   Tests if an expression is either a pair or the empty list. */   
141
142static inline int miniexp_listp(miniexp_t p) {
143  return ((((size_t)p)&3)==0);
144}
145
146/* miniexp_consp --
147   Tests if an expression is a pair. */
148
149static inline int miniexp_consp(miniexp_t p) {
150  return p && miniexp_listp(p);
151}
152
153/* miniexp_length --
154   Returns the length of a list.
155   Returns 0 for non lists, -1 for circular lists. */
156
157MINILISPAPI int miniexp_length(miniexp_t p);
158
159/* miniexp_car --
160   miniexp_cdr --
161   Returns the car or cdr of a pair. */
162
163static inline miniexp_t miniexp_car(miniexp_t p) {
164  if (miniexp_consp(p))
165    return ((miniexp_t*)p)[0];
166  return miniexp_nil;
167}
168
169static inline miniexp_t miniexp_cdr(miniexp_t p) {
170  if (miniexp_consp(p))
171    return ((miniexp_t*)p)[1];
172  return miniexp_nil;
173}
174
175/* miniexp_cXXr --
176   Represent common combinations of car and cdr. */
177
178MINILISPAPI miniexp_t miniexp_caar (miniexp_t p);
179MINILISPAPI miniexp_t miniexp_cadr (miniexp_t p);
180MINILISPAPI miniexp_t miniexp_cdar (miniexp_t p);
181MINILISPAPI miniexp_t miniexp_cddr (miniexp_t p);
182MINILISPAPI miniexp_t miniexp_caddr(miniexp_t p);
183MINILISPAPI miniexp_t miniexp_cdddr(miniexp_t p);
184
185/* miniexp_nth --
186   Returns the n-th element of a list. */
187
188MINILISPAPI miniexp_t miniexp_nth(int n, miniexp_t l);
189
190/* miniexp_cons --
191   Constructs a pair. */
192
193MINILISPAPI miniexp_t miniexp_cons(miniexp_t car, miniexp_t cdr);
194
195/* miniexp_rplaca --
196   miniexp_rplacd --
197   Changes the car or the cdr of a pair. */
198
199MINILISPAPI miniexp_t miniexp_rplaca(miniexp_t pair, miniexp_t newcar);
200MINILISPAPI miniexp_t miniexp_rplacd(miniexp_t pair, miniexp_t newcdr);
201
202/* miniexp_reverse --
203   Reverses a list in place. */
204
205MINILISPAPI miniexp_t miniexp_reverse(miniexp_t p);
206
207
208/* -------- OBJECTS (GENERIC) -------- */
209
210/* Object expressions represent a c++ object
211   that inherits class <miniobj_t> defined later.
212   Each object expression has a symbolic class name
213   and a pointer to the c++ object. */
214
215/* miniexp_objectp --
216   Tests if an expression is an object. */
217
218static inline int miniexp_objectp(miniexp_t p) {
219  return ((((size_t)p)&3)==1);
220}
221
222/* miniexp_classof --
223   Returns the symbolic class of an expression.
224   Returns nil if the expression is not an object. */
225
226MINILISPAPI miniexp_t miniexp_classof(miniexp_t p);
227
228/* miniexp_isa --
229   If <p> is an instance of class named <c> or one of
230   its subclasses, returns the actual class name.
231   Otherwise returns miniexp_nil. */
232
233MINILISPAPI miniexp_t miniexp_isa(miniexp_t p, miniexp_t c);
234
235
236/* -------- OBJECTS (STRINGS) -------- */
237
238/* miniexp_stringp --
239   Tests if an expression is a string. */
240
241MINILISPAPI int miniexp_stringp(miniexp_t p);
242
243/* miniexp_to_str --
244   Returns the c string represented by the expression.
245   Returns NULL if the expression is not a string.
246   The c string remains valid as long as the
247   corresponding lisp object exists. */
248
249MINILISPAPI const char *miniexp_to_str(miniexp_t p);
250
251/* miniexp_string --
252   Constructs a string expression by copying string s. */
253
254MINILISPAPI miniexp_t miniexp_string(const char *s);
255
/* miniexp_substring --
   Constructs a string expression by copying
   at most n characters from string s. */
259
260MINILISPAPI miniexp_t miniexp_substring(const char *s, int n);
261
262/* miniexp_concat --
263   Concat all the string expressions in list <l>. */
264
265MINILISPAPI miniexp_t miniexp_concat(miniexp_t l);
266
267
268
269
270
271/* -------------------------------------------------- */
272/* GARBAGE COLLECTION                                 */
273/* -------------------------------------------------- */
274
275
276/* The garbage collector reclaims the memory allocated for
277   lisp expressions no longer in use.  It is automatically
278   invoked by the pair and object allocation functions when
279   the available memory runs low.  It is however possible to
280   temporarily disable it.
281
282   The trick is to determine which lisp expressions are in
283   use at a given moment. This package takes a simplistic
284   approach. All objects of type <minivar_t> are chained and
285   can reference an arbitrary lisp expression.  Garbage
286   collection preserves all lisp expressions referenced by a
287   minivar, as well as all lisp expressions that can be
288   accessed from these.
289     
   The minivar class is designed such that C++ programs can
   directly use instances of <minivar_t> as normal
   <miniexp_t> variables.  There is almost no overhead
   accessing or changing the lisp expression referenced by a
   minivar. However, the minivar chain must be updated
   whenever the minivar object is constructed or destructed.
296   
297   Example (in C++ only):
298     miniexp_t copy_in_reverse(miniexp_t p) {
299        minivar_t l = miniexp_nil;
300        while (miniexp_consp(p)) {
301          l = miniexp_cons(miniexp_car(p), l);
302          p = miniexp_cdr(p);
303        }
304        return l;
305     }
306
307   When to use minivar_t instead of miniexp_t?
308
309   * A function that only navigates properly secured
310     s-expressions without modifying them does not need to
311     bother about minivars.
312
   * Only the following functions can cause a
     garbage collection: miniexp_cons(), miniexp_object(),
     miniexp_string(), miniexp_substring(),
     miniexp_concat(), miniexp_pprin(), miniexp_pprint(),
     miniexp_pname(), minilisp_gc(), and
     minilisp_release_gc_lock().  A function that does not
     cause calls to these functions does not need to bother
     about minivars.
320
   * Other functions should make sure that all useful
     s-expressions are directly or indirectly secured by a
     minivar_t object. In case of doubt, use minivars
     everywhere.
325
326   * Function arguments should remain <miniexp_t> in order
327     to allow interoperability with the C language. As a
328     consequence, functions must often copy their arguments
329     into minivars in order to make sure they remain
330     allocated. A small performance improvement can be
331     achieved by deciding that the function should always be
332     called using properly secured arguments. This is more
333     difficult to get right.
334
   C programs cannot use minivars as easily as C++ programs.
   Wrappers are provided to allocate minivars and to access
   their value. This is somewhat inconvenient.  It might be
   more practical to control the garbage collector
   invocations with <minilisp_acquire_gc_lock()> and
   <minilisp_release_gc_lock()>...  */
341   
342
343/* minilisp_gc --
344   Invokes the garbage collector now. */
345
346MINILISPAPI void minilisp_gc(void);
347
348/* minilisp_info --
349   Prints garbage collector statistics. */
350
351MINILISPAPI void minilisp_info(void);
352
353/* minilisp_acquire_gc_lock --
354   minilisp_release_gc_lock --
355   Temporarily disables automatic garbage collection.
356   Acquire/release pairs may be nested.
357   Both functions return their argument unmodified.
358   This is practical because <minilisp_release_gc_lock>
359   can invoke the garbage collector. Before doing
360   so it stores its argument in a minivar to
361   preserve it.
362
363   Example (in C):
364     miniexp_t copy_in_reverse(miniexp_t p) {
365        miniexp_t l = 0;
366        minilisp_acquire_gc_lock(0);
367        while (miniexp_consp(p)) {
368          l = miniexp_cons(miniexp_car(p), l);
369          p = miniexp_cdr(p);
370        }
371        return minilisp_release_gc_lock(l);
372     }
373   
374   Disabling garbage collection for a long time
375   increases the memory consumption. */
376
377MINILISPAPI miniexp_t minilisp_acquire_gc_lock(miniexp_t);
378MINILISPAPI miniexp_t minilisp_release_gc_lock(miniexp_t);
379
380/* minivar_t --
381   The minivar type. */
382#ifdef __cplusplus
383class minivar_t;
384#else
385typedef struct minivar_s minivar_t;
386#endif
387
388/* minivar_alloc --
389   minivar_free --
390   Wrappers for creating and destroying minivars in C. */
391
392MINILISPAPI minivar_t *minivar_alloc(void);
393MINILISPAPI void minivar_free(minivar_t *v);
394
395/* minivar_pointer --
396   Wrappers to access the lisp expression referenced
397   by a minivar. This function returns a pointer
398   to the actual miniexp_t variable. */
399
400MINILISPAPI miniexp_t *minivar_pointer(minivar_t *v);
401
402/* minilisp_debug --
403   Setting the debug flag runs the garbage collector
404   very often. This is extremely slow, but can be
405   useful to debug memory allocation problems. */
406
407MINILISPAPI void minilisp_debug(int debugflag);
408
/* minilisp_finish --
   Deallocates everything.  This is only useful when using
   development tools designed to check for memory leaks.
   No miniexp function can be used after calling this. */
413
414MINILISPAPI void minilisp_finish(void);
415
416
417/* -------------------------------------------------- */
418/* INPUT/OUTPUT                                       */
419/* -------------------------------------------------- */
420
/* Notes about the textual representation of miniexps.
422
423   - Special characters are:
424     * the parenthesis <(> and <)>,
425     * the double quote <">,
426     * the vertical bar <|>,
427     * any ascii character with a non zero entry
428       in array <minilisp_macrochar_parser>.
429
430   - Symbols are represented by their name.
431     Vertical bars <|> can be used to delimit names that
432     contain blanks, special characters, non printable
433     characters, non ascii characters, or
434     can be confused as a number.
435     
436   - Numbers follow the syntax specified by the C
437     function strtol() with base=0.
438
439   - Strings are delimited by double quotes.
440     All C string escapes are recognized.
441     Non printable ascii characters must be escaped.
442
   - Lists are represented by an open parenthesis <(>
     followed by the space separated list elements,
     followed by a closing parenthesis <)>.
     When the cdr of the last pair is non zero,
     the closing parenthesis is preceded by
     a space, a dot <.>, a space, and the textual
     representation of the cdr.
450
451   - When the parser encounters an ascii character corresponding
452     to a non zero function pointer in <minilisp_macrochar_parser>,
453     the function is invoked and must return a possibly empty
454     list of miniexps to be returned by subsequent
455     invocations of the parser. */
456
457
458/* minilisp_puts/getc/ungetc --
459   All minilisp i/o is performed by invoking
460   these functions pointers. */
461
462extern MINILISPAPI int (*minilisp_puts)(const char *s);
463extern MINILISPAPI int (*minilisp_getc)(void);
464extern MINILISPAPI int (*minilisp_ungetc)(int c);
465
/* minilisp_set_output --
   minilisp_set_input --
   Sets the above functions to read/write from/to file f.
   Only defined when <stdio.h> has been included. */
470
471#if defined(stdin)
472MINILISPAPI void minilisp_set_output(FILE *f);
473MINILISPAPI void minilisp_set_input(FILE *f);
474#endif
475
476/* miniexp_read --
477   Reads an expression by repeatedly
478   invoking <minilisp_getc> and <minilisp_ungetc>.
479   Returns <miniexp_dummy> when an error occurs. */
480
481MINILISPAPI miniexp_t miniexp_read(void);
482
/* miniexp_prin --
   miniexp_print --
   Prints a minilisp expression by repeatedly invoking <minilisp_puts>.
   Only <miniexp_print> outputs a final newline character.
   These functions are safe to call anytime. */
488
489MINILISPAPI miniexp_t miniexp_prin(miniexp_t p);
490MINILISPAPI miniexp_t miniexp_print(miniexp_t p);
491
/* miniexp_pprin --
   miniexp_pprint --
   Prints a minilisp expression with reasonably pretty line breaks.
   Argument <width> is the intended number of columns.
   Only <miniexp_pprint> outputs a final newline character.
   These functions can cause a garbage collection to occur. */
498
499MINILISPAPI miniexp_t miniexp_pprin(miniexp_t p, int width);
500MINILISPAPI miniexp_t miniexp_pprint(miniexp_t p, int width);
501
/* miniexp_pname --
   Returns a string containing the textual representation
   of a minilisp expression. Set argument <width> to zero
   to output a single line, or to a positive value to
   perform pretty line breaks for this intended number of columns.
   This function can cause a garbage collection to occur.
   It works by temporarily redefining <minilisp_puts>. */
509
510MINILISPAPI miniexp_t miniexp_pname(miniexp_t p, int width);
511
512/* minilisp_print_7bits --
513   When this flag is set, all non ascii characters
514   in strings are escaped in octal. */
515
516extern MINILISPAPI int minilisp_print_7bits;
517
518/* minilisp_macrochar_parser --
519   A non zero entry in this array defines a special parsing
520   function that runs when the corresponding character is
521   encountered. */
522
523extern MINILISPAPI miniexp_t (*minilisp_macrochar_parser[128])(void);
524
525
526
527/* -------------------------------------------------- */
528/* STUFF FOR C++ ONLY                                 */
529/* -------------------------------------------------- */
530
531#ifdef __cplusplus
532# ifndef __cplusplus
533{
534# endif
535} // extern "C"
536
537typedef void minilisp_mark_t(miniexp_t *pp);
538
539/* -------- MINIVARS -------- */
540
541/* minivar_t --
542   A class for protected garbage collector variables. */
543
544MINILISPAPI
545class minivar_t
546{
547  miniexp_t data;
548  minivar_t *next;
549  minivar_t **pprev;
550public:
551  minivar_t();
552  minivar_t(miniexp_t p);
553  minivar_t(const minivar_t &v);
554  operator miniexp_t&() { return data; }
555  miniexp_t* operator&() { return &data; }
556  minivar_t& operator=(miniexp_t p) { data = p; return *this; }
557  minivar_t& operator=(const minivar_t &v) { data = v.data; return *this; }
558  ~minivar_t() { if ((*pprev = next)) next->pprev = pprev; }
559#ifdef MINIEXP_IMPLEMENTATION
560  static minivar_t *vars;
561  static void mark(minilisp_mark_t*);
562#endif
563};
564
565
566/* -------- MINIOBJ -------- */
567
568
569/* miniobj_t --
570   The base class for c++ objects
571   represented by object expressions. */
572
573MINILISPAPI
574class miniobj_t {
575 public:
576  virtual ~miniobj_t();
577
578  /* --- stuff defined by MINIOBJ_DECLARE --- */
579  /* classname: a symbol characterizing this class. */
580  static miniexp_t classname;
581  /* classof: class name symbol for this object. */
582  virtual miniexp_t classof() const = 0;
583  /* isa -- tests if this is an instance of <classname>. */
584  virtual bool isa(miniexp_t classname) const;
585
586  /* --- optional stuff --- */
587  /* mark: iterates over miniexps contained by this object
588     for garbage collecting purposes. */
589  virtual void mark(minilisp_mark_t*);
590
591  /* pname: returns a printable name for this object.
592     The caller must deallocate the result with delete[]. */
593  virtual char *pname() const;
594};
595
596/* MINIOBJ_DECLARE --
597   MINIOBJ_IMPLEMENT --
598   Useful code fragments for implementing
599   the mandatory part of miniobj subclasses. */
600
/* Expands, inside a class body, to the public declarations of the
   static <classname> member and of the classof() and isa() overrides.
   The three macro arguments are accepted but not used by the
   expansion.  The expansion already ends with a semicolon. */
#define MINIOBJ_DECLARE(cls, supercls, name) \
  public: \
    static miniexp_t classname; \
    virtual miniexp_t classof() const; \
    virtual bool isa(miniexp_t) const;
605
/* Expands to the out-of-class definitions matching MINIOBJ_DECLARE:
   <classname> is initialized with the symbol called <name>, classof()
   returns it, and isa() accepts either this class name or any name
   accepted by <supercls>::isa(). */
#define MINIOBJ_IMPLEMENT(cls, supercls, name) \
  miniexp_t cls::classname = miniexp_symbol(name); \
  miniexp_t cls::classof() const \
    { return cls::classname; } \
  bool cls::isa(miniexp_t wanted) const \
    { return (cls::classname == wanted) || supercls::isa(wanted); }
612
613
/* miniexp_to_obj --
   Returns a pointer to the object represented by a lisp
   expression. Returns NULL if the expression is not an
   object expression.
*/
619
620static inline miniobj_t *miniexp_to_obj(miniexp_t p) {
621  if (miniexp_objectp(p))
622    return ((miniobj_t**)(((size_t)p)&~((size_t)3)))[0];
623  return 0;
624}
625
626/* miniexp_object --
627   Create an object expression for a given object. */
628
629MINILISPAPI miniexp_t miniexp_object(miniobj_t *obj);
630
631
632#endif /* __cplusplus */
633
634
635
636
637
638/* -------------------------------------------------- */
639/* THE END                                            */
640/* -------------------------------------------------- */
641
642#endif /* MINIEXP_H */
Note: See TracBrowser for help on using the repository browser.