source: trunk/libdjvu/miniexp.h @ 280

Last change on this file since 280 was 280, checked in by rbri, 11 years ago

DJVU plugin: djvulibre updated to version 3.5.22

File size: 19.3 KB
Line 
1/* -*- C -*-
2// -------------------------------------------------------------------
3// MiniExp - Library for handling lisp expressions
4// Copyright (c) 2005  Leon Bottou
5//
6// This software is subject to, and may be distributed under, the
7// GNU General Public License, either Version 2 of the license,
8// or (at your option) any later version. The license should have
9// accompanied the software or you may obtain a copy of the license
10// from the Free Software Foundation at http://www.fsf.org .
11//
12// This program is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15// GNU General Public License for more details.
16// -------------------------------------------------------------------
17*/
18/* $Id: miniexp.h,v 1.22 2008/07/06 00:04:37 leonb Exp $ */
19
20#ifndef MINIEXP_H
21#define MINIEXP_H
22
23#ifdef __cplusplus
24extern "C" { 
25# ifndef __cplusplus
26}
27# endif
28#endif
29
30#ifndef MINILISPAPI
31# ifdef WIN32
32#  ifdef DLL_EXPORT
33#   define MINILISPAPI __declspec(dllexport)
34#  else
35#   define MINILISPAPI __declspec(dllimport)
36#  endif
37# endif
38#endif
39#ifndef MINILISPAPI
40# define MINILISPAPI /**/
41#endif
42 
43
44/* -------------------------------------------------- */
45/* LISP EXPRESSIONS                                   */
46/* -------------------------------------------------- */
47
48/* miniexp_t --
49   Opaque pointer type representing a lisp expression,
50   also known as s-expression.
51   S-expressions can be viewed as a simple and powerful
52   alternative to XML.  DjVu uses s-expressions to handle
53   annotations. Both the decoding api <ddjvuapi.h> and
54   program <djvused> use s-expressions to describe the
55   hidden text information and the navigation
56   information */
57
58
59typedef struct miniexp_s* miniexp_t;
60
61
62/* There are four basic types of lisp expressions,
63   numbers, symbols, pairs, and objects.
64   The latter category can represent any c++ object
65   that inherits class <miniobj_t> defined later in this file.
66   The only such objects defined in this file are strings. */
67
68
69/* -------- NUMBERS -------- */
70
71/* Minilisp numbers can represent any integer
72   in range [-2^29...2^29-1] */
73
74
75/* miniexp_numberp --
76   Tests if an expression is a number. */
77
78static inline int miniexp_numberp(miniexp_t p) {
79  return (((size_t)(p)&3)==3);
80}
81
82/* miniexp_to_int --
83   Returns the integer corresponding to a lisp expression.
84   Assume that the expression is indeed a number. */
85
86static inline int miniexp_to_int(miniexp_t p) {
87  return (((int)(size_t)(p))>>2);
88}
89
90/* miniexp_number --
91   Constructs the expression corresponding to an integer. */
92
93static inline miniexp_t miniexp_number(int x) {
94  return (miniexp_t) (size_t) ((x<<2)|3);
95}
96   
97
98
99/* -------- SYMBOLS -------- */
100
101/* The textual representation of a minilisp symbol is a
102   sequence of printable characters forming an identifier.
103   Each symbol has a unique representation and remains
104   permanently allocated. To compare two symbols,
105   simply compare the <miniexp_t> pointers. */
106
107
108/* miniexp_symbolp --
109   Tests if an expression is a symbol. */
110
111static inline int miniexp_symbolp(miniexp_t p) {
112  return ((((size_t)p)&3)==2);
113}
114
115/* miniexp_to_name --
116   Returns the symbol name as a string.
117   Returns NULL if the expression is not a symbol. */
118   
119MINILISPAPI const char* miniexp_to_name(miniexp_t p);
120
121/* miniexp_symbol --
122   Returns the unique symbol expression with the specified name. */
123
124MINILISPAPI miniexp_t miniexp_symbol(const char *name);
125
126
127
128/* -------- PAIRS -------- */
129
130/* Pairs (also named "cons") are the basic building blocks for
131   minilisp lists. Each pair contains two expressions:
132   - the <car> represents the first element of a list.
133   - the <cdr> usually is a pair representing the rest of the list.
134   The empty list is represented by a null pointer. */
135
136
137/* miniexp_nil --
138   The empty list. */
139
140#define miniexp_nil ((miniexp_t)(size_t)0)
141
142/* miniexp_dummy --
143   An invalid expression used to represent
144   various exceptional conditions. */
145
146#define miniexp_dummy ((miniexp_t)(size_t)2)
147
148/* miniexp_listp --
149   Tests if an expression is either a pair or the empty list. */   
150
151static inline int miniexp_listp(miniexp_t p) {
152  return ((((size_t)p)&3)==0);
153}
154
155/* miniexp_consp --
156   Tests if an expression is a pair. */
157
158static inline int miniexp_consp(miniexp_t p) {
159  return p && miniexp_listp(p);
160}
161
162/* miniexp_length --
163   Returns the length of a list.
164   Returns 0 for non lists, -1 for circular lists. */
165
166MINILISPAPI int miniexp_length(miniexp_t p);
167
168/* miniexp_car --
169   miniexp_cdr --
170   Returns the car or cdr of a pair. */
171
172static inline miniexp_t miniexp_car(miniexp_t p) {
173  if (miniexp_consp(p))
174    return ((miniexp_t*)p)[0];
175  return miniexp_nil;
176}
177
178static inline miniexp_t miniexp_cdr(miniexp_t p) {
179  if (miniexp_consp(p))
180    return ((miniexp_t*)p)[1];
181  return miniexp_nil;
182}
183
184/* miniexp_cXXr --
185   Represent common combinations of car and cdr. */
186
187MINILISPAPI miniexp_t miniexp_caar (miniexp_t p);
188MINILISPAPI miniexp_t miniexp_cadr (miniexp_t p);
189MINILISPAPI miniexp_t miniexp_cdar (miniexp_t p);
190MINILISPAPI miniexp_t miniexp_cddr (miniexp_t p);
191MINILISPAPI miniexp_t miniexp_caddr(miniexp_t p);
192MINILISPAPI miniexp_t miniexp_cdddr(miniexp_t p);
193
194/* miniexp_nth --
195   Returns the n-th element of a list. */
196
197MINILISPAPI miniexp_t miniexp_nth(int n, miniexp_t l);
198
199/* miniexp_cons --
200   Constructs a pair. */
201
202MINILISPAPI miniexp_t miniexp_cons(miniexp_t car, miniexp_t cdr);
203
204/* miniexp_rplaca --
205   miniexp_rplacd --
206   Changes the car or the cdr of a pair. */
207
208MINILISPAPI miniexp_t miniexp_rplaca(miniexp_t pair, miniexp_t newcar);
209MINILISPAPI miniexp_t miniexp_rplacd(miniexp_t pair, miniexp_t newcdr);
210
211/* miniexp_reverse --
212   Reverses a list in place. */
213
214MINILISPAPI miniexp_t miniexp_reverse(miniexp_t p);
215
216
217/* -------- OBJECTS (GENERIC) -------- */
218
219/* Object expressions represent a c++ object
220   that inherits class <miniobj_t> defined later.
221   Each object expression has a symbolic class name
222   and a pointer to the c++ object. */
223
224/* miniexp_objectp --
225   Tests if an expression is an object. */
226
227static inline int miniexp_objectp(miniexp_t p) {
228  return ((((size_t)p)&3)==1);
229}
230
231/* miniexp_classof --
232   Returns the symbolic class of an expression.
233   Returns nil if the expression is not an object. */
234
235MINILISPAPI miniexp_t miniexp_classof(miniexp_t p);
236
237/* miniexp_isa --
238   If <p> is an instance of class named <c> or one of
239   its subclasses, returns the actual class name.
240   Otherwise returns miniexp_nil. */
241
242MINILISPAPI miniexp_t miniexp_isa(miniexp_t p, miniexp_t c);
243
244
245/* -------- OBJECTS (STRINGS) -------- */
246
247/* miniexp_stringp --
248   Tests if an expression is a string. */
249
250MINILISPAPI int miniexp_stringp(miniexp_t p);
251
252/* miniexp_to_str --
253   Returns the c string represented by the expression.
254   Returns NULL if the expression is not a string.
255   The c string remains valid as long as the
256   corresponding lisp object exists. */
257
258MINILISPAPI const char *miniexp_to_str(miniexp_t p);
259
260/* miniexp_string --
261   Constructs a string expression by copying string s. */
262
263MINILISPAPI miniexp_t miniexp_string(const char *s);
264
265/* miniexp_substring --
266   Constructs a string expression by copying
267   at most n characters from string s. */
268
269MINILISPAPI miniexp_t miniexp_substring(const char *s, int n);
270
271/* miniexp_concat --
272   Concat all the string expressions in list <l>. */
273
274MINILISPAPI miniexp_t miniexp_concat(miniexp_t l);
275
276
277
278
279
280/* -------------------------------------------------- */
281/* GARBAGE COLLECTION                                 */
282/* -------------------------------------------------- */
283
284
285/* The garbage collector reclaims the memory allocated for
286   lisp expressions no longer in use.  It is automatically
287   invoked by the pair and object allocation functions when
288   the available memory runs low.  It is however possible to
289   temporarily disable it.
290
291   The trick is to determine which lisp expressions are in
292   use at a given moment. This package takes a simplistic
293   approach. All objects of type <minivar_t> are chained and
294   can reference an arbitrary lisp expression.  Garbage
295   collection preserves all lisp expressions referenced by a
296   minivar, as well as all lisp expressions that can be
297   accessed from these. When called automatically,
298   garbage collection also preserves the sixteen most recently
299   created miniexps in order to make sure that temporaries do
300   not vanish in the middle of complicated C expressions.
301     
302   The minivar class is designed such that C++ programs can
303   directly use instances of <minivar_t> as normal
304   <miniexp_t> variables.  There is almost no overhead
305   accessing or changing the lisp expression referenced by a
306   minivar. However, the minivar chain must be updated
307   whenever the minivar object is constructed or destructed.
308   
309   Example (in C++ only):
310     miniexp_t copy_in_reverse(miniexp_t p) {
311        minivar_t l = miniexp_nil;
312        while (miniexp_consp(p)) {
313          l = miniexp_cons(miniexp_car(p), l);
314          p = miniexp_cdr(p);
315        }
316        return l;
317     }
318
319   When to use minivar_t instead of miniexp_t?
320
321   * A function that only navigates properly secured
322     s-expressions without modifying them does not need to
323     bother about minivars.
324
325   * Only the following miniexp functions can cause a
326     garbage collection: miniexp_cons(), miniexp_object(),
327     miniexp_string(), miniexp_substring(),
328     miniexp_concat(), miniexp_pprin(), miniexp_pprint(),
329     minilisp_gc(), and minilisp_release_gc_lock().  A
330     function that does not cause calls to these functions
331     does not need to bother about minivars.
332
333   * Other functions should make sure that all useful
334     s-expressions are directly or indirectly secured by a
335     minivar_t object. In case of doubt, use minivars
336     everywhere.
337
338   * Function arguments should remain <miniexp_t> in order
339     to allow interoperability with the C language. As a
340     consequence, functions must often copy their arguments
341     into minivars in order to make sure they remain
342     allocated. A small performance improvement can be
343     achieved by deciding that the function should always be
344     called using properly secured arguments. This is more
345     difficult to get right.
346
347   C programs cannot use minivars as easily as C++ programs.
348   Wrappers are provided to allocate minivars and to access
349   their value. This is somewhat inconvenient.  It might be
350   more practical to control the garbage collector
351   invocations with <minilisp_acquire_gc_lock()> and
352   <minilisp_release_gc_lock()>...  */
353   
354
355/* minilisp_gc --
356   Invokes the garbage collector now. */
357
358MINILISPAPI void minilisp_gc(void);
359
360/* minilisp_info --
361   Prints garbage collector statistics. */
362
363MINILISPAPI void minilisp_info(void);
364
365/* minilisp_acquire_gc_lock --
366   minilisp_release_gc_lock --
367   Temporarily disables automatic garbage collection.
368   Acquire/release pairs may be nested.
369   Both functions return their argument unmodified.
370   This is practical because <minilisp_release_gc_lock>
371   can invoke the garbage collector. Before doing
372   so it stores its argument in a minivar to
373   preserve it.
374
375   Example (in C):
376     miniexp_t copy_in_reverse(miniexp_t p) {
377        miniexp_t l = 0;
378        minilisp_acquire_gc_lock(0);
379        while (miniexp_consp(p)) {
380          l = miniexp_cons(miniexp_car(p), l);
381          p = miniexp_cdr(p);
382        }
383        return minilisp_release_gc_lock(l);
384     }
385   
386   Disabling garbage collection for a long time
387   increases the memory consumption. */
388
389MINILISPAPI miniexp_t minilisp_acquire_gc_lock(miniexp_t);
390MINILISPAPI miniexp_t minilisp_release_gc_lock(miniexp_t);
391
392/* minivar_t --
393   The minivar type. */
394#ifdef __cplusplus
395class minivar_t;
396#else
397typedef struct minivar_s minivar_t;
398#endif
399
400/* minivar_alloc --
401   minivar_free --
402   Wrappers for creating and destroying minivars in C. */
403
404MINILISPAPI minivar_t *minivar_alloc(void);
405MINILISPAPI void minivar_free(minivar_t *v);
406
407/* minivar_pointer --
408   Wrappers to access the lisp expression referenced
409   by a minivar. This function returns a pointer
410   to the actual miniexp_t variable. */
411
412MINILISPAPI miniexp_t *minivar_pointer(minivar_t *v);
413
414/* minilisp_debug --
415   Setting the debug flag runs the garbage collector
416   very often. This is extremely slow, but can be
417   useful to debug memory allocation problems. */
418
419MINILISPAPI void minilisp_debug(int debugflag);
420
421/* minilisp_finish --
422   Deallocates everything.  This is only useful when using
423   development tools designed to check for memory leaks. 
424   No miniexp function can be used after calling this. */
425
426MINILISPAPI void minilisp_finish(void);
427
428
429/* -------------------------------------------------- */
430/* INPUT/OUTPUT                                       */
431/* -------------------------------------------------- */
432
433/* Notes about the textual representation of miniexps.
434
435   - Special characters are:
436     * the parenthesis <(> and <)>,
437     * the double quote <">,
438     * the vertical bar <|>,
439     * any ascii character with a non zero entry
440       in array <minilisp_macrochar_parser>.
441
442   - Symbols are represented by their name.
443     Vertical bars <|> can be used to delimit names that
444     contain blanks, special characters, non printable
445     characters, non ascii characters, or
446     can be confused as a number.
447     
448   - Numbers follow the syntax specified by the C
449     function strtol() with base=0.
450
451   - Strings are delimited by double quotes.
452     All C string escapes are recognized.
453     Non printable ascii characters must be escaped.
454
455   - Lists are represented by an open parenthesis <(>
456     followed by the space separated list elements,
457     followed by a closing parenthesis <)>.
458     When the cdr of the last pair is non zero,
459     the closed parenthesis is preceded by
460     a space, a dot <.>, a space, and the textual
461     representation of the cdr.
462
463   - When the parser encounters an ascii character corresponding
464     to a non zero function pointer in <minilisp_macrochar_parser>,
465     the function is invoked and must return a possibly empty
466     list of miniexps to be returned by subsequent
467     invocations of the parser. */
468
469
470/* minilisp_puts/getc/ungetc --
471   All minilisp i/o is performed by invoking
472   these function pointers. */
473
474extern MINILISPAPI int (*minilisp_puts)(const char *s);
475extern MINILISPAPI int (*minilisp_getc)(void);
476extern MINILISPAPI int (*minilisp_ungetc)(int c);
477
478/* minilisp_set_output --
479   minilisp_set_input --
480   Sets the above function to read/write from/to file f.
481   Only defined when <stdio.h> has been included. */
482
483#if defined(stdin)
484MINILISPAPI void minilisp_set_output(FILE *f);
485MINILISPAPI void minilisp_set_input(FILE *f);
486#endif
487
488/* miniexp_read --
489   Reads an expression by repeatedly
490   invoking <minilisp_getc> and <minilisp_ungetc>.
491   Returns <miniexp_dummy> when an error occurs. */
492
493MINILISPAPI miniexp_t miniexp_read(void);
494
495/* miniexp_prin --
496   miniexp_print --
497   Prints a minilisp expression by repeatedly invoking <minilisp_puts>.
498   Only <miniexp_print> outputs a final newline character.
499   These functions are safe to call anytime. */
500
501MINILISPAPI miniexp_t miniexp_prin(miniexp_t p);
502MINILISPAPI miniexp_t miniexp_print(miniexp_t p);
503
504/* miniexp_pprin --
505   miniexp_pprint --
506   Prints a minilisp expression with reasonably pretty line breaks.
507   Argument <width> is the intended number of columns.
508   Only <miniexp_pprint> outputs a final newline character.
509   These functions can cause a garbage collection to occur. */
510
511MINILISPAPI miniexp_t miniexp_pprin(miniexp_t p, int width);
512MINILISPAPI miniexp_t miniexp_pprint(miniexp_t p, int width);
513
514/* miniexp_pname --
515   Returns a string containing the textual representation
516   of a minilisp expression. Set argument <width> to zero
517   to output a single line, or to a positive value to
518   perform pretty line breaks for this intended number of columns.
519   This function can cause a garbage collection to occur.
520   It works by temporarily redefining <minilisp_puts>. */
521
522MINILISPAPI miniexp_t miniexp_pname(miniexp_t p, int width);
523
524/* minilisp_print_7bits --
525   When this flag is set, all non ascii characters
526   in strings are escaped in octal. */
527
528extern MINILISPAPI int minilisp_print_7bits;
529
530/* minilisp_macrochar_parser --
531   A non zero entry in this array defines a special parsing
532   function that runs when the corresponding character is
533   encountered. */
534
535extern MINILISPAPI miniexp_t (*minilisp_macrochar_parser[128])(void);
536
537
538
539/* -------------------------------------------------- */
540/* STUFF FOR C++ ONLY                                 */
541/* -------------------------------------------------- */
542
543#ifdef __cplusplus
544# ifndef __cplusplus
545{
546# endif
547} // extern "C"
548
549typedef void minilisp_mark_t(miniexp_t *pp);
550
551/* -------- MINIVARS -------- */
552
553/* minivar_t --
554   A class for protected garbage collector variables. */
555
556class MINILISPAPI
557minivar_t
558{
559  miniexp_t data;
560  minivar_t *next;
561  minivar_t **pprev;
562public:
563  minivar_t();
564  minivar_t(miniexp_t p);
565  minivar_t(const minivar_t &v);
566  operator miniexp_t&() { return data; }
567  miniexp_t* operator&() { return &data; }
568  minivar_t& operator=(miniexp_t p) { data = p; return *this; }
569  minivar_t& operator=(const minivar_t &v) { data = v.data; return *this; }
570  ~minivar_t() { if ((*pprev = next)) next->pprev = pprev; }
571#ifdef MINIEXP_IMPLEMENTATION
572  static minivar_t *vars;
573  static void mark(minilisp_mark_t*);
574#endif
575};
576
577
578/* -------- MINIOBJ -------- */
579
580
581/* miniobj_t --
582   The base class for c++ objects
583   represented by object expressions. */
584
585class MINILISPAPI
586miniobj_t {
587 public:
588  virtual ~miniobj_t();
589
590  /* --- stuff defined by MINIOBJ_DECLARE --- */
591  /* classname: a symbol characterizing this class. */
592  static const miniexp_t classname;
593  /* classof: class name symbol for this object. */
594  virtual miniexp_t classof() const = 0;
595  /* isa -- tests if this is an instance of <classname>. */
596  virtual bool isa(miniexp_t classname) const;
597
598  /* --- optional stuff --- */
599  /* pname: returns a printable name for this object.
600     The caller must deallocate the result with delete[]. */
601  virtual char *pname() const;
602  /* mark: iterates over miniexps contained by this object
603     for garbage collecting purposes. */
604  virtual void mark(minilisp_mark_t*);
605  /* destroy: called by the garbage collector to
606     deallocate the object. Defaults to 'delete this'. */
607  virtual void destroy();
608     
609};
610
/* MINIOBJ_DECLARE --
   MINIOBJ_IMPLEMENT --
   Code fragments providing the mandatory part of a
   miniobj subclass.  MINIOBJ_DECLARE goes inside the class
   body and declares <classname>, <classof> and <isa>.
   It starts with a <public:> label, so members that follow
   it are public unless another access label is given.
   Arguments <supercls> and <name> are not used by
   MINIOBJ_DECLARE itself. */

#define MINIOBJ_DECLARE(cls, supercls, name) \
  public: \
    static const miniexp_t classname; \
    virtual miniexp_t classof() const; \
    virtual bool isa(miniexp_t) const;
620
/* MINIOBJ_IMPLEMENT defines the members declared by
   MINIOBJ_DECLARE for class <cls>: <classname> is the symbol
   named <name>, <classof> returns it, and <isa> accepts that
   symbol or any symbol accepted by <supercls>. */
#define MINIOBJ_IMPLEMENT(cls, supercls, name)\
  const miniexp_t cls::classname = miniexp_symbol(name);\
  miniexp_t cls::classof() const {\
    return cls::classname;\
  }\
  bool cls::isa(miniexp_t n) const {\
    if (cls::classname == n)\
      return true;\
    return supercls::isa(n);\
  }
627
628
629/* miniexp_to_obj --
630   Returns a pointer to the object represented by an lisp
631   expression. Returns NULL if the expression is not an
632   object expression.
633*/
634
635static inline miniobj_t *miniexp_to_obj(miniexp_t p) {
636  if (miniexp_objectp(p))
637    return ((miniobj_t**)(((size_t)p)&~((size_t)3)))[0];
638  return 0;
639}
640
641/* miniexp_object --
642   Create an object expression for a given object. */
643
644MINILISPAPI miniexp_t miniexp_object(miniobj_t *obj);
645
646
647#endif /* __cplusplus */
648
649
650
651
652
653/* -------------------------------------------------- */
654/* THE END                                            */
655/* -------------------------------------------------- */
656
657#endif /* MINIEXP_H */
Note: See TracBrowser for help on using the repository browser.