source: trunk/libdjvu/GString.h @ 280

Last change on this file since 280 was 280, checked in by rbri, 11 years ago

DJVU plugin: djvulibre updated to version 3.5.22

File size: 57.0 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, either Version 2 of the license,
9//C- or (at your option) any later version. The license should have
10//C- accompanied the software or you may obtain a copy of the license
11//C- from the Free Software Foundation at http://www.fsf.org .
12//C-
13//C- This program is distributed in the hope that it will be useful,
14//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16//C- GNU General Public License for more details.
17//C-
18//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19//C- Lizardtech Software.  Lizardtech Software has authorized us to
20//C- replace the original DjVu(r) Reference Library notice by the following
21//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22//C-
23//C-  ------------------------------------------------------------------
24//C- | DjVu (r) Reference Library (v. 3.5)
25//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26//C- | The DjVu Reference Library is protected by U.S. Pat. No.
27//C- | 6,058,214 and patents pending.
28//C- |
29//C- | This software is subject to, and may be distributed under, the
30//C- | GNU General Public License, either Version 2 of the license,
31//C- | or (at your option) any later version. The license should have
32//C- | accompanied the software or you may obtain a copy of the license
33//C- | from the Free Software Foundation at http://www.fsf.org .
34//C- |
35//C- | The computer code originally released by LizardTech under this
36//C- | license and unmodified by other parties is deemed "the LIZARDTECH
37//C- | ORIGINAL CODE."  Subject to any third party intellectual property
38//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39//C- | non-exclusive license to make, use, sell, or otherwise dispose of
40//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42//C- | General Public License.   This grant only confers the right to
43//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44//C- | the extent such infringement is reasonably necessary to enable
45//C- | recipient to make, have made, practice, sell, or otherwise dispose
46//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47//C- | any greater extent that may be necessary to utilize further
48//C- | modifications or combinations.
49//C- |
50//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54//C- +------------------------------------------------------------------
55//
56// $Id: GString.h,v 1.25 2009/05/17 23:57:42 leonb Exp $
57// $Name: release_3_5_22 $
58
59#ifndef _GSTRING_H_
60#define _GSTRING_H_
61#ifdef HAVE_CONFIG_H
62#include "config.h"
63#endif
64#if NEED_GNUG_PRAGMAS
65# pragma interface
66#endif
67
68/** @name GString.h
69
70    Files #"GString.h"# and #"GString.cpp"# implement a general
71    purpose string class \Ref{GBaseString}, with derived types
72    \Ref{GUTF8String} and \Ref{GNativeString} for UTF8 MBS encoding
73    and the current Native MBS encoding respectively.  This
74    implementation relies on smart pointers (see
75    \Ref{GSmartPointer.h}).
76
77    {\bf Historical Comments} --- At some point during the DjVu
78    research era, it became clear that C++ compilers rarely provided
79    portable libraries. We then decided to avoid fancy classes (like
80    #iostream# or #string#) and to rely only on the good old C
81    library.  A good string class however is very useful.  We had
82    already randomly picked letter 'G' to prefix class names and we
83    logically derived the new class name.  Native English speakers
84    kept laughing in hiding.  This is ironic because we completely
85    forgot this letter 'G' when creating more challenging things
86    like the ZP Coder or the IW44 wavelets. 
87
88    {\bf Later Changes}
89    When converting to I18N, we (Lizardtech) decided that two string classes
90    were needed, replacing the original GString with \Ref{GUTF8String} and
91    \Ref{GNativeString}.
92
93    @memo
94    General purpose string class.
95    @author
96    L\'eon Bottou <leonb@research.att.com> -- initial implementation.\\
97
98// From: Leon Bottou, 1/31/2002
99// This file has very little to do with my initial implementation.
100// It has been practically rewritten by Lizardtech for i18n changes.
101// My original implementation was very small in comparison
102// <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
103// In my opinion, the duplication of the string classes is a failed
104// attempt to use the type system to enforce coding policies.
105// This could be fixed.  But there are better things to do in djvulibre.
106   
107    @version
108    #$Id: GString.h,v 1.25 2009/05/17 23:57:42 leonb Exp $# */
109//@{
110
111
112#include "DjVuGlobal.h"
113#include "GContainer.h"
114
115#include <stdlib.h>
116#include <stdarg.h>
117#ifdef WIN32
118# include <windows.h>
119# define HAS_WCHAR 1
120# define HAS_MBSTATE 1
121#endif
122
123#if HAS_WCHAR
124# if !defined(AUTOCONF) || HAVE_WCHAR_H
125#  include <wchar.h>
126# endif
127#endif
128
129#if !defined(AUTOCONF) || HAVE_STDINT_H
130# include <stdint.h>
131#elif HAVE_INTTYPES_H
132# include <inttypes.h>
133#endif
134
135#ifdef HAVE_NAMESPACES
136namespace DJVU {
137# ifdef NOT_DEFINED // Just to fool emacs c++ mode
138}
139#endif
140#endif
141
// Fallback declaration of mbstate_t.  When HAS_MBSTATE is not set (it is set
// above for WIN32) and HAVE_MBSTATE_T is not defined, mbstate_t is declared
// as a plain int -- presumably so that the declarations below that take a
// `mbstate_t *` (UCS4toString, UCS4toNative) remain well-formed.
142#if !HAS_MBSTATE
143# ifndef HAVE_MBSTATE_T
144typedef int mbstate_t;
145# endif
146#endif
147
148class GBaseString;
149class GUTF8String;
150class GNativeString;
151
152// Internal string representation.
// Abstract base class (it declares pure virtual methods) deriving from
// GPEnabled so that instances can be held through GP<GStringRep> smart
// pointers.  The encoding-specific operations are the pure virtuals below;
// GStringRep::UTF8, declared later in this header, overrides them.
153class DJVUAPI GStringRep : public GPEnabled
154{
155public:
  // Encoding identifiers; accepted by the set_remainder() overload that
  // takes an EncodeType.
156  enum EncodeType { XUCS4, XUCS4BE, XUCS4LE, XUCS4_2143, XUCS4_3412,
157    XUTF16, XUTF16BE, XUTF16LE, XUTF8, XEBCDIC, XOTHER } ;
158
  // Escape-state argument accepted by toNative() and UTF8ToNative().
159  enum EscapeMode { UNKNOWN_ESCAPED=0,  IS_ESCAPED=1, NOT_ESCAPED=2 };
160
  // Nested classes.  UTF8 is defined later in this header; the others are
  // only forward-declared here.
161  class UTF8;
162  friend class UTF8;
163  class Unicode;
164  friend class Unicode;
165
166  class ChangeLocale;
167#if HAS_WCHAR
168  class Native;
169  friend class Native;
170#endif // HAS_WCHAR
171  friend class GBaseString;
172  friend class GUTF8String;
173  friend class GNativeString;
174  friend DJVUAPI unsigned int hash(const GBaseString &ref);
175
176public:
177  // default constructor
178  GStringRep(void);
179  // virtual destructor
180  virtual ~GStringRep();
181
182    // Other virtual methods.
183      // Create an empty string.
184  virtual GP<GStringRep> blank(const unsigned int sz) const = 0;
185      // Create a duplicate at the given size.
186  GP<GStringRep>  getbuf(int n) const;
187      // Change the value of one of the bytes.
188  GP<GStringRep> setat(int n, char ch) const;
189      // Append a string.
190  virtual GP<GStringRep> append(const GP<GStringRep> &s2) const = 0;
191      // Test if UTF8.  The base implementation returns false.
192  virtual bool isUTF8(void) const { return false; }
193      // Test if Native.  The base implementation returns false.
194  virtual bool isNative(void) const { return false; }
195      // Convert to Native.
196  virtual GP<GStringRep> toNative(
197    const EscapeMode escape=UNKNOWN_ESCAPED ) const = 0;
198      // Convert to UTF8.
199  virtual GP<GStringRep> toUTF8(const bool nothrow=false) const = 0;
200      // Convert to same as current class.
201  virtual GP<GStringRep> toThis(
202    const GP<GStringRep> &rep,const GP<GStringRep> &locale=0) const = 0;
203      // Compare with #s2#.
204  virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const = 0;
205
206  // Convert strings to numbers.
207  virtual int toInt(void) const = 0;
208  virtual long int toLong(
209    const int pos, int &endpos, const int base=10) const = 0;
210  virtual unsigned long toULong(
211    const int pos, int &endpos, const int base=10) const = 0;
212  virtual double toDouble(const int pos, int &endpos) const = 0;
213
214  // return the position of the next character
215  int nextChar( const int from=0 ) const;
216
217  // return next non space position
218  int nextNonSpace( const int from=0, const int len=(-1) ) const;
219
220  // return next white space position
221  int nextSpace( const int from=0, const int len=(-1) ) const;
222
223  // return the position after the last non-whitespace character.
224  int firstEndSpace( int from=0, const int len=(-1) ) const;
225
226    // Create an empty string.
227  template <class TYPE> static GP<GStringRep> create(
228    const unsigned int sz,TYPE *);
229    // Creates with a strdup string.
230  GP<GStringRep> strdup(const char *s) const;
231
232    // Creates by appending to the current string
233  GP<GStringRep> append(const char *s2) const;
234
235    // Creates with a concat operation.
236  GP<GStringRep> concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const;
237  GP<GStringRep> concat(const char *s1,const GP<GStringRep> &s2) const;
238  GP<GStringRep> concat(const GP<GStringRep> &s1,const char *s2) const;
239  GP<GStringRep> concat(const char *s1,const char *s2) const;
240
241   /* Creates with a strdup and substr.  Negative values have strlen(s)+1
242      added to them.
243   */
244  GP<GStringRep> substr(
245    const char *s,const int start,const int length=(-1)) const;
246
247  GP<GStringRep> substr(
248    const uint16_t *s,const int start,const int length=(-1)) const;
249
250  GP<GStringRep> substr(
251    const uint32_t *s,const int start,const int length=(-1)) const;
252
253  /** Initializes a string with a formatted string (as in #vprintf#).  The
254      string is re-initialized with the characters generated according to the
255      specified format #fmt# and using the optional arguments.  See the ANSI-C
256      function #vprintf()# for more information. The current implementation
257      will cause a segmentation violation if the resulting string is longer
258      than 32768 characters. */
259  GP<GStringRep> vformat(va_list args) const;
260  // -- CONVERSION BETWEEN ENCODINGS
261
262  static GP<GStringRep> UTF8ToNative( const char *s,
263    const EscapeMode escape=UNKNOWN_ESCAPED );
264  static GP<GStringRep> NativeToUTF8( const char *s );
265
266  // Creates an uppercase version of the current string.
267  GP<GStringRep> upcase(void) const;
268  // Creates a lowercase version of the current string.
269  GP<GStringRep> downcase(void) const;
270
271  /** Returns the next UCS4 character, and updates the pointer s. */
272  static uint32_t UTF8toUCS4(
273    unsigned char const *&s, void const * const endptr );
274
275  /** Returns the number of bytes in next UCS4 character,
276      and sets #w# to the next UCS4 character.  Implemented by advancing
      a copy of #s# with the pointer-updating overload above and returning
      the distance the copy moved.  */
277  static int UTF8toUCS4(
278    uint32_t &w, unsigned char const s[], void const * const endptr )
279  { unsigned char const *r=s;w=UTF8toUCS4(r,endptr);return (int)((size_t)r-(size_t)s); }
280
281  /** Returns the next UCS4 word from the UTF16 string. */
282  static int UTF16toUCS4(
283     uint32_t &w, uint16_t const * const s,void const * const eptr);
284
285  static int UCS4toUTF16(
286    uint32_t w, uint16_t &w1, uint16_t &w2);
287
  // -- COMPARISONS
288  int cmp(const char *s2, const int len=(-1)) const;
289  static int cmp(
290    const GP<GStringRep> &s1, const GP<GStringRep> &s2, const int len=(-1)) ;
291  static int cmp(
292    const GP<GStringRep> &s1, const char *s2, const int len=(-1));
293  static int cmp(
294    const char *s1, const GP<GStringRep> &s2, const int len=(-1));
295  static int cmp(
296    const char *s1, const char *s2, const int len=(-1));
297
298  // Lookup the next character, and return the position of the next character.
299  int getUCS4(uint32_t &w, const int from) const;
300
301  virtual unsigned char *UCS4toString(
302    const uint32_t w, unsigned char *ptr, mbstate_t *ps=0) const = 0;
303
304  static unsigned char *UCS4toUTF8(
305    const uint32_t w,unsigned char *ptr);
306
307  static unsigned char *UCS4toNative(
308    const uint32_t w,unsigned char *ptr, mbstate_t *ps);
309
  // -- SEARCHING
  // GBaseString declares wrappers with the same names; see the comments
  // there for the documented search semantics.
310  int search(char c, int from=0) const;
311
312  int search(char const *str, int from=0) const;
313
314  int rsearch(char c, int from=0) const;
315
316  int rsearch(char const *str, int from=0) const;
317
318  int contains(char const accept[], int from=0) const;
319
320  int rcontains(char const accept[], int from=0) const;
321
322protected:
323  // Return the next character and increment the source pointer.
324  virtual uint32_t getValidUCS4(const char *&source) const = 0;
325
326  GP<GStringRep> tocase(
327    bool (*xiswcase)(const unsigned long wc),
328    unsigned long (*xtowcase)(const unsigned long wc)) const;
329
330  // Tests if the specified character passes the xiswtest.  If so, the
331  // return pointer is incremented to the next character, otherwise the
332  // specified #ptr# is returned.
333  const char * isCharType( bool (*xiswtest)(const unsigned long wc), const char *ptr,
334    const bool reverse=false) const;
335
336  // Find the next character position that passes the isCharType test.
337  int nextCharType(
338    bool (*xiswtest)(const unsigned long wc),const int from,const int len,
339    const bool reverse=false) const;
340
  // Character classification / case mapping helpers; their signatures match
  // the function-pointer parameters of tocase(), isCharType() and
  // nextCharType() above.
341  static bool giswspace(const unsigned long w);
342  static bool giswupper(const unsigned long w);
343  static bool giswlower(const unsigned long w);
344  static unsigned long gtowupper(const unsigned long w);
345  static unsigned long gtowlower(const unsigned long w);
346
347  virtual void set_remainder( void const * const buf, const unsigned int size,
348    const EncodeType encodetype);
349  virtual void set_remainder( void const * const buf, const unsigned int size,
350    const GP<GStringRep> &encoding );
351  virtual void set_remainder ( const GP<Unicode> &remainder );
352
353  virtual GP<Unicode> get_remainder( void ) const;
354
355public:
356  /* Returns a copy of this string with the characters that are special
357      in XML escaped: '<'  to "&lt;", '>'  to "&gt;",  '&' to "&amp;",
358      '\'' to "&apos;", and  '\"' to  "&quot;".   Characters 0x01 through
359      0x1f are also escaped. */
360  GP<GStringRep> toEscaped( const bool tosevenbit ) const;
361
362  // Tests if a string is legally encoded in the current character set.
363  virtual bool is_valid(void) const = 0;
364#if HAS_WCHAR
365  virtual int ncopy(wchar_t * const buf, const int buflen) const = 0;
366#endif
367protected:
368
369// Actual string data.
// NOTE(review): presumably #size# is the byte length of #data# -- confirm
// against GString.cpp.
370  int  size;
371  char *data;
372};
373
// String representation for UTF8 data.  Declares an override for every pure
// virtual method of GStringRep, plus a family of static create() factories
// that return the result as GP<GStringRep>.
374class DJVUAPI GStringRep::UTF8 : public GStringRep
375{
376public:
377  // default constructor
378  UTF8(void);
379  // virtual destructor
380  virtual ~UTF8();
381
382    // Other virtual methods.
      // NOTE(review): the default argument on blank() exists only on this
      // override (the base declaration has none), so it applies only to
      // calls made through the UTF8 static type.
383  virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
384  virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
385      // Test if UTF8.
386  virtual bool isUTF8(void) const;
387      // Convert to Native.
388  virtual GP<GStringRep> toNative(
389    const EscapeMode escape=UNKNOWN_ESCAPED) const;
390      // Convert to UTF8.
391  virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
392      // Convert to same as current class.
393  virtual GP<GStringRep> toThis(
394    const GP<GStringRep> &rep,const GP<GStringRep> &) const;
395      // Compare with #s2#.
396  virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const;
397
      // Create an empty string.
398  static GP<GStringRep> create(const unsigned int sz = 0);
399
400  // Convert strings to numbers.
401  virtual int toInt(void) const;
402  virtual long int toLong(
403    const int pos, int &endpos, const int base=10) const;
404  virtual unsigned long toULong(
405    const int pos, int &endpos, const int base=10) const;
406  virtual double toDouble(
407    const int pos, int &endpos) const;
408
409    // Create a strdup string.
410  static GP<GStringRep> create(const char *s);
411
412   // Creates with a concat operation.
413  static GP<GStringRep> create(
414    const GP<GStringRep> &s1,const GP<GStringRep> &s2);
415  static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
416  static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
417  static GP<GStringRep> create( const char *s1,const char *s2);
418
419    // Create with a strdup and substr operation.
420  static GP<GStringRep> create(
421    const char *s,const int start,const int length=(-1));
422
423  static GP<GStringRep> create(
424    const uint16_t *s,const int start,const int length=(-1));
425
426  static GP<GStringRep> create(
427    const uint32_t *s,const int start,const int length=(-1));
428
    // Create from a format string: variadic form and va_list form.
429  static GP<GStringRep> create_format(const char fmt[],...);
430  static GP<GStringRep> create(const char fmt[],va_list& args);
431
432  virtual unsigned char *UCS4toString(
433    const uint32_t w,unsigned char *ptr, mbstate_t *ps=0) const;
434
435  // Tests if a string is legally encoded in the current character set.
436  virtual bool is_valid(void) const;
437#if HAS_WCHAR
438  virtual int ncopy(wchar_t * const buf, const int buflen) const;
439#endif
440  friend class GBaseString;
441
442protected:
443  // Return the next character and increment the source pointer.
444  virtual uint32_t getValidUCS4(const char *&source) const;
445};
446
447
448/** General purpose character string.
449    Each derived instance of class #GBaseString# represents a
450    character string.  Overloaded operators provide a value semantic
451    to #GBaseString# objects.  Conversion operators and constructors
452    transparently convert between #GBaseString# objects and
453    #const char*# pointers.  The #GBaseString# class has no public
454    constructors, since a derived type should always be used
455    to specify the desired multibyte character encoding.
456
457    Functions taking strings as arguments should declare their
458    arguments as "#const char*#".  Such functions will work equally
459    well with derived #GBaseString# objects since there is a fast
460    conversion operator from the derived #GBaseString# objects
461    to "#const char*#".  Functions returning strings should return
462    #GUTF8String# or #GNativeString# objects because the class will
463    automatically manage the necessary memory.
464
465    Characters in the string can be identified by their position.  The
466    first character of a string is numbered zero. Negative positions
467    represent characters relative to the end of the string (i.e.
468    position #-1# accesses the last character of the string,
469    position #-2# represents the second last character, etc.)  */
470
471class DJVUAPI GBaseString : protected GP<GStringRep>
472{
473public:
  // Mirrors GStringRep::EscapeMode value for value.
474  enum EscapeMode {
475    UNKNOWN_ESCAPED=GStringRep::UNKNOWN_ESCAPED,
476    IS_ESCAPED=GStringRep::IS_ESCAPED,
477    NOT_ESCAPED=GStringRep::NOT_ESCAPED };
478
479  friend class GUTF8String;
480  friend class GNativeString;
481protected:
482  // Sets the gstr pointer;
483  inline void init(void);
484
  // The destructor and constructor are protected: only derived string
  // types (and the friends above) can create or destroy a GBaseString.
485  ~GBaseString();
486  inline GBaseString &init(const GP<GStringRep> &rep);
487
488  // -- CONSTRUCTORS
489  /** Null constructor. Constructs an empty string. */
490  GBaseString( void );
491
492public:
493  // -- ACCESS
494  /** Converts a string into a constant null terminated character
495      array.  This conversion operator is very efficient because
496      it simply returns a pointer to the internal string data. The
497      returned pointer remains valid as long as the string is
498      unmodified. */
499  operator const char* ( void ) const  ;
500  /// Returns the string length.
501  unsigned int length( void ) const;
502  /** Returns true if and only if the string contains zero characters.
503      This operator is useful for conditional expression in control
504      structures.
505      \begin{verbatim}
506         if (! str) { ... }
507         while (!! str) { ... }  -- Note the double operator!
508      \end{verbatim}
509      Class #GBaseString# does not support the syntax
510      "#if# #(str)# #{}#" because the required conversion operator
511      introduces dangerous ambiguities with certain compilers. */
512  bool operator! ( void ) const;
513
514  // -- INDEXING
515  /** Returns the character at position #n#. An exception
516      \Ref{GException} is thrown if number #n# is not in range #-len#
517      to #len-1#, where #len# is the length of the string.  The first
518      character of a string is numbered zero.  Negative positions
519      represent characters relative to the end of the string. */
520  char operator[] (int n) const;
521  /// Returns #TRUE# if the string contains an integer number.
522  bool is_int(void) const;
523  /// Returns #TRUE# if the string contains a float number.
524  bool is_float(void) const;
525
526  /** Converts strings between native & UTF8 **/
527  GNativeString getUTF82Native( EscapeMode escape=UNKNOWN_ESCAPED ) const;
528  GUTF8String getNative2UTF8( void ) const;
529
530  // -- ALTERING
531  /// Reinitializes a string with the null string.
532  void empty( void );
533  // -- SEARCHING
534  /** Searches character #c# in the string, starting at position
535      #from# and scanning forward until reaching the end of the
536      string.  This function returns the position of the matching
537      character.  It returns #-1# if character #c# cannot be found. */
538  int search(char c, int from=0) const;
539
540  /** Searches sub-string #str# in the string, starting at position
541      #from# and scanning forward until reaching the end of the
542      string.  This function returns the position of the first
543      matching character of the sub-string.  It returns #-1# if
544      string #str# cannot be found. */
545  int search(const char *str, int from=0) const;
546
547  /** Searches character #c# in the string, starting at position
548      #from# and scanning backwards until reaching the beginning of
549      the string.  This function returns the position of the matching
550      character.  It returns #-1# if character #c# cannot be found. */
551  int rsearch(char c, const int from=0) const;
552  /** Searches sub-string #str# in the string, starting at position
553      #from# and scanning backwards until reaching the beginning of
554      the string.  This function returns the position of the first
555      matching character of the sub-string. It returns #-1# if
556      string #str# cannot be found. */
557  int rsearch(const char *str, const int from=0) const;
558  /** Searches for any of the specified characters in the accept
559      string.  It returns #-1# if none of the characters can
560      be found, otherwise the position of the first match. */
561  int contains(const char accept[], const int from=0) const;
562  /** Searches for any of the specified characters in the accept
563      string.  It returns #-1# if none of the characters can be
564      found, otherwise the position of the last match. */
565  int rcontains(const char accept[], const int from=0) const;
566
567  /** Concatenates strings. Returns a string composed by concatenating
568      the characters of strings #s1# and #s2#. */
569  GUTF8String operator+(const GUTF8String &s2) const;
570  GNativeString operator+(const GNativeString &s2) const;
571
572  /** Returns an integer.  Implements i18n atoi.  */
573  int toInt(void) const;
574
575  /** Returns a long integer.  Implements i18n strtol.  */
576  long toLong(const int pos, int &endpos, const int base=10) const;
577
578  /** Returns a unsigned long integer.  Implements i18n strtoul. */
579  unsigned long toULong(
580    const int pos, int &endpos, const int base=10) const;
581
582  /** Returns a double.  Implements the i18n strtod.  */
583  double toDouble(
584    const int pos, int &endpos ) const;
585
586  /** Returns a long integer.  Implements i18n strtol.  */
587  static long toLong(
588    const GUTF8String& src, const int pos, int &endpos, const int base=10);
589
590  static unsigned long toULong(
591    const GUTF8String& src, const int pos, int &endpos, const int base=10);
592
593  static double toDouble(
594    const GUTF8String& src, const int pos, int &endpos);
595
596  /** Returns a long integer.  Implements i18n strtol.  */
597  static long toLong(
598    const GNativeString& src, const int pos, int &endpos, const int base=10);
599
600  static unsigned long toULong(
601    const GNativeString& src, const int pos, int &endpos, const int base=10);
602
603  static double toDouble(
604    const GNativeString& src, const int pos, int &endpos);
605
606  // -- HASHING
  // (The hash() friend function is declared further below.)
607
608  // -- COMPARISONS
609    /** Returns an #int#.  Compares string with #s2# and returns
610        sorting order. */
611  int cmp(const GBaseString &s2, const int len=(-1)) const;
612    /** Returns an #int#.  Compares string with #s2# and returns
613        sorting order. */
614  int cmp(const char *s2, const int len=(-1)) const;
615    /** Returns an #int#.  Compares string with #s2# and returns
616        sorting order. */
617  int cmp(const char s2) const;
618    /** Returns an #int#.  Compares #s1# with #s2# and returns
619        sorting order. */
620  static int cmp(const char *s1, const char *s2, const int len=(-1));
621  /** Returns a boolean. The Standard C strncmp takes two string and
622      compares the first N characters.  static bool GBaseString::ncmp
623      will compare #s1# with #s2# with the #len# characters starting
624      from the beginning of the string.
      NOTE(review): no #ncmp# member is declared in this class; this
      paragraph appears to be stale. */
625  /** String comparison. Returns true if and only if character
626      strings #s1# and #s2# are equal (as with #strcmp#.)
627    */
628  bool operator==(const GBaseString &s2) const;
629  bool operator==(const char *s2) const;
630  friend bool operator==(const char    *s1, const GBaseString &s2);
631
632  /** String comparison. Returns true if and only if character
633      strings #s1# and #s2# are not equal (as with #strcmp#.)
634    */
635  bool operator!=(const GBaseString &s2) const;
636  bool operator!=(const char *s2) const;
637  friend bool operator!=(const char *s1, const GBaseString &s2);
638
639  /** String comparison. Returns true if and only if character
640      strings #s1# is lexicographically greater than or equal to
641      string #s2# (as with #strcmp#.) */
642  bool operator>=(const GBaseString &s2) const;
643  bool operator>=(const char *s2) const;
644  bool operator>=(const char s2) const;
645  friend bool operator>=(const char    *s1, const GBaseString &s2);
646  friend bool operator>=(const char s1, const GBaseString &s2);
647
648  /** String comparison. Returns true if and only if character
649      strings #s1# is lexicographically less than string #s2#
650      (as with #strcmp#.)
651   */
652  bool operator<(const GBaseString &s2) const;
653  bool operator<(const char *s2) const;
654  bool operator<(const char s2) const;
655  friend bool operator<(const char *s1, const GBaseString &s2);
656  friend bool operator<(const char s1, const GBaseString &s2);
657
658  /** String comparison. Returns true if and only if character
659      strings #s1# is lexicographically greater than string #s2#
660      (as with #strcmp#.)
661   */
662  bool operator> (const GBaseString &s2) const;
663  bool operator> (const char *s2) const;
664  bool operator> (const char s2) const;
665  friend bool operator> (const char    *s1, const GBaseString &s2);
666  friend bool operator> (const char s1, const GBaseString &s2);
667
668  /** String comparison. Returns true if and only if character
669      strings #s1# is lexicographically less than or equal to string
670      #s2# (as with #strcmp#.)
671   */
672  bool operator<=(const GBaseString &s2) const;
673  bool operator<=(const char *s2) const;
674  bool operator<=(const char s2) const;
675  friend bool operator<=(const char    *s1, const GBaseString &s2);
676  friend bool operator<=(const char    s1, const GBaseString &s2);
677
678   /** Returns an integer.  Implements a functional i18n atoi. Note
679       that if you pass a GBaseString that is not in Native format
680       the results may be disparaging.
       NOTE(review): this paragraph is not attached to any declaration
       here; #toInt# is declared earlier in the class. */
681
682  /** Returns a hash code for the string.  This hashing function
683      helps when creating associative maps with string keys (see
684      \Ref{GMap}).  This hash code may be reduced to an arbitrary
685      range by computing its remainder modulo the upper bound of
686      the range. */
687  friend DJVUAPI unsigned int hash(const GBaseString &ref);
688  // -- HELPERS
689  friend class GStringRep;
690
691  /// Returns next non space position.
692  int nextNonSpace( const int from=0, const int len=(-1) ) const;
693
694  /// Returns next character position.
695  int nextChar( const int from=0 ) const;
696
697  /// Returns next white space position.
698  int nextSpace( const int from=0, const int len=(-1) ) const;
699
700  /// return the position after the last non-whitespace character.
701  int firstEndSpace( const int from=0,const int len=(-1) ) const;
702
703  /// Tests if the string is legally encoded in the current codepage.
704  bool is_valid(void) const;
705
706  /// copy to a wchar_t buffer
707#if HAS_WCHAR
708  int ncopy(wchar_t * const buf, const int buflen) const;
709#endif
710protected:
  // Raw character pointer; per the comment on init(void) above, init()
  // is what sets it.
711  const char *gstr;
712  static void throw_illegal_subscript() no_return;
713  static const char *nullstr;
714public:
715  GNativeString UTF8ToNative(
716    const bool currentlocale=false,
717    const EscapeMode escape=UNKNOWN_ESCAPED) const;
718  GUTF8String NativeToUTF8(void) const;
719protected:
720  inline int CheckSubscript(int n) const;
721};
722
723/** General purpose character string.
724    Each instance of class #GUTF8String# represents a character
725    string.  Overloaded operators provide a value semantic to
726    #GUTF8String# objects.  Conversion operators and constructors
727    transparently convert between #GUTF8String# objects and
728    #const char*# pointers.
729
730    Functions taking strings as arguments should declare their
731    arguments as "#const char*#".  Such functions will work equally
732    well with #GUTF8String# objects since there is a fast conversion
733    operator from #GUTF8String# to "#const char*#".  Functions
734    returning strings should return #GUTF8String# or #GNativeString#
735    objects because the class will automatically manage the necessary
736    memory.
737
738    Characters in the string can be identified by their position.  The
739    first character of a string is numbered zero. Negative positions
740    represent characters relative to the end of the string (i.e.
741    position #-1# accesses the last character of the string,
742    position #-2# represents the second last character, etc.)  */
743
744class DJVUAPI GUTF8String : public GBaseString
745{
746public:
747  ~GUTF8String();
  /// Refreshes the base class state by calling #GBaseString::init()#.
748  inline void init(void);
749
  /** Replaces the contents of this string with #rep#.  A non-null
      #rep# is first passed through #toUTF8(true)#. */
750  inline GUTF8String &init(const GP<GStringRep> &rep);
751
752  // -- CONSTRUCTORS
753  /** Null constructor. Constructs an empty string. */
754  GUTF8String(void);
755  /// Constructs a string from a character.
756  GUTF8String(const char dat);
757  /// Constructs a string from a null terminated character array.
758  GUTF8String(const char *str);
759  /// Constructs a string from a null terminated character array.
760  GUTF8String(const unsigned char *str);
761  GUTF8String(const uint16_t *dat);
762  GUTF8String(const uint32_t *dat);
763  /** Constructs a string from a character array.  Elements of the
764      character array #dat# are added into the string until the
765      string length reaches #len# or until encountering a null
766      character (whichever comes first). */
767  GUTF8String(const char *dat, unsigned int len);
768  GUTF8String(const uint16_t *dat, unsigned int len);
769  GUTF8String(const uint32_t *dat, unsigned int len);
770
771  /// Construct from base class.
772  GUTF8String(const GP<GStringRep> &str);
773  GUTF8String(const GBaseString &str);
774  GUTF8String(const GUTF8String &str);
775  GUTF8String(const GNativeString &str);
776  /** Constructs a string from a portion of string #gs#, described by
777      a starting position #from# and a character count #len#.
778      NOTE(review): presumably selects the same range as #substr()#
779      with the same arguments -- confirm against the implementation. */
780  GUTF8String(const GBaseString &gs, int from, int len);
781
782  /** Copy a null terminated character array. Resets this string
783      with the character string contained in the null terminated
784      character array #str#. */
785  GUTF8String& operator= (const char str);
786  GUTF8String& operator= (const char *str);
787  inline GUTF8String& operator= (const GP<GStringRep> &str);
788  inline GUTF8String& operator= (const GBaseString &str);
789  inline GUTF8String& operator= (const GUTF8String &str);
790  inline GUTF8String& operator= (const GNativeString &str);
791
792  /** Constructs a string with a formatted string (as in #vprintf#).
793      The string is re-initialized with the characters generated
794      according to the specified format #fmt# and using the optional
795      arguments.  See the ANSI-C function #vprintf()# for more
796      information. The current implementation will cause a
797      segmentation violation if the resulting string is longer
798      than 32768 characters. */
799  GUTF8String(const GUTF8String &fmt, va_list &args);
800
801  /// Constructs a string from an integer.
802  /** Constructs a string with a human-readable representation of
803      integer #number#.  The format is similar to format #"%d"# in
804      function #printf#. */
805  GUTF8String(const int number);
806
807  /** Constructs a string with a human-readable representation of
808      floating point number #number#. The format is similar to
809      format #"%f"# in function #printf#.  */
810  GUTF8String(const double number);
811
812
813  /** Initializes a string with a formatted string (as in #printf#).
814      The string is re-initialized with the characters generated
815      according to the specified format #fmt# and using the optional
816      arguments.  See the ANSI-C function #printf()# for more
817      information. The current implementation will cause a
818      segmentation violation if the resulting string is longer
819      than 32768 characters. */
820  GUTF8String &format(const char *fmt, ... );
821  /** Initializes a string with a formatted string (as in #vprintf#).
822      The string is re-initialized with the characters generated
823      according to the specified format #fmt# and using the optional
824      arguments.  See the ANSI-C function #vprintf()# for more
825      information. The current implementation will cause a
826      segmentation violation if the resulting string is longer
827      than 32768 characters. */
828  GUTF8String &vformat(const GUTF8String &fmt, va_list &args);
829
830  /** Returns a copy of this string in which the characters that are
831      special in XML are escaped: '<' becomes "&lt;", '>' becomes
832      "&gt;", '&' becomes "&amp;", '\'' becomes "&apos;", and '\"'
833      becomes "&quot;".  Characters 0x01 through 0x1f are also escaped. */
834  GUTF8String toEscaped( const bool tosevenbit=false ) const;
835
836  /** Converts strings containing HTML/XML escaped characters into
837      their unescaped forms. Numeric representations of characters
838      (e.g., "&#38;" or "&#x26;" for "&") are the only forms
839      converted by this function. */
840  GUTF8String fromEscaped( void ) const;
841
842  /** Converts strings containing HTML/XML escaped characters
843      (e.g., "&lt;" for "<") into their unescaped forms. The
844      conversion is partially defined by the ConvMap argument which
845      specifies the conversion strings to be recognized. Numeric
846      representations of characters (e.g., "&#38;" or "&#x26;"
847      for "&") are always converted. */
848  GUTF8String fromEscaped(
849    const GMap<GUTF8String,GUTF8String> ConvMap ) const;
850
851
852  // -- CONCATENATION
853  /// Appends character #ch# to the string.
854  GUTF8String& operator+= (char ch);
855
856  /// Appends the null terminated character array #str# to the string.
857  GUTF8String& operator+= (const char *str);
858  /// Appends the specified GBaseString to the string.
859  GUTF8String& operator+= (const GBaseString &str);
860
861  /** Returns a sub-string.  The sub-string is composed by copying
862      #len# characters starting at position #from# in this string.
863      The length of the resulting string may be smaller than #len#
864      if the specified range is too large. */
865  GUTF8String substr(int from, int len/*=(-1)*/) const;
866
867  /** Returns an upper case copy of this string.  The returned string
868      contains a copy of the current string with all letters turned
869      into upper case letters. */
870  GUTF8String upcase( void ) const;
871  /** Returns a lower case copy of this string.  The returned string
872      contains a copy of the current string with all letters turned
873      into lower case letters. */
874  GUTF8String downcase( void ) const;
875
876  /** Concatenates strings. Returns a string composed by concatenating
877      the characters of strings #s1# and #s2#.
878  */
879  GUTF8String operator+(const GBaseString &s2) const;
880  GUTF8String operator+(const GUTF8String &s2) const;
881  GUTF8String operator+(const GNativeString &s2) const;
882  GUTF8String operator+(const char *s2) const;
883  friend DJVUAPI GUTF8String operator+(const char *s1, const GUTF8String &s2);
884
885  /** Provides a direct access to the string buffer.  Returns a
886      pointer for directly accessing the string buffer.  This pointer
887      remains valid as long as the string is not modified by
888      other means.  Positive values for argument #n# represent the
889      length of the returned buffer.  The returned string buffer will
890      be large enough to hold at least #n# characters plus a null
891      character.  If #n# is positive but smaller than the string
892      length, the string will be truncated to #n# characters. */
893  char *getbuf(int n = -1);
894  /** Set the character at position #n# to value #ch#.  An exception
895      \Ref{GException} is thrown if number #n# is not in range #-len#
896      to #len#, where #len# is the length of the string.  If character
897      #ch# is zero, the string is truncated at position #n#.  The
898      first character of a string is numbered zero. Negative
899      positions represent characters relative to the end of the
900      string. If position #n# is equal to the length of the string,
901      this function appends character #ch# to the end of the string. */
902  void setat(const int n, const char ch);
903public:
904  typedef enum GStringRep::EncodeType EncodeType;
  // Factory functions building a string from a raw buffer of #size#
  // units together with encoding information.
  // NOTE(review): the exact meaning of #encodetype#, #encoding# and
  // #remainder# is defined by the out-of-line implementations -- confirm
  // there before relying on it.
905  static GUTF8String create(void const * const buf,
906    const unsigned int size,
907    const EncodeType encodetype, const GUTF8String &encoding);
908  static GUTF8String create( void const * const buf,
909    unsigned int size, const EncodeType encodetype );
910  static GUTF8String create( void const * const buf,
911    const unsigned int size, const GUTF8String &encoding );
912  static GUTF8String create( void const * const buf,
913    const unsigned int size, const GP<GStringRep::Unicode> &remainder);
  /// Returns the representation's remainder, or a null pointer when
  /// this string holds no representation.
914  GP<GStringRep::Unicode> get_remainder(void) const;
  // Build a string from a buffer of #bufsize# elements.  When HAS_WCHAR
  // is set the #char# variant is constructed through #GNativeString#;
  // otherwise, and for the 16 and 32 bit variants, the matching
  // #GUTF8String# constructor is used.
915  static GUTF8String create( const char *buf, const unsigned int bufsize );
916  static GUTF8String create( const uint16_t *buf, const unsigned int bufsize );
917  static GUTF8String create( const uint32_t *buf, const unsigned int bufsize );
918};
919
920
921#if !HAS_WCHAR
922#define GBaseString GUTF8String
923#endif
924
925/** General purpose character string.
926    Each instance of class #GNativeString# represents a character
927    string.  Overloaded operators provide a value semantic to
928    #GNativeString# objects.  Conversion operators and constructors
929    transparently convert between #GNativeString# objects and
930    #const char*# pointers.
931
932    Functions taking strings as arguments should declare their
933    arguments as "#const char*#".  Such functions will work equally
934    well with #GNativeString# objects since there is a fast conversion
935    operator from #GNativeString# to "#const char*#".  Functions
936    returning strings should return #GUTF8String# or #GNativeString#
937    objects because the class will automatically manage the necessary
938    memory.
939
940    Characters in the string can be identified by their position.  The
941    first character of a string is numbered zero. Negative positions
942    represent characters relative to the end of the string (i.e.
943    position #-1# accesses the last character of the string,
944    position #-2# represents the second last character, etc.)  */
945
946class DJVUAPI GNativeString : public GBaseString
947{
948public:
949  ~GNativeString();
950  // -- CONSTRUCTORS
951  /** Null constructor. Constructs an empty string. */
952  GNativeString(void);
953  /// Constructs a string from a character.
954  GNativeString(const char dat);
955  /// Constructs a string from a null terminated character array.
956  GNativeString(const char *str);
957  /// Constructs a string from a null terminated character array.
958  GNativeString(const unsigned char *str);
959  GNativeString(const uint16_t *str);
960  GNativeString(const uint32_t *str);
961  /** Constructs a string from a character array.  Elements of the
962      character array #dat# are added into the string until the
963      string length reaches #len# or until encountering a null
964      character (whichever comes first). */
965  GNativeString(const char *dat, unsigned int len);
966  GNativeString(const uint16_t *dat, unsigned int len);
967  GNativeString(const uint32_t *dat, unsigned int len);
968  /// Construct from base class.
969  GNativeString(const GP<GStringRep> &str);
970  GNativeString(const GBaseString &str);
971#if HAS_WCHAR
972  GNativeString(const GUTF8String &str);
973#endif
974  GNativeString(const GNativeString &str);
975  /** Constructs a string from a portion of string #gs#, described by
976      a starting position #from# and a character count #len#.  This
977      is the constructor used by #substr()# to build its result, so
978      the arguments have the meaning documented there. */
979  GNativeString(const GBaseString &gs, int from, int len);
980
981  /** Constructs a string with a formatted string (as in #vprintf#).
982      The string is re-initialized with the characters generated
983      according to the specified format #fmt# and using the optional
984      arguments.  See the ANSI-C function #vprintf()# for more
985      information. The current implementation will cause a
986      segmentation violation if the resulting string is longer than
987      32768 characters. */
988  GNativeString(const GNativeString &fmt, va_list &args);
989
990  /** Constructs a string with a human-readable representation of
991      integer #number#.  The format is similar to format #"%d"# in
992      function #printf#. */
993  GNativeString(const int number);
994
995  /** Constructs a string with a human-readable representation of
996      floating point number #number#. The format is similar to
997      format #"%f"# in function #printf#.  */
998  GNativeString(const double number);
999
  // When HAS_WCHAR is not set, the GBaseString macro defined just before
  // this class makes GNativeString derive from GUTF8String.  The macro is
  // removed here and none of the members below are declared.
1000#if !HAS_WCHAR
1001#undef GBaseString
1002#else
1003  /// Initialize this string class
1004  void init(void);
1005
1006  /// Initialize this string class
1007  GNativeString &init(const GP<GStringRep> &rep);
1008
1009  /** Copy a null terminated character array. Resets this string with
1010      the character string contained in the null terminated character
1011      array #str#. */
1012  GNativeString& operator= (const char str);
1013  GNativeString& operator= (const char *str);
1014  inline GNativeString& operator= (const GP<GStringRep> &str);
1015  inline GNativeString& operator= (const GBaseString &str);
1016  inline GNativeString& operator= (const GUTF8String &str);
1017  inline GNativeString& operator= (const GNativeString &str);
1018  // -- CONCATENATION
1019  /// Appends character #ch# to the string.
1020  GNativeString& operator+= (char ch);
1021  /// Appends the null terminated character array #str# to the string.
1022  GNativeString& operator+= (const char *str);
1023  /// Appends the specified GBaseString to the string.
1024  GNativeString& operator+= (const GBaseString &str);
1025
1026  /** Returns a sub-string.  The sub-string is composed by copying
1027      #len# characters starting at position #from# in this string.
1028      The length of the resulting string may be smaller than #len#
1029      if the specified range is too large. */
1030  GNativeString substr(int from, int len/*=(-1)*/) const;
1031
1032  /** Returns an upper case copy of this string.  The returned
1033      string contains a copy of the current string with all letters
1034      turned into upper case letters. */
1035  GNativeString upcase( void ) const;
1036  /** Returns a lower case copy of this string.  The returned
1037      string contains a copy of the current string with all letters
1038      turned into lower case letters. */
1039  GNativeString downcase( void ) const;
1040
1041
  // Concatenation.  Note that adding a #GUTF8String# yields a
  // #GUTF8String#, unlike the other overloads.
1042  GNativeString operator+(const GBaseString &s2) const;
1043  GNativeString operator+(const GNativeString &s2) const;
1044  GUTF8String operator+(const GUTF8String &s2) const;
1045  GNativeString operator+(const char *s2) const;
1046  friend DJVUAPI GNativeString operator+(const char *s1, const GNativeString &s2);
1047
1048  /** Initializes a string with a formatted string (as in #printf#).
1049      The string is re-initialized with the characters generated
1050      according to the specified format #fmt# and using the optional
1051      arguments.  See the ANSI-C function #printf()# for more
1052      information. The current implementation will cause a
1053      segmentation violation if the resulting string is longer than
1054      32768 characters. */
1055  GNativeString &format(const char *fmt, ... );
1056  /** Initializes a string with a formatted string (as in #vprintf#).
1057      The string is re-initialized with the characters generated
1058      according to the specified format #fmt# and using the optional
1059      arguments.  See the ANSI-C function #vprintf()# for more
1060      information. The current implementation will cause a
1061      segmentation violation if the resulting string is longer than
1062      32768 characters. */
1063  GNativeString &vformat(const GNativeString &fmt, va_list &args);
1064
1065  /** Returns a copy of this string in which the characters that are
1066      special in XML are escaped: '<' becomes "&lt;", '>' becomes
1067      "&gt;", '&' becomes "&amp;", '\'' becomes "&apos;", and '\"'
1068      becomes "&quot;".  Characters 0x01 through 0x1f are also escaped. */
1069  GNativeString toEscaped( const bool tosevenbit=false ) const;
1070
1071
1072  /** Provides a direct access to the string buffer.  Returns a
1073      pointer for directly accessing the string buffer.  This
1074      pointer remains valid as long as the string is not
1075      modified by other means.  Positive values for argument #n#
1076      represent the length of the returned buffer.  The returned
1077      string buffer will be large enough to hold at least #n#
1078      characters plus a null character.  If #n# is positive but
1079      smaller than the string length, the string will be truncated
1080      to #n# characters. */
1081  char *getbuf(int n = -1);
1082  /** Set the character at position #n# to value #ch#.  An exception
1083      \Ref{GException} is thrown if number #n# is not in range #-len#
1084      to #len#, where #len# is the length of the string.  If
1085      character #ch# is zero, the string is truncated at position
1086      #n#.  The first character of a string is numbered zero.
1087      Negative positions represent characters relative to the end of
1088      the string. If position #n# is equal to the length of the
1089      string, this function appends character #ch# to the end of the
1090      string. */
1091  void setat(const int n, const char ch);
1092
  // Build a string from a buffer of #bufsize# elements using the
  // constructor taking the same arguments.
1093  static GNativeString create( const char *buf, const unsigned int bufsize );
1094  static GNativeString create( const uint16_t *buf, const unsigned int bufsize );
1095  static GNativeString create( const uint32_t *buf, const unsigned int bufsize );
1096#endif // HAS_WCHAR branch of the "#if !HAS_WCHAR" above
1097};
1098
1099//@}
1100
1101inline
1102GBaseString::operator const char* ( void ) const
1103{
1104  return ptr?(*this)->data:nullstr;
1105}
1106
1107inline unsigned int
1108GBaseString::length( void ) const
1109{
1110  return ptr ? (*this)->size : 0;
1111}
1112
1113inline bool
1114GBaseString::operator! ( void ) const
1115{
1116  return !ptr;
1117}
1118
1119inline GUTF8String
1120GUTF8String::upcase( void ) const
1121{
1122  if (ptr) return (*this)->upcase();
1123  return *this;
1124}
1125
1126inline GUTF8String
1127GUTF8String::downcase( void ) const
1128{
1129  if (ptr) return (*this)->downcase();
1130  return *this;
1131}
1132
1133inline void
1134GUTF8String::init(void)
1135{ GBaseString::init(); }
1136
1137inline GUTF8String &
1138GUTF8String::init(const GP<GStringRep> &rep)
1139{ GP<GStringRep>::operator=(rep?rep->toUTF8(true):rep); init(); return *this; }
1140
1141inline GUTF8String &
1142GUTF8String::vformat(const GUTF8String &fmt, va_list &args)
1143{ return (*this = (fmt.ptr?GUTF8String(fmt,args):fmt)); }
1144
1145inline GUTF8String
1146GUTF8String::toEscaped( const bool tosevenbit ) const
1147{ return ptr?GUTF8String((*this)->toEscaped(tosevenbit)):(*this); }
1148
1149inline GP<GStringRep::Unicode> 
1150GUTF8String::get_remainder(void) const
1151{
1152  GP<GStringRep::Unicode> retval;
1153  if(ptr)
1154    retval=((*this)->get_remainder());
1155  return retval;
1156}
1157
1158inline
1159GUTF8String::GUTF8String(const GNativeString &str)
1160{ init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1161
1162inline
1163GUTF8String::GUTF8String(const GP<GStringRep> &str)
1164{ init(str?(str->toUTF8(true)):str); }
1165
1166inline
1167GUTF8String::GUTF8String(const GBaseString &str)
1168{ init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1169
1170inline void
1171GBaseString::init(void)
1172{
1173  gstr=ptr?((*this)->data):nullstr;
1174}
1175/** Returns an integer.  Implements i18n atoi.  */
1176inline int
1177GBaseString::toInt(void) const
1178{ return ptr?(*this)->toInt():0; }
1179
1180/** Returns a long intenger.  Implments i18n strtol.  */
1181inline long
1182GBaseString::toLong(const int pos, int &endpos, const int base) const
1183{
1184  long int retval=0;
1185  if(ptr)
1186  {
1187    retval=(*this)->toLong(pos, endpos, base);
1188  }else
1189  {
1190    endpos=(-1);
1191  }
1192  return retval;
1193}
1194
1195inline long
1196GBaseString::toLong(
1197  const GUTF8String& src, const int pos, int &endpos, const int base)
1198{
1199  return src.toLong(pos,endpos,base);
1200}
1201
1202inline long
1203GBaseString::toLong(
1204  const GNativeString& src, const int pos, int &endpos, const int base)
1205{
1206  return src.toLong(pos,endpos,base);
1207}
1208
1209/** Returns a unsigned long integer.  Implements i18n strtoul. */
1210inline unsigned long
1211GBaseString::toULong(const int pos, int &endpos, const int base) const
1212{
1213  unsigned long retval=0;
1214  if(ptr)
1215  {
1216    retval=(*this)->toULong(pos, endpos, base);
1217  }else
1218  {
1219    endpos=(-1);
1220  }
1221  return retval;
1222}
1223
1224inline unsigned long
1225GBaseString::toULong(
1226  const GUTF8String& src, const int pos, int &endpos, const int base)
1227{
1228  return src.toULong(pos,endpos,base);
1229}
1230
1231inline unsigned long
1232GBaseString::toULong(
1233  const GNativeString& src, const int pos, int &endpos, const int base)
1234{
1235  return src.toULong(pos,endpos,base);
1236}
1237
1238/** Returns a double.  Implements the i18n strtod.  */
1239inline double
1240GBaseString::toDouble(
1241  const int pos, int &endpos ) const
1242{
1243  double retval=(double)0;
1244  if(ptr)
1245  {
1246    retval=(*this)->toDouble(pos, endpos);
1247  }else
1248  {
1249    endpos=(-1);
1250  }
1251  return retval;
1252}
1253
1254inline double
1255GBaseString::toDouble(
1256  const GUTF8String& src, const int pos, int &endpos)
1257{
1258  return src.toDouble(pos,endpos);
1259}
1260
1261inline double
1262GBaseString::toDouble(
1263  const GNativeString& src, const int pos, int &endpos)
1264{
1265  return src.toDouble(pos,endpos);
1266}
1267
1268inline GBaseString &
1269GBaseString::init(const GP<GStringRep> &rep)
1270{ GP<GStringRep>::operator=(rep); init(); return *this;}
1271
1272inline char
1273GBaseString::operator[] (int n) const
1274{ return ((n||ptr)?((*this)->data[CheckSubscript(n)]):0); }
1275
1276inline int
1277GBaseString::search(char c, int from) const
1278{ return ptr?((*this)->search(c,from)):(-1); }
1279
1280inline int
1281GBaseString::search(const char *str, int from) const
1282{ return ptr?((*this)->search(str,from)):(-1); }
1283
1284inline int
1285GBaseString::rsearch(char c, const int from) const
1286{ return ptr?((*this)->rsearch(c,from)):(-1); }
1287
1288inline int
1289GBaseString::rsearch(const char *str, const int from) const
1290{ return ptr?((*this)->rsearch(str,from)):(-1); }
1291
1292inline int
1293GBaseString::contains(const char accept[], const int from) const
1294{ return ptr?((*this)->contains(accept,from)):(-1); }
1295
1296inline int
1297GBaseString::rcontains(const char accept[], const int from) const
1298{ return ptr?((*this)->rcontains(accept,from)):(-1); }
1299
1300inline int
1301GBaseString::cmp(const GBaseString &s2, const int len) const
1302{ return GStringRep::cmp(*this,s2,len); }
1303
1304inline int
1305GBaseString::cmp(const char *s2, const int len) const
1306{ return GStringRep::cmp(*this,s2,len); }
1307
1308inline int
1309GBaseString::cmp(const char s2) const
1310{ return GStringRep::cmp(*this,&s2,1); }
1311
1312inline int
1313GBaseString::cmp(const char *s1, const char *s2, const int len)
1314{ return GStringRep::cmp(s1,s2,len); }
1315
1316inline bool
1317GBaseString::operator==(const GBaseString &s2) const
1318{ return !cmp(s2); }
1319
1320inline bool
1321GBaseString::operator==(const char *s2) const
1322{ return !cmp(s2); }
1323
1324inline bool
1325GBaseString::operator!=(const GBaseString &s2) const
1326{ return !!cmp(s2); }
1327
1328inline bool
1329GBaseString::operator!=(const char *s2) const
1330{ return !!cmp(s2); }
1331
1332inline bool
1333GBaseString::operator>=(const GBaseString &s2) const
1334{ return (cmp(s2)>=0); }
1335
1336inline bool
1337GBaseString::operator>=(const char *s2) const
1338{ return (cmp(s2)>=0); }
1339
1340inline bool
1341GBaseString::operator>=(const char s2) const
1342{ return (cmp(s2)>=0); }
1343
1344inline bool
1345GBaseString::operator<(const GBaseString &s2) const
1346{ return (cmp(s2)<0); }
1347
1348inline bool
1349GBaseString::operator<(const char *s2) const
1350{ return (cmp(s2)<0); }
1351
1352inline bool
1353GBaseString::operator<(const char s2) const
1354{ return (cmp(s2)<0); }
1355
1356inline bool
1357GBaseString::operator> (const GBaseString &s2) const
1358{ return (cmp(s2)>0); }
1359
1360inline bool
1361GBaseString::operator> (const char *s2) const
1362{ return (cmp(s2)>0); }
1363
1364inline bool
1365GBaseString::operator> (const char s2) const
1366{ return (cmp(s2)>0); }
1367
1368inline bool
1369GBaseString::operator<=(const GBaseString &s2) const
1370{ return (cmp(s2)<=0); }
1371
1372inline bool
1373GBaseString::operator<=(const char *s2) const
1374{ return (cmp(s2)<=0); }
1375
1376inline bool
1377GBaseString::operator<=(const char s2) const
1378{ return (cmp(s2)<=0); }
1379
1380inline int
1381GBaseString::nextNonSpace( const int from, const int len ) const
1382{ return ptr?(*this)->nextNonSpace(from,len):0; }
1383
1384inline int
1385GBaseString::nextChar( const int from ) const
1386{ return ptr?(*this)->nextChar(from):0; }
1387
1388inline int
1389GBaseString::nextSpace( const int from, const int len ) const
1390{ return ptr?(*this)->nextSpace(from,len):0; }
1391
1392inline int
1393GBaseString::firstEndSpace( const int from,const int len ) const
1394{ return ptr?(*this)->firstEndSpace(from,len):0; }
1395
1396inline bool
1397GBaseString::is_valid(void) const
1398{ return ptr?((*this)->is_valid()):true; }
1399
1400#if HAS_WCHAR
1401inline int
1402GBaseString::ncopy(wchar_t * const buf, const int buflen) const
1403{if(buf&&buflen)buf[0]=0;return ptr?((*this)->ncopy(buf,buflen)):0;}
1404#endif
1405
1406inline int
1407GBaseString::CheckSubscript(int n) const
1408{
1409  if(n)
1410  {
1411    if (n<0 && ptr)
1412      n += (*this)->size;
1413    if (n<0 || !ptr || n > (int)(*this)->size)
1414      throw_illegal_subscript();
1415  }
1416  return n;
1417}
1418
1419inline GBaseString::GBaseString(void) { init(); }
1420
1421inline GUTF8String::GUTF8String(void) { }
1422
1423inline GUTF8String::GUTF8String(const GUTF8String &str)
1424{ init(str); }
1425
1426inline GUTF8String& GUTF8String::operator= (const GP<GStringRep> &str)
1427{ return init(str); }
1428
1429inline GUTF8String& GUTF8String::operator= (const GBaseString &str)
1430{ return init(str); }
1431
1432inline GUTF8String& GUTF8String::operator= (const GUTF8String &str)
1433{ return init(str); }
1434
1435inline GUTF8String& GUTF8String::operator= (const GNativeString &str)
1436{ return init(str); }
1437
1438inline GUTF8String
1439GUTF8String::create( const char *buf, const unsigned int bufsize )
1440{
1441#if HAS_WCHAR
1442  return GNativeString(buf,bufsize);
1443#else
1444  return GUTF8String(buf,bufsize);
1445#endif
1446}
1447
1448inline GUTF8String
1449GUTF8String::create( const uint16_t *buf, const unsigned int bufsize )
1450{
1451  return GUTF8String(buf,bufsize);
1452}
1453
1454inline GUTF8String
1455GUTF8String::create( const uint32_t *buf, const unsigned int bufsize )
1456{
1457  return GUTF8String(buf,bufsize);
1458}
1459
1460inline GNativeString::GNativeString(void) {}
1461
1462#if !HAS_WCHAR
1463// For Windows CE, GNativeString is essentially GUTF8String
1464
1465inline
1466GNativeString::GNativeString(const GUTF8String &str)
1467: GUTF8String(str) {}
1468
1469inline
1470GNativeString::GNativeString(const GP<GStringRep> &str)
1471: GUTF8String(str) {}
1472
1473inline
1474GNativeString::GNativeString(const char dat)
1475: GUTF8String(dat) {}
1476
1477inline
1478GNativeString::GNativeString(const char *str)
1479: GUTF8String(str) {}
1480
1481inline
1482GNativeString::GNativeString(const unsigned char *str)
1483: GUTF8String(str) {}
1484
1485inline
1486GNativeString::GNativeString(const uint16_t *str)
1487: GUTF8String(str) {}
1488
1489inline
1490GNativeString::GNativeString(const uint32_t *str)
1491: GUTF8String(str) {}
1492
1493inline
1494GNativeString::GNativeString(const char *dat, unsigned int len)
1495: GUTF8String(dat,len) {}
1496
1497inline
1498GNativeString::GNativeString(const uint16_t *dat, unsigned int len)
1499: GUTF8String(dat,len) {}
1500
1501inline
1502GNativeString::GNativeString(const uint32_t *dat, unsigned int len)
1503: GUTF8String(dat,len) {}
1504
1505inline
1506GNativeString::GNativeString(const GNativeString &str)
1507: GUTF8String(str) {}
1508
1509inline
1510GNativeString::GNativeString(const int number)
1511: GUTF8String(number) {}
1512
1513inline
1514GNativeString::GNativeString(const double number)
1515: GUTF8String(number) {}
1516
1517inline
1518GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1519: GUTF8String(fmt,args) {}
1520
1521#else // HAS_WCHAR
1522
1523/// Initialize this string class
1524inline void
1525GNativeString::init(void)
1526{ GBaseString::init(); }
1527
1528/// Initialize this string class
1529inline GNativeString &
1530GNativeString::init(const GP<GStringRep> &rep)
1531{
1532  GP<GStringRep>::operator=(rep?rep->toNative(GStringRep::NOT_ESCAPED):rep);
1533  init();
1534  return *this;
1535}
1536
1537inline GNativeString
1538GNativeString::substr(int from, int len) const
1539{ return GNativeString(*this, from, len); }
1540
1541inline GNativeString &
1542GNativeString::vformat(const GNativeString &fmt, va_list &args)
1543{ return (*this = (fmt.ptr?GNativeString(fmt,args):fmt)); }
1544
1545inline GNativeString
1546GNativeString::toEscaped( const bool tosevenbit ) const
1547{ return ptr?GNativeString((*this)->toEscaped(tosevenbit)):(*this); }
1548
1549inline
1550GNativeString::GNativeString(const GUTF8String &str)
1551{
1552  if (str.length())
1553    init(str->toNative(GStringRep::NOT_ESCAPED));
1554  else
1555    init((GP<GStringRep>)str);
1556}
1557
1558inline
1559GNativeString::GNativeString(const GP<GStringRep> &str)
1560{
1561  if (str)
1562    init(str->toNative(GStringRep::NOT_ESCAPED));
1563  else
1564    init(str);
1565}
1566
1567inline
1568GNativeString::GNativeString(const GBaseString &str)
1569{
1570  if (str.length())
1571    init(str->toNative(GStringRep::NOT_ESCAPED));
1572  else
1573    init((GP<GStringRep>)str);
1574}
1575
1576
1577inline
1578GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1579{
1580  if (fmt.ptr)
1581    init(fmt->vformat(args));
1582  else
1583    init(fmt);
1584}
1585
1586inline GNativeString
1587GNativeString::create( const char *buf, const unsigned int bufsize )
1588{
1589  return GNativeString(buf,bufsize);
1590}
1591
1592inline GNativeString
1593GNativeString::create( const uint16_t *buf, const unsigned int bufsize )
1594{
1595  return GNativeString(buf,bufsize);
1596}
1597
1598inline GNativeString
1599GNativeString::create( const uint32_t *buf, const unsigned int bufsize )
1600{
1601  return GNativeString(buf,bufsize);
1602}
1603
1604inline GNativeString&
1605GNativeString::operator= (const GP<GStringRep> &str)
1606{ return init(str); }
1607
1608inline GNativeString&
1609GNativeString::operator= (const GBaseString &str)
1610{ return init(str); }
1611
1612inline GNativeString&
1613GNativeString::operator= (const GUTF8String &str)
1614{ return init(str); }
1615
1616inline GNativeString&
1617GNativeString::operator= (const GNativeString &str)
1618{ return init(str); }
1619
1620inline GNativeString
1621GNativeString::upcase( void ) const
1622{
1623  if (ptr) return (*this)->upcase();
1624  return *this;
1625}
1626
1627inline GNativeString
1628GNativeString::downcase( void ) const
1629{
1630  if (ptr) return (*this)->downcase();
1631  return *this;
1632}
1633
1634#endif // HAS_WCHAR
1635
1636inline bool
1637operator==(const char *s1, const GBaseString &s2)
1638{ return !s2.cmp(s1); }
1639
1640inline bool
1641operator!=(const char *s1, const GBaseString &s2)
1642{ return !!s2.cmp(s1); }
1643
1644inline bool
1645operator>=(const char    *s1, const GBaseString &s2)
1646{ return (s2.cmp(s1)<=0); }
1647
1648inline bool
1649operator>=(const char s1, const GBaseString &s2)
1650{ return (s2.cmp(s1)<=0); }
1651
1652inline bool
1653operator<(const char *s1, const GBaseString &s2)
1654{ return (s2.cmp(s1)>0); }
1655
1656inline bool
1657operator<(const char s1, const GBaseString &s2)
1658{ return (s2.cmp(s1)>0); }
1659
1660inline bool
1661operator> (const char    *s1, const GBaseString &s2)
1662{ return (s2.cmp(s1)<0); }
1663
1664inline bool
1665operator> (const char s1, const GBaseString &s2)
1666{ return (s2.cmp(s1)<0); }
1667
1668inline bool
1669operator<=(const char    *s1, const GBaseString &s2)
1670{ return !(s1>s2); }
1671
1672inline bool
1673operator<=(const char    s1, const GBaseString &s2)
1674{ return !(s1>s2); }
1675
1676// ------------------- The end
1677
1678
1679#ifdef HAVE_NAMESPACES
1680}
1681# ifndef NOT_USING_DJVU_NAMESPACE
1682using namespace DJVU;
1683# endif
1684#endif
1685#endif
1686
Note: See TracBrowser for help on using the repository browser.