source: trunk/libdjvu/GString.h @ 101

Last change on this file since 101 was 17, checked in by Eugene Romanenko, 16 years ago

update makefiles, remove absolute paths, update djvulibre to version 3.5.17

File size: 58.3 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, Version 2. The license should have
9//C- accompanied the software or you may obtain a copy of the license
10//C- from the Free Software Foundation at http://www.fsf.org .
11//C-
12//C- This program is distributed in the hope that it will be useful,
13//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
14//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15//C- GNU General Public License for more details.
16//C-
17//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
18//C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech
19//C- Software authorized us to replace the original DjVu(r) Reference
20//C- Library notice by the following text (see doc/lizard2002.djvu):
21//C-
22//C-  ------------------------------------------------------------------
23//C- | DjVu (r) Reference Library (v. 3.5)
24//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
25//C- | The DjVu Reference Library is protected by U.S. Pat. No.
26//C- | 6,058,214 and patents pending.
27//C- |
28//C- | This software is subject to, and may be distributed under, the
29//C- | GNU General Public License, Version 2. The license should have
30//C- | accompanied the software or you may obtain a copy of the license
31//C- | from the Free Software Foundation at http://www.fsf.org .
32//C- |
33//C- | The computer code originally released by LizardTech under this
34//C- | license and unmodified by other parties is deemed "the LIZARDTECH
35//C- | ORIGINAL CODE."  Subject to any third party intellectual property
36//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
37//C- | non-exclusive license to make, use, sell, or otherwise dispose of
38//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
39//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
40//C- | General Public License.   This grant only confers the right to
41//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
42//C- | the extent such infringement is reasonably necessary to enable
43//C- | recipient to make, have made, practice, sell, or otherwise dispose
44//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
45//C- | any greater extent that may be necessary to utilize further
46//C- | modifications or combinations.
47//C- |
48//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
49//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
50//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
51//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
52//C- +------------------------------------------------------------------
53//
54// $Id: GString.h,v 1.20 2005/07/22 15:28:07 leonb Exp $
55// $Name:  $
56
57#ifndef _GSTRING_H_
58#define _GSTRING_H_
59#ifdef HAVE_CONFIG_H
60#include "config.h"
61#endif
62#if NEED_GNUG_PRAGMAS
63# pragma interface
64#endif
65
66/** @name GString.h
67
68    Files #"GString.h"# and #"GString.cpp"# implement a general
69    purpose string class \Ref{GBaseString}, with derived types
70    \Ref{GUTF8String} and \Ref{GNativeString} for UTF8 MBS encoding
71    and the current Native MBS encoding respectively.  This
72    implementation relies on smart pointers (see
73    \Ref{GSmartPointer.h}).
74
75    {\bf Historical Comments} --- At some point during the DjVu
76    research era, it became clear that C++ compilers rarely provided
77    portable libraries. We then decided to avoid fancy classes (like
78    #iostream# or #string#) and to rely only on the good old C
79    library.  A good string class however is very useful.  We had
80    already randomly picked letter 'G' to prefix class names and we
81    logically derived the new class name.  Native English speakers
82    kept laughing in hiding.  This is ironic because we completely
83    forgot this letter 'G' when creating more challenging things
84    like the ZP Coder or the IW44 wavelets. 
85
86    {\bf Later Changes}
87    When converting to I18N, we (Lizardtech) decided that two string classes
88    were needed, replacing the original GString with \Ref{GUTF8String} and
89    \Ref{GNativeString}.
90
91    @memo
92    General purpose string class.
93    @author
94    L\'eon Bottou <leonb@research.att.com> -- initial implementation.\\
95
96// From: Leon Bottou, 1/31/2002
97// This file has very little to do with my initial implementation.
98// It has been practically rewritten by Lizardtech for i18n changes.
99// My original implementation was very small in comparison
100// <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
101// In my opinion, the duplication of the string classes is a failed
102// attempt to use the type system to enforce coding policies.
103// This could be fixed.  But there are better things to do in djvulibre.
104   
105    @version
106    #$Id: GString.h,v 1.20 2005/07/22 15:28:07 leonb Exp $# */
107//@{
108
109
110#include "DjVuGlobal.h"
111#include "GContainer.h"
112
113#include <stdlib.h>
114#include <stdarg.h>
115#ifdef WIN32
116# include <windows.h>
117# define HAS_WCHAR 1
118# define HAS_MBSTATE 1
119#endif
120
121#if HAS_WCHAR
122# if !defined(AUTOCONF) || HAVE_WCHAR_H
123#  include <wchar.h>
124# endif
125#endif
126
127
128#ifdef HAVE_NAMESPACES
129namespace DJVU {
130# ifdef NOT_DEFINED // Just to fool emacs c++ mode
131}
132#endif
133#endif
134
// Fallback: when HAS_MBSTATE is undefined or zero and HAVE_MBSTATE_T is not
// defined, declare mbstate_t as a plain int so that the declarations below
// which take an mbstate_t* still compile.
135#if !HAS_MBSTATE
136# ifndef HAVE_MBSTATE_T
137typedef int mbstate_t;
138# endif
139#endif
140
// Forward declarations; GStringRep names these three classes as friends.
141class GBaseString;
142class GUTF8String;
143class GNativeString;
144
145// Internal string representation.
// Abstract base class (it declares pure virtual methods) for the
// encoding-specific representations such as GStringRep::UTF8.  The
// methods below hand representations around as GP<GStringRep>.
146class GStringRep : public GPEnabled
147{
148public:
  // Encodings that can be named when handing raw bytes to set_remainder().
149  enum EncodeType { XUCS4, XUCS4BE, XUCS4LE, XUCS4_2143, XUCS4_3412,
150    XUTF16, XUTF16BE, XUTF16LE, XUTF8, XEBCDIC, XOTHER } ;
151
  // Escape state accepted by toNative() and UTF8ToNative(); the numeric
  // values are reused by GBaseString::EscapeMode.
152  enum EscapeMode { UNKNOWN_ESCAPED=0,  IS_ESCAPED=1, NOT_ESCAPED=2 };
153
  // Nested classes, declared here and granted friend access.
154  class UTF8;
155  friend class UTF8;
156  class Unicode;
157  friend class Unicode;
158
159  class ChangeLocale;
160#if HAS_WCHAR
161  class Native;
162  friend class Native;
163#endif // HAS_WCHAR
164  friend class GBaseString;
165  friend class GUTF8String;
166  friend class GNativeString;
167  friend unsigned int hash(const GBaseString &ref);
168
169public:
170  // default constructor
171  GStringRep(void);
172  // virtual destructor
173  virtual ~GStringRep();
174
175    // Other virtual methods.
176      // Create an empty string (pure virtual: each encoding supplies it).
177  virtual GP<GStringRep> blank(const unsigned int sz) const = 0;
178      // Create a duplicate at the given size #n#.
179  GP<GStringRep>  getbuf(int n) const;
180      // Change the value of one of the bytes.  The method is const, so the
      // outcome is delivered through the returned GP<GStringRep>.
181  GP<GStringRep> setat(int n, char ch) const;
182      // Append a string.
183  virtual GP<GStringRep> append(const GP<GStringRep> &s2) const = 0;
184      // Test if UTF8.  This base version returns false.
185  virtual bool isUTF8(void) const { return false; }
186      // Test if Native.  This base version returns false.
187  virtual bool isNative(void) const { return false; }
188      // Convert to Native.
189  virtual GP<GStringRep> toNative(
190    const EscapeMode escape=UNKNOWN_ESCAPED ) const = 0;
191      // Convert to UTF8.
192  virtual GP<GStringRep> toUTF8(const bool nothrow=false) const = 0;
193      // Convert #rep# to the same class as this object.
194  virtual GP<GStringRep> toThis(
195    const GP<GStringRep> &rep,const GP<GStringRep> &locale=0) const = 0;
196      // Compare with #s2#.
197  virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const = 0;
198
199  // Convert strings to numbers.
  // The three-argument forms start at #pos# and report through #endpos#.
  // NOTE(review): by analogy with strtol, #endpos# is presumably the
  // position where parsing stopped -- confirm in GString.cpp.
200  virtual int toInt(void) const = 0;
201  virtual long int toLong(
202    const int pos, int &endpos, const int base=10) const = 0;
203  virtual unsigned long toULong(
204    const int pos, int &endpos, const int base=10) const = 0;
205  virtual double toDouble(const int pos, int &endpos) const = 0;
206
207  // Return the position of the next character.
208  int nextChar( const int from=0 ) const;
209
210  // Return the next non-space position.
211  int nextNonSpace( const int from=0, const int len=(-1) ) const;
212
213  // Return the next white space position.
214  int nextSpace( const int from=0, const int len=(-1) ) const;
215
216  // Return the position after the last non-whitespace character.
217  int firstEndSpace( int from=0, const int len=(-1) ) const;
218
219    // Create an empty string.  The unnamed TYPE* parameter only selects
    // which template instantiation is called.
220  template <class TYPE> static GP<GStringRep> create(
221    const unsigned int sz,TYPE *);
222    // Creates with a strdup string.
223  GP<GStringRep> strdup(const char *s) const;
224
225    // Creates by appending to the current string
226  GP<GStringRep> append(const char *s2) const;
227
228    // Creates with a concat operation.  One overload is provided for each
    // combination of representation and C-string operands.
229  GP<GStringRep> concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const;
230  GP<GStringRep> concat(const char *s1,const GP<GStringRep> &s2) const;
231  GP<GStringRep> concat(const GP<GStringRep> &s1,const char *s2) const;
232  GP<GStringRep> concat(const char *s1,const char *s2) const;
233
234   /* Creates with a strdup and substr.  Negative values have strlen(s)+1
235      added to them.  The overloads below accept 8 bit, unsigned short and
      unsigned long source characters.
236   */
237  GP<GStringRep> substr(
238    const char *s,const int start,const int length=(-1)) const;
239
240  GP<GStringRep> substr(
241    const unsigned short *s,const int start,const int length=(-1)) const;
242
243  GP<GStringRep> substr(
244    const unsigned long *s,const int start,const int length=(-1)) const;
245
246  /** Initializes a string with a formatted string (as in #vprintf#).  The
247      string is re-initialized with the characters generated according to the
248      specified format #fmt# and using the optional arguments.  See the ANSI-C
249      function #vprintf()# for more information. The current implementation
250      will cause a segmentation violation if the resulting string is longer
251      than 32768 characters.
      NOTE(review): no #fmt# parameter is declared here; presumably the
      current string supplies the format -- confirm in GString.cpp. */
252  GP<GStringRep> vformat(va_list args) const;
253  // -- ENCODING CONVERSION
254
255  static GP<GStringRep> UTF8ToNative( const char *s,
256    const EscapeMode escape=UNKNOWN_ESCAPED );
257  static GP<GStringRep> NativeToUTF8( const char *s );
258
259  // Creates an uppercase version of the current string.
260  GP<GStringRep> upcase(void) const;
261  // Creates a lowercase version of the current string.
262  GP<GStringRep> downcase(void) const;
263
264  /** Returns the next UCS4 character, and updates the pointer s. */
265  static unsigned long UTF8toUCS4(
266    unsigned char const *&s, void const * const endptr );
267
268  /** Returns the number of bytes in next UCS4 character,
269      and sets #w# to the next UCS4 character.  The byte count is the
      distance by which the pointer overload above advanced a copy of #s#. */
270  static int UTF8toUCS4(
271    unsigned long &w, unsigned char const s[], void const * const endptr )
272  { unsigned char const *r=s;w=UTF8toUCS4(r,endptr);return (int)((size_t)r-(size_t)s); }
273
274  /** Returns the next UCS4 word from the UTF16 string.
      NOTE(review): the word is presumably stored in #w# and the int result
      is presumably a count of UTF16 words consumed -- confirm. */
275  static int UTF16toUCS4(
276     unsigned long &w, unsigned short const * const s,void const * const eptr);
277
  // Opposite direction of UTF16toUCS4: takes the UCS4 value #w# and two
  // UTF16 output words.  NOTE(review): meaning of the int result is not
  // visible here -- confirm in GString.cpp.
278  static int UCS4toUTF16(
279    unsigned long w, unsigned short &w1, unsigned short &w2);
280
  // Comparison helpers covering every combination of representation and
  // C-string operands; #len# defaults to -1.
281  int cmp(const char *s2, const int len=(-1)) const;
282  static int cmp(
283    const GP<GStringRep> &s1, const GP<GStringRep> &s2, const int len=(-1)) ;
284  static int cmp(
285    const GP<GStringRep> &s1, const char *s2, const int len=(-1));
286  static int cmp(
287    const char *s1, const GP<GStringRep> &s2, const int len=(-1));
288  static int cmp(
289    const char *s1, const char *s2, const int len=(-1));
290
291  // Lookup the next character, and return the position of the next character.
292  int getUCS4(unsigned long &w, const int from) const;
293
  // Write the UCS4 value #w# at #ptr# in the encoding of the concrete class
  // (pure virtual).  NOTE(review): the returned pointer presumably points
  // past the bytes written -- confirm.
294  virtual unsigned char *UCS4toString(
295    const unsigned long w, unsigned char *ptr, mbstate_t *ps=0) const = 0;
296
  // Static UTF8 counterpart of UCS4toString.
297  static unsigned char *UCS4toUTF8(
298    const unsigned long w,unsigned char *ptr);
299
  // Static Native counterpart of UCS4toString.
300  static unsigned char *UCS4toNative(
301    const unsigned long w,unsigned char *ptr, mbstate_t *ps);
302
  // -- SEARCHING
  // GBaseString declares members with the same names and documents their
  // contract.
303  int search(char c, int from=0) const;
304
305  int search(char const *str, int from=0) const;
306
307  int rsearch(char c, int from=0) const;
308
309  int rsearch(char const *str, int from=0) const;
310
311  int contains(char const accept[], int from=0) const;
312
313  int rcontains(char const accept[], int from=0) const;
314
315protected:
316  // Return the next character and increment the source pointer.
317  virtual unsigned long getValidUCS4(const char *&source) const = 0;
318
  // Case conversion helper parameterized by a test function and a mapping
  // function; presumably shared by upcase() and downcase() -- TODO confirm.
319  GP<GStringRep> tocase(
320    bool (*xiswcase)(const unsigned long wc),
321    unsigned long (*xtowcase)(const unsigned long wc)) const;
322
323  // Tests if the specified character passes the xiswtest.  If so, the
324  // return pointer is incremented to the next character, otherwise the
325  // specified #ptr# is returned.
326  const char * isCharType( bool (*xiswtest)(const unsigned long wc), const char *ptr,
327    const bool reverse=false) const;
328
329  // Find the next character position that passes the isCharType test.
330  int nextCharType(
331    bool (*xiswtest)(const unsigned long wc),const int from,const int len,
332    const bool reverse=false) const;
333
  // Character classification and case mapping on UCS4 values.  Their
  // signatures match the function pointer parameters used above.
334  static bool giswspace(const unsigned long w);
335  static bool giswupper(const unsigned long w);
336  static bool giswlower(const unsigned long w);
337  static unsigned long gtowupper(const unsigned long w);
338  static unsigned long gtowlower(const unsigned long w);
339
  // Remainder handling.  NOTE(review): the meaning of the "remainder" is
  // defined by class Unicode and GString.cpp, which are not visible here.
340  virtual void set_remainder( void const * const buf, const unsigned int size,
341    const EncodeType encodetype);
342  virtual void set_remainder( void const * const buf, const unsigned int size,
343    const GP<GStringRep> &encoding );
344  virtual void set_remainder ( const GP<Unicode> &remainder );
345
346  virtual GP<Unicode> get_remainder( void ) const;
347
348public:
349  /* Returns a copy of this string with the characters used in XML
350      escaped: '<'  to "&lt;", '>'  to "&gt;",  '&' to "&amp;", '\'' to
351      "&apos;", and  '\"' to  "&quot;".   Characters 0x01 through
352      0x1f are also escaped. */
353  GP<GStringRep> toEscaped( const bool tosevenbit ) const;
354
355  // Tests if a string is legally encoded in the current character set.
356  virtual bool is_valid(void) const = 0;
357
  // Copy to the wchar_t buffer #buf#.  NOTE(review): #buflen# is presumably
  // the buffer capacity -- confirm in GString.cpp.
358  virtual int ncopy(wchar_t * const buf, const int buflen) const = 0;
359
360protected:
361
362// Actual string data.
363  int  size;
364  char *data;
365};
366
// Concrete GStringRep subclass for UTF8 text.  It overrides the pure
// virtual methods of GStringRep and adds static create() factories.
367class GStringRep::UTF8 : public GStringRep
368{
369public:
370  // default constructor
371  UTF8(void);
372  // virtual destructor
373  virtual ~UTF8();
374
375    // Other virtual methods.
    // NOTE: blank() gains a default argument here that the base declaration
    // lacks; it applies only to calls made through the UTF8 static type.
376  virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
377  virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
378      // Test if UTF8.
379  virtual bool isUTF8(void) const;
380      // Convert to Native.
381  virtual GP<GStringRep> toNative(
382    const EscapeMode escape=UNKNOWN_ESCAPED) const;
383      // Convert to UTF8.
384  virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
385      // Convert #rep# to the same class as this object.  The second
      // parameter is unnamed here and carries no default, unlike the base
      // declaration.
386  virtual GP<GStringRep> toThis(
387    const GP<GStringRep> &rep,const GP<GStringRep> &) const;
388      // Compare with #s2#.
389  virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const;
390
  // Create an empty string.
391  static GP<GStringRep> create(const unsigned int sz = 0);
392
393  // Convert strings to numbers.
394  virtual int toInt(void) const;
395  virtual long int toLong(
396    const int pos, int &endpos, const int base=10) const;
397  virtual unsigned long toULong(
398    const int pos, int &endpos, const int base=10) const;
399  virtual double toDouble(
400    const int pos, int &endpos) const;
401
402    // Create with a strdup string.
403  static GP<GStringRep> create(const char *s);
404
405   // Creates with a concat operation.
406  static GP<GStringRep> create(
407    const GP<GStringRep> &s1,const GP<GStringRep> &s2);
408  static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
409  static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
410  static GP<GStringRep> create( const char *s1,const char *s2);
411
412    // Create with a strdup and substr operation.
413  static GP<GStringRep> create(
414    const char *s,const int start,const int length=(-1));
415
416  static GP<GStringRep> create(
417    const unsigned short *s,const int start,const int length=(-1));
418
419  static GP<GStringRep> create(
420    const unsigned long *s,const int start,const int length=(-1));
421
  // Create from a format string #fmt# plus arguments, given either as a
  // variable argument list or as a va_list.  NOTE(review): presumably
  // printf-style formatting as described for GStringRep::vformat -- confirm.
422  static GP<GStringRep> create_format(const char fmt[],...);
423  static GP<GStringRep> create(const char fmt[],va_list& args);
424
  // UTF8 implementation of the pure virtual GStringRep::UCS4toString.
425  virtual unsigned char *UCS4toString(
426    const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const;
427
428  // Tests if a string is legally encoded in the current character set.
429  virtual bool is_valid(void) const;
430
  // Copy to the wchar_t buffer #buf#.
431  virtual int ncopy(wchar_t * const buf, const int buflen) const;
432
433  friend class GBaseString;
434
435protected:
436  // Return the next character and increment the source pointer.
437  virtual unsigned long getValidUCS4(const char *&source) const;
438};
439
440
441/** General purpose character string.
442    Each derived instance of class #GBaseString# represents a
443    character string.  Overloaded operators provide a value semantic
444    to #GBaseString# objects.  Conversion operators and constructors
445    transparently convert between #GBaseString# objects and
446    #const char*# pointers.  The #GBaseString# class has no public
447    constructors, since a derived type should always be used
448    to specify the desired multibyte character encoding.
449
450    Functions taking strings as arguments should declare their
451    arguments as "#const char*#".  Such functions will work equally
452    well with derived #GBaseString# objects since there is a fast
453    conversion operator from the derived #GBaseString# objects
454    to "#const char*#".  Functions returning strings should return
455    #GUTF8String# or #GNativeString# objects because the class will
456    automatically manage the necessary memory.
457
458    Characters in the string can be identified by their position.  The
459    first character of a string is numbered zero. Negative positions
460    represent characters relative to the end of the string (i.e.
461    position #-1# accesses the last character of the string,
462    position #-2# represents the second last character, etc.)  */
463
464class GBaseString : protected GP<GStringRep>
465{
466public:
  // Mirrors GStringRep::EscapeMode value for value.
467  enum EscapeMode {
468    UNKNOWN_ESCAPED=GStringRep::UNKNOWN_ESCAPED,
469    IS_ESCAPED=GStringRep::IS_ESCAPED,
470    NOT_ESCAPED=GStringRep::NOT_ESCAPED };
471
472  friend class GUTF8String;
473  friend class GNativeString;
474protected:
475  // Sets the gstr pointer.
476  void init(void);
477
  // The destructor and the constructor below are protected: only derived
  // classes and friends can create or destroy a GBaseString.
478  ~GBaseString();
  // Initializes from the representation #rep#.
479  GBaseString &init(const GP<GStringRep> &rep);
480
481  // -- CONSTRUCTORS
482  /** Null constructor. Constructs an empty string. */
483  GBaseString( void );
484
485public:
486  // -- ACCESS
487  /** Converts a string into a constant null terminated character
488      array.  This conversion operator is very efficient because
489      it simply returns a pointer to the internal string data. The
490      returned pointer remains valid as long as the string is
491      unmodified. */
492  operator const char* ( void ) const  ;
493  /// Returns the string length.
494  unsigned int length( void ) const;
495  /** Returns true if and only if the string contains zero characters.
496      This operator is useful for conditional expression in control
497      structures.
498      \begin{verbatim}
499         if (! str) { ... }
500         while (!! str) { ... }  -- Note the double operator!
501      \end{verbatim}
502      Class #GBaseString# does not support syntax
503      "#if# #(str)# #{}#" because the required conversion operator
504      introduces dangerous ambiguities with certain compilers. */
505  bool operator! ( void ) const;
506
507  // -- INDEXING
508  /** Returns the character at position #n#. An exception
509      \Ref{GException} is thrown if number #n# is not in range #-len#
510      to #len-1#, where #len# is the length of the string.  The first
511      character of a string is numbered zero.  Negative positions
512      represent characters relative to the end of the string. */
513  char operator[] (int n) const;
514  /// Returns #TRUE# if the string contains an integer number.
515  bool is_int(void) const;
516  /// Returns #TRUE# if the string contains a float number.
517  bool is_float(void) const;
518
519  /** Converts strings between native & UTF8 **/
520  GNativeString getUTF82Native( EscapeMode escape=UNKNOWN_ESCAPED ) const;
521  GUTF8String getNative2UTF8( void ) const;
522
523  // -- ALTERING
524  /// Reinitializes a string with the null string.
525  void empty( void );
526  // -- SEARCHING
527  /** Searches character #c# in the string, starting at position
528      #from# and scanning forward until reaching the end of the
529      string.  This function returns the position of the matching
530      character.  It returns #-1# if character #c# cannot be found. */
531  int search(char c, int from=0) const;
532
533  /** Searches sub-string #str# in the string, starting at position
534      #from# and scanning forward until reaching the end of the
535      string.  This function returns the position of the first
536      matching character of the sub-string.  It returns #-1# if
537      string #str# cannot be found. */
538  int search(const char *str, int from=0) const;
539
540  /** Searches character #c# in the string, starting at position
541      #from# and scanning backwards until reaching the beginning of
542      the string.  This function returns the position of the matching
543      character.  It returns #-1# if character #c# cannot be found. */
544  int rsearch(char c, const int from=0) const;
545  /** Searches sub-string #str# in the string, starting at position
546      #from# and scanning backwards until reaching the beginning of
547      the string.  This function returns the position of the first
548      matching character of the sub-string. It returns #-1# if
549      string #str# cannot be found. */
550  int rsearch(const char *str, const int from=0) const;
551  /** Searches for any of the specified characters in the accept
552      string.  It returns #-1# if none of the characters can
553      be found, otherwise the position of the first match. */
554  int contains(const char accept[], const int from=0) const;
555  /** Searches for any of the specified characters in the accept
556      string.  It returns #-1# if none of the characters can be
557      found, otherwise the position of the last match. */
558  int rcontains(const char accept[], const int from=0) const;
559
560  /** Concatenates strings. Returns a string composed by concatenating
561      the characters of strings #s1# and #s2#.  Here #s1# is this
      string; the result type follows the type of #s2#. */
562  GUTF8String operator+(const GUTF8String &s2) const;
563  GNativeString operator+(const GNativeString &s2) const;
564
565  /** Returns an integer.  Implements i18n atoi.  */
566  int toInt(void) const;
567
568  /** Returns a long integer.  Implements i18n strtol.  */
569  long toLong(const int pos, int &endpos, const int base=10) const;
570
571  /** Returns an unsigned long integer.  Implements i18n strtoul. */
572  unsigned long toULong(
573    const int pos, int &endpos, const int base=10) const;
574
575  /** Returns a double.  Implements the i18n strtod.  */
576  double toDouble(
577    const int pos, int &endpos ) const;
578
579  /** Returns a long integer.  Implements i18n strtol.  The static
      overloads below take the source string #src# explicitly. */
580  static long toLong(
581    const GUTF8String& src, const int pos, int &endpos, const int base=10);
582
583  static unsigned long toULong(
584    const GUTF8String& src, const int pos, int &endpos, const int base=10);
585
586  static double toDouble(
587    const GUTF8String& src, const int pos, int &endpos);
588
589  /** Returns a long integer.  Implements i18n strtol.  Same set of
      static overloads, for a #GNativeString# source. */
590  static long toLong(
591    const GNativeString& src, const int pos, int &endpos, const int base=10);
592
593  static unsigned long toULong(
594    const GNativeString& src, const int pos, int &endpos, const int base=10);
595
596  static double toDouble(
597    const GNativeString& src, const int pos, int &endpos);
598
599  // -- HASHING (the friend function #hash# is declared further down)
600
601  // -- COMPARISONS
602    /** Returns an #int#.  Compares string with #s2# and returns
603        sorting order. */
604  int cmp(const GBaseString &s2, const int len=(-1)) const;
605    /** Returns an #int#.  Compares string with #s2# and returns
606        sorting order. */
607  int cmp(const char *s2, const int len=(-1)) const;
608    /** Returns an #int#.  Compares string with #s2# and returns
609        sorting order. */
610  int cmp(const char s2) const;
611    /** Returns an #int#.  Compares #s1# with #s2# and returns
612        sorting order. */
613  static int cmp(const char *s1, const char *s2, const int len=(-1));
614  /** NOTE(review): orphaned comment -- no #ncmp# member is declared in
615      this class.  Original text: the Standard C strncmp takes two strings
616      and compares the first N characters; static bool GBaseString::ncmp
617      will compare #s1# with #s2# over the #len# characters starting
      from the beginning of the string. */
618  /** String comparison. Returns true if and only if character
619      strings #s1# and #s2# are equal (as with #strcmp#.)
620    */
621  bool operator==(const GBaseString &s2) const;
622  bool operator==(const char *s2) const;
623  friend bool operator==(const char    *s1, const GBaseString &s2);
624
625  /** String comparison. Returns true if and only if character
626      strings #s1# and #s2# are not equal (as with #strcmp#.)
627    */
628  bool operator!=(const GBaseString &s2) const;
629  bool operator!=(const char *s2) const;
630  friend bool operator!=(const char *s1, const GBaseString &s2);
631
632  /** String comparison. Returns true if and only if character
633      string #s1# is lexicographically greater than or equal to
634      string #s2# (as with #strcmp#.) */
635  bool operator>=(const GBaseString &s2) const;
636  bool operator>=(const char *s2) const;
637  bool operator>=(const char s2) const;
638  friend bool operator>=(const char    *s1, const GBaseString &s2);
639  friend bool operator>=(const char s1, const GBaseString &s2);
640
641  /** String comparison. Returns true if and only if character
642      string #s1# is lexicographically less than string #s2#
643      (as with #strcmp#.)
644   */
645  bool operator<(const GBaseString &s2) const;
646  bool operator<(const char *s2) const;
647  bool operator<(const char s2) const;
648  friend bool operator<(const char *s1, const GBaseString &s2);
649  friend bool operator<(const char s1, const GBaseString &s2);
650
651  /** String comparison. Returns true if and only if character
652      string #s1# is lexicographically greater than string #s2#
653      (as with #strcmp#.)
654   */
655  bool operator> (const GBaseString &s2) const;
656  bool operator> (const char *s2) const;
657  bool operator> (const char s2) const;
658  friend bool operator> (const char    *s1, const GBaseString &s2);
659  friend bool operator> (const char s1, const GBaseString &s2);
660
661  /** String comparison. Returns true if and only if character
662      string #s1# is lexicographically less than or equal to string
663      #s2# (as with #strcmp#.)
664   */
665  bool operator<=(const GBaseString &s2) const;
666  bool operator<=(const char *s2) const;
667  bool operator<=(const char s2) const;
668  friend bool operator<=(const char    *s1, const GBaseString &s2);
669  friend bool operator<=(const char    s1, const GBaseString &s2);
670
671   /** NOTE(review): orphaned comment, not attached to a declaration; it
672       appears to describe #toInt()# declared earlier.  Original text:
673       Returns an integer.  Implements a functional i18n atoi. Note
       that if you pass a GBaseString that is not in Native format
       the results may be disappointing. */
674
675  /** Returns a hash code for the string.  This hashing function
676      helps when creating associative maps with string keys (see
677      \Ref{GMap}).  This hash code may be reduced to an arbitrary
678      range by computing its remainder modulo the upper bound of
679      the range. */
680  friend unsigned int hash(const GBaseString &ref);
681  // -- HELPERS
682  friend class GStringRep;
683
684  /// Returns next non space position.
685  int nextNonSpace( const int from=0, const int len=(-1) ) const;
686
687  /// Returns next character position.
688  int nextChar( const int from=0 ) const;
689
690  /// Returns next white space position.
691  int nextSpace( const int from=0, const int len=(-1) ) const;
692
693  /// Returns the position after the last non-whitespace character.
694  int firstEndSpace( const int from=0,const int len=(-1) ) const;
695
696  /// Tests if the string is legally encoded in the current codepage.
697  bool is_valid(void) const;
698
699  /// Copies to a wchar_t buffer.
700  int ncopy(wchar_t * const buf, const int buflen) const;
701
702protected:
  // Character pointer set by init() (see the comment on init above).
703  const char *gstr;
  // Raises the error for an out-of-range subscript.  #no_return# is a macro
  // defined outside this file.
704  static void throw_illegal_subscript() no_return;
  // NOTE(review): presumably the value #gstr# takes for an empty string --
  // confirm in GString.cpp.
705  static const char *nullstr;
706public:
  // Encoding conversions returning the other string type.
  // NOTE(review): how these differ from getUTF82Native()/getNative2UTF8()
  // above is not visible in this header -- confirm in GString.cpp.
707  GNativeString UTF8ToNative(
708    const bool currentlocale=false,
709    const EscapeMode escape=UNKNOWN_ESCAPED) const;
710  GUTF8String NativeToUTF8(void) const;
711protected:
  // NOTE(review): presumably validates #n# against the range documented for
  // operator[] and returns the usable index -- confirm in GString.cpp.
712  int CheckSubscript(int n) const;
713};
714
715/** General purpose character string.
716    Each instance of class #GUTF8String# represents a character
717    string.  Overloaded operators provide a value semantic to
718    #GUTF8String# objects.  Conversion operators and constructors
719    transparently convert between #GUTF8String# objects and
720    #const char*# pointers.
721
722    Functions taking strings as arguments should declare their
723    arguments as "#const char*#".  Such functions will work equally
724    well with #GUTF8String# objects since there is a fast conversion
725    operator from #GUTF8String# to "#const char*#".  Functions
726    returning strings should return #GUTF8String# or #GNativeString#
727    objects because the class will automatically manage the necessary
728    memory.
729
730    Characters in the string can be identified by their position.  The
731    first character of a string is numbered zero. Negative positions
732    represent characters relative to the end of the string (i.e.
733    position #-1# accesses the last character of the string,
734    position #-2# represents the second last character, etc.)  */
735
class GUTF8String : public GBaseString
{
public:
  ~GUTF8String();
  /** Re-synchronizes this string with its current representation by
      calling #GBaseString::init()#. */
  void init(void);

  /** Replaces the contents of this string with representation #rep#.
      A non-null #rep# is first converted with #toUTF8(true)#.
      Returns a reference to this string. */
  GUTF8String &init(const GP<GStringRep> &rep);

  // -- CONSTRUCTORS
  /** Null constructor. Constructs an empty string. */
  GUTF8String(void);
  /// Constructs a string from a character.
  GUTF8String(const char dat);
  /// Constructs a string from a null terminated character array.
  GUTF8String(const char *str);
  /// Constructs a string from a null terminated character array.
  GUTF8String(const unsigned char *str);
  GUTF8String(const unsigned short *dat);
  GUTF8String(const unsigned long *dat);
  /** Constructs a string from a character array.  Elements of the
      character array #dat# are added into the string until the
      string length reaches #len# or until encountering a null
      character (whichever comes first). */
  GUTF8String(const char *dat, unsigned int len);
  GUTF8String(const unsigned short *dat, unsigned int len);
  GUTF8String(const unsigned long *dat, unsigned int len);

  /// Construct from base class.
  GUTF8String(const GP<GStringRep> &str);
  GUTF8String(const GBaseString &str);
  GUTF8String(const GUTF8String &str);
  GUTF8String(const GNativeString &str);
  /** Constructs a string from a part of string #gs#, selected by
      position #from# and length #len#.
      NOTE(review): presumably follows the same conventions as
      \Ref{substr}; confirm against the implementation. */
  GUTF8String(const GBaseString &gs, int from, int len);

  /** Copy a null terminated character array. Resets this string
      with the character string contained in the null terminated
      character array #str#.  The other overloads reset this string
      from a character, a string representation or another string
      object. */
  GUTF8String& operator= (const char str);
  GUTF8String& operator= (const char *str);
  GUTF8String& operator= (const GP<GStringRep> &str);
  GUTF8String& operator= (const GBaseString &str);
  GUTF8String& operator= (const GUTF8String &str);
  GUTF8String& operator= (const GNativeString &str);

  /** Constructs a string with a formatted string (as in #vprintf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #vprintf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer
      than 32768 characters. */
  GUTF8String(const GUTF8String &fmt, va_list &args);

  /** Constructs a string with a human-readable representation of
      integer #number#.  The format is similar to format #"%d"# in
      function #printf#. */
  GUTF8String(const int number);

  /** Constructs a string with a human-readable representation of
      floating point number #number#. The format is similar to
      format #"%f"# in function #printf#.  */
  GUTF8String(const double number);


  /** Initializes a string with a formatted string (as in #printf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #printf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer
      than 32768 characters. */
  GUTF8String &format(const char *fmt, ... );
  /** Initializes a string with a formatted string (as in #vprintf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #vprintf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer
      than 32768 characters. */
  GUTF8String &vformat(const GUTF8String &fmt, va_list &args);

  /** Returns a copy of this string with the characters that are
      special in XML escaped: '<'  to "&lt;", '>'  to "&gt;",
      '&' to "&amp;", '\'' to "&apos;", and  '\"' to  "&quot;".
      Characters 0x01 through 0x1f are also escaped. */
  GUTF8String toEscaped( const bool tosevenbit=false ) const;

  /** Converts strings containing HTML/XML escaped characters into
      their unescaped forms. Numeric representations of characters
      (e.g., "&#38;" or "&#x26;" for "&") are the only forms
      converted by this function. */
  GUTF8String fromEscaped( void ) const;

  /** Converts strings containing HTML/XML escaped characters
      (e.g., "&lt;" for "<") into their unescaped forms. The
      conversion is partially defined by the ConvMap argument which
      specifies the conversion strings to be recognized. Numeric
      representations of characters (e.g., "&#38;" or "&#x26;"
      for "&") are always converted. */
  GUTF8String fromEscaped(
    const GMap<GUTF8String,GUTF8String> ConvMap ) const;


  // -- CONCATENATION
  /// Appends character #ch# to the string.
  GUTF8String& operator+= (char ch);

  /// Appends the null terminated character array #str# to the string.
  GUTF8String& operator+= (const char *str);
  /// Appends the specified GBaseString to the string.
  GUTF8String& operator+= (const GBaseString &str);

  /** Returns a sub-string.  The sub-string is composed by copying
      #len# characters starting at position #from# in this string.
      The length of the resulting string may be smaller than #len#
      if the specified range is too large. */
  GUTF8String substr(int from, int len/*=(-1)*/) const;

  /** Returns an upper case copy of this string.  The returned string
      contains a copy of the current string with all letters turned
      into upper case letters. */
  GUTF8String upcase( void ) const;
  /** Returns a lower case copy of this string.  The returned string
      contains a copy of the current string with all letters turned
      into lower case letters. */
  GUTF8String downcase( void ) const;

  /** Concatenates strings. Returns a string composed by concatenating
      the characters of strings #s1# and #s2#.
  */
  GUTF8String operator+(const GBaseString &s2) const;
  GUTF8String operator+(const GUTF8String &s2) const;
  GUTF8String operator+(const GNativeString &s2) const;
  GUTF8String operator+(const char    *s2) const;
  friend GUTF8String operator+(const char    *s1, const GUTF8String &s2);

  /** Provides a direct access to the string buffer.  Returns a
      pointer for directly accessing the string buffer.  This pointer
      remains valid as long as the string is not modified by
      other means.  Positive values for argument #n# represent the
      length of the returned buffer.  The returned string buffer will
      be large enough to hold at least #n# characters plus a null
      character.  If #n# is positive but smaller than the string
      length, the string will be truncated to #n# characters. */
  char *getbuf(int n = -1);
  /** Set the character at position #n# to value #ch#.  An exception
      \Ref{GException} is thrown if number #n# is not in range #-len#
      to #len#, where #len# is the length of the string.  If character
      #ch# is zero, the string is truncated at position #n#.  The
      first character of a string is numbered zero. Negative
      positions represent characters relative to the end of the
      string. If position #n# is equal to the length of the string,
      this function appends character #ch# to the end of the string. */
  void setat(const int n, const char ch);
public:
  typedef enum GStringRep::EncodeType EncodeType;
  static GUTF8String create(void const * const buf,
    const unsigned int size,
    const EncodeType encodetype, const GUTF8String &encoding);
  static GUTF8String create( void const * const buf,
    unsigned int size, const EncodeType encodetype );
  static GUTF8String create( void const * const buf,
    const unsigned int size, const GUTF8String &encoding );
  static GUTF8String create( void const * const buf,
    const unsigned int size, const GP<GStringRep::Unicode> &remainder);
  /** Returns the remainder reported by the string representation, or
      a null pointer when this string has no representation. */
  GP<GStringRep::Unicode> get_remainder(void) const;
  /** Creates a string from buffer #buf# and size #bufsize#.  When
      #HAS_WCHAR# is set the buffer is first turned into a
      #GNativeString#; otherwise it is passed directly to the
      #GUTF8String# constructor. */
  static GUTF8String create( const char *buf, const unsigned int bufsize );
  /// Creates a string by passing #buf# and #bufsize# to the constructor.
  static GUTF8String create( const unsigned short *buf, const unsigned int bufsize );
  /// Creates a string by passing #buf# and #bufsize# to the constructor.
  static GUTF8String create( const unsigned long *buf, const unsigned int bufsize );
};
911
912
// Without wide character support, GBaseString is temporarily redefined
// as GUTF8String.  The class declared below then derives from
// GUTF8String, and the GBaseString arguments in its constructors read
// as GUTF8String.  The macro is removed again inside the class body.
#if !HAS_WCHAR
#define GBaseString GUTF8String
#endif

/** General purpose character string.
    Each instance of class #GNativeString# represents a character
    string.  Overloaded operators provide a value semantic to
    #GNativeString# objects.  Conversion operators and constructors
    transparently convert between #GNativeString# objects and
    #const char*# pointers.

    Functions taking strings as arguments should declare their
    arguments as "#const char*#".  Such functions will work equally
    well with #GNativeString# objects since there is a fast conversion
    operator from #GNativeString# to "#const char*#".  Functions
    returning strings should return #GUTF8String# or #GNativeString#
    objects because the class will automatically manage the necessary
    memory.

    Characters in the string can be identified by their position.  The
    first character of a string is numbered zero. Negative positions
    represent characters relative to the end of the string (i.e.
    position #-1# accesses the last character of the string,
    position #-2# represents the second last character, etc.)  */

class GNativeString : public GBaseString
{
public:
  ~GNativeString();
  // -- CONSTRUCTORS
  /** Null constructor. Constructs an empty string. */
  GNativeString(void);
  /// Constructs a string from a character.
  GNativeString(const char dat);
  /// Constructs a string from a null terminated character array.
  GNativeString(const char *str);
  /// Constructs a string from a null terminated character array.
  GNativeString(const unsigned char *str);
  GNativeString(const unsigned short *str);
  GNativeString(const unsigned long *str);
  /** Constructs a string from a character array.  Elements of the
      character array #dat# are added into the string until the
      string length reaches #len# or until encountering a null
      character (whichever comes first). */
  GNativeString(const char *dat, unsigned int len);
  GNativeString(const unsigned short *dat, unsigned int len);
  GNativeString(const unsigned long *dat, unsigned int len);
  /// Construct from base class.
  GNativeString(const GP<GStringRep> &str);
  GNativeString(const GBaseString &str);
#if HAS_WCHAR
  GNativeString(const GUTF8String &str);
#endif
  GNativeString(const GNativeString &str);
  /** Constructs a string from a part of string #gs#, selected by
      position #from# and length #len#.  This is the constructor
      that \Ref{substr} uses to build its result. */
  GNativeString(const GBaseString &gs, int from, int len);

  /** Constructs a string with a formatted string (as in #vprintf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #vprintf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer than
      32768 characters. */
  GNativeString(const GNativeString &fmt, va_list &args);

  /** Constructs a string with a human-readable representation of
      integer #number#.  The format is similar to format #"%d"# in
      function #printf#. */
  GNativeString(const int number);

  /** Constructs a string with a human-readable representation of
      floating point number #number#. The format is similar to
      format #"%f"# in function #printf#.  */
  GNativeString(const double number);

  // Without wide characters the class only declares the constructors
  // above, and the temporary GBaseString alias is dropped here.
  // Otherwise the complete native string interface follows.
#if !HAS_WCHAR
#undef GBaseString
#else
  /// Initialize this string class
  void init(void);

  /** Initialize this string class with representation #rep#.  A
      non-null #rep# is first converted with
      #toNative(GStringRep::NOT_ESCAPED)#. */
  GNativeString &init(const GP<GStringRep> &rep);

  /** Copy a null terminated character array. Resets this string with
      the character string contained in the null terminated character
      array #str#. */
  GNativeString& operator= (const char str);
  GNativeString& operator= (const char *str);
  GNativeString& operator= (const GP<GStringRep> &str);
  GNativeString& operator= (const GBaseString &str);
  GNativeString& operator= (const GUTF8String &str);
  GNativeString& operator= (const GNativeString &str);
  // -- CONCATENATION
  /// Appends character #ch# to the string.
  GNativeString& operator+= (char ch);
  /// Appends the null terminated character array #str# to the string.
  GNativeString& operator+= (const char *str);
  /// Appends the specified GBaseString to the string.
  GNativeString& operator+= (const GBaseString &str);

  /** Returns a sub-string.  The sub-string is composed by copying
      #len# characters starting at position #from# in this string.
      The length of the resulting string may be smaller than #len#
      if the specified range is too large. */
  GNativeString substr(int from, int len/*=(-1)*/) const;

  /** Returns an upper case copy of this string.  The returned
      string contains a copy of the current string with all letters
      turned into upper case letters. */
  GNativeString upcase( void ) const;
  /** Returns a lower case copy of this string.  The returned
      string contains a copy of the current string with all letters
      turned into lower case letters. */
  GNativeString downcase( void ) const;


  GNativeString operator+(const GBaseString &s2) const;
  GNativeString operator+(const GNativeString &s2) const;
  GUTF8String operator+(const GUTF8String &s2) const;
  GNativeString operator+(const char *s2) const;
  friend GNativeString operator+(const char *s1, const GNativeString &s2);

  /** Initializes a string with a formatted string (as in #printf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #printf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer than
      32768 characters. */
  GNativeString &format(const char *fmt, ... );
  /** Initializes a string with a formatted string (as in #vprintf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #vprintf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer than
      32768 characters. */
  GNativeString &vformat(const GNativeString &fmt, va_list &args);

  /** Returns a copy of this string with the characters that are
      special in XML escaped: '<'  to "&lt;", '>'  to "&gt;",
      '&' to "&amp;", '\'' to "&apos;", and  '\"' to  "&quot;".
      Characters 0x01 through 0x1f are also escaped. */
  GNativeString toEscaped( const bool tosevenbit=false ) const;


  /** Provides a direct access to the string buffer.  Returns a
      pointer for directly accessing the string buffer.  This
      pointer remains valid as long as the string is not
      modified by other means.  Positive values for argument #n#
      represent the length of the returned buffer.  The returned
      string buffer will be large enough to hold at least #n#
      characters plus a null character.  If #n# is positive but
      smaller than the string length, the string will be truncated
      to #n# characters. */
  char *getbuf(int n = -1);
  /** Set the character at position #n# to value #ch#.  An exception
      \Ref{GException} is thrown if number #n# is not in range #-len#
      to #len#, where #len# is the length of the string.  If
      character #ch# is zero, the string is truncated at position
      #n#.  The first character of a string is numbered zero.
      Negative positions represent characters relative to the end of
      the string. If position #n# is equal to the length of the
      string, this function appends character #ch# to the end of the
      string. */
  void setat(const int n, const char ch);

  /// Creates a string by passing #buf# and #bufsize# to the constructor.
  static GNativeString create( const char *buf, const unsigned int bufsize );
  /// Creates a string by passing #buf# and #bufsize# to the constructor.
  static GNativeString create( const unsigned short *buf, const unsigned int bufsize );
  /// Creates a string by passing #buf# and #bufsize# to the constructor.
  static GNativeString create( const unsigned long *buf, const unsigned int bufsize );
#endif // HAS_WCHAR
};
1090
1091//@}
1092
1093inline
1094GBaseString::operator const char* ( void ) const
1095{
1096  return ptr?(*this)->data:nullstr;
1097}
1098
1099inline unsigned int
1100GBaseString::length( void ) const
1101{
1102  return ptr ? (*this)->size : 0;
1103}
1104
1105inline bool
1106GBaseString::operator! ( void ) const
1107{
1108  return !ptr;
1109}
1110
1111inline GUTF8String
1112GUTF8String::upcase( void ) const
1113{
1114  if (ptr) return (*this)->upcase();
1115  return *this;
1116}
1117
1118inline GUTF8String
1119GUTF8String::downcase( void ) const
1120{
1121  if (ptr) return (*this)->downcase();
1122  return *this;
1123}
1124
1125inline void
1126GUTF8String::init(void)
1127{ GBaseString::init(); }
1128
1129inline GUTF8String &
1130GUTF8String::init(const GP<GStringRep> &rep)
1131{ GP<GStringRep>::operator=(rep?rep->toUTF8(true):rep); init(); return *this; }
1132
1133inline GUTF8String &
1134GUTF8String::vformat(const GUTF8String &fmt, va_list &args)
1135{ return (*this = (fmt.ptr?GUTF8String(fmt,args):fmt)); }
1136
1137inline GUTF8String
1138GUTF8String::toEscaped( const bool tosevenbit ) const
1139{ return ptr?GUTF8String((*this)->toEscaped(tosevenbit)):(*this); }
1140
1141inline GP<GStringRep::Unicode> 
1142GUTF8String::get_remainder(void) const
1143{
1144  GP<GStringRep::Unicode> retval;
1145  if(ptr)
1146    retval=((*this)->get_remainder());
1147  return retval;
1148}
1149
1150inline
1151GUTF8String::GUTF8String(const GNativeString &str)
1152{ init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1153
1154inline
1155GUTF8String::GUTF8String(const GP<GStringRep> &str)
1156{ init(str?(str->toUTF8(true)):str); }
1157
1158inline
1159GUTF8String::GUTF8String(const GBaseString &str)
1160{ init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1161
1162inline void
1163GBaseString::init(void)
1164{
1165  gstr=ptr?((*this)->data):nullstr;
1166}
1167/** Returns an integer.  Implements i18n atoi.  */
1168inline int
1169GBaseString::toInt(void) const
1170{ return ptr?(*this)->toInt():0; }
1171
1172/** Returns a long intenger.  Implments i18n strtol.  */
1173inline long
1174GBaseString::toLong(const int pos, int &endpos, const int base) const
1175{
1176  long int retval=0;
1177  if(ptr)
1178  {
1179    retval=(*this)->toLong(pos, endpos, base);
1180  }else
1181  {
1182    endpos=(-1);
1183  }
1184  return retval;
1185}
1186
1187inline long
1188GBaseString::toLong(
1189  const GUTF8String& src, const int pos, int &endpos, const int base)
1190{
1191  return src.toLong(pos,endpos,base);
1192}
1193
1194inline long
1195GBaseString::toLong(
1196  const GNativeString& src, const int pos, int &endpos, const int base)
1197{
1198  return src.toLong(pos,endpos,base);
1199}
1200
1201/** Returns a unsigned long integer.  Implements i18n strtoul. */
1202inline unsigned long
1203GBaseString::toULong(const int pos, int &endpos, const int base) const
1204{
1205  unsigned long retval=0;
1206  if(ptr)
1207  {
1208    retval=(*this)->toULong(pos, endpos, base);
1209  }else
1210  {
1211    endpos=(-1);
1212  }
1213  return retval;
1214}
1215
1216inline unsigned long
1217GBaseString::toULong(
1218  const GUTF8String& src, const int pos, int &endpos, const int base)
1219{
1220  return src.toULong(pos,endpos,base);
1221}
1222
1223inline unsigned long
1224GBaseString::toULong(
1225  const GNativeString& src, const int pos, int &endpos, const int base)
1226{
1227  return src.toULong(pos,endpos,base);
1228}
1229
1230/** Returns a double.  Implements the i18n strtod.  */
1231inline double
1232GBaseString::toDouble(
1233  const int pos, int &endpos ) const
1234{
1235  double retval=(double)0;
1236  if(ptr)
1237  {
1238    retval=(*this)->toDouble(pos, endpos);
1239  }else
1240  {
1241    endpos=(-1);
1242  }
1243  return retval;
1244}
1245
1246inline double
1247GBaseString::toDouble(
1248  const GUTF8String& src, const int pos, int &endpos)
1249{
1250  return src.toDouble(pos,endpos);
1251}
1252
1253inline double
1254GBaseString::toDouble(
1255  const GNativeString& src, const int pos, int &endpos)
1256{
1257  return src.toDouble(pos,endpos);
1258}
1259
1260inline GBaseString &
1261GBaseString::init(const GP<GStringRep> &rep)
1262{ GP<GStringRep>::operator=(rep); init(); return *this;}
1263
1264inline char
1265GBaseString::operator[] (int n) const
1266{ return ((n||ptr)?((*this)->data[CheckSubscript(n)]):0); }
1267
1268inline int
1269GBaseString::search(char c, int from) const
1270{ return ptr?((*this)->search(c,from)):(-1); }
1271
1272inline int
1273GBaseString::search(const char *str, int from) const
1274{ return ptr?((*this)->search(str,from)):(-1); }
1275
1276inline int
1277GBaseString::rsearch(char c, const int from) const
1278{ return ptr?((*this)->rsearch(c,from)):(-1); }
1279
1280inline int
1281GBaseString::rsearch(const char *str, const int from) const
1282{ return ptr?((*this)->rsearch(str,from)):(-1); }
1283
1284inline int
1285GBaseString::contains(const char accept[], const int from) const
1286{ return ptr?((*this)->contains(accept,from)):(-1); }
1287
1288inline int
1289GBaseString::rcontains(const char accept[], const int from) const
1290{ return ptr?((*this)->rcontains(accept,from)):(-1); }
1291
1292inline int
1293GBaseString::cmp(const GBaseString &s2, const int len) const
1294{ return GStringRep::cmp(*this,s2,len); }
1295
1296inline int
1297GBaseString::cmp(const char *s2, const int len) const
1298{ return GStringRep::cmp(*this,s2,len); }
1299
1300inline int
1301GBaseString::cmp(const char s2) const
1302{ return GStringRep::cmp(*this,&s2,1); }
1303
1304inline int
1305GBaseString::cmp(const char *s1, const char *s2, const int len)
1306{ return GStringRep::cmp(s1,s2,len); }
1307
1308inline bool
1309GBaseString::operator==(const GBaseString &s2) const
1310{ return !cmp(s2); }
1311
1312inline bool
1313GBaseString::operator==(const char *s2) const
1314{ return !cmp(s2); }
1315
1316inline bool
1317GBaseString::operator!=(const GBaseString &s2) const
1318{ return !!cmp(s2); }
1319
1320inline bool
1321GBaseString::operator!=(const char *s2) const
1322{ return !!cmp(s2); }
1323
1324inline bool
1325GBaseString::operator>=(const GBaseString &s2) const
1326{ return (cmp(s2)>=0); }
1327
1328inline bool
1329GBaseString::operator>=(const char *s2) const
1330{ return (cmp(s2)>=0); }
1331
1332inline bool
1333GBaseString::operator>=(const char s2) const
1334{ return (cmp(s2)>=0); }
1335
1336inline bool
1337GBaseString::operator<(const GBaseString &s2) const
1338{ return (cmp(s2)<0); }
1339
1340inline bool
1341GBaseString::operator<(const char *s2) const
1342{ return (cmp(s2)<0); }
1343
1344inline bool
1345GBaseString::operator<(const char s2) const
1346{ return (cmp(s2)<0); }
1347
1348inline bool
1349GBaseString::operator> (const GBaseString &s2) const
1350{ return (cmp(s2)>0); }
1351
1352inline bool
1353GBaseString::operator> (const char *s2) const
1354{ return (cmp(s2)>0); }
1355
1356inline bool
1357GBaseString::operator> (const char s2) const
1358{ return (cmp(s2)>0); }
1359
1360inline bool
1361GBaseString::operator<=(const GBaseString &s2) const
1362{ return (cmp(s2)<=0); }
1363
1364inline bool
1365GBaseString::operator<=(const char *s2) const
1366{ return (cmp(s2)<=0); }
1367
1368inline bool
1369GBaseString::operator<=(const char s2) const
1370{ return (cmp(s2)<=0); }
1371
1372inline int
1373GBaseString::nextNonSpace( const int from, const int len ) const
1374{ return ptr?(*this)->nextNonSpace(from,len):0; }
1375
1376inline int
1377GBaseString::nextChar( const int from ) const
1378{ return ptr?(*this)->nextChar(from):0; }
1379
1380inline int
1381GBaseString::nextSpace( const int from, const int len ) const
1382{ return ptr?(*this)->nextSpace(from,len):0; }
1383
1384inline int
1385GBaseString::firstEndSpace( const int from,const int len ) const
1386{ return ptr?(*this)->firstEndSpace(from,len):0; }
1387
1388inline bool
1389GBaseString::is_valid(void) const
1390{ return ptr?((*this)->is_valid()):true; }
1391
1392inline int
1393GBaseString::ncopy(wchar_t * const buf, const int buflen) const
1394{if(buf&&buflen)buf[0]=0;return ptr?((*this)->ncopy(buf,buflen)):0;}
1395
1396inline int
1397GBaseString::CheckSubscript(int n) const
1398{
1399  if(n)
1400  {
1401    if (n<0 && ptr)
1402      n += (*this)->size;
1403    if (n<0 || !ptr || n > (int)(*this)->size)
1404      throw_illegal_subscript();
1405  }
1406  return n;
1407}
1408
1409inline GBaseString::GBaseString(void) { init(); }
1410
1411inline GUTF8String::GUTF8String(void) { }
1412
1413inline GUTF8String::GUTF8String(const GUTF8String &str)
1414{ init(str); }
1415
1416inline GUTF8String& GUTF8String::operator= (const GP<GStringRep> &str)
1417{ return init(str); }
1418
1419inline GUTF8String& GUTF8String::operator= (const GBaseString &str)
1420{ return init(str); }
1421
1422inline GUTF8String& GUTF8String::operator= (const GUTF8String &str)
1423{ return init(str); }
1424
1425inline GUTF8String& GUTF8String::operator= (const GNativeString &str)
1426{ return init(str); }
1427
1428inline GUTF8String
1429GUTF8String::create( const char *buf, const unsigned int bufsize )
1430{
1431#if HAS_WCHAR
1432  return GNativeString(buf,bufsize);
1433#else
1434  return GUTF8String(buf,bufsize);
1435#endif
1436}
1437
1438inline GUTF8String
1439GUTF8String::create( const unsigned short *buf, const unsigned int bufsize )
1440{
1441  return GUTF8String(buf,bufsize);
1442}
1443
1444inline GUTF8String
1445GUTF8String::create( const unsigned long *buf, const unsigned int bufsize )
1446{
1447  return GUTF8String(buf,bufsize);
1448}
1449
1450inline GNativeString::GNativeString(void) {}
1451
1452#if !HAS_WCHAR
1453// For Windows CE, GNativeString is essentially GUTF8String
1454
1455inline
1456GNativeString::GNativeString(const GUTF8String &str)
1457: GUTF8String(str) {}
1458
1459inline
1460GNativeString::GNativeString(const GP<GStringRep> &str)
1461: GUTF8String(str) {}
1462
1463inline
1464GNativeString::GNativeString(const char dat)
1465: GUTF8String(dat) {}
1466
1467inline
1468GNativeString::GNativeString(const char *str)
1469: GUTF8String(str) {}
1470
1471inline
1472GNativeString::GNativeString(const unsigned char *str)
1473: GUTF8String(str) {}
1474
1475inline
1476GNativeString::GNativeString(const unsigned short *str)
1477: GUTF8String(str) {}
1478
1479inline
1480GNativeString::GNativeString(const unsigned long *str)
1481: GUTF8String(str) {}
1482
1483inline
1484GNativeString::GNativeString(const char *dat, unsigned int len)
1485: GUTF8String(dat,len) {}
1486
1487inline
1488GNativeString::GNativeString(const unsigned short *dat, unsigned int len)
1489: GUTF8String(dat,len) {}
1490
1491inline
1492GNativeString::GNativeString(const unsigned long *dat, unsigned int len)
1493: GUTF8String(dat,len) {}
1494
1495inline
1496GNativeString::GNativeString(const GNativeString &str)
1497: GUTF8String(str) {}
1498
1499inline
1500GNativeString::GNativeString(const int number)
1501: GUTF8String(number) {}
1502
1503inline
1504GNativeString::GNativeString(const double number)
1505: GUTF8String(number) {}
1506
1507inline
1508GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1509: GUTF8String(fmt,args) {}
1510
1511#else // HAS_WCHAR
1512
1513/// Initialize this string class
1514inline void
1515GNativeString::init(void)
1516{ GBaseString::init(); }
1517
1518/// Initialize this string class
1519inline GNativeString &
1520GNativeString::init(const GP<GStringRep> &rep)
1521{
1522  GP<GStringRep>::operator=(rep?rep->toNative(GStringRep::NOT_ESCAPED):rep);
1523  init();
1524  return *this;
1525}
1526
1527inline GNativeString
1528GNativeString::substr(int from, int len) const
1529{ return GNativeString(*this, from, len); }
1530
1531inline GNativeString &
1532GNativeString::vformat(const GNativeString &fmt, va_list &args)
1533{ return (*this = (fmt.ptr?GNativeString(fmt,args):fmt)); }
1534
1535inline GNativeString
1536GNativeString::toEscaped( const bool tosevenbit ) const
1537{ return ptr?GNativeString((*this)->toEscaped(tosevenbit)):(*this); }
1538
1539inline
1540GNativeString::GNativeString(const GUTF8String &str)
1541{
1542  if (str.length())
1543    init(str->toNative(GStringRep::NOT_ESCAPED));
1544  else
1545    init((GP<GStringRep>)str);
1546}
1547
1548inline
1549GNativeString::GNativeString(const GP<GStringRep> &str)
1550{
1551  if (str)
1552    init(str->toNative(GStringRep::NOT_ESCAPED));
1553  else
1554    init(str);
1555}
1556
1557inline
1558GNativeString::GNativeString(const GBaseString &str)
1559{
1560  if (str.length())
1561    init(str->toNative(GStringRep::NOT_ESCAPED));
1562  else
1563    init((GP<GStringRep>)str);
1564}
1565
1566
1567inline
1568GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1569{
1570  if (fmt.ptr)
1571    init(fmt->vformat(args));
1572  else
1573    init(fmt);
1574}
1575
1576inline GNativeString
1577GNativeString::create( const char *buf, const unsigned int bufsize )
1578{
1579  return GNativeString(buf,bufsize);
1580}
1581
1582inline GNativeString
1583GNativeString::create( const unsigned short *buf, const unsigned int bufsize )
1584{
1585  return GNativeString(buf,bufsize);
1586}
1587
1588inline GNativeString
1589GNativeString::create( const unsigned long *buf, const unsigned int bufsize )
1590{
1591  return GNativeString(buf,bufsize);
1592}
1593
1594inline GNativeString&
1595GNativeString::operator= (const GP<GStringRep> &str)
1596{ return init(str); }
1597
1598inline GNativeString&
1599GNativeString::operator= (const GBaseString &str)
1600{ return init(str); }
1601
1602inline GNativeString&
1603GNativeString::operator= (const GUTF8String &str)
1604{ return init(str); }
1605
1606inline GNativeString&
1607GNativeString::operator= (const GNativeString &str)
1608{ return init(str); }
1609
1610inline GNativeString
1611GNativeString::upcase( void ) const
1612{
1613  if (ptr) return (*this)->upcase();
1614  return *this;
1615}
1616
1617inline GNativeString
1618GNativeString::downcase( void ) const
1619{
1620  if (ptr) return (*this)->downcase();
1621  return *this;
1622}
1623
1624#endif // HAS_WCHAR
1625
1626inline bool
1627operator==(const char *s1, const GBaseString &s2)
1628{ return !s2.cmp(s1); }
1629
1630inline bool
1631operator!=(const char *s1, const GBaseString &s2)
1632{ return !!s2.cmp(s1); }
1633
1634inline bool
1635operator>=(const char    *s1, const GBaseString &s2)
1636{ return (s2.cmp(s1)<=0); }
1637
1638inline bool
1639operator>=(const char s1, const GBaseString &s2)
1640{ return (s2.cmp(s1)<=0); }
1641
1642inline bool
1643operator<(const char *s1, const GBaseString &s2)
1644{ return (s2.cmp(s1)>0); }
1645
1646inline bool
1647operator<(const char s1, const GBaseString &s2)
1648{ return (s2.cmp(s1)>0); }
1649
1650inline bool
1651operator> (const char    *s1, const GBaseString &s2)
1652{ return (s2.cmp(s1)<0); }
1653
1654inline bool
1655operator> (const char s1, const GBaseString &s2)
1656{ return (s2.cmp(s1)<0); }
1657
1658inline bool
1659operator<=(const char    *s1, const GBaseString &s2)
1660{ return !(s1>s2); }
1661
1662inline bool
1663operator<=(const char    s1, const GBaseString &s2)
1664{ return !(s1>s2); }
1665
1666// ------------------- The end
1667
1668
1669#ifdef HAVE_NAMESPACES
1670}
1671# ifndef NOT_USING_DJVU_NAMESPACE
1672using namespace DJVU;
1673# endif
1674#endif
1675#endif
1676
Note: See TracBrowser for help on using the repository browser.