source: trunk/libdjvu/GString.h @ 206

Last change on this file since 206 was 206, checked in by Eugene Romanenko, 14 years ago

DJVU plugin: djvulibre updated to version 3.5.19

File size: 56.8 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, either Version 2 of the license,
9//C- or (at your option) any later version. The license should have
10//C- accompanied the software or you may obtain a copy of the license
11//C- from the Free Software Foundation at http://www.fsf.org .
12//C-
13//C- This program is distributed in the hope that it will be useful,
14//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16//C- GNU General Public License for more details.
17//C-
18//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19//C- Lizardtech Software.  Lizardtech Software has authorized us to
20//C- replace the original DjVu(r) Reference Library notice by the following
21//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22//C-
23//C-  ------------------------------------------------------------------
24//C- | DjVu (r) Reference Library (v. 3.5)
25//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26//C- | The DjVu Reference Library is protected by U.S. Pat. No.
27//C- | 6,058,214 and patents pending.
28//C- |
29//C- | This software is subject to, and may be distributed under, the
30//C- | GNU General Public License, either Version 2 of the license,
31//C- | or (at your option) any later version. The license should have
32//C- | accompanied the software or you may obtain a copy of the license
33//C- | from the Free Software Foundation at http://www.fsf.org .
34//C- |
35//C- | The computer code originally released by LizardTech under this
36//C- | license and unmodified by other parties is deemed "the LIZARDTECH
37//C- | ORIGINAL CODE."  Subject to any third party intellectual property
38//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39//C- | non-exclusive license to make, use, sell, or otherwise dispose of
40//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42//C- | General Public License.   This grant only confers the right to
43//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44//C- | the extent such infringement is reasonably necessary to enable
45//C- | recipient to make, have made, practice, sell, or otherwise dispose
46//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47//C- | any greater extent that may be necessary to utilize further
48//C- | modifications or combinations.
49//C- |
50//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54//C- +------------------------------------------------------------------
55//
56// $Id: GString.h,v 1.21 2007/03/25 20:48:32 leonb Exp $
57// $Name: release_3_5_19 $
58
59#ifndef _GSTRING_H_
60#define _GSTRING_H_
61#ifdef HAVE_CONFIG_H
62#include "config.h"
63#endif
64#if NEED_GNUG_PRAGMAS
65# pragma interface
66#endif
67
68/** @name GString.h
69
70    Files #"GString.h"# and #"GString.cpp"# implement a general
71    purpose string class \Ref{GBaseString}, with derived types
72    \Ref{GUTF8String} and \Ref{GNativeString} for UTF8 MBS encoding
73    and the current Native MBS encoding respectively.  This
74    implementation relies on smart pointers (see
75    \Ref{GSmartPointer.h}).
76
77    {\bf Historical Comments} --- At some point during the DjVu
78    research era, it became clear that C++ compilers rarely provided
79    portable libraries. We then decided to avoid fancy classes (like
80    #iostream# or #string#) and to rely only on the good old C
81    library.  A good string class however is very useful.  We had
82    already randomly picked letter 'G' to prefix class names and we
83    logically derived the new class name.  Native English speakers
84    kept laughing in hiding.  This is ironic because we completely
85    forgot this letter 'G' when creating more challenging things
86    like the ZP Coder or the IW44 wavelets. 
87
88    {\bf Later Changes}
89    When converting to I18N, we (Lizardtech) decided that two string classes
90    were needed, replacing the original GString with \Ref{GUTF8String} and
91    \Ref{GNativeString}.
92
93    @memo
94    General purpose string class.
95    @author
96    L\'eon Bottou <leonb@research.att.com> -- initial implementation.\\
97
98// From: Leon Bottou, 1/31/2002
99// This file has very little to do with my initial implementation.
100// It has been practically rewritten by Lizardtech for i18n changes.
101// My original implementation was very small in comparison
102// <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
103// In my opinion, the duplication of the string classes is a failed
104// attempt to use the type system to enforce coding policies.
105// This could be fixed.  But there are better things to do in djvulibre.
106   
107    @version
108    #$Id: GString.h,v 1.21 2007/03/25 20:48:32 leonb Exp $# */
109//@{
110
111
112#include "DjVuGlobal.h"
113#include "GContainer.h"
114
115#include <stdlib.h>
116#include <stdarg.h>
117#ifdef WIN32
118# include <windows.h>
119# define HAS_WCHAR 1
120# define HAS_MBSTATE 1
121#endif
122
123#if HAS_WCHAR
124# if !defined(AUTOCONF) || HAVE_WCHAR_H
125#  include <wchar.h>
126# endif
127#endif
128
129
130#ifdef HAVE_NAMESPACES
131namespace DJVU {
132# ifdef NOT_DEFINED // Just to fool emacs c++ mode
133}
134#endif
135#endif
136
137#if !HAS_MBSTATE
138# ifndef HAVE_MBSTATE_T
139typedef int mbstate_t; // fallback definition, compiled only when HAS_MBSTATE is false and HAVE_MBSTATE_T is undefined
140# endif
141#endif
142
143class GBaseString;
144class GUTF8String;
145class GNativeString;
146
147// Internal string representation: abstract base (has pure virtual methods) holding the character buffer #data# and its #size#.
148class GStringRep : public GPEnabled
149{
150public:
151  enum EncodeType { XUCS4, XUCS4BE, XUCS4LE, XUCS4_2143, XUCS4_3412,
152    XUTF16, XUTF16BE, XUTF16LE, XUTF8, XEBCDIC, XOTHER } ;
153
154  enum EscapeMode { UNKNOWN_ESCAPED=0,  IS_ESCAPED=1, NOT_ESCAPED=2 };
155
156  class UTF8;
157  friend class UTF8;
158  class Unicode;
159  friend class Unicode;
160
161  class ChangeLocale;
162#if HAS_WCHAR
163  class Native;
164  friend class Native;
165#endif // HAS_WCHAR
166  friend class GBaseString;
167  friend class GUTF8String;
168  friend class GNativeString;
169  friend unsigned int hash(const GBaseString &ref);
170
171public:
172  // default constructor
173  GStringRep(void);
174  // virtual destructor
175  virtual ~GStringRep();
176
177    // Other virtual methods.
178      // Create an empty string.
179  virtual GP<GStringRep> blank(const unsigned int sz) const = 0;
180      // Create a duplicate at the given size.
181  GP<GStringRep>  getbuf(int n) const;
182      // Change the value of one of the bytes.
183  GP<GStringRep> setat(int n, char ch) const;
184      // Append a string.
185  virtual GP<GStringRep> append(const GP<GStringRep> &s2) const = 0;
186      // Test if UTF8 (this base implementation returns false).
187  virtual bool isUTF8(void) const { return false; }
188      // Test if Native (this base implementation returns false).
189  virtual bool isNative(void) const { return false; }
190      // Convert to Native.
191  virtual GP<GStringRep> toNative(
192    const EscapeMode escape=UNKNOWN_ESCAPED ) const = 0;
193      // Convert to UTF8.
194  virtual GP<GStringRep> toUTF8(const bool nothrow=false) const = 0;
195      // Convert to same as current class.
196  virtual GP<GStringRep> toThis(
197    const GP<GStringRep> &rep,const GP<GStringRep> &locale=0) const = 0;
198      // Compare with #s2#.
199  virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const = 0;
200
201  // Convert strings to numbers.
202  virtual int toInt(void) const = 0;
203  virtual long int toLong(
204    const int pos, int &endpos, const int base=10) const = 0;
205  virtual unsigned long toULong(
206    const int pos, int &endpos, const int base=10) const = 0;
207  virtual double toDouble(const int pos, int &endpos) const = 0;
208
209  // return the position of the next character
210  int nextChar( const int from=0 ) const;
211
212  // return next non space position
213  int nextNonSpace( const int from=0, const int len=(-1) ) const;
214
215  // return next white space position
216  int nextSpace( const int from=0, const int len=(-1) ) const;
217
218  // return the position after the last non-whitespace character.
219  int firstEndSpace( int from=0, const int len=(-1) ) const;
220
221    // Create an empty string.
222  template <class TYPE> static GP<GStringRep> create(
223    const unsigned int sz,TYPE *);
224    // Creates with a strdup string.
225  GP<GStringRep> strdup(const char *s) const;
226
227    // Creates by appending to the current string
228  GP<GStringRep> append(const char *s2) const;
229
230    // Creates with a concat operation.
231  GP<GStringRep> concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const;
232  GP<GStringRep> concat(const char *s1,const GP<GStringRep> &s2) const;
233  GP<GStringRep> concat(const GP<GStringRep> &s1,const char *s2) const;
234  GP<GStringRep> concat(const char *s1,const char *s2) const;
235
236   /* Creates with a strdup and substr.  Negative values have strlen(s)+1
237      added to them.
238   */
239  GP<GStringRep> substr(
240    const char *s,const int start,const int length=(-1)) const;
241
242  GP<GStringRep> substr(
243    const unsigned short *s,const int start,const int length=(-1)) const;
244
245  GP<GStringRep> substr(
246    const unsigned long *s,const int start,const int length=(-1)) const;
247
248  /** Initializes a string with a formatted string (as in #vprintf#).  The
249      string is re-initialized with the characters generated according to the
250      specified format #fmt# and using the optional arguments.  See the ANSI-C
251      function #vprintf()# for more information. The current implementation
252      will cause a segmentation violation if the resulting string is longer
253      than 32768 characters. */
254  GP<GStringRep> vformat(va_list args) const;
255  // -- SEARCHING
256
257  static GP<GStringRep> UTF8ToNative( const char *s,
258    const EscapeMode escape=UNKNOWN_ESCAPED );
259  static GP<GStringRep> NativeToUTF8( const char *s );
260
261  // Creates an uppercase version of the current string.
262  GP<GStringRep> upcase(void) const;
263  // Creates a lowercase version of the current string.
264  GP<GStringRep> downcase(void) const;
265
266  /** Returns the next UCS4 character, and updates the pointer s. */
267  static unsigned long UTF8toUCS4(
268    unsigned char const *&s, void const * const endptr );
269
270  /** Returns the number of bytes in next UCS4 character,
271      and sets #w# to the next UCS4 character.  */
272  static int UTF8toUCS4(
273    unsigned long &w, unsigned char const s[], void const * const endptr )
274  { unsigned char const *r=s;w=UTF8toUCS4(r,endptr);return (int)((size_t)r-(size_t)s); } // byte count = distance r advanced from s
275
276  /** Returns the next UCS4 word from the UTF16 string. */
277  static int UTF16toUCS4(
278     unsigned long &w, unsigned short const * const s,void const * const eptr);
279
280  static int UCS4toUTF16(
281    unsigned long w, unsigned short &w1, unsigned short &w2);
282
283  int cmp(const char *s2, const int len=(-1)) const;
284  static int cmp(
285    const GP<GStringRep> &s1, const GP<GStringRep> &s2, const int len=(-1)) ;
286  static int cmp(
287    const GP<GStringRep> &s1, const char *s2, const int len=(-1));
288  static int cmp(
289    const char *s1, const GP<GStringRep> &s2, const int len=(-1));
290  static int cmp(
291    const char *s1, const char *s2, const int len=(-1));
292
293  // Lookup the next character, and return the position of the next character.
294  int getUCS4(unsigned long &w, const int from) const;
295
296  virtual unsigned char *UCS4toString(
297    const unsigned long w, unsigned char *ptr, mbstate_t *ps=0) const = 0;
298
299  static unsigned char *UCS4toUTF8(
300    const unsigned long w,unsigned char *ptr);
301
302  static unsigned char *UCS4toNative(
303    const unsigned long w,unsigned char *ptr, mbstate_t *ps);
304
305  int search(char c, int from=0) const;
306
307  int search(char const *str, int from=0) const;
308
309  int rsearch(char c, int from=0) const;
310
311  int rsearch(char const *str, int from=0) const;
312
313  int contains(char const accept[], int from=0) const;
314
315  int rcontains(char const accept[], int from=0) const;
316
317protected:
318  // Return the next character and increment the source pointer.
319  virtual unsigned long getValidUCS4(const char *&source) const = 0;
320
321  GP<GStringRep> tocase(
322    bool (*xiswcase)(const unsigned long wc),
323    unsigned long (*xtowcase)(const unsigned long wc)) const;
324
325  // Tests if the specified character passes the xiswtest.  If so, the
326  // return pointer is incremented to the next character, otherwise the
327  // specified #ptr# is returned.
328  const char * isCharType( bool (*xiswtest)(const unsigned long wc), const char *ptr,
329    const bool reverse=false) const;
330
331  // Find the next character position that passes the isCharType test.
332  int nextCharType(
333    bool (*xiswtest)(const unsigned long wc),const int from,const int len,
334    const bool reverse=false) const;
335
336  static bool giswspace(const unsigned long w);
337  static bool giswupper(const unsigned long w);
338  static bool giswlower(const unsigned long w);
339  static unsigned long gtowupper(const unsigned long w);
340  static unsigned long gtowlower(const unsigned long w);
341
342  virtual void set_remainder( void const * const buf, const unsigned int size,
343    const EncodeType encodetype);
344  virtual void set_remainder( void const * const buf, const unsigned int size,
345    const GP<GStringRep> &encoding );
346  virtual void set_remainder ( const GP<Unicode> &remainder );
347
348  virtual GP<Unicode> get_remainder( void ) const;
349
350public:
351  /* Returns a copy of this string with the characters that are special
352      in XML escaped: '<'  to "&lt;", '>'  to "&gt;",  '&' to "&amp;",
353      '\'' to "&apos;", and  '\"' to  "&quot;".   Characters 0x01 through
354      0x1f are also escaped. */
355  GP<GStringRep> toEscaped( const bool tosevenbit ) const;
356
357  // Tests if a string is legally encoded in the current character set.
358  virtual bool is_valid(void) const = 0;
359
360  virtual int ncopy(wchar_t * const buf, const int buflen) const = 0;
361
362protected:
363
364// Actual string data.
365  int  size;
366  char *data;
367};
368
369class GStringRep::UTF8 : public GStringRep
370{
371public:
372  // default constructor
373  UTF8(void);
374  // virtual destructor
375  virtual ~UTF8();
376
377    // Overrides of the GStringRep virtual methods.
378  virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
379  virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
380      // Test if UTF8.
381  virtual bool isUTF8(void) const;
382      // Convert to Native.
383  virtual GP<GStringRep> toNative(
384    const EscapeMode escape=UNKNOWN_ESCAPED) const;
385      // Convert to UTF8.
386  virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
387      // Convert to same as current class.
388  virtual GP<GStringRep> toThis(
389    const GP<GStringRep> &rep,const GP<GStringRep> &) const;
390      // Compare with #s2#.
391  virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const;
392
393  static GP<GStringRep> create(const unsigned int sz = 0);
394
395  // Convert strings to numbers.
396  virtual int toInt(void) const;
397  virtual long int toLong(
398    const int pos, int &endpos, const int base=10) const;
399  virtual unsigned long toULong(
400    const int pos, int &endpos, const int base=10) const;
401  virtual double toDouble(
402    const int pos, int &endpos) const;
403
404    // Create a strdup string.
405  static GP<GStringRep> create(const char *s);
406
407   // Creates with a concat operation.
408  static GP<GStringRep> create(
409    const GP<GStringRep> &s1,const GP<GStringRep> &s2);
410  static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
411  static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
412  static GP<GStringRep> create( const char *s1,const char *s2);
413
414    // Create with a strdup and substr operation.
415  static GP<GStringRep> create(
416    const char *s,const int start,const int length=(-1));
417
418  static GP<GStringRep> create(
419    const unsigned short *s,const int start,const int length=(-1));
420
421  static GP<GStringRep> create(
422    const unsigned long *s,const int start,const int length=(-1));
423
424  static GP<GStringRep> create_format(const char fmt[],...);
425  static GP<GStringRep> create(const char fmt[],va_list& args);
426
427  virtual unsigned char *UCS4toString(
428    const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const;
429
430  // Tests if a string is legally encoded in the current character set.
431  virtual bool is_valid(void) const;
432
433  virtual int ncopy(wchar_t * const buf, const int buflen) const;
434
435  friend class GBaseString;
436
437protected:
438  // Return the next character and increment the source pointer.
439  virtual unsigned long getValidUCS4(const char *&source) const;
440};
441
442
443/** General purpose character string.
444    Each derived instance of class #GBaseString# represents a
445    character string.  Overloaded operators provide a value semantic
446    to #GBaseString# objects.  Conversion operators and constructors
447    transparently convert between #GBaseString# objects and
448    #const char*# pointers.  The #GBaseString# class has no public
449    constructors, since a derived type should always be used
450    to specify the desired multibyte character encoding.
451
452    Functions taking strings as arguments should declare their
453    arguments as "#const char*#".  Such functions will work equally
454    well with derived #GBaseString# objects since there is a fast
455    conversion operator from the derived #GBaseString# objects
456    to "#const char*#".  Functions returning strings should return
457    #GUTF8String# or #GNativeString# objects because the class will
458    automatically manage the necessary memory.
459
460    Characters in the string can be identified by their position.  The
461    first character of a string is numbered zero. Negative positions
462    represent characters relative to the end of the string (i.e.
463    position #-1# accesses the last character of the string,
464    position #-2# represents the second last character, etc.)  */
465
466class GBaseString : protected GP<GStringRep>
467{
468public:
469  enum EscapeMode {
470    UNKNOWN_ESCAPED=GStringRep::UNKNOWN_ESCAPED,
471    IS_ESCAPED=GStringRep::IS_ESCAPED,
472    NOT_ESCAPED=GStringRep::NOT_ESCAPED };
473
474  friend class GUTF8String;
475  friend class GNativeString;
476protected:
477  // Sets the gstr pointer;
478  void init(void);
479
480  ~GBaseString();
481  GBaseString &init(const GP<GStringRep> &rep);
482
483  // -- CONSTRUCTORS
484  /** Null constructor. Constructs an empty string. */
485  GBaseString( void );
486
487public:
488  // -- ACCESS
489  /** Converts a string into a constant null terminated character
490      array.  This conversion operator is very efficient because
491      it simply returns a pointer to the internal string data. The
492      returned pointer remains valid as long as the string is
493      unmodified. */
494  operator const char* ( void ) const  ;
495  /// Returns the string length.
496  unsigned int length( void ) const;
497  /** Returns true if and only if the string contains zero characters.
498      This operator is useful for conditional expression in control
499      structures.
500      \begin{verbatim}
501         if (! str) { ... }
502         while (!! str) { ... }  -- Note the double operator!
503      \end{verbatim}
504      Class #GBaseString# does not support syntax
505      "#if# #(str)# #{}#" because the required conversion operator
506      introduces dangerous ambiguities with certain compilers. */
507  bool operator! ( void ) const;
508
509  // -- INDEXING
510  /** Returns the character at position #n#. An exception
511      \Ref{GException} is thrown if number #n# is not in range #-len#
512      to #len-1#, where #len# is the length of the string.  The first
513      character of a string is numbered zero.  Negative positions
514      represent characters relative to the end of the string. */
515  char operator[] (int n) const;
516  /// Returns #TRUE# if the string contains an integer number.
517  bool is_int(void) const;
518  /// Returns #TRUE# if the string contains a float number.
519  bool is_float(void) const;
520
521  /** Converts strings between native & UTF8 **/
522  GNativeString getUTF82Native( EscapeMode escape=UNKNOWN_ESCAPED ) const;
523  GUTF8String getNative2UTF8( void ) const;
524
525  // -- ALTERING
526  /// Reinitializes a string with the null string.
527  void empty( void );
528  // -- SEARCHING
529  /** Searches character #c# in the string, starting at position
530      #from# and scanning forward until reaching the end of the
531      string.  This function returns the position of the matching
532      character.  It returns #-1# if character #c# cannot be found. */
533  int search(char c, int from=0) const;
534
535  /** Searches sub-string #str# in the string, starting at position
536      #from# and scanning forward until reaching the end of the
537      string.  This function returns the position of the first
538      matching character of the sub-string.  It returns #-1# if
539      string #str# cannot be found. */
540  int search(const char *str, int from=0) const;
541
542  /** Searches character #c# in the string, starting at position
543      #from# and scanning backwards until reaching the beginning of
544      the string.  This function returns the position of the matching
545      character.  It returns #-1# if character #c# cannot be found. */
546  int rsearch(char c, const int from=0) const;
547  /** Searches sub-string #str# in the string, starting at position
548      #from# and scanning backwards until reaching the beginning of
549      the string.  This function returns the position of the first
550      matching character of the sub-string. It returns #-1# if
551      string #str# cannot be found. */
552  int rsearch(const char *str, const int from=0) const;
553  /** Searches for any of the specified characters in the accept
554      string.  It returns #-1# if none of the characters can
555      be found, otherwise the position of the first match. */
556  int contains(const char accept[], const int from=0) const;
557  /** Searches for any of the specified characters in the accept
558      string.  It returns #-1# if none of the characters can be
559      found, otherwise the position of the last match. */
560  int rcontains(const char accept[], const int from=0) const;
561
562  /** Concatenates strings. Returns a string composed by concatenating
563      the characters of strings #s1# and #s2#. */
564  GUTF8String operator+(const GUTF8String &s2) const;
565  GNativeString operator+(const GNativeString &s2) const;
566
567  /** Returns an integer.  Implements i18n atoi.  */
568  int toInt(void) const;
569
570  /** Returns a long integer.  Implements i18n strtol.  */
571  long toLong(const int pos, int &endpos, const int base=10) const;
572
573  /** Returns an unsigned long integer.  Implements i18n strtoul. */
574  unsigned long toULong(
575    const int pos, int &endpos, const int base=10) const;
576
577  /** Returns a double.  Implements the i18n strtod.  */
578  double toDouble(
579    const int pos, int &endpos ) const;
580
581  /** Returns a long integer.  Implements i18n strtol.  */
582  static long toLong(
583    const GUTF8String& src, const int pos, int &endpos, const int base=10);
584
585  static unsigned long toULong(
586    const GUTF8String& src, const int pos, int &endpos, const int base=10);
587
588  static double toDouble(
589    const GUTF8String& src, const int pos, int &endpos);
590
591  /** Returns a long integer.  Implements i18n strtol.  */
592  static long toLong(
593    const GNativeString& src, const int pos, int &endpos, const int base=10);
594
595  static unsigned long toULong(
596    const GNativeString& src, const int pos, int &endpos, const int base=10);
597
598  static double toDouble(
599    const GNativeString& src, const int pos, int &endpos);
600
601  // -- HASHING
602
603  // -- COMPARISONS
604    /** Returns an #int#.  Compares string with #s2# and returns
605        sorting order. */
606  int cmp(const GBaseString &s2, const int len=(-1)) const;
607    /** Returns an #int#.  Compares string with #s2# and returns
608        sorting order. */
609  int cmp(const char *s2, const int len=(-1)) const;
610    /** Returns an #int#.  Compares string with #s2# and returns
611        sorting order. */
612  int cmp(const char s2) const;
613    /** Returns an #int#.  Compares #s1# with #s2# and returns
614        sorting order. */
615  static int cmp(const char *s1, const char *s2, const int len=(-1));
616  /** Returns a boolean. The Standard C strncmp takes two string and
617      compares the first N characters.  static bool GBaseString::ncmp
618      will compare #s1# with #s2# with the #len# characters starting
619      from the beginning of the string. */
620  /** String comparison. Returns true if and only if character
621      strings #s1# and #s2# are equal (as with #strcmp#.)
622    */
623  bool operator==(const GBaseString &s2) const;
624  bool operator==(const char *s2) const;
625  friend bool operator==(const char    *s1, const GBaseString &s2);
626
627  /** String comparison. Returns true if and only if character
628      strings #s1# and #s2# are not equal (as with #strcmp#.)
629    */
630  bool operator!=(const GBaseString &s2) const;
631  bool operator!=(const char *s2) const;
632  friend bool operator!=(const char *s1, const GBaseString &s2);
633
634  /** String comparison. Returns true if and only if character
635      strings #s1# is lexicographically greater than or equal to
636      string #s2# (as with #strcmp#.) */
637  bool operator>=(const GBaseString &s2) const;
638  bool operator>=(const char *s2) const;
639  bool operator>=(const char s2) const;
640  friend bool operator>=(const char    *s1, const GBaseString &s2);
641  friend bool operator>=(const char s1, const GBaseString &s2);
642
643  /** String comparison. Returns true if and only if character
644      strings #s1# is lexicographically less than string #s2#
645      (as with #strcmp#.)
646   */
647  bool operator<(const GBaseString &s2) const;
648  bool operator<(const char *s2) const;
649  bool operator<(const char s2) const;
650  friend bool operator<(const char *s1, const GBaseString &s2);
651  friend bool operator<(const char s1, const GBaseString &s2);
652
653  /** String comparison. Returns true if and only if character
654      strings #s1# is lexicographically greater than string #s2#
655      (as with #strcmp#.)
656   */
657  bool operator> (const GBaseString &s2) const;
658  bool operator> (const char *s2) const;
659  bool operator> (const char s2) const;
660  friend bool operator> (const char    *s1, const GBaseString &s2);
661  friend bool operator> (const char s1, const GBaseString &s2);
662
663  /** String comparison. Returns true if and only if character
664      strings #s1# is lexicographically less than or equal to string
665      #s2# (as with #strcmp#.)
666   */
667  bool operator<=(const GBaseString &s2) const;
668  bool operator<=(const char *s2) const;
669  bool operator<=(const char s2) const;
670  friend bool operator<=(const char    *s1, const GBaseString &s2);
671  friend bool operator<=(const char    s1, const GBaseString &s2);
672
673   /** Returns an integer.  Implements a functional i18n atoi. Note
674       that if you pass a GBaseString that is not in Native format
675       the results may be disparaging. */
676
677  /** Returns a hash code for the string.  This hashing function
678      helps when creating associative maps with string keys (see
679      \Ref{GMap}).  This hash code may be reduced to an arbitrary
680      range by computing its remainder modulo the upper bound of
681      the range. */
682  friend unsigned int hash(const GBaseString &ref);
683  // -- HELPERS
684  friend class GStringRep;
685
686  /// Returns next non space position.
687  int nextNonSpace( const int from=0, const int len=(-1) ) const;
688
689  /// Returns next character position.
690  int nextChar( const int from=0 ) const;
691
692  /// Returns next white space position.
693  int nextSpace( const int from=0, const int len=(-1) ) const;
694
695  /// return the position after the last non-whitespace character.
696  int firstEndSpace( const int from=0,const int len=(-1) ) const;
697
698  /// Tests if the string is legally encoded in the current codepage.
699  bool is_valid(void) const;
700
701  /// copy to a wchar_t buffer
702  int ncopy(wchar_t * const buf, const int buflen) const;
703
704protected:
705  const char *gstr;
706  static void throw_illegal_subscript() no_return;
707  static const char *nullstr;
708public:
709  GNativeString UTF8ToNative(
710    const bool currentlocale=false,
711    const EscapeMode escape=UNKNOWN_ESCAPED) const;
712  GUTF8String NativeToUTF8(void) const;
713protected:
714  int CheckSubscript(int n) const;
715};
716
717/** General purpose character string.
718    Each instance of class #GUTF8String# represents a character
719    string.  Overloaded operators provide a value semantic to
720    #GUTF8String# objects.  Conversion operators and constructors
721    transparently convert between #GUTF8String# objects and
722    #const char*# pointers.
723
724    Functions taking strings as arguments should declare their
725    arguments as "#const char*#".  Such functions will work equally
726    well with #GUTF8String# objects since there is a fast conversion
727    operator from #GUTF8String# to "#const char*#".  Functions
728    returning strings should return #GUTF8String# or #GNativeString#
729    objects because the class will automatically manage the necessary
730    memory.
731
732    Characters in the string can be identified by their position.  The
733    first character of a string is numbered zero. Negative positions
734    represent characters relative to the end of the string (i.e.
735    position #-1# accesses the last character of the string,
736    position #-2# represents the second last character, etc.)  */
737
738class GUTF8String : public GBaseString
739{
740public:
741  ~GUTF8String();
  /// Initialize this string class
742  void init(void);
743
  /// Initialize this string class from the string representation #rep#.
744  GUTF8String &init(const GP<GStringRep> &rep);
745
746  // -- CONSTRUCTORS
747  /** Null constructor. Constructs an empty string. */
748  GUTF8String(void);
749  /// Constructs a string from a character.
750  GUTF8String(const char dat);
751  /// Constructs a string from a null terminated character array.
752  GUTF8String(const char *str);
753  /// Constructs a string from a null terminated character array.
754  GUTF8String(const unsigned char *str);
755  GUTF8String(const unsigned short *dat);
756  GUTF8String(const unsigned long *dat);
757  /** Constructs a string from a character array.  Elements of the
758      character array #dat# are added into the string until the
759      string length reaches #len# or until encountering a null
760      character (whichever comes first). */
761  GUTF8String(const char *dat, unsigned int len);
762  GUTF8String(const unsigned short *dat, unsigned int len);
763  GUTF8String(const unsigned long *dat, unsigned int len);
764
765  /// Construct from base class.
766  GUTF8String(const GP<GStringRep> &str);
767  GUTF8String(const GBaseString &str);
768  GUTF8String(const GUTF8String &str);
769  GUTF8String(const GNativeString &str);
770  /** Constructs a string from a portion of string #gs#.  The
771      portion is described by the starting position #from# and the
772      length #len#, with the same meaning as the arguments of
773      function #substr#. */
774  GUTF8String(const GBaseString &gs, int from, int len);
775
776  /** Assignment operators.  Reset this string with the character,
777      null terminated character array, string representation or
778      string #str#. */
779  GUTF8String& operator= (const char str);
780  GUTF8String& operator= (const char *str);
781  GUTF8String& operator= (const GP<GStringRep> &str);
782  GUTF8String& operator= (const GBaseString &str);
783  GUTF8String& operator= (const GUTF8String &str);
784  GUTF8String& operator= (const GNativeString &str);
785
786  /** Constructs a string with a formatted string (as in #vprintf#).
787      The string is initialized with the characters generated
788      according to the specified format #fmt# and using the optional
789      arguments.  See the ANSI-C function #vprintf()# for more
790      information. The current implementation will cause a
791      segmentation violation if the resulting string is longer
792      than 32768 characters. */
793  GUTF8String(const GUTF8String &fmt, va_list &args);
794
795  /// Constructs a string from an integer.
796  /** Constructs a string with a human-readable representation of
797      integer #number#.  The format is similar to format #"%d"# in
798      function #printf#. */
799  GUTF8String(const int number);
800
801  /** Constructs a string with a human-readable representation of
802      floating point number #number#. The format is similar to
803      format #"%f"# in function #printf#.  */
804  GUTF8String(const double number);
805
806
807  /** Initializes a string with a formatted string (as in #printf#).
808      The string is re-initialized with the characters generated
809      according to the specified format #fmt# and using the optional
810      arguments.  See the ANSI-C function #printf()# for more
811      information. The current implementation will cause a
812      segmentation violation if the resulting string is longer
813      than 32768 characters. */
814  GUTF8String &format(const char *fmt, ... );
815  /** Initializes a string with a formatted string (as in #vprintf#).
816      The string is re-initialized with the characters generated
817      according to the specified format #fmt# and using the optional
818      arguments.  See the ANSI-C function #vprintf()# for more
819      information. The current implementation will cause a
820      segmentation violation if the resulting string is longer
821      than 32768 characters. */
822  GUTF8String &vformat(const GUTF8String &fmt, va_list &args);
823
824  /** Returns a copy of this string with the characters used in XML
825      escaped: '<'  to "&lt;", '>'  to "&gt;",  '&' to "&amp;", '\'' to
826      "&apos;", and  '\"' to  "&quot;".   Characters 0x01 through
827      0x1f are also escaped. */
828  GUTF8String toEscaped( const bool tosevenbit=false ) const;
829
830  /** Converts strings containing HTML/XML escaped characters into
831      their unescaped forms. Numeric representations of characters
832      (e.g., "&#38;" or "&#x26;" for "&") are the only forms
833      converted by this function. */
834  GUTF8String fromEscaped( void ) const;
835
836  /** Converts strings containing HTML/XML escaped characters
837      (e.g., "&lt;" for "<") into their unescaped forms. The
838      conversion is partially defined by the ConvMap argument which
839      specifies the conversion strings to be recognized. Numeric
840      representations of characters (e.g., "&#38;" or "&#x26;"
841      for "&") are always converted. */
842  GUTF8String fromEscaped(
843    const GMap<GUTF8String,GUTF8String> ConvMap ) const;
844
845
846  // -- CONCATENATION
847  /// Appends character #ch# to the string.
848  GUTF8String& operator+= (char ch);
849
850  /// Appends the null terminated character array #str# to the string.
851  GUTF8String& operator+= (const char *str);
852  /// Appends the specified GBaseString to the string.
853  GUTF8String& operator+= (const GBaseString &str);
854
855  /** Returns a sub-string.  The sub-string is composed by copying
856      #len# characters starting at position #from# in this string.
857      The length of the resulting string may be smaller than #len#
858      if the specified range is too large. */
859  GUTF8String substr(int from, int len/*=(-1)*/) const;
860
861  /** Returns an upper case copy of this string.  The returned string
862      contains a copy of the current string with all letters turned
863      into upper case letters. */
864  GUTF8String upcase( void ) const;
865  /** Returns a lower case copy of this string.  The returned string
866      contains a copy of the current string with all letters turned
867      into lower case letters. */
868  GUTF8String downcase( void ) const;
869
870  /** Concatenates strings. Returns a string composed by concatenating
871      the characters of strings #s1# and #s2#.
872  */
873  GUTF8String operator+(const GBaseString &s2) const;
874  GUTF8String operator+(const GUTF8String &s2) const;
875  GUTF8String operator+(const GNativeString &s2) const;
876  GUTF8String operator+(const char    *s2) const;
877  friend GUTF8String operator+(const char    *s1, const GUTF8String &s2);
878
879  /** Provides a direct access to the string buffer.  Returns a
880      pointer for directly accessing the string buffer.  This pointer
881      remains valid as long as the string is not modified by
882      other means.  Positive values for argument #n# represent the
883      length of the returned buffer.  The returned string buffer will
884      be large enough to hold at least #n# characters plus a null
885      character.  If #n# is positive but smaller than the string
886      length, the string will be truncated to #n# characters. */
887  char *getbuf(int n = -1);
888  /** Set the character at position #n# to value #ch#.  An exception
889      \Ref{GException} is thrown if number #n# is not in range #-len#
890      to #len#, where #len# is the length of the string.  If character
891      #ch# is zero, the string is truncated at position #n#.  The
892      first character of a string is numbered zero. Negative
893      positions represent characters relative to the end of the
894      string. If position #n# is equal to the length of the string,
895      this function appends character #ch# to the end of the string. */
896  void setat(const int n, const char ch);
897public:
898  typedef enum GStringRep::EncodeType EncodeType;
899  static GUTF8String create(void const * const buf,
900    const unsigned int size,
901    const EncodeType encodetype, const GUTF8String &encoding);
902  static GUTF8String create( void const * const buf,
903    unsigned int size, const EncodeType encodetype );
904  static GUTF8String create( void const * const buf,
905    const unsigned int size, const GUTF8String &encoding );
906  static GUTF8String create( void const * const buf,
907    const unsigned int size, const GP<GStringRep::Unicode> &remainder);
908  GP<GStringRep::Unicode> get_remainder(void) const;
909  static GUTF8String create( const char *buf, const unsigned int bufsize );
910  static GUTF8String create( const unsigned short *buf, const unsigned int bufsize );
911  static GUTF8String create( const unsigned long *buf, const unsigned int bufsize );
912};
913
914
915#if !HAS_WCHAR
916#define GBaseString GUTF8String
917#endif
918
919/** General purpose character string.
920    Each instance of class #GNativeString# represents a character
921    string.  Overloaded operators provide a value semantic to
922    #GNativeString# objects.  Conversion operators and constructors
923    transparently convert between #GNativeString# objects and
924    #const char*# pointers.
925
926    Functions taking strings as arguments should declare their
927    arguments as "#const char*#".  Such functions will work equally
928    well with #GNativeString# objects since there is a fast conversion
929    operator from #GNativeString# to "#const char*#".  Functions
930    returning strings should return #GUTF8String# or #GNativeString#
931    objects because the class will automatically manage the necessary
932    memory.
933
934    Characters in the string can be identified by their position.  The
935    first character of a string is numbered zero. Negative positions
936    represent characters relative to the end of the string (i.e.
937    position #-1# accesses the last character of the string,
938    position #-2# represents the second last character, etc.)  */
939
940class GNativeString : public GBaseString
941{
942public:
943  ~GNativeString();
944  // -- CONSTRUCTORS
945  /** Null constructor. Constructs an empty string. */
946  GNativeString(void);
947  /// Constructs a string from a character.
948  GNativeString(const char dat);
949  /// Constructs a string from a null terminated character array.
950  GNativeString(const char *str);
951  /// Constructs a string from a null terminated character array.
952  GNativeString(const unsigned char *str);
953  GNativeString(const unsigned short *str);
954  GNativeString(const unsigned long *str);
955  /** Constructs a string from a character array.  Elements of the
956      character array #dat# are added into the string until the
957      string length reaches #len# or until encountering a null
958      character (whichever comes first). */
959  GNativeString(const char *dat, unsigned int len);
960  GNativeString(const unsigned short *dat, unsigned int len);
961  GNativeString(const unsigned long *dat, unsigned int len);
962  /// Construct from base class.
963  GNativeString(const GP<GStringRep> &str);
964  GNativeString(const GBaseString &str);
965#if HAS_WCHAR
966  GNativeString(const GUTF8String &str);
967#endif
968  GNativeString(const GNativeString &str);
969  /** Constructs a string from a portion of string #gs#.  The
970      portion is described by the starting position #from# and the
971      length #len#.  Function #substr# is implemented with this
972      constructor. */
973  GNativeString(const GBaseString &gs, int from, int len);
974
975  /** Constructs a string with a formatted string (as in #vprintf#).
976      The string is initialized with the characters generated
977      according to the specified format #fmt# and using the optional
978      arguments.  See the ANSI-C function #vprintf()# for more
979      information. The current implementation will cause a
980      segmentation violation if the resulting string is longer than
981      32768 characters. */
982  GNativeString(const GNativeString &fmt, va_list &args);
983
984  /** Constructs a string with a human-readable representation of
985      integer #number#.  The format is similar to format #"%d"# in
986      function #printf#. */
987  GNativeString(const int number);
988
989  /** Constructs a string with a human-readable representation of
990      floating point number #number#. The format is similar to
991      format #"%f"# in function #printf#.  */
992  GNativeString(const double number);
993
994#if !HAS_WCHAR
  // Without HAS_WCHAR the name GBaseString was defined as GUTF8String
  // just before this class, so the class derives from GUTF8String and
  // declares nothing beyond its constructors.  Restore the name here.
995#undef GBaseString
996#else
997  /// Initialize this string class
998  void init(void);
999
1000  /// Initialize this string class
1001  GNativeString &init(const GP<GStringRep> &rep);
1002
1003  /** Assignment operators.  Reset this string with the character,
1004      null terminated character array, string representation or
1005      string #str#. */
1006  GNativeString& operator= (const char str);
1007  GNativeString& operator= (const char *str);
1008  GNativeString& operator= (const GP<GStringRep> &str);
1009  GNativeString& operator= (const GBaseString &str);
1010  GNativeString& operator= (const GUTF8String &str);
1011  GNativeString& operator= (const GNativeString &str);
1012  // -- CONCATENATION
1013  /// Appends character #ch# to the string.
1014  GNativeString& operator+= (char ch);
1015  /// Appends the null terminated character array #str# to the string.
1016  GNativeString& operator+= (const char *str);
1017  /// Appends the specified GBaseString to the string.
1018  GNativeString& operator+= (const GBaseString &str);
1019
1020  /** Returns a sub-string.  The sub-string is composed by copying
1021      #len# characters starting at position #from# in this string.
1022      The length of the resulting string may be smaller than #len#
1023      if the specified range is too large. */
1024  GNativeString substr(int from, int len/*=(-1)*/) const;
1025
1026  /** Returns an upper case copy of this string.  The returned
1027      string contains a copy of the current string with all letters
1028      turned into upper case letters. */
1029  GNativeString upcase( void ) const;
1030  /** Returns a lower case copy of this string.  The returned
1031      string contains a copy of the current string with all letters
1032      turned into lower case letters. */
1033  GNativeString downcase( void ) const;
1034
1035
1036  GNativeString operator+(const GBaseString &s2) const;
1037  GNativeString operator+(const GNativeString &s2) const;
1038  GUTF8String operator+(const GUTF8String &s2) const;
1039  GNativeString operator+(const char *s2) const;
1040  friend GNativeString operator+(const char *s1, const GNativeString &s2);
1041
1042  /** Initializes a string with a formatted string (as in #printf#).
1043      The string is re-initialized with the characters generated
1044      according to the specified format #fmt# and using the optional
1045      arguments.  See the ANSI-C function #printf()# for more
1046      information. The current implementation will cause a
1047      segmentation violation if the resulting string is longer than
1048      32768 characters. */
1049  GNativeString &format(const char *fmt, ... );
1050  /** Initializes a string with a formatted string (as in #vprintf#).
1051      The string is re-initialized with the characters generated
1052      according to the specified format #fmt# and using the optional
1053      arguments.  See the ANSI-C function #vprintf()# for more
1054      information. The current implementation will cause a
1055      segmentation violation if the resulting string is longer than
1056      32768 characters. */
1057  GNativeString &vformat(const GNativeString &fmt, va_list &args);
1058
1059  /** Returns a copy of this string with the characters used in XML
1060      escaped: '<'  to "&lt;", '>'  to "&gt;",  '&' to "&amp;", '\'' to
1061      "&apos;", and  '\"' to  "&quot;".   Characters 0x01 through
1062      0x1f are also escaped. */
1063  GNativeString toEscaped( const bool tosevenbit=false ) const;
1064
1065
1066  /** Provides a direct access to the string buffer.  Returns a
1067      pointer for directly accessing the string buffer.  This
1068      pointer remains valid as long as the string is not
1069      modified by other means.  Positive values for argument #n#
1070      represent the length of the returned buffer.  The returned
1071      string buffer will be large enough to hold at least #n#
1072      characters plus a null character.  If #n# is positive but
1073      smaller than the string length, the string will be truncated
1074      to #n# characters. */
1075  char *getbuf(int n = -1);
1076  /** Set the character at position #n# to value #ch#.  An exception
1077      \Ref{GException} is thrown if number #n# is not in range #-len#
1078      to #len#, where #len# is the length of the string.  If
1079      character #ch# is zero, the string is truncated at position
1080      #n#.  The first character of a string is numbered zero.
1081      Negative positions represent characters relative to the end of
1082      the string. If position #n# is equal to the length of the
1083      string, this function appends character #ch# to the end of the
1084      string. */
1085  void setat(const int n, const char ch);
1086
1087  static GNativeString create( const char *buf, const unsigned int bufsize );
1088  static GNativeString create( const unsigned short *buf, const unsigned int bufsize );
1089  static GNativeString create( const unsigned long *buf, const unsigned int bufsize );
1090#endif // HAS_WCHAR
1091};
1092
1093//@}
1094
1095inline
1096GBaseString::operator const char* ( void ) const
1097{
1098  return ptr?(*this)->data:nullstr;
1099}
1100
1101inline unsigned int
1102GBaseString::length( void ) const
1103{
1104  return ptr ? (*this)->size : 0;
1105}
1106
1107inline bool
1108GBaseString::operator! ( void ) const
1109{
1110  return !ptr;
1111}
1112
1113inline GUTF8String
1114GUTF8String::upcase( void ) const
1115{
1116  if (ptr) return (*this)->upcase();
1117  return *this;
1118}
1119
1120inline GUTF8String
1121GUTF8String::downcase( void ) const
1122{
1123  if (ptr) return (*this)->downcase();
1124  return *this;
1125}
1126
1127inline void
1128GUTF8String::init(void)
1129{ GBaseString::init(); }
1130
1131inline GUTF8String &
1132GUTF8String::init(const GP<GStringRep> &rep)
1133{ GP<GStringRep>::operator=(rep?rep->toUTF8(true):rep); init(); return *this; }
1134
1135inline GUTF8String &
1136GUTF8String::vformat(const GUTF8String &fmt, va_list &args)
1137{ return (*this = (fmt.ptr?GUTF8String(fmt,args):fmt)); }
1138
1139inline GUTF8String
1140GUTF8String::toEscaped( const bool tosevenbit ) const
1141{ return ptr?GUTF8String((*this)->toEscaped(tosevenbit)):(*this); }
1142
1143inline GP<GStringRep::Unicode> 
1144GUTF8String::get_remainder(void) const
1145{
1146  GP<GStringRep::Unicode> retval;
1147  if(ptr)
1148    retval=((*this)->get_remainder());
1149  return retval;
1150}
1151
1152inline
1153GUTF8String::GUTF8String(const GNativeString &str)
1154{ init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1155
1156inline
1157GUTF8String::GUTF8String(const GP<GStringRep> &str)
1158{ init(str?(str->toUTF8(true)):str); }
1159
1160inline
1161GUTF8String::GUTF8String(const GBaseString &str)
1162{ init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1163
1164inline void
1165GBaseString::init(void)
1166{
1167  gstr=ptr?((*this)->data):nullstr;
1168}
1169/** Returns an integer.  Implements i18n atoi.  */
1170inline int
1171GBaseString::toInt(void) const
1172{ return ptr?(*this)->toInt():0; }
1173
1174/** Returns a long intenger.  Implments i18n strtol.  */
1175inline long
1176GBaseString::toLong(const int pos, int &endpos, const int base) const
1177{
1178  long int retval=0;
1179  if(ptr)
1180  {
1181    retval=(*this)->toLong(pos, endpos, base);
1182  }else
1183  {
1184    endpos=(-1);
1185  }
1186  return retval;
1187}
1188
1189inline long
1190GBaseString::toLong(
1191  const GUTF8String& src, const int pos, int &endpos, const int base)
1192{
1193  return src.toLong(pos,endpos,base);
1194}
1195
1196inline long
1197GBaseString::toLong(
1198  const GNativeString& src, const int pos, int &endpos, const int base)
1199{
1200  return src.toLong(pos,endpos,base);
1201}
1202
1203/** Returns a unsigned long integer.  Implements i18n strtoul. */
1204inline unsigned long
1205GBaseString::toULong(const int pos, int &endpos, const int base) const
1206{
1207  unsigned long retval=0;
1208  if(ptr)
1209  {
1210    retval=(*this)->toULong(pos, endpos, base);
1211  }else
1212  {
1213    endpos=(-1);
1214  }
1215  return retval;
1216}
1217
1218inline unsigned long
1219GBaseString::toULong(
1220  const GUTF8String& src, const int pos, int &endpos, const int base)
1221{
1222  return src.toULong(pos,endpos,base);
1223}
1224
1225inline unsigned long
1226GBaseString::toULong(
1227  const GNativeString& src, const int pos, int &endpos, const int base)
1228{
1229  return src.toULong(pos,endpos,base);
1230}
1231
1232/** Returns a double.  Implements the i18n strtod.  */
1233inline double
1234GBaseString::toDouble(
1235  const int pos, int &endpos ) const
1236{
1237  double retval=(double)0;
1238  if(ptr)
1239  {
1240    retval=(*this)->toDouble(pos, endpos);
1241  }else
1242  {
1243    endpos=(-1);
1244  }
1245  return retval;
1246}
1247
1248inline double
1249GBaseString::toDouble(
1250  const GUTF8String& src, const int pos, int &endpos)
1251{
1252  return src.toDouble(pos,endpos);
1253}
1254
1255inline double
1256GBaseString::toDouble(
1257  const GNativeString& src, const int pos, int &endpos)
1258{
1259  return src.toDouble(pos,endpos);
1260}
1261
1262inline GBaseString &
1263GBaseString::init(const GP<GStringRep> &rep)
1264{ GP<GStringRep>::operator=(rep); init(); return *this;}
1265
1266inline char
1267GBaseString::operator[] (int n) const
1268{ return ((n||ptr)?((*this)->data[CheckSubscript(n)]):0); }
1269
1270inline int
1271GBaseString::search(char c, int from) const
1272{ return ptr?((*this)->search(c,from)):(-1); }
1273
1274inline int
1275GBaseString::search(const char *str, int from) const
1276{ return ptr?((*this)->search(str,from)):(-1); }
1277
1278inline int
1279GBaseString::rsearch(char c, const int from) const
1280{ return ptr?((*this)->rsearch(c,from)):(-1); }
1281
1282inline int
1283GBaseString::rsearch(const char *str, const int from) const
1284{ return ptr?((*this)->rsearch(str,from)):(-1); }
1285
1286inline int
1287GBaseString::contains(const char accept[], const int from) const
1288{ return ptr?((*this)->contains(accept,from)):(-1); }
1289
1290inline int
1291GBaseString::rcontains(const char accept[], const int from) const
1292{ return ptr?((*this)->rcontains(accept,from)):(-1); }
1293
1294inline int
1295GBaseString::cmp(const GBaseString &s2, const int len) const
1296{ return GStringRep::cmp(*this,s2,len); }
1297
1298inline int
1299GBaseString::cmp(const char *s2, const int len) const
1300{ return GStringRep::cmp(*this,s2,len); }
1301
1302inline int
1303GBaseString::cmp(const char s2) const
1304{ return GStringRep::cmp(*this,&s2,1); }
1305
1306inline int
1307GBaseString::cmp(const char *s1, const char *s2, const int len)
1308{ return GStringRep::cmp(s1,s2,len); }
1309
1310inline bool
1311GBaseString::operator==(const GBaseString &s2) const
1312{ return !cmp(s2); }
1313
1314inline bool
1315GBaseString::operator==(const char *s2) const
1316{ return !cmp(s2); }
1317
1318inline bool
1319GBaseString::operator!=(const GBaseString &s2) const
1320{ return !!cmp(s2); }
1321
1322inline bool
1323GBaseString::operator!=(const char *s2) const
1324{ return !!cmp(s2); }
1325
1326inline bool
1327GBaseString::operator>=(const GBaseString &s2) const
1328{ return (cmp(s2)>=0); }
1329
1330inline bool
1331GBaseString::operator>=(const char *s2) const
1332{ return (cmp(s2)>=0); }
1333
1334inline bool
1335GBaseString::operator>=(const char s2) const
1336{ return (cmp(s2)>=0); }
1337
1338inline bool
1339GBaseString::operator<(const GBaseString &s2) const
1340{ return (cmp(s2)<0); }
1341
1342inline bool
1343GBaseString::operator<(const char *s2) const
1344{ return (cmp(s2)<0); }
1345
1346inline bool
1347GBaseString::operator<(const char s2) const
1348{ return (cmp(s2)<0); }
1349
1350inline bool
1351GBaseString::operator> (const GBaseString &s2) const
1352{ return (cmp(s2)>0); }
1353
1354inline bool
1355GBaseString::operator> (const char *s2) const
1356{ return (cmp(s2)>0); }
1357
1358inline bool
1359GBaseString::operator> (const char s2) const
1360{ return (cmp(s2)>0); }
1361
1362inline bool
1363GBaseString::operator<=(const GBaseString &s2) const
1364{ return (cmp(s2)<=0); }
1365
1366inline bool
1367GBaseString::operator<=(const char *s2) const
1368{ return (cmp(s2)<=0); }
1369
1370inline bool
1371GBaseString::operator<=(const char s2) const
1372{ return (cmp(s2)<=0); }
1373
1374inline int
1375GBaseString::nextNonSpace( const int from, const int len ) const
1376{ return ptr?(*this)->nextNonSpace(from,len):0; }
1377
1378inline int
1379GBaseString::nextChar( const int from ) const
1380{ return ptr?(*this)->nextChar(from):0; }
1381
1382inline int
1383GBaseString::nextSpace( const int from, const int len ) const
1384{ return ptr?(*this)->nextSpace(from,len):0; }
1385
1386inline int
1387GBaseString::firstEndSpace( const int from,const int len ) const
1388{ return ptr?(*this)->firstEndSpace(from,len):0; }
1389
1390inline bool
1391GBaseString::is_valid(void) const
1392{ return ptr?((*this)->is_valid()):true; }
1393
1394inline int
1395GBaseString::ncopy(wchar_t * const buf, const int buflen) const
1396{if(buf&&buflen)buf[0]=0;return ptr?((*this)->ncopy(buf,buflen)):0;}
1397
1398inline int
1399GBaseString::CheckSubscript(int n) const
1400{
1401  if(n)
1402  {
1403    if (n<0 && ptr)
1404      n += (*this)->size;
1405    if (n<0 || !ptr || n > (int)(*this)->size)
1406      throw_illegal_subscript();
1407  }
1408  return n;
1409}
1410
1411inline GBaseString::GBaseString(void) { init(); }
1412
1413inline GUTF8String::GUTF8String(void) { }
1414
1415inline GUTF8String::GUTF8String(const GUTF8String &str)
1416{ init(str); }
1417
1418inline GUTF8String& GUTF8String::operator= (const GP<GStringRep> &str)
1419{ return init(str); }
1420
1421inline GUTF8String& GUTF8String::operator= (const GBaseString &str)
1422{ return init(str); }
1423
1424inline GUTF8String& GUTF8String::operator= (const GUTF8String &str)
1425{ return init(str); }
1426
1427inline GUTF8String& GUTF8String::operator= (const GNativeString &str)
1428{ return init(str); }
1429
1430inline GUTF8String
1431GUTF8String::create( const char *buf, const unsigned int bufsize )
1432{
1433#if HAS_WCHAR
1434  return GNativeString(buf,bufsize);
1435#else
1436  return GUTF8String(buf,bufsize);
1437#endif
1438}
1439
1440inline GUTF8String
1441GUTF8String::create( const unsigned short *buf, const unsigned int bufsize )
1442{
1443  return GUTF8String(buf,bufsize);
1444}
1445
1446inline GUTF8String
1447GUTF8String::create( const unsigned long *buf, const unsigned int bufsize )
1448{
1449  return GUTF8String(buf,bufsize);
1450}
1451
1452inline GNativeString::GNativeString(void) {}
1453
1454#if !HAS_WCHAR
1455// For Windows CE, GNativeString is essentially GUTF8String
1456
1457inline
1458GNativeString::GNativeString(const GUTF8String &str)
1459: GUTF8String(str) {}
1460
1461inline
1462GNativeString::GNativeString(const GP<GStringRep> &str)
1463: GUTF8String(str) {}
1464
1465inline
1466GNativeString::GNativeString(const char dat)
1467: GUTF8String(dat) {}
1468
1469inline
1470GNativeString::GNativeString(const char *str)
1471: GUTF8String(str) {}
1472
1473inline
1474GNativeString::GNativeString(const unsigned char *str)
1475: GUTF8String(str) {}
1476
1477inline
1478GNativeString::GNativeString(const unsigned short *str)
1479: GUTF8String(str) {}
1480
1481inline
1482GNativeString::GNativeString(const unsigned long *str)
1483: GUTF8String(str) {}
1484
1485inline
1486GNativeString::GNativeString(const char *dat, unsigned int len)
1487: GUTF8String(dat,len) {}
1488
1489inline
1490GNativeString::GNativeString(const unsigned short *dat, unsigned int len)
1491: GUTF8String(dat,len) {}
1492
1493inline
1494GNativeString::GNativeString(const unsigned long *dat, unsigned int len)
1495: GUTF8String(dat,len) {}
1496
1497inline
1498GNativeString::GNativeString(const GNativeString &str)
1499: GUTF8String(str) {}
1500
1501inline
1502GNativeString::GNativeString(const int number)
1503: GUTF8String(number) {}
1504
1505inline
1506GNativeString::GNativeString(const double number)
1507: GUTF8String(number) {}
1508
1509inline
1510GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1511: GUTF8String(fmt,args) {}
1512
1513#else // HAS_WCHAR
1514
1515/// Initialize this string class
1516inline void
1517GNativeString::init(void)
1518{ GBaseString::init(); }
1519
1520/// Initialize this string class
1521inline GNativeString &
1522GNativeString::init(const GP<GStringRep> &rep)
1523{
1524  GP<GStringRep>::operator=(rep?rep->toNative(GStringRep::NOT_ESCAPED):rep);
1525  init();
1526  return *this;
1527}
1528
1529inline GNativeString
1530GNativeString::substr(int from, int len) const
1531{ return GNativeString(*this, from, len); }
1532
1533inline GNativeString &
1534GNativeString::vformat(const GNativeString &fmt, va_list &args)
1535{ return (*this = (fmt.ptr?GNativeString(fmt,args):fmt)); }
1536
1537inline GNativeString
1538GNativeString::toEscaped( const bool tosevenbit ) const
1539{ return ptr?GNativeString((*this)->toEscaped(tosevenbit)):(*this); }
1540
1541inline
1542GNativeString::GNativeString(const GUTF8String &str)
1543{
1544  if (str.length())
1545    init(str->toNative(GStringRep::NOT_ESCAPED));
1546  else
1547    init((GP<GStringRep>)str);
1548}
1549
1550inline
1551GNativeString::GNativeString(const GP<GStringRep> &str)
1552{
1553  if (str)
1554    init(str->toNative(GStringRep::NOT_ESCAPED));
1555  else
1556    init(str);
1557}
1558
1559inline
1560GNativeString::GNativeString(const GBaseString &str)
1561{
1562  if (str.length())
1563    init(str->toNative(GStringRep::NOT_ESCAPED));
1564  else
1565    init((GP<GStringRep>)str);
1566}
1567
1568
1569inline
1570GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1571{
1572  if (fmt.ptr)
1573    init(fmt->vformat(args));
1574  else
1575    init(fmt);
1576}
1577
1578inline GNativeString
1579GNativeString::create( const char *buf, const unsigned int bufsize )
1580{
1581  return GNativeString(buf,bufsize);
1582}
1583
1584inline GNativeString
1585GNativeString::create( const unsigned short *buf, const unsigned int bufsize )
1586{
1587  return GNativeString(buf,bufsize);
1588}
1589
1590inline GNativeString
1591GNativeString::create( const unsigned long *buf, const unsigned int bufsize )
1592{
1593  return GNativeString(buf,bufsize);
1594}
1595
1596inline GNativeString&
1597GNativeString::operator= (const GP<GStringRep> &str)
1598{ return init(str); }
1599
1600inline GNativeString&
1601GNativeString::operator= (const GBaseString &str)
1602{ return init(str); }
1603
1604inline GNativeString&
1605GNativeString::operator= (const GUTF8String &str)
1606{ return init(str); }
1607
1608inline GNativeString&
1609GNativeString::operator= (const GNativeString &str)
1610{ return init(str); }
1611
1612inline GNativeString
1613GNativeString::upcase( void ) const
1614{
1615  if (ptr) return (*this)->upcase();
1616  return *this;
1617}
1618
1619inline GNativeString
1620GNativeString::downcase( void ) const
1621{
1622  if (ptr) return (*this)->downcase();
1623  return *this;
1624}
1625
1626#endif // HAS_WCHAR
1627
1628inline bool
1629operator==(const char *s1, const GBaseString &s2)
1630{ return !s2.cmp(s1); }
1631
1632inline bool
1633operator!=(const char *s1, const GBaseString &s2)
1634{ return !!s2.cmp(s1); }
1635
1636inline bool
1637operator>=(const char    *s1, const GBaseString &s2)
1638{ return (s2.cmp(s1)<=0); }
1639
1640inline bool
1641operator>=(const char s1, const GBaseString &s2)
1642{ return (s2.cmp(s1)<=0); }
1643
1644inline bool
1645operator<(const char *s1, const GBaseString &s2)
1646{ return (s2.cmp(s1)>0); }
1647
1648inline bool
1649operator<(const char s1, const GBaseString &s2)
1650{ return (s2.cmp(s1)>0); }
1651
1652inline bool
1653operator> (const char    *s1, const GBaseString &s2)
1654{ return (s2.cmp(s1)<0); }
1655
1656inline bool
1657operator> (const char s1, const GBaseString &s2)
1658{ return (s2.cmp(s1)<0); }
1659
1660inline bool
1661operator<=(const char    *s1, const GBaseString &s2)
1662{ return !(s1>s2); }
1663
1664inline bool
1665operator<=(const char    s1, const GBaseString &s2)
1666{ return !(s1>s2); }
1667
1668// ------------------- The end
1669
1670
1671#ifdef HAVE_NAMESPACES
1672}
1673# ifndef NOT_USING_DJVU_NAMESPACE
1674using namespace DJVU;
1675# endif
1676#endif
1677#endif
1678
Note: See TracBrowser for help on using the repository browser.