source: trunk/libdjvu/GURL.h @ 15

Last change on this file since 15 was 15, checked in by Eugene Romanenko, 15 years ago

needed libs update

File size: 14.8 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, Version 2. The license should have
9//C- accompanied the software or you may obtain a copy of the license
10//C- from the Free Software Foundation at http://www.fsf.org .
11//C-
12//C- This program is distributed in the hope that it will be useful,
13//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
14//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15//C- GNU General Public License for more details.
16//C-
17//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
18//C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech
19//C- Software authorized us to replace the original DjVu(r) Reference
20//C- Library notice by the following text (see doc/lizard2002.djvu):
21//C-
22//C-  ------------------------------------------------------------------
23//C- | DjVu (r) Reference Library (v. 3.5)
24//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
25//C- | The DjVu Reference Library is protected by U.S. Pat. No.
26//C- | 6,058,214 and patents pending.
27//C- |
28//C- | This software is subject to, and may be distributed under, the
29//C- | GNU General Public License, Version 2. The license should have
30//C- | accompanied the software or you may obtain a copy of the license
31//C- | from the Free Software Foundation at http://www.fsf.org .
32//C- |
33//C- | The computer code originally released by LizardTech under this
34//C- | license and unmodified by other parties is deemed "the LIZARDTECH
35//C- | ORIGINAL CODE."  Subject to any third party intellectual property
36//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
37//C- | non-exclusive license to make, use, sell, or otherwise dispose of
38//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
39//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
40//C- | General Public License.   This grant only confers the right to
41//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
42//C- | the extent such infringement is reasonably necessary to enable
43//C- | recipient to make, have made, practice, sell, or otherwise dispose
44//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
45//C- | any greater extent that may be necessary to utilize further
46//C- | modifications or combinations.
47//C- |
48//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
49//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
50//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
51//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
52//C- +------------------------------------------------------------------
53//
54// $Id: GURL.h,v 1.9 2003/11/07 22:08:21 leonb Exp $
55// $Name: release_3_5_16 $
56
57#ifndef _GURL_H_
58#define _GURL_H_
59#ifdef HAVE_CONFIG_H
60#include "config.h"
61#endif
62#if NEED_GNUG_PRAGMAS
63# pragma interface
64#endif
65
66
67#include "GString.h"
68#include "Arrays.h"
69#include "GThreads.h"
70
71
72#ifdef HAVE_NAMESPACES
73namespace DJVU {
74# ifdef NOT_DEFINED // Just to fool emacs c++ mode
75}
76#endif
77#endif
78
79/** @name GURL.h
80    Files #"GURL.h"# and #"GURL.cpp"# contain the implementation of the
81    \Ref{GURL} class used to store URLs in a system independent format.
82    @memo System independent URL representation.
83    @author Andrei Erofeev <eaf@geocities.com>
84
85// From: Leon Bottou, 1/31/2002
86// This has been heavily changed by Lizardtech.
87// They decided to use URLs for everything, including
88// the most basic file access.  The URL class now is an unholy
89// mixture of code for syntactically parsing the urls (which it was)
90// and file status code (only for local file: urls).
91
92    @version #$Id: GURL.h,v 1.9 2003/11/07 22:08:21 leonb Exp $#
93*/
94
95//@{
96
97/** System independent URL representation.
98
99    This class is used in the library to store URLs in a system independent
100    format. The idea to use a general class to hold URL arose after we
101    realized, that DjVu had to be able to access files both from the WEB
102    and from the local disk. While it is strange to talk about system
103    independence of HTTP URLs, file names formats obviously differ from
104    platform to platform. They may contain forward slashes, backward slashes,
105    colons as separators, etc. There may be more than one URL corresponding
106    to the same file name. Compare #file:/dir/file.djvu# and
107    #file://localhost/dir/file.djvu#.
108
109    To simplify a developer's life we have created this class, which contains
110    inside a canonical representation of URLs.
111
112    File URLs are converted to internal format with the help of \Ref{GOS} class.
113
114    All other URLs are modified to contain only forward slashes.
115*/
116
117class GURL
118{
119public:
120  class Filename;
121  class UTF8;
122  class Native;
123protected:
124      /** @name Constructors
125          Accept the string URL, check that it starts from #file:/#
126          or #http:/# and convert to internal system independent
127          representation.
128      */
129      //@{
130      ///
131   GURL(const char * url_string);
132      //@}
133
134public:
135   GURL(void);
136
137   GURL(const GUTF8String & url_string);
138
139   GURL(const GNativeString & url_string);
140
141   GURL(const GUTF8String &xurl, const GURL &codebase);
142
143   GURL(const GNativeString &xurl, const GURL &codebase);
144
145      /// Copy constructor
146   GURL(const GURL & gurl);
147
148      /// The destructor
149   virtual ~GURL(void) {}
150
151private:
152      // The 'class_lock' should be locked whenever you're accessing
153      // url, or cgi_name_arr, or cgi_value_arr.
154   GCriticalSection     class_lock;
155protected:
156   GUTF8String  url;
157   DArray<GUTF8String>  cgi_name_arr, cgi_value_arr;
158   bool validurl;
159
160   void         init(const bool nothrow=false);
161   void         convert_slashes(void);
162   void         beautify_path(void);
163   static GUTF8String   beautify_path(GUTF8String url);
164
165   static GUTF8String   protocol(const GUTF8String& url);
166   void         parse_cgi_args(void);
167   void         store_cgi_args(void);
168public:
169   /// Test if the URL is valid. If invalid, reinitialize.
170   bool is_valid(void) const;     // const lies to the compiler because of dependency problems
171
172      /// Extracts the {\em protocol} part from the URL and returns it
173   GUTF8String  protocol(void) const;
174
175      /** Returns string after the first '\#' with decoded
176          escape sequences. */
177   GUTF8String  hash_argument(void) const;
178
179      /** Inserts the #arg# after a separating hash into the URL.
180          The function encodes any illegal character in #arg# using
181          \Ref{GOS::encode_reserved}(). */
182   void         set_hash_argument(const GUTF8String &arg);
183
184      /** Returns the total number of CGI arguments in the URL.
185          CGI arguments follow '#?#' sign and are separated by '#&#' signs */
186   int          cgi_arguments(void) const;
187
188      /** Returns the total number of DjVu-related CGI arguments (arguments
189          following #DJVUOPTS# in the URL). */
190   int          djvu_cgi_arguments(void) const;
191
192      /** Returns that part of CGI argument number #num#, which is
193          before the equal sign. */
194   GUTF8String  cgi_name(int num) const;
195
196      /** Returns that part of DjVu-related CGI argument number #num#,
197          which is before the equal sign. */
198   GUTF8String  djvu_cgi_name(int num) const;
199
200      /** Returns that part of CGI argument number #num#, which is
201          after the equal sign. */
202   GUTF8String  cgi_value(int num) const;
203   
204      /** Returns that part of DjVu-related CGI argument number #num#,
205          which is after the equal sign. */
206   GUTF8String  djvu_cgi_value(int num) const;
207   
208      /** Returns array of all known CGI names (part of CGI argument before
209          the equal sign) */
210   DArray<GUTF8String>cgi_names(void) const;
211
212      /** Returns array of names of DjVu-related CGI arguments (arguments
213          following #DJVUOPTS# option. */
214   DArray<GUTF8String>djvu_cgi_names(void) const;
215   
216      /** Returns array of all known CGI names (part of CGI argument before
217          the equal sign) */
218   DArray<GUTF8String>cgi_values(void) const;
219
220      /** Returns array of values of DjVu-related CGI arguments (arguments
221          following #DJVUOPTS# option. */
222   DArray<GUTF8String>djvu_cgi_values(void) const;
223
224      /// Erases everything after the first '\#' or '?'
225   void         clear_all_arguments(void);
226
227      /// Erases everything after the first '\#'
228   void         clear_hash_argument(void);
229
230      /// Erases DjVu CGI arguments (following "#DJVUOPTS#")
231   void         clear_djvu_cgi_arguments(void);
232
233      /// Erases all CGI arguments (following the first '?')
234   void         clear_cgi_arguments(void);
235
236      /** Appends the specified CGI argument. Will insert "#DJVUOPTS#" if
237          necessary */
238   void         add_djvu_cgi_argument(const GUTF8String &name, const char * value=0);
239   
240      /** Returns the URL corresponding to the directory containing
241          the document with this URL. The function basically takes the
242          URL and clears everything after the last slash. */
243   GURL         base(void) const;
244
245      /// Returns the aboslute URL without the host part.
246   GUTF8String pathname(void) const;
247
248      /** Returns the name part of this URL.
249          For example, if the URL is #http://www.lizardtech.com/file%201.djvu# then
250          this function will return #file%201.djvu#. \Ref{fname}() will
251          return #file 1.djvu# at the same time. */
252   GUTF8String  name(void) const;
253
254      /** Returns the name part of this URL with escape sequences expanded.
255          For example, if the URL is #http://www.lizardtech.com/file%201.djvu# then
256          this function will return #file 1.djvu#. \Ref{name}() will
257          return #file%201.djvu# at the same time. */
258   GUTF8String  fname(void) const;
259
260      /// Returns the extention part of name of document in this URL.
261   GUTF8String  extension(void) const;
262
263      /// Checks if this is an empty URL
264   bool         is_empty(void) const;
265
266      /// Checks if the URL is local (starts from #file:/#) or not
267   bool         is_local_file_url(void) const;
268
269      /** @name Concatenation operators
270          Concatenate the GURL with the passed {\em name}. If the {\em name}
271          is absolute (has non empty protocol prefix), we just return
272          #GURL(name)#. Otherwise the #name# is appended to the GURL after a
273          separating slash.
274      */
275      //@{
276      ///
277//   GURL               operator+(const GUTF8String &name) const;
278      //@}
279
280      /// Returns TRUE if #gurl1# and #gurl2# are the same
281   bool GURL::operator==(const GURL & gurl2) const;
282
283      /// Returns TRUE if #gurl1# and #gurl2# are different
284   bool GURL::operator!=(const GURL & gurl2) const;
285
286      /// Assignment operator
287   GURL &       operator=(const GURL & url);
288
289      /// Returns Internal URL representation
290   operator     const char*(void) const { return url; };
291
292  /** Returns a string representing the URL.  This function normally
293      returns a standard file URL as described in RFC 1738. 
294      Some versions of MSIE do not support this standard syntax.
295      A brain damaged MSIE compatible syntax is generated
296      when the optional argument #useragent# contains string #"MSIE"# or
297      #"Microsoft"#. */
298   GUTF8String get_string(const GUTF8String &useragent) const;
299
300   GUTF8String get_string(const bool nothrow=false) const;
301
302      /// Escape special characters
303   static GUTF8String encode_reserved(const GUTF8String &gs);
304
305   /** Decodes reserved characters from the URL.
306      See also: \Ref{encode_reserved}(). */
307   static GUTF8String decode_reserved(const GUTF8String &url);
308
309  /// Test if this url is an existing file, directory, or device.
310  bool is_local_path(void) const;
311
312  /// Test if this url is an existing file.
313  bool is_file(void) const;
314
315  /// Test if this url is an existing directory.
316  bool is_dir(void) const;
317
318  /// Follows symbolic links.
319  GURL follow_symlinks(void) const;
320
321  /// Creates the specified directory.
322  int mkdir(void) const;
323
324  /** Deletes file or directory.
325      Directories are not deleted unless the directory is empty.
326      Returns a negative number if an error occurs. */
327  int deletefile(void) const;
328
329  /** Recursively erases contents of directory. The directory
330      itself will not be removed. */
331  int cleardir(const int timeout=0) const;
332
333  /// Rename a file or directory.
334  int renameto(const GURL &newurl) const;
335
336  /// List the contents of a directory.
337  GList<GURL> listdir(void) const;
338
339  /** Returns a filename for a URL. Argument #url# must be a legal file URL.
340      This function applies heuristic rules to convert the URL into a valid
341      file name. It is guaranteed that this function can properly parse all
342      URLs generated by #filename_to_url#. The heuristics also work better when
343      the file actually exists.  The empty string is returned when this
344      function cannot parse the URL or when the URL is not a file URL.
345        URL formats are as described in RFC 1738 plus the following alternative
346      formats for files on the local host:
347
348                file://<letter>:/<path>
349                file://<letter>|/<path>
350                file:/<path>
351
352      which are accepted because various browsers recognize them.*/
353   GUTF8String UTF8Filename(void) const;
354   /// Same but returns a native string.
355   GNativeString NativeFilename(void) const;
356
357      /** Hashing function.
358          @return hash suitable for usage in \Ref{GMap} */
359   friend unsigned int  hash(const GURL & gurl);
360
361  /** Returns fully qualified file names.  This functions constructs the fully
362      qualified name of file or directory #filename#. When provided, the
363      optional argument #fromdirname# is used as the current directory when
364      interpreting relative specifications in #filename#.  Function
365      #expand_name# is very useful for logically concatenating file names.  It
366      knows which separators should be used for each operating system and it
367      knows which syntactical rules apply. */
368  static GUTF8String expand_name(const GUTF8String &filename, const char *fromdirname=0);
369};
370
371class GURL::UTF8 : public GURL
372{
373public:
374  UTF8(const GUTF8String &xurl);
375  UTF8(const GUTF8String &xurl, const GURL &codebase);
376};
377
378class GURL::Native : public GURL
379{
380public:
381  Native(const GNativeString &xurl);
382  Native(const GNativeString &xurl, const GURL &codebase);
383};
384
385class GURL::Filename : public GURL
386{
387public:
388  Filename(const GUTF8String &filename);
389  Filename(const GNativeString &filename);
390  class UTF8;
391  class Native;
392};
393
394class GURL::Filename::UTF8 : public GURL::Filename
395{
396public:
397  UTF8(const GUTF8String &filename);
398};
399
400class GURL::Filename::Native : public GURL::Filename
401{
402public:
403  Native(const GNativeString &filename);
404};
405
406
407inline bool
408GURL::operator!=(const GURL & gurl2) const
409{
410  return !(*this == gurl2);
411}
412
413inline GUTF8String
414GURL::protocol(void) const
415{
416   return protocol(get_string());
417}
418
419inline bool
420GURL::is_empty(void) const
421{
422   return !url.length()||!get_string().length();
423}
424
425// Test if the URL is valid.
426// If invalid, reinitialize and return the result.
427inline bool
428GURL::is_valid(void) const
429{
430  if(!validurl)
431    const_cast<GURL *>(this)->init(true);
432  return validurl;
433}
434
435
436
437//@}
438
439
440#ifdef HAVE_NAMESPACES
441}
442# ifndef NOT_USING_DJVU_NAMESPACE
443using namespace DJVU;
444# endif
445#endif
446#endif
Note: See TracBrowser for help on using the repository browser.