source: trunk/libdjvu/GURL.h @ 280

Last change on this file since 280 was 280, checked in by rbri, 11 years ago

DJVU plugin: djvulibre updated to version 3.5.22

File size: 15.0 KB
Line 
1//C-  -*- C++ -*-
2//C- -------------------------------------------------------------------
3//C- DjVuLibre-3.5
4//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5//C- Copyright (c) 2001  AT&T
6//C-
7//C- This software is subject to, and may be distributed under, the
8//C- GNU General Public License, either Version 2 of the license,
9//C- or (at your option) any later version. The license should have
10//C- accompanied the software or you may obtain a copy of the license
11//C- from the Free Software Foundation at http://www.fsf.org .
12//C-
13//C- This program is distributed in the hope that it will be useful,
14//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16//C- GNU General Public License for more details.
17//C-
18//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19//C- Lizardtech Software.  Lizardtech Software has authorized us to
20//C- replace the original DjVu(r) Reference Library notice by the following
21//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22//C-
23//C-  ------------------------------------------------------------------
24//C- | DjVu (r) Reference Library (v. 3.5)
25//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26//C- | The DjVu Reference Library is protected by U.S. Pat. No.
27//C- | 6,058,214 and patents pending.
28//C- |
29//C- | This software is subject to, and may be distributed under, the
30//C- | GNU General Public License, either Version 2 of the license,
31//C- | or (at your option) any later version. The license should have
32//C- | accompanied the software or you may obtain a copy of the license
33//C- | from the Free Software Foundation at http://www.fsf.org .
34//C- |
35//C- | The computer code originally released by LizardTech under this
36//C- | license and unmodified by other parties is deemed "the LIZARDTECH
37//C- | ORIGINAL CODE."  Subject to any third party intellectual property
38//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39//C- | non-exclusive license to make, use, sell, or otherwise dispose of
40//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42//C- | General Public License.   This grant only confers the right to
43//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44//C- | the extent such infringement is reasonably necessary to enable
45//C- | recipient to make, have made, practice, sell, or otherwise dispose
46//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47//C- | any greater extent that may be necessary to utilize further
48//C- | modifications or combinations.
49//C- |
50//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54//C- +------------------------------------------------------------------
55//
56// $Id: GURL.h,v 1.13 2008/01/27 11:36:27 leonb Exp $
57// $Name: release_3_5_22 $
58
59#ifndef _GURL_H_
60#define _GURL_H_
61#ifdef HAVE_CONFIG_H
62#include "config.h"
63#endif
64#if NEED_GNUG_PRAGMAS
65# pragma interface
66#endif
67
68
69#include "GString.h"
70#include "Arrays.h"
71#include "GThreads.h"
72
73
74#ifdef HAVE_NAMESPACES
75namespace DJVU {
76# ifdef NOT_DEFINED // Just to fool emacs c++ mode
77}
78#endif
79#endif
80
/** @name GURL.h
    Files #"GURL.h"# and #"GURL.cpp"# contain the implementation of the
    \Ref{GURL} class used to store URLs in a system independent format.
    @memo System independent URL representation.
    @author Andrei Erofeev <eaf@geocities.com>

// From: Leon Bottou, 1/31/2002
// This has been heavily changed by Lizardtech.
// They decided to use URLs for everything, including
// the most basic file access.  The URL class now is an unholy
// mixture of code for syntactically parsing the urls (which it was)
// and file status code (only for local file: urls).

    @version #$Id: GURL.h,v 1.13 2008/01/27 11:36:27 leonb Exp $#
*/
96
97//@{
98
/** System independent URL representation.

    This class is used in the library to store URLs in a system independent
    format. The idea of using a general class to hold URLs arose after we
    realized that DjVu had to be able to access files both from the WEB
    and from the local disk. While it is strange to talk about system
    independence of HTTP URLs, file name formats obviously differ from
    platform to platform. They may contain forward slashes, backward slashes,
    colons as separators, etc. There may be more than one URL corresponding
    to the same file name. Compare #file:/dir/file.djvu# and
    #file://localhost/dir/file.djvu#.

    To simplify a developer's life we have created this class, which
    internally holds a canonical representation of URLs.

    File URLs are converted to internal format with the help of \Ref{GOS} class.

    All other URLs are modified to contain only forward slashes.
*/
118
119class DJVUAPI GURL
120{
121public:
122  class Filename;
123  class UTF8;
124  class Native;
125protected:
126      /** @name Constructors
127          Accept the string URL, check that it starts from #file:/#
128          or #http:/# and convert to internal system independent
129          representation.
130      */
131      //@{
132      ///
133   GURL(const char * url_string);
134      //@}
135
136public:
137   GURL(void);
138
139   GURL(const GUTF8String & url_string);
140
141   GURL(const GNativeString & url_string);
142
143   GURL(const GUTF8String &xurl, const GURL &codebase);
144
145   GURL(const GNativeString &xurl, const GURL &codebase);
146
147      /// Copy constructor
148   GURL(const GURL & gurl);
149
150      /// The destructor
151   virtual ~GURL(void) {}
152
153private:
154      // The 'class_lock' should be locked whenever you're accessing
155      // url, or cgi_name_arr, or cgi_value_arr.
156   GCriticalSection     class_lock;
157protected:
158   GUTF8String  url;
159   DArray<GUTF8String>  cgi_name_arr, cgi_value_arr;
160   bool validurl;
161
162   void         init(const bool nothrow=false);
163   void         convert_slashes(void);
164   void         beautify_path(void);
165   static GUTF8String   beautify_path(GUTF8String url);
166
167   static GUTF8String   protocol(const GUTF8String& url);
168   void         parse_cgi_args(void);
169   void         store_cgi_args(void);
170public:
171   /// Test if the URL is valid. If invalid, reinitialize.
172   bool is_valid(void) const;     // const lies to the compiler because of dependency problems
173
174      /// Extracts the {\em protocol} part from the URL and returns it
175   GUTF8String  protocol(void) const;
176
177      /** Returns string after the first '\#' with decoded
178          escape sequences. */
179   GUTF8String  hash_argument(void) const;
180
181      /** Inserts the #arg# after a separating hash into the URL.
182          The function encodes any illegal character in #arg# using
183          \Ref{GOS::encode_reserved}(). */
184   void         set_hash_argument(const GUTF8String &arg);
185
186      /** Returns the total number of CGI arguments in the URL.
187          CGI arguments follow '#?#' sign and are separated by '#&#' signs */
188   int          cgi_arguments(void) const;
189
190      /** Returns the total number of DjVu-related CGI arguments (arguments
191          following #DJVUOPTS# in the URL). */
192   int          djvu_cgi_arguments(void) const;
193
194      /** Returns that part of CGI argument number #num#, which is
195          before the equal sign. */
196   GUTF8String  cgi_name(int num) const;
197
198      /** Returns that part of DjVu-related CGI argument number #num#,
199          which is before the equal sign. */
200   GUTF8String  djvu_cgi_name(int num) const;
201
202      /** Returns that part of CGI argument number #num#, which is
203          after the equal sign. */
204   GUTF8String  cgi_value(int num) const;
205   
206      /** Returns that part of DjVu-related CGI argument number #num#,
207          which is after the equal sign. */
208   GUTF8String  djvu_cgi_value(int num) const;
209   
210      /** Returns array of all known CGI names (part of CGI argument before
211          the equal sign) */
212   DArray<GUTF8String>cgi_names(void) const;
213
214      /** Returns array of names of DjVu-related CGI arguments (arguments
215          following #DJVUOPTS# option. */
216   DArray<GUTF8String>djvu_cgi_names(void) const;
217   
218      /** Returns array of all known CGI names (part of CGI argument before
219          the equal sign) */
220   DArray<GUTF8String>cgi_values(void) const;
221
222      /** Returns array of values of DjVu-related CGI arguments (arguments
223          following #DJVUOPTS# option. */
224   DArray<GUTF8String>djvu_cgi_values(void) const;
225
226      /// Erases everything after the first '\#' or '?'
227   void         clear_all_arguments(void);
228
229      /// Erases everything after the first '\#'
230   void         clear_hash_argument(void);
231
232      /// Erases DjVu CGI arguments (following "#DJVUOPTS#")
233   void         clear_djvu_cgi_arguments(void);
234
235      /// Erases all CGI arguments (following the first '?')
236   void         clear_cgi_arguments(void);
237
238      /** Appends the specified CGI argument. Will insert "#DJVUOPTS#" if
239          necessary */
240   void         add_djvu_cgi_argument(const GUTF8String &name, const char * value=0);
241   
242      /** Returns the URL corresponding to the directory containing
243          the document with this URL. The function basically takes the
244          URL and clears everything after the last slash. */
245   GURL         base(void) const;
246
247      /// Returns the aboslute URL without the host part.
248   GUTF8String pathname(void) const;
249
250      /** Returns the name part of this URL.
251          For example, if the URL is #http://www.lizardtech.com/file%201.djvu# then
252          this function will return #file%201.djvu#. \Ref{fname}() will
253          return #file 1.djvu# at the same time. */
254   GUTF8String  name(void) const;
255
256      /** Returns the name part of this URL with escape sequences expanded.
257          For example, if the URL is #http://www.lizardtech.com/file%201.djvu# then
258          this function will return #file 1.djvu#. \Ref{name}() will
259          return #file%201.djvu# at the same time. */
260   GUTF8String  fname(void) const;
261
262      /// Returns the extention part of name of document in this URL.
263   GUTF8String  extension(void) const;
264
265      /// Checks if this is an empty URL
266   bool         is_empty(void) const;
267
268      /// Checks if the URL is local (starts from #file:/#) or not
269   bool         is_local_file_url(void) const;
270
271      /** @name Concatenation operators
272          Concatenate the GURL with the passed {\em name}. If the {\em name}
273          is absolute (has non empty protocol prefix), we just return
274          #GURL(name)#. Otherwise the #name# is appended to the GURL after a
275          separating slash.
276      */
277      //@{
278      ///
279//   GURL               operator+(const GUTF8String &name) const;
280      //@}
281
282      /// Returns TRUE if #gurl1# and #gurl2# are the same
283   bool operator==(const GURL & gurl2) const;
284
285      /// Returns TRUE if #gurl1# and #gurl2# are different
286   bool operator!=(const GURL & gurl2) const;
287
288      /// Assignment operator
289   GURL &       operator=(const GURL & url);
290
291      /// Returns Internal URL representation
292   operator     const char*(void) const { return url; };
293
294  /** Returns a string representing the URL.  This function normally
295      returns a standard file URL as described in RFC 1738. 
296      Some versions of MSIE do not support this standard syntax.
297      A brain damaged MSIE compatible syntax is generated
298      when the optional argument #useragent# contains string #"MSIE"# or
299      #"Microsoft"#. */
300   GUTF8String get_string(const GUTF8String &useragent) const;
301
302   GUTF8String get_string(const bool nothrow=false) const;
303
304      /// Escape special characters
305   static GUTF8String encode_reserved(const GUTF8String &gs);
306
307   /** Decodes reserved characters from the URL.
308      See also: \Ref{encode_reserved}(). */
309   static GUTF8String decode_reserved(const GUTF8String &url);
310
311  /// Test if this url is an existing file, directory, or device.
312  bool is_local_path(void) const;
313
314  /// Test if this url is an existing file.
315  bool is_file(void) const;
316
317  /// Test if this url is an existing directory.
318  bool is_dir(void) const;
319
320  /// Follows symbolic links.
321  GURL follow_symlinks(void) const;
322
323  /// Creates the specified directory.
324  int mkdir(void) const;
325
326  /** Deletes file or directory.
327      Directories are not deleted unless the directory is empty.
328      Returns a negative number if an error occurs. */
329  int deletefile(void) const;
330
331  /** Recursively erases contents of directory. The directory
332      itself will not be removed. */
333  int cleardir(const int timeout=0) const;
334
335  /// Rename a file or directory.
336  int renameto(const GURL &newurl) const;
337
338  /// List the contents of a directory.
339  GList<GURL> listdir(void) const;
340
341  /** Returns a filename for a URL. Argument #url# must be a legal file URL.
342      This function applies heuristic rules to convert the URL into a valid
343      file name. It is guaranteed that this function can properly parse all
344      URLs generated by #filename_to_url#. The heuristics also work better when
345      the file actually exists.  The empty string is returned when this
346      function cannot parse the URL or when the URL is not a file URL.
347        URL formats are as described in RFC 1738 plus the following alternative
348      formats for files on the local host:
349
350                file://<letter>:/<path>
351                file://<letter>|/<path>
352                file:/<path>
353
354      which are accepted because various browsers recognize them.*/
355   GUTF8String UTF8Filename(void) const;
356   /// Same but returns a native string.
357   GNativeString NativeFilename(void) const;
358
359      /** Hashing function.
360          @return hash suitable for usage in \Ref{GMap} */
361   friend unsigned int  hash(const GURL & gurl);
362
363  /** Returns fully qualified file names.  This functions constructs the fully
364      qualified name of file or directory #filename#. When provided, the
365      optional argument #fromdirname# is used as the current directory when
366      interpreting relative specifications in #filename#.  Function
367      #expand_name# is very useful for logically concatenating file names.  It
368      knows which separators should be used for each operating system and it
369      knows which syntactical rules apply. */
370  static GUTF8String expand_name(const GUTF8String &filename, const char *fromdirname=0);
371};
372
373class DJVUAPI GURL::UTF8 : public GURL
374{
375public:
376  UTF8(const GUTF8String &xurl);
377  UTF8(const GUTF8String &xurl, const GURL &codebase);
378};
379
380class DJVUAPI GURL::Native : public GURL
381{
382public:
383  Native(const GNativeString &xurl);
384  Native(const GNativeString &xurl, const GURL &codebase);
385};
386
387class DJVUAPI GURL::Filename : public GURL
388{
389public:
390  Filename(const GUTF8String &filename);
391  Filename(const GNativeString &filename);
392  class UTF8;
393  class Native;
394};
395
396class DJVUAPI GURL::Filename::UTF8 : public GURL::Filename
397{
398public:
399  UTF8(const GUTF8String &filename);
400};
401
402class DJVUAPI GURL::Filename::Native : public GURL::Filename
403{
404public:
405  Native(const GNativeString &filename);
406};
407
408
409inline bool
410GURL::operator!=(const GURL & gurl2) const
411{
412  return !(*this == gurl2);
413}
414
415inline GUTF8String
416GURL::protocol(void) const
417{
418   return protocol(get_string());
419}
420
421inline bool
422GURL::is_empty(void) const
423{
424   return !url.length()||!get_string().length();
425}
426
427// Test if the URL is valid.
428// If invalid, reinitialize and return the result.
429inline bool
430GURL::is_valid(void) const
431{
432  if(!validurl)
433    const_cast<GURL *>(this)->init(true);
434  return validurl;
435}
436
437
438
439//@}
440
441
442#ifdef HAVE_NAMESPACES
443}
444# ifndef NOT_USING_DJVU_NAMESPACE
445using namespace DJVU;
446# endif
447#endif
448#endif
Note: See TracBrowser for help on using the repository browser.