source: trunk/poppler/freetype-2.1.10/src/tools/docmaker/sources.py @ 2

Last change on this file since 2 was 2, checked in by Eugene Romanenko, 15 years ago

First import

File size: 10.3 KB
Line 
1#
# this file contains definitions of classes needed to decompose
# C source files into a series of multi-line "blocks". There are
# two kinds of blocks:
5#
6#   - normal blocks, which contain source code or ordinary comments
7#
#   - documentation blocks, which have restricted formatting, and
#     whose text always starts with a documentation markup tag like
#     "<Function>", "<Type>", etc.
11#
12# the routines used to process the content of documentation blocks
13# are not contained here, but in "content.py"
14#
15# the classes and methods found here only deal with text parsing
16# and basic documentation block extraction
17#
18import fileinput, re, sys, os, string
19
20
21
22
23
24
25################################################################
26##
27##  BLOCK FORMAT PATTERN
28##
29##   A simple class containing compiled regular expressions used
30##   to detect potential documentation format block comments within
31##   C source code
32##
33##   note that the 'column' pattern must contain a group that will
34##   be used to "unbox" the content of documentation comment blocks
35##
class SourceBlockFormat:
    """compiled regular expressions describing one documentation comment layout

       attributes:
         id     : the identifier given by the caller
         start  : compiled pattern for the opening line of a block
         column : compiled pattern for a content line; its group 1 is the
                  "unboxed" text of that line
         end    : compiled pattern for the closing line of a block
    """

    def __init__( self, id, start, column, end ):
        """compile the three verbose-mode patterns that describe a block format"""

        self.id = id

        # compiled in the order start, column, end; all of them use
        # re.VERBOSE so that the patterns may carry inline comments
        compiled = [ re.compile( pattern, re.VERBOSE )
                     for pattern in ( start, column, end ) ]

        self.start, self.column, self.end = compiled
45
46
47
48#
49# format 1 documentation comment blocks look like the following:
50#
51#    /************************************/
52#    /*                                  */
53#    /*                                  */
54#    /*                                  */
55#    /************************************/
56#
57# we define a few regular expressions here to detect them
58#
59
# pattern for the opening line of a format 1 block: a line made only of
# '/', two or more asterisks and a closing '/'
start = r'''
  \s*       # any number of whitespace
  /\*{2,}/  # followed by '/' and at least two asterisks then '/'
  \s*$      # eventually followed by whitespace
'''

# pattern for one boxed content line; group 1 is the text found between
# the leading '/*' and the trailing '*/', and may not begin with '*'
column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # enventually followed by whitespace
'''

# 'start' is passed twice: the closing line of a format 1 block is
# recognized with the same pattern as its opening line
re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
75
76#
77# format 2 documentation comment blocks look like the following:
78#
79#    /************************************ (at least 2 asterisks)
80#     *
81#     *
82#     *
83#     *
84#     **/       (1 or more asterisks at the end)
85#
86# we define a few regular expressions here to detect them
87#
# NOTE: 'start' and 'column' are re-bound here; this must stay below the
# creation of re_source_block_format1, which has already compiled the
# format 1 versions of these patterns
#
# pattern for the opening line of a format 2 block: '/' and two or more
# asterisks, with nothing but whitespace after them
start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # eventually followed by whitespace
'''

# pattern for one content line; group 1 is everything after the single
# leading asterisk
column = r'''
  \s*         # any number of whitespace
  \*{1}(?!/)  # followed by precisely one asterisk not followed by `/'
  (.*)        # then anything (group1)
'''

# pattern for the closing line: one or more asterisks followed by '/'
# (not anchored at the end of the line)
end = r'''
  \s*     # any number of whitespace
  \*+/    # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
106
107#
108# the list of supported documentation block formats, we could add new ones
109# relatively easily
110#
# every format in this list is tried, in order, against each line that
# is not already inside a documentation block
re_source_block_formats = [ re_source_block_format1, re_source_block_format2 ]


#
# the following regular expressions correspond to markup tags
# within the documentation comment blocks. they're equivalent
# despite their different syntax
#
# notice how each markup tag _must_ begin a new line
#
# in both patterns, group 1 is the tag name (possibly empty, since it
# is matched with '\w*')
#
re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' )  # <xxxx> format
re_markup_tag2 = re.compile( r'''\s*@(\w*):''' )  # @xxxx: format

#
# the list of supported markup tags, we could add new ones relatively
# easily
#
re_markup_tags = [ re_markup_tag1, re_markup_tag2 ]

#
# used to detect a cross-reference, after markup tags have been stripped
#
# group 1 is the word that follows the '@', group 2 is the rest of the
# text
#
re_crossref = re.compile( r'@(\w*)(.*)' )

#
# used to detect italic and bold styles in paragraph text
#
# italic is written _word_ and bold is written *word*; group 1 is the
# word without its delimiters
#
re_italic = re.compile( r'_(\w+)_' )
re_bold   = re.compile( r'\*(\w+)\*' )

#
# used to detect the end of commented source lines
#
# matches an empty C comment such as '/* */', optionally preceded by
# whitespace
#
re_source_sep = re.compile( r'\s*/\*\s*\*/' )

#
# used to perform cross-reference within source output
#
# group 1 is a (possibly empty) run of non-word characters, group 2 is
# the (possibly empty) word that follows it
#
re_source_crossref = re.compile( r'(\W*)(\w*)' )
150
151#
152# a list of reserved source keywords
153#
# group 1 is the keyword that was recognized.
#
# the two look-around assertions make sure that only complete keywords
# are recognized: without them an identifier such as 'integer' or 'voidp'
# would be reported as the keyword 'int' or 'void', and '#ifdef' could
# only ever be seen as '#if'. '\b' is not used because it can never
# match in front of the '#' of a preprocessor directive at the start of
# the text.
#
# the pattern is a raw string so that '\#' reaches the regular expression
# engine unchanged (in verbose mode a bare '#' would start a comment)
#
re_source_keywords = re.compile( r'''
                                   (?<!\w)      # not in the middle of a word
                                   ( typedef   |
                                     struct    |
                                     enum      |
                                     union     |
                                     const     |
                                     char      |
                                     int       |
                                     short     |
                                     long      |
                                     void      |
                                     signed    |
                                     unsigned  |
                                     \#include |
                                     \#define  |
                                     \#undef   |
                                     \#if      |
                                     \#ifdef   |
                                     \#ifndef  |
                                     \#else    |
                                     \#endif   )
                                   (?!\w)       # not followed by more word characters
                                   ''', re.VERBOSE )
174
175################################################################
176##
177##  SOURCE BLOCK CLASS
178##
##   A SourceProcessor is in charge of reading a C source file
##   and decomposing it into a series of different "SourceBlocks".
##   each one of these blocks can be made of the following data:
##
##   - A documentation comment block that starts with "/**" and
##     whose exact format will be discussed later
##
##   - normal source lines, including comments
187##
188##   the important fields in a text block are the following ones:
189##
190##     self.lines   : a list of text lines for the corresponding block
191##
##     self.content : for documentation comment blocks only, this is the
##                    block content that has been "unboxed" from its
##                    decoration. This is an empty list for all other
##                    blocks (i.e. sources or ordinary comments with no
##                    starting markup tag)
197##
class SourceBlock:
    """a run of consecutive lines taken from a source file

       attributes:
         processor : the object that created this block
         filename  : name of the file the lines come from
         lineno    : line number recorded by the processor for this block
         lines     : a private copy of the raw text lines of the block
         format    : the block format that was active in the processor when
                     the block was created, or None for ordinary source
         content   : the "unboxed" comment lines when the block is a
                     documentation comment whose first non-blank line is
                     a markup tag; an empty list in every other case
    """

    def __init__( self, processor, filename, lineno, lines ):
        """create a block from 'lines' and extract its documentation content

           processor : object whose 'format' attribute is either None or a
                       format object providing a compiled 'column' pattern
           filename  : name of the source file
           lineno    : line number to report for this block
           lines     : list of text lines; it is copied, not referenced
        """
        self.processor = processor
        self.filename  = filename
        self.lineno    = lineno
        self.lines     = lines[:]
        self.format    = processor.format
        self.content   = []

        # ordinary source lines have no documentation content
        if self.format is None:
            return

        # extract comment lines: keep group 1 of every line that matches
        # the column pattern, lines that do not match are dropped
        lines = []

        for line0 in self.lines:
            m = self.format.column.match( line0 )
            if m:
                lines.append( m.group(1) )

        # now, look for a markup tag; only the first non-blank line is
        # examined, so a comment that starts with ordinary text is never
        # treated as documentation
        for l in lines:
            l = l.strip()
            if l:
                for tag in re_markup_tags:
                    if tag.match( l ):
                        self.content = lines
                        break
                return

    def location( self ):
        """return the block's position as the string '(filename:lineno)'"""
        return "(" + self.filename + ":" + repr(self.lineno) + ")"


    # debugging only - not used in normal operations
    def dump( self ):
        """print the documentation content if there is any, else the raw lines"""

        # print() is always called with a single argument so that the
        # output is the same under Python 2 and Python 3
        if self.content:
            print( "{{{content start---" )
            for l in self.content:
                print( l )
            print( "---content end}}}" )
            return

        for line in self.lines:
            print( line )
249
250
251################################################################
252##
253##  SOURCE PROCESSOR CLASS
254##
##   The SourceProcessor is in charge of reading a C source file
##   and decomposing it into a series of different "SourceBlock"
##   objects.
##
##   each one of these blocks can be made of the following data:
##
##   - A documentation comment block that starts with "/**" and
##     whose exact format will be discussed later
##
##   - normal source lines, including comments
265##
266##
class SourceProcessor:
    """read a source file and split it into a list of SourceBlock objects

       attributes:
         blocks   : the blocks created so far, in file order
         filename : name of the file given to parse_file(), or None
         format   : the block format of the documentation comment that is
                    currently being read, or None outside of one
         lineno   : line number at which the most recent documentation
                    comment started (0 until one has been seen)
         lines    : the lines accumulated for the block being built
    """

    def  __init__( self ):
        """initialize a source processor"""
        self.blocks   = []
        self.filename = None
        self.format   = None
        # add_block_lines() reads 'lineno', so it must exist even before
        # parse_file() has been called
        self.lineno   = 0
        self.lines    = []

    def  reset( self ):
        """reset a block processor, clean all its blocks"""
        self.blocks = []
        self.format = None


    def  parse_file( self, filename ):
        """parse a C source file and store its blocks in 'self.blocks'

           the processor is reset first, so the blocks of any file parsed
           earlier are discarded
        """

        self.reset()

        self.filename = filename

        # the module-level 'fileinput' state is used here; close whatever
        # a previous, possibly interrupted, run has left open
        fileinput.close()
        self.format    = None
        self.lineno    = 0
        self.lines     = []

        for line in fileinput.input( filename ):

            # strip trailing newlines, important on Windows machines !!
            if line.endswith( '\012' ):
                line = line[0:-1]

            if self.format is None:
                self.process_normal_line( line )

            else:
                if self.format.end.match( line ):
                    # that's a normal block end, add it to lines and
                    # create a new block
                    self.lines.append( line )
                    self.add_block_lines()

                elif self.format.column.match( line ):
                    # that's a normal column line, add it to 'lines'
                    self.lines.append( line )

                else:
                    # humm.. this is an unexpected block end,
                    # create a new block, but don't process the line
                    self.add_block_lines()

                    # we need to process the line again
                    self.process_normal_line( line )

        # record the last lines
        self.add_block_lines()



    def process_normal_line( self, line ):
        """process a normal line and check if it's the start of a new block"""
        for f in re_source_block_formats:
            if f.start.match( line ):
                # flush what has been accumulated so far, the current
                # line becomes the first line of a documentation block
                self.add_block_lines()
                self.format = f
                self.lineno = fileinput.filelineno()

        self.lines.append( line )



    def add_block_lines( self ):
        """add the current accumulated lines, and create a new block"""
        # NOTE: 'lineno' is only updated when a documentation block
        # starts, blocks of ordinary source lines are created with the
        # value that was last recorded
        if self.lines != []:
            block = SourceBlock( self, self.filename, self.lineno, self.lines )

            self.blocks.append( block )
            self.format = None
            self.lines  = []


    # debugging only, not used in normal operations
    def dump( self ):
        """print all blocks in a processor"""
        for b in self.blocks:
            b.dump()
354
355# eof
Note: See TracBrowser for help on using the repository browser.