1 | /* nfa - NFA construction routines */
|
---|
2 |
|
---|
3 | /* Copyright (c) 1990 The Regents of the University of California. */
|
---|
4 | /* All rights reserved. */
|
---|
5 |
|
---|
6 | /* This code is derived from software contributed to Berkeley by */
|
---|
7 | /* Vern Paxson. */
|
---|
8 |
|
---|
9 | /* The United States Government has rights in this work pursuant */
|
---|
10 | /* to contract no. DE-AC03-76SF00098 between the United States */
|
---|
11 | /* Department of Energy and the University of California. */
|
---|
12 |
|
---|
13 | /* This file is part of flex. */
|
---|
14 |
|
---|
15 | /* Redistribution and use in source and binary forms, with or without */
|
---|
16 | /* modification, are permitted provided that the following conditions */
|
---|
17 | /* are met: */
|
---|
18 |
|
---|
19 | /* 1. Redistributions of source code must retain the above copyright */
|
---|
20 | /* notice, this list of conditions and the following disclaimer. */
|
---|
21 | /* 2. Redistributions in binary form must reproduce the above copyright */
|
---|
22 | /* notice, this list of conditions and the following disclaimer in the */
|
---|
23 | /* documentation and/or other materials provided with the distribution. */
|
---|
24 |
|
---|
25 | /* Neither the name of the University nor the names of its contributors */
|
---|
26 | /* may be used to endorse or promote products derived from this software */
|
---|
27 | /* without specific prior written permission. */
|
---|
28 |
|
---|
29 | /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
|
---|
30 | /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
|
---|
31 | /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
---|
32 | /* PURPOSE. */
|
---|
33 |
|
---|
34 | #include "flexdef.h"
|
---|
35 |
|
---|
36 |
|
---|
37 | /* declare functions that have forward references */
|
---|
38 |
|
---|
39 | int dupmachine PROTO ((int));
|
---|
40 | void mkxtion PROTO ((int, int));
|
---|
41 |
|
---|
42 |
|
---|
43 | /* add_accept - add an accepting state to a machine
|
---|
44 | *
|
---|
45 | * accepting_number becomes mach's accepting number.
|
---|
46 | */
|
---|
47 |
|
---|
48 | void add_accept (mach, accepting_number)
|
---|
49 | int mach, accepting_number;
|
---|
50 | {
|
---|
51 | /* Hang the accepting number off an epsilon state. if it is associated
|
---|
52 | * with a state that has a non-epsilon out-transition, then the state
|
---|
53 | * will accept BEFORE it makes that transition, i.e., one character
|
---|
54 | * too soon.
|
---|
55 | */
|
---|
56 |
|
---|
57 | if (transchar[finalst[mach]] == SYM_EPSILON)
|
---|
58 | accptnum[finalst[mach]] = accepting_number;
|
---|
59 |
|
---|
60 | else {
|
---|
61 | int astate = mkstate (SYM_EPSILON);
|
---|
62 |
|
---|
63 | accptnum[astate] = accepting_number;
|
---|
64 | (void) link_machines (mach, astate);
|
---|
65 | }
|
---|
66 | }
|
---|
67 |
|
---|
68 |
|
---|
69 | /* copysingl - make a given number of copies of a singleton machine
|
---|
70 | *
|
---|
71 | * synopsis
|
---|
72 | *
|
---|
73 | * newsng = copysingl( singl, num );
|
---|
74 | *
|
---|
75 | * newsng - a new singleton composed of num copies of singl
|
---|
76 | * singl - a singleton machine
|
---|
77 | * num - the number of copies of singl to be present in newsng
|
---|
78 | */
|
---|
79 |
|
---|
80 | int copysingl (singl, num)
|
---|
81 | int singl, num;
|
---|
82 | {
|
---|
83 | int copy, i;
|
---|
84 |
|
---|
85 | copy = mkstate (SYM_EPSILON);
|
---|
86 |
|
---|
87 | for (i = 1; i <= num; ++i)
|
---|
88 | copy = link_machines (copy, dupmachine (singl));
|
---|
89 |
|
---|
90 | return copy;
|
---|
91 | }
|
---|
92 |
|
---|
93 |
|
---|
94 | /* dumpnfa - debugging routine to write out an nfa */
|
---|
95 |
|
---|
96 | void dumpnfa (state1)
|
---|
97 | int state1;
|
---|
98 |
|
---|
99 | {
|
---|
100 | int sym, tsp1, tsp2, anum, ns;
|
---|
101 |
|
---|
102 | fprintf (stderr,
|
---|
103 | _
|
---|
104 | ("\n\n********** beginning dump of nfa with start state %d\n"),
|
---|
105 | state1);
|
---|
106 |
|
---|
107 | /* We probably should loop starting at firstst[state1] and going to
|
---|
108 | * lastst[state1], but they're not maintained properly when we "or"
|
---|
109 | * all of the rules together. So we use our knowledge that the machine
|
---|
110 | * starts at state 1 and ends at lastnfa.
|
---|
111 | */
|
---|
112 |
|
---|
113 | /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
|
---|
114 | for (ns = 1; ns <= lastnfa; ++ns) {
|
---|
115 | fprintf (stderr, _("state # %4d\t"), ns);
|
---|
116 |
|
---|
117 | sym = transchar[ns];
|
---|
118 | tsp1 = trans1[ns];
|
---|
119 | tsp2 = trans2[ns];
|
---|
120 | anum = accptnum[ns];
|
---|
121 |
|
---|
122 | fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2);
|
---|
123 |
|
---|
124 | if (anum != NIL)
|
---|
125 | fprintf (stderr, " [%d]", anum);
|
---|
126 |
|
---|
127 | fprintf (stderr, "\n");
|
---|
128 | }
|
---|
129 |
|
---|
130 | fprintf (stderr, _("********** end of dump\n"));
|
---|
131 | }
|
---|
132 |
|
---|
133 |
|
---|
134 | /* dupmachine - make a duplicate of a given machine
|
---|
135 | *
|
---|
136 | * synopsis
|
---|
137 | *
|
---|
138 | * copy = dupmachine( mach );
|
---|
139 | *
|
---|
140 | * copy - holds duplicate of mach
|
---|
141 | * mach - machine to be duplicated
|
---|
142 | *
|
---|
143 | * note that the copy of mach is NOT an exact duplicate; rather, all the
|
---|
144 | * transition states values are adjusted so that the copy is self-contained,
|
---|
145 | * as the original should have been.
|
---|
146 | *
|
---|
147 | * also note that the original MUST be contiguous, with its low and high
|
---|
148 | * states accessible by the arrays firstst and lastst
|
---|
149 | */
|
---|
150 |
|
---|
151 | int dupmachine (mach)
|
---|
152 | int mach;
|
---|
153 | {
|
---|
154 | int i, init, state_offset;
|
---|
155 | int state = 0;
|
---|
156 | int last = lastst[mach];
|
---|
157 |
|
---|
158 | for (i = firstst[mach]; i <= last; ++i) {
|
---|
159 | state = mkstate (transchar[i]);
|
---|
160 |
|
---|
161 | if (trans1[i] != NO_TRANSITION) {
|
---|
162 | mkxtion (finalst[state], trans1[i] + state - i);
|
---|
163 |
|
---|
164 | if (transchar[i] == SYM_EPSILON &&
|
---|
165 | trans2[i] != NO_TRANSITION)
|
---|
166 | mkxtion (finalst[state],
|
---|
167 | trans2[i] + state - i);
|
---|
168 | }
|
---|
169 |
|
---|
170 | accptnum[state] = accptnum[i];
|
---|
171 | }
|
---|
172 |
|
---|
173 | if (state == 0)
|
---|
174 | flexfatal (_("empty machine in dupmachine()"));
|
---|
175 |
|
---|
176 | state_offset = state - i + 1;
|
---|
177 |
|
---|
178 | init = mach + state_offset;
|
---|
179 | firstst[init] = firstst[mach] + state_offset;
|
---|
180 | finalst[init] = finalst[mach] + state_offset;
|
---|
181 | lastst[init] = lastst[mach] + state_offset;
|
---|
182 |
|
---|
183 | return init;
|
---|
184 | }
|
---|
185 |
|
---|
186 |
|
---|
187 | /* finish_rule - finish up the processing for a rule
|
---|
188 | *
|
---|
189 | * An accepting number is added to the given machine. If variable_trail_rule
|
---|
190 | * is true then the rule has trailing context and both the head and trail
|
---|
191 | * are variable size. Otherwise if headcnt or trailcnt is non-zero then
|
---|
192 | * the machine recognizes a pattern with trailing context and headcnt is
|
---|
193 | * the number of characters in the matched part of the pattern, or zero
|
---|
194 | * if the matched part has variable length. trailcnt is the number of
|
---|
195 | * trailing context characters in the pattern, or zero if the trailing
|
---|
196 | * context has variable length.
|
---|
197 | */
|
---|
198 |
|
---|
199 | void finish_rule (mach, variable_trail_rule, headcnt, trailcnt,
|
---|
200 | pcont_act)
|
---|
201 | int mach, variable_trail_rule, headcnt, trailcnt, pcont_act;
|
---|
202 | {
|
---|
203 | char action_text[MAXLINE];
|
---|
204 |
|
---|
205 | add_accept (mach, num_rules);
|
---|
206 |
|
---|
207 | /* We did this in new_rule(), but it often gets the wrong
|
---|
208 | * number because we do it before we start parsing the current rule.
|
---|
209 | */
|
---|
210 | rule_linenum[num_rules] = linenum;
|
---|
211 |
|
---|
212 | /* If this is a continued action, then the line-number has already
|
---|
213 | * been updated, giving us the wrong number.
|
---|
214 | */
|
---|
215 | if (continued_action)
|
---|
216 | --rule_linenum[num_rules];
|
---|
217 |
|
---|
218 |
|
---|
219 | /* If the previous rule was continued action, then we inherit the
|
---|
220 | * previous newline flag, possibly overriding the current one.
|
---|
221 | */
|
---|
222 | if (pcont_act && rule_has_nl[num_rules - 1])
|
---|
223 | rule_has_nl[num_rules] = true;
|
---|
224 |
|
---|
225 | sprintf (action_text, "case %d:\n", num_rules);
|
---|
226 | add_action (action_text);
|
---|
227 | if (rule_has_nl[num_rules]) {
|
---|
228 | sprintf (action_text, "/* rule %d can match eol */\n",
|
---|
229 | num_rules);
|
---|
230 | add_action (action_text);
|
---|
231 | }
|
---|
232 |
|
---|
233 |
|
---|
234 | if (variable_trail_rule) {
|
---|
235 | rule_type[num_rules] = RULE_VARIABLE;
|
---|
236 |
|
---|
237 | if (performance_report > 0)
|
---|
238 | fprintf (stderr,
|
---|
239 | _
|
---|
240 | ("Variable trailing context rule at line %d\n"),
|
---|
241 | rule_linenum[num_rules]);
|
---|
242 |
|
---|
243 | variable_trailing_context_rules = true;
|
---|
244 | }
|
---|
245 |
|
---|
246 | else {
|
---|
247 | rule_type[num_rules] = RULE_NORMAL;
|
---|
248 |
|
---|
249 | if (headcnt > 0 || trailcnt > 0) {
|
---|
250 | /* Do trailing context magic to not match the trailing
|
---|
251 | * characters.
|
---|
252 | */
|
---|
253 | char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
|
---|
254 | char *scanner_bp = "yy_bp";
|
---|
255 |
|
---|
256 | add_action
|
---|
257 | ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");
|
---|
258 |
|
---|
259 | if (headcnt > 0) {
|
---|
260 | sprintf (action_text, "%s = %s + %d;\n",
|
---|
261 | scanner_cp, scanner_bp, headcnt);
|
---|
262 | add_action (action_text);
|
---|
263 | }
|
---|
264 |
|
---|
265 | else {
|
---|
266 | sprintf (action_text, "%s -= %d;\n",
|
---|
267 | scanner_cp, trailcnt);
|
---|
268 | add_action (action_text);
|
---|
269 | }
|
---|
270 |
|
---|
271 | add_action
|
---|
272 | ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
|
---|
273 | }
|
---|
274 | }
|
---|
275 |
|
---|
276 | /* Okay, in the action code at this point yytext and yyleng have
|
---|
277 | * their proper final values for this rule, so here's the point
|
---|
278 | * to do any user action. But don't do it for continued actions,
|
---|
279 | * as that'll result in multiple YY_RULE_SETUP's.
|
---|
280 | */
|
---|
281 | if (!continued_action)
|
---|
282 | add_action ("YY_RULE_SETUP\n");
|
---|
283 |
|
---|
284 | line_directive_out ((FILE *) 0, 1);
|
---|
285 | }
|
---|
286 |
|
---|
287 |
|
---|
288 | /* link_machines - connect two machines together
|
---|
289 | *
|
---|
290 | * synopsis
|
---|
291 | *
|
---|
292 | * new = link_machines( first, last );
|
---|
293 | *
|
---|
294 | * new - a machine constructed by connecting first to last
|
---|
295 | * first - the machine whose successor is to be last
|
---|
296 | * last - the machine whose predecessor is to be first
|
---|
297 | *
|
---|
298 | * note: this routine concatenates the machine first with the machine
|
---|
299 | * last to produce a machine new which will pattern-match first first
|
---|
300 | * and then last, and will fail if either of the sub-patterns fails.
|
---|
301 | * FIRST is set to new by the operation. last is unmolested.
|
---|
302 | */
|
---|
303 |
|
---|
304 | int link_machines (first, last)
|
---|
305 | int first, last;
|
---|
306 | {
|
---|
307 | if (first == NIL)
|
---|
308 | return last;
|
---|
309 |
|
---|
310 | else if (last == NIL)
|
---|
311 | return first;
|
---|
312 |
|
---|
313 | else {
|
---|
314 | mkxtion (finalst[first], last);
|
---|
315 | finalst[first] = finalst[last];
|
---|
316 | lastst[first] = MAX (lastst[first], lastst[last]);
|
---|
317 | firstst[first] = MIN (firstst[first], firstst[last]);
|
---|
318 |
|
---|
319 | return first;
|
---|
320 | }
|
---|
321 | }
|
---|
322 |
|
---|
323 |
|
---|
324 | /* mark_beginning_as_normal - mark each "beginning" state in a machine
|
---|
325 | * as being a "normal" (i.e., not trailing context-
|
---|
326 | * associated) states
|
---|
327 | *
|
---|
328 | * The "beginning" states are the epsilon closure of the first state
|
---|
329 | */
|
---|
330 |
|
---|
331 | void mark_beginning_as_normal (mach)
|
---|
332 | register int mach;
|
---|
333 | {
|
---|
334 | switch (state_type[mach]) {
|
---|
335 | case STATE_NORMAL:
|
---|
336 | /* Oh, we've already visited here. */
|
---|
337 | return;
|
---|
338 |
|
---|
339 | case STATE_TRAILING_CONTEXT:
|
---|
340 | state_type[mach] = STATE_NORMAL;
|
---|
341 |
|
---|
342 | if (transchar[mach] == SYM_EPSILON) {
|
---|
343 | if (trans1[mach] != NO_TRANSITION)
|
---|
344 | mark_beginning_as_normal (trans1[mach]);
|
---|
345 |
|
---|
346 | if (trans2[mach] != NO_TRANSITION)
|
---|
347 | mark_beginning_as_normal (trans2[mach]);
|
---|
348 | }
|
---|
349 | break;
|
---|
350 |
|
---|
351 | default:
|
---|
352 | flexerror (_
|
---|
353 | ("bad state type in mark_beginning_as_normal()"));
|
---|
354 | break;
|
---|
355 | }
|
---|
356 | }
|
---|
357 |
|
---|
358 |
|
---|
359 | /* mkbranch - make a machine that branches to two machines
|
---|
360 | *
|
---|
361 | * synopsis
|
---|
362 | *
|
---|
363 | * branch = mkbranch( first, second );
|
---|
364 | *
|
---|
365 | * branch - a machine which matches either first's pattern or second's
|
---|
366 | * first, second - machines whose patterns are to be or'ed (the | operator)
|
---|
367 | *
|
---|
368 | * Note that first and second are NEITHER destroyed by the operation. Also,
|
---|
369 | * the resulting machine CANNOT be used with any other "mk" operation except
|
---|
370 | * more mkbranch's. Compare with mkor()
|
---|
371 | */
|
---|
372 |
|
---|
373 | int mkbranch (first, second)
|
---|
374 | int first, second;
|
---|
375 | {
|
---|
376 | int eps;
|
---|
377 |
|
---|
378 | if (first == NO_TRANSITION)
|
---|
379 | return second;
|
---|
380 |
|
---|
381 | else if (second == NO_TRANSITION)
|
---|
382 | return first;
|
---|
383 |
|
---|
384 | eps = mkstate (SYM_EPSILON);
|
---|
385 |
|
---|
386 | mkxtion (eps, first);
|
---|
387 | mkxtion (eps, second);
|
---|
388 |
|
---|
389 | return eps;
|
---|
390 | }
|
---|
391 |
|
---|
392 |
|
---|
393 | /* mkclos - convert a machine into a closure
|
---|
394 | *
|
---|
395 | * synopsis
|
---|
396 | * new = mkclos( state );
|
---|
397 | *
|
---|
398 | * new - a new state which matches the closure of "state"
|
---|
399 | */
|
---|
400 |
|
---|
401 | int mkclos (state)
|
---|
402 | int state;
|
---|
403 | {
|
---|
404 | return mkopt (mkposcl (state));
|
---|
405 | }
|
---|
406 |
|
---|
407 |
|
---|
408 | /* mkopt - make a machine optional
|
---|
409 | *
|
---|
410 | * synopsis
|
---|
411 | *
|
---|
412 | * new = mkopt( mach );
|
---|
413 | *
|
---|
414 | * new - a machine which optionally matches whatever mach matched
|
---|
415 | * mach - the machine to make optional
|
---|
416 | *
|
---|
417 | * notes:
|
---|
418 | * 1. mach must be the last machine created
|
---|
419 | * 2. mach is destroyed by the call
|
---|
420 | */
|
---|
421 |
|
---|
422 | int mkopt (mach)
|
---|
423 | int mach;
|
---|
424 | {
|
---|
425 | int eps;
|
---|
426 |
|
---|
427 | if (!SUPER_FREE_EPSILON (finalst[mach])) {
|
---|
428 | eps = mkstate (SYM_EPSILON);
|
---|
429 | mach = link_machines (mach, eps);
|
---|
430 | }
|
---|
431 |
|
---|
432 | /* Can't skimp on the following if FREE_EPSILON(mach) is true because
|
---|
433 | * some state interior to "mach" might point back to the beginning
|
---|
434 | * for a closure.
|
---|
435 | */
|
---|
436 | eps = mkstate (SYM_EPSILON);
|
---|
437 | mach = link_machines (eps, mach);
|
---|
438 |
|
---|
439 | mkxtion (mach, finalst[mach]);
|
---|
440 |
|
---|
441 | return mach;
|
---|
442 | }
|
---|
443 |
|
---|
444 |
|
---|
445 | /* mkor - make a machine that matches either one of two machines
|
---|
446 | *
|
---|
447 | * synopsis
|
---|
448 | *
|
---|
449 | * new = mkor( first, second );
|
---|
450 | *
|
---|
451 | * new - a machine which matches either first's pattern or second's
|
---|
452 | * first, second - machines whose patterns are to be or'ed (the | operator)
|
---|
453 | *
|
---|
454 | * note that first and second are both destroyed by the operation
|
---|
455 | * the code is rather convoluted because an attempt is made to minimize
|
---|
456 | * the number of epsilon states needed
|
---|
457 | */
|
---|
458 |
|
---|
459 | int mkor (first, second)
|
---|
460 | int first, second;
|
---|
461 | {
|
---|
462 | int eps, orend;
|
---|
463 |
|
---|
464 | if (first == NIL)
|
---|
465 | return second;
|
---|
466 |
|
---|
467 | else if (second == NIL)
|
---|
468 | return first;
|
---|
469 |
|
---|
470 | else {
|
---|
471 | /* See comment in mkopt() about why we can't use the first
|
---|
472 | * state of "first" or "second" if they satisfy "FREE_EPSILON".
|
---|
473 | */
|
---|
474 | eps = mkstate (SYM_EPSILON);
|
---|
475 |
|
---|
476 | first = link_machines (eps, first);
|
---|
477 |
|
---|
478 | mkxtion (first, second);
|
---|
479 |
|
---|
480 | if (SUPER_FREE_EPSILON (finalst[first]) &&
|
---|
481 | accptnum[finalst[first]] == NIL) {
|
---|
482 | orend = finalst[first];
|
---|
483 | mkxtion (finalst[second], orend);
|
---|
484 | }
|
---|
485 |
|
---|
486 | else if (SUPER_FREE_EPSILON (finalst[second]) &&
|
---|
487 | accptnum[finalst[second]] == NIL) {
|
---|
488 | orend = finalst[second];
|
---|
489 | mkxtion (finalst[first], orend);
|
---|
490 | }
|
---|
491 |
|
---|
492 | else {
|
---|
493 | eps = mkstate (SYM_EPSILON);
|
---|
494 |
|
---|
495 | first = link_machines (first, eps);
|
---|
496 | orend = finalst[first];
|
---|
497 |
|
---|
498 | mkxtion (finalst[second], orend);
|
---|
499 | }
|
---|
500 | }
|
---|
501 |
|
---|
502 | finalst[first] = orend;
|
---|
503 | return first;
|
---|
504 | }
|
---|
505 |
|
---|
506 |
|
---|
507 | /* mkposcl - convert a machine into a positive closure
|
---|
508 | *
|
---|
509 | * synopsis
|
---|
510 | * new = mkposcl( state );
|
---|
511 | *
|
---|
512 | * new - a machine matching the positive closure of "state"
|
---|
513 | */
|
---|
514 |
|
---|
515 | int mkposcl (state)
|
---|
516 | int state;
|
---|
517 | {
|
---|
518 | int eps;
|
---|
519 |
|
---|
520 | if (SUPER_FREE_EPSILON (finalst[state])) {
|
---|
521 | mkxtion (finalst[state], state);
|
---|
522 | return state;
|
---|
523 | }
|
---|
524 |
|
---|
525 | else {
|
---|
526 | eps = mkstate (SYM_EPSILON);
|
---|
527 | mkxtion (eps, state);
|
---|
528 | return link_machines (state, eps);
|
---|
529 | }
|
---|
530 | }
|
---|
531 |
|
---|
532 |
|
---|
533 | /* mkrep - make a replicated machine
|
---|
534 | *
|
---|
535 | * synopsis
|
---|
536 | * new = mkrep( mach, lb, ub );
|
---|
537 | *
|
---|
538 | * new - a machine that matches whatever "mach" matched from "lb"
|
---|
539 | * number of times to "ub" number of times
|
---|
540 | *
|
---|
541 | * note
|
---|
542 | * if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
|
---|
543 | */
|
---|
544 |
|
---|
545 | int mkrep (mach, lb, ub)
|
---|
546 | int mach, lb, ub;
|
---|
547 | {
|
---|
548 | int base_mach, tail, copy, i;
|
---|
549 |
|
---|
550 | base_mach = copysingl (mach, lb - 1);
|
---|
551 |
|
---|
552 | if (ub == INFINITE_REPEAT) {
|
---|
553 | copy = dupmachine (mach);
|
---|
554 | mach = link_machines (mach,
|
---|
555 | link_machines (base_mach,
|
---|
556 | mkclos (copy)));
|
---|
557 | }
|
---|
558 |
|
---|
559 | else {
|
---|
560 | tail = mkstate (SYM_EPSILON);
|
---|
561 |
|
---|
562 | for (i = lb; i < ub; ++i) {
|
---|
563 | copy = dupmachine (mach);
|
---|
564 | tail = mkopt (link_machines (copy, tail));
|
---|
565 | }
|
---|
566 |
|
---|
567 | mach =
|
---|
568 | link_machines (mach,
|
---|
569 | link_machines (base_mach, tail));
|
---|
570 | }
|
---|
571 |
|
---|
572 | return mach;
|
---|
573 | }
|
---|
574 |
|
---|
575 |
|
---|
576 | /* mkstate - create a state with a transition on a given symbol
|
---|
577 | *
|
---|
578 | * synopsis
|
---|
579 | *
|
---|
580 | * state = mkstate( sym );
|
---|
581 | *
|
---|
582 | * state - a new state matching sym
|
---|
583 | * sym - the symbol the new state is to have an out-transition on
|
---|
584 | *
|
---|
585 | * note that this routine makes new states in ascending order through the
|
---|
586 | * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
|
---|
587 | * relies on machines being made in ascending order and that they are
|
---|
588 | * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
|
---|
589 | * that it admittedly is)
|
---|
590 | */
|
---|
591 |
|
---|
592 | int mkstate (sym)
|
---|
593 | int sym;
|
---|
594 | {
|
---|
595 | if (++lastnfa >= current_mns) {
|
---|
596 | if ((current_mns += MNS_INCREMENT) >= maximum_mns)
|
---|
597 | lerrif (_
|
---|
598 | ("input rules are too complicated (>= %d NFA states)"),
|
---|
599 | current_mns);
|
---|
600 |
|
---|
601 | ++num_reallocs;
|
---|
602 |
|
---|
603 | firstst = reallocate_integer_array (firstst, current_mns);
|
---|
604 | lastst = reallocate_integer_array (lastst, current_mns);
|
---|
605 | finalst = reallocate_integer_array (finalst, current_mns);
|
---|
606 | transchar =
|
---|
607 | reallocate_integer_array (transchar, current_mns);
|
---|
608 | trans1 = reallocate_integer_array (trans1, current_mns);
|
---|
609 | trans2 = reallocate_integer_array (trans2, current_mns);
|
---|
610 | accptnum =
|
---|
611 | reallocate_integer_array (accptnum, current_mns);
|
---|
612 | assoc_rule =
|
---|
613 | reallocate_integer_array (assoc_rule, current_mns);
|
---|
614 | state_type =
|
---|
615 | reallocate_integer_array (state_type, current_mns);
|
---|
616 | }
|
---|
617 |
|
---|
618 | firstst[lastnfa] = lastnfa;
|
---|
619 | finalst[lastnfa] = lastnfa;
|
---|
620 | lastst[lastnfa] = lastnfa;
|
---|
621 | transchar[lastnfa] = sym;
|
---|
622 | trans1[lastnfa] = NO_TRANSITION;
|
---|
623 | trans2[lastnfa] = NO_TRANSITION;
|
---|
624 | accptnum[lastnfa] = NIL;
|
---|
625 | assoc_rule[lastnfa] = num_rules;
|
---|
626 | state_type[lastnfa] = current_state_type;
|
---|
627 |
|
---|
628 | /* Fix up equivalence classes base on this transition. Note that any
|
---|
629 | * character which has its own transition gets its own equivalence
|
---|
630 | * class. Thus only characters which are only in character classes
|
---|
631 | * have a chance at being in the same equivalence class. E.g. "a|b"
|
---|
632 | * puts 'a' and 'b' into two different equivalence classes. "[ab]"
|
---|
633 | * puts them in the same equivalence class (barring other differences
|
---|
634 | * elsewhere in the input).
|
---|
635 | */
|
---|
636 |
|
---|
637 | if (sym < 0) {
|
---|
638 | /* We don't have to update the equivalence classes since
|
---|
639 | * that was already done when the ccl was created for the
|
---|
640 | * first time.
|
---|
641 | */
|
---|
642 | }
|
---|
643 |
|
---|
644 | else if (sym == SYM_EPSILON)
|
---|
645 | ++numeps;
|
---|
646 |
|
---|
647 | else {
|
---|
648 | check_char (sym);
|
---|
649 |
|
---|
650 | if (useecs)
|
---|
651 | /* Map NUL's to csize. */
|
---|
652 | mkechar (sym ? sym : csize, nextecm, ecgroup);
|
---|
653 | }
|
---|
654 |
|
---|
655 | return lastnfa;
|
---|
656 | }
|
---|
657 |
|
---|
658 |
|
---|
659 | /* mkxtion - make a transition from one state to another
|
---|
660 | *
|
---|
661 | * synopsis
|
---|
662 | *
|
---|
663 | * mkxtion( statefrom, stateto );
|
---|
664 | *
|
---|
665 | * statefrom - the state from which the transition is to be made
|
---|
666 | * stateto - the state to which the transition is to be made
|
---|
667 | */
|
---|
668 |
|
---|
669 | void mkxtion (statefrom, stateto)
|
---|
670 | int statefrom, stateto;
|
---|
671 | {
|
---|
672 | if (trans1[statefrom] == NO_TRANSITION)
|
---|
673 | trans1[statefrom] = stateto;
|
---|
674 |
|
---|
675 | else if ((transchar[statefrom] != SYM_EPSILON) ||
|
---|
676 | (trans2[statefrom] != NO_TRANSITION))
|
---|
677 | flexfatal (_("found too many transitions in mkxtion()"));
|
---|
678 |
|
---|
679 | else { /* second out-transition for an epsilon state */
|
---|
680 | ++eps2;
|
---|
681 | trans2[statefrom] = stateto;
|
---|
682 | }
|
---|
683 | }
|
---|
684 |
|
---|
685 | /* new_rule - initialize for a new rule */
|
---|
686 |
|
---|
687 | void new_rule ()
|
---|
688 | {
|
---|
689 | if (++num_rules >= current_max_rules) {
|
---|
690 | ++num_reallocs;
|
---|
691 | current_max_rules += MAX_RULES_INCREMENT;
|
---|
692 | rule_type = reallocate_integer_array (rule_type,
|
---|
693 | current_max_rules);
|
---|
694 | rule_linenum = reallocate_integer_array (rule_linenum,
|
---|
695 | current_max_rules);
|
---|
696 | rule_useful = reallocate_integer_array (rule_useful,
|
---|
697 | current_max_rules);
|
---|
698 | rule_has_nl = reallocate_bool_array (rule_has_nl,
|
---|
699 | current_max_rules);
|
---|
700 | }
|
---|
701 |
|
---|
702 | if (num_rules > MAX_RULE)
|
---|
703 | lerrif (_("too many rules (> %d)!"), MAX_RULE);
|
---|
704 |
|
---|
705 | rule_linenum[num_rules] = linenum;
|
---|
706 | rule_useful[num_rules] = false;
|
---|
707 | rule_has_nl[num_rules] = false;
|
---|
708 | }
|
---|