Context Navigation

app.c@ 1973

Last change on this file since 1973 was 1973, checked in by Silvan Scherrer, 8 years ago
binutils: update trunk to version 2.27
File size: 35.1 KB

Line
1	/* This is the Assembler Pre-Processor
2	Copyright (C) 1987-2016 Free Software Foundation, Inc.
3
4	This file is part of GAS, the GNU Assembler.
5
6	GAS is free software; you can redistribute it and/or modify
7	it under the terms of the GNU General Public License as published by
8	the Free Software Foundation; either version 3, or (at your option)
9	any later version.
10
11	GAS is distributed in the hope that it will be useful, but WITHOUT
12	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14	License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with GAS; see the file COPYING. If not, write to the Free
18	Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19	02110-1301, USA. */
20
21	/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
22	/* App, the assembler pre-processor. This pre-processor strips out
23	excess spaces, turns single-quoted characters into a decimal
24	constant, and turns the # in # <number> <filename> <garbage> into a
25	.linefile. This needs better error-handling. */
26
27	#include "as.h"
28
29	#if (__STDC__ != 1)
30	#ifndef const
31	#define const /* empty */
32	#endif
33	#endif
34
35	#ifdef H_TICK_HEX
36	int enable_h_tick_hex = 0;
37	#endif
38
39	#ifdef TC_M68K
40	/* Whether we are scrubbing in m68k MRI mode. This is different from
41	flag_m68k_mri, because the two flags will be affected by the .mri
42	pseudo-op at different times. */
43	static int scrub_m68k_mri;
44
45	/* The pseudo-op which switches in and out of MRI mode. See the
46	comment in do_scrub_chars. */
47	static const char mri_pseudo[] = ".mri 0";
48	#else
49	#define scrub_m68k_mri 0
50	#endif
51
52	#if defined TC_ARM && defined OBJ_ELF
53	/* The pseudo-op for which we need to special-case `@' characters.
54	See the comment in do_scrub_chars. */
55	static const char symver_pseudo[] = ".symver";
56	static const char * symver_state;
57	#endif
58
/* Character-class table for the scrubber, indexed by byte value.  Each
   entry holds one of the LEX_IS_* codes below, or 0 when do_scrub_begin
   has assigned no class to that character.  */
static char lex[256];

/* Characters that do_scrub_begin classifies as
   LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  The comments name the characters
   that do_scrub_begin assigns to each class.  */
#define LEX_IS_SYMBOL_COMPONENT 1	/* symbol_chars, bytes 128..255, tc_symbol_chars.  */
#define LEX_IS_WHITESPACE 2		/* ' ', '\t', '\r'.  */
#define LEX_IS_LINE_SEPARATOR 3		/* tc_line_separator_chars.  */
#define LEX_IS_COMMENT_START 4		/* tc_comment_chars.  */
#define LEX_IS_LINE_COMMENT_START 5	/* line_comment_chars.  */
#define LEX_IS_TWOCHAR_COMMENT_1ST 6	/* '/', when it has no other class.  */
#define LEX_IS_STRINGQUOTE 8		/* '"' (and '\'' in some configurations).  */
#define LEX_IS_COLON 9			/* ':'.  */
#define LEX_IS_NEWLINE 10		/* '\n'.  */
#define LEX_IS_ONECHAR_QUOTE 11		/* '\'' introducing a character constant.  */
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST 12	/* '-'.  */
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST 13		/* '|'.  */
#endif
#define LEX_IS_PARALLEL_SEPARATOR 14	/* tc_parallel_separator_chars.  */
#ifdef H_TICK_HEX
#define LEX_IS_H 15			/* 'h' and 'H' when enable_h_tick_hex is set.  */
#endif

/* Class predicates.  C is used directly as an index into lex[], so it
   must be a valid index (a byte value, not EOF).  */
#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)

/* Translate the character following a backslash in a one-character
   quote; defined further down in this file.  */
static int process_escape (int);
93
94	static int process_escape (int);
95
96	/* FIXME-soon: The entire lexer/parser thingy should be
97	built statically at compile time rather than dynamically
98	each and every time the assembler is run. xoxorich. */
99
100	void
101	do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
102	{
103	const char *p;
104	int c;
105
106	lex[' '] = LEX_IS_WHITESPACE;
107	lex['\t'] = LEX_IS_WHITESPACE;
108	lex['\r'] = LEX_IS_WHITESPACE;
109	lex['\n'] = LEX_IS_NEWLINE;
110	lex[':'] = LEX_IS_COLON;
111
112	#ifdef TC_M68K
113	scrub_m68k_mri = m68k_mri;
114
115	if (! m68k_mri)
116	#endif
117	{
118	lex['"'] = LEX_IS_STRINGQUOTE;
119
120	#if ! defined (TC_HPPA) && ! defined (TC_I370)
121	/* I370 uses single-quotes to delimit integer, float constants. */
122	lex['\''] = LEX_IS_ONECHAR_QUOTE;
123	#endif
124
125	#ifdef SINGLE_QUOTE_STRINGS
126	lex['\''] = LEX_IS_STRINGQUOTE;
127	#endif
128	}
129
130	/* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
131	in state 5 of do_scrub_chars must be changed. */
132
133	/* Note that these override the previous defaults, e.g. if ';' is a
134	comment char, then it isn't a line separator. */
135	for (p = symbol_chars; *p; ++p)
136	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
137
138	for (c = 128; c < 256; ++c)
139	lex[c] = LEX_IS_SYMBOL_COMPONENT;
140
141	#ifdef tc_symbol_chars
142	/* This macro permits the processor to specify all characters which
143	may appears in an operand. This will prevent the scrubber from
144	discarding meaningful whitespace in certain cases. The i386
145	backend uses this to support prefixes, which can confuse the
146	scrubber as to whether it is parsing operands or opcodes. */
147	for (p = tc_symbol_chars; *p; ++p)
148	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
149	#endif
150
151	/* The m68k backend wants to be able to change comment_chars. */
152	#ifndef tc_comment_chars
153	#define tc_comment_chars comment_chars
154	#endif
155	for (p = tc_comment_chars; *p; p++)
156	lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
157
158	for (p = line_comment_chars; *p; p++)
159	lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
160
161	#ifndef tc_line_separator_chars
162	#define tc_line_separator_chars line_separator_chars
163	#endif
164	for (p = tc_line_separator_chars; *p; p++)
165	lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
166
167	#ifdef tc_parallel_separator_chars
168	/* This macro permits the processor to specify all characters which
169	separate parallel insns on the same line. */
170	for (p = tc_parallel_separator_chars; *p; p++)
171	lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
172	#endif
173
174	/* Only allow slash-star comments if slash is not in use.
175	FIXME: This isn't right. We should always permit them. */
176	if (lex['/'] == 0)
177	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
178
179	#ifdef TC_M68K
180	if (m68k_mri)
181	{
182	lex['\''] = LEX_IS_STRINGQUOTE;
183	lex[';'] = LEX_IS_COMMENT_START;
184	lex['*'] = LEX_IS_LINE_COMMENT_START;
185	/* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
186	then it can't be used in an expression. */
187	lex['!'] = LEX_IS_LINE_COMMENT_START;
188	}
189	#endif
190
191	#ifdef TC_V850
192	lex['-'] = LEX_IS_DOUBLEDASH_1ST;
193	#endif
194	#ifdef DOUBLEBAR_PARALLEL
195	lex['\|'] = LEX_IS_DOUBLEBAR_1ST;
196	#endif
197	#ifdef TC_D30V
198	/* Must do this is we want VLIW instruction with "->" or "<-". */
199	lex['-'] = LEX_IS_SYMBOL_COMPONENT;
200	#endif
201
202	#ifdef H_TICK_HEX
203	if (enable_h_tick_hex)
204	{
205	lex['h'] = LEX_IS_H;
206	lex['H'] = LEX_IS_H;
207	}
208	#endif
209	}
210
/* Saved state of the scrubber.  These persist between calls to
   do_scrub_chars, and are captured and restored by app_push and
   app_pop.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to return to once a string, comment, or out_string has been
   dealt with.  */
static int old_state;
/* Remaining text to emit while in state -1.  */
static const char *out_string;
/* Holds the decimal text produced for a one-character quote.  */
static char out_buf[20];
/* Number of newlines swallowed (in comments or continued strings) that
   still have to be emitted.  */
static int add_newlines;
/* Unconsumed input carried over to the next do_scrub_chars call, or
   NULL if there is none; saved_input_len is its length in bytes.  */
static char *saved_input;
static size_t saved_input_len;
/* Buffer handed to the GET callback to be filled with raw input.  */
static char input_buffer[32 * 1024];
/* Progress through recognizing the ".mri" pseudo-op: NULL when not
   matching, otherwise a pointer into mri_pseudo.  */
static const char *mri_state;
/* Last character matched while recognizing ".mri".  */
static char mri_last_ch;
222
223	/* Data structure for saving the state of app across #include's. Note that
224	app is called asynchronously to the parsing of the .include's, so our
225	state at the time .include is interpreted is completely unrelated.
226	That's why we have to save it all. */
227
/* Snapshot of the scrubber's file-scope state, filled in by app_push
   and consumed by app_pop.  Each member mirrors the file-scope
   variable of the same name.  */
struct app_save
{
  int state;
  int old_state;
  const char * out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  /* Heap copy of the pending input made by app_push, or NULL.  */
  char * saved_input;
  /* Only set by app_push when saved_input is non-NULL.  */
  size_t saved_input_len;
#ifdef TC_M68K
  int scrub_m68k_mri;
#endif
  const char * mri_state;
  char mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char * symver_state;
#endif
};
246
247	char *
248	app_push (void)
249	{
250	struct app_save *saved;
251
252	saved = XNEW (struct app_save);
253	saved->state = state;
254	saved->old_state = old_state;
255	saved->out_string = out_string;
256	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
257	saved->add_newlines = add_newlines;
258	if (saved_input == NULL)
259	saved->saved_input = NULL;
260	else
261	{
262	saved->saved_input = XNEWVEC (char, saved_input_len);
263	memcpy (saved->saved_input, saved_input, saved_input_len);
264	saved->saved_input_len = saved_input_len;
265	}
266	#ifdef TC_M68K
267	saved->scrub_m68k_mri = scrub_m68k_mri;
268	#endif
269	saved->mri_state = mri_state;
270	saved->mri_last_ch = mri_last_ch;
271	#if defined TC_ARM && defined OBJ_ELF
272	saved->symver_state = symver_state;
273	#endif
274
275	/* do_scrub_begin() is not useful, just wastes time. */
276
277	state = 0;
278	saved_input = NULL;
279	add_newlines = 0;
280
281	return (char *) saved;
282	}
283
284	void
285	app_pop (char *arg)
286	{
287	struct app_save saved = (struct app_save ) arg;
288
289	/* There is no do_scrub_end (). */
290	state = saved->state;
291	old_state = saved->old_state;
292	out_string = saved->out_string;
293	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
294	add_newlines = saved->add_newlines;
295	if (saved->saved_input == NULL)
296	saved_input = NULL;
297	else
298	{
299	gas_assert (saved->saved_input_len <= sizeof (input_buffer));
300	memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
301	saved_input = input_buffer;
302	saved_input_len = saved->saved_input_len;
303	free (saved->saved_input);
304	}
305	#ifdef TC_M68K
306	scrub_m68k_mri = saved->scrub_m68k_mri;
307	#endif
308	mri_state = saved->mri_state;
309	mri_last_ch = saved->mri_last_ch;
310	#if defined TC_ARM && defined OBJ_ELF
311	symver_state = saved->symver_state;
312	#endif
313
314	free (arg);
315	}
316
317	/* @@ This assumes that \n &c are the same on host and target. This is not
318	necessarily true. */
319
/* Return the character denoted by a backslash followed by CH.

   The letters b, f, n, r and t map to the matching control character;
   a single or double quote maps to itself.  Any other value of CH is
   returned unchanged.  */

static int
process_escape (int ch)
{
  /* escape_letters[i] is the character after the backslash and
     escape_values[i] is what it stands for.  */
  static const char escape_letters[] = "bfnrt'\"";
  static const char escape_values[] = "\b\f\n\r\t'\"";
  int i;

  /* The scan stops at the terminating NUL, so a CH of 0 never
     matches.  */
  for (i = 0; escape_letters[i] != '\0'; i++)
    if (ch == escape_letters[i])
      return escape_values[i];

  return ch;
}
343
344	/* This function is called to process input characters. The GET
345	parameter is used to retrieve more input characters. GET should
346	set its parameter to point to a buffer, and return the length of
347	the buffer; it should return 0 at end of file. The scrubbed output
348	characters are put into the buffer starting at TOSTART; the TOSTART
349	buffer is TOLEN bytes in length. The function returns the number
350	of scrubbed characters put into TOSTART. This will be TOLEN unless
351	end of file was seen. This function is arranged as a state
352	machine, and saves its state so that it may return at any point.
353	This is the way the old code used to work. */
354
355	size_t
356	do_scrub_chars (size_t (get) (char , size_t), char *tostart, size_t tolen)
357	{
358	char *to = tostart;
359	char *toend = tostart + tolen;
360	char *from;
361	char *fromend;
362	size_t fromlen;
363	int ch, ch2 = 0;
364	/* Character that started the string we're working on. */
365	static char quotechar;
366
367	/*State 0: beginning of normal line
368	1: After first whitespace on line (flush more white)
369	2: After first non-white (opcode) on line (keep 1white)
370	3: after second white on line (into operands) (flush white)
371	4: after putting out a .linefile, put out digits
372	5: parsing a string, then go to old-state
373	6: putting out \ escape in a "d string.
374	7: no longer used
375	8: no longer used
376	9: After seeing symbol char in state 3 (keep 1white after symchar)
377	10: After seeing whitespace in state 9 (keep white before symchar)
378	11: After seeing a symbol character in state 0 (eg a label definition)
379	-1: output string in out_string and go to the state in old_state
380	-2: flush text until a '*' '/' is seen, then go to state old_state
381	#ifdef TC_V850
382	12: After seeing a dash, looking for a second dash as a start
383	of comment.
384	#endif
385	#ifdef DOUBLEBAR_PARALLEL
386	13: After seeing a vertical bar, looking for a second
387	vertical bar as a parallel expression separator.
388	#endif
389	#ifdef TC_PREDICATE_START_CHAR
390	14: After seeing a predicate start character at state 0, looking
391	for a predicate end character as predicate.
392	15: After seeing a predicate start character at state 1, looking
393	for a predicate end character as predicate.
394	#endif
395	#ifdef TC_Z80
396	16: After seeing an 'a' or an 'A' at the start of a symbol
397	17: After seeing an 'f' or an 'F' in state 16
398	#endif
399	*/
400
401	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
402	constructs like ``.loc 1 20''. This was turning into ``.loc
403	120''. States 9 and 10 ensure that a space is never dropped in
404	between characters which could appear in an identifier. Ian
405	Taylor, ian@cygnus.com.
406
407	I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
408	correctly on the PA (and any other target where colons are optional).
409	Jeff Law, law@cs.utah.edu.
410
411	I added state 13 so that something like "cmp r1, r2 || trap #1" does not
412	get squashed into "cmp r1,r2||trap#1", with the all important space
413	between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
414
415	/* This macro gets the next input character. */
416
417	#define GET() \
418	(from < fromend \
419	? * (unsigned char *) (from++) \
420	: (saved_input = NULL, \
421	fromlen = (*get) (input_buffer, sizeof input_buffer), \
422	from = input_buffer, \
423	fromend = from + fromlen, \
424	(fromlen == 0 \
425	? EOF \
426	: * (unsigned char *) (from++))))
427
428	/* This macro pushes a character back on the input stream. */
429
430	#define UNGET(uch) (*--from = (uch))
431
432	/* This macro puts a character into the output buffer. If this
433	character fills the output buffer, this macro jumps to the label
434	TOFULL. We use this rather ugly approach because we need to
435	handle two different termination conditions: EOF on the input
436	stream, and a full output buffer. It would be simpler if we
437	always read in the entire input stream before processing it, but
438	I don't want to make such a significant change to the assembler's
439	memory usage. */
440
441	#define PUT(pch) \
442	do \
443	{ \
444	*to++ = (pch); \
445	if (to >= toend) \
446	goto tofull; \
447	} \
448	while (0)
449
450	if (saved_input != NULL)
451	{
452	from = saved_input;
453	fromend = from + saved_input_len;
454	}
455	else
456	{
457	fromlen = (*get) (input_buffer, sizeof input_buffer);
458	if (fromlen == 0)
459	return 0;
460	from = input_buffer;
461	fromend = from + fromlen;
462	}
463
464	while (1)
465	{
466	/* The cases in this switch end with continue, in order to
467	branch back to the top of this while loop and generate the
468	next output character in the appropriate state. */
469	switch (state)
470	{
471	case -1:
472	ch = *out_string++;
473	if (*out_string == '\0')
474	{
475	state = old_state;
476	old_state = 3;
477	}
478	PUT (ch);
479	continue;
480
481	case -2:
482	for (;;)
483	{
484	do
485	{
486	ch = GET ();
487
488	if (ch == EOF)
489	{
490	as_warn (_("end of file in comment"));
491	goto fromeof;
492	}
493
494	if (ch == '\n')
495	PUT ('\n');
496	}
497	while (ch != '*');
498
499	while ((ch = GET ()) == '*')
500	;
501
502	if (ch == EOF)
503	{
504	as_warn (_("end of file in comment"));
505	goto fromeof;
506	}
507
508	if (ch == '/')
509	break;
510
511	UNGET (ch);
512	}
513
514	state = old_state;
515	UNGET (' ');
516	continue;
517
518	case 4:
519	ch = GET ();
520	if (ch == EOF)
521	goto fromeof;
522	else if (ch >= '0' && ch <= '9')
523	PUT (ch);
524	else
525	{
526	while (ch != EOF && IS_WHITESPACE (ch))
527	ch = GET ();
528	if (ch == '"')
529	{
530	quotechar = ch;
531	state = 5;
532	old_state = 3;
533	PUT (ch);
534	}
535	else
536	{
537	while (ch != EOF && ch != '\n')
538	ch = GET ();
539	state = 0;
540	PUT (ch);
541	}
542	}
543	continue;
544
545	case 5:
546	/* We are going to copy everything up to a quote character,
547	with special handling for a backslash. We try to
548	optimize the copying in the simple case without using the
549	GET and PUT macros. */
550	{
551	char *s;
552	ptrdiff_t len;
553
554	for (s = from; s < fromend; s++)
555	{
556	ch = *s;
557	if (ch == '\\'
558	\|\| ch == quotechar
559	\|\| ch == '\n')
560	break;
561	}
562	len = s - from;
563	if (len > toend - to)
564	len = toend - to;
565	if (len > 0)
566	{
567	memcpy (to, from, len);
568	to += len;
569	from += len;
570	if (to >= toend)
571	goto tofull;
572	}
573	}
574
575	ch = GET ();
576	if (ch == EOF)
577	{
578	/* This buffer is here specifically so
579	that the UNGET below will work. */
580	static char one_char_buf[1];
581
582	as_warn (_("end of file in string; '%c' inserted"), quotechar);
583	state = old_state;
584	from = fromend = one_char_buf + 1;
585	fromlen = 1;
586	UNGET ('\n');
587	PUT (quotechar);
588	}
589	else if (ch == quotechar)
590	{
591	state = old_state;
592	PUT (ch);
593	}
594	#ifndef NO_STRING_ESCAPES
595	else if (ch == '\\')
596	{
597	state = 6;
598	PUT (ch);
599	}
600	#endif
601	else if (scrub_m68k_mri && ch == '\n')
602	{
603	/* Just quietly terminate the string. This permits lines like
604	bne label loop if we haven't reach end yet. */
605	state = old_state;
606	UNGET (ch);
607	PUT ('\'');
608	}
609	else
610	{
611	PUT (ch);
612	}
613	continue;
614
615	case 6:
616	state = 5;
617	ch = GET ();
618	switch (ch)
619	{
620	/* Handle strings broken across lines, by turning '\n' into
621	'\\' and 'n'. */
622	case '\n':
623	UNGET ('n');
624	add_newlines++;
625	PUT ('\\');
626	continue;
627
628	case EOF:
629	as_warn (_("end of file in string; '%c' inserted"), quotechar);
630	PUT (quotechar);
631	continue;
632
633	case '"':
634	case '\\':
635	case 'b':
636	case 'f':
637	case 'n':
638	case 'r':
639	case 't':
640	case 'v':
641	case 'x':
642	case 'X':
643	case '0':
644	case '1':
645	case '2':
646	case '3':
647	case '4':
648	case '5':
649	case '6':
650	case '7':
651	break;
652
653	default:
654	#ifdef ONLY_STANDARD_ESCAPES
655	as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
656	#endif
657	break;
658	}
659	PUT (ch);
660	continue;
661
662	#ifdef DOUBLEBAR_PARALLEL
663	case 13:
664	ch = GET ();
665	if (ch != '\|')
666	abort ();
667
668	/* Reset back to state 1 and pretend that we are parsing a
669	line from just after the first white space. */
670	state = 1;
671	PUT ('\|');
672	#ifdef TC_TIC6X
673	/* "\|\|^" is used for SPMASKed instructions. */
674	ch = GET ();
675	if (ch == EOF)
676	goto fromeof;
677	else if (ch == '^')
678	PUT ('^');
679	else
680	UNGET (ch);
681	#endif
682	continue;
683	#endif
684	#ifdef TC_Z80
685	case 16:
686	/* We have seen an 'a' at the start of a symbol, look for an 'f'. */
687	ch = GET ();
688	if (ch == 'f' \|\| ch == 'F')
689	{
690	state = 17;
691	PUT (ch);
692	}
693	else
694	{
695	state = 9;
696	break;
697	}
698	case 17:
699	/* We have seen "af" at the start of a symbol,
700	a ' here is a part of that symbol. */
701	ch = GET ();
702	state = 9;
703	if (ch == '\'')
704	/* Change to avoid warning about unclosed string. */
705	PUT ('`');
706	else if (ch != EOF)
707	UNGET (ch);
708	break;
709	#endif
710	}
711
712	/* OK, we are somewhere in states 0 through 4 or 9 through 11. */
713
714	/* flushchar: */
715	ch = GET ();
716
717	#ifdef TC_PREDICATE_START_CHAR
718	if (ch == TC_PREDICATE_START_CHAR && (state == 0 \|\| state == 1))
719	{
720	state += 14;
721	PUT (ch);
722	continue;
723	}
724	else if (state == 14 \|\| state == 15)
725	{
726	if (ch == TC_PREDICATE_END_CHAR)
727	{
728	state -= 14;
729	PUT (ch);
730	ch = GET ();
731	}
732	else
733	{
734	PUT (ch);
735	continue;
736	}
737	}
738	#endif
739
740	recycle:
741
742	#if defined TC_ARM && defined OBJ_ELF
743	/* We need to watch out for .symver directives. See the comment later
744	in this function. */
745	if (symver_state == NULL)
746	{
747	if ((state == 0 \|\| state == 1) && ch == symver_pseudo[0])
748	symver_state = symver_pseudo + 1;
749	}
750	else
751	{
752	/* We advance to the next state if we find the right
753	character. */
754	if (ch != '\0' && (*symver_state == ch))
755	++symver_state;
756	else if (*symver_state != '\0')
757	/* We did not get the expected character, or we didn't
758	get a valid terminating character after seeing the
759	entire pseudo-op, so we must go back to the beginning. */
760	symver_state = NULL;
761	else
762	{
763	/* We've read the entire pseudo-op. If this is the end
764	of the line, go back to the beginning. */
765	if (IS_NEWLINE (ch))
766	symver_state = NULL;
767	}
768	}
769	#endif /* TC_ARM && OBJ_ELF */
770
771	#ifdef TC_M68K
772	/* We want to have pseudo-ops which control whether we are in
773	MRI mode or not. Unfortunately, since m68k MRI mode affects
774	the scrubber, that means that we need a special purpose
775	recognizer here. */
776	if (mri_state == NULL)
777	{
778	if ((state == 0 \|\| state == 1)
779	&& ch == mri_pseudo[0])
780	mri_state = mri_pseudo + 1;
781	}
782	else
783	{
784	/* We advance to the next state if we find the right
785	character, or if we need a space character and we get any
786	whitespace character, or if we need a '0' and we get a
787	'1' (this is so that we only need one state to handle
788	``.mri 0'' and ``.mri 1''). */
789	if (ch != '\0'
790	&& (*mri_state == ch
791	\|\| (*mri_state == ' '
792	&& lex[ch] == LEX_IS_WHITESPACE)
793	\|\| (*mri_state == '0'
794	&& ch == '1')))
795	{
796	mri_last_ch = ch;
797	++mri_state;
798	}
799	else if (*mri_state != '\0'
800	\|\| (lex[ch] != LEX_IS_WHITESPACE
801	&& lex[ch] != LEX_IS_NEWLINE))
802	{
803	/* We did not get the expected character, or we didn't
804	get a valid terminating character after seeing the
805	entire pseudo-op, so we must go back to the
806	beginning. */
807	mri_state = NULL;
808	}
809	else
810	{
811	/* We've read the entire pseudo-op. mri_last_ch is
812	either '0' or '1' indicating whether to enter or
813	leave MRI mode. */
814	do_scrub_begin (mri_last_ch == '1');
815	mri_state = NULL;
816
817	/* We continue handling the character as usual. The
818	main gas reader must also handle the .mri pseudo-op
819	to control expression parsing and the like. */
820	}
821	}
822	#endif
823
824	if (ch == EOF)
825	{
826	if (state != 0)
827	{
828	as_warn (_("end of file not at end of a line; newline inserted"));
829	state = 0;
830	PUT ('\n');
831	}
832	goto fromeof;
833	}
834
835	switch (lex[ch])
836	{
837	case LEX_IS_WHITESPACE:
838	do
839	{
840	ch = GET ();
841	}
842	while (ch != EOF && IS_WHITESPACE (ch));
843	if (ch == EOF)
844	goto fromeof;
845
846	if (state == 0)
847	{
848	/* Preserve a single whitespace character at the
849	beginning of a line. */
850	state = 1;
851	UNGET (ch);
852	PUT (' ');
853	break;
854	}
855
856	#ifdef KEEP_WHITE_AROUND_COLON
857	if (lex[ch] == LEX_IS_COLON)
858	{
859	/* Only keep this white if there's no white after the
860	colon. */
861	ch2 = GET ();
862	if (ch2 != EOF)
863	UNGET (ch2);
864	if (!IS_WHITESPACE (ch2))
865	{
866	state = 9;
867	UNGET (ch);
868	PUT (' ');
869	break;
870	}
871	}
872	#endif
873	if (IS_COMMENT (ch)
874	\|\| ch == '/'
875	\|\| IS_LINE_SEPARATOR (ch)
876	\|\| IS_PARALLEL_SEPARATOR (ch))
877	{
878	if (scrub_m68k_mri)
879	{
880	/* In MRI mode, we keep these spaces. */
881	UNGET (ch);
882	PUT (' ');
883	break;
884	}
885	goto recycle;
886	}
887
888	/* If we're in state 2 or 11, we've seen a non-white
889	character followed by whitespace. If the next character
890	is ':', this is whitespace after a label name which we
891	normally must ignore. In MRI mode, though, spaces are
892	not permitted between the label and the colon. */
893	if ((state == 2 \|\| state == 11)
894	&& lex[ch] == LEX_IS_COLON
895	&& ! scrub_m68k_mri)
896	{
897	state = 1;
898	PUT (ch);
899	break;
900	}
901
902	switch (state)
903	{
904	case 1:
905	/* We can arrive here if we leave a leading whitespace
906	character at the beginning of a line. */
907	goto recycle;
908	case 2:
909	state = 3;
910	if (to + 1 < toend)
911	{
912	/* Optimize common case by skipping UNGET/GET. */
913	PUT (' '); /* Sp after opco */
914	goto recycle;
915	}
916	UNGET (ch);
917	PUT (' ');
918	break;
919	case 3:
920	#ifndef TC_KEEP_OPERAND_SPACES
921	/* For TI C6X, we keep these spaces as they may separate
922	functional unit specifiers from operands. */
923	if (scrub_m68k_mri)
924	#endif
925	{
926	/* In MRI mode, we keep these spaces. */
927	UNGET (ch);
928	PUT (' ');
929	break;
930	}
931	goto recycle; /* Sp in operands */
932	case 9:
933	case 10:
934	#ifndef TC_KEEP_OPERAND_SPACES
935	if (scrub_m68k_mri)
936	#endif
937	{
938	/* In MRI mode, we keep these spaces. */
939	state = 3;
940	UNGET (ch);
941	PUT (' ');
942	break;
943	}
944	state = 10; /* Sp after symbol char */
945	goto recycle;
946	case 11:
947	if (LABELS_WITHOUT_COLONS \|\| flag_m68k_mri)
948	state = 1;
949	else
950	{
951	/* We know that ch is not ':', since we tested that
952	case above. Therefore this is not a label, so it
953	must be the opcode, and we've just seen the
954	whitespace after it. */
955	state = 3;
956	}
957	UNGET (ch);
958	PUT (' '); /* Sp after label definition. */
959	break;
960	default:
961	BAD_CASE (state);
962	}
963	break;
964
965	case LEX_IS_TWOCHAR_COMMENT_1ST:
966	ch2 = GET ();
967	if (ch2 == '*')
968	{
969	for (;;)
970	{
971	do
972	{
973	ch2 = GET ();
974	if (ch2 != EOF && IS_NEWLINE (ch2))
975	add_newlines++;
976	}
977	while (ch2 != EOF && ch2 != '*');
978
979	while (ch2 == '*')
980	ch2 = GET ();
981
982	if (ch2 == EOF \|\| ch2 == '/')
983	break;
984
985	/* This UNGET will ensure that we count newlines
986	correctly. */
987	UNGET (ch2);
988	}
989
990	if (ch2 == EOF)
991	as_warn (_("end of file in multiline comment"));
992
993	ch = ' ';
994	goto recycle;
995	}
996	#ifdef DOUBLESLASH_LINE_COMMENTS
997	else if (ch2 == '/')
998	{
999	do
1000	{
1001	ch = GET ();
1002	}
1003	while (ch != EOF && !IS_NEWLINE (ch));
1004	if (ch == EOF)
1005	as_warn ("end of file in comment; newline inserted");
1006	state = 0;
1007	PUT ('\n');
1008	break;
1009	}
1010	#endif
1011	else
1012	{
1013	if (ch2 != EOF)
1014	UNGET (ch2);
1015	if (state == 9 \|\| state == 10)
1016	state = 3;
1017	PUT (ch);
1018	}
1019	break;
1020
1021	case LEX_IS_STRINGQUOTE:
1022	quotechar = ch;
1023	if (state == 10)
1024	{
1025	/* Preserve the whitespace in foo "bar". */
1026	UNGET (ch);
1027	state = 3;
1028	PUT (' ');
1029
1030	/* PUT didn't jump out. We could just break, but we
1031	know what will happen, so optimize a bit. */
1032	ch = GET ();
1033	old_state = 3;
1034	}
1035	else if (state == 9)
1036	old_state = 3;
1037	else
1038	old_state = state;
1039	state = 5;
1040	PUT (ch);
1041	break;
1042
1043	#ifndef IEEE_STYLE
1044	case LEX_IS_ONECHAR_QUOTE:
1045	#ifdef H_TICK_HEX
1046	if (state == 9 && enable_h_tick_hex)
1047	{
1048	char c;
1049
1050	c = GET ();
1051	as_warn ("'%c found after symbol", c);
1052	UNGET (c);
1053	}
1054	#endif
1055	if (state == 10)
1056	{
1057	/* Preserve the whitespace in foo 'b'. */
1058	UNGET (ch);
1059	state = 3;
1060	PUT (' ');
1061	break;
1062	}
1063	ch = GET ();
1064	if (ch == EOF)
1065	{
1066	as_warn (_("end of file after a one-character quote; \\0 inserted"));
1067	ch = 0;
1068	}
1069	if (ch == '\\')
1070	{
1071	ch = GET ();
1072	if (ch == EOF)
1073	{
1074	as_warn (_("end of file in escape character"));
1075	ch = '\\';
1076	}
1077	else
1078	ch = process_escape (ch);
1079	}
1080	sprintf (out_buf, "%d", (int) (unsigned char) ch);
1081
1082	/* None of these 'x constants for us. We want 'x'. */
1083	if ((ch = GET ()) != '\'')
1084	{
1085	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1086	as_warn (_("missing close quote; (assumed)"));
1087	#else
1088	if (ch != EOF)
1089	UNGET (ch);
1090	#endif
1091	}
1092	if (strlen (out_buf) == 1)
1093	{
1094	PUT (out_buf[0]);
1095	break;
1096	}
1097	if (state == 9)
1098	old_state = 3;
1099	else
1100	old_state = state;
1101	state = -1;
1102	out_string = out_buf;
1103	PUT (*out_string++);
1104	break;
1105	#endif
1106
1107	case LEX_IS_COLON:
1108	#ifdef KEEP_WHITE_AROUND_COLON
1109	state = 9;
1110	#else
1111	if (state == 9 \|\| state == 10)
1112	state = 3;
1113	else if (state != 3)
1114	state = 1;
1115	#endif
1116	PUT (ch);
1117	break;
1118
1119	case LEX_IS_NEWLINE:
1120	/* Roll out a bunch of newlines from inside comments, etc. */
1121	if (add_newlines)
1122	{
1123	--add_newlines;
1124	UNGET (ch);
1125	}
1126	/* Fall through. */
1127
1128	case LEX_IS_LINE_SEPARATOR:
1129	state = 0;
1130	PUT (ch);
1131	break;
1132
1133	case LEX_IS_PARALLEL_SEPARATOR:
1134	state = 1;
1135	PUT (ch);
1136	break;
1137
1138	#ifdef TC_V850
1139	case LEX_IS_DOUBLEDASH_1ST:
1140	ch2 = GET ();
1141	if (ch2 != '-')
1142	{
1143	if (ch2 != EOF)
1144	UNGET (ch2);
1145	goto de_fault;
1146	}
1147	/* Read and skip to end of line. */
1148	do
1149	{
1150	ch = GET ();
1151	}
1152	while (ch != EOF && ch != '\n');
1153
1154	if (ch == EOF)
1155	as_warn (_("end of file in comment; newline inserted"));
1156
1157	state = 0;
1158	PUT ('\n');
1159	break;
1160	#endif
1161	#ifdef DOUBLEBAR_PARALLEL
1162	case LEX_IS_DOUBLEBAR_1ST:
1163	ch2 = GET ();
1164	if (ch2 != EOF)
1165	UNGET (ch2);
1166	if (ch2 != '\|')
1167	goto de_fault;
1168
1169	/* Handle '||' in two states as invoking PUT twice might
1170	result in the first one jumping out of this loop. We'd
1171	then lose track of the state and one '|' char. */
1172	state = 13;
1173	PUT ('\|');
1174	break;
1175	#endif
1176	case LEX_IS_LINE_COMMENT_START:
1177	/* FIXME-someday: The two character comment stuff was badly
1178	thought out. On i386, we want '/' as line comment start
1179	AND we want C style comments. hence this hack. The
1180	whole lexical process should be reworked. xoxorich. */
1181	if (ch == '/')
1182	{
1183	ch2 = GET ();
1184	if (ch2 == '*')
1185	{
1186	old_state = 3;
1187	state = -2;
1188	break;
1189	}
1190	else
1191	{
1192	UNGET (ch2);
1193	}
1194	}
1195
1196	if (state == 0 \|\| state == 1) /* Only comment at start of line. */
1197	{
1198	int startch;
1199
1200	startch = ch;
1201
1202	do
1203	{
1204	ch = GET ();
1205	}
1206	while (ch != EOF && IS_WHITESPACE (ch));
1207
1208	if (ch == EOF)
1209	{
1210	as_warn (_("end of file in comment; newline inserted"));
1211	PUT ('\n');
1212	break;
1213	}
1214
1215	if (ch < '0' \|\| ch > '9' \|\| state != 0 \|\| startch != '#')
1216	{
1217	/* Not a cpp line. */
1218	while (ch != EOF && !IS_NEWLINE (ch))
1219	ch = GET ();
1220	if (ch == EOF)
1221	{
1222	as_warn (_("end of file in comment; newline inserted"));
1223	PUT ('\n');
1224	}
1225	else /* IS_NEWLINE (ch) */
1226	{
1227	/* To process non-zero add_newlines. */
1228	UNGET (ch);
1229	}
1230	state = 0;
1231	break;
1232	}
1233	/* Looks like `# 123 "filename"' from cpp. */
1234	UNGET (ch);
1235	old_state = 4;
1236	state = -1;
1237	if (scrub_m68k_mri)
1238	out_string = "\tlinefile ";
1239	else
1240	out_string = "\t.linefile ";
1241	PUT (*out_string++);
1242	break;
1243	}
1244
1245	#ifdef TC_D10V
1246	/* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1247	Trap is the only short insn that has a first operand that is
1248	neither register nor label.
1249	We must prevent "exef0f ||trap #1" from degenerating into "exef0f ||trap#1".
1250	We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1251	already LEX_IS_LINE_COMMENT_START. However, it is the
1252	only character in line_comment_chars for d10v, hence we
1253	can recognize it as such. */
1254	/* An alternative approach would be to reset the state to 1 when
1255	we see '||', '<-' or '->', but that seems to be overkill. */
1256	if (state == 10)
1257	PUT (' ');
1258	#endif
1259	/* We have a line comment character which is not at the
1260	start of a line. If this is also a normal comment
1261	character, fall through. Otherwise treat it as a default
1262	character. */
1263	if (strchr (tc_comment_chars, ch) == NULL
1264	&& (! scrub_m68k_mri
1265	\|\| (ch != '!' && ch != '*')))
1266	goto de_fault;
1267	if (scrub_m68k_mri
1268	&& (ch == '!' \|\| ch == '*' \|\| ch == '#')
1269	&& state != 1
1270	&& state != 10)
1271	goto de_fault;
1272	/* Fall through. */
1273	case LEX_IS_COMMENT_START:
1274	#if defined TC_ARM && defined OBJ_ELF
1275	/* On the ARM, `@' is the comment character.
1276	Unfortunately this is also a special character in ELF .symver
1277	directives (and .type, though we deal with those another way).
1278	So we check if this line is such a directive, and treat
1279	the character as default if so. This is a hack. */
1280	if ((symver_state != NULL) && (*symver_state == 0))
1281	goto de_fault;
1282	#endif
1283
1284	#ifdef TC_ARM
1285	/* For the ARM, care is needed not to damage occurrences of \@
1286	by stripping the @ onwards. Yuck. */
1287	if (to > tostart && *(to - 1) == '\\')
1288	/* Do not treat the @ as a start-of-comment. */
1289	goto de_fault;
1290	#endif
1291
1292	#ifdef WARN_COMMENTS
1293	if (!found_comment)
1294	found_comment_file = as_where (&found_comment);
1295	#endif
1296	do
1297	{
1298	ch = GET ();
1299	}
1300	while (ch != EOF && !IS_NEWLINE (ch));
1301	if (ch == EOF)
1302	as_warn (_("end of file in comment; newline inserted"));
1303	state = 0;
1304	PUT ('\n');
1305	break;
1306
1307	#ifdef H_TICK_HEX
1308	case LEX_IS_H:
1309	/* Look for strings like H'[0-9A-Fa-f] and if found, replace
1310	the H' with 0x to make them gas-style hex characters. */
1311	if (enable_h_tick_hex)
1312	{
1313	char quot;
1314
1315	quot = GET ();
1316	if (quot == '\'')
1317	{
1318	UNGET ('x');
1319	ch = '0';
1320	}
1321	else
1322	UNGET (quot);
1323	}
1324	/* FALL THROUGH */
1325	#endif
1326
1327	case LEX_IS_SYMBOL_COMPONENT:
1328	if (state == 10)
1329	{
1330	/* This is a symbol character following another symbol
1331	character, with whitespace in between. We skipped
1332	the whitespace earlier, so output it now. */
1333	UNGET (ch);
1334	state = 3;
1335	PUT (' ');
1336	break;
1337	}
1338
1339	#ifdef TC_Z80
1340	/* "af'" is a symbol containing '\''. */
1341	if (state == 3 && (ch == 'a' \|\| ch == 'A'))
1342	{
1343	state = 16;
1344	PUT (ch);
1345	ch = GET ();
1346	if (ch == 'f' \|\| ch == 'F')
1347	{
1348	state = 17;
1349	PUT (ch);
1350	break;
1351	}
1352	else
1353	{
1354	state = 9;
1355	if (ch == EOF \|\| !IS_SYMBOL_COMPONENT (ch))
1356	{
1357	if (ch != EOF)
1358	UNGET (ch);
1359	break;
1360	}
1361	}
1362	}
1363	#endif
1364	if (state == 3)
1365	state = 9;
1366
1367	/* This is a common case. Quickly copy CH and all the
1368	following symbol component or normal characters. */
1369	if (to + 1 < toend
1370	&& mri_state == NULL
1371	#if defined TC_ARM && defined OBJ_ELF
1372	&& symver_state == NULL
1373	#endif
1374	)
1375	{
1376	char *s;
1377	ptrdiff_t len;
1378
1379	for (s = from; s < fromend; s++)
1380	{
1381	int type;
1382
1383	ch2 = (unsigned char ) s;
1384	type = lex[ch2];
1385	if (type != 0
1386	&& type != LEX_IS_SYMBOL_COMPONENT)
1387	break;
1388	}
1389
1390	if (s > from)
1391	/* Handle the last character normally, for
1392	simplicity. */
1393	--s;
1394
1395	len = s - from;
1396
1397	if (len > (toend - to) - 1)
1398	len = (toend - to) - 1;
1399
1400	if (len > 0)
1401	{
1402	PUT (ch);
1403	memcpy (to, from, len);
1404	to += len;
1405	from += len;
1406	if (to >= toend)
1407	goto tofull;
1408	ch = GET ();
1409	}
1410	}
1411
1412	/* Fall through. */
1413	default:
1414	de_fault:
1415	/* Some relatively `normal' character. */
1416	if (state == 0)
1417	{
1418	state = 11; /* Now seeing label definition. */
1419	}
1420	else if (state == 1)
1421	{
1422	state = 2; /* Ditto. */
1423	}
1424	else if (state == 9)
1425	{
1426	if (!IS_SYMBOL_COMPONENT (ch))
1427	state = 3;
1428	}
1429	else if (state == 10)
1430	{
1431	if (ch == '\\')
1432	{
1433	/* Special handling for backslash: a backslash may
1434	be the beginning of a formal parameter (of a
1435	macro) following another symbol character, with
1436	whitespace in between. If that is the case, we
1437	output a space before the parameter. Strictly
1438	speaking, correct handling depends upon what the
1439	macro parameter expands into; if the parameter
1440	expands into something which does not start with
1441	an operand character, then we don't want to keep
1442	the space. We don't have enough information to
1443	make the right choice, so here we are making the
1444	choice which is more likely to be correct. */
1445	if (to + 1 >= toend)
1446	{
1447	/* If we're near the end of the buffer, save the
1448	character for the next time round. Otherwise
1449	we'll lose our state. */
1450	UNGET (ch);
1451	goto tofull;
1452	}
1453	*to++ = ' ';
1454	}
1455
1456	state = 3;
1457	}
1458	PUT (ch);
1459	break;
1460	}
1461	}
1462
1463	/*NOTREACHED*/
1464
1465	fromeof:
1466	/* We have reached the end of the input. */
1467	return to - tostart;
1468
1469	tofull:
1470	/* The output buffer is full. Save any input we have not yet
1471	processed. */
1472	if (fromend > from)
1473	{
1474	saved_input = from;
1475	saved_input_len = fromend - from;
1476	}
1477	else
1478	saved_input = NULL;
1479
1480	return to - tostart;
1481	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: binutils/trunk/gas/app.c@ 1973

Download in other formats: