1 | /* -*- Mode: C -*- |
---|
2 | * |
---|
3 | * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this |
---|
4 | * grammar and to use it within software systems. THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT |
---|
5 | * ANY EXPRESS OR IMPLIED WARRANTIES. |
---|
6 | * |
---|
7 | * lex.l -- |
---|
8 | * |
---|
9 | * Author : Peter A. Buhr |
---|
10 | * Created On : Sat Sep 22 08:58:10 2001 |
---|
11 | * Last Modified By : Peter A. Buhr |
---|
12 | * Last Modified On : Sat Nov 1 18:09:47 2003 |
---|
13 | * Update Count : 197 |
---|
14 | */ |
---|
15 | |
---|
16 | %option yylineno |
---|
17 | |
---|
18 | %{ |
---|
/* This lexer assumes the program has been preprocessed by cpp. Hence, all user level preprocessor
   directives have been performed and removed from the source. The only exceptions are preprocessor
   directives passed to the compiler (e.g., line-number directives) and C/C++ style comments, which
   are ignored. */
23 | |
---|
24 | /*************** Includes and Defines *****************************/ |
---|
25 | |
---|
26 | #include <string> |
---|
27 | |
---|
28 | #include "lex.h" |
---|
29 | #include "ParseNode.h" |
---|
30 | #include "cfa.tab.h" /* YACC generated definitions based on C++ grammar */ |
---|
31 | |
---|
/* Name of the file being scanned; copied into the location of every token returned through
   RETURN_VAL. */
char *yyfilename;

/* White space produces no token: the macro expands to a single empty statement. */
#define WHITE_RETURN(x)		do { /* do nothing */ } while ( 0 )
#define NEWLINE_RETURN()	WHITE_RETURN('\n')

/* Store the matched text (a newly allocated std::string) and the current file/line in yylval, then
   return x from the scanner.  The do/while(0) wrapper makes the expansion exactly one statement, so
   the macro behaves correctly as the body of an unbraced if/else. */
#define RETURN_VAL(x)		do { \
		yylval.tok.str = new std::string( yytext ); \
		yylval.tok.loc.file = yyfilename; \
		yylval.tok.loc.line = yylineno; \
		return (x); \
	} while ( 0 )

#define KEYWORD_RETURN(x)	RETURN_VAL(x)		/* keyword */
/* The token kind of a name is chosen by asking typedefTable: identifier, typedef name, or otherwise
   type-generator name.  ATTRIBUTE_RETURN makes the same three-way choice with the ATTR_ tokens. */
#define IDENTIFIER_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))
#define ATTRIBUTE_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? ATTR_IDENTIFIER : typedefTable.isTypedef(yytext) ? ATTR_TYPEDEFname : ATTR_TYPEGENname))

#define ASCIIOP_RETURN()	RETURN_VAL((int)yytext[0])	/* single character operator */
#define NAMEDOP_RETURN(x)	RETURN_VAL(x)		/* multichar operator, with a name */

/* Underscores are stripped from the matched text before it is stored; also a single statement. */
#define NUMERIC_RETURN(x)	do { rm_underscore(); RETURN_VAL(x); } while ( 0 )	/* numeric constant */
49 | |
---|
50 | void rm_underscore() { /* remove underscores in constant or escape sequence */ |
---|
51 | int j = 0; |
---|
52 | for ( int i = 0; i < yyleng; i += 1 ) { |
---|
53 | if ( yytext[i] != '_' ) { |
---|
54 | yytext[j] = yytext[i]; |
---|
55 | j += 1; |
---|
56 | } // if |
---|
57 | } // for |
---|
58 | yyleng = j; |
---|
59 | yytext[yyleng] = '\0'; |
---|
60 | } |
---|
61 | |
---|
62 | %} |
---|
63 | |
---|
/* digit classes from which the constants below are built */
octal			[0-7]
nonzero			[1-9]
decimal			[0-9]
hex			[0-9a-fA-F]
/* universal character name: backslash, then 'u' and one hex_quad or 'U' and two hex_quads */
universal_char		"\\"((u{hex_quad})|(U{hex_quad}{2}))

/* identifier, GCC: $ in identifier */
identifier		([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*

/* attribute identifier, GCC: $ in identifier */
attr_identifier		"@"{identifier}

/* numeric constants, CFA: '_' in constant */
hex_quad		{hex}{4}
/* optional '_', then an unsigned and/or long (or long long) suffix, in either order */
integer_suffix		"_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)

/* each *_digits form is a single digit, or a run that may contain '_' but begins and ends with a digit */
octal_digits		({octal})|({octal}({octal}|"_")*{octal})
octal_prefix		"0""_"?
octal_constant		(("0")|({octal_prefix}{octal_digits})){integer_suffix}?

nonzero_digits		({nonzero})|({nonzero}({decimal}|"_")*{decimal})
decimal_constant	{nonzero_digits}{integer_suffix}?

hex_digits		({hex})|({hex}({hex}|"_")*{hex})
hex_prefix		"0"[xX]"_"?
hex_constant		{hex_prefix}{hex_digits}{integer_suffix}?

decimal_digits		({decimal})|({decimal}({decimal}|"_")*{decimal})
/* digits on at least one side of the '.' */
fractional_constant	({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
exponent		"_"?[eE]"_"?[+-]?{decimal_digits}
floating_suffix		"_"?[flFL]
/* a fraction with optional exponent, or plain digits with a mandatory exponent */
floating_constant	(({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?

/* hexadecimal floating constant: the binary exponent is mandatory in both alternatives */
binary_exponent		"_"?[pP]"_"?[+-]?{decimal_digits}
hex_fractional_constant	({hex_digits}?"."{hex_digits})|({hex_digits}".")
hex_floating_constant	{hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?

/* character escape sequence, GCC: \e => esc character */
simple_escape		"\\"[abefnrtv'"?\\]
octal_escape		"\\"{octal}{1,3}
hex_escape		"\\""x"{hex}+
escape_seq		{simple_escape}|{octal_escape}|{hex_escape}|{universal_char}

/* display/white-space characters */
h_tab			[\011]
form_feed		[\014]
v_tab			[\013]
c_return		[\015]
h_white			[ ]|{h_tab}

/* operators */
op_unary_only		"~"|"!"
op_unary_binary		"+"|"-"|"*"
op_unary_pre_post	"++"|"--"
op_unary		{op_unary_only}|{op_unary_binary}|{op_unary_pre_post}

op_binary_only		"/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
op_binary_over		{op_unary_binary}|{op_binary_only}
op_binary_not_over	"?"|"->"|"&&"|"||"
operator		{op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}

/* exclusive start condition entered while inside a C style comment */
%x COMMENT
126 | |
---|
127 | %% |
---|
128 | /* line directives */ |
---|
129 | ^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" { |
---|
130 | char *end_num; |
---|
131 | char *begin_string, *end_string; |
---|
132 | char *filename; |
---|
133 | long lineno, length; |
---|
134 | lineno = strtol( yytext + 1, &end_num, 0 ); |
---|
135 | begin_string = strchr( end_num, '"' ); |
---|
136 | if( begin_string ) { |
---|
137 | end_string = strchr( begin_string + 1, '"' ); |
---|
138 | if( end_string ) { |
---|
139 | length = end_string - begin_string - 1; |
---|
140 | filename = new char[ length + 1 ]; |
---|
141 | memcpy( filename, begin_string + 1, length ); |
---|
142 | filename[ length ] = '\0'; |
---|
143 | //std::cout << "file " << filename << " line " << lineno << std::endl; |
---|
144 | yylineno = lineno; |
---|
145 | yyfilename = filename; |
---|
146 | } |
---|
147 | } |
---|
148 | } |
---|
149 | |
---|
150 | /* ignore preprocessor directives (for now) */ |
---|
151 | ^{h_white}*"#"[^\n]*"\n" ; |
---|
152 | |
---|
153 | /* ignore C style comments */ |
---|
154 | "/*" {BEGIN COMMENT;} |
---|
155 | <COMMENT>.|\n ; |
---|
156 | <COMMENT>"*/" {BEGIN 0;} |
---|
157 | |
---|
158 | /* ignore C++ style comments */ |
---|
159 | "//"[^\n]*"\n" ; |
---|
160 | |
---|
161 | /* ignore whitespace */ |
---|
162 | {h_white}+ {WHITE_RETURN(' ');} |
---|
163 | ({v_tab}|{c_return}|{form_feed})+ {WHITE_RETURN(' ');} |
---|
164 | ({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" {NEWLINE_RETURN();} |
---|
165 | |
---|
	/* keywords */
	/* Each keyword returns its own token through KEYWORD_RETURN.  These rules come before the
	   identifier rule, so when a keyword and the identifier pattern match the same text the keyword
	   is chosen.  The trailing tag on a rule names the dialect that introduces the spelling. */
__alignof		{KEYWORD_RETURN(ALIGNOF);}		/* GCC */
__alignof__		{KEYWORD_RETURN(ALIGNOF);}		/* GCC */
asm			{KEYWORD_RETURN(ASM);}
__asm			{KEYWORD_RETURN(ASM);}			/* GCC */
__asm__			{KEYWORD_RETURN(ASM);}			/* GCC */
__attribute		{KEYWORD_RETURN(ATTRIBUTE);}		/* GCC */
__attribute__		{KEYWORD_RETURN(ATTRIBUTE);}		/* GCC */
auto			{KEYWORD_RETURN(AUTO);}
_Bool			{KEYWORD_RETURN(BOOL);}			/* ANSI99 */
break			{KEYWORD_RETURN(BREAK);}
case			{KEYWORD_RETURN(CASE);}
catch			{KEYWORD_RETURN(CATCH);}		/* CFA */
char			{KEYWORD_RETURN(CHAR);}
choose			{KEYWORD_RETURN(CHOOSE);}
_Complex		{KEYWORD_RETURN(COMPLEX);}		/* ANSI99 */
__complex		{KEYWORD_RETURN(COMPLEX);}		/* GCC */
__complex__		{KEYWORD_RETURN(COMPLEX);}		/* GCC */
const			{KEYWORD_RETURN(CONST);}
__const			{KEYWORD_RETURN(CONST);}		/* GCC */
__const__		{KEYWORD_RETURN(CONST);}		/* GCC */
context			{KEYWORD_RETURN(CONTEXT);}
continue		{KEYWORD_RETURN(CONTINUE);}
default			{KEYWORD_RETURN(DEFAULT);}
do			{KEYWORD_RETURN(DO);}
double			{KEYWORD_RETURN(DOUBLE);}
dtype			{KEYWORD_RETURN(DTYPE);}
else			{KEYWORD_RETURN(ELSE);}
enum			{KEYWORD_RETURN(ENUM);}
__extension__		{KEYWORD_RETURN(EXTENSION);}		/* GCC */
extern			{KEYWORD_RETURN(EXTERN);}
fallthru		{KEYWORD_RETURN(FALLTHRU);}
finally			{KEYWORD_RETURN(FINALLY);}		/* CFA */
float			{KEYWORD_RETURN(FLOAT);}
for			{KEYWORD_RETURN(FOR);}
forall			{KEYWORD_RETURN(FORALL);}
fortran			{KEYWORD_RETURN(FORTRAN);}
ftype			{KEYWORD_RETURN(FTYPE);}
goto			{KEYWORD_RETURN(GOTO);}
if			{KEYWORD_RETURN(IF);}
_Imaginary		{KEYWORD_RETURN(IMAGINARY);}		/* ANSI99 */
	/* NOTE(review): in GCC, __imag/__imag__ is an operator yielding the imaginary part of a complex
	   value, whereas _Imaginary is a type specifier -- confirm the grammar intends a single token. */
__imag			{KEYWORD_RETURN(IMAGINARY);}		/* GCC */
__imag__		{KEYWORD_RETURN(IMAGINARY);}		/* GCC */
inline			{KEYWORD_RETURN(INLINE);}		/* ANSI99 */
__inline		{KEYWORD_RETURN(INLINE);}		/* GCC */
__inline__		{KEYWORD_RETURN(INLINE);}		/* GCC */
int			{KEYWORD_RETURN(INT);}
__label__		{KEYWORD_RETURN(LABEL);}		/* GCC */
long			{KEYWORD_RETURN(LONG);}
lvalue			{KEYWORD_RETURN(LVALUE);}
register		{KEYWORD_RETURN(REGISTER);}
restrict		{KEYWORD_RETURN(RESTRICT);}		/* ANSI99 */
__restrict		{KEYWORD_RETURN(RESTRICT);}		/* GCC */
__restrict__		{KEYWORD_RETURN(RESTRICT);}		/* GCC */
return			{KEYWORD_RETURN(RETURN);}
short			{KEYWORD_RETURN(SHORT);}
signed			{KEYWORD_RETURN(SIGNED);}
__signed		{KEYWORD_RETURN(SIGNED);}		/* GCC */
__signed__		{KEYWORD_RETURN(SIGNED);}		/* GCC */
sizeof			{KEYWORD_RETURN(SIZEOF);}
static			{KEYWORD_RETURN(STATIC);}
struct			{KEYWORD_RETURN(STRUCT);}
switch			{KEYWORD_RETURN(SWITCH);}
throw			{KEYWORD_RETURN(THROW);}		/* CFA */
try			{KEYWORD_RETURN(TRY);}			/* CFA */
type			{KEYWORD_RETURN(TYPE);}
typedef			{KEYWORD_RETURN(TYPEDEF);}
typeof			{KEYWORD_RETURN(TYPEOF);}		/* GCC */
__typeof		{KEYWORD_RETURN(TYPEOF);}		/* GCC */
__typeof__		{KEYWORD_RETURN(TYPEOF);}		/* GCC */
union			{KEYWORD_RETURN(UNION);}
unsigned		{KEYWORD_RETURN(UNSIGNED);}
void			{KEYWORD_RETURN(VOID);}
volatile		{KEYWORD_RETURN(VOLATILE);}
__volatile		{KEYWORD_RETURN(VOLATILE);}		/* GCC */
__volatile__		{KEYWORD_RETURN(VOLATILE);}		/* GCC */
while			{KEYWORD_RETURN(WHILE);}
243 | |
---|
	/* identifier */
	/* The token kind (identifier, typedef name or type-generator name) is chosen by the
	   IDENTIFIER_RETURN / ATTRIBUTE_RETURN macros from typedefTable. */
{identifier}		{IDENTIFIER_RETURN();}
{attr_identifier}	{ATTRIBUTE_RETURN();}

	/* numeric constants */
	/* "0" and "1" have their own tokens; being listed first, they are chosen over the general
	   constant patterns that match the same single character.  NUMERIC_RETURN removes '_'
	   separators from the text before returning. */
"0"			{NUMERIC_RETURN(ZERO);}			/* CFA */
"1"			{NUMERIC_RETURN(ONE);}			/* CFA */
{decimal_constant}	{NUMERIC_RETURN(INTEGERconstant);}
{octal_constant}	{NUMERIC_RETURN(INTEGERconstant);}
{hex_constant}		{NUMERIC_RETURN(INTEGERconstant);}
{floating_constant}	{NUMERIC_RETURN(FLOATINGconstant);}
{hex_floating_constant}	{NUMERIC_RETURN(FLOATINGconstant);}

	/* character constant, allows empty value */
	/* optional L prefix; the body is any characters other than quote, backslash and newline, or
	   escape sequences; the returned text includes the prefix and the quotes */
"L"?[']([^'\\\n]|{escape_seq})*['] {RETURN_VAL(CHARACTERconstant);}

	/* string constant */
	/* same shape as the character constant, delimited by double quotes */
"L"?["]([^"\\\n]|{escape_seq})*["] {RETURN_VAL(STRINGliteral);}
262 | |
---|
	/* punctuation */
	/* single-character tokens are returned as their own character code (ASCIIOP_RETURN) */
"["			{ASCIIOP_RETURN();}
"]"			{ASCIIOP_RETURN();}
"("			{ASCIIOP_RETURN();}
")"			{ASCIIOP_RETURN();}
"{"			{ASCIIOP_RETURN();}
"}"			{ASCIIOP_RETURN();}
","			{ASCIIOP_RETURN();}			/* also operator */
":"			{ASCIIOP_RETURN();}
";"			{ASCIIOP_RETURN();}
"."			{ASCIIOP_RETURN();}			/* also operator */
"..."			{NAMEDOP_RETURN(ELLIPSIS);}

	/* alternative ANSI99 brackets, "%:" & "%:%:" handled by preprocessor */
	/* the token returned is the equivalent bracket character; the stored text is the digraph as written */
"<:"			{RETURN_VAL('[');}
":>"			{RETURN_VAL(']');}
"<%"			{RETURN_VAL('{');}
"%>"			{RETURN_VAL('}');}

	/* operators */
"!"			{ASCIIOP_RETURN();}
"+"			{ASCIIOP_RETURN();}
"-"			{ASCIIOP_RETURN();}
"*"			{ASCIIOP_RETURN();}
"/"			{ASCIIOP_RETURN();}
"%"			{ASCIIOP_RETURN();}
"^"			{ASCIIOP_RETURN();}
"~"			{ASCIIOP_RETURN();}
"&"			{ASCIIOP_RETURN();}
"|"			{ASCIIOP_RETURN();}
"<"			{ASCIIOP_RETURN();}
">"			{ASCIIOP_RETURN();}
"="			{ASCIIOP_RETURN();}
"?"			{ASCIIOP_RETURN();}

	/* multi-character operators each have a named token */
"++"			{NAMEDOP_RETURN(ICR);}
"--"			{NAMEDOP_RETURN(DECR);}
"=="			{NAMEDOP_RETURN(EQ);}
"!="			{NAMEDOP_RETURN(NE);}
"<<"			{NAMEDOP_RETURN(LS);}
">>"			{NAMEDOP_RETURN(RS);}
"<="			{NAMEDOP_RETURN(LE);}
">="			{NAMEDOP_RETURN(GE);}
"&&"			{NAMEDOP_RETURN(ANDAND);}
"||"			{NAMEDOP_RETURN(OROR);}
"->"			{NAMEDOP_RETURN(ARROW);}
"+="			{NAMEDOP_RETURN(PLUSassign);}
"-="			{NAMEDOP_RETURN(MINUSassign);}
"*="			{NAMEDOP_RETURN(MULTassign);}
"/="			{NAMEDOP_RETURN(DIVassign);}
"%="			{NAMEDOP_RETURN(MODassign);}
"&="			{NAMEDOP_RETURN(ANDassign);}
"|="			{NAMEDOP_RETURN(ORassign);}
"^="			{NAMEDOP_RETURN(ERassign);}
"<<="			{NAMEDOP_RETURN(LSassign);}
">>="			{NAMEDOP_RETURN(RSassign);}

	/* CFA, operator identifier */
	/* An operator written with '?' in each operand position is lexed as a name and classified by
	   IDENTIFIER_RETURN: a unary operator followed by '?'; '?' followed by "++", "--", "()" or
	   "[?]"; and '?', an overloadable binary operator, '?'. */
{op_unary}"?"		{IDENTIFIER_RETURN();}			/* unary */
"?"({op_unary_pre_post}|"()"|"[?]") {IDENTIFIER_RETURN();}
"?"{op_binary_over}"?"	{IDENTIFIER_RETURN();}			/* binary */
324 | /* |
---|
325 | This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the |
---|
326 | string "*?*?" can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practise to put |
---|
327 | a unary operator juxtaposed to an identifier, e.g., "*i", users will be annoyed if they |
---|
328 | cannot do this with respect to operator identifiers. Even with this special hack, there |
---|
329 | are 5 general cases that cannot be handled. The first case is for the function-call |
---|
330 | identifier "?()": |
---|
331 | |
---|
332 | int * ?()(); // declaration: space required after '*' |
---|
333 | * ?()(); // expression: space required after '*' |
---|
334 | |
---|
335 | Without the space, the string "*?()" is ambiguous without N character look ahead; it |
---|
336 | requires scanning ahead to determine if there is a '(', which is the start of an |
---|
337 | argument/parameter list. |
---|
338 | |
---|
339 | The 4 remaining cases occur in expressions: |
---|
340 | |
---|
341 | i++?i:0; // space required before '?' |
---|
342 | i--?i:0; // space required before '?' |
---|
343 | i?++i:0; // space required after '?' |
---|
344 | i?--i:0; // space required after '?' |
---|
345 | |
---|
346 | In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as |
---|
347 | "i"/"++?" or "i++"/"?"; it requires scanning ahead to determine if there is a '(', which |
---|
348 | is the start of an argument list. In the second two cases, the string "?++x" is |
---|
349 | ambiguous, where this string can be lexed as "?++"/"x" or "?"/"++x"; it requires scanning |
---|
350 | ahead to determine if there is a '(', which is the start of an argument list. |
---|
351 | */ |
---|
352 | {op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) { |
---|
353 | // 1 or 2 character unary operator ? |
---|
354 | int i = yytext[1] == '?' ? 1 : 2; |
---|
355 | yyless( i ); /* put back characters up to first '?' */ |
---|
356 | if ( i > 1 ) { |
---|
357 | NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR ); |
---|
358 | } else { |
---|
359 | ASCIIOP_RETURN(); |
---|
360 | } // if |
---|
361 | } |
---|
362 | |
---|
	/* unknown characters */
	/* Catch-all for a single character that no earlier rule matched: a message with the character and
	   the current line number is printed with printf, no token is returned, and scanning continues. */
.			{printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno);}
365 | |
---|
366 | %% |
---|
367 | |
---|
368 | |
---|
369 | /* Local Variables: */ |
---|
370 | /* fill-column: 100 */ |
---|
371 | /* compile-command: "gmake" */ |
---|
372 | /* End: */ |
---|