1 | /* -*- Mode: C -*- |
---|
2 | * |
---|
3 | * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this |
---|
4 | * grammar and to use it within software systems. THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT |
---|
5 | * ANY EXPRESS OR IMPLIED WARRANTIES. |
---|
6 | * |
---|
7 | * lex.l -- |
---|
8 | * |
---|
9 | * Author : Peter A. Buhr |
---|
10 | * Created On : Sat Sep 22 08:58:10 2001 |
---|
11 | * Last Modified By : Peter A. Buhr |
---|
12 | * Last Modified On : Thu Jan 23 16:17:09 2003 |
---|
13 | * Update Count : 191 |
---|
14 | */ |
---|
15 | |
---|
16 | %option yylineno |
---|
17 | |
---|
18 | %{ |
---|
19 | /* This lexer assumes the program has been preprocessed by cpp. Hence, all user level preprocessor |
---|
20 | directives have been performed and removed from the source. The only exceptions are preprocessor |
---|
21 | directives passed to the compiler (e.g., line-number directives) and C/C++ style comments, which |
---|
22 | are ignored. */ |
---|
23 | |
---|
24 | /*************** Includes and Defines *****************************/ |
---|
25 | |
---|
26 | #include <string> |
---|
27 | |
---|
28 | #include "ParseNode.h" |
---|
29 | #include "cfa.tab.h" /* YACC generated definitions based on C++ grammar */ |
---|
30 | #include "lex.h" |
---|
31 | |
---|
32 | char *yyfilename; |
---|
33 | |
---|
/* Token-return helpers used by the scanner actions.

   Every statement-like macro below is wrapped in do { ... } while (0) so that it expands to
   exactly one statement.  Without the wrapper, only the first assignment would be governed by an
   unbraced "if", and an unbraced "if ... else" would not compile.  Each use must be followed by
   a ';' (all actions in this file already do so). */

#define WHITE_RETURN(x)		/* do nothing */
#define NEWLINE_RETURN()	WHITE_RETURN('\n')

/* Store a copy of the matched text plus the current file name and line number in yylval.tok,
   then return token code x from the scanner. */
#define RETURN_VAL(x) \
	do { \
		yylval.tok.str = new std::string(yytext); \
		yylval.tok.file = yyfilename; \
		yylval.tok.line = yylineno; \
		return(x); \
	} while (0)

#define KEYWORD_RETURN(x)	RETURN_VAL(x)		/* keyword */

/* Classify the matched name through typedefTable: plain identifier, typedef name, or (when it is
   neither) a type-generator name. */
#define IDENTIFIER_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))

#define ASCIIOP_RETURN()	RETURN_VAL((int)yytext[0])	/* single character operator */
#define NAMEDOP_RETURN(x)	RETURN_VAL(x)		/* multichar operator, with a name */

/* numeric constant: strip the CFA '_' separators from yytext before recording the token */
#define NUMERIC_RETURN(x) \
	do { \
		rm_underscore(); \
		RETURN_VAL(x); \
	} while (0)
---|
45 | |
---|
/* Squeeze every '_' out of the current token text in place: the surviving characters of yytext
   are shifted left, yyleng is set to the new length, and the text is NUL-terminated again. */
void rm_underscore() {
	int kept = 0;					/* next write position / count of characters kept */
	int scan = 0;					/* next read position */
	while ( scan < yyleng ) {
		const char ch = yytext[scan++];
		if ( ch == '_' ) continue;		/* drop separator */
		yytext[kept++] = ch;
	} // while
	yyleng = kept;
	yytext[kept] = '\0';
}
---|
57 | |
---|
58 | %} |
---|
59 | |
---|
60 | octal [0-7] |
---|
61 | nonzero [1-9] |
---|
62 | decimal [0-9] |
---|
63 | hex [0-9a-fA-F] |
---|
64 | |
---|
65 | /* identifier, GCC: $ in identifier */ |
---|
66 | universal_char "\\"((u{hex_quad})|(U{hex_quad}{2})) |
---|
67 | identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})* |
---|
68 | |
---|
69 | /* numeric constants, CFA: '_' in constant */ |
---|
70 | hex_quad {hex}{4} |
---|
71 | integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?) |
---|
72 | |
---|
73 | octal_digits ({octal})|({octal}({octal}|"_")*{octal}) |
---|
74 | octal_prefix "0""_"? |
---|
75 | octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}? |
---|
76 | |
---|
77 | nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal}) |
---|
78 | decimal_constant {nonzero_digits}{integer_suffix}? |
---|
79 | |
---|
80 | hex_digits ({hex})|({hex}({hex}|"_")*{hex}) |
---|
81 | hex_prefix "0"[xX]"_"? |
---|
82 | hex_constant {hex_prefix}{hex_digits}{integer_suffix}? |
---|
83 | |
---|
84 | decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal}) |
---|
85 | fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".") |
---|
86 | exponent "_"?[eE]"_"?[+-]?{decimal_digits} |
---|
87 | floating_suffix "_"?[flFL] |
---|
88 | floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}? |
---|
89 | |
---|
90 | binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits} |
---|
91 | hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".") |
---|
92 | hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}? |
---|
93 | |
---|
94 | /* character escape sequence, GCC: \e => esc character */ |
---|
95 | simple_escape "\\"[abefnrtv'"?\\] |
---|
96 | octal_escape "\\"{octal}{1,3} |
---|
97 | hex_escape "\\""x"{hex}+ |
---|
98 | escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char} |
---|
99 | |
---|
100 | /* display/white-space characters */ |
---|
101 | h_tab [\011] |
---|
102 | form_feed [\014] |
---|
103 | v_tab [\013] |
---|
104 | c_return [\015] |
---|
105 | h_white [ ]|{h_tab} |
---|
106 | |
---|
107 | /* operators */ |
---|
108 | op_unary_only "~"|"!" |
---|
109 | op_unary_binary "+"|"-"|"*" |
---|
110 | op_unary_pre_post "++"|"--" |
---|
111 | op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post} |
---|
112 | |
---|
113 | op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>=" |
---|
114 | op_binary_over {op_unary_binary}|{op_binary_only} |
---|
115 | op_binary_not_over "?"|"->"|"&&"|"||" |
---|
116 | operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over} |
---|
117 | |
---|
118 | %x COMMENT |
---|
119 | |
---|
120 | %% |
---|
121 | /* line directives */ |
---|
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
	/* Line marker of the form:  # <line> "<file>" <anything up to end of line>
	   Resets the scanner's idea of the current line number and source file name. */

	/* The pattern permits blanks/tabs before '#', so the number does not necessarily start at
	   yytext + 1; locate the '#' first (the pattern guarantees there is one). */
	char *hash = strchr( yytext, '#' );
	char *end_num;
	/* Line numbers are decimal; base 10 keeps a leading '0' from being read as octal.
	   strtol itself skips the blanks/tabs that may follow '#'. */
	long lineno = strtol( hash + 1, &end_num, 10 );

	char *begin_string = strchr( end_num, '"' );	/* opening quote of the file name */
	if ( begin_string ) {
		char *end_string = strchr( begin_string + 1, '"' );	/* closing quote */
		if ( end_string ) {
			long length = end_string - begin_string - 1;
			/* A fresh copy is made for every directive and the previous one is not freed
			   here: RETURN_VAL stores the yyfilename pointer in each token it returns. */
			char *filename = new char[ length + 1 ];
			memcpy( filename, begin_string + 1, length );
			filename[ length ] = '\0';
			yylineno = lineno;
			yyfilename = filename;
		} // if
	} // if
}
---|
142 | |
---|
143 | /* ignore preprocessor directives (for now) */ |
---|
144 | ^{h_white}*"#"[^\n]*"\n" ; |
---|
145 | |
---|
146 | /* ignore C style comments */ |
---|
147 | "/*" {BEGIN COMMENT;} |
---|
148 | <COMMENT>.|\n ; |
---|
149 | <COMMENT>"*/" {BEGIN 0;} |
---|
150 | |
---|
151 | /* ignore C++ style comments */ |
---|
152 | "//"[^\n]*"\n" ; |
---|
153 | |
---|
154 | /* ignore whitespace */ |
---|
155 | {h_white}+ {WHITE_RETURN(' ');} |
---|
156 | ({v_tab}|{c_return}|{form_feed})+ {WHITE_RETURN(' ');} |
---|
157 | ({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" {NEWLINE_RETURN();} |
---|
158 | |
---|
159 | /* keywords */ |
---|
160 | __alignof {KEYWORD_RETURN(ALIGNOF);} /* GCC */ |
---|
161 | __alignof__ {KEYWORD_RETURN(ALIGNOF);} /* GCC */ |
---|
162 | asm {KEYWORD_RETURN(ASM);} |
---|
163 | __asm {KEYWORD_RETURN(ASM);} /* GCC */ |
---|
164 | __asm__ {KEYWORD_RETURN(ASM);} /* GCC */ |
---|
165 | __attribute {KEYWORD_RETURN(ATTRIBUTE);} /* GCC */ |
---|
166 | __attribute__ {KEYWORD_RETURN(ATTRIBUTE);} /* GCC */ |
---|
167 | auto {KEYWORD_RETURN(AUTO);} |
---|
168 | _Bool {KEYWORD_RETURN(BOOL);} /* ANSI99 */ |
---|
169 | break {KEYWORD_RETURN(BREAK);} |
---|
170 | case {KEYWORD_RETURN(CASE);} |
---|
171 | catch {KEYWORD_RETURN(CATCH);} /* CFA */ |
---|
172 | char {KEYWORD_RETURN(CHAR);} |
---|
173 | choose {KEYWORD_RETURN(CHOOSE);} |
---|
174 | _Complex {KEYWORD_RETURN(COMPLEX);} /* ANSI99 */ |
---|
175 | __complex {KEYWORD_RETURN(COMPLEX);} /* GCC */ |
---|
176 | __complex__ {KEYWORD_RETURN(COMPLEX);} /* GCC */ |
---|
177 | const {KEYWORD_RETURN(CONST);} |
---|
178 | __const {KEYWORD_RETURN(CONST);} /* GCC */ |
---|
179 | __const__ {KEYWORD_RETURN(CONST);} /* GCC */ |
---|
180 | context {KEYWORD_RETURN(CONTEXT);} |
---|
181 | continue {KEYWORD_RETURN(CONTINUE);} |
---|
182 | default {KEYWORD_RETURN(DEFAULT);} |
---|
183 | do {KEYWORD_RETURN(DO);} |
---|
184 | double {KEYWORD_RETURN(DOUBLE);} |
---|
185 | dtype {KEYWORD_RETURN(DTYPE);} |
---|
186 | else {KEYWORD_RETURN(ELSE);} |
---|
187 | enum {KEYWORD_RETURN(ENUM);} |
---|
188 | __extension__ {KEYWORD_RETURN(EXTENSION);} /* GCC */ |
---|
189 | extern {KEYWORD_RETURN(EXTERN);} |
---|
190 | fallthru {KEYWORD_RETURN(FALLTHRU);} |
---|
191 | float {KEYWORD_RETURN(FLOAT);} |
---|
192 | for {KEYWORD_RETURN(FOR);} |
---|
193 | forall {KEYWORD_RETURN(FORALL);} |
---|
194 | fortran {KEYWORD_RETURN(FORTRAN);} |
---|
195 | ftype {KEYWORD_RETURN(FTYPE);} |
---|
196 | goto {KEYWORD_RETURN(GOTO);} |
---|
197 | if {KEYWORD_RETURN(IF);} |
---|
198 | _Imaginary {KEYWORD_RETURN(IMAGINARY);} /* ANSI99 */ |
---|
199 | __imag {KEYWORD_RETURN(IMAGINARY);} /* GCC */ |
---|
200 | __imag__ {KEYWORD_RETURN(IMAGINARY);} /* GCC */ |
---|
201 | inline {KEYWORD_RETURN(INLINE);} /* ANSI99 */ |
---|
202 | __inline {KEYWORD_RETURN(INLINE);} /* GCC */ |
---|
203 | __inline__ {KEYWORD_RETURN(INLINE);} /* GCC */ |
---|
204 | int {KEYWORD_RETURN(INT);} |
---|
205 | __label__ {KEYWORD_RETURN(LABEL);} /* GCC */ |
---|
206 | long {KEYWORD_RETURN(LONG);} |
---|
207 | lvalue {KEYWORD_RETURN(LVALUE);} |
---|
208 | register {KEYWORD_RETURN(REGISTER);} |
---|
209 | restrict {KEYWORD_RETURN(RESTRICT);} /* ANSI99 */ |
---|
210 | __restrict {KEYWORD_RETURN(RESTRICT);} /* GCC */ |
---|
211 | __restrict__ {KEYWORD_RETURN(RESTRICT);} /* GCC */ |
---|
212 | return {KEYWORD_RETURN(RETURN);} |
---|
213 | short {KEYWORD_RETURN(SHORT);} |
---|
214 | signed {KEYWORD_RETURN(SIGNED);} |
---|
215 | __signed {KEYWORD_RETURN(SIGNED);} /* GCC */ |
---|
216 | __signed__ {KEYWORD_RETURN(SIGNED);} /* GCC */ |
---|
217 | sizeof {KEYWORD_RETURN(SIZEOF);} |
---|
218 | static {KEYWORD_RETURN(STATIC);} |
---|
219 | struct {KEYWORD_RETURN(STRUCT);} |
---|
220 | switch {KEYWORD_RETURN(SWITCH);} |
---|
221 | throw {KEYWORD_RETURN(THROW);} /* CFA */ |
---|
222 | try {KEYWORD_RETURN(TRY);} /* CFA */ |
---|
223 | type {KEYWORD_RETURN(TYPE);} |
---|
224 | typedef {KEYWORD_RETURN(TYPEDEF);} |
---|
225 | typeof {KEYWORD_RETURN(TYPEOF);} /* GCC */ |
---|
226 | __typeof {KEYWORD_RETURN(TYPEOF);} /* GCC */ |
---|
227 | __typeof__ {KEYWORD_RETURN(TYPEOF);} /* GCC */ |
---|
228 | union {KEYWORD_RETURN(UNION);} |
---|
229 | unsigned {KEYWORD_RETURN(UNSIGNED);} |
---|
230 | void {KEYWORD_RETURN(VOID);} |
---|
231 | volatile {KEYWORD_RETURN(VOLATILE);} |
---|
232 | __volatile {KEYWORD_RETURN(VOLATILE);} /* GCC */ |
---|
233 | __volatile__ {KEYWORD_RETURN(VOLATILE);} /* GCC */ |
---|
234 | while {KEYWORD_RETURN(WHILE);} |
---|
235 | |
---|
236 | /* identifier */ |
---|
237 | {identifier} {IDENTIFIER_RETURN();} |
---|
238 | |
---|
239 | /* numeric constants */ |
---|
240 | "0" {NUMERIC_RETURN(ZERO);} /* CFA */ |
---|
241 | "1" {NUMERIC_RETURN(ONE);} /* CFA */ |
---|
242 | {decimal_constant} {NUMERIC_RETURN(INTEGERconstant);} |
---|
243 | {octal_constant} {NUMERIC_RETURN(INTEGERconstant);} |
---|
244 | {hex_constant} {NUMERIC_RETURN(INTEGERconstant);} |
---|
245 | {floating_constant} {NUMERIC_RETURN(FLOATINGconstant);} |
---|
246 | {hex_floating_constant} {NUMERIC_RETURN(FLOATINGconstant);} |
---|
247 | |
---|
248 | /* character constant, allows empty value */ |
---|
249 | "L"?[']([^'\\\n]|{escape_seq})*['] {RETURN_VAL(CHARACTERconstant);} |
---|
250 | |
---|
251 | /* string constant */ |
---|
252 | "L"?["]([^"\\\n]|{escape_seq})*["] {RETURN_VAL(STRINGliteral);} |
---|
253 | |
---|
254 | /* punctuation */ |
---|
255 | "[" {ASCIIOP_RETURN();} |
---|
256 | "]" {ASCIIOP_RETURN();} |
---|
257 | "(" {ASCIIOP_RETURN();} |
---|
258 | ")" {ASCIIOP_RETURN();} |
---|
259 | "{" {ASCIIOP_RETURN();} |
---|
260 | "}" {ASCIIOP_RETURN();} |
---|
261 | "," {ASCIIOP_RETURN();} /* also operator */ |
---|
262 | ":" {ASCIIOP_RETURN();} |
---|
263 | ";" {ASCIIOP_RETURN();} |
---|
264 | "." {ASCIIOP_RETURN();} /* also operator */ |
---|
265 | "..." {NAMEDOP_RETURN(ELLIPSIS);} |
---|
266 | |
---|
267 | /* alternative ANSI99 brackets, "%:" & "%:%:" handled by preprocessor */ |
---|
268 | "<:" {RETURN_VAL('[');} |
---|
269 | ":>" {RETURN_VAL(']');} |
---|
270 | "<%" {RETURN_VAL('{');} |
---|
271 | "%>" {RETURN_VAL('}');} |
---|
272 | |
---|
273 | /* operators */ |
---|
274 | "!" {ASCIIOP_RETURN();} |
---|
275 | "+" {ASCIIOP_RETURN();} |
---|
276 | "-" {ASCIIOP_RETURN();} |
---|
277 | "*" {ASCIIOP_RETURN();} |
---|
278 | "/" {ASCIIOP_RETURN();} |
---|
279 | "%" {ASCIIOP_RETURN();} |
---|
280 | "^" {ASCIIOP_RETURN();} |
---|
281 | "~" {ASCIIOP_RETURN();} |
---|
282 | "&" {ASCIIOP_RETURN();} |
---|
283 | "|" {ASCIIOP_RETURN();} |
---|
284 | "<" {ASCIIOP_RETURN();} |
---|
285 | ">" {ASCIIOP_RETURN();} |
---|
286 | "=" {ASCIIOP_RETURN();} |
---|
287 | "?" {ASCIIOP_RETURN();} |
---|
288 | |
---|
289 | "++" {NAMEDOP_RETURN(ICR);} |
---|
290 | "--" {NAMEDOP_RETURN(DECR);} |
---|
291 | "==" {NAMEDOP_RETURN(EQ);} |
---|
292 | "!=" {NAMEDOP_RETURN(NE);} |
---|
293 | "<<" {NAMEDOP_RETURN(LS);} |
---|
294 | ">>" {NAMEDOP_RETURN(RS);} |
---|
295 | "<=" {NAMEDOP_RETURN(LE);} |
---|
296 | ">=" {NAMEDOP_RETURN(GE);} |
---|
297 | "&&" {NAMEDOP_RETURN(ANDAND);} |
---|
298 | "||" {NAMEDOP_RETURN(OROR);} |
---|
299 | "->" {NAMEDOP_RETURN(ARROW);} |
---|
300 | "+=" {NAMEDOP_RETURN(PLUSassign);} |
---|
301 | "-=" {NAMEDOP_RETURN(MINUSassign);} |
---|
302 | "*=" {NAMEDOP_RETURN(MULTassign);} |
---|
303 | "/=" {NAMEDOP_RETURN(DIVassign);} |
---|
304 | "%=" {NAMEDOP_RETURN(MODassign);} |
---|
305 | "&=" {NAMEDOP_RETURN(ANDassign);} |
---|
306 | "|=" {NAMEDOP_RETURN(ORassign);} |
---|
307 | "^=" {NAMEDOP_RETURN(ERassign);} |
---|
308 | "<<=" {NAMEDOP_RETURN(LSassign);} |
---|
309 | ">>=" {NAMEDOP_RETURN(RSassign);} |
---|
310 | |
---|
311 | /* CFA, operator identifier */ |
---|
312 | {op_unary}"?" {IDENTIFIER_RETURN();} /* unary */ |
---|
313 | "?"({op_unary_pre_post}|"()"|"[?]") {IDENTIFIER_RETURN();} |
---|
314 | "?"{op_binary_over}"?" {IDENTIFIER_RETURN();} /* binary */ |
---|
315 | /* |
---|
316 | This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the |
---|
317 | string "*?*?" can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practice to put |
---|
318 | a unary operator juxtaposed to an identifier, e.g., "*i", users will be annoyed if they |
---|
319 | cannot do this with respect to operator identifiers. Even with this special hack, there |
---|
320 | are 5 general cases that cannot be handled. The first case is for the function-call |
---|
321 | identifier "?()": |
---|
322 | |
---|
323 | int * ?()(); // declaration: space required after '*' |
---|
324 | * ?()(); // expression: space required after '*' |
---|
325 | |
---|
326 | Without the space, the string "*?()" is ambiguous without N character look ahead; it |
---|
327 | requires scanning ahead to determine if there is a '(', which is the start of an |
---|
328 | argument/parameter list. |
---|
329 | |
---|
330 | The 4 remaining cases occur in expressions: |
---|
331 | |
---|
332 | i++?i:0; // space required before '?' |
---|
333 | i--?i:0; // space required before '?' |
---|
334 | i?++i:0; // space required after '?' |
---|
335 | i?--i:0; // space required after '?' |
---|
336 | |
---|
337 | In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as |
---|
338 | "i"/"++?" or "i++"/"?"; it requires scanning ahead to determine if there is a '(', which |
---|
339 | is the start of an argument list. In the second two cases, the string "?++x" is |
---|
340 | ambiguous, where this string can be lexed as "?++"/"x" or "?"/"++x"; it requires scanning |
---|
341 | ahead to determine if there is a '(', which is the start of an argument list. |
---|
342 | */ |
---|
{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) {
	/* The match starts with a unary operator that is immediately followed by an operator
	   identifier.  Keep only the leading operator as this token and hand everything from the
	   first '?' onward back to the input so it is scanned again as the operator identifier. */
	if ( yytext[1] == '?' ) {
		/* 1 character unary operator */
		yyless( 1 );
		ASCIIOP_RETURN();
	} else {
		/* 2 character unary operator: "++" or "--" */
		yyless( 2 );
		NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
	} // if
}
---|
353 | |
---|
354 | /* unknown characters */ |
---|
355 | . {printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno);} |
---|
356 | |
---|
357 | %% |
---|
358 | |
---|
359 | |
---|
360 | /* Local Variables: */ |
---|
361 | /* fill-column: 100 */ |
---|
362 | /* compile-command: "gmake" */ |
---|
363 | /* End: */ |
---|