source: translator/Parser/lex.l@ c8ffe20b

ADT aaron-thesis arm-eh ast-experimental cleanup-dtors ctor deferred_resn demangler enum forall-pointer-decay gc_noraii jacob/cs343-translation jenkins-sandbox memory new-ast new-ast-unique-expr new-env no_list persistent-indexer pthread-emulation qualifiedEnum resolv-new string with_gc
Last change on this file since c8ffe20b was 8c17ab0, checked in by Peter A. Buhr <pabuhr@…>, 11 years ago

add quoted identifiers, add compilation include directory, reformatted some files

  • Property mode set to 100644
File size: 13.2 KB
RevLine 
[51b73452]1/* -*- Mode: C -*-
2 *
3 * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this
4 * grammar and to use it within software systems. THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT
5 * ANY EXPRESS OR IMPLIED WARRANTIES.
6 *
7 * lex.l --
8 *
9 * Author : Peter A. Buhr
10 * Created On : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
[8c17ab0]12 * Last Modified On : Tue Nov 11 08:10:05 2014
13 * Update Count : 215
[51b73452]14 */
15
16%option yylineno
17
18%{
[8c17ab0]19// This lexer assumes the program has been preprocessed by cpp. Hence, all user-level preprocessor
20// directives have been performed and removed from the source. The only exceptions are preprocessor
21// directives passed to the compiler (e.g., line-number directives) and C/C++ style comments, which
22// are ignored.
[51b73452]23
[8c17ab0]24//**************************** Includes and Defines ****************************
[51b73452]25
26#include <string>
27
28#include "lex.h"
29#include "ParseNode.h"
[8c17ab0]30#include "cfa.tab.h" // YACC generated definitions based on C++ grammar
[51b73452]31
32char *yyfilename;
33
// White space and newlines produce no token: WHITE_RETURN expands to nothing, so the action of a
// white-space rule is empty and scanning simply continues.
#define WHITE_RETURN(x)		// do nothing
#define NEWLINE_RETURN()	WHITE_RETURN('\n')

// Store a freshly allocated copy of the matched text and the current file/line in yylval.tok, then
// return token code x. The body is wrapped in do/while(0) so the macro expands to exactly one
// statement; without the wrapper, "if ( c ) RETURN_VAL( x );" would guard only the first assignment
// and always execute the return.
// NOTE(review): the std::string is allocated with new and not released here -- presumably the
// consumer of yylval owns it; confirm against the parser.
#define RETURN_VAL(x)		do { \
					yylval.tok.str = new std::string(yytext); \
					yylval.tok.loc.file = yyfilename; \
					yylval.tok.loc.line = yylineno; \
					return(x); \
				} while ( 0 )

#define KEYWORD_RETURN(x)	RETURN_VAL(x)			// keyword
// Classify an identifier through typedefTable: IDENTIFIER, else TYPEDEFname, else TYPEGENname.
#define IDENTIFIER_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))
//#define ATTRIBUTE_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? ATTR_IDENTIFIER : typedefTable.isTypedef(yytext) ? ATTR_TYPEDEFname : ATTR_TYPEGENname))
#define ATTRIBUTE_RETURN()	RETURN_VAL(ATTR_IDENTIFIER)

#define ASCIIOP_RETURN()	RETURN_VAL((int)yytext[0])	// single character operator
#define NAMEDOP_RETURN(x)	RETURN_VAL(x)			// multichar operator, with a name

// Strip '_' separators from the matched text before it is copied into yylval.
#define NUMERIC_RETURN(x)	do { rm_underscore(); RETURN_VAL(x); } while ( 0 )	// numeric constant
[51b73452]50
[8c17ab0]51void rm_underscore() { // remove underscores in constant or escape sequence
[51b73452]52 int j = 0;
53 for ( int i = 0; i < yyleng; i += 1 ) {
54 if ( yytext[i] != '_' ) {
55 yytext[j] = yytext[i];
56 j += 1;
57 } // if
58 } // for
59 yyleng = j;
60 yytext[yyleng] = '\0';
61}
62
63%}
64
65octal [0-7]
66nonzero [1-9]
67decimal [0-9]
68hex [0-9a-fA-F]
69universal_char "\\"((u{hex_quad})|(U{hex_quad}{2}))
70
[8c17ab0]71 // identifier, GCC: $ in identifier
[51b73452]72identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
73
[8c17ab0]74 // quoted identifier
75quoted_identifier "`"{identifier}"`"
76
77 // attribute identifier, GCC: $ in identifier
[51b73452]78attr_identifier "@"{identifier}
79
[8c17ab0]80 // numeric constants, CFA: '_' in constant
[51b73452]81hex_quad {hex}{4}
82integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)
83
84octal_digits ({octal})|({octal}({octal}|"_")*{octal})
85octal_prefix "0""_"?
86octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?
87
88nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
89decimal_constant {nonzero_digits}{integer_suffix}?
90
91hex_digits ({hex})|({hex}({hex}|"_")*{hex})
92hex_prefix "0"[xX]"_"?
93hex_constant {hex_prefix}{hex_digits}{integer_suffix}?
94
95decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
96fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
97exponent "_"?[eE]"_"?[+-]?{decimal_digits}
98floating_suffix "_"?[flFL]
99floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
100
101binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
102hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
103hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
104
[8c17ab0]105 // character escape sequence, GCC: \e => esc character
[51b73452]106simple_escape "\\"[abefnrtv'"?\\]
[8c17ab0]107 // ' stop highlighting
[51b73452]108octal_escape "\\"{octal}{1,3}
109hex_escape "\\""x"{hex}+
110escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
111
[8c17ab0]112 // display/white-space characters
[51b73452]113h_tab [\011]
114form_feed [\014]
115v_tab [\013]
116c_return [\015]
117h_white [ ]|{h_tab}
118
[8c17ab0]119 // operators
[51b73452]120op_unary_only "~"|"!"
121op_unary_binary "+"|"-"|"*"
122op_unary_pre_post "++"|"--"
123op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}
124
125op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
126op_binary_over {op_unary_binary}|{op_binary_only}
127op_binary_not_over "?"|"->"|"&&"|"||"
128operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
129
130%x COMMENT
[8c17ab0]131%x QUOTED
[51b73452]132
133%%
134 /* line directives */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
	/* " stop highlighting */
	// Line directive of the form:  # <line> "<file>" ...
	// Updates yylineno and yyfilename so the tokens that follow carry the stated location.
	char *hash = strchr( yytext, '#' );			// blanks may precede '#', so it need not be yytext[0]
	if ( hash ) {
		char *end_num;
		// The line number is a decimal digit sequence; base 10 keeps a leading zero from being
		// read as octal. strtol itself skips the blanks between '#' and the digits.
		long lineno = strtol( hash + 1, &end_num, 10 );
		char *begin_string = strchr( end_num, '"' );
		char *end_string = begin_string ? strchr( begin_string + 1, '"' ) : 0;
		if ( end_string ) {
			long length = end_string - begin_string - 1;
			// When the name is unchanged, keep the existing buffer rather than allocating a duplicate.
			// A replaced buffer is deliberately not freed: RETURN_VAL copies the yyfilename pointer
			// into every token location, so earlier tokens may still refer to it.
			if ( yyfilename == 0 || strlen( yyfilename ) != (size_t)length
				 || memcmp( yyfilename, begin_string + 1, length ) != 0 ) {
				char *filename = new char[ length + 1 ];
				memcpy( filename, begin_string + 1, length );
				filename[ length ] = '\0';
				yyfilename = filename;
			} // if
			yylineno = lineno;
		} // if
	} // if
}
156
157 /* ignore preprocessor directives (for now) */
158^{h_white}*"#"[^\n]*"\n" ;
159
160 /* ignore C style comments */
"/*"			{ BEGIN( COMMENT ); }
	/* inside a comment: the two-character terminator is the longest match, everything else is skipped */
<COMMENT>"*/"		{ BEGIN( INITIAL ); }
<COMMENT>.|\n		;
164
165 /* ignore C++ style comments */
166"//"[^\n]*"\n" ;
167
168 /* ignore whitespace */
169{h_white}+ {WHITE_RETURN(' ');}
170({v_tab}|{c_return}|{form_feed})+ {WHITE_RETURN(' ');}
171({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" {NEWLINE_RETURN();}
172
173 /* keywords */
174__alignof {KEYWORD_RETURN(ALIGNOF);} /* GCC */
175__alignof__ {KEYWORD_RETURN(ALIGNOF);} /* GCC */
176asm {KEYWORD_RETURN(ASM);}
177__asm {KEYWORD_RETURN(ASM);} /* GCC */
178__asm__ {KEYWORD_RETURN(ASM);} /* GCC */
179__attribute {KEYWORD_RETURN(ATTRIBUTE);} /* GCC */
180__attribute__ {KEYWORD_RETURN(ATTRIBUTE);} /* GCC */
181auto {KEYWORD_RETURN(AUTO);}
182_Bool {KEYWORD_RETURN(BOOL);} /* ANSI99 */
183break {KEYWORD_RETURN(BREAK);}
184case {KEYWORD_RETURN(CASE);}
185catch {KEYWORD_RETURN(CATCH);} /* CFA */
186char {KEYWORD_RETURN(CHAR);}
[8c17ab0]187choose {KEYWORD_RETURN(CHOOSE);} /* CFA */
[51b73452]188_Complex {KEYWORD_RETURN(COMPLEX);} /* ANSI99 */
189__complex {KEYWORD_RETURN(COMPLEX);} /* GCC */
190__complex__ {KEYWORD_RETURN(COMPLEX);} /* GCC */
191const {KEYWORD_RETURN(CONST);}
192__const {KEYWORD_RETURN(CONST);} /* GCC */
193__const__ {KEYWORD_RETURN(CONST);} /* GCC */
[8c17ab0]194context {KEYWORD_RETURN(CONTEXT);} /* CFA */
[51b73452]195continue {KEYWORD_RETURN(CONTINUE);}
196default {KEYWORD_RETURN(DEFAULT);}
197do {KEYWORD_RETURN(DO);}
198double {KEYWORD_RETURN(DOUBLE);}
[8c17ab0]199dtype {KEYWORD_RETURN(DTYPE);} /* CFA */
[51b73452]200else {KEYWORD_RETURN(ELSE);}
201enum {KEYWORD_RETURN(ENUM);}
202__extension__ {KEYWORD_RETURN(EXTENSION);} /* GCC */
203extern {KEYWORD_RETURN(EXTERN);}
[8c17ab0]204fallthru {KEYWORD_RETURN(FALLTHRU);} /* CFA */
[51b73452]205finally {KEYWORD_RETURN(FINALLY);} /* CFA */
206float {KEYWORD_RETURN(FLOAT);}
207for {KEYWORD_RETURN(FOR);}
[8c17ab0]208forall {KEYWORD_RETURN(FORALL);} /* CFA */
[51b73452]209fortran {KEYWORD_RETURN(FORTRAN);}
[8c17ab0]210ftype {KEYWORD_RETURN(FTYPE);} /* CFA */
[51b73452]211goto {KEYWORD_RETURN(GOTO);}
212if {KEYWORD_RETURN(IF);}
213_Imaginary {KEYWORD_RETURN(IMAGINARY);} /* ANSI99 */
214__imag {KEYWORD_RETURN(IMAGINARY);} /* GCC */
215__imag__ {KEYWORD_RETURN(IMAGINARY);} /* GCC */
216inline {KEYWORD_RETURN(INLINE);} /* ANSI99 */
217__inline {KEYWORD_RETURN(INLINE);} /* GCC */
218__inline__ {KEYWORD_RETURN(INLINE);} /* GCC */
219int {KEYWORD_RETURN(INT);}
220__label__ {KEYWORD_RETURN(LABEL);} /* GCC */
221long {KEYWORD_RETURN(LONG);}
[8c17ab0]222lvalue {KEYWORD_RETURN(LVALUE);} /* CFA */
[51b73452]223register {KEYWORD_RETURN(REGISTER);}
224restrict {KEYWORD_RETURN(RESTRICT);} /* ANSI99 */
225__restrict {KEYWORD_RETURN(RESTRICT);} /* GCC */
226__restrict__ {KEYWORD_RETURN(RESTRICT);} /* GCC */
227return {KEYWORD_RETURN(RETURN);}
228short {KEYWORD_RETURN(SHORT);}
229signed {KEYWORD_RETURN(SIGNED);}
230__signed {KEYWORD_RETURN(SIGNED);} /* GCC */
231__signed__ {KEYWORD_RETURN(SIGNED);} /* GCC */
232sizeof {KEYWORD_RETURN(SIZEOF);}
233static {KEYWORD_RETURN(STATIC);}
234struct {KEYWORD_RETURN(STRUCT);}
235switch {KEYWORD_RETURN(SWITCH);}
236throw {KEYWORD_RETURN(THROW);} /* CFA */
237try {KEYWORD_RETURN(TRY);} /* CFA */
[8c17ab0]238type {KEYWORD_RETURN(TYPE);} /* CFA */
[51b73452]239typedef {KEYWORD_RETURN(TYPEDEF);}
240typeof {KEYWORD_RETURN(TYPEOF);} /* GCC */
241__typeof {KEYWORD_RETURN(TYPEOF);} /* GCC */
242__typeof__ {KEYWORD_RETURN(TYPEOF);} /* GCC */
243union {KEYWORD_RETURN(UNION);}
244unsigned {KEYWORD_RETURN(UNSIGNED);}
245void {KEYWORD_RETURN(VOID);}
246volatile {KEYWORD_RETURN(VOLATILE);}
247__volatile {KEYWORD_RETURN(VOLATILE);} /* GCC */
248__volatile__ {KEYWORD_RETURN(VOLATILE);} /* GCC */
249while {KEYWORD_RETURN(WHILE);}
250
251 /* identifier */
{identifier}		{ IDENTIFIER_RETURN(); }
{attr_identifier}	{ ATTRIBUTE_RETURN(); }
	/* A backquote enters the exclusive QUOTED state, where the keyword rules are inactive, so a */
	/* name spelled like a keyword is still returned through IDENTIFIER_RETURN. */
"`"			{ BEGIN( QUOTED ); }
<QUOTED>{identifier}	{ IDENTIFIER_RETURN(); }
<QUOTED>"`"		{ BEGIN( INITIAL ); }
[51b73452]257
258 /* numeric constants */
259"0" {NUMERIC_RETURN(ZERO);} /* CFA */
260"1" {NUMERIC_RETURN(ONE);} /* CFA */
261{decimal_constant} {NUMERIC_RETURN(INTEGERconstant);}
262{octal_constant} {NUMERIC_RETURN(INTEGERconstant);}
263{hex_constant} {NUMERIC_RETURN(INTEGERconstant);}
264{floating_constant} {NUMERIC_RETURN(FLOATINGconstant);}
265{hex_floating_constant} {NUMERIC_RETURN(FLOATINGconstant);}
266
267 /* character constant, allows empty value */
268"L"?[']([^'\\\n]|{escape_seq})*['] {RETURN_VAL(CHARACTERconstant);}
[8c17ab0]269 /* ' stop highlighting */
[51b73452]270
271 /* string constant */
272"L"?["]([^"\\\n]|{escape_seq})*["] {RETURN_VAL(STRINGliteral);}
[8c17ab0]273 /* " stop highlighting */
[51b73452]274
275 /* punctuation */
276"[" {ASCIIOP_RETURN();}
277"]" {ASCIIOP_RETURN();}
278"(" {ASCIIOP_RETURN();}
279")" {ASCIIOP_RETURN();}
280"{" {ASCIIOP_RETURN();}
281"}" {ASCIIOP_RETURN();}
282"," {ASCIIOP_RETURN();} /* also operator */
283":" {ASCIIOP_RETURN();}
284";" {ASCIIOP_RETURN();}
285"." {ASCIIOP_RETURN();} /* also operator */
286"..." {NAMEDOP_RETURN(ELLIPSIS);}
287
288 /* alternative ANSI99 brackets, "%:" & "%:%:" handled by preprocessor */
289"<:" {RETURN_VAL('[');}
290":>" {RETURN_VAL(']');}
291"<%" {RETURN_VAL('{');}
292"%>" {RETURN_VAL('}');}
293
294 /* operators */
295"!" {ASCIIOP_RETURN();}
296"+" {ASCIIOP_RETURN();}
297"-" {ASCIIOP_RETURN();}
298"*" {ASCIIOP_RETURN();}
299"/" {ASCIIOP_RETURN();}
300"%" {ASCIIOP_RETURN();}
301"^" {ASCIIOP_RETURN();}
302"~" {ASCIIOP_RETURN();}
303"&" {ASCIIOP_RETURN();}
304"|" {ASCIIOP_RETURN();}
305"<" {ASCIIOP_RETURN();}
306">" {ASCIIOP_RETURN();}
307"=" {ASCIIOP_RETURN();}
308"?" {ASCIIOP_RETURN();}
309
310"++" {NAMEDOP_RETURN(ICR);}
311"--" {NAMEDOP_RETURN(DECR);}
312"==" {NAMEDOP_RETURN(EQ);}
313"!=" {NAMEDOP_RETURN(NE);}
314"<<" {NAMEDOP_RETURN(LS);}
315">>" {NAMEDOP_RETURN(RS);}
316"<=" {NAMEDOP_RETURN(LE);}
317">=" {NAMEDOP_RETURN(GE);}
318"&&" {NAMEDOP_RETURN(ANDAND);}
319"||" {NAMEDOP_RETURN(OROR);}
320"->" {NAMEDOP_RETURN(ARROW);}
321"+=" {NAMEDOP_RETURN(PLUSassign);}
322"-=" {NAMEDOP_RETURN(MINUSassign);}
323"*=" {NAMEDOP_RETURN(MULTassign);}
324"/=" {NAMEDOP_RETURN(DIVassign);}
325"%=" {NAMEDOP_RETURN(MODassign);}
326"&=" {NAMEDOP_RETURN(ANDassign);}
327"|=" {NAMEDOP_RETURN(ORassign);}
328"^=" {NAMEDOP_RETURN(ERassign);}
329"<<=" {NAMEDOP_RETURN(LSassign);}
330">>=" {NAMEDOP_RETURN(RSassign);}
331
332 /* CFA, operator identifier */
333{op_unary}"?" {IDENTIFIER_RETURN();} /* unary */
334"?"({op_unary_pre_post}|"()"|"[?]") {IDENTIFIER_RETURN();}
335"?"{op_binary_over}"?" {IDENTIFIER_RETURN();} /* binary */
336 /*
337 This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the
338 string "*?*?" can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practice to put
339 a unary operator juxtaposed to an identifier, e.g., "*i", users will be annoyed if they
340 cannot do this with respect to operator identifiers. Even with this special hack, there
341 are 5 general cases that cannot be handled. The first case is for the function-call
342 identifier "?()":
343
344 int * ?()(); // declaration: space required after '*'
345 * ?()(); // expression: space required after '*'
346
347 Without the space, the string "*?()" is ambiguous without N character look ahead; it
348 requires scanning ahead to determine if there is a '(', which is the start of an
349 argument/parameter list.
350
351 The 4 remaining cases occur in expressions:
352
353 i++?i:0; // space required before '?'
354 i--?i:0; // space required before '?'
355 i?++i:0; // space required after '?'
356 i?--i:0; // space required after '?'
357
358 In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as
359 "i"/"++?" or "i++"/"?"; it requires scanning ahead to determine if there is a '(', which
360 is the start of an argument list. In the second two cases, the string "?++i" is
361 ambiguous, where this string can be lexed as "?++"/"i" or "?"/"++i"; it requires scanning
362 ahead to determine if there is a '(', which is the start of an argument list.
363 */
{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) {
	// The match is a unary operator followed by an operator identifier. Keep only the leading
	// operator as this token and push the rest back to be rescanned as the operator identifier.
	// The operator is one character when '?' is the second character, otherwise two ("++" or "--").
	const bool two_char = yytext[1] != '?';
	yyless( two_char ? 2 : 1 );				/* put back characters up to first '?' */
	if ( two_char ) {
		NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
	} // if
	ASCIIOP_RETURN();
	}
374
375 /* unknown characters */
.			{ fprintf( stderr, "unknown character(s):\"%s\" on line %d\n", yytext, yylineno ); }	/* diagnostic goes to stderr, not stdout */
377
378%%
379
380
[8c17ab0]381// Local Variables:
382// fill-column: 100
383// compile-command: "make"
384// End:
Note: See TracBrowser for help on using the repository browser.