source: src/Parser/lex.ll@ dfee306

ADT aaron-thesis arm-eh ast-experimental cleanup-dtors ctor deferred_resn demangler enum forall-pointer-decay gc_noraii jacob/cs343-translation jenkins-sandbox memory new-ast new-ast-unique-expr new-env no_list persistent-indexer pthread-emulation qualifiedEnum resolv-new string with_gc
Last change on this file since dfee306 was de62360d, checked in by Peter A. Buhr <pabuhr@…>, 10 years ago

fix computed goto, fixed -std=, implicit typedefs for enum and aggregates, add _Noreturn _Thread_local

  • Property mode set to 100644
File size: 14.7 KB
RevLine 
[b87a5ed]1/*
2 * Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3 *
4 * The contents of this file are covered under the licence agreement in the
5 * file "LICENCE" distributed with Cforall.
[51b73452]6 *
7 * lex.l --
8 *
9 * Author : Peter A. Buhr
10 * Created On : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
[de62360d]12 * Last Modified On : Fri Jun 19 11:10:14 2015
13 * Update Count : 392
[51b73452]14 */
15
16%option yylineno
[5f2f2d7]17%option nounput
[51b73452]18
19%{
// This lexer assumes the program has been preprocessed by cpp. Hence, all user-level preprocessor directives have been
// processed and removed from the source. The only exceptions are preprocessor directives passed to the compiler (e.g.,
// line-number directives) and C/C++ style comments, which are ignored.
[51b73452]23
[8c17ab0]24//**************************** Includes and Defines ****************************
[51b73452]25
26#include <string>
27
28#include "lex.h"
29#include "ParseNode.h"
[59db689]30#include "parser.h" // YACC generated definitions based on C++ grammar
[51b73452]31
32char *yyfilename;
[b87a5ed]33std::string *strtext; // accumulate parts of character and string constant value
[51b73452]34
// Token-return helpers used by the rule actions.  Every multi-statement helper is wrapped in do ... while ( 0 ) so that
// it expands to exactly one statement and remains correct under an unbraced if/else.  Each use must be followed by a
// semicolon, as all existing uses are.

// record the current file name and line number in the token value, then return token code x from the scanner
#define RETURN_LOCN(x)		do { yylval.tok.loc.file = yyfilename; yylval.tok.loc.line = yylineno; return( x ); } while ( 0 )
// token text is a freshly allocated copy of the matched text
#define RETURN_VAL(x)		do { yylval.tok.str = new std::string( yytext ); RETURN_LOCN( x ); } while ( 0 )
// token carries no text
#define RETURN_CHAR(x)		do { yylval.tok.str = NULL; RETURN_LOCN( x ); } while ( 0 )
// token text is the string accumulated in strtext by the character/string constant rules
#define RETURN_STR(x)		do { yylval.tok.str = strtext; RETURN_LOCN( x ); } while ( 0 )

#define WHITE_RETURN(x)		// do nothing
#define NEWLINE_RETURN()	WHITE_RETURN( '\n' )
#define ASCIIOP_RETURN()	RETURN_CHAR( (int)yytext[0] )	// single character operator
#define NAMEDOP_RETURN(x)	RETURN_VAL( x )					// multichar operator, with a name
#define NUMERIC_RETURN(x)	do { rm_underscore(); RETURN_VAL( x ); } while ( 0 ) // numeric constant
#define KEYWORD_RETURN(x)	RETURN_CHAR( x )				// keyword
// classify the matched name through the typedef table: plain identifier, typedef name, or type-generator name
#define IDENTIFIER_RETURN()	RETURN_VAL( (typedefTable.isIdentifier( yytext ) ? IDENTIFIER : typedefTable.isTypedef( yytext ) ? TYPEDEFname : TYPEGENname ) )
#define ATTRIBUTE_RETURN()	RETURN_VAL( ATTR_IDENTIFIER )
[51b73452]48
[3848e0e]49void rm_underscore() {
[b87a5ed]50 // remove underscores in numeric constant
51 int j = 0;
52 for ( int i = 0; yytext[i] != '\0'; i += 1 ) {
53 if ( yytext[i] != '_' ) {
54 yytext[j] = yytext[i];
55 j += 1;
56 } // if
57 } // for
58 yyleng = j;
59 yytext[yyleng] = '\0';
[51b73452]60}
61
62%}
63
64octal [0-7]
65nonzero [1-9]
66decimal [0-9]
67hex [0-9a-fA-F]
[3848e0e]68universal_char "\\"((u"_"?{hex_quad})|(U"_"?{hex_quad}{2}))
[51b73452]69
[b87a5ed]70 // identifier, GCC: $ in identifier
[51b73452]71identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
72
[b87a5ed]73 // quoted identifier
[8c17ab0]74quoted_identifier "`"{identifier}"`"
75
[b87a5ed]76 // attribute identifier, GCC: $ in identifier
[51b73452]77attr_identifier "@"{identifier}
78
[b87a5ed]79 // numeric constants, CFA: '_' in constant
[3848e0e]80hex_quad {hex}("_"?{hex}){3}
[51b73452]81integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)
82
83octal_digits ({octal})|({octal}({octal}|"_")*{octal})
84octal_prefix "0""_"?
85octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?
86
87nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
88decimal_constant {nonzero_digits}{integer_suffix}?
89
90hex_digits ({hex})|({hex}({hex}|"_")*{hex})
91hex_prefix "0"[xX]"_"?
92hex_constant {hex_prefix}{hex_digits}{integer_suffix}?
93
94decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
95fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
96exponent "_"?[eE]"_"?[+-]?{decimal_digits}
97floating_suffix "_"?[flFL]
98floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
99
100binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
101hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
102hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
103
[b87a5ed]104 // character escape sequence, GCC: \e => esc character
[51b73452]105simple_escape "\\"[abefnrtv'"?\\]
[b87a5ed]106 // ' stop highlighting
[3848e0e]107octal_escape "\\"{octal}("_"?{octal}){0,2}
108hex_escape "\\""x""_"?{hex_digits}
[51b73452]109escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
[59db689]110cwide_prefix "L"|"U"|"u"
111swide_prefix {cwide_prefix}|"u8"
[51b73452]112
[b87a5ed]113 // display/white-space characters
[51b73452]114h_tab [\011]
115form_feed [\014]
116v_tab [\013]
117c_return [\015]
118h_white [ ]|{h_tab}
119
[b87a5ed]120 // operators
[51b73452]121op_unary_only "~"|"!"
122op_unary_binary "+"|"-"|"*"
123op_unary_pre_post "++"|"--"
124op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}
125
126op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
127op_binary_over {op_unary_binary}|{op_binary_only}
128op_binary_not_over "?"|"->"|"&&"|"||"
129operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
130
131%x COMMENT
[3848e0e]132%x BKQUOTE
133%x QUOTE
134%x STRING
[51b73452]135
136%%
[b87a5ed]137 /* line directives */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
	/* " stop highlighting */
	// Line directive left in the input by cpp: hash sign, line number, quoted file name.  The number and the name
	// become the current source position (yylineno and yyfilename).

	// The pattern allows blanks before the hash sign, so locate it rather than assuming it is the first character;
	// otherwise strtol stops at the hash sign and the line number becomes 0.  The pattern guarantees it is present.
	const char *hash = strchr( yytext, '#' );
	char *end_num;
	// line numbers in directives are decimal, so a leading zero must not select octal
	const long lineno = strtol( hash + 1, &end_num, 10 );
	char *begin_string = strchr( end_num, '"' );
	if ( begin_string ) {
		char *end_string = strchr( begin_string + 1, '"' );
		if ( end_string ) {
			const long length = end_string - begin_string - 1;
			// NOTE(review): the previous name is not freed here; RETURN_LOCN stores the yyfilename pointer in tokens
			char *filename = new char[ length + 1 ];
			memcpy( filename, begin_string + 1, length );
			filename[ length ] = '\0';
			//std::cout << "file " << filename << " line " << lineno << std::endl;
			yylineno = lineno;
			yyfilename = filename;
		} // if
	} // if
}
159
[b87a5ed]160 /* ignore preprocessor directives (for now) */
[51b73452]161^{h_white}*"#"[^\n]*"\n" ;
162
[cd623a4]163 /* ignore C style comments (ALSO HANDLED BY CPP) */
[3848e0e]164"/*" { BEGIN COMMENT; }
[cd623a4]165<COMMENT>.|\n ;
166<COMMENT>"*/" { BEGIN 0; }
[51b73452]167
[cd623a4]168 /* ignore C++ style comments (ALSO HANDLED BY CPP) */
169"//"[^\n]*"\n" ;
[51b73452]170
[b87a5ed]171 /* ignore whitespace */
[3848e0e]172{h_white}+ { WHITE_RETURN(' '); }
173({v_tab}|{c_return}|{form_feed})+ { WHITE_RETURN(' '); }
174({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" { NEWLINE_RETURN(); }
[51b73452]175
[b87a5ed]176 /* keywords */
177_Alignas { KEYWORD_RETURN(ALIGNAS); } // C11
178_Alignof { KEYWORD_RETURN(ALIGNOF); } // C11
179__alignof { KEYWORD_RETURN(ALIGNOF); } // GCC
180__alignof__ { KEYWORD_RETURN(ALIGNOF); } // GCC
181asm { KEYWORD_RETURN(ASM); }
182__asm { KEYWORD_RETURN(ASM); } // GCC
183__asm__ { KEYWORD_RETURN(ASM); } // GCC
184_Atomic { KEYWORD_RETURN(ATOMIC); } // C11
185__attribute { KEYWORD_RETURN(ATTRIBUTE); } // GCC
186__attribute__ { KEYWORD_RETURN(ATTRIBUTE); } // GCC
[3848e0e]187auto { KEYWORD_RETURN(AUTO); }
[b87a5ed]188_Bool { KEYWORD_RETURN(BOOL); } // C99
[3848e0e]189break { KEYWORD_RETURN(BREAK); }
190case { KEYWORD_RETURN(CASE); }
[b87a5ed]191catch { KEYWORD_RETURN(CATCH); } // CFA
[3848e0e]192char { KEYWORD_RETURN(CHAR); }
[b87a5ed]193choose { KEYWORD_RETURN(CHOOSE); } // CFA
194_Complex { KEYWORD_RETURN(COMPLEX); } // C99
195__complex { KEYWORD_RETURN(COMPLEX); } // GCC
196__complex__ { KEYWORD_RETURN(COMPLEX); } // GCC
[3848e0e]197const { KEYWORD_RETURN(CONST); }
[b87a5ed]198__const { KEYWORD_RETURN(CONST); } // GCC
199__const__ { KEYWORD_RETURN(CONST); } // GCC
200context { KEYWORD_RETURN(CONTEXT); } // CFA
[3848e0e]201continue { KEYWORD_RETURN(CONTINUE); }
202default { KEYWORD_RETURN(DEFAULT); }
[b87a5ed]203do { KEYWORD_RETURN(DO); }
[3848e0e]204double { KEYWORD_RETURN(DOUBLE); }
[b87a5ed]205dtype { KEYWORD_RETURN(DTYPE); } // CFA
[3848e0e]206else { KEYWORD_RETURN(ELSE); }
207enum { KEYWORD_RETURN(ENUM); }
[b87a5ed]208__extension__ { KEYWORD_RETURN(EXTENSION); } // GCC
[3848e0e]209extern { KEYWORD_RETURN(EXTERN); }
[b87a5ed]210fallthru { KEYWORD_RETURN(FALLTHRU); } // CFA
211finally { KEYWORD_RETURN(FINALLY); } // CFA
[3848e0e]212float { KEYWORD_RETURN(FLOAT); }
[b87a5ed]213__float128 { KEYWORD_RETURN(FLOAT); } // GCC
214for { KEYWORD_RETURN(FOR); }
215forall { KEYWORD_RETURN(FORALL); } // CFA
[3848e0e]216fortran { KEYWORD_RETURN(FORTRAN); }
[b87a5ed]217ftype { KEYWORD_RETURN(FTYPE); } // CFA
218_Generic { KEYWORD_RETURN(GENERIC); } // C11
[3848e0e]219goto { KEYWORD_RETURN(GOTO); }
[b87a5ed]220if { KEYWORD_RETURN(IF); }
221_Imaginary { KEYWORD_RETURN(IMAGINARY); } // C99
222__imag { KEYWORD_RETURN(IMAGINARY); } // GCC
223__imag__ { KEYWORD_RETURN(IMAGINARY); } // GCC
224inline { KEYWORD_RETURN(INLINE); } // C99
225__inline { KEYWORD_RETURN(INLINE); } // GCC
226__inline__ { KEYWORD_RETURN(INLINE); } // GCC
227int { KEYWORD_RETURN(INT); }
228__int128 { KEYWORD_RETURN(INT); } // GCC
229__label__ { KEYWORD_RETURN(LABEL); } // GCC
[3848e0e]230long { KEYWORD_RETURN(LONG); }
[b87a5ed]231lvalue { KEYWORD_RETURN(LVALUE); } // CFA
232_Noreturn { KEYWORD_RETURN(NORETURN); } // C11
[3848e0e]233register { KEYWORD_RETURN(REGISTER); }
[b87a5ed]234restrict { KEYWORD_RETURN(RESTRICT); } // C99
235__restrict { KEYWORD_RETURN(RESTRICT); } // GCC
236__restrict__ { KEYWORD_RETURN(RESTRICT); } // GCC
[3848e0e]237return { KEYWORD_RETURN(RETURN); }
238short { KEYWORD_RETURN(SHORT); }
239signed { KEYWORD_RETURN(SIGNED); }
[b87a5ed]240__signed { KEYWORD_RETURN(SIGNED); } // GCC
241__signed__ { KEYWORD_RETURN(SIGNED); } // GCC
[3848e0e]242sizeof { KEYWORD_RETURN(SIZEOF); }
243static { KEYWORD_RETURN(STATIC); }
[b87a5ed]244_Static_assert { KEYWORD_RETURN(STATICASSERT); } // C11
[3848e0e]245struct { KEYWORD_RETURN(STRUCT); }
246switch { KEYWORD_RETURN(SWITCH); }
[b87a5ed]247_Thread_local { KEYWORD_RETURN(THREADLOCAL); } // C11
248throw { KEYWORD_RETURN(THROW); } // CFA
249try { KEYWORD_RETURN(TRY); } // CFA
250type { KEYWORD_RETURN(TYPE); } // CFA
[3848e0e]251typedef { KEYWORD_RETURN(TYPEDEF); }
[b87a5ed]252typeof { KEYWORD_RETURN(TYPEOF); } // GCC
253__typeof { KEYWORD_RETURN(TYPEOF); } // GCC
254__typeof__ { KEYWORD_RETURN(TYPEOF); } // GCC
[3848e0e]255union { KEYWORD_RETURN(UNION); }
256unsigned { KEYWORD_RETURN(UNSIGNED); }
257void { KEYWORD_RETURN(VOID); }
258volatile { KEYWORD_RETURN(VOLATILE); }
[b87a5ed]259__volatile { KEYWORD_RETURN(VOLATILE); } // GCC
260__volatile__ { KEYWORD_RETURN(VOLATILE); } // GCC
[3848e0e]261while { KEYWORD_RETURN(WHILE); }
[51b73452]262
[b87a5ed]263 /* identifier */
264{identifier} { IDENTIFIER_RETURN(); }
265{attr_identifier} { ATTRIBUTE_RETURN(); }
[3848e0e]266"`" { BEGIN BKQUOTE; }
[b87a5ed]267<BKQUOTE>{identifier} { IDENTIFIER_RETURN(); }
268<BKQUOTE>"`" { BEGIN 0; }
[51b73452]269
[b87a5ed]270 /* numeric constants */
271"0" { NUMERIC_RETURN(ZERO); } // CFA
272"1" { NUMERIC_RETURN(ONE); } // CFA
[59db689]273{decimal_constant} { NUMERIC_RETURN(INTEGERconstant); }
274{octal_constant} { NUMERIC_RETURN(INTEGERconstant); }
275{hex_constant} { NUMERIC_RETURN(INTEGERconstant); }
[3848e0e]276{floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
277{hex_floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
[51b73452]278
[b87a5ed]279 /* character constant, allows empty value */
({cwide_prefix}[_]?)?[']	{ rm_underscore(); strtext = new std::string( yytext ); BEGIN QUOTE; }
<QUOTE>[^'\\\n]*	{ strtext->append( yytext ); }
<QUOTE>['\n]	{ strtext->append( yytext ); BEGIN 0; RETURN_STR(CHARACTERconstant); }
				/* ' stop highlighting */

				/* string constant: opening prefix and quote start the accumulated text, closing quote or newline ends it */
({swide_prefix}[_]?)?["]	{ rm_underscore(); strtext = new std::string( yytext ); BEGIN STRING; }
<STRING>[^"\\\n]*	{ strtext->append( yytext ); }
<STRING>["\n]	{ strtext->append( yytext ); BEGIN 0; RETURN_STR(STRINGliteral); }
				/* " stop highlighting */

				/* shared by character and string constants: escapes are appended to the accumulated text */
<QUOTE,STRING>{escape_seq}	{ rm_underscore(); strtext->append( yytext ); }
<QUOTE,STRING>"\\"{h_white}*"\n"	{}						// continuation (ALSO HANDLED BY CPP)
<QUOTE,STRING>"\\"	{ strtext->append( yytext ); }	// unknown escape character
[3848e0e]295
[b87a5ed]296 /* punctuation */
297"[" { ASCIIOP_RETURN(); }
298"]" { ASCIIOP_RETURN(); }
299"(" { ASCIIOP_RETURN(); }
300")" { ASCIIOP_RETURN(); }
301"{" { ASCIIOP_RETURN(); }
302"}" { ASCIIOP_RETURN(); }
303"," { ASCIIOP_RETURN(); } // also operator
304":" { ASCIIOP_RETURN(); }
305";" { ASCIIOP_RETURN(); }
306"." { ASCIIOP_RETURN(); } // also operator
[3848e0e]307"..." { NAMEDOP_RETURN(ELLIPSIS); }
308
[b87a5ed]309 /* alternative C99 brackets, "<:" & "<:<:" handled by preprocessor */
[3848e0e]310"<:" { RETURN_VAL('['); }
311":>" { RETURN_VAL(']'); }
312"<%" { RETURN_VAL('{'); }
313"%>" { RETURN_VAL('}'); }
[51b73452]314
[b87a5ed]315 /* operators */
316"!" { ASCIIOP_RETURN(); }
317"+" { ASCIIOP_RETURN(); }
318"-" { ASCIIOP_RETURN(); }
319"*" { ASCIIOP_RETURN(); }
320"/" { ASCIIOP_RETURN(); }
321"%" { ASCIIOP_RETURN(); }
322"^" { ASCIIOP_RETURN(); }
323"~" { ASCIIOP_RETURN(); }
324"&" { ASCIIOP_RETURN(); }
325"|" { ASCIIOP_RETURN(); }
326"<" { ASCIIOP_RETURN(); }
327">" { ASCIIOP_RETURN(); }
328"=" { ASCIIOP_RETURN(); }
329"?" { ASCIIOP_RETURN(); }
[3848e0e]330
331"++" { NAMEDOP_RETURN(ICR); }
332"--" { NAMEDOP_RETURN(DECR); }
333"==" { NAMEDOP_RETURN(EQ); }
334"!=" { NAMEDOP_RETURN(NE); }
335"<<" { NAMEDOP_RETURN(LS); }
336">>" { NAMEDOP_RETURN(RS); }
337"<=" { NAMEDOP_RETURN(LE); }
338">=" { NAMEDOP_RETURN(GE); }
339"&&" { NAMEDOP_RETURN(ANDAND); }
340"||" { NAMEDOP_RETURN(OROR); }
341"->" { NAMEDOP_RETURN(ARROW); }
342"+=" { NAMEDOP_RETURN(PLUSassign); }
343"-=" { NAMEDOP_RETURN(MINUSassign); }
344"*=" { NAMEDOP_RETURN(MULTassign); }
345"/=" { NAMEDOP_RETURN(DIVassign); }
346"%=" { NAMEDOP_RETURN(MODassign); }
347"&=" { NAMEDOP_RETURN(ANDassign); }
348"|=" { NAMEDOP_RETURN(ORassign); }
349"^=" { NAMEDOP_RETURN(ERassign); }
350"<<=" { NAMEDOP_RETURN(LSassign); }
351">>=" { NAMEDOP_RETURN(RSassign); }
[51b73452]352
[b87a5ed]353 /* CFA, operator identifier */
354{op_unary}"?" { IDENTIFIER_RETURN(); } // unary
[a61fea9a]355"?"({op_unary_pre_post}|"()"|"[?]"|"{}") { IDENTIFIER_RETURN(); }
[b87a5ed]356"?"{op_binary_over}"?" { IDENTIFIER_RETURN(); } // binary
[51b73452]357 /*
[b87a5ed]358 This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?"
359 can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practise to put a unary operator juxtaposed
360 to an identifier, e.g., "*i", users will be annoyed if they cannot do this with respect to operator
361 identifiers. Even with this special hack, there are 5 general cases that cannot be handled. The first
362 case is for the function-call identifier "?()":
[51b73452]363
364 int * ?()(); // declaration: space required after '*'
365 * ?()(); // expression: space required after '*'
366
[b87a5ed]367 Without the space, the string "*?()" is ambiguous without N character look ahead; it requires scanning
368 ahead to determine if there is a '(', which is the start of an argument/parameter list.
[51b73452]369
370 The 4 remaining cases occur in expressions:
371
372 i++?i:0; // space required before '?'
373 i--?i:0; // space required before '?'
374 i?++i:0; // space required after '?'
375 i?--i:0; // space required after '?'
376
[b87a5ed]377 In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as "i"/"++?" or
378 "i++"/"?"; it requires scanning ahead to determine if there is a '(', which is the start of an argument
379 list. In the second two cases, the string "?++x" is ambiguous, where this string can be lexed as
380 "?++"/"x" or "?"/"++x"; it requires scanning ahead to determine if there is a '(', which is the start of
381 an argument list.
[51b73452]382 */
{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?"))	{
	// The match begins with a 1- or 2-character unary operator followed by ?.  Only that leading operator is
	// returned as a token; yyless pushes the rest of the match, starting at the first ?, back onto the input.
	if ( yytext[1] == '?' ) {							// 1-character operator
		yyless( 1 );
		ASCIIOP_RETURN();
	} else {											// 2-character operator: ++ or --
		yyless( 2 );
		NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
	} // if
}
393
394 /* unknown characters */
[3848e0e]395. { printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno); }
[51b73452]396
397%%
398
[b87a5ed]399// Local Variables: //
400// mode: c++ //
[de62360d]401// tab-width: 4 //
[b87a5ed]402// compile-command: "make install" //
403// End: //
Note: See TracBrowser for help on using the repository browser.