source: src/Parser/lex.ll@ db82596

ADT aaron-thesis arm-eh ast-experimental cleanup-dtors ctor deferred_resn demangler enum forall-pointer-decay gc_noraii jacob/cs343-translation jenkins-sandbox memory new-ast new-ast-unique-expr new-env no_list persistent-indexer pthread-emulation qualifiedEnum resolv-new string with_gc
Last change on this file since db82596 was 56c3935, checked in by Peter A. Buhr <pabuhr@…>, 10 years ago

redo automake third attempt

  • Property mode set to 100644
File size: 14.7 KB
RevLine 
[b87a5ed]1/*
2 * Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3 *
4 * The contents of this file are covered under the licence agreement in the
5 * file "LICENCE" distributed with Cforall.
[51b73452]6 *
7 * lex.l --
8 *
9 * Author : Peter A. Buhr
10 * Created On : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
[56c3935]12 * Last Modified On : Sun May 31 23:41:32 2015
13 * Update Count : 334
[51b73452]14 */
15
16%option yylineno
17
18%{
// This lexer assumes the program has already been preprocessed by cpp. Hence, all user-level preprocessor
// directives have been processed and removed from the source. The only exceptions are preprocessor directives
// passed on to the compiler (e.g., line-number directives) and C/C++ style comments, which are ignored.
[51b73452]22
[8c17ab0]23//**************************** Includes and Defines ****************************
[51b73452]24
25#include <string>
26
27#include "lex.h"
28#include "ParseNode.h"
[56c3935]29#include "parser.h" // YACC generated definitions based on C++ grammar
[51b73452]30
31char *yyfilename;
[b87a5ed]32std::string *strtext; // accumulate parts of character and string constant value
[51b73452]33
// Token-return helpers used by the rule actions below.
//
// Each multi-statement macro is wrapped in do { ... } while ( 0 ) so that it behaves as a single statement;
// without the wrapper, "if ( c ) RETURN_VAL( x ); else ..." would guard only the first assignment and the
// "else" would not parse.

// White space produces no token: the argument is accepted and discarded.
#define WHITE_RETURN(x)		// do nothing
#define NEWLINE_RETURN()	WHITE_RETURN('\n')

// Return token x to the parser. The semantic value is a newly allocated copy of the matched text plus the
// current file name and line number. The string is not released here (presumably the parser takes ownership
// -- confirm against the grammar actions).
#define RETURN_VAL(x) do { \
		yylval.tok.str = new std::string( yytext ); \
		yylval.tok.loc.file = yyfilename; \
		yylval.tok.loc.line = yylineno; \
		return( x ); \
	} while ( 0 )

// Same as RETURN_VAL, but the token text is the string accumulated in strtext by the character/string
// constant rules rather than a copy of yytext.
#define RETURN_STR(x) do { \
		yylval.tok.str = strtext; \
		yylval.tok.loc.file = yyfilename; \
		yylval.tok.loc.line = yylineno; \
		return( x ); \
	} while ( 0 )

#define KEYWORD_RETURN(x)	RETURN_VAL(x)			// keyword
// Classify a name by consulting typedefTable: ordinary identifier, typedef name, or (otherwise) type-generator name.
#define IDENTIFIER_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))
//#define ATTRIBUTE_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? ATTR_IDENTIFIER : typedefTable.isTypedef(yytext) ? ATTR_TYPEDEFname : ATTR_TYPEGENname))
#define ATTRIBUTE_RETURN()	RETURN_VAL(ATTR_IDENTIFIER)

#define ASCIIOP_RETURN()	RETURN_VAL(static_cast<int>( yytext[0] ))	// single character operator
#define NAMEDOP_RETURN(x)	RETURN_VAL(x)			// multichar operator, with a name

// Strip the CFA digit-separator underscores from yytext before returning the numeric token.
#define NUMERIC_RETURN(x)	do { rm_underscore(); RETURN_VAL(x); } while ( 0 )	// numeric constant
[51b73452]54
[3848e0e]55void rm_underscore() {
[b87a5ed]56 // remove underscores in numeric constant
57 int j = 0;
58 for ( int i = 0; yytext[i] != '\0'; i += 1 ) {
59 if ( yytext[i] != '_' ) {
60 yytext[j] = yytext[i];
61 j += 1;
62 } // if
63 } // for
64 yyleng = j;
65 yytext[yyleng] = '\0';
[51b73452]66}
67
68%}
69
70octal [0-7]
71nonzero [1-9]
72decimal [0-9]
73hex [0-9a-fA-F]
[3848e0e]74universal_char "\\"((u"_"?{hex_quad})|(U"_"?{hex_quad}{2}))
[51b73452]75
[b87a5ed]76 // identifier, GCC: $ in identifier
[51b73452]77identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
78
[b87a5ed]79 // quoted identifier
[8c17ab0]80quoted_identifier "`"{identifier}"`"
81
[b87a5ed]82 // attribute identifier, GCC: $ in identifier
[51b73452]83attr_identifier "@"{identifier}
84
[b87a5ed]85 // numeric constants, CFA: '_' in constant
[3848e0e]86hex_quad {hex}("_"?{hex}){3}
[51b73452]87integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)
88
89octal_digits ({octal})|({octal}({octal}|"_")*{octal})
90octal_prefix "0""_"?
91octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?
92
93nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
94decimal_constant {nonzero_digits}{integer_suffix}?
95
96hex_digits ({hex})|({hex}({hex}|"_")*{hex})
97hex_prefix "0"[xX]"_"?
98hex_constant {hex_prefix}{hex_digits}{integer_suffix}?
99
100decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
101fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
102exponent "_"?[eE]"_"?[+-]?{decimal_digits}
103floating_suffix "_"?[flFL]
104floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
105
106binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
107hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
108hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
109
[b87a5ed]110 // character escape sequence, GCC: \e => esc character
[51b73452]111simple_escape "\\"[abefnrtv'"?\\]
[b87a5ed]112 // ' stop highlighting
[3848e0e]113octal_escape "\\"{octal}("_"?{octal}){0,2}
114hex_escape "\\""x""_"?{hex_digits}
[51b73452]115escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
116
[b87a5ed]117 // display/white-space characters
[51b73452]118h_tab [\011]
119form_feed [\014]
120v_tab [\013]
121c_return [\015]
122h_white [ ]|{h_tab}
123
[b87a5ed]124 // operators
[51b73452]125op_unary_only "~"|"!"
126op_unary_binary "+"|"-"|"*"
127op_unary_pre_post "++"|"--"
128op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}
129
130op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
131op_binary_over {op_unary_binary}|{op_binary_only}
132op_binary_not_over "?"|"->"|"&&"|"||"
133operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
134
135%x COMMENT
[3848e0e]136%x BKQUOTE
137%x QUOTE
138%x STRING
[51b73452]139
140%%
[b87a5ed]141 /* line directives */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
	/* " stop highlighting */
	// Preprocessor line marker:  # <line-number> "<file-name>" [flags]
	// Resets the scanner's notion of the current line and file so token locations refer to the original source.
	char *end_num;

	// The pattern permits white space before '#', so find the '#' instead of assuming it is yytext[0];
	// otherwise strtol stops at '#' and the line number is silently reset to 0. The pattern guarantees that
	// a '#' is present.
	const char *hash = strchr( yytext, '#' );
	// Line numbers are decimal: base 10 prevents a leading 0 from being read as an octal prefix.
	long lineno = strtol( hash + 1, &end_num, 10 );

	char *begin_string = strchr( end_num, '"' );
	if ( begin_string ) {
		char *end_string = strchr( begin_string + 1, '"' );
		if ( end_string ) {
			long length = end_string - begin_string - 1;
			// The name buffer is deliberately never freed here: RETURN_VAL/RETURN_STR store the yyfilename
			// pointer in every token location, so earlier buffers may still be referenced.
			char *filename = new char[ length + 1 ];
			memcpy( filename, begin_string + 1, length );
			filename[ length ] = '\0';
			//std::cout << "file " << filename << " line " << lineno << std::endl;
			yylineno = lineno;
			yyfilename = filename;
		} // if
	} // if
}
163
[b87a5ed]164 /* ignore preprocessor directives (for now) */
[51b73452]165^{h_white}*"#"[^\n]*"\n" ;
166
[b87a5ed]167 /* ignore C style comments */
"/*"			{ BEGIN( COMMENT ); }
<COMMENT>"*/"	{ BEGIN( INITIAL ); }
<COMMENT>.|\n	{ /* discard comment text one character at a time */ }
[51b73452]171
[b87a5ed]172 /* ignore C++ style comments */
[51b73452]173"//"[^\n]*"\n" ;
174
[b87a5ed]175 /* ignore whitespace */
[3848e0e]176{h_white}+ { WHITE_RETURN(' '); }
177({v_tab}|{c_return}|{form_feed})+ { WHITE_RETURN(' '); }
178({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" { NEWLINE_RETURN(); }
[51b73452]179
[b87a5ed]180 /* keywords */
181_Alignas { KEYWORD_RETURN(ALIGNAS); } // C11
182_Alignof { KEYWORD_RETURN(ALIGNOF); } // C11
183__alignof { KEYWORD_RETURN(ALIGNOF); } // GCC
184__alignof__ { KEYWORD_RETURN(ALIGNOF); } // GCC
185asm { KEYWORD_RETURN(ASM); }
186__asm { KEYWORD_RETURN(ASM); } // GCC
187__asm__ { KEYWORD_RETURN(ASM); } // GCC
188_Atomic { KEYWORD_RETURN(ATOMIC); } // C11
189__attribute { KEYWORD_RETURN(ATTRIBUTE); } // GCC
190__attribute__ { KEYWORD_RETURN(ATTRIBUTE); } // GCC
[3848e0e]191auto { KEYWORD_RETURN(AUTO); }
[b87a5ed]192_Bool { KEYWORD_RETURN(BOOL); } // C99
[3848e0e]193break { KEYWORD_RETURN(BREAK); }
194case { KEYWORD_RETURN(CASE); }
[b87a5ed]195catch { KEYWORD_RETURN(CATCH); } // CFA
[3848e0e]196char { KEYWORD_RETURN(CHAR); }
[b87a5ed]197choose { KEYWORD_RETURN(CHOOSE); } // CFA
198_Complex { KEYWORD_RETURN(COMPLEX); } // C99
199__complex { KEYWORD_RETURN(COMPLEX); } // GCC
200__complex__ { KEYWORD_RETURN(COMPLEX); } // GCC
[3848e0e]201const { KEYWORD_RETURN(CONST); }
[b87a5ed]202__const { KEYWORD_RETURN(CONST); } // GCC
203__const__ { KEYWORD_RETURN(CONST); } // GCC
204context { KEYWORD_RETURN(CONTEXT); } // CFA
[3848e0e]205continue { KEYWORD_RETURN(CONTINUE); }
206default { KEYWORD_RETURN(DEFAULT); }
[b87a5ed]207do { KEYWORD_RETURN(DO); }
[3848e0e]208double { KEYWORD_RETURN(DOUBLE); }
[b87a5ed]209dtype { KEYWORD_RETURN(DTYPE); } // CFA
[3848e0e]210else { KEYWORD_RETURN(ELSE); }
211enum { KEYWORD_RETURN(ENUM); }
[b87a5ed]212__extension__ { KEYWORD_RETURN(EXTENSION); } // GCC
[3848e0e]213extern { KEYWORD_RETURN(EXTERN); }
[b87a5ed]214fallthru { KEYWORD_RETURN(FALLTHRU); } // CFA
215finally { KEYWORD_RETURN(FINALLY); } // CFA
[3848e0e]216float { KEYWORD_RETURN(FLOAT); }
[b87a5ed]217__float128 { KEYWORD_RETURN(FLOAT); } // GCC
218for { KEYWORD_RETURN(FOR); }
219forall { KEYWORD_RETURN(FORALL); } // CFA
[3848e0e]220fortran { KEYWORD_RETURN(FORTRAN); }
[b87a5ed]221ftype { KEYWORD_RETURN(FTYPE); } // CFA
222_Generic { KEYWORD_RETURN(GENERIC); } // C11
[3848e0e]223goto { KEYWORD_RETURN(GOTO); }
[b87a5ed]224if { KEYWORD_RETURN(IF); }
225_Imaginary { KEYWORD_RETURN(IMAGINARY); } // C99
226__imag { KEYWORD_RETURN(IMAGINARY); } // GCC
227__imag__ { KEYWORD_RETURN(IMAGINARY); } // GCC
228inline { KEYWORD_RETURN(INLINE); } // C99
229__inline { KEYWORD_RETURN(INLINE); } // GCC
230__inline__ { KEYWORD_RETURN(INLINE); } // GCC
231int { KEYWORD_RETURN(INT); }
232__int128 { KEYWORD_RETURN(INT); } // GCC
233__label__ { KEYWORD_RETURN(LABEL); } // GCC
[3848e0e]234long { KEYWORD_RETURN(LONG); }
[b87a5ed]235lvalue { KEYWORD_RETURN(LVALUE); } // CFA
236_Noreturn { KEYWORD_RETURN(NORETURN); } // C11
[3848e0e]237register { KEYWORD_RETURN(REGISTER); }
[b87a5ed]238restrict { KEYWORD_RETURN(RESTRICT); } // C99
239__restrict { KEYWORD_RETURN(RESTRICT); } // GCC
240__restrict__ { KEYWORD_RETURN(RESTRICT); } // GCC
[3848e0e]241return { KEYWORD_RETURN(RETURN); }
242short { KEYWORD_RETURN(SHORT); }
243signed { KEYWORD_RETURN(SIGNED); }
[b87a5ed]244__signed { KEYWORD_RETURN(SIGNED); } // GCC
245__signed__ { KEYWORD_RETURN(SIGNED); } // GCC
[3848e0e]246sizeof { KEYWORD_RETURN(SIZEOF); }
247static { KEYWORD_RETURN(STATIC); }
[b87a5ed]248_Static_assert { KEYWORD_RETURN(STATICASSERT); } // C11
[3848e0e]249struct { KEYWORD_RETURN(STRUCT); }
250switch { KEYWORD_RETURN(SWITCH); }
[b87a5ed]251_Thread_local { KEYWORD_RETURN(THREADLOCAL); } // C11
252throw { KEYWORD_RETURN(THROW); } // CFA
253try { KEYWORD_RETURN(TRY); } // CFA
254type { KEYWORD_RETURN(TYPE); } // CFA
[3848e0e]255typedef { KEYWORD_RETURN(TYPEDEF); }
[b87a5ed]256typeof { KEYWORD_RETURN(TYPEOF); } // GCC
257__typeof { KEYWORD_RETURN(TYPEOF); } // GCC
258__typeof__ { KEYWORD_RETURN(TYPEOF); } // GCC
[3848e0e]259union { KEYWORD_RETURN(UNION); }
260unsigned { KEYWORD_RETURN(UNSIGNED); }
261void { KEYWORD_RETURN(VOID); }
262volatile { KEYWORD_RETURN(VOLATILE); }
[b87a5ed]263__volatile { KEYWORD_RETURN(VOLATILE); } // GCC
264__volatile__ { KEYWORD_RETURN(VOLATILE); } // GCC
[3848e0e]265while { KEYWORD_RETURN(WHILE); }
[51b73452]266
[b87a5ed]267 /* identifier */
268{identifier} { IDENTIFIER_RETURN(); }
269{attr_identifier} { ATTRIBUTE_RETURN(); }
[3848e0e]270"`" { BEGIN BKQUOTE; }
[b87a5ed]271<BKQUOTE>{identifier} { IDENTIFIER_RETURN(); }
272<BKQUOTE>"`" { BEGIN 0; }
[51b73452]273
[b87a5ed]274 /* numeric constants */
275"0" { NUMERIC_RETURN(ZERO); } // CFA
276"1" { NUMERIC_RETURN(ONE); } // CFA
[3848e0e]277{decimal_constant} { NUMERIC_RETURN(INTEGERconstant); }
278{octal_constant} { NUMERIC_RETURN(INTEGERconstant); }
279{hex_constant} { NUMERIC_RETURN(INTEGERconstant); }
280{floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
281{hex_floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
[51b73452]282
[b87a5ed]283 /* character constant, allows empty value */
"L"?"_"?[']		{ BEGIN QUOTE; rm_underscore(); strtext = new std::string( yytext ); }
<QUOTE>[^'\\\n]* { strtext->append( yytext ); }
<QUOTE>['\n]	{ BEGIN INITIAL; strtext->append( yytext ); RETURN_STR(CHARACTERconstant); }
				/* ' stop highlighting */

				/* string constant: text is accumulated in strtext, starting with the opening prefix/quote */
"L"?"_"?["]		{ BEGIN STRING; rm_underscore(); strtext = new std::string( yytext ); }
<STRING>[^"\\\n]* { strtext->append( yytext ); }
<STRING>["\n]	{ BEGIN INITIAL; strtext->append( yytext ); RETURN_STR(STRINGliteral); }
				/* " stop highlighting */

<QUOTE,STRING>{escape_seq} { rm_underscore(); strtext->append( yytext ); }
<QUOTE,STRING>[\\] { strtext->append( yytext ); } // unknown escape character
297
[b87a5ed]298 /* punctuation */
299"[" { ASCIIOP_RETURN(); }
300"]" { ASCIIOP_RETURN(); }
301"(" { ASCIIOP_RETURN(); }
302")" { ASCIIOP_RETURN(); }
303"{" { ASCIIOP_RETURN(); }
304"}" { ASCIIOP_RETURN(); }
305"," { ASCIIOP_RETURN(); } // also operator
306":" { ASCIIOP_RETURN(); }
307";" { ASCIIOP_RETURN(); }
308"." { ASCIIOP_RETURN(); } // also operator
[3848e0e]309"..." { NAMEDOP_RETURN(ELLIPSIS); }
310
	/* alternative C99 brackets (digraphs); "%:" & "%:%:" handled by preprocessor */
[3848e0e]312"<:" { RETURN_VAL('['); }
313":>" { RETURN_VAL(']'); }
314"<%" { RETURN_VAL('{'); }
315"%>" { RETURN_VAL('}'); }
[51b73452]316
[b87a5ed]317 /* operators */
318"!" { ASCIIOP_RETURN(); }
319"+" { ASCIIOP_RETURN(); }
320"-" { ASCIIOP_RETURN(); }
321"*" { ASCIIOP_RETURN(); }
322"/" { ASCIIOP_RETURN(); }
323"%" { ASCIIOP_RETURN(); }
324"^" { ASCIIOP_RETURN(); }
325"~" { ASCIIOP_RETURN(); }
326"&" { ASCIIOP_RETURN(); }
327"|" { ASCIIOP_RETURN(); }
328"<" { ASCIIOP_RETURN(); }
329">" { ASCIIOP_RETURN(); }
330"=" { ASCIIOP_RETURN(); }
331"?" { ASCIIOP_RETURN(); }
[3848e0e]332
333"++" { NAMEDOP_RETURN(ICR); }
334"--" { NAMEDOP_RETURN(DECR); }
335"==" { NAMEDOP_RETURN(EQ); }
336"!=" { NAMEDOP_RETURN(NE); }
337"<<" { NAMEDOP_RETURN(LS); }
338">>" { NAMEDOP_RETURN(RS); }
339"<=" { NAMEDOP_RETURN(LE); }
340">=" { NAMEDOP_RETURN(GE); }
341"&&" { NAMEDOP_RETURN(ANDAND); }
342"||" { NAMEDOP_RETURN(OROR); }
343"->" { NAMEDOP_RETURN(ARROW); }
344"+=" { NAMEDOP_RETURN(PLUSassign); }
345"-=" { NAMEDOP_RETURN(MINUSassign); }
346"*=" { NAMEDOP_RETURN(MULTassign); }
347"/=" { NAMEDOP_RETURN(DIVassign); }
348"%=" { NAMEDOP_RETURN(MODassign); }
349"&=" { NAMEDOP_RETURN(ANDassign); }
350"|=" { NAMEDOP_RETURN(ORassign); }
351"^=" { NAMEDOP_RETURN(ERassign); }
352"<<=" { NAMEDOP_RETURN(LSassign); }
353">>=" { NAMEDOP_RETURN(RSassign); }
[51b73452]354
[b87a5ed]355 /* CFA, operator identifier */
356{op_unary}"?" { IDENTIFIER_RETURN(); } // unary
[3848e0e]357"?"({op_unary_pre_post}|"()"|"[?]") { IDENTIFIER_RETURN(); }
[b87a5ed]358"?"{op_binary_over}"?" { IDENTIFIER_RETURN(); } // binary
[51b73452]359 /*
	This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?"
	can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practice to put a unary operator juxtaposed
	to an identifier, e.g., "*i", users will be annoyed if they cannot do this with respect to operator
	identifiers. Even with this special hack, there are 5 general cases that cannot be handled. The first
	case is for the function-call identifier "?()":
[51b73452]365
366 int * ?()(); // declaration: space required after '*'
367 * ?()(); // expression: space required after '*'
368
[b87a5ed]369 Without the space, the string "*?()" is ambiguous without N character look ahead; it requires scanning
370 ahead to determine if there is a '(', which is the start of an argument/parameter list.
[51b73452]371
372 The 4 remaining cases occur in expressions:
373
374 i++?i:0; // space required before '?'
375 i--?i:0; // space required before '?'
376 i?++i:0; // space required after '?'
377 i?--i:0; // space required after '?'
378
	In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as "i"/"++?" or
	"i++"/"?"; it requires scanning ahead to determine if there is a '(', which is the start of an argument
	list. In the second two cases, the string "?++i" is ambiguous, where this string can be lexed as
	"?++"/"i" or "?"/"++i"; it requires scanning ahead to determine if there is a '(', which is the start of
	an argument list.
[51b73452]384 */
{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) {
	// The match starts with a unary operator immediately followed by an operator identifier. Return only
	// the unary operator and push everything from the first '?' back onto the input to be rescanned.
	// The operator is 1 character long when '?' is the second character, otherwise 2 ("++" or "--").
	const int oplen = ( yytext[1] == '?' ) ? 1 : 2;
	yyless( oplen );							// keep the operator, put back the rest
	if ( oplen == 1 ) {
		ASCIIOP_RETURN();
	} else {
		NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
	} // if
}
395
396 /* unknown characters */
. {
	// Any single character that no other rule matches: report it with its line number and discard it.
	printf( "unknown character(s):\"%s\" on line %d\n", yytext, yylineno );
}
[51b73452]398
399%%
400
[b87a5ed]401// Local Variables: //
402// fill-column: 110 //
403// tab-width: 4 //
404// mode: c++ //
405// compile-command: "make install" //
406// End: //
Note: See TracBrowser for help on using the repository browser.