source: src/Parser/lex.ll@ 9bd6105

ADT aaron-thesis arm-eh ast-experimental cleanup-dtors deferred_resn demangler enum forall-pointer-decay jacob/cs343-translation jenkins-sandbox new-ast new-ast-unique-expr new-env no_list persistent-indexer pthread-emulation qualifiedEnum resolv-new with_gc
Last change on this file since 9bd6105 was ec55ed5, checked in by Rob Schluntz <rschlunt@…>, 8 years ago

Merge branch 'master' of plg.uwaterloo.ca:/u/cforall/software/cfa/cfa-cc

  • Property mode set to 100644
File size: 16.7 KB
Line 
1/*
2 * Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3 *
4 * The contents of this file are covered under the licence agreement in the
5 * file "LICENCE" distributed with Cforall.
6 *
7 * lex.ll --
8 *
9 * Author : Peter A. Buhr
10 * Created On : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
12 * Last Modified On : Tue Aug 22 22:43:39 2017
13 * Update Count : 558
14 */
15
16%option yylineno
17%option noyywrap
18%option nounput
19
20%{
21// This lexer assumes the program has been preprocessed by cpp. Hence, all user level preprocessor directives have been
22// performed and removed from the source. The only exceptions are preprocessor directives passed to the compiler (e.g.,
23// line-number directives) and C/C++ style comments, which are ignored.
24
25//**************************** Includes and Defines ****************************
26
27#include <string>
28#include <cstdio> // FILENAME_MAX
29
30#include "ParseNode.h"
31#include "TypedefTable.h"
32
33char *yyfilename;
34std::string *strtext; // accumulate parts of character and string constant value
35
36#define RETURN_LOCN(x) yylval.tok.loc.file = yyfilename; yylval.tok.loc.line = yylineno; return( x )
37#define RETURN_VAL(x) yylval.tok.str = new std::string( yytext ); RETURN_LOCN( x )
38#define RETURN_CHAR(x) yylval.tok.str = nullptr; RETURN_LOCN( x )
39#define RETURN_STR(x) yylval.tok.str = strtext; RETURN_LOCN( x )
40
41#define WHITE_RETURN(x) // do nothing
42#define NEWLINE_RETURN() WHITE_RETURN( '\n' )
43#define ASCIIOP_RETURN() RETURN_CHAR( (int)yytext[0] ) // single character operator
44#define NAMEDOP_RETURN(x) RETURN_CHAR( x ) // multichar operator, with a name
45#define NUMERIC_RETURN(x) rm_underscore(); RETURN_VAL( x ) // numeric constant
46#define KEYWORD_RETURN(x) RETURN_CHAR( x ) // keyword
47#define QKEYWORD_RETURN(x) typedefTable.isKind( yytext ); RETURN_VAL(x); // quasi-keyword
48#define IDENTIFIER_RETURN() RETURN_VAL( typedefTable.isKind( yytext ) )
49#define ATTRIBUTE_RETURN() RETURN_VAL( ATTR_IDENTIFIER )
50
void rm_underscore() {
	// Squeeze every underscore out of the current token in place: characters that are kept slide toward the front
	// of yytext, the shortened text is re-terminated, and yyleng is updated to the new length.
	int kept = 0;										// number of characters retained so far
	for ( const char *scan = yytext; *scan != '\0'; ++scan ) {
		if ( *scan == '_' ) continue;					// digit separator, drop it
		yytext[kept++] = *scan;
	} // for
	yytext[kept] = '\0';
	yyleng = kept;
}
62
63// Stop warning due to incorrectly generated flex code.
64#pragma GCC diagnostic ignored "-Wsign-compare"
65%}
66
67octal [0-7]
68nonzero [1-9]
69decimal [0-9]
70hex [0-9a-fA-F]
71universal_char "\\"((u"_"?{hex_quad})|(U"_"?{hex_quad}{2}))
72
73 // identifier, GCC: $ in identifier
74identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
75
76 // attribute identifier, GCC: $ in identifier
77attr_identifier "@"{identifier}
78
79 // numeric constants, CFA: '_' in constant
80hex_quad {hex}("_"?{hex}){3}
81integer_suffix "_"?(([uU](("ll"|"LL"|[lL])[iI]|[iI]?("ll"|"LL"|[lL])?))|([iI](("ll"|"LL"|[lL])[uU]|[uU]?("ll"|"LL"|[lL])?))|(("ll"|"LL"|[lL])([iI][uU]|[uU]?[iI]?)))
82
83octal_digits ({octal})|({octal}({octal}|"_")*{octal})
84octal_prefix "0""_"?
85octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?
86
87nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
88decimal_constant {nonzero_digits}{integer_suffix}?
89
90hex_digits ({hex})|({hex}({hex}|"_")*{hex})
91hex_prefix "0"[xX]"_"?
92hex_constant {hex_prefix}{hex_digits}{integer_suffix}?
93
94decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
95real_decimal {decimal_digits}"."{exponent}?{floating_suffix}?
96real_fraction "."{decimal_digits}{exponent}?{floating_suffix}?
97real_constant {decimal_digits}{real_fraction}
98exponent "_"?[eE]"_"?[+-]?{decimal_digits}
99 // GCC: D (double) and iI (imaginary) suffixes, and DL (long double)
100floating_suffix "_"?([fFdDlL][iI]?|[iI][lLfFdD]?|"DL")
101floating_constant (({real_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
102
103binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
104hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
105hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
106
107 // character escape sequence, GCC: \e => esc character
108simple_escape "\\"[abefnrtv'"?\\]
109 // ' stop highlighting
110octal_escape "\\"{octal}("_"?{octal}){0,2}
111hex_escape "\\""x""_"?{hex_digits}
112escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
113cwide_prefix "L"|"U"|"u"
114swide_prefix {cwide_prefix}|"u8"
115
116 // display/white-space characters
117h_tab [\011]
118form_feed [\014]
119v_tab [\013]
120c_return [\015]
121h_white [ ]|{h_tab}
122
123 // overloadable operators
124op_unary_only "~"|"!"
125op_unary_binary "+"|"-"|"*"
126op_unary_pre_post "++"|"--"
127op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}
128
129op_binary_only "/"|"%"|"\\"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"\\="|"&="|"|="|"^="|"<<="|">>="
130op_binary_over {op_unary_binary}|{op_binary_only}
131 // op_binary_not_over "?"|"->"|"."|"&&"|"||"|"@="
132 // operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
133
134%x COMMENT
135%x BKQUOTE
136%x QUOTE
137%x STRING
138
139%%
140 /* line directives */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["].*"\n" {
	/* " stop highlighting */
	// Line marker left in the input by cpp, of the form:  # <decimal line number> "<file name>" <flags>
	// Resynchronize yylineno and yyfilename with the position named by the marker.
	static char filename[FILENAME_MAX];					// static: yyfilename keeps pointing here after the action ends
	// The pattern permits white space in front of the '#', so locate it instead of assuming it is yytext[0];
	// otherwise strtol converts nothing and the line number silently becomes 0.
	const char *hash = strchr( yytext, '#' );
	assert( hash );										// guaranteed by the pattern
	char *end_num;
	// Line numbers are decimal digit sequences: base 10 prevents a leading zero from selecting octal.
	long lineno = strtol( hash + 1, &end_num, 10 );
	char *begin_string = strchr( end_num, '"' );
	if ( begin_string ) {								// file name ?
		char *end_string = strchr( begin_string + 1, '"' );	// look for ending delimiter
		assert( end_string );							// closing quote ?
		long length = end_string - begin_string - 1;	// file-name length without quotes or sentinel
		assert( length < FILENAME_MAX );				// room for sentinel ?
		memcpy( filename, begin_string + 1, length );	// copy file name from yytext
		filename[ length ] = '\0';						// terminate string with sentinel
		yylineno = lineno;
		yyfilename = filename;
	} // if
}
161
162 /* ignore preprocessor directives (for now) */
163^{h_white}*"#"[^\n]*"\n" ;
164
165 /* ignore C style comments (ALSO HANDLED BY CPP) */
166"/*" { BEGIN COMMENT; }
167<COMMENT>.|\n ;
168<COMMENT>"*/" { BEGIN 0; }
169
170 /* ignore C++ style comments (ALSO HANDLED BY CPP) */
171"//"[^\n]*"\n" ;
172
173 /* ignore whitespace */
174{h_white}+ { WHITE_RETURN(' '); }
175({v_tab}|{c_return}|{form_feed})+ { WHITE_RETURN(' '); }
176({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" { NEWLINE_RETURN(); }
177
178 /* keywords */
179_Alignas { KEYWORD_RETURN(ALIGNAS); } // C11
180_Alignof { KEYWORD_RETURN(ALIGNOF); } // C11
181__alignof { KEYWORD_RETURN(ALIGNOF); } // GCC
182__alignof__ { KEYWORD_RETURN(ALIGNOF); } // GCC
183asm { KEYWORD_RETURN(ASM); }
184__asm { KEYWORD_RETURN(ASM); } // GCC
185__asm__ { KEYWORD_RETURN(ASM); } // GCC
186_At { KEYWORD_RETURN(AT); } // CFA
187_Atomic { KEYWORD_RETURN(ATOMIC); } // C11
188__attribute { KEYWORD_RETURN(ATTRIBUTE); } // GCC
189__attribute__ { KEYWORD_RETURN(ATTRIBUTE); } // GCC
190auto { KEYWORD_RETURN(AUTO); }
191_Bool { KEYWORD_RETURN(BOOL); } // C99
192break { KEYWORD_RETURN(BREAK); }
193case { KEYWORD_RETURN(CASE); }
194catch { KEYWORD_RETURN(CATCH); } // CFA
195catchResume { KEYWORD_RETURN(CATCHRESUME); } // CFA
196char { KEYWORD_RETURN(CHAR); }
197choose { KEYWORD_RETURN(CHOOSE); } // CFA
198_Complex { KEYWORD_RETURN(COMPLEX); } // C99
199__complex { KEYWORD_RETURN(COMPLEX); } // GCC
200__complex__ { KEYWORD_RETURN(COMPLEX); } // GCC
201const { KEYWORD_RETURN(CONST); }
202__const { KEYWORD_RETURN(CONST); } // GCC
203__const__ { KEYWORD_RETURN(CONST); } // GCC
204continue { KEYWORD_RETURN(CONTINUE); }
205coroutine { KEYWORD_RETURN(COROUTINE); } // CFA
206default { KEYWORD_RETURN(DEFAULT); }
207disable { KEYWORD_RETURN(DISABLE); } // CFA
208do { KEYWORD_RETURN(DO); }
209double { KEYWORD_RETURN(DOUBLE); }
210dtype { KEYWORD_RETURN(DTYPE); } // CFA
211else { KEYWORD_RETURN(ELSE); }
212enable { KEYWORD_RETURN(ENABLE); } // CFA
213enum { KEYWORD_RETURN(ENUM); }
214__extension__ { KEYWORD_RETURN(EXTENSION); } // GCC
215extern { KEYWORD_RETURN(EXTERN); }
216fallthrough { KEYWORD_RETURN(FALLTHRU); } // CFA
217fallthru { KEYWORD_RETURN(FALLTHRU); } // CFA
218finally { KEYWORD_RETURN(FINALLY); } // CFA
219float { KEYWORD_RETURN(FLOAT); }
220__float128 { KEYWORD_RETURN(FLOAT); } // GCC
221for { KEYWORD_RETURN(FOR); }
222forall { KEYWORD_RETURN(FORALL); } // CFA
223fortran { KEYWORD_RETURN(FORTRAN); }
224ftype { KEYWORD_RETURN(FTYPE); } // CFA
225_Generic { KEYWORD_RETURN(GENERIC); } // C11
226goto { KEYWORD_RETURN(GOTO); }
227if { KEYWORD_RETURN(IF); }
228_Imaginary { KEYWORD_RETURN(IMAGINARY); } // C99
229__imag { KEYWORD_RETURN(IMAGINARY); } // GCC
230__imag__ { KEYWORD_RETURN(IMAGINARY); } // GCC
231inline { KEYWORD_RETURN(INLINE); } // C99
232__inline { KEYWORD_RETURN(INLINE); } // GCC
233__inline__ { KEYWORD_RETURN(INLINE); } // GCC
234int { KEYWORD_RETURN(INT); }
235__int128 { KEYWORD_RETURN(INT); } // GCC
236__int128_t { KEYWORD_RETURN(INT); } // GCC
237__label__ { KEYWORD_RETURN(LABEL); } // GCC
238long { KEYWORD_RETURN(LONG); }
239monitor { KEYWORD_RETURN(MONITOR); } // CFA
240mutex { KEYWORD_RETURN(MUTEX); } // CFA
241_Noreturn { KEYWORD_RETURN(NORETURN); } // C11
242__builtin_offsetof { KEYWORD_RETURN(OFFSETOF); } // GCC
243one_t { NUMERIC_RETURN(ONE_T); } // CFA
244otype { KEYWORD_RETURN(OTYPE); } // CFA
245register { KEYWORD_RETURN(REGISTER); }
246restrict { KEYWORD_RETURN(RESTRICT); } // C99
247__restrict { KEYWORD_RETURN(RESTRICT); } // GCC
248__restrict__ { KEYWORD_RETURN(RESTRICT); } // GCC
249return { KEYWORD_RETURN(RETURN); }
250short { KEYWORD_RETURN(SHORT); }
251signed { KEYWORD_RETURN(SIGNED); }
252__signed { KEYWORD_RETURN(SIGNED); } // GCC
253__signed__ { KEYWORD_RETURN(SIGNED); } // GCC
254sizeof { KEYWORD_RETURN(SIZEOF); }
255static { KEYWORD_RETURN(STATIC); }
256_Static_assert { KEYWORD_RETURN(STATICASSERT); } // C11
257struct { KEYWORD_RETURN(STRUCT); }
258switch { KEYWORD_RETURN(SWITCH); }
259thread { KEYWORD_RETURN(THREAD); } // C11
260_Thread_local { KEYWORD_RETURN(THREADLOCAL); } // C11
261throw { KEYWORD_RETURN(THROW); } // CFA
262throwResume { KEYWORD_RETURN(THROWRESUME); } // CFA
263timeout { QKEYWORD_RETURN(TIMEOUT); } // CFA
264trait { KEYWORD_RETURN(TRAIT); } // CFA
265try { KEYWORD_RETURN(TRY); } // CFA
266ttype { KEYWORD_RETURN(TTYPE); } // CFA
267typedef { KEYWORD_RETURN(TYPEDEF); }
268typeof { KEYWORD_RETURN(TYPEOF); } // GCC
269__typeof { KEYWORD_RETURN(TYPEOF); } // GCC
270__typeof__ { KEYWORD_RETURN(TYPEOF); } // GCC
271__uint128_t { KEYWORD_RETURN(INT); } // GCC
272union { KEYWORD_RETURN(UNION); }
273unsigned { KEYWORD_RETURN(UNSIGNED); }
274__builtin_va_list { KEYWORD_RETURN(VALIST); } // GCC
275virtual { KEYWORD_RETURN(VIRTUAL); } // CFA
276void { KEYWORD_RETURN(VOID); }
277volatile { KEYWORD_RETURN(VOLATILE); }
278__volatile { KEYWORD_RETURN(VOLATILE); } // GCC
279__volatile__ { KEYWORD_RETURN(VOLATILE); } // GCC
280waitfor { KEYWORD_RETURN(WAITFOR); }
281or { QKEYWORD_RETURN(WOR); } // CFA
282when { KEYWORD_RETURN(WHEN); }
283while { KEYWORD_RETURN(WHILE); }
284with { KEYWORD_RETURN(WITH); } // CFA
285zero_t { NUMERIC_RETURN(ZERO_T); } // CFA
286
287 /* identifier */
288{identifier} { IDENTIFIER_RETURN(); }
289{attr_identifier} { ATTRIBUTE_RETURN(); }
290"`" { BEGIN BKQUOTE; }
291<BKQUOTE>{identifier} { IDENTIFIER_RETURN(); }
292<BKQUOTE>"`" { BEGIN 0; }
293
294 /* numeric constants */
295{decimal_constant} { NUMERIC_RETURN(INTEGERconstant); }
296{octal_constant} { NUMERIC_RETURN(INTEGERconstant); }
297{hex_constant} { NUMERIC_RETURN(INTEGERconstant); }
298{real_decimal} { NUMERIC_RETURN(REALDECIMALconstant); } // must appear before floating_constant
299{real_fraction} { NUMERIC_RETURN(REALFRACTIONconstant); } // must appear before floating_constant
300{floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
301{hex_floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
302
303 /* character constant, allows empty value */
304({cwide_prefix}[_]?)?['] { BEGIN QUOTE; rm_underscore(); strtext = new std::string( yytext, yyleng ); }
305<QUOTE>[^'\\\n]* { strtext->append( yytext, yyleng ); }
306<QUOTE>['\n] { BEGIN 0; strtext->append( yytext, yyleng ); RETURN_STR(CHARACTERconstant); }
307 /* ' stop highlighting */
308
309 /* string constant */
310({swide_prefix}[_]?)?["] { BEGIN STRING; rm_underscore(); strtext = new std::string( yytext, yyleng ); }
311<STRING>[^"\\\n]* { strtext->append( yytext, yyleng ); }
312<STRING>["\n] { BEGIN 0; strtext->append( yytext, yyleng ); RETURN_STR(STRINGliteral); }
313 /* " stop highlighting */
314
315 /* common character/string constant */
316<QUOTE,STRING>{escape_seq} { rm_underscore(); strtext->append( yytext, yyleng ); }
317<QUOTE,STRING>"\\"{h_white}*"\n" {} // continuation (ALSO HANDLED BY CPP)
318<QUOTE,STRING>"\\" { strtext->append( yytext, yyleng ); } // unknown escape character
319
320 /* punctuation */
321"@" { ASCIIOP_RETURN(); }
322"[" { ASCIIOP_RETURN(); }
323"]" { ASCIIOP_RETURN(); }
324"(" { ASCIIOP_RETURN(); }
325")" { ASCIIOP_RETURN(); }
326"{" { ASCIIOP_RETURN(); }
327"}" { ASCIIOP_RETURN(); }
328"," { ASCIIOP_RETURN(); } // also operator
329":" { ASCIIOP_RETURN(); }
330";" { ASCIIOP_RETURN(); }
331"." { ASCIIOP_RETURN(); } // also operator
332"..." { NAMEDOP_RETURN(ELLIPSIS); }
333
334 /* alternative C99 brackets, "<:" & "<:<:" handled by preprocessor */
335"<:" { RETURN_VAL('['); }
336":>" { RETURN_VAL(']'); }
337"<%" { RETURN_VAL('{'); }
338"%>" { RETURN_VAL('}'); }
339
340 /* operators */
341"!" { ASCIIOP_RETURN(); }
342"+" { ASCIIOP_RETURN(); }
343"-" { ASCIIOP_RETURN(); }
344"*" { ASCIIOP_RETURN(); }
345"\\" { ASCIIOP_RETURN(); } // CFA, exponentiation
346"/" { ASCIIOP_RETURN(); }
347"%" { ASCIIOP_RETURN(); }
348"^" { ASCIIOP_RETURN(); }
349"~" { ASCIIOP_RETURN(); }
350"&" { ASCIIOP_RETURN(); }
351"|" { ASCIIOP_RETURN(); }
352"<" { ASCIIOP_RETURN(); }
353">" { ASCIIOP_RETURN(); }
354"=" { ASCIIOP_RETURN(); }
355"?" { ASCIIOP_RETURN(); }
356
357"++" { NAMEDOP_RETURN(ICR); }
358"--" { NAMEDOP_RETURN(DECR); }
359"==" { NAMEDOP_RETURN(EQ); }
360"!=" { NAMEDOP_RETURN(NE); }
361"<<" { NAMEDOP_RETURN(LS); }
362">>" { NAMEDOP_RETURN(RS); }
363"<=" { NAMEDOP_RETURN(LE); }
364">=" { NAMEDOP_RETURN(GE); }
365"&&" { NAMEDOP_RETURN(ANDAND); }
366"||" { NAMEDOP_RETURN(OROR); }
367"->" { NAMEDOP_RETURN(ARROW); }
368"+=" { NAMEDOP_RETURN(PLUSassign); }
369"-=" { NAMEDOP_RETURN(MINUSassign); }
370"\\=" { NAMEDOP_RETURN(EXPassign); } // CFA, exponentiation
371"*=" { NAMEDOP_RETURN(MULTassign); }
372"/=" { NAMEDOP_RETURN(DIVassign); }
373"%=" { NAMEDOP_RETURN(MODassign); }
374"&=" { NAMEDOP_RETURN(ANDassign); }
375"|=" { NAMEDOP_RETURN(ORassign); }
376"^=" { NAMEDOP_RETURN(ERassign); }
377"<<=" { NAMEDOP_RETURN(LSassign); }
378">>=" { NAMEDOP_RETURN(RSassign); }
379
380"@=" { NAMEDOP_RETURN(ATassign); } // CFA
381
382 /* CFA, operator identifier */
383{op_unary}"?" { IDENTIFIER_RETURN(); } // unary
384"?"({op_unary_pre_post}|"()"|"[?]"|"{}") { IDENTIFIER_RETURN(); }
385"^?{}" { IDENTIFIER_RETURN(); }
386"?"{op_binary_over}"?" { IDENTIFIER_RETURN(); } // binary
387 /*
388 This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?" can be
389 lexed as "*?"/"*?" or "*"/"?*?". Since it is common practise to put a unary operator juxtaposed to an identifier,
390 e.g., "*i", users will be annoyed if they cannot do this with respect to operator identifiers. Therefore, there is
391 a lexical look-ahead for the second case, with backtracking to return the leading unary operator and then
392 reparsing the trailing operator identifier. Otherwise a space is needed between the unary operator and operator
393 identifier to disambiguate this common case.
394
395 A similar issue occurs with the dereference, *?(...), and routine-call, ?()(...) identifiers. The ambiguity
396 occurs when the dereference operator has no parameters, *?() and *?()(...), requiring arbitrary whitespace
397 look-ahead for the routine-call parameter-list to disambiguate. However, the dereference operator must have a
398 parameter/argument to dereference *?(...). Hence, always interpreting the string *?() as * ?() does not preclude
399 any meaningful program.
400
401 The remaining cases are with the increment/decrement operators and conditional expression:
402
403 i++? ...(...);
404 i?++ ...(...);
405
406 requiring arbitrary whitespace look-ahead for the operator parameter-list, even though that interpretation is an
407 incorrect expression (juxtaposed identifiers). Therefore, it is necessary to disambiguate these cases with a
408 space:
409
410 i++ ? i : 0;
411 i? ++i : 0;
412 */
{op_unary}"?"({op_unary_pre_post}|"()"|"[?]"|{op_binary_over}"?") {
	// The match is a leading unary operator juxtaposed with an operator identifier. Only the unary operator is
	// consumed here: everything from the first question mark onward is pushed back to be scanned again.
	const bool single_char_op = yytext[1] == '?';		// otherwise the operator is the two character ++ or --
	if ( single_char_op ) {
		yyless( 1 );									// keep just the operator character
		ASCIIOP_RETURN();
	} // if
	yyless( 2 );										// keep both operator characters
	NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
}
423
424 /* unknown characters */
425. { printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno); }
426
427%%
428
429// Local Variables: //
430// mode: c++ //
431// tab-width: 4 //
432// compile-command: "make install" //
433// End: //
Note: See TracBrowser for help on using the repository browser.