source: src/Parser/lex.ll@ 8b52686

ADT aaron-thesis arm-eh ast-experimental cleanup-dtors ctor deferred_resn demangler enum forall-pointer-decay gc_noraii jacob/cs343-translation jenkins-sandbox memory new-ast new-ast-unique-expr new-env no_list persistent-indexer pthread-emulation qualifiedEnum resolv-new with_gc
Last change on this file since 8b52686 was dd51906, checked in by Peter A. Buhr <pabuhr@…>, 9 years ago

automake change gnu back to foreign (do not know why it changed), add := and & (reference) to lexer/parser

  • Property mode set to 100644
File size: 15.3 KB
Line 
1/*
2 * Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3 *
4 * The contents of this file are covered under the licence agreement in the
5 * file "LICENCE" distributed with Cforall.
6 *
7 * lex.l --
8 *
9 * Author : Peter A. Buhr
10 * Created On : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
12 * Last Modified On : Mon Jun 6 18:08:27 2016
13 * Update Count : 451
14 */
15
16%option yylineno
17%option nounput
18
19%{
20// This lexer assumes the program has been preprocessed by cpp. Hence, all user-level preprocessor directives have been
21// performed and removed from the source. The only exceptions are preprocessor directives passed to the compiler (e.g.,
22// line-number directives) and C/C++ style comments, which are ignored.
23
24//**************************** Includes and Defines ****************************
25
26#include <string>
27
28#include "lex.h"
29#include "parser.h" // YACC generated definitions based on C++ grammar
30#include "ParseNode.h"
31#include "TypedefTable.h"
32
char *yyfilename;										// file name taken from the most recent line directive
std::string *strtext;									// accumulate parts of character and string constant value

// Token-return helpers used by the rule actions. Each one fills in yylval.tok (source location plus optional text)
// and returns the token code from the scanner. The multi-statement helpers are wrapped in do { ... } while ( 0 ) so
// that each expands to exactly one statement; without the wrapper, "if ( c ) RETURN_VAL( x );" would guard only the
// first assignment and return unconditionally.
#define RETURN_LOCN(x)		do { yylval.tok.loc.file = yyfilename; yylval.tok.loc.line = yylineno; return( x ); } while ( 0 )
#define RETURN_VAL(x)		do { yylval.tok.str = new std::string( yytext ); RETURN_LOCN( x ); } while ( 0 )	// token with a copy of its text
#define RETURN_CHAR(x)		do { yylval.tok.str = NULL; RETURN_LOCN( x ); } while ( 0 )	// token without text
#define RETURN_STR(x)		do { yylval.tok.str = strtext; RETURN_LOCN( x ); } while ( 0 )	// token with the accumulated text

#define WHITE_RETURN(x)		// do nothing
#define NEWLINE_RETURN()	WHITE_RETURN( '\n' )
#define ASCIIOP_RETURN()	RETURN_CHAR( (int)yytext[0] )	// single character operator
#define NAMEDOP_RETURN(x)	RETURN_VAL( x )				// multichar operator, with a name
#define NUMERIC_RETURN(x)	do { rm_underscore(); RETURN_VAL( x ); } while ( 0 ) // numeric constant
#define KEYWORD_RETURN(x)	RETURN_CHAR( x )			// keyword
#define IDENTIFIER_RETURN()	RETURN_VAL( typedefTable.isKind( yytext ) )
#define ATTRIBUTE_RETURN()	RETURN_VAL( ATTR_IDENTIFIER )
49
50void rm_underscore() {
51 // remove underscores in numeric constant
52 int j = 0;
53 for ( int i = 0; yytext[i] != '\0'; i += 1 ) {
54 if ( yytext[i] != '_' ) {
55 yytext[j] = yytext[i];
56 j += 1;
57 } // if
58 } // for
59 yyleng = j;
60 yytext[yyleng] = '\0';
61}
62
63%}
64
65octal [0-7]
66nonzero [1-9]
67decimal [0-9]
68hex [0-9a-fA-F]
	// universal character name: \u followed by one hex_quad or \U followed by two hex_quads, with an optional '_'
	// after the u/U (hex_quad is defined with the numeric constants below)
69universal_char "\\"((u"_"?{hex_quad})|(U"_"?{hex_quad}{2}))
70
71 // identifier: starts with a letter, '_', '$' (GCC) or universal character name; digits are allowed after the start
72identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
73
74 // quoted identifier: an identifier enclosed in backquotes (no rule in this file references this definition; the
	// BKQUOTE start condition handles backquotes instead)
75quoted_identifier "`"{identifier}"`"
76
77 // attribute identifier: '@' immediately followed by an identifier, GCC: $ in identifier
78attr_identifier "@"{identifier}
79
80 // numeric constants, CFA: '_' may separate digits and precede a suffix or exponent; the rules that use these
	// definitions call rm_underscore() (via NUMERIC_RETURN) to strip the underscores before the text is saved
81hex_quad {hex}("_"?{hex}){3}
82integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)
83
	// digit sequences must begin and end with a digit; underscores may appear only in between
84octal_digits ({octal})|({octal}({octal}|"_")*{octal})
85octal_prefix "0""_"?
86octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?
87
88nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
89decimal_constant {nonzero_digits}{integer_suffix}?
90
91hex_digits ({hex})|({hex}({hex}|"_")*{hex})
92hex_prefix "0"[xX]"_"?
93hex_constant {hex_prefix}{hex_digits}{integer_suffix}?
94
95decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
96fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
97exponent "_"?[eE]"_"?[+-]?{decimal_digits}
98 // GCC: D (double), DL (long double) and iI (imaginary) suffixes
99floating_suffix "_"?([fFdDlL][iI]?|"DL"|[iI][lLfFdD]?)
100 //floating_suffix "_"?([fFdD]|[lL]|[D][L])|([iI][lLfFdD])|([lLfFdD][iI]))
	// a floating constant needs either a fractional part (exponent optional) or a digit sequence with an exponent
101floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
102
	// hexadecimal floating constant: the binary (p/P) exponent is mandatory in both alternatives
103binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
104hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
105hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
106
107 // character escape sequences used inside character and string constants, GCC: \e => esc character
108simple_escape "\\"[abefnrtv'"?\\]
109 // ' stop highlighting
	// octal escape: backslash and 1 to 3 octal digits; hex escape: backslash, x and a hex digit sequence
110octal_escape "\\"{octal}("_"?{octal}){0,2}
111hex_escape "\\""x""_"?{hex_digits}
112escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
	// encoding prefixes: cwide_prefix is accepted before a character constant, swide_prefix (which adds u8) before a
	// string constant
113cwide_prefix "L"|"U"|"u"
114swide_prefix {cwide_prefix}|"u8"
115
116 // display/white-space characters, given as octal character codes; h_white is a blank or a horizontal tab
117h_tab [\011]
118form_feed [\014]
119v_tab [\013]
120c_return [\015]
121h_white [ ]|{h_tab}
122
123 // operators, grouped for building the CFA operator identifiers (e.g., ?+?, ++?, ?()) in the rules section;
	// op_unary and op_binary_over are used by those rules, while op_binary_not_over and operator are not referenced
	// by any rule in this file
124op_unary_only "~"|"!"
125op_unary_binary "+"|"-"|"*"
126op_unary_pre_post "++"|"--"
127op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}
128
129op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
130op_binary_over {op_unary_binary}|{op_binary_only}
131op_binary_not_over "?"|"->"|"&&"|"||"
132operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
133
/* Exclusive start conditions: COMMENT is entered after the opening of a C style comment, BKQUOTE after a backquote,
   QUOTE after the opening quote of a character constant, and STRING after the opening quote of a string constant.
   Each is left with BEGIN 0 by a rule in the rules section. */
134%x COMMENT
135%x BKQUOTE
136%x QUOTE
137%x STRING
138
139%%
140 /* line directives */
141^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["].*"\n" {
142 /* " stop highlighting */
143 char *end_num;
144 char *begin_string, *end_string;
145 char *filename;
146 long lineno, length;
147 lineno = strtol( yytext + 1, &end_num, 0 );
148 begin_string = strchr( end_num, '"' );
149 if ( begin_string ) {
150 end_string = strchr( begin_string + 1, '"' );
151 if ( end_string ) {
152 length = end_string - begin_string - 1;
153 filename = new char[ length + 1 ];
154 memcpy( filename, begin_string + 1, length );
155 filename[ length ] = '\0';
156 //std::cout << "file " << filename << " line " << lineno << std::endl;
157 yylineno = lineno;
158 yyfilename = filename;
159 } // if
160 } // if
161}
162
163 /* ignore preprocessor directives (for now): any other line whose first non-blank character is #, discarded
	   through the end of the line */
164^{h_white}*"#"[^\n]*"\n" ;
165
166 /* ignore C style comments (ALSO HANDLED BY CPP): the COMMENT start condition discards every character,
	   including newlines, until the closing delimiter returns the scanner to the initial state */
167"/*" { BEGIN COMMENT; }
168<COMMENT>.|\n ;
169<COMMENT>"*/" { BEGIN 0; }
170
171 /* ignore C++ style comments (ALSO HANDLED BY CPP): discarded through the end of the line */
172"//"[^\n]*"\n" ;
173
174 /* ignore whitespace: WHITE_RETURN and NEWLINE_RETURN expand to nothing, so no token is returned */
175{h_white}+ { WHITE_RETURN(' '); }
176({v_tab}|{c_return}|{form_feed})+ { WHITE_RETURN(' '); }
177({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" { NEWLINE_RETURN(); }
178
179 /* keywords: each rule returns its token code through KEYWORD_RETURN, which sets yylval.tok.str to NULL and
	   records the file and line. Alternative spellings (C11, C99, GCC, as tagged on each rule) return the same
	   token as the plain keyword. These rules precede the identifier rule, so a keyword wins over an identifier
	   match of the same length. */
180_Alignas { KEYWORD_RETURN(ALIGNAS); } // C11
181_Alignof { KEYWORD_RETURN(ALIGNOF); } // C11
182__alignof { KEYWORD_RETURN(ALIGNOF); } // GCC
183__alignof__ { KEYWORD_RETURN(ALIGNOF); } // GCC
184asm { KEYWORD_RETURN(ASM); }
185__asm { KEYWORD_RETURN(ASM); } // GCC
186__asm__ { KEYWORD_RETURN(ASM); } // GCC
187_At { KEYWORD_RETURN(AT); } // CFA
188_Atomic { KEYWORD_RETURN(ATOMIC); } // C11
189__attribute { KEYWORD_RETURN(ATTRIBUTE); } // GCC
190__attribute__ { KEYWORD_RETURN(ATTRIBUTE); } // GCC
191auto { KEYWORD_RETURN(AUTO); }
192_Bool { KEYWORD_RETURN(BOOL); } // C99
193break { KEYWORD_RETURN(BREAK); }
194case { KEYWORD_RETURN(CASE); }
195catch { KEYWORD_RETURN(CATCH); } // CFA
196catchResume { KEYWORD_RETURN(CATCHRESUME); } // CFA
197char { KEYWORD_RETURN(CHAR); }
198choose { KEYWORD_RETURN(CHOOSE); } // CFA
199_Complex { KEYWORD_RETURN(COMPLEX); } // C99
200__complex { KEYWORD_RETURN(COMPLEX); } // GCC
201__complex__ { KEYWORD_RETURN(COMPLEX); } // GCC
202const { KEYWORD_RETURN(CONST); }
203__const { KEYWORD_RETURN(CONST); } // GCC
204__const__ { KEYWORD_RETURN(CONST); } // GCC
205continue { KEYWORD_RETURN(CONTINUE); }
206default { KEYWORD_RETURN(DEFAULT); }
207disable { KEYWORD_RETURN(DISABLE); } // CFA
208do { KEYWORD_RETURN(DO); }
209double { KEYWORD_RETURN(DOUBLE); }
210dtype { KEYWORD_RETURN(DTYPE); } // CFA
211else { KEYWORD_RETURN(ELSE); }
212enable { KEYWORD_RETURN(ENABLE); } // CFA
213enum { KEYWORD_RETURN(ENUM); }
214__extension__ { KEYWORD_RETURN(EXTENSION); } // GCC
215extern { KEYWORD_RETURN(EXTERN); }
216fallthru { KEYWORD_RETURN(FALLTHRU); } // CFA
217finally { KEYWORD_RETURN(FINALLY); } // CFA
218float { KEYWORD_RETURN(FLOAT); }
219__float128 { KEYWORD_RETURN(FLOAT); } // GCC
220for { KEYWORD_RETURN(FOR); }
221forall { KEYWORD_RETURN(FORALL); } // CFA
222fortran { KEYWORD_RETURN(FORTRAN); }
223ftype { KEYWORD_RETURN(FTYPE); } // CFA
224_Generic { KEYWORD_RETURN(GENERIC); } // C11
225goto { KEYWORD_RETURN(GOTO); }
226if { KEYWORD_RETURN(IF); }
227_Imaginary { KEYWORD_RETURN(IMAGINARY); } // C99
228__imag { KEYWORD_RETURN(IMAGINARY); } // GCC
229__imag__ { KEYWORD_RETURN(IMAGINARY); } // GCC
230inline { KEYWORD_RETURN(INLINE); } // C99
231__inline { KEYWORD_RETURN(INLINE); } // GCC
232__inline__ { KEYWORD_RETURN(INLINE); } // GCC
233int { KEYWORD_RETURN(INT); }
234__int128 { KEYWORD_RETURN(INT); } // GCC
235__label__ { KEYWORD_RETURN(LABEL); } // GCC
236long { KEYWORD_RETURN(LONG); }
237lvalue { KEYWORD_RETURN(LVALUE); } // CFA
238_Noreturn { KEYWORD_RETURN(NORETURN); } // C11
239__builtin_offsetof { KEYWORD_RETURN(OFFSETOF); } // GCC
240otype { KEYWORD_RETURN(OTYPE); } // CFA
241register { KEYWORD_RETURN(REGISTER); }
242restrict { KEYWORD_RETURN(RESTRICT); } // C99
243__restrict { KEYWORD_RETURN(RESTRICT); } // GCC
244__restrict__ { KEYWORD_RETURN(RESTRICT); } // GCC
245return { KEYWORD_RETURN(RETURN); }
246short { KEYWORD_RETURN(SHORT); }
247signed { KEYWORD_RETURN(SIGNED); }
248__signed { KEYWORD_RETURN(SIGNED); } // GCC
249__signed__ { KEYWORD_RETURN(SIGNED); } // GCC
250sizeof { KEYWORD_RETURN(SIZEOF); }
251static { KEYWORD_RETURN(STATIC); }
252_Static_assert { KEYWORD_RETURN(STATICASSERT); } // C11
253struct { KEYWORD_RETURN(STRUCT); }
254switch { KEYWORD_RETURN(SWITCH); }
255_Thread_local { KEYWORD_RETURN(THREADLOCAL); } // C11
256throw { KEYWORD_RETURN(THROW); } // CFA
257throwResume { KEYWORD_RETURN(THROWRESUME); } // CFA
258trait { KEYWORD_RETURN(TRAIT); } // CFA
259try { KEYWORD_RETURN(TRY); } // CFA
260typedef { KEYWORD_RETURN(TYPEDEF); }
261typeof { KEYWORD_RETURN(TYPEOF); } // GCC
262__typeof { KEYWORD_RETURN(TYPEOF); } // GCC
263__typeof__ { KEYWORD_RETURN(TYPEOF); } // GCC
264union { KEYWORD_RETURN(UNION); }
265unsigned { KEYWORD_RETURN(UNSIGNED); }
266__builtin_va_list { KEYWORD_RETURN(VALIST); } // GCC
267void { KEYWORD_RETURN(VOID); }
268volatile { KEYWORD_RETURN(VOLATILE); }
269__volatile { KEYWORD_RETURN(VOLATILE); } // GCC
270__volatile__ { KEYWORD_RETURN(VOLATILE); } // GCC
271while { KEYWORD_RETURN(WHILE); }
272
273 /* identifier: the token code comes from typedefTable.isKind( yytext ) and a copy of the text is attached. In
	   the BKQUOTE start condition only the identifier rule and the closing backquote are active, so a keyword
	   spelled between backquotes is returned through the identifier path. */
274{identifier} { IDENTIFIER_RETURN(); }
275{attr_identifier} { ATTRIBUTE_RETURN(); }
276"`" { BEGIN BKQUOTE; }
277<BKQUOTE>{identifier} { IDENTIFIER_RETURN(); }
278<BKQUOTE>"`" { BEGIN 0; }
279
280 /* numeric constants: NUMERIC_RETURN strips underscores from yytext and attaches a copy of the remaining text.
	   The literals 0 and 1 have their own tokens; these two rules come first, so they win over the octal and
	   decimal constant rules, which match the same single character. */
281"0" { NUMERIC_RETURN(ZERO); } // CFA
282"1" { NUMERIC_RETURN(ONE); } // CFA
283{decimal_constant} { NUMERIC_RETURN(INTEGERconstant); }
284{octal_constant} { NUMERIC_RETURN(INTEGERconstant); }
285{hex_constant} { NUMERIC_RETURN(INTEGERconstant); }
286{floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
287{hex_floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
288
289 /* character constant, allows empty value: the opening rule allocates a new strtext and stores the prefix and
	   quote (underscore removed); the QUOTE rules append the body; the closing quote or a newline ends the
	   constant and returns the accumulated text, terminator included */
290({cwide_prefix}[_]?)?['] { BEGIN QUOTE; rm_underscore(); strtext = new std::string; *strtext += std::string( yytext ); }
291<QUOTE>[^'\\\n]* { *strtext += std::string( yytext ); }
292<QUOTE>['\n] { BEGIN 0; *strtext += std::string( yytext); RETURN_STR(CHARACTERconstant); }
293 /* ' stop highlighting */
294
295 /* string constant: same scheme as the character constant, using the STRING start condition and swide_prefix */
296({swide_prefix}[_]?)?["] { BEGIN STRING; rm_underscore(); strtext = new std::string; *strtext += std::string( yytext ); }
297<STRING>[^"\\\n]* { *strtext += std::string( yytext ); }
298<STRING>["\n] { BEGIN 0; *strtext += std::string( yytext ); RETURN_STR(STRINGliteral); }
299 /* " stop highlighting */
300
301 /* common character/string constant: a recognized escape sequence is appended with its underscores removed, a
	   backslash-newline continuation is dropped, and any other backslash is appended unchanged */
302<QUOTE,STRING>{escape_seq} { rm_underscore(); *strtext += std::string( yytext ); }
303<QUOTE,STRING>"\\"{h_white}*"\n" {} // continuation (ALSO HANDLED BY CPP)
304<QUOTE,STRING>"\\" { *strtext += std::string( yytext ); } // unknown escape character
305
306 /* punctuation: each single character is returned as its own character code with no text attached
	   (ASCIIOP_RETURN); the ellipsis is a named token carrying a copy of its text */
307"[" { ASCIIOP_RETURN(); }
308"]" { ASCIIOP_RETURN(); }
309"(" { ASCIIOP_RETURN(); }
310")" { ASCIIOP_RETURN(); }
311"{" { ASCIIOP_RETURN(); }
312"}" { ASCIIOP_RETURN(); }
313"," { ASCIIOP_RETURN(); } // also operator
314":" { ASCIIOP_RETURN(); }
315";" { ASCIIOP_RETURN(); }
316"." { ASCIIOP_RETURN(); } // also operator
317"..." { NAMEDOP_RETURN(ELLIPSIS); }
318
319 /* alternative C99 brackets (digraphs), "%:" & "%:%:" handled by preprocessor */
320"<:" { RETURN_VAL('['); }
321":>" { RETURN_VAL(']'); }
322"<%" { RETURN_VAL('{'); }
323"%>" { RETURN_VAL('}'); }
324
325 /* operators: a single-character operator is returned as its own character code with no text attached
	   (ASCIIOP_RETURN); a multi-character operator is returned as a named token with a copy of its text
	   (NAMEDOP_RETURN) */
326"!" { ASCIIOP_RETURN(); }
327"+" { ASCIIOP_RETURN(); }
328"-" { ASCIIOP_RETURN(); }
329"*" { ASCIIOP_RETURN(); }
330"/" { ASCIIOP_RETURN(); }
331"%" { ASCIIOP_RETURN(); }
332"^" { ASCIIOP_RETURN(); }
333"~" { ASCIIOP_RETURN(); }
334"&" { ASCIIOP_RETURN(); }
335"|" { ASCIIOP_RETURN(); }
336"<" { ASCIIOP_RETURN(); }
337">" { ASCIIOP_RETURN(); }
338"=" { ASCIIOP_RETURN(); }
339"?" { ASCIIOP_RETURN(); }
340
341"++" { NAMEDOP_RETURN(ICR); }
342"--" { NAMEDOP_RETURN(DECR); }
343"==" { NAMEDOP_RETURN(EQ); }
344"!=" { NAMEDOP_RETURN(NE); }
345"<<" { NAMEDOP_RETURN(LS); }
346">>" { NAMEDOP_RETURN(RS); }
347"<=" { NAMEDOP_RETURN(LE); }
348">=" { NAMEDOP_RETURN(GE); }
349"&&" { NAMEDOP_RETURN(ANDAND); }
350"||" { NAMEDOP_RETURN(OROR); }
351"->" { NAMEDOP_RETURN(ARROW); }
352"+=" { NAMEDOP_RETURN(PLUSassign); }
353"-=" { NAMEDOP_RETURN(MINUSassign); }
354"*=" { NAMEDOP_RETURN(MULTassign); }
355"/=" { NAMEDOP_RETURN(DIVassign); }
356"%=" { NAMEDOP_RETURN(MODassign); }
357"&=" { NAMEDOP_RETURN(ANDassign); }
358"|=" { NAMEDOP_RETURN(ORassign); }
359"^=" { NAMEDOP_RETURN(ERassign); }
360"<<=" { NAMEDOP_RETURN(LSassign); }
361">>=" { NAMEDOP_RETURN(RSassign); }
362
363"@=" { NAMEDOP_RETURN(ATassign); }
364":=" { NAMEDOP_RETURN(REFassign); }
365
366 /* CFA, operator identifier: an operator spelled with ? marking each operand position is returned through the
	   identifier path (IDENTIFIER_RETURN), i.e., the token code comes from typedefTable.isKind( yytext ) */
367{op_unary}"?" { IDENTIFIER_RETURN(); } // unary
368"?"({op_unary_pre_post}|"()"|"[?]"|"{}") { IDENTIFIER_RETURN(); }
369"^?{}" { IDENTIFIER_RETURN(); }
370"?"{op_binary_over}"?" { IDENTIFIER_RETURN(); } // binary
371 /*
372 This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?"
373 can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practice to put a unary operator juxtaposed
374 to an identifier, e.g., "*i", users will be annoyed if they cannot do this with respect to operator
375 identifiers. Even with this special hack, there are 5 general cases that cannot be handled. The first
376 case is for the function-call identifier "?()":
377
378 int * ?()(); // declaration: space required after '*'
379 * ?()(); // expression: space required after '*'
380
381 Without the space, the string "*?()" is ambiguous without N character look ahead; it requires scanning
382 ahead to determine if there is a '(', which is the start of an argument/parameter list.
383
384 The 4 remaining cases occur in expressions:
385
386 i++?i:0; // space required before '?'
387 i--?i:0; // space required before '?'
388 i?++i:0; // space required after '?'
389 i?--i:0; // space required after '?'
390
391 In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as "i"/"++?" or
392 "i++"/"?"; it requires scanning ahead to determine if there is a '(', which is the start of an argument
393 list. In the second two cases, the string "?++x" is ambiguous, where this string can be lexed as
394 "?++"/"x" or "?"/"++x"; it requires scanning ahead to determine if there is a '(', which is the start of
395 an argument list.
396 */
397{op_unary}"?"({op_unary_pre_post}|"[?]"|{op_binary_over}"?") {
398 // 1 or 2 character unary operator ? (the leading operator is 1 character if the second character is the ?)
399 int i = yytext[1] == '?' ? 1 : 2;
400 yyless( i ); // put back characters up to first '?' (only the leading unary operator stays in yytext)
401 if ( i > 1 ) {
	// 2 character operator is ++ or --, distinguished by its first character
402 NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
403 } else {
	// 1 character operator is returned as its own character code
404 ASCIIOP_RETURN();
405 } // if
406}
407
408 /* unknown characters: any single character not matched by an earlier rule is reported with its line number on
	   standard output; no token is returned and scanning continues */
409. { printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno); }
410
411%%
412
413// Local Variables: //
414// mode: c++ //
415// tab-width: 4 //
416// compile-command: "make install" //
417// End: //
Note: See TracBrowser for help on using the repository browser.