source: src/Parser/lex.ll@ 79308c8e

ADT aaron-thesis arm-eh ast-experimental cleanup-dtors deferred_resn demangler enum forall-pointer-decay jacob/cs343-translation jenkins-sandbox new-ast new-ast-unique-expr new-env no_list persistent-indexer pthread-emulation qualifiedEnum resolv-new with_gc
Last change on this file since 79308c8e was ba2356b, checked in by Peter A. Buhr <pabuhr@…>, 8 years ago

additional change for integer_suffix and cleanup

  • Property mode set to 100644
File size: 16.2 KB
Line 
1/*
2 * Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3 *
4 * The contents of this file are covered under the licence agreement in the
5 * file "LICENCE" distributed with Cforall.
6 *
7 * lex.l --
8 *
9 * Author : Peter A. Buhr
10 * Created On : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
12 * Last Modified On : Thu May 18 09:03:49 2017
13 * Update Count : 513
14 */
15
16%option yylineno
17%option nounput
18
19%{
20// This lexer assumes the program has been preprocessed by cpp. Hence, all user-level preprocessor directives have been
21// performed and removed from the source. The only exceptions are preprocessor directives passed to the compiler (e.g.,
22// line-number directives) and C/C++ style comments, which are ignored.
23
24//**************************** Includes and Defines ****************************
25
26#include <string>
27#include <cstdio> // FILENAME_MAX
28
29#include "lex.h"
30#include "parser.h" // YACC generated definitions based on C++ grammar
31#include "ParseNode.h"
32#include "TypedefTable.h"
33
char *yyfilename;										// name of the source file being lexed; updated by the line-directive rule
std::string *strtext;									// accumulate parts of character and string constant value

// Token-return helpers for rule actions. Every helper that expands to more than one statement is wrapped in
// do { ... } while ( 0 ), so it behaves as exactly one statement and stays correct under an unbraced if/else.

// Record the current file name and line number in yylval, then return token code x.
#define RETURN_LOCN(x)		do { yylval.tok.loc.file = yyfilename; yylval.tok.loc.line = yylineno; return( x ); } while ( 0 )
// Token whose value is a newly allocated copy of the matched text.
#define RETURN_VAL(x)		do { yylval.tok.str = new std::string( yytext ); RETURN_LOCN( x ); } while ( 0 )
// Token that carries no string value.
#define RETURN_CHAR(x)		do { yylval.tok.str = nullptr; RETURN_LOCN( x ); } while ( 0 )
// Token whose value is the string accumulated in strtext.
#define RETURN_STR(x)		do { yylval.tok.str = strtext; RETURN_LOCN( x ); } while ( 0 )

#define WHITE_RETURN(x)		// do nothing
#define NEWLINE_RETURN()	WHITE_RETURN( '\n' )
#define ASCIIOP_RETURN()	RETURN_CHAR( (int)yytext[0] )	// single character operator
#define NAMEDOP_RETURN(x)	RETURN_CHAR( x )				// multichar operator, with a name
#define NUMERIC_RETURN(x)	do { rm_underscore(); RETURN_VAL( x ); } while ( 0 ) // numeric constant
#define KEYWORD_RETURN(x)	RETURN_CHAR( x )				// keyword
#define IDENTIFIER_RETURN()	RETURN_VAL( typedefTable.isKind( yytext ) )
#define ATTRIBUTE_RETURN()	RETURN_VAL( ATTR_IDENTIFIER )
50
void rm_underscore() {
	// Squeeze every '_' out of yytext in place: each kept character is copied down to the next free slot, and yyleng
	// ends up as the length of the compacted text.
	yyleng = 0;
	for ( const char * in = yytext; *in != '\0'; ++in ) {
		if ( *in == '_' ) continue;						// separator, drop it
		yytext[yyleng] = *in;
		yyleng += 1;
	} // for
	yytext[yyleng] = '\0';								// re-terminate the shortened text
}
62
63%}
64
65octal [0-7]
66nonzero [1-9]
67decimal [0-9]
68hex [0-9a-fA-F]
69universal_char "\\"((u"_"?{hex_quad})|(U"_"?{hex_quad}{2}))
70
71 // identifier, GCC: $ in identifier
72identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
73
74 // attribute identifier, GCC: $ in identifier
75attr_identifier "@"{identifier}
76
77 // numeric constants, CFA: '_' in constant
78hex_quad {hex}("_"?{hex}){3}
79integer_suffix "_"?(([uU](("ll"|"LL"|[lL])[iI]|[iI]?("ll"|"LL"|[lL])?))|([iI](("ll"|"LL"|[lL])[uU]|[uU]?("ll"|"LL"|[lL])?))|(("ll"|"LL"|[lL])([iI][uU]|[uU]?[iI]?)))
80
81octal_digits ({octal})|({octal}({octal}|"_")*{octal})
82octal_prefix "0""_"?
83octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?
84
85nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
86decimal_constant {nonzero_digits}{integer_suffix}?
87
88hex_digits ({hex})|({hex}({hex}|"_")*{hex})
89hex_prefix "0"[xX]"_"?
90hex_constant {hex_prefix}{hex_digits}{integer_suffix}?
91
92decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
93real_decimal {decimal_digits}"."{exponent}?{floating_suffix}?
94real_fraction "."{decimal_digits}{exponent}?{floating_suffix}?
95real_constant {decimal_digits}{real_fraction}
96exponent "_"?[eE]"_"?[+-]?{decimal_digits}
97 // GCC: D (double) and iI (imaginary) suffixes, and DL (long double)
98floating_suffix "_"?([fFdDlL][iI]?|[iI][lLfFdD]?|"DL")
99floating_constant (({real_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
100
101binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
102hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
103hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
104
105 // character escape sequence, GCC: \e => esc character
106simple_escape "\\"[abefnrtv'"?\\]
107 // ' stop highlighting
108octal_escape "\\"{octal}("_"?{octal}){0,2}
109hex_escape "\\""x""_"?{hex_digits}
110escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
111cwide_prefix "L"|"U"|"u"
112swide_prefix {cwide_prefix}|"u8"
113
114 // display/white-space characters
115h_tab [\011]
116form_feed [\014]
117v_tab [\013]
118c_return [\015]
119h_white [ ]|{h_tab}
120
121 // overloadable operators
122op_unary_only "~"|"!"
123op_unary_binary "+"|"-"|"*"
124op_unary_pre_post "++"|"--"
125op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}
126
127op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
128op_binary_over {op_unary_binary}|{op_binary_only}
129 // op_binary_not_over "?"|"->"|"."|"&&"|"||"|"@="
130 // operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
131
132%x COMMENT
133%x BKQUOTE
134%x QUOTE
135%x STRING
136
137%%
138 /* line directives */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["].*"\n" {
	/* " stop highlighting */
	// Preprocessor line directive of the form:  # <line-number> "<file-name>" ...
	// Updates yylineno and yyfilename so subsequent tokens are attributed to the named file and line.
	static char filename[FILENAME_MAX];					// storage for the current source-file name; yyfilename points here
	char * end_num;

	// The pattern permits white space before '#', so find the '#' instead of assuming it is yytext[0]; otherwise
	// strtol stops at the '#' and yields line number 0.
	const char * hash = strchr( yytext, '#' );
	assert( hash );										// guaranteed by the pattern
	// Base 10: the pattern only admits decimal digits, and a leading 0 must not be read as octal.
	long lineno = strtol( hash + 1, &end_num, 10 );

	char * begin_string = strchr( end_num, '"' );		// opening delimiter of file name
	if ( begin_string ) {								// file name ?
		char * end_string = strchr( begin_string + 1, '"' ); // look for ending delimiter
		assert( end_string );							// closing quote ?
		long length = end_string - begin_string - 1;	// file-name length without quotes or sentinel
		assert( length < FILENAME_MAX );				// room for sentinel ?
		memcpy( filename, begin_string + 1, length );	// copy file name from yytext
		filename[ length ] = '\0';						// terminate string with sentinel
		yylineno = lineno;
		yyfilename = filename;
	} // if
}
159
160 /* ignore preprocessor directives (for now) */
161^{h_white}*"#"[^\n]*"\n" ;
162
163 /* ignore C style comments (ALSO HANDLED BY CPP) */
164"/*" { BEGIN COMMENT; }
165<COMMENT>.|\n ;
166<COMMENT>"*/" { BEGIN 0; }
167
168 /* ignore C++ style comments (ALSO HANDLED BY CPP) */
169"//"[^\n]*"\n" ;
170
171 /* ignore whitespace */
172{h_white}+ { WHITE_RETURN(' '); }
173({v_tab}|{c_return}|{form_feed})+ { WHITE_RETURN(' '); }
174({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" { NEWLINE_RETURN(); }
175
176 /* keywords */
177_Alignas { KEYWORD_RETURN(ALIGNAS); } // C11
178_Alignof { KEYWORD_RETURN(ALIGNOF); } // C11
179__alignof { KEYWORD_RETURN(ALIGNOF); } // GCC
180__alignof__ { KEYWORD_RETURN(ALIGNOF); } // GCC
181asm { KEYWORD_RETURN(ASM); }
182__asm { KEYWORD_RETURN(ASM); } // GCC
183__asm__ { KEYWORD_RETURN(ASM); } // GCC
184_At { KEYWORD_RETURN(AT); } // CFA
185_Atomic { KEYWORD_RETURN(ATOMIC); } // C11
186__attribute { KEYWORD_RETURN(ATTRIBUTE); } // GCC
187__attribute__ { KEYWORD_RETURN(ATTRIBUTE); } // GCC
188auto { KEYWORD_RETURN(AUTO); }
189_Bool { KEYWORD_RETURN(BOOL); } // C99
190break { KEYWORD_RETURN(BREAK); }
191case { KEYWORD_RETURN(CASE); }
192catch { KEYWORD_RETURN(CATCH); } // CFA
193catchResume { KEYWORD_RETURN(CATCHRESUME); } // CFA
194char { KEYWORD_RETURN(CHAR); }
195choose { KEYWORD_RETURN(CHOOSE); } // CFA
196_Complex { KEYWORD_RETURN(COMPLEX); } // C99
197__complex { KEYWORD_RETURN(COMPLEX); } // GCC
198__complex__ { KEYWORD_RETURN(COMPLEX); } // GCC
199const { KEYWORD_RETURN(CONST); }
200__const { KEYWORD_RETURN(CONST); } // GCC
201__const__ { KEYWORD_RETURN(CONST); } // GCC
202continue { KEYWORD_RETURN(CONTINUE); }
203coroutine { KEYWORD_RETURN(COROUTINE); } // CFA
204default { KEYWORD_RETURN(DEFAULT); }
205disable { KEYWORD_RETURN(DISABLE); } // CFA
206do { KEYWORD_RETURN(DO); }
207double { KEYWORD_RETURN(DOUBLE); }
208dtype { KEYWORD_RETURN(DTYPE); } // CFA
209else { KEYWORD_RETURN(ELSE); }
210enable { KEYWORD_RETURN(ENABLE); } // CFA
211enum { KEYWORD_RETURN(ENUM); }
212__extension__ { KEYWORD_RETURN(EXTENSION); } // GCC
213extern { KEYWORD_RETURN(EXTERN); }
214fallthrough { KEYWORD_RETURN(FALLTHRU); } // CFA
215fallthru { KEYWORD_RETURN(FALLTHRU); } // CFA
216finally { KEYWORD_RETURN(FINALLY); } // CFA
217float { KEYWORD_RETURN(FLOAT); }
218__float128 { KEYWORD_RETURN(FLOAT); } // GCC
219for { KEYWORD_RETURN(FOR); }
220forall { KEYWORD_RETURN(FORALL); } // CFA
221fortran { KEYWORD_RETURN(FORTRAN); }
222ftype { KEYWORD_RETURN(FTYPE); } // CFA
223_Generic { KEYWORD_RETURN(GENERIC); } // C11
224goto { KEYWORD_RETURN(GOTO); }
225if { KEYWORD_RETURN(IF); }
226_Imaginary { KEYWORD_RETURN(IMAGINARY); } // C99
227__imag { KEYWORD_RETURN(IMAGINARY); } // GCC
228__imag__ { KEYWORD_RETURN(IMAGINARY); } // GCC
229inline { KEYWORD_RETURN(INLINE); } // C99
230__inline { KEYWORD_RETURN(INLINE); } // GCC
231__inline__ { KEYWORD_RETURN(INLINE); } // GCC
232int { KEYWORD_RETURN(INT); }
233__int128 { KEYWORD_RETURN(INT); } // GCC
234__label__ { KEYWORD_RETURN(LABEL); } // GCC
235long { KEYWORD_RETURN(LONG); }
236lvalue { KEYWORD_RETURN(LVALUE); } // CFA
237monitor { KEYWORD_RETURN(MONITOR); } // CFA
238mutex { KEYWORD_RETURN(MUTEX); } // CFA
239_Noreturn { KEYWORD_RETURN(NORETURN); } // C11
240__builtin_offsetof { KEYWORD_RETURN(OFFSETOF); } // GCC
241one_t { NUMERIC_RETURN(ONE_T); } // CFA
242otype { KEYWORD_RETURN(OTYPE); } // CFA
243register { KEYWORD_RETURN(REGISTER); }
244restrict { KEYWORD_RETURN(RESTRICT); } // C99
245__restrict { KEYWORD_RETURN(RESTRICT); } // GCC
246__restrict__ { KEYWORD_RETURN(RESTRICT); } // GCC
247return { KEYWORD_RETURN(RETURN); }
248short { KEYWORD_RETURN(SHORT); }
249signed { KEYWORD_RETURN(SIGNED); }
250__signed { KEYWORD_RETURN(SIGNED); } // GCC
251__signed__ { KEYWORD_RETURN(SIGNED); } // GCC
252sizeof { KEYWORD_RETURN(SIZEOF); }
253static { KEYWORD_RETURN(STATIC); }
254_Static_assert { KEYWORD_RETURN(STATICASSERT); } // C11
255struct { KEYWORD_RETURN(STRUCT); }
256switch { KEYWORD_RETURN(SWITCH); }
257thread { KEYWORD_RETURN(THREAD); } // C11
258_Thread_local { KEYWORD_RETURN(THREADLOCAL); } // C11
259throw { KEYWORD_RETURN(THROW); } // CFA
260throwResume { KEYWORD_RETURN(THROWRESUME); } // CFA
261trait { KEYWORD_RETURN(TRAIT); } // CFA
262try { KEYWORD_RETURN(TRY); } // CFA
263ttype { KEYWORD_RETURN(TTYPE); } // CFA
264typedef { KEYWORD_RETURN(TYPEDEF); }
265typeof { KEYWORD_RETURN(TYPEOF); } // GCC
266__typeof { KEYWORD_RETURN(TYPEOF); } // GCC
267__typeof__ { KEYWORD_RETURN(TYPEOF); } // GCC
268union { KEYWORD_RETURN(UNION); }
269unsigned { KEYWORD_RETURN(UNSIGNED); }
270__builtin_va_list { KEYWORD_RETURN(VALIST); } // GCC
271void { KEYWORD_RETURN(VOID); }
272volatile { KEYWORD_RETURN(VOLATILE); }
273__volatile { KEYWORD_RETURN(VOLATILE); } // GCC
274__volatile__ { KEYWORD_RETURN(VOLATILE); } // GCC
275while { KEYWORD_RETURN(WHILE); }
276zero_t { NUMERIC_RETURN(ZERO_T); } // CFA
277
278 /* identifier */
279{identifier} { IDENTIFIER_RETURN(); }
280{attr_identifier} { ATTRIBUTE_RETURN(); }
281"`" { BEGIN BKQUOTE; }
282<BKQUOTE>{identifier} { IDENTIFIER_RETURN(); }
283<BKQUOTE>"`" { BEGIN 0; }
284
285 /* numeric constants */
286"0" { NUMERIC_RETURN(ZERO); } // CFA
287"1" { NUMERIC_RETURN(ONE); } // CFA
288{decimal_constant} { NUMERIC_RETURN(INTEGERconstant); }
289{octal_constant} { NUMERIC_RETURN(INTEGERconstant); }
290{hex_constant} { NUMERIC_RETURN(INTEGERconstant); }
291{real_decimal} { NUMERIC_RETURN(REALDECIMALconstant); } // must appear before floating_constant
292{real_fraction} { NUMERIC_RETURN(REALFRACTIONconstant); } // must appear before floating_constant
293{floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
294{hex_floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
295
296 /* character constant, allows empty value */
297({cwide_prefix}[_]?)?['] { BEGIN QUOTE; rm_underscore(); strtext = new std::string( yytext, yyleng ); }
298<QUOTE>[^'\\\n]* { strtext->append( yytext, yyleng ); }
299<QUOTE>['\n] { BEGIN 0; strtext->append( yytext, yyleng ); RETURN_STR(CHARACTERconstant); }
300 /* ' stop highlighting */
301
302 /* string constant */
303({swide_prefix}[_]?)?["] { BEGIN STRING; rm_underscore(); strtext = new std::string( yytext, yyleng ); }
304<STRING>[^"\\\n]* { strtext->append( yytext, yyleng ); }
305<STRING>["\n] { BEGIN 0; strtext->append( yytext, yyleng ); RETURN_STR(STRINGliteral); }
306 /* " stop highlighting */
307
308 /* common character/string constant */
309<QUOTE,STRING>{escape_seq} { rm_underscore(); strtext->append( yytext, yyleng ); }
310<QUOTE,STRING>"\\"{h_white}*"\n" {} // continuation (ALSO HANDLED BY CPP)
311<QUOTE,STRING>"\\" { strtext->append( yytext, yyleng ); } // unknown escape character
312
313 /* punctuation */
314"@" { ASCIIOP_RETURN(); }
315"[" { ASCIIOP_RETURN(); }
316"]" { ASCIIOP_RETURN(); }
317"(" { ASCIIOP_RETURN(); }
318")" { ASCIIOP_RETURN(); }
319"{" { ASCIIOP_RETURN(); }
320"}" { ASCIIOP_RETURN(); }
321"," { ASCIIOP_RETURN(); } // also operator
322":" { ASCIIOP_RETURN(); }
323";" { ASCIIOP_RETURN(); }
324"." { ASCIIOP_RETURN(); } // also operator
325"..." { NAMEDOP_RETURN(ELLIPSIS); }
326
327 /* alternative C99 brackets, "%:" & "%:%:" handled by preprocessor */
"<:"			{ RETURN_CHAR('['); }					// digraph for '[': no token string, matching the rule for "["
":>"			{ RETURN_CHAR(']'); }					// digraph for ']'
"<%"			{ RETURN_CHAR('{'); }					// digraph for '{'
"%>"			{ RETURN_CHAR('}'); }					// digraph for '}'
332
333 /* operators */
334"!" { ASCIIOP_RETURN(); }
335"+" { ASCIIOP_RETURN(); }
336"-" { ASCIIOP_RETURN(); }
337"*" { ASCIIOP_RETURN(); }
338"/" { ASCIIOP_RETURN(); }
339"%" { ASCIIOP_RETURN(); }
340"^" { ASCIIOP_RETURN(); }
341"~" { ASCIIOP_RETURN(); }
342"&" { ASCIIOP_RETURN(); }
343"|" { ASCIIOP_RETURN(); }
344"<" { ASCIIOP_RETURN(); }
345">" { ASCIIOP_RETURN(); }
346"=" { ASCIIOP_RETURN(); }
347"?" { ASCIIOP_RETURN(); }
348
349"++" { NAMEDOP_RETURN(ICR); }
350"--" { NAMEDOP_RETURN(DECR); }
351"==" { NAMEDOP_RETURN(EQ); }
352"!=" { NAMEDOP_RETURN(NE); }
353"<<" { NAMEDOP_RETURN(LS); }
354">>" { NAMEDOP_RETURN(RS); }
355"<=" { NAMEDOP_RETURN(LE); }
356">=" { NAMEDOP_RETURN(GE); }
357"&&" { NAMEDOP_RETURN(ANDAND); }
358"||" { NAMEDOP_RETURN(OROR); }
359"->" { NAMEDOP_RETURN(ARROW); }
360"+=" { NAMEDOP_RETURN(PLUSassign); }
361"-=" { NAMEDOP_RETURN(MINUSassign); }
362"*=" { NAMEDOP_RETURN(MULTassign); }
363"/=" { NAMEDOP_RETURN(DIVassign); }
364"%=" { NAMEDOP_RETURN(MODassign); }
365"&=" { NAMEDOP_RETURN(ANDassign); }
366"|=" { NAMEDOP_RETURN(ORassign); }
367"^=" { NAMEDOP_RETURN(ERassign); }
368"<<=" { NAMEDOP_RETURN(LSassign); }
369">>=" { NAMEDOP_RETURN(RSassign); }
370
371"@=" { NAMEDOP_RETURN(ATassign); } // CFA
372
373 /* CFA, operator identifier */
374{op_unary}"?" { IDENTIFIER_RETURN(); } // unary
375"?"({op_unary_pre_post}|"()"|"[?]"|"{}") { IDENTIFIER_RETURN(); }
376"^?{}" { IDENTIFIER_RETURN(); }
377"?"{op_binary_over}"?" { IDENTIFIER_RETURN(); } // binary
378 /*
379 This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?"
380 can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practice to put a unary operator juxtaposed
381 to an identifier, e.g., "*i", users will be annoyed if they cannot do this with respect to operator
382 identifiers. Even with this special hack, there are 5 general cases that cannot be handled. The first
383 case is for the function-call identifier "?()":
384
385 int * ?()(); // declaration: space required after '*'
386 * ?()(); // expression: space required after '*'
387
388 Without the space, the string "*?()" is ambiguous without N character look ahead; it requires scanning
389 ahead to determine if there is a '(', which is the start of an argument/parameter list.
390
391 The 4 remaining cases occur in expressions:
392
393 i++?i:0; // space required before '?'
394 i--?i:0; // space required before '?'
395 i?++i:0; // space required after '?'
396 i?--i:0; // space required after '?'
397
398 In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as "i"/"++?" or
399 "i++"/"?"; it requires scanning ahead to determine if there is a '(', which is the start of an argument
400 list. In the second two cases, the string "?++x" is ambiguous, where this string can be lexed as
401 "?++"/"x" or "?"/"++x"; it requires scanning ahead to determine if there is a '(', which is the start of
402 an argument list.
403 */
{op_unary}"?"({op_unary_pre_post}|"[?]"|{op_binary_over}"?") {
	// The match begins with a unary operator followed by '?'. A '?' in the second position means the operator is a
	// single character; otherwise it is the two-character "++" or "--".
	const bool single_char = yytext[1] == '?';
	yyless( single_char ? 1 : 2 );						// keep only the operator, put back from the first '?' onward
	if ( single_char ) {
		ASCIIOP_RETURN();
	} // if
	NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
}
414
415 /* unknown characters */
416. { printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno); }
417
418%%
419
420// Local Variables: //
421// mode: c++ //
422// tab-width: 4 //
423// compile-command: "make install" //
424// End: //
Note: See TracBrowser for help on using the repository browser.