source: translator/Parser/lex.l@ 643a2e1

ADT aaron-thesis arm-eh ast-experimental cleanup-dtors ctor deferred_resn demangler enum forall-pointer-decay gc_noraii jacob/cs343-translation jenkins-sandbox memory new-ast new-ast-unique-expr new-env no_list persistent-indexer pthread-emulation qualifiedEnum resolv-new string with_gc
Last change on this file since 643a2e1 was 2c2242c, checked in by Peter A. Buhr <pabuhr@…>, 11 years ago

deal with newer built

  • Property mode set to 100644
File size: 14.6 KB
Line 
1/* -*- Mode: C -*-
2 *
3 * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this
4 * grammar and to use it within software systems. THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT
5 * ANY EXPRESS OR IMPLIED WARRANTIES.
6 *
7 * lex.l --
8 *
9 * Author : Peter A. Buhr
10 * Created On : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
12 * Last Modified On : Wed Jan 21 08:43:59 2015
13 * Update Count : 320
14 */
15
16%option yylineno
17
18%{
19// This lexer assumes the program has been preprocessed by cpp. Hence, all user-level preprocessor
20// directives have been processed and removed from the source. The only exceptions are preprocessor
21// directives passed to the compiler (e.g., line-number directives) and C/C++ style comments, which
22// are ignored.
23
24//**************************** Includes and Defines ****************************
25
26#include <string>
27
28#include "lex.h"
29#include "ParseNode.h"
30#include "cfa.tab.h" // YACC generated definitions based on C++ grammar
31
// Name of the source file currently being scanned. It is replaced by the line-directive rule and
// copied into the location of every token by RETURN_VAL and RETURN_STR.
32char *yyfilename;
// Text of the character or string constant currently being assembled. It is allocated when the
// opening quote is matched, appended to by the QUOTE/STRING rules, and handed to the token by
// RETURN_STR.
33std::string *strtext; // accumulate parts of character and string constant value
34
// Token-return helpers used by the rule actions. Every macro that expands to more than one
// statement is wrapped in do { ... } while ( 0 ), so an invocation followed by ';' is exactly one
// statement and remains correct under an unbraced if/else.

#define WHITE_RETURN(x)		// do nothing
#define NEWLINE_RETURN()	WHITE_RETURN('\n')

// Return token x after recording the matched text (a fresh heap copy of yytext) and the current
// file name and line number in yylval.tok.
#define RETURN_VAL(x) \
	do { \
		yylval.tok.str = new std::string( yytext ); \
		yylval.tok.loc.file = yyfilename; \
		yylval.tok.loc.line = yylineno; \
		return (x); \
	} while ( 0 )

// Return token x whose text is the already accumulated strtext buffer (character and string
// constants); the pointer itself is stored in the token, no copy is made.
#define RETURN_STR(x) \
	do { \
		yylval.tok.str = strtext; \
		yylval.tok.loc.file = yyfilename; \
		yylval.tok.loc.line = yylineno; \
		return (x); \
	} while ( 0 )

#define KEYWORD_RETURN(x)	RETURN_VAL(x)		// keyword
// The token kind of an identifier depends on how typedefTable currently classifies the spelling.
#define IDENTIFIER_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))
//#define ATTRIBUTE_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? ATTR_IDENTIFIER : typedefTable.isTypedef(yytext) ? ATTR_TYPEDEFname : ATTR_TYPEGENname))
#define ATTRIBUTE_RETURN()	RETURN_VAL(ATTR_IDENTIFIER)

#define ASCIIOP_RETURN()	RETURN_VAL((int)yytext[0]) // single character operator
#define NAMEDOP_RETURN(x)	RETURN_VAL(x)		// multichar operator, with a name

// Strip the '_' separators from the lexeme before it is copied into the token.
#define NUMERIC_RETURN(x)	do { rm_underscore(); RETURN_VAL(x); } while ( 0 ) // numeric constant
55
56void rm_underscore() {
57 // remove underscores in numeric constant
58 int j = 0;
59 for ( int i = 0; yytext[i] != '\0'; i += 1 ) {
60 if ( yytext[i] != '_' ) {
61 yytext[j] = yytext[i];
62 j += 1;
63 } // if
64 } // for
65 yyleng = j;
66 yytext[yyleng] = '\0';
67}
68
69%}
70
 // digit classes shared by the numeric-constant and escape-sequence patterns
71octal [0-7]
72nonzero [1-9]
73decimal [0-9]
74hex [0-9a-fA-F]
 // universal character name: a backslash, then u and one hex quad or U and two hex quads; an
 // underscore may follow the u/U ({hex_quad} is defined with the numeric constants)
75universal_char "\\"((u"_"?{hex_quad})|(U"_"?{hex_quad}{2}))
76
77 // identifier, GCC: $ in identifier
 // first character: letter, underscore, dollar or universal character name; digits are also
 // allowed in the following characters
78identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
79
80 // quoted identifier
 // NOTE(review): no rule in this file references {quoted_identifier}; back-quoted names are
 // scanned through the BKQUOTE start condition instead -- confirm before removing
81quoted_identifier "`"{identifier}"`"
82
83 // attribute identifier, GCC: $ in identifier
 // an identifier immediately preceded by an at-sign
84attr_identifier "@"{identifier}
85
86 // numeric constants, CFA: '_' in constant
 // In every *_digits pattern an underscore may appear between digits, but the run must begin and
 // end with a digit. The rules that use these patterns return through NUMERIC_RETURN, which calls
 // rm_underscore() so the underscores never reach the token text.
 // hex_quad: exactly four hex digits, each after the first optionally preceded by an underscore
87hex_quad {hex}("_"?{hex}){3}
 // integer_suffix: optional underscore, then u, l, ul, lu, ll, ull or llu in either case; the
 // long-long part must be spelled ll or LL (mixed case is not accepted)
88integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)
89
 // octal: a lone 0, or 0 (optionally followed by an underscore) and octal digits
90octal_digits ({octal})|({octal}({octal}|"_")*{octal})
91octal_prefix "0""_"?
92octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?
93
 // decimal: must start with a nonzero digit
94nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
95decimal_constant {nonzero_digits}{integer_suffix}?
96
 // hexadecimal: 0x or 0X (optionally followed by an underscore) and hex digits
97hex_digits ({hex})|({hex}({hex}|"_")*{hex})
98hex_prefix "0"[xX]"_"?
99hex_constant {hex_prefix}{hex_digits}{integer_suffix}?
100
 // decimal floating point: either a fractional part (digits on at least one side of the point)
 // with an optional exponent, or plain digits with a mandatory exponent; optional f/l suffix
101decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
102fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
103exponent "_"?[eE]"_"?[+-]?{decimal_digits}
104floating_suffix "_"?[flFL]
105floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
106
 // hexadecimal floating point: the binary (p/P) exponent is mandatory in both alternatives and
 // its digits are decimal
107binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
108hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
109hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
110
111 // character escape sequence, GCC: \e => esc character
 // simple_escape: backslash and one of the listed characters
 // octal_escape: backslash and one to three octal digits, underscores allowed between digits
 // hex_escape: backslash, x, an optional underscore, then any number of hex digits
 // escape_seq: any of the above or a universal character name; used by the QUOTE/STRING rules
112simple_escape "\\"[abefnrtv'"?\\]
113 // ' stop highlighting
114octal_escape "\\"{octal}("_"?{octal}){0,2}
115hex_escape "\\""x""_"?{hex_digits}
116escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
117
118 // display/white-space characters
 // each class is written as an octal character code; h_white is a blank or a horizontal tab
119h_tab [\011]
120form_feed [\014]
121v_tab [\013]
122c_return [\015]
123h_white [ ]|{h_tab}
124
125 // operators
 // Operator classes used to build the CFA operator-identifier patterns in the rules section
 // (e.g. {op_unary}"?" and "?"{op_binary_over}"?").
 // op_unary_only: symbols that are only unary; op_unary_binary: symbols that are both unary and
 // binary; op_unary_pre_post: increment/decrement
126op_unary_only "~"|"!"
127op_unary_binary "+"|"-"|"*"
128op_unary_pre_post "++"|"--"
129op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}
130
 // op_binary_over is the set of binary operators accepted in the "?op?" identifier form
 // (presumably the overloadable ones -- TODO confirm the intended meaning of over/not_over).
 // NOTE(review): {op_binary_not_over} is used only by {operator}, and no rule in this file
 // references {operator}.
131op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
132op_binary_over {op_unary_binary}|{op_binary_only}
133op_binary_not_over "?"|"->"|"&&"|"||"
134operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
135
 // Exclusive start conditions: while one is active, only rules explicitly tagged with it apply.
 // COMMENT: inside a C style comment; BKQUOTE: between back quotes around an identifier;
 // QUOTE: inside a character constant; STRING: inside a string constant
136%x COMMENT
137%x BKQUOTE
138%x QUOTE
139%x STRING
140
141%%
142 /* line directives */
143^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
144 /* " stop highlighting */
145 char *end_num;
146 char *begin_string, *end_string;
147 char *filename;
148 long lineno, length;
149 lineno = strtol( yytext + 1, &end_num, 0 );
150 begin_string = strchr( end_num, '"' );
151 if ( begin_string ) {
152 end_string = strchr( begin_string + 1, '"' );
153 if ( end_string ) {
154 length = end_string - begin_string - 1;
155 filename = new char[ length + 1 ];
156 memcpy( filename, begin_string + 1, length );
157 filename[ length ] = '\0';
158 //std::cout << "file " << filename << " line " << lineno << std::endl;
159 yylineno = lineno;
160 yyfilename = filename;
161 }
162 }
163}
164
165 /* ignore preprocessor directives (for now) */
 // Any line whose first non-blank character is # is discarded whole, newline included. A line
 // directive matches both this rule and the earlier line-directive rule with the same length, so
 // the earlier rule is the one that runs.
166^{h_white}*"#"[^\n]*"\n" ;
167
168 /* ignore C style comments */
 // The opening delimiter enters the exclusive COMMENT state, where every character (newlines
 // included) is discarded one at a time until the closing delimiter returns to the initial
 // state. The two-character closing delimiter wins over the one-character catch-all because the
 // longest match is preferred.
169"/*" { BEGIN COMMENT; }
170<COMMENT>.|\n ;
171<COMMENT>"*/" { BEGIN 0; }
172
173 /* ignore C++ style comments */
 // NOTE(review): the pattern requires a terminating newline, so a comment on the final line of
 // the input with no newline after it is not matched by this rule.
174"//"[^\n]*"\n" ;
175
176 /* ignore whitespace */
 // WHITE_RETURN and NEWLINE_RETURN expand to nothing, so all three rules discard their match.
177{h_white}+ { WHITE_RETURN(' '); }
178({v_tab}|{c_return}|{form_feed})+ { WHITE_RETURN(' '); }
179({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" { NEWLINE_RETURN(); }
180
181 /* keywords */
 // Each keyword is also matched by {identifier} with the same length; the keyword wins because
 // its rule is listed first. A longer identifier that merely starts with a keyword goes to the
 // identifier rule because the longest match is preferred.
 // KEYWORD_RETURN stores the matched spelling in the token, so alternate spellings that share a
 // token kind (e.g. the GCC double-underscore forms) remain distinguishable by text.
 // The trailing tag names the origin of a keyword: C99, C11, GCC extension or CFA extension.
 // NOTE(review): __float128 and __int128 are returned as the plain FLOAT and INT token kinds.
182_Alignas { KEYWORD_RETURN(ALIGNAS); } // C11
183_Alignof { KEYWORD_RETURN(ALIGNOF); } // C11
184__alignof { KEYWORD_RETURN(ALIGNOF); } // GCC
185__alignof__ { KEYWORD_RETURN(ALIGNOF); } // GCC
186asm { KEYWORD_RETURN(ASM); }
187__asm { KEYWORD_RETURN(ASM); } // GCC
188__asm__ { KEYWORD_RETURN(ASM); } // GCC
189_Atomic { KEYWORD_RETURN(ATOMIC); } // C11
190__attribute { KEYWORD_RETURN(ATTRIBUTE); } // GCC
191__attribute__ { KEYWORD_RETURN(ATTRIBUTE); } // GCC
192auto { KEYWORD_RETURN(AUTO); }
193_Bool { KEYWORD_RETURN(BOOL); } // C99
194break { KEYWORD_RETURN(BREAK); }
195case { KEYWORD_RETURN(CASE); }
196catch { KEYWORD_RETURN(CATCH); } // CFA
197char { KEYWORD_RETURN(CHAR); }
198choose { KEYWORD_RETURN(CHOOSE); } // CFA
199_Complex { KEYWORD_RETURN(COMPLEX); } // C99
200__complex { KEYWORD_RETURN(COMPLEX); } // GCC
201__complex__ { KEYWORD_RETURN(COMPLEX); } // GCC
202const { KEYWORD_RETURN(CONST); }
203__const { KEYWORD_RETURN(CONST); } // GCC
204__const__ { KEYWORD_RETURN(CONST); } // GCC
205context { KEYWORD_RETURN(CONTEXT); } // CFA
206continue { KEYWORD_RETURN(CONTINUE); }
207default { KEYWORD_RETURN(DEFAULT); }
208do { KEYWORD_RETURN(DO); }
209double { KEYWORD_RETURN(DOUBLE); }
210dtype { KEYWORD_RETURN(DTYPE); } // CFA
211else { KEYWORD_RETURN(ELSE); }
212enum { KEYWORD_RETURN(ENUM); }
213__extension__ { KEYWORD_RETURN(EXTENSION); } // GCC
214extern { KEYWORD_RETURN(EXTERN); }
215fallthru { KEYWORD_RETURN(FALLTHRU); } // CFA
216finally { KEYWORD_RETURN(FINALLY); } // CFA
217float { KEYWORD_RETURN(FLOAT); }
218__float128 { KEYWORD_RETURN(FLOAT); } // GCC
219for { KEYWORD_RETURN(FOR); }
220forall { KEYWORD_RETURN(FORALL); } // CFA
221fortran { KEYWORD_RETURN(FORTRAN); }
222ftype { KEYWORD_RETURN(FTYPE); } // CFA
223_Generic { KEYWORD_RETURN(GENERIC); } // C11
224goto { KEYWORD_RETURN(GOTO); }
225if { KEYWORD_RETURN(IF); }
226_Imaginary { KEYWORD_RETURN(IMAGINARY); } // C99
227__imag { KEYWORD_RETURN(IMAGINARY); } // GCC
228__imag__ { KEYWORD_RETURN(IMAGINARY); } // GCC
229inline { KEYWORD_RETURN(INLINE); } // C99
230__inline { KEYWORD_RETURN(INLINE); } // GCC
231__inline__ { KEYWORD_RETURN(INLINE); } // GCC
232int { KEYWORD_RETURN(INT); }
233__int128 { KEYWORD_RETURN(INT); } // GCC
234__label__ { KEYWORD_RETURN(LABEL); } // GCC
235long { KEYWORD_RETURN(LONG); }
236lvalue { KEYWORD_RETURN(LVALUE); } // CFA
237_Noreturn { KEYWORD_RETURN(NORETURN); } // C11
238register { KEYWORD_RETURN(REGISTER); }
239restrict { KEYWORD_RETURN(RESTRICT); } // C99
240__restrict { KEYWORD_RETURN(RESTRICT); } // GCC
241__restrict__ { KEYWORD_RETURN(RESTRICT); } // GCC
242return { KEYWORD_RETURN(RETURN); }
243short { KEYWORD_RETURN(SHORT); }
244signed { KEYWORD_RETURN(SIGNED); }
245__signed { KEYWORD_RETURN(SIGNED); } // GCC
246__signed__ { KEYWORD_RETURN(SIGNED); } // GCC
247sizeof { KEYWORD_RETURN(SIZEOF); }
248static { KEYWORD_RETURN(STATIC); }
249_Static_assert { KEYWORD_RETURN(STATICASSERT); } // C11
250struct { KEYWORD_RETURN(STRUCT); }
251switch { KEYWORD_RETURN(SWITCH); }
252_Thread_local { KEYWORD_RETURN(THREADLOCAL); } // C11
253throw { KEYWORD_RETURN(THROW); } // CFA
254try { KEYWORD_RETURN(TRY); } // CFA
255type { KEYWORD_RETURN(TYPE); } // CFA
256typedef { KEYWORD_RETURN(TYPEDEF); }
257typeof { KEYWORD_RETURN(TYPEOF); } // GCC
258__typeof { KEYWORD_RETURN(TYPEOF); } // GCC
259__typeof__ { KEYWORD_RETURN(TYPEOF); } // GCC
260union { KEYWORD_RETURN(UNION); }
261unsigned { KEYWORD_RETURN(UNSIGNED); }
262void { KEYWORD_RETURN(VOID); }
263volatile { KEYWORD_RETURN(VOLATILE); }
264__volatile { KEYWORD_RETURN(VOLATILE); } // GCC
265__volatile__ { KEYWORD_RETURN(VOLATILE); } // GCC
266while { KEYWORD_RETURN(WHILE); }
267
268 /* identifier */
 // IDENTIFIER_RETURN asks typedefTable how the spelling is currently classified and returns
 // IDENTIFIER, TYPEDEFname or TYPEGENname accordingly; ATTRIBUTE_RETURN always returns
 // ATTR_IDENTIFIER.
 // A back quote enters the exclusive BKQUOTE state and the next back quote leaves it; neither
 // back quote produces a token. Only the identifier rule is active in that state, so a keyword
 // spelling between back quotes is returned through IDENTIFIER_RETURN rather than as a keyword.
 // NOTE(review): BKQUOTE has no rule for any other character (white space included), so such
 // input falls through to the scanner default action -- confirm this is intended.
269{identifier} { IDENTIFIER_RETURN(); }
270{attr_identifier} { ATTRIBUTE_RETURN(); }
271"`" { BEGIN BKQUOTE; }
272<BKQUOTE>{identifier} { IDENTIFIER_RETURN(); }
273<BKQUOTE>"`" { BEGIN 0; }
274
275 /* numeric constants */
 // A bare 0 or 1 gets its own token kind (ZERO, ONE). These two rules match the same single
 // character as {octal_constant} and {decimal_constant} and win only because they are listed
 // first; any longer spelling (more digits, or a suffix) goes to the general rules by longest
 // match. NUMERIC_RETURN removes underscores from the lexeme before it is stored in the token.
276"0" { NUMERIC_RETURN(ZERO); } // CFA
277"1" { NUMERIC_RETURN(ONE); } // CFA
278{decimal_constant} { NUMERIC_RETURN(INTEGERconstant); }
279{octal_constant} { NUMERIC_RETURN(INTEGERconstant); }
280{hex_constant} { NUMERIC_RETURN(INTEGERconstant); }
281{floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
282{hex_floating_constant} { NUMERIC_RETURN(FLOATINGconstant); }
283
284 /* character constant, allows empty value */
 // Opening rule: optional L prefix, optional underscore, then the quote. The underscore is
 // removed, a fresh strtext buffer is allocated and seeded with the opening text, and the
 // exclusive QUOTE state is entered. Inside QUOTE, runs of ordinary characters are appended to
 // strtext; a closing quote or a newline is appended too, ends the constant and returns
 // CHARACTERconstant with strtext as the token text.
 // NOTE(review): a newline terminates the constant without any diagnostic.
285"L"?"_"?['] { BEGIN QUOTE; rm_underscore(); strtext = new std::string; *strtext += std::string( yytext ); }
286<QUOTE>[^'\\\n]* { *strtext += std::string( yytext ); }
287<QUOTE>['\n] { BEGIN 0; *strtext += std::string( yytext); RETURN_STR(CHARACTERconstant); }
288 /* ' stop highlighting */
289
290 /* string constant */
 // Same scheme as the character constant, using the exclusive STRING state and returning
 // STRINGliteral.
291"L"?"_"?["] { BEGIN STRING; rm_underscore(); strtext = new std::string; *strtext += std::string( yytext ); }
292<STRING>[^"\\\n]* { *strtext += std::string( yytext ); }
293<STRING>["\n] { BEGIN 0; *strtext += std::string( yytext); RETURN_STR(STRINGliteral); }
294 /* " stop highlighting */
295
 // Escape handling shared by both states: a recognized escape sequence is appended with its
 // underscores removed; a backslash that does not start a recognized escape is appended alone
 // and the following character is then scanned normally.
296<QUOTE,STRING>{escape_seq} { rm_underscore(); *strtext += std::string( yytext ); }
297<QUOTE,STRING>[\\] { *strtext += std::string( yytext ); } // unknown escape character
298
299 /* punctuation */
 // ASCIIOP_RETURN returns the character code of the single matched character as the token kind.
 // The ellipsis is three characters long, so it wins over the single-period rule by longest match.
300"[" { ASCIIOP_RETURN(); }
301"]" { ASCIIOP_RETURN(); }
302"(" { ASCIIOP_RETURN(); }
303")" { ASCIIOP_RETURN(); }
304"{" { ASCIIOP_RETURN(); }
305"}" { ASCIIOP_RETURN(); }
306"," { ASCIIOP_RETURN(); } // also operator
307":" { ASCIIOP_RETURN(); }
308";" { ASCIIOP_RETURN(); }
309"." { ASCIIOP_RETURN(); } // also operator
310"..." { NAMEDOP_RETURN(ELLIPSIS); }
311
312 /* alternative C99 brackets, "%:" & "%:%:" handled by preprocessor */
 // Each digraph is returned with the token kind of the bracket it stands for, while the token
 // text keeps the digraph spelling that was matched.
313"<:" { RETURN_VAL('['); }
314":>" { RETURN_VAL(']'); }
315"<%" { RETURN_VAL('{'); }
316"%>" { RETURN_VAL('}'); }
317
318 /* operators */
 // Single-character operators use their character code as the token kind; multi-character
 // operators return a named token and win over their one-character prefixes by longest match.
319"!" { ASCIIOP_RETURN(); }
320"+" { ASCIIOP_RETURN(); }
321"-" { ASCIIOP_RETURN(); }
322"*" { ASCIIOP_RETURN(); }
323"/" { ASCIIOP_RETURN(); }
324"%" { ASCIIOP_RETURN(); }
325"^" { ASCIIOP_RETURN(); }
326"~" { ASCIIOP_RETURN(); }
327"&" { ASCIIOP_RETURN(); }
328"|" { ASCIIOP_RETURN(); }
329"<" { ASCIIOP_RETURN(); }
330">" { ASCIIOP_RETURN(); }
331"=" { ASCIIOP_RETURN(); }
332"?" { ASCIIOP_RETURN(); }
333
334"++" { NAMEDOP_RETURN(ICR); }
335"--" { NAMEDOP_RETURN(DECR); }
336"==" { NAMEDOP_RETURN(EQ); }
337"!=" { NAMEDOP_RETURN(NE); }
338"<<" { NAMEDOP_RETURN(LS); }
339">>" { NAMEDOP_RETURN(RS); }
340"<=" { NAMEDOP_RETURN(LE); }
341">=" { NAMEDOP_RETURN(GE); }
342"&&" { NAMEDOP_RETURN(ANDAND); }
343"||" { NAMEDOP_RETURN(OROR); }
344"->" { NAMEDOP_RETURN(ARROW); }
345"+=" { NAMEDOP_RETURN(PLUSassign); }
346"-=" { NAMEDOP_RETURN(MINUSassign); }
347"*=" { NAMEDOP_RETURN(MULTassign); }
348"/=" { NAMEDOP_RETURN(DIVassign); }
349"%=" { NAMEDOP_RETURN(MODassign); }
350"&=" { NAMEDOP_RETURN(ANDassign); }
351"|=" { NAMEDOP_RETURN(ORassign); }
352"^=" { NAMEDOP_RETURN(ERassign); }
353"<<=" { NAMEDOP_RETURN(LSassign); }
354">>=" { NAMEDOP_RETURN(RSassign); }
355
356 /* CFA, operator identifier */
 // An operator symbol with ? marking each operand position is scanned as one lexeme and
 // returned through IDENTIFIER_RETURN, i.e. it is classified by typedefTable like any other name.
 // Forms: prefix unary (op then ?), postfix/call/subscript (? then op), and binary (? op ?).
357{op_unary}"?" { IDENTIFIER_RETURN(); } // unary
358"?"({op_unary_pre_post}|"()"|"[?]") { IDENTIFIER_RETURN(); }
359"?"{op_binary_over}"?" { IDENTIFIER_RETURN(); } // binary
360 /*
361 This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the
362 string "*?*?" can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practise to put
363 a unary operator juxtaposed to an identifier, e.g., "*i", users will be annoyed if they
364 cannot do this with respect to operator identifiers. Even with this special hack, there
365 are 5 general cases that cannot be handled. The first case is for the function-call
366 identifier "?()":
367
368 int * ?()(); // declaration: space required after '*'
369 * ?()(); // expression: space required after '*'
370
371 Without the space, the string "*?()" is ambiguous without N character look ahead; it
372 requires scanning ahead to determine if there is a '(', which is the start of an
373 argument/parameter list.
374
375 The 4 remaining cases occur in expressions:
376
377 i++?i:0; // space required before '?'
378 i--?i:0; // space required before '?'
379 i?++i:0; // space required after '?'
380 i?--i:0; // space required after '?'
381
382 In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as
383 "i"/"++?" or "i++"/"?"; it requires scanning ahead to determine if there is a '(', which
384 is the start of an argument list. In the second two cases, the string "?++x" is
385 ambiguous, where this string can be lexed as "?++"/"x" or "?"/"++x"; it requires scanning
386 ahead to determine if there is a '(', which is the start of an argument list.
387 */
 // The pattern is a unary operator immediately followed by a complete ?-prefixed operator
 // identifier. Being longer than the plain unary form above, it is chosen by longest match; the
 // action then returns only the leading unary operator and leaves the rest to be rescanned.
388{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) {
389 // 1 or 2 character unary operator ?
 // i is the length of the leading unary operator: 1 when the second character is already the
 // question mark, otherwise 2 (increment or decrement)
390 int i = yytext[1] == '?' ? 1 : 2;
391 yyless( i ); // keep the i-character unary operator; push everything from the first '?' onward back onto the input
392 if ( i > 1 ) {
 // two-character operator: told apart by its first character
393 NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
394 } else {
395 ASCIIOP_RETURN();
396 } // if
397 }
398
399 /* unknown characters */
 // Catch-all for a single character that no other rule in the initial state matches: a message
 // with the character and the current line number is printed and scanning continues; no token
 // is returned.
 // NOTE(review): the message is written to standard output via printf rather than to standard
 // error -- confirm that is intended.
400. { printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno); }
401
402%%
403
404
405// Local Variables:
406// fill-column: 100
407// compile-command: "make"
408// End:
Note: See TracBrowser for help on using the repository browser.