source: translator/Parser.old/lex.l@ c8ffe20b

ADT aaron-thesis arm-eh ast-experimental cleanup-dtors ctor deferred_resn demangler enum forall-pointer-decay gc_noraii jacob/cs343-translation jenkins-sandbox memory new-ast new-ast-unique-expr new-env no_list persistent-indexer pthread-emulation qualifiedEnum resolv-new string with_gc
Last change on this file since c8ffe20b was 51b73452, checked in by Peter A. Buhr <pabuhr@…>, 11 years ago

initial commit

  • Property mode set to 100644
File size: 12.4 KB
Line 
1/* -*- Mode: C -*-
2 *
3 * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this
4 * grammar and to use it within software systems. THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT
5 * ANY EXPRESS OR IMPLIED WARRANTIES.
6 *
7 * lex.l --
8 *
9 * Author : Peter A. Buhr
10 * Created On : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
12 * Last Modified On : Thu Jan 23 16:17:09 2003
13 * Update Count : 191
14 */
15
16%option yylineno
17
18%{
19/* This lexer assumes the program has been preprocessed by cpp. Hence, all user-level preprocessor
20 directives have been processed and removed from the source. The only exceptions are preprocessor
21 directives passed on to the compiler (e.g., line-number directives) and C/C++ style comments, which
22 are ignored. */
23
24/*************** Includes and Defines *****************************/
25
26#include <string>
27
28#include "ParseNode.h"
29#include "cfa.tab.h" /* YACC generated definitions based on C++ grammar */
30#include "lex.h"
31
/* Name of the file the scanner is currently attributed to. It is copied into every token by
   RETURN_VAL and replaced by the line-directive rule whenever a '# <line> "<file>"' line is seen. */
char *yyfilename;

/* White space produces no token: the macro argument is discarded and the action falls through. */
#define WHITE_RETURN(x)		/* do nothing */
#define NEWLINE_RETURN()	WHITE_RETURN('\n')

/* Fill in the semantic value of the current token (lexeme text, file name, line number) and return
   token code x to the parser. Wrapped in do/while(0) so the macro expands to exactly one statement
   and stays correct inside an unbraced if/else. */
#define RETURN_VAL(x)		do { \
		yylval.tok.str = new std::string( yytext ); \
		yylval.tok.file = yyfilename; \
		yylval.tok.line = yylineno; \
		return(x); \
	} while ( 0 )

#define KEYWORD_RETURN(x)	RETURN_VAL(x)		/* keyword */
/* Classify a name through the typedef table: identifier, typedef name, or (otherwise) type generator. */
#define IDENTIFIER_RETURN()	RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))

#define ASCIIOP_RETURN()	RETURN_VAL((int)yytext[0])	/* single character operator */
#define NAMEDOP_RETURN(x)	RETURN_VAL(x)		/* multichar operator, with a name */

/* Numeric constant: strip the CFA '_' digit separators from yytext before building the token. */
#define NUMERIC_RETURN(x)	do { rm_underscore(); RETURN_VAL(x); } while ( 0 )
45
/* Strip every '_' from the current lexeme. yytext is compacted in place, re-terminated, and yyleng
   is updated to the new length. Used for constants, where CFA allows '_' as a separator. */
void rm_underscore() {
	int kept = 0;					// number of characters retained so far
	for ( int scan = 0; scan < yyleng; ++scan ) {
		const char ch = yytext[scan];
		if ( ch == '_' ) continue;		// drop separator
		yytext[kept++] = ch;
	} // for
	yytext[kept] = '\0';
	yyleng = kept;
}
57
58%}
59
octal		[0-7]
nonzero		[1-9]
decimal		[0-9]
hex		[0-9a-fA-F]

	/* Identifiers. Following GCC, '$' is accepted anywhere in a name. A universal character name is
	   a backslash followed by 'u' and one hex_quad or 'U' and two hex_quads; hex_quad itself is
	   defined with the numeric-constant definitions. */
universal_char	"\\"((u{hex_quad})|(U{hex_quad}{2}))
identifier	([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
68
69 /* numeric constants, CFA: '_' in constant */
hex_quad		{hex}{4}
integer_suffix		"_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)

	/* Every *_digits definition has the same shape: a single digit, or a run that starts and ends
	   with a digit and may contain '_' separators in between. */

	/* octal: a lone "0", or "0" (optionally followed by '_') and octal digits */
octal_digits		({octal})|({octal}({octal}|"_")*{octal})
octal_prefix		"0""_"?
octal_constant		(("0")|({octal_prefix}{octal_digits})){integer_suffix}?

	/* decimal: must start with a nonzero digit */
nonzero_digits		({nonzero})|({nonzero}({decimal}|"_")*{decimal})
decimal_constant	{nonzero_digits}{integer_suffix}?

	/* hexadecimal: "0x"/"0X" (optionally followed by '_') and hex digits */
hex_digits		({hex})|({hex}({hex}|"_")*{hex})
hex_prefix		"0"[xX]"_"?
hex_constant		{hex_prefix}{hex_digits}{integer_suffix}?

	/* decimal floating point: a fraction with optional exponent, or digits with a mandatory exponent */
decimal_digits		({decimal})|({decimal}({decimal}|"_")*{decimal})
fractional_constant	({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
exponent		"_"?[eE]"_"?[+-]?{decimal_digits}
floating_suffix		"_"?[flFL]
floating_constant	(({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?

	/* hexadecimal floating point: the binary ('p'/'P') exponent is mandatory */
binary_exponent		"_"?[pP]"_"?[+-]?{decimal_digits}
hex_fractional_constant	({hex_digits}?"."{hex_digits})|({hex_digits}".")
hex_floating_constant	{hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
93
94 /* character escape sequence, GCC: \e => esc character */
simple_escape	"\\"[abefnrtv'"?\\]
	/* backslash followed by one to three octal digits */
octal_escape	"\\"{octal}{1,3}
	/* backslash, 'x', then one or more hex digits */
hex_escape	"\\""x"{hex}+
	/* any escape accepted inside character and string constants */
escape_seq	{simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
99
100 /* display/white-space characters */
h_tab		[\011]
v_tab		[\013]
form_feed	[\014]
c_return	[\015]
	/* horizontal white space: a blank or a horizontal tab */
h_white		[ ]|{h_tab}
106
107 /* operators */
op_unary_only		"~"|"!"
op_unary_binary		"+"|"-"|"*"
op_unary_pre_post	"++"|"--"
op_unary		{op_unary_only}|{op_unary_binary}|{op_unary_pre_post}

	/* op_binary_over is the set of binary operators accepted between the two '?' of a "?op?"
	   operator identifier; op_binary_not_over operators are excluded from that set. */
op_binary_only		"/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
op_binary_over		{op_unary_binary}|{op_binary_only}
op_binary_not_over	"?"|"->"|"&&"|"||"
operator		{op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
117
118%x COMMENT
119
120%%
121 /* line directives */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n"	{
		// Line marker of the form:  # <line> "<file>" ...
		// Resets the scanner's notion of the current line number and file name.

		// The pattern permits blanks/tabs before '#', so find the '#' rather than assuming it is
		// yytext[0]; starting strtol on the '#' itself would yield 0. The pattern guarantees a '#'.
		char *hash = strchr( yytext, '#' );
		char *end_num;
		// Line numbers are decimal; base 10 keeps a leading '0' from being read as octal.
		long lineno = strtol( hash + 1, &end_num, 10 );

		char *begin_string = strchr( end_num, '"' );
		if ( begin_string ) {
			char *end_string = strchr( begin_string + 1, '"' );
			if ( end_string ) {
				long length = end_string - begin_string - 1;
				// Deliberately not freed here: RETURN_VAL stores the yyfilename pointer in each
				// token, so earlier names must remain valid after the current file changes.
				char *filename = new char[ length + 1 ];
				memcpy( filename, begin_string + 1, length );
				filename[ length ] = '\0';
				//std::cout << "file " << filename << " line " << lineno << std::endl;
				yylineno = lineno;
				yyfilename = filename;
			} // if
		} // if
	}
142
143 /* ignore preprocessor directives (for now) */
^{h_white}*"#"[^\n]*"\n"	{ /* any other preprocessor line: discard */ }

	/* C style comments: switch to the exclusive COMMENT state at the opener, discard everything
	   while in it, and return to the initial state at the closer (the two-character closer wins
	   over the one-character catch-all because the longest match is chosen) */
"/*"				{ BEGIN( COMMENT ); }
<COMMENT>"*/"			{ BEGIN( INITIAL ); }
<COMMENT>.|\n			{ /* comment text: discard */ }

	/* C++ style comments: discard through the end of the line */
"//"[^\n]*"\n"			{ /* discard */ }
153
154 /* ignore whitespace */
{h_white}+					|
({v_tab}|{c_return}|{form_feed})+		{WHITE_RETURN(' ');}
({h_white}|{v_tab}|{c_return}|{form_feed})*"\n"	{NEWLINE_RETURN();}
158
159 /* keywords */
__alignof|__alignof__			{KEYWORD_RETURN(ALIGNOF);}	/* GCC */
asm|__asm|__asm__			{KEYWORD_RETURN(ASM);}		/* __asm, __asm__: GCC */
__attribute|__attribute__		{KEYWORD_RETURN(ATTRIBUTE);}	/* GCC */
auto					{KEYWORD_RETURN(AUTO);}
_Bool					{KEYWORD_RETURN(BOOL);}		/* ANSI99 */
break					{KEYWORD_RETURN(BREAK);}
case					{KEYWORD_RETURN(CASE);}
catch					{KEYWORD_RETURN(CATCH);}	/* CFA */
char					{KEYWORD_RETURN(CHAR);}
choose					{KEYWORD_RETURN(CHOOSE);}
_Complex|__complex|__complex__		{KEYWORD_RETURN(COMPLEX);}	/* _Complex: ANSI99; others: GCC */
const|__const|__const__			{KEYWORD_RETURN(CONST);}	/* __const, __const__: GCC */
context					{KEYWORD_RETURN(CONTEXT);}
continue				{KEYWORD_RETURN(CONTINUE);}
default					{KEYWORD_RETURN(DEFAULT);}
do					{KEYWORD_RETURN(DO);}
double					{KEYWORD_RETURN(DOUBLE);}
dtype					{KEYWORD_RETURN(DTYPE);}
else					{KEYWORD_RETURN(ELSE);}
enum					{KEYWORD_RETURN(ENUM);}
__extension__				{KEYWORD_RETURN(EXTENSION);}	/* GCC */
extern					{KEYWORD_RETURN(EXTERN);}
fallthru				{KEYWORD_RETURN(FALLTHRU);}
float					{KEYWORD_RETURN(FLOAT);}
for					{KEYWORD_RETURN(FOR);}
forall					{KEYWORD_RETURN(FORALL);}
fortran					{KEYWORD_RETURN(FORTRAN);}
ftype					{KEYWORD_RETURN(FTYPE);}
goto					{KEYWORD_RETURN(GOTO);}
if					{KEYWORD_RETURN(IF);}
_Imaginary|__imag|__imag__		{KEYWORD_RETURN(IMAGINARY);}	/* _Imaginary: ANSI99; others: GCC */
inline|__inline|__inline__		{KEYWORD_RETURN(INLINE);}	/* inline: ANSI99; others: GCC */
int					{KEYWORD_RETURN(INT);}
__label__				{KEYWORD_RETURN(LABEL);}	/* GCC */
long					{KEYWORD_RETURN(LONG);}
lvalue					{KEYWORD_RETURN(LVALUE);}
register				{KEYWORD_RETURN(REGISTER);}
restrict|__restrict|__restrict__	{KEYWORD_RETURN(RESTRICT);}	/* restrict: ANSI99; others: GCC */
return					{KEYWORD_RETURN(RETURN);}
short					{KEYWORD_RETURN(SHORT);}
signed|__signed|__signed__		{KEYWORD_RETURN(SIGNED);}	/* __signed, __signed__: GCC */
sizeof					{KEYWORD_RETURN(SIZEOF);}
static					{KEYWORD_RETURN(STATIC);}
struct					{KEYWORD_RETURN(STRUCT);}
switch					{KEYWORD_RETURN(SWITCH);}
throw					{KEYWORD_RETURN(THROW);}	/* CFA */
try					{KEYWORD_RETURN(TRY);}		/* CFA */
type					{KEYWORD_RETURN(TYPE);}
typedef					{KEYWORD_RETURN(TYPEDEF);}
typeof|__typeof|__typeof__		{KEYWORD_RETURN(TYPEOF);}	/* GCC */
union					{KEYWORD_RETURN(UNION);}
unsigned				{KEYWORD_RETURN(UNSIGNED);}
void					{KEYWORD_RETURN(VOID);}
volatile|__volatile|__volatile__	{KEYWORD_RETURN(VOLATILE);}	/* __volatile, __volatile__: GCC */
while					{KEYWORD_RETURN(WHILE);}
235
236 /* identifier */
{identifier}		{IDENTIFIER_RETURN();}

	/* numeric constants: the bare literals "0" and "1" get their own CFA tokens and are listed
	   first so that they win the equal-length tie against the general constant patterns */
"0"			{NUMERIC_RETURN(ZERO);}		/* CFA */
"1"			{NUMERIC_RETURN(ONE);}		/* CFA */
{decimal_constant}	|
{octal_constant}	|
{hex_constant}		{NUMERIC_RETURN(INTEGERconstant);}
{floating_constant}	|
{hex_floating_constant}	{NUMERIC_RETURN(FLOATINGconstant);}

	/* character constant, optional "L" prefix; an empty value ('') is accepted */
"L"?[']([^'\\\n]|{escape_seq})*[']	{RETURN_VAL(CHARACTERconstant);}

	/* string constant, optional "L" prefix */
"L"?["]([^"\\\n]|{escape_seq})*["]	{RETURN_VAL(STRINGliteral);}
253
254 /* punctuation */
"["|"]"|"("|")"|"{"|"}"|","|":"|";"|"."	{ASCIIOP_RETURN();}	/* "," and "." are also operators */
"..."			{NAMEDOP_RETURN(ELLIPSIS);}

	/* alternative ANSI99 bracket spellings (digraphs): each returns the token code of the bracket
	   it stands for, while the token text keeps the digraph as written; the remaining digraphs
	   "%:" & "%:%:" (for # and ##) are handled by the preprocessor */
"<:"			{RETURN_VAL('[');}
":>"			{RETURN_VAL(']');}
"<%"			{RETURN_VAL('{');}
"%>"			{RETURN_VAL('}');}
272
273 /* operators */
"!"|"+"|"-"|"*"|"/"|"%"|"^"|"~"|"&"|"|"|"<"|">"|"="|"?"	{ASCIIOP_RETURN();}	/* token code is the character itself */

"++"			{NAMEDOP_RETURN(ICR);}
"--"			{NAMEDOP_RETURN(DECR);}
"=="			{NAMEDOP_RETURN(EQ);}
"!="			{NAMEDOP_RETURN(NE);}
"<<"			{NAMEDOP_RETURN(LS);}
">>"			{NAMEDOP_RETURN(RS);}
"<="			{NAMEDOP_RETURN(LE);}
">="			{NAMEDOP_RETURN(GE);}
"&&"			{NAMEDOP_RETURN(ANDAND);}
"||"			{NAMEDOP_RETURN(OROR);}
"->"			{NAMEDOP_RETURN(ARROW);}
"+="			{NAMEDOP_RETURN(PLUSassign);}
"-="			{NAMEDOP_RETURN(MINUSassign);}
"*="			{NAMEDOP_RETURN(MULTassign);}
"/="			{NAMEDOP_RETURN(DIVassign);}
"%="			{NAMEDOP_RETURN(MODassign);}
"&="			{NAMEDOP_RETURN(ANDassign);}
"|="			{NAMEDOP_RETURN(ORassign);}
"^="			{NAMEDOP_RETURN(ERassign);}
"<<="			{NAMEDOP_RETURN(LSassign);}
">>="			{NAMEDOP_RETURN(RSassign);}
310
311 /* CFA, operator identifier */
{op_unary}"?"				|
"?"({op_unary_pre_post}|"()"|"[?]")	|
"?"{op_binary_over}"?"			{IDENTIFIER_RETURN();}	/* in order: unary "op?"; "?++", "?--", "?()", "?[?]"; binary "?op?" */
315 /*
316 This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the
317 string "*?*?" can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practice to put
318 a unary operator juxtaposed to an identifier, e.g., "*i", users will be annoyed if they
319 cannot do this with respect to operator identifiers. Even with this special hack, there
320 are 5 general cases that cannot be handled. The first case is for the function-call
321 identifier "?()":
322
323 int * ?()(); // declaration: space required after '*'
324 * ?()(); // expression: space required after '*'
325
326 Without the space, the string "*?()" is ambiguous without N character look ahead; it
327 requires scanning ahead to determine if there is a '(', which is the start of an
328 argument/parameter list.
329
330 The 4 remaining cases occur in expressions:
331
332 i++?i:0; // space required before '?'
333 i--?i:0; // space required before '?'
334 i?++i:0; // space required after '?'
335 i?--i:0; // space required after '?'
336
337 In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as
338 "i"/"++?" or "i++"/"?"; it requires scanning ahead to determine if there is a '(', which
339 is the start of an argument list. In the second two cases, the string "?++i" is
340 ambiguous, where this string can be lexed as "?++"/"i" or "?"/"++i"; it requires scanning
341 ahead to determine if there is a '(', which is the start of an argument list.
342 */
{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?"))	{
		// A unary operator directly followed by an operator identifier was matched as one
		// lexeme. Keep only the leading unary operator and push the rest, starting at the
		// first '?', back onto the input to be scanned again.
		const int op_len = ( yytext[1] == '?' ) ? 1 : 2;	// 1 or 2 character unary operator ?
		yyless( op_len );
		if ( op_len == 1 ) {
			ASCIIOP_RETURN();
		} // if
		// the only 2 character unary operators are "++" and "--"
		NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
	}
353
354 /* unknown characters */
.	{
		// Any character no other rule accepts: report it and continue scanning; no token is returned.
		printf( "unknown character(s):\"%s\" on line %d\n", yytext, yylineno );
	}
356
357%%
358
359
360/* Local Variables: */
361/* fill-column: 100 */
362/* compile-command: "gmake" */
363/* End: */
Note: See TracBrowser for help on using the repository browser.