source: translator/Parser.old/lex.l @ 51b7345

ADTaaron-thesisarm-ehast-experimentalcleanup-dtorsctordeferred_resndemanglerenumforall-pointer-decaygc_noraiijacob/cs343-translationjenkins-sandboxmemorynew-astnew-ast-unique-exprnew-envno_listpersistent-indexerpthread-emulationqualifiedEnumresolv-newstringwith_gc
Last change on this file since 51b7345 was 51b7345, checked in by Peter A. Buhr <pabuhr@…>, 10 years ago

initial commit

  • Property mode set to 100644
File size: 12.4 KB
Line 
1/*                               -*- Mode: C -*-
2 *
3 * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this
4 *      grammar and to use it within software systems.  THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT
5 *      ANY EXPRESS OR IMPLIED WARRANTIES.
6 *
7 * lex.l --
8 *
9 * Author           : Peter A. Buhr
10 * Created On       : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
12 * Last Modified On : Thu Jan 23 16:17:09 2003
13 * Update Count     : 191
14 */
15
16%option yylineno
17
18%{
19/* This lexer assumes the program has been preprocessed by cpp. Hence, all user level preprocessor
20   directives have been performed and removed from the source. The only exceptions are preprocessor
21   directives passed to the compiler (e.g., line-number directives) and C/C++ style comments, which
22   are ignored. */
23
24/*************** Includes and Defines *****************************/
25
26#include <string>
27
28#include "ParseNode.h"
29#include "cfa.tab.h" /* YACC generated definitions based on C++ grammar */
30#include "lex.h"
31
32char *yyfilename;
33
/* Token-return helpers used by the rule actions in this file. */
/* White space yields no token: WHITE_RETURN expands to nothing, so its argument is ignored and
   the scanner simply continues with the next match. */
34#define WHITE_RETURN(x)         /* do nothing */
35#define NEWLINE_RETURN()        WHITE_RETURN('\n')
/* Fill in yylval.tok with a heap-allocated copy of the matched text, the current file name and
   the current line number, then return token code x.
   NOTE(review): this expands to several statements ending in a bare return, so it is only safe
   inside a braced action (never as the body of an unbraced if/else). Who deletes the std::string
   is not visible in this file -- confirm against the parser. */
36#define RETURN_VAL(x)           yylval.tok.str = new std::string(yytext); yylval.tok.file = yyfilename; yylval.tok.line = yylineno; return(x)
37
38#define KEYWORD_RETURN(x)       RETURN_VAL(x)           /* keyword */
/* Classify the matched name through typedefTable (declared outside this file): IDENTIFIER when
   isIdentifier() holds, otherwise TYPEDEFname when isTypedef() holds, otherwise TYPEGENname. */
39#define IDENTIFIER_RETURN()     RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))
40
/* For a single-character operator the token code is the character itself (first byte of yytext). */
41#define ASCIIOP_RETURN()        RETURN_VAL((int)yytext[0]) /* single character operator */
42#define NAMEDOP_RETURN(x)       RETURN_VAL(x)           /* multichar operator, with a name */
43
/* Numeric constants have their '_' separators stripped from yytext by rm_underscore() before the
   token value is built, so the stored string never contains underscores. */
44#define NUMERIC_RETURN(x)       rm_underscore(); RETURN_VAL(x) /* numeric constant */
45
46void rm_underscore() {                                  /* remove underscores in constant or escape sequence */
47    int j = 0;
48    for ( int i = 0; i < yyleng; i += 1 ) {
49        if ( yytext[i] != '_' ) {
50            yytext[j] = yytext[i];
51            j += 1;
52        } // if
53    } // for
54    yyleng = j;
55    yytext[yyleng] = '\0';
56}
57
58%}
59
60octal [0-7]
61nonzero [1-9]
62decimal [0-9]
63hex [0-9a-fA-F]
64
65        /* identifier, GCC: $ in identifier */
66universal_char "\\"((u{hex_quad})|(U{hex_quad}{2}))
67identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
68
69        /*  numeric constants, CFA: '_' in constant */
70hex_quad {hex}{4}
71integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)
72
73octal_digits ({octal})|({octal}({octal}|"_")*{octal})
74octal_prefix "0""_"?
75octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?
76
77nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
78decimal_constant {nonzero_digits}{integer_suffix}?
79
80hex_digits ({hex})|({hex}({hex}|"_")*{hex})
81hex_prefix "0"[xX]"_"?
82hex_constant {hex_prefix}{hex_digits}{integer_suffix}?
83
84decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
85fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
86exponent "_"?[eE]"_"?[+-]?{decimal_digits}
87floating_suffix "_"?[flFL]
88floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
89
90binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
91hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
92hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
93
94        /* character escape sequence, GCC: \e => esc character */
95simple_escape "\\"[abefnrtv'"?\\]
96octal_escape "\\"{octal}{1,3}
97hex_escape "\\""x"{hex}+
98escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
99
100        /* display/white-space characters */
101h_tab [\011]
102form_feed [\014]
103v_tab [\013]
104c_return [\015]
105h_white [ ]|{h_tab}
106
107        /* operators */
108op_unary_only "~"|"!"
109op_unary_binary "+"|"-"|"*"
110op_unary_pre_post "++"|"--"
111op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}
112
113op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
114op_binary_over {op_unary_binary}|{op_binary_only}
115op_binary_not_over "?"|"->"|"&&"|"||"
116operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
117
118%x COMMENT
119
120%%
121        /* line directives */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
        /* Line-number directive left in the input by cpp:  # <line> "<file>" [flags]
           Record the new position in yylineno/yyfilename so later tokens carry the location of
           the original source rather than of the preprocessed stream. */

        /* The pattern permits white space before '#', so locate the '#' explicitly instead of
           assuming it is the first character of the match. */
        const char *hash = strchr( yytext, '#' );
        char *end_num;
        /* The digit sequence of a line directive is decimal; base 10 keeps a leading zero from
           being read as octal. */
        long lineno = strtol( hash + 1, &end_num, 10 );
        const char *begin_string = strchr( end_num, '"' );
        if ( begin_string ) {
          const char *end_string = strchr( begin_string + 1, '"' );
          if ( end_string ) {
            long length = end_string - begin_string - 1;
            /* The name buffer is deliberately not released when a later directive replaces it:
               RETURN_VAL stores the yyfilename pointer in every token it returns, so earlier
               tokens may still refer to the previous buffer. */
            char *filename = new char[ length + 1 ];
            memcpy( filename, begin_string + 1, length );
            filename[ length ] = '\0';
            //std::cout << "file " << filename << " line " << lineno << std::endl;
            yylineno = lineno;
            yyfilename = filename;
          }
        }
}
142
143        /* ignore preprocessor directives (for now) */
144^{h_white}*"#"[^\n]*"\n" ;
145
146        /* ignore C style comments */
        /* The comment opener switches the scanner into the exclusive COMMENT start condition,
           in which every character, newline included, is matched and discarded. The closing
           delimiter is two characters long, so flex's longest-match rule selects it over the
           one-character catch-all even though it is listed second; BEGIN 0 then switches back
           to the initial start condition. */
147"/*"                    {BEGIN COMMENT;}
148<COMMENT>.|\n           ;
149<COMMENT>"*/"           {BEGIN 0;}
150
151        /* ignore C++ style comments */
152"//"[^\n]*"\n"          ;
153
154        /* ignore whitespace */
155{h_white}+              {WHITE_RETURN(' ');}
156({v_tab}|{c_return}|{form_feed})+ {WHITE_RETURN(' ');}
157({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" {NEWLINE_RETURN();}
158
159        /* keywords */
160__alignof               {KEYWORD_RETURN(ALIGNOF);}      /* GCC */
161__alignof__             {KEYWORD_RETURN(ALIGNOF);}      /* GCC */
162asm                     {KEYWORD_RETURN(ASM);}
163__asm                   {KEYWORD_RETURN(ASM);}          /* GCC */
164__asm__                 {KEYWORD_RETURN(ASM);}          /* GCC */
165__attribute             {KEYWORD_RETURN(ATTRIBUTE);}    /* GCC */
166__attribute__           {KEYWORD_RETURN(ATTRIBUTE);}    /* GCC */
167auto                    {KEYWORD_RETURN(AUTO);}
168_Bool                   {KEYWORD_RETURN(BOOL);}         /* ANSI99 */
169break                   {KEYWORD_RETURN(BREAK);}
170case                    {KEYWORD_RETURN(CASE);}
171catch                   {KEYWORD_RETURN(CATCH);}        /* CFA */
172char                    {KEYWORD_RETURN(CHAR);}
173choose                  {KEYWORD_RETURN(CHOOSE);}
174_Complex                {KEYWORD_RETURN(COMPLEX);}      /* ANSI99 */
175__complex               {KEYWORD_RETURN(COMPLEX);}      /* GCC */
176__complex__             {KEYWORD_RETURN(COMPLEX);}      /* GCC */
177const                   {KEYWORD_RETURN(CONST);}
178__const                 {KEYWORD_RETURN(CONST);}        /* GCC */
179__const__               {KEYWORD_RETURN(CONST);}        /* GCC */
180context                 {KEYWORD_RETURN(CONTEXT);}
181continue                {KEYWORD_RETURN(CONTINUE);}
182default                 {KEYWORD_RETURN(DEFAULT);}
183do                      {KEYWORD_RETURN(DO);}
184double                  {KEYWORD_RETURN(DOUBLE);}
185dtype                   {KEYWORD_RETURN(DTYPE);}
186else                    {KEYWORD_RETURN(ELSE);}
187enum                    {KEYWORD_RETURN(ENUM);}
188__extension__           {KEYWORD_RETURN(EXTENSION);}    /* GCC */
189extern                  {KEYWORD_RETURN(EXTERN);}
190fallthru                {KEYWORD_RETURN(FALLTHRU);}
191float                   {KEYWORD_RETURN(FLOAT);}
192for                     {KEYWORD_RETURN(FOR);}
193forall                  {KEYWORD_RETURN(FORALL);}
194fortran                 {KEYWORD_RETURN(FORTRAN);}
195ftype                   {KEYWORD_RETURN(FTYPE);}
196goto                    {KEYWORD_RETURN(GOTO);}
197if                      {KEYWORD_RETURN(IF);}
198_Imaginary              {KEYWORD_RETURN(IMAGINARY);}    /* ANSI99 */
199__imag                  {KEYWORD_RETURN(IMAGINARY);}    /* GCC */
200__imag__                {KEYWORD_RETURN(IMAGINARY);}    /* GCC */
201inline                  {KEYWORD_RETURN(INLINE);}       /* ANSI99 */
202__inline                {KEYWORD_RETURN(INLINE);}       /* GCC */
203__inline__              {KEYWORD_RETURN(INLINE);}       /* GCC */
204int                     {KEYWORD_RETURN(INT);}
205__label__               {KEYWORD_RETURN(LABEL);}        /* GCC */
206long                    {KEYWORD_RETURN(LONG);}
207lvalue                  {KEYWORD_RETURN(LVALUE);}
208register                {KEYWORD_RETURN(REGISTER);}
209restrict                {KEYWORD_RETURN(RESTRICT);}     /* ANSI99 */
210__restrict              {KEYWORD_RETURN(RESTRICT);}     /* GCC */
211__restrict__            {KEYWORD_RETURN(RESTRICT);}     /* GCC */
212return                  {KEYWORD_RETURN(RETURN);}
213short                   {KEYWORD_RETURN(SHORT);}
214signed                  {KEYWORD_RETURN(SIGNED);}
215__signed                {KEYWORD_RETURN(SIGNED);}       /* GCC */
216__signed__              {KEYWORD_RETURN(SIGNED);}       /* GCC */
217sizeof                  {KEYWORD_RETURN(SIZEOF);}
218static                  {KEYWORD_RETURN(STATIC);}
219struct                  {KEYWORD_RETURN(STRUCT);}
220switch                  {KEYWORD_RETURN(SWITCH);}
221throw                   {KEYWORD_RETURN(THROW);}        /* CFA */
222try                     {KEYWORD_RETURN(TRY);}          /* CFA */
223type                    {KEYWORD_RETURN(TYPE);}
224typedef                 {KEYWORD_RETURN(TYPEDEF);}
225typeof                  {KEYWORD_RETURN(TYPEOF);}       /* GCC */
226__typeof                {KEYWORD_RETURN(TYPEOF);}       /* GCC */
227__typeof__              {KEYWORD_RETURN(TYPEOF);}       /* GCC */
228union                   {KEYWORD_RETURN(UNION);}
229unsigned                {KEYWORD_RETURN(UNSIGNED);}
230void                    {KEYWORD_RETURN(VOID);}
231volatile                {KEYWORD_RETURN(VOLATILE);}
232__volatile              {KEYWORD_RETURN(VOLATILE);}     /* GCC */
233__volatile__            {KEYWORD_RETURN(VOLATILE);}     /* GCC */
234while                   {KEYWORD_RETURN(WHILE);}
235
236        /* identifier */
        /* Every keyword spelled out in the rules before this one also matches {identifier} with
           the same length; flex resolves such ties in favour of the earlier rule, so this rule
           must stay after the keyword rules. The token code is chosen by IDENTIFIER_RETURN. */
237{identifier}            {IDENTIFIER_RETURN();}
238
239        /* numeric constants */
        /* The literal "0" and "1" rules must precede the general patterns: "0" is also an
           {octal_constant} and "1" is also a {decimal_constant}, and the earlier rule wins an
           equal-length match. Longer constants (e.g. "10", "0x1", "1u") still reach the general
           rules because the longest match is preferred. NUMERIC_RETURN strips '_' separators
           from yytext before the token is returned. */
240"0"                     {NUMERIC_RETURN(ZERO);}         /* CFA */
241"1"                     {NUMERIC_RETURN(ONE);}          /* CFA */
242{decimal_constant}      {NUMERIC_RETURN(INTEGERconstant);}
243{octal_constant}        {NUMERIC_RETURN(INTEGERconstant);}
244{hex_constant}          {NUMERIC_RETURN(INTEGERconstant);}
245{floating_constant}     {NUMERIC_RETURN(FLOATINGconstant);}
246{hex_floating_constant} {NUMERIC_RETURN(FLOATINGconstant);}
247
248        /* character constant, allows empty value */
249"L"?[']([^'\\\n]|{escape_seq})*['] {RETURN_VAL(CHARACTERconstant);}
250
251        /* string constant */
252"L"?["]([^"\\\n]|{escape_seq})*["] {RETURN_VAL(STRINGliteral);}
253
254        /* punctuation */
255"["                     {ASCIIOP_RETURN();}
256"]"                     {ASCIIOP_RETURN();}
257"("                     {ASCIIOP_RETURN();}
258")"                     {ASCIIOP_RETURN();}
259"{"                     {ASCIIOP_RETURN();}
260"}"                     {ASCIIOP_RETURN();}
261","                     {ASCIIOP_RETURN();}             /* also operator */
262":"                     {ASCIIOP_RETURN();}
263";"                     {ASCIIOP_RETURN();}
264"."                     {ASCIIOP_RETURN();}             /* also operator */
265"..."                   {NAMEDOP_RETURN(ELLIPSIS);}
266
267        /* alternative ANSI99 brackets, "%:" & "%:%:" handled by preprocessor */
268"<:"                    {RETURN_VAL('[');}
269":>"                    {RETURN_VAL(']');}
270"<%"                    {RETURN_VAL('{');}
271"%>"                    {RETURN_VAL('}');}
272
273        /* operators */
274"!"                     {ASCIIOP_RETURN();}
275"+"                     {ASCIIOP_RETURN();}
276"-"                     {ASCIIOP_RETURN();}
277"*"                     {ASCIIOP_RETURN();}
278"/"                     {ASCIIOP_RETURN();}
279"%"                     {ASCIIOP_RETURN();}
280"^"                     {ASCIIOP_RETURN();}
281"~"                     {ASCIIOP_RETURN();}
282"&"                     {ASCIIOP_RETURN();}
283"|"                     {ASCIIOP_RETURN();}
284"<"                     {ASCIIOP_RETURN();}
285">"                     {ASCIIOP_RETURN();}
286"="                     {ASCIIOP_RETURN();}
287"?"                     {ASCIIOP_RETURN();}
288
289"++"                    {NAMEDOP_RETURN(ICR);}
290"--"                    {NAMEDOP_RETURN(DECR);}
291"=="                    {NAMEDOP_RETURN(EQ);}
292"!="                    {NAMEDOP_RETURN(NE);}
293"<<"                    {NAMEDOP_RETURN(LS);}
294">>"                    {NAMEDOP_RETURN(RS);}
295"<="                    {NAMEDOP_RETURN(LE);}
296">="                    {NAMEDOP_RETURN(GE);}
297"&&"                    {NAMEDOP_RETURN(ANDAND);}
298"||"                    {NAMEDOP_RETURN(OROR);}
299"->"                    {NAMEDOP_RETURN(ARROW);}
300"+="                    {NAMEDOP_RETURN(PLUSassign);}
301"-="                    {NAMEDOP_RETURN(MINUSassign);}
302"*="                    {NAMEDOP_RETURN(MULTassign);}
303"/="                    {NAMEDOP_RETURN(DIVassign);}
304"%="                    {NAMEDOP_RETURN(MODassign);}
305"&="                    {NAMEDOP_RETURN(ANDassign);}
306"|="                    {NAMEDOP_RETURN(ORassign);}
307"^="                    {NAMEDOP_RETURN(ERassign);}
308"<<="                   {NAMEDOP_RETURN(LSassign);}
309">>="                   {NAMEDOP_RETURN(RSassign);}
310
311        /* CFA, operator identifier */
312{op_unary}"?"           {IDENTIFIER_RETURN();}          /* unary */
313"?"({op_unary_pre_post}|"()"|"[?]") {IDENTIFIER_RETURN();}
314"?"{op_binary_over}"?"  {IDENTIFIER_RETURN();}          /* binary */
315        /*
316          This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the
317          string "*?*?"  can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practice to put
318          a unary operator juxtaposed to an identifier, e.g., "*i", users will be annoyed if they
319          cannot do this with respect to operator identifiers. Even with this special hack, there
320          are 5 general cases that cannot be handled. The first case is for the function-call
321          identifier "?()":
322
323          int * ?()();  // declaration: space required after '*'
324          * ?()();      // expression: space required after '*'
325
326          Without the space, the string "*?()" is ambiguous without N character look ahead; it
327          requires scanning ahead to determine if there is a '(', which is the start of an
328          argument/parameter list.
329
330          The 4 remaining cases occur in expressions:
331
332          i++?i:0;              // space required before '?'
333          i--?i:0;              // space required before '?'
334          i?++i:0;              // space required after '?'
335          i?--i:0;              // space required after '?'
336
337          In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as
338          "i"/"++?" or "i++"/"?"; it requires scanning ahead to determine if there is a '(', which
339          is the start of an argument list.  In the second two cases, the string "?++x" is
340          ambiguous, where this string can be lexed as "?++"/"x" or "?"/"++x"; it requires scanning
341          ahead to determine if there is a '(', which is the start of an argument list.
342        */
{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) {
                            // A unary operator directly followed by an operator identifier was
                            // matched as one string. Keep only the leading operator as this token
                            // and hand everything from the first '?' onward back to the scanner.
                            // A '?' at index 1 means the operator is 1 character long, otherwise
                            // it is one of the 2-character operators "++" or "--".
                            const int keep = ( yytext[1] == '?' ) ? 1 : 2;
                            yyless( keep );             /* plain variable: argument may be expanded unparenthesized */
                            if ( keep == 1 ) {
                                ASCIIOP_RETURN();
                            } // if
                            NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
                        }
353
354        /* unknown characters */
355.                       {printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno);}
356
357%%
358
359
360/* Local Variables: */
361/* fill-column: 100 */
362/* compile-command: "gmake" */
363/* End: */
Note: See TracBrowser for help on using the repository browser.