source: translator/Parser/lex.l @ 134b86a

ADTaaron-thesisarm-ehast-experimentalcleanup-dtorsctordeferred_resndemanglerenumforall-pointer-decaygc_noraiijacob/cs343-translationjenkins-sandboxmemorynew-astnew-ast-unique-exprnew-envno_listpersistent-indexerpthread-emulationqualifiedEnumresolv-newstringwith_gc
Last change on this file since 134b86a was 51b7345, checked in by Peter A. Buhr <pabuhr@…>, 10 years ago

initial commit

  • Property mode set to 100644
File size: 12.8 KB
Line 
1/*                               -*- Mode: C -*-
2 *
3 * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this
4 *      grammar and to use it within software systems.  THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT
5 *      ANY EXPRESS OR IMPLIED WARRANTIES.
6 *
7 * lex.l --
8 *
9 * Author           : Peter A. Buhr
10 * Created On       : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
12 * Last Modified On : Sat Nov  1 18:09:47 2003
13 * Update Count     : 197
14 */
15
16%option yylineno
17
18%{
19/* This lexer assumes the program has been preprocessed by cpp. Hence, all user level preprocessor
20   directives have been performed and removed from the source. The only exceptions are preprocessor
21   directives passed to the compiler (e.g., line-number directives) and C/C++ style comments, which
22   are ignored. */
23
24/*************** Includes and Defines *****************************/
25
26#include <string>
27
28#include "lex.h"
29#include "ParseNode.h"
30#include "cfa.tab.h" /* YACC generated definitions based on C++ grammar */
31
32char *yyfilename;                                       /* name of the file being scanned; replaced by the line-directive rule */
33
34#define WHITE_RETURN(x)         /* do nothing: white space is discarded, no token is returned */
35#define NEWLINE_RETURN()        WHITE_RETURN('\n')       /* newlines are discarded like other white space */
36#define RETURN_VAL(x)           do { yylval.tok.str = new std::string(yytext); /* heap copy of the token text */ \
37                                yylval.tok.loc.file = yyfilename;              /* file the token came from */ \
38                                yylval.tok.loc.line = yylineno;                /* line the token came from */ \
39                                return(x); } while (0)  /* do/while(0): expands to ONE statement, safe under an unbraced if/else */
40
41#define KEYWORD_RETURN(x)       RETURN_VAL(x)           /* keyword */
42#define IDENTIFIER_RETURN()     RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))
43#define ATTRIBUTE_RETURN()      RETURN_VAL((typedefTable.isIdentifier(yytext) ? ATTR_IDENTIFIER : typedefTable.isTypedef(yytext) ? ATTR_TYPEDEFname : ATTR_TYPEGENname))
44
45#define ASCIIOP_RETURN()        RETURN_VAL((int)yytext[0]) /* single character operator: token code is the character itself */
46#define NAMEDOP_RETURN(x)       RETURN_VAL(x)           /* multichar operator, with a name */
47
48#define NUMERIC_RETURN(x)       do { rm_underscore(); RETURN_VAL(x); } while (0) /* numeric constant: '_' removed before the text is copied */
49
50void rm_underscore() {                                  /* compact yytext in place, dropping every '_' */
51    char *keep = yytext;                                /* next slot for a retained character */
52    const char *const stop = yytext + yyleng;           /* one past the last matched character */
53    for ( const char *scan = yytext; scan < stop; ++scan ) {
54        if ( *scan == '_' ) continue;                   /* underscore: skip, do not copy */
55        *keep = *scan;
56        keep += 1;
57    } // for
58    yyleng = keep - yytext;                             /* length of the compacted text */
59    yytext[yyleng] = '\0';                              /* re-terminate at the new end */
60}
61
62%}
63
64octal [0-7]
65nonzero [1-9]
66decimal [0-9]
67hex [0-9a-fA-F]
68universal_char "\\"((u{hex_quad})|(U{hex_quad}{2}))
69
70        /* identifier: starts with a letter, '_', '$' or a universal character name, continues with those or digits; GCC: $ in identifier */
71identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*
72
73        /* attribute identifier: an identifier prefixed by '@'; GCC: $ in identifier */
74attr_identifier "@"{identifier}
75
76        /*  numeric constants, CFA: '_' allowed between digits and next to a prefix, exponent or suffix; hex_quad (4 hex digits) serves universal_char */
77hex_quad {hex}{4}
78integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)
79
80octal_digits ({octal})|({octal}({octal}|"_")*{octal})
81octal_prefix "0""_"?
82octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?
83
84nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
85decimal_constant {nonzero_digits}{integer_suffix}?
86
87hex_digits ({hex})|({hex}({hex}|"_")*{hex})
88hex_prefix "0"[xX]"_"?
89hex_constant {hex_prefix}{hex_digits}{integer_suffix}?
90
91decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
92fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
93exponent "_"?[eE]"_"?[+-]?{decimal_digits}
94floating_suffix "_"?[flFL]
95floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
96
97binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
98hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
99hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
100
101        /* character escape sequences: simple, octal (1 to 3 digits), hex, or universal character name; GCC: \e => esc character */
102simple_escape "\\"[abefnrtv'"?\\]
103octal_escape "\\"{octal}{1,3}
104hex_escape "\\""x"{hex}+
105escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
106
107        /* display/white-space characters, written as octal character codes; h_white is a blank or a horizontal tab */
108h_tab [\011]
109form_feed [\014]
110v_tab [\013]
111c_return [\015]
112h_white [ ]|{h_tab}
113
114        /* operators, grouped by how they may appear in a CFA operator identifier (see the operator identifier rules) */
115op_unary_only "~"|"!"
116op_unary_binary "+"|"-"|"*"
117op_unary_pre_post "++"|"--"
118op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}
119
120op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
121op_binary_over {op_unary_binary}|{op_binary_only}
122op_binary_not_over "?"|"->"|"&&"|"||"
123operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}
124
125%x COMMENT
126
127%%
128        /* line directives (# lineno "file" ...): resynchronize yylineno and yyfilename with the original source */
129^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
130        char *end_num;                                  /* set by strtol: first character after the number */
131        char *begin_string, *end_string;                /* opening and closing quote of the file name */
132        char *filename;
133        long lineno, length;
134        lineno = strtol( strchr( yytext, '#' ) + 1, &end_num, 10 ); /* parse after the #, which blanks may precede; decimal only */
135        begin_string = strchr( end_num, '"' );
136        if( begin_string ) {
137          end_string = strchr( begin_string + 1, '"' );
138          if( end_string ) {
139            length = end_string - begin_string - 1;
140            filename = new char[ length + 1 ];           /* not freed: RETURN_VAL hands this pointer to every later token */
141            memcpy( filename, begin_string + 1, length );
142            filename[ length ] = '\0';
143            //std::cout << "file " << filename << " line " << lineno << std::endl;
144            yylineno = lineno;
145            yyfilename = filename;
146          }
147        }
148}
149
150        /* ignore every other preprocessor directive (for now); a line directive matches here at the same length, but its rule is listed first and wins */
151^{h_white}*"#"[^\n]*"\n" ;
152
153        /* ignore C style comments: the exclusive COMMENT start condition discards text until the closing delimiter */
154"/*"                    {BEGIN COMMENT;}
155<COMMENT>.|\n           ;                               /* discard one character; the two-character closer below is the longer match */
156<COMMENT>"*/"           {BEGIN 0;}                      /* 0 is the initial start condition */
157
158        /* ignore C++ style comments, including the terminating newline */
159"//"[^\n]*"\n"          ;
160
161        /* ignore whitespace: WHITE_RETURN and NEWLINE_RETURN expand to nothing, so no token is returned */
162{h_white}+              {WHITE_RETURN(' ');}
163({v_tab}|{c_return}|{form_feed})+ {WHITE_RETURN(' ');}
164({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" {NEWLINE_RETURN();}
165
166        /* keywords: the identifier pattern matches each of these at the same length, so a keyword wins only because its rule comes first */
167__alignof               {KEYWORD_RETURN(ALIGNOF);}      /* GCC */
168__alignof__             {KEYWORD_RETURN(ALIGNOF);}      /* GCC */
169asm                     {KEYWORD_RETURN(ASM);}
170__asm                   {KEYWORD_RETURN(ASM);}          /* GCC */
171__asm__                 {KEYWORD_RETURN(ASM);}          /* GCC */
172__attribute             {KEYWORD_RETURN(ATTRIBUTE);}    /* GCC */
173__attribute__           {KEYWORD_RETURN(ATTRIBUTE);}    /* GCC */
174auto                    {KEYWORD_RETURN(AUTO);}
175_Bool                   {KEYWORD_RETURN(BOOL);}         /* ANSI99 */
176break                   {KEYWORD_RETURN(BREAK);}
177case                    {KEYWORD_RETURN(CASE);}
178catch                   {KEYWORD_RETURN(CATCH);}        /* CFA */
179char                    {KEYWORD_RETURN(CHAR);}
180choose                  {KEYWORD_RETURN(CHOOSE);}
181_Complex                {KEYWORD_RETURN(COMPLEX);}      /* ANSI99 */
182__complex               {KEYWORD_RETURN(COMPLEX);}      /* GCC */
183__complex__             {KEYWORD_RETURN(COMPLEX);}      /* GCC */
184const                   {KEYWORD_RETURN(CONST);}
185__const                 {KEYWORD_RETURN(CONST);}        /* GCC */
186__const__               {KEYWORD_RETURN(CONST);}        /* GCC */
187context                 {KEYWORD_RETURN(CONTEXT);}
188continue                {KEYWORD_RETURN(CONTINUE);}
189default                 {KEYWORD_RETURN(DEFAULT);}
190do                      {KEYWORD_RETURN(DO);}
191double                  {KEYWORD_RETURN(DOUBLE);}
192dtype                   {KEYWORD_RETURN(DTYPE);}
193else                    {KEYWORD_RETURN(ELSE);}
194enum                    {KEYWORD_RETURN(ENUM);}
195__extension__           {KEYWORD_RETURN(EXTENSION);}    /* GCC */
196extern                  {KEYWORD_RETURN(EXTERN);}
197fallthru                {KEYWORD_RETURN(FALLTHRU);}
198finally                 {KEYWORD_RETURN(FINALLY);}      /* CFA */
199float                   {KEYWORD_RETURN(FLOAT);}
200for                     {KEYWORD_RETURN(FOR);}
201forall                  {KEYWORD_RETURN(FORALL);}
202fortran                 {KEYWORD_RETURN(FORTRAN);}
203ftype                   {KEYWORD_RETURN(FTYPE);}
204goto                    {KEYWORD_RETURN(GOTO);}
205if                      {KEYWORD_RETURN(IF);}
206_Imaginary              {KEYWORD_RETURN(IMAGINARY);}    /* ANSI99 */
207__imag                  {KEYWORD_RETURN(IMAGINARY);}    /* GCC */
208__imag__                {KEYWORD_RETURN(IMAGINARY);}    /* GCC */
209inline                  {KEYWORD_RETURN(INLINE);}       /* ANSI99 */
210__inline                {KEYWORD_RETURN(INLINE);}       /* GCC */
211__inline__              {KEYWORD_RETURN(INLINE);}       /* GCC */
212int                     {KEYWORD_RETURN(INT);}
213__label__               {KEYWORD_RETURN(LABEL);}        /* GCC */
214long                    {KEYWORD_RETURN(LONG);}
215lvalue                  {KEYWORD_RETURN(LVALUE);}
216register                {KEYWORD_RETURN(REGISTER);}
217restrict                {KEYWORD_RETURN(RESTRICT);}     /* ANSI99 */
218__restrict              {KEYWORD_RETURN(RESTRICT);}     /* GCC */
219__restrict__            {KEYWORD_RETURN(RESTRICT);}     /* GCC */
220return                  {KEYWORD_RETURN(RETURN);}
221short                   {KEYWORD_RETURN(SHORT);}
222signed                  {KEYWORD_RETURN(SIGNED);}
223__signed                {KEYWORD_RETURN(SIGNED);}       /* GCC */
224__signed__              {KEYWORD_RETURN(SIGNED);}       /* GCC */
225sizeof                  {KEYWORD_RETURN(SIZEOF);}
226static                  {KEYWORD_RETURN(STATIC);}
227struct                  {KEYWORD_RETURN(STRUCT);}
228switch                  {KEYWORD_RETURN(SWITCH);}
229throw                   {KEYWORD_RETURN(THROW);}        /* CFA */
230try                     {KEYWORD_RETURN(TRY);}          /* CFA */
231type                    {KEYWORD_RETURN(TYPE);}
232typedef                 {KEYWORD_RETURN(TYPEDEF);}
233typeof                  {KEYWORD_RETURN(TYPEOF);}       /* GCC */
234__typeof                {KEYWORD_RETURN(TYPEOF);}       /* GCC */
235__typeof__              {KEYWORD_RETURN(TYPEOF);}       /* GCC */
236union                   {KEYWORD_RETURN(UNION);}
237unsigned                {KEYWORD_RETURN(UNSIGNED);}
238void                    {KEYWORD_RETURN(VOID);}
239volatile                {KEYWORD_RETURN(VOLATILE);}
240__volatile              {KEYWORD_RETURN(VOLATILE);}     /* GCC */
241__volatile__            {KEYWORD_RETURN(VOLATILE);}     /* GCC */
242while                   {KEYWORD_RETURN(WHILE);}
243
244        /* identifier: the token code is chosen by looking the name up in typedefTable (see IDENTIFIER_RETURN and ATTRIBUTE_RETURN) */
245{identifier}            {IDENTIFIER_RETURN();}
246{attr_identifier}       {ATTRIBUTE_RETURN();}
247
248        /* numeric constants: 0 and 1 have their own token codes; their rules must precede the general ones, which match them at the same length */
249"0"                     {NUMERIC_RETURN(ZERO);}         /* CFA */
250"1"                     {NUMERIC_RETURN(ONE);}          /* CFA */
251{decimal_constant}      {NUMERIC_RETURN(INTEGERconstant);}
252{octal_constant}        {NUMERIC_RETURN(INTEGERconstant);}
253{hex_constant}          {NUMERIC_RETURN(INTEGERconstant);}
254{floating_constant}     {NUMERIC_RETURN(FLOATINGconstant);}
255{hex_floating_constant} {NUMERIC_RETURN(FLOATINGconstant);}
256
257        /* character constant, allows empty value; optional L prefix; the text is returned as written, quotes and escapes included */
258"L"?[']([^'\\\n]|{escape_seq})*['] {RETURN_VAL(CHARACTERconstant);}
259
260        /* string constant; optional L prefix; may not span lines; the text is returned as written, quotes and escapes included */
261"L"?["]([^"\\\n]|{escape_seq})*["] {RETURN_VAL(STRINGliteral);}
262
263        /* punctuation: a single character is returned with its own character code as the token code */
264"["                     {ASCIIOP_RETURN();}
265"]"                     {ASCIIOP_RETURN();}
266"("                     {ASCIIOP_RETURN();}
267")"                     {ASCIIOP_RETURN();}
268"{"                     {ASCIIOP_RETURN();}
269"}"                     {ASCIIOP_RETURN();}
270","                     {ASCIIOP_RETURN();}             /* also operator */
271":"                     {ASCIIOP_RETURN();}
272";"                     {ASCIIOP_RETURN();}
273"."                     {ASCIIOP_RETURN();}             /* also operator */
274"..."                   {NAMEDOP_RETURN(ELLIPSIS);}
275
276        /* alternative ANSI99 brackets, "<:" & "<:<:" handled by preprocessor; token code is the ordinary bracket, token text keeps the two-character spelling */
277"<:"                    {RETURN_VAL('[');}
278":>"                    {RETURN_VAL(']');}
279"<%"                    {RETURN_VAL('{');}
280"%>"                    {RETURN_VAL('}');}
281
282        /* operators: one-character operators return their character code, longer ones a named token; the longest match is taken, e.g. <<= before << before < */
283"!"                     {ASCIIOP_RETURN();}
284"+"                     {ASCIIOP_RETURN();}
285"-"                     {ASCIIOP_RETURN();}
286"*"                     {ASCIIOP_RETURN();}
287"/"                     {ASCIIOP_RETURN();}
288"%"                     {ASCIIOP_RETURN();}
289"^"                     {ASCIIOP_RETURN();}
290"~"                     {ASCIIOP_RETURN();}
291"&"                     {ASCIIOP_RETURN();}
292"|"                     {ASCIIOP_RETURN();}
293"<"                     {ASCIIOP_RETURN();}
294">"                     {ASCIIOP_RETURN();}
295"="                     {ASCIIOP_RETURN();}
296"?"                     {ASCIIOP_RETURN();}
297
298"++"                    {NAMEDOP_RETURN(ICR);}
299"--"                    {NAMEDOP_RETURN(DECR);}
300"=="                    {NAMEDOP_RETURN(EQ);}
301"!="                    {NAMEDOP_RETURN(NE);}
302"<<"                    {NAMEDOP_RETURN(LS);}
303">>"                    {NAMEDOP_RETURN(RS);}
304"<="                    {NAMEDOP_RETURN(LE);}
305">="                    {NAMEDOP_RETURN(GE);}
306"&&"                    {NAMEDOP_RETURN(ANDAND);}
307"||"                    {NAMEDOP_RETURN(OROR);}
308"->"                    {NAMEDOP_RETURN(ARROW);}
309"+="                    {NAMEDOP_RETURN(PLUSassign);}
310"-="                    {NAMEDOP_RETURN(MINUSassign);}
311"*="                    {NAMEDOP_RETURN(MULTassign);}
312"/="                    {NAMEDOP_RETURN(DIVassign);}
313"%="                    {NAMEDOP_RETURN(MODassign);}
314"&="                    {NAMEDOP_RETURN(ANDassign);}
315"|="                    {NAMEDOP_RETURN(ORassign);}
316"^="                    {NAMEDOP_RETURN(ERassign);}
317"<<="                   {NAMEDOP_RETURN(LSassign);}
318">>="                   {NAMEDOP_RETURN(RSassign);}
319
320        /* CFA, operator identifier: an operator with ? marking each operand position; returned through IDENTIFIER_RETURN like an ordinary name */
321{op_unary}"?"           {IDENTIFIER_RETURN();}          /* unary */
322"?"({op_unary_pre_post}|"()"|"[?]") {IDENTIFIER_RETURN();} /* operator written after its first operand */
323"?"{op_binary_over}"?"  {IDENTIFIER_RETURN();}          /* binary */
324        /*
325          This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the
326          string "*?*?"  can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practise to put
327          a unary operator juxtaposed to an identifier, e.g., "*i", users will be annoyed if they
328          cannot do this with respect to operator identifiers. Even with this special hack, there
329          are 5 general cases that cannot be handled. The first case is for the function-call
330          identifier "?()":
331
332          int * ?()();  // declaration: space required after '*'
333          * ?()();      // expression: space required after '*'
334
335          Without the space, the string "*?()" is ambiguous without N character look ahead; it
336          requires scanning ahead to determine if there is a '(', which is the start of an
337          argument/parameter list.
338
339          The 4 remaining cases occur in expressions:
340
341          i++?i:0;              // space required before '?'
342          i--?i:0;              // space required before '?'
343          i?++i:0;              // space required after '?'
344          i?--i:0;              // space required after '?'
345
346          In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as
347          "i"/"++?" or "i++"/"?"; it requires scanning ahead to determine if there is a '(', which
348          is the start of an argument list.  In the second two cases, the string "?++i" is
349          ambiguous, where this string can be lexed as "?++"/"i" or "?"/"++i"; it requires scanning
350          ahead to determine if there is a '(', which is the start of an argument list.
351        */
352{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) {
353                            // 1 or 2 character unary operator ?
354                            int i = yytext[1] == '?' ? 1 : 2;
355                            yyless( i );                /* keep the unary operator, put back the rest starting at the first '?' */
356                            if ( i > 1 ) {
357                                NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR ); /* two characters: can only be ++ or -- */
358                            } else {
359                                ASCIIOP_RETURN();       /* one character: token code is the character itself */
360                            } // if
361                        }
362
363        /* unknown characters: reported on standard output and then skipped; no token is returned and scanning continues */
364.                       {printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno);}
365
366%%
367
368
369/* Local Variables: */
370/* fill-column: 100 */
371/* compile-command: "gmake" */
372/* End: */
Note: See TracBrowser for help on using the repository browser.