source: translator/Parser/lex.l @ 42dcae7

ADTaaron-thesisarm-ehast-experimentalcleanup-dtorsctordeferred_resndemanglerenumforall-pointer-decaygc_noraiijacob/cs343-translationjenkins-sandboxmemorynew-astnew-ast-unique-exprnew-envno_listpersistent-indexerpthread-emulationqualifiedEnumresolv-newstringwith_gc
Last change on this file since 42dcae7 was 8c17ab0, checked in by Peter A. Buhr <pabuhr@…>, 10 years ago

add quoted identifiers, add compilation include directory, reformatted some files

  • Property mode set to 100644
File size: 13.2 KB
Line 
1/*                               -*- Mode: C -*-
2 *
3 * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this
4 *      grammar and to use it within software systems.  THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT
5 *      ANY EXPRESS OR IMPLIED WARRANTIES.
6 *
7 * lex.l --
8 *
9 * Author           : Peter A. Buhr
10 * Created On       : Sat Sep 22 08:58:10 2001
11 * Last Modified By : Peter A. Buhr
12 * Last Modified On : Tue Nov 11 08:10:05 2014
13 * Update Count     : 215
14 */
15
16%option yylineno
17
18%{
// This lexer assumes the program has already been preprocessed by cpp. Hence, all user-level
// preprocessor directives have been processed and removed from the source. The only exceptions are
// preprocessor directives passed on to the compiler (e.g., line-number directives) and C/C++ style
// comments, which are ignored.
23
24//**************************** Includes and Defines ****************************
25
26#include <string>
27
28#include "lex.h"
29#include "ParseNode.h"
30#include "cfa.tab.h" // YACC generated definitions based on C++ grammar
31
char *yyfilename;                                       // file named by the most recent line directive; copied into each token's location

#define WHITE_RETURN(x)                                 // do nothing
#define NEWLINE_RETURN()        WHITE_RETURN('\n')

// Save the token's text and source position in yylval, then return token code x from the scanner.
// The body is wrapped in do { } while (0) so the macro expands to exactly one statement and stays
// correct when it is used as the unbraced body of an if/else.
// NOTE(review): the std::string allocated here is not released anywhere in this file; presumably
// the parser takes ownership -- confirm.
#define RETURN_VAL(x)           do { \
                                    yylval.tok.str = new std::string(yytext); \
                                    yylval.tok.loc.file = yyfilename; \
                                    yylval.tok.loc.line = yylineno; \
                                    return(x); \
                                } while (0)

#define KEYWORD_RETURN(x)       RETURN_VAL(x)           // keyword
// An identifier spelling is classified through typedefTable: plain identifier, typedef name, or
// (when neither test succeeds) type-generator name.
#define IDENTIFIER_RETURN()     RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))
//#define ATTRIBUTE_RETURN()    RETURN_VAL((typedefTable.isIdentifier(yytext) ? ATTR_IDENTIFIER : typedefTable.isTypedef(yytext) ? ATTR_TYPEDEFname : ATTR_TYPEGENname))
#define ATTRIBUTE_RETURN()      RETURN_VAL(ATTR_IDENTIFIER)

#define ASCIIOP_RETURN()        RETURN_VAL((int)yytext[0]) // single character operator
#define NAMEDOP_RETURN(x)       RETURN_VAL(x)           // multichar operator, with a name

// Strip '_' separators from yytext before the token text is captured; single statement, as above.
#define NUMERIC_RETURN(x)       do { rm_underscore(); RETURN_VAL(x); } while (0) // numeric constant
50
51void rm_underscore() {                                  // remove underscores in constant or escape sequence
52    int j = 0;
53    for ( int i = 0; i < yyleng; i += 1 ) {
54        if ( yytext[i] != '_' ) {
55            yytext[j] = yytext[i];
56            j += 1;
57        } // if
58    } // for
59    yyleng = j;
60    yytext[yyleng] = '\0';
61}
62
63%}
64
octal [0-7]
nonzero [1-9]
decimal [0-9]
hex [0-9a-fA-F]
        // universal character name: \u plus one hex_quad, or \U plus two; hex_quad itself is
        // defined further down with the numeric constants
universal_char "\\"((u{hex_quad})|(U{hex_quad}{2}))

        // identifier, GCC: $ in identifier; a universal character name may appear in any position
identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*

        // quoted identifier: an identifier between backquotes; no rule visible in this file
        // references this name (the rules use the QUOTED start condition instead)
quoted_identifier "`"{identifier}"`"

        // attribute identifier, GCC: $ in identifier; an identifier prefixed with '@'
attr_identifier "@"{identifier}

        // numeric constants, CFA: '_' in constant
        // In every *_digits pattern an underscore may only appear between two digits; a single
        // optional underscore is also accepted after a prefix and before a suffix or exponent.
hex_quad {hex}{4}
integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)

octal_digits ({octal})|({octal}({octal}|"_")*{octal})
octal_prefix "0""_"?
        // either a lone "0" or a "0" prefix followed by octal digits
octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?

nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
decimal_constant {nonzero_digits}{integer_suffix}?

hex_digits ({hex})|({hex}({hex}|"_")*{hex})
hex_prefix "0"[xX]"_"?
hex_constant {hex_prefix}{hex_digits}{integer_suffix}?

decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
        // digits are required on at least one side of the '.'
fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
exponent "_"?[eE]"_"?[+-]?{decimal_digits}
floating_suffix "_"?[flFL]
        // a fraction with optional exponent, or plain digits with a mandatory exponent
floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?

        // hexadecimal floating constant: the binary (p/P) exponent is mandatory and is written
        // with decimal digits
binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?

        // character escape sequence, GCC: \e => esc character
simple_escape "\\"[abefnrtv'"?\\]
        // ' stop highlighting
        // octal escape: one to three octal digits; hex escape: 'x' plus one or more hex digits
octal_escape "\\"{octal}{1,3}
hex_escape "\\""x"{hex}+
escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}

        // display/white-space characters, given as octal character codes
h_tab [\011]
form_feed [\014]
v_tab [\013]
c_return [\015]
        // horizontal white space: blank or horizontal tab
h_white [ ]|{h_tab}

        // operators, grouped by the way the operator-identifier rules combine them with '?'
op_unary_only "~"|"!"
op_unary_binary "+"|"-"|"*"
op_unary_pre_post "++"|"--"
op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}

op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
op_binary_over {op_unary_binary}|{op_binary_only}
        // op_binary_not_over is used only by "operator", and "operator" is not referenced by any
        // rule visible in this file
op_binary_not_over "?"|"->"|"&&"|"||"
operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}

        // exclusive start conditions: COMMENT is entered by the C-comment rules, QUOTED by the
        // backquoted-identifier rules
%x COMMENT
%x QUOTED
132
133%%
134        /* line directives */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
        /* " stop highlighting */
        // Line directive of the form:  # <line-number> "<file-name>" ...
        // Sets yylineno to the given number and yyfilename to the quoted file name.

        // The pattern allows white space in front of '#', so find the '#' rather than assuming
        // it is yytext[0]; the pattern guarantees that one is present.
        char *number = strchr( yytext, '#' ) + 1;
        char *end_num;
        // Base 10: a line number with a leading zero must not be read as octal.
        long lineno = strtol( number, &end_num, 10 );
        char *begin_string = strchr( end_num, '"' );
        if ( begin_string ) {
          char *end_string = strchr( begin_string + 1, '"' );
          if ( end_string ) {
            long length = end_string - begin_string - 1;
            // Tokens keep the yyfilename pointer (see RETURN_VAL), so a name buffer is never
            // freed; allocate a new one only when the name differs from the current one.
            if ( ! yyfilename
                 || strncmp( yyfilename, begin_string + 1, length ) != 0
                 || yyfilename[ length ] != '\0' ) {
              char *filename = new char[ length + 1 ];
              memcpy( filename, begin_string + 1, length );
              filename[ length ] = '\0';
              yyfilename = filename;
            }
            //std::cout << "file " << yyfilename << " line " << lineno << std::endl;
            yylineno = lineno;
          }
        }
}
156
157        /* ignore preprocessor directives (for now) */
^{h_white}*"#"[^\n]*"\n" { }

        /* C style comments: switch to COMMENT at the opener, discard everything (newlines
           included) until the closer, then switch back to the initial state */
"/*"                    { BEGIN( COMMENT ); }
<COMMENT>.|\n           { }
<COMMENT>"*/"           { BEGIN( INITIAL ); }

        /* C++ style comments: discarded through the end of the line */
"//"[^\n]*"\n"          { }

        /* white space: both macros currently expand to nothing, so it is discarded */
{h_white}+              { WHITE_RETURN( ' ' ); }
({v_tab}|{c_return}|{form_feed})+ { WHITE_RETURN( ' ' ); }
({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" { NEWLINE_RETURN(); }
172
173        /* keywords */
__alignof|__alignof__   { KEYWORD_RETURN( ALIGNOF ); }  /* GCC */
        /* One rule per token; alternate spellings of the same keyword are joined with '|'. */
asm|__asm|__asm__       { KEYWORD_RETURN( ASM ); }      /* __ forms: GCC */
__attribute|__attribute__ { KEYWORD_RETURN( ATTRIBUTE ); } /* GCC */
auto                    { KEYWORD_RETURN( AUTO ); }
_Bool                   { KEYWORD_RETURN( BOOL ); }     /* ANSI99 */
break                   { KEYWORD_RETURN( BREAK ); }
case                    { KEYWORD_RETURN( CASE ); }
catch                   { KEYWORD_RETURN( CATCH ); }    /* CFA */
char                    { KEYWORD_RETURN( CHAR ); }
choose                  { KEYWORD_RETURN( CHOOSE ); }   /* CFA */
_Complex|__complex|__complex__ { KEYWORD_RETURN( COMPLEX ); } /* ANSI99; __ forms: GCC */
const|__const|__const__ { KEYWORD_RETURN( CONST ); }    /* __ forms: GCC */
context                 { KEYWORD_RETURN( CONTEXT ); }  /* CFA */
continue                { KEYWORD_RETURN( CONTINUE ); }
default                 { KEYWORD_RETURN( DEFAULT ); }
do                      { KEYWORD_RETURN( DO ); }
double                  { KEYWORD_RETURN( DOUBLE ); }
dtype                   { KEYWORD_RETURN( DTYPE ); }    /* CFA */
else                    { KEYWORD_RETURN( ELSE ); }
enum                    { KEYWORD_RETURN( ENUM ); }
__extension__           { KEYWORD_RETURN( EXTENSION ); } /* GCC */
extern                  { KEYWORD_RETURN( EXTERN ); }
fallthru                { KEYWORD_RETURN( FALLTHRU ); } /* CFA */
finally                 { KEYWORD_RETURN( FINALLY ); }  /* CFA */
float                   { KEYWORD_RETURN( FLOAT ); }
for                     { KEYWORD_RETURN( FOR ); }
forall                  { KEYWORD_RETURN( FORALL ); }   /* CFA */
fortran                 { KEYWORD_RETURN( FORTRAN ); }
ftype                   { KEYWORD_RETURN( FTYPE ); }    /* CFA */
goto                    { KEYWORD_RETURN( GOTO ); }
if                      { KEYWORD_RETURN( IF ); }
_Imaginary|__imag|__imag__ { KEYWORD_RETURN( IMAGINARY ); } /* ANSI99; __ forms: GCC */
inline|__inline|__inline__ { KEYWORD_RETURN( INLINE ); } /* ANSI99; __ forms: GCC */
int                     { KEYWORD_RETURN( INT ); }
__label__               { KEYWORD_RETURN( LABEL ); }    /* GCC */
long                    { KEYWORD_RETURN( LONG ); }
lvalue                  { KEYWORD_RETURN( LVALUE ); }   /* CFA */
register                { KEYWORD_RETURN( REGISTER ); }
restrict|__restrict|__restrict__ { KEYWORD_RETURN( RESTRICT ); } /* ANSI99; __ forms: GCC */
return                  { KEYWORD_RETURN( RETURN ); }
short                   { KEYWORD_RETURN( SHORT ); }
signed|__signed|__signed__ { KEYWORD_RETURN( SIGNED ); } /* __ forms: GCC */
sizeof                  { KEYWORD_RETURN( SIZEOF ); }
static                  { KEYWORD_RETURN( STATIC ); }
struct                  { KEYWORD_RETURN( STRUCT ); }
switch                  { KEYWORD_RETURN( SWITCH ); }
throw                   { KEYWORD_RETURN( THROW ); }    /* CFA */
try                     { KEYWORD_RETURN( TRY ); }      /* CFA */
type                    { KEYWORD_RETURN( TYPE ); }     /* CFA */
typedef                 { KEYWORD_RETURN( TYPEDEF ); }
typeof|__typeof|__typeof__ { KEYWORD_RETURN( TYPEOF ); } /* GCC */
union                   { KEYWORD_RETURN( UNION ); }
unsigned                { KEYWORD_RETURN( UNSIGNED ); }
void                    { KEYWORD_RETURN( VOID ); }
volatile|__volatile|__volatile__ { KEYWORD_RETURN( VOLATILE ); } /* __ forms: GCC */
while                   { KEYWORD_RETURN( WHILE ); }
250
251        /* identifier */
{identifier}            { IDENTIFIER_RETURN(); }
{attr_identifier}       { ATTRIBUTE_RETURN(); }
        /* backquoted identifier: the opening backquote enters QUOTED, where the only rules are
           "identifier" and the closing backquote, which returns to the initial state */
"`"                     { BEGIN( QUOTED ); }
<QUOTED>{identifier}    { IDENTIFIER_RETURN(); }
<QUOTED>"`"             { BEGIN( INITIAL ); }
257
258        /* numeric constants */
"0"                     { NUMERIC_RETURN( ZERO ); }     /* CFA */
"1"                     { NUMERIC_RETURN( ONE ); }      /* CFA */
        /* the literals "0" and "1" get their own tokens; these two rules are listed ahead of
           the general integer rule, which would otherwise match the same text */
{decimal_constant}|{octal_constant}|{hex_constant} { NUMERIC_RETURN( INTEGERconstant ); }
{floating_constant}|{hex_floating_constant} { NUMERIC_RETURN( FLOATINGconstant ); }

        /* character constant, optional L prefix, allows empty value */
"L"?[']([^'\\\n]|{escape_seq})*['] { RETURN_VAL( CHARACTERconstant ); }
        /* ' stop highlighting */

        /* string constant, optional L prefix */
"L"?["]([^"\\\n]|{escape_seq})*["] { RETURN_VAL( STRINGliteral ); }
        /* " stop highlighting */
274
275        /* punctuation */
[\[\](){},:;.]          { ASCIIOP_RETURN(); }           /* ',' and '.' are also operators */
        /* every single-character punctuator is returned as its own character code */
"..."                   { NAMEDOP_RETURN( ELLIPSIS ); }

        /* alternative ANSI99 brackets (digraphs), returned as the bracket they stand for;
           "%:" & "%:%:" handled by preprocessor */
"<:"                    { RETURN_VAL( '[' ); }
":>"                    { RETURN_VAL( ']' ); }
"<%"                    { RETURN_VAL( '{' ); }
"%>"                    { RETURN_VAL( '}' ); }
293
294        /* operators */
[!+\-*/%^~&|<>=?]       { ASCIIOP_RETURN(); }
        /* single-character operators are returned as their own character code; each
           multi-character operator below has a named token */

        /* increment / decrement */
"++"                    { NAMEDOP_RETURN( ICR ); }
"--"                    { NAMEDOP_RETURN( DECR ); }
        /* equality, shift, relational */
"=="                    { NAMEDOP_RETURN( EQ ); }
"!="                    { NAMEDOP_RETURN( NE ); }
"<<"                    { NAMEDOP_RETURN( LS ); }
">>"                    { NAMEDOP_RETURN( RS ); }
"<="                    { NAMEDOP_RETURN( LE ); }
">="                    { NAMEDOP_RETURN( GE ); }
        /* logical, member access */
"&&"                    { NAMEDOP_RETURN( ANDAND ); }
"||"                    { NAMEDOP_RETURN( OROR ); }
"->"                    { NAMEDOP_RETURN( ARROW ); }
        /* compound assignment */
"+="                    { NAMEDOP_RETURN( PLUSassign ); }
"-="                    { NAMEDOP_RETURN( MINUSassign ); }
"*="                    { NAMEDOP_RETURN( MULTassign ); }
"/="                    { NAMEDOP_RETURN( DIVassign ); }
"%="                    { NAMEDOP_RETURN( MODassign ); }
"&="                    { NAMEDOP_RETURN( ANDassign ); }
"|="                    { NAMEDOP_RETURN( ORassign ); }
"^="                    { NAMEDOP_RETURN( ERassign ); }
"<<="                   { NAMEDOP_RETURN( LSassign ); }
">>="                   { NAMEDOP_RETURN( RSassign ); }
331
332        /* CFA, operator identifier */
{op_unary}"?"           { IDENTIFIER_RETURN(); }        /* unary */
"?"({op_unary_pre_post}|"()"|"[?]") { IDENTIFIER_RETURN(); }
"?"{op_binary_over}"?"  { IDENTIFIER_RETURN(); }        /* binary */
        /*
          The next rule resolves ambiguous runs of operator identifiers, e.g., "int *?*?()", where
          the string "*?*?" can be lexed as "*"/"?*?" or "*?"/"*?". Writing a unary operator
          directly against an identifier, e.g., "*i", is common practice, so users expect the same
          to work against an operator identifier. Even with this special hack, 5 general cases
          remain that cannot be handled. The first is the function-call identifier "?()":

          int * ?()();  // declaration: space required after '*'
          * ?()();      // expression: space required after '*'

          Without the space, the string "*?()" is ambiguous without N character look ahead; the
          scanner would have to look ahead for a '(', which is the start of an argument/parameter
          list.

          The other 4 cases occur in expressions:

          i++?i:0;              // space required before '?'
          i--?i:0;              // space required before '?'
          i?++i:0;              // space required after '?'
          i?--i:0;              // space required after '?'

          In the first two, the string "i++?" can be lexed as "i"/"++?" or "i++"/"?"; deciding
          requires scanning ahead for a '(', which is the start of an argument list. In the last
          two, the string "?++x" can be lexed as "?++"/"x" or "?"/"++x"; again deciding requires
          scanning ahead for a '(', which is the start of an argument list.
        */
{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) {
                            // The leading unary operator is 1 character long when '?' is the
                            // second character of the match, otherwise 2 ("++" or "--").
                            const int opLength = ( yytext[1] == '?' ) ? 1 : 2;
                            yyless( opLength );         /* keep only the operator, put the rest back */
                            if ( opLength == 1 ) {
                                ASCIIOP_RETURN();
                            } // if
                            NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
                        }
374
375        /* unknown characters */
.                       {
                            // any single character no earlier rule accepts: report and skip it
                            printf( "unknown character(s):\"%s\" on line %d\n", yytext, yylineno );
                        }
377
378%%
379
380
381// Local Variables:
382// fill-column: 100
383// compile-command: "make"
384// End:
Note: See TracBrowser for help on using the repository browser.