/*                               -*- Mode: C -*-
 *
 * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this
 * grammar and to use it within software systems.  THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT
 * ANY EXPRESS OR IMPLIED WARRANTIES.
 *
 * lex.l --
 *
 * Author           : Peter A. Buhr
 * Created On       : Sat Sep 22 08:58:10 2001
 * Last Modified By : Peter A. Buhr
 * Last Modified On : Thu Jan 23 16:17:09 2003
 * Update Count     : 191
 */

%option yylineno

%{
/* This lexer assumes the program has been preprocessed by cpp. Hence, all user level preprocessor
   directives have been performed and removed from the source. The only exceptions are preprocessor
   directives passed to the compiler (e.g., line-number directives) and C/C++ style comments, which
   are ignored. */

/*************** Includes and Defines *****************************/

#include <algorithm>				/* std::remove, used by rm_underscore */
#include <cstdio>				/* printf */
#include <cstdlib>				/* strtol */
#include <cstring>				/* strchr, memcpy */
#include <string>				/* std::string, token text stored by RETURN_VAL */

#include "ParseNode.h"
#include "cfa.tab.h"				/* YACC generated definitions based on C++ grammar */
#include "lex.h"

/* Name of the source file currently being scanned; copied into every token by RETURN_VAL. */
char *yyfilename;

/* White space produces no token: both macros expand to nothing. */
#define WHITE_RETURN(x)				/* do nothing */
#define NEWLINE_RETURN()	WHITE_RETURN('\n')

/* Fill in the semantic value of the current token (text, file, line) and return token code x from
   the scanner.  Wrapped in do/while(0) so the macro behaves as ONE statement, e.g., under an
   unbraced "if"; every use must be followed by ';'. */
#define RETURN_VAL(x) \
	do { \
		yylval.tok.str = new std::string( yytext ); \
		yylval.tok.file = yyfilename; \
		yylval.tok.line = yylineno; \
		return (x); \
	} while ( 0 )

#define KEYWORD_RETURN(x)	RETURN_VAL(x)			/* keyword */

/* Classify a name through the typedef table: plain identifier, typedef name, or otherwise a
   type-generator name. */
#define IDENTIFIER_RETURN() \
	RETURN_VAL( ( typedefTable.isIdentifier( yytext ) ? IDENTIFIER : \
		      typedefTable.isTypedef( yytext ) ? TYPEDEFname : TYPEGENname ) )

#define ASCIIOP_RETURN()	RETURN_VAL( (int)yytext[0] )	/* single character operator */
#define NAMEDOP_RETURN(x)	RETURN_VAL(x)			/* multichar operator, with a name */

/* Numeric constant: strip the CFA '_' digit separators before the text is captured. */
#define NUMERIC_RETURN(x) \
	do { \
		rm_underscore(); \
		RETURN_VAL(x); \
	} while ( 0 )

/* Remove every '_' from the current token, compacting yytext in place and shrinking yyleng to
   match.  The token only ever gets shorter, so the terminating '\0' is written inside the text
   that was originally matched. */
void rm_underscore() {
	char *end = std::remove( yytext, yytext + yyleng, '_' );
	*end = '\0';
	yyleng = static_cast<int>( end - yytext );
}

%}

octal		[0-7]
nonzero		[1-9]
decimal		[0-9]
hex		[0-9a-fA-F]
hex_quad	{hex}{4}

/* identifier, GCC: $ in identifier */
universal_char	"\\"((u{hex_quad})|(U{hex_quad}{2}))
identifier	([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*

/* numeric constants, CFA: '_' in constant */
integer_suffix	"_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)

octal_digits	({octal})|({octal}({octal}|"_")*{octal})
octal_prefix	"0""_"?
octal_constant	(("0")|({octal_prefix}{octal_digits})){integer_suffix}?

nonzero_digits	({nonzero})|({nonzero}({decimal}|"_")*{decimal})
decimal_constant	{nonzero_digits}{integer_suffix}?

hex_digits	({hex})|({hex}({hex}|"_")*{hex})
hex_prefix	"0"[xX]"_"?
hex_constant	{hex_prefix}{hex_digits}{integer_suffix}?

decimal_digits	({decimal})|({decimal}({decimal}|"_")*{decimal})
fractional_constant	({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
exponent	"_"?[eE]"_"?[+-]?{decimal_digits}
floating_suffix	"_"?[flFL]
floating_constant	(({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?

binary_exponent	"_"?[pP]"_"?[+-]?{decimal_digits}
hex_fractional_constant	({hex_digits}?"."{hex_digits})|({hex_digits}".")
hex_floating_constant	{hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?
/* character escape sequence, GCC: \e => esc character */
simple_escape	"\\"[abefnrtv'"?\\]
octal_escape	"\\"{octal}{1,3}
hex_escape	"\\""x"{hex}+
escape_seq	{simple_escape}|{octal_escape}|{hex_escape}|{universal_char}

/* display/white-space characters */
h_tab		[\011]
form_feed	[\014]
v_tab		[\013]
c_return	[\015]
h_white		[ ]|{h_tab}

/* operators */
op_unary_only		"~"|"!"
op_unary_binary		"+"|"-"|"*"
op_unary_pre_post	"++"|"--"
op_unary		{op_unary_only}|{op_unary_binary}|{op_unary_pre_post}

op_binary_only		"/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
op_binary_over		{op_unary_binary}|{op_binary_only}
op_binary_not_over	"?"|"->"|"&&"|"||"
operator		{op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}

%x COMMENT

%%
		/* line directives: # line-number "file-name" [flags]
		   Adopt the line number and file name reported by the preprocessor.  The number is
		   parsed starting just after the '#', which need not be the first character because
		   the pattern permits leading white space, and always in base 10 so a leading zero
		   is not taken as octal.  The file-name buffer is deliberately never freed:
		   RETURN_VAL stores the yyfilename pointer in every subsequent token. */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
			char *hash = strchr( yytext, '#' );	/* always found: required by the pattern */
			char *end_num;
			long lineno = strtol( hash + 1, &end_num, 10 );	/* strtol skips the blanks after '#' */
			char *begin_string = strchr( end_num, '"' );
			if ( begin_string ) {
				char *end_string = strchr( begin_string + 1, '"' );
				if ( end_string ) {
					long length = end_string - begin_string - 1;
					char *filename = new char[ length + 1 ];
					memcpy( filename, begin_string + 1, length );
					filename[ length ] = '\0';
					//std::cout << "file " << filename << " line " << lineno << std::endl;
					yylineno = static_cast<int>( lineno );
					yyfilename = filename;
				}
			}
		}

		/* ignore preprocessor directives (for now) */
^{h_white}*"#"[^\n]*"\n"	;

		/* ignore C style comments: COMMENT is an exclusive start condition, so the rules that
		   consume the comment body must be tagged with it, otherwise nothing is active
		   inside a comment and the catch-all would instead fire in the initial state */
"/*"		{BEGIN COMMENT;}
<COMMENT>.|\n	;
<COMMENT>"*/"	{BEGIN 0;}

		/* ignore C++ style comments */
"//"[^\n]*"\n"	;

		/* ignore whitespace */
{h_white}+	{WHITE_RETURN(' ');}
({v_tab}|{c_return}|{form_feed})+	{WHITE_RETURN(' ');}
({h_white}|{v_tab}|{c_return}|{form_feed})*"\n"	{NEWLINE_RETURN();}

		/* keywords */
__alignof	{KEYWORD_RETURN(ALIGNOF);}	/* GCC */
__alignof__	{KEYWORD_RETURN(ALIGNOF);}	/* GCC */
asm		{KEYWORD_RETURN(ASM);}
__asm		{KEYWORD_RETURN(ASM);}		/* GCC */
__asm__		{KEYWORD_RETURN(ASM);}		/* GCC */
__attribute	{KEYWORD_RETURN(ATTRIBUTE);}	/* GCC */
__attribute__	{KEYWORD_RETURN(ATTRIBUTE);}	/* GCC */
auto		{KEYWORD_RETURN(AUTO);}
_Bool		{KEYWORD_RETURN(BOOL);}		/* ANSI99 */
break		{KEYWORD_RETURN(BREAK);}
case		{KEYWORD_RETURN(CASE);}
catch		{KEYWORD_RETURN(CATCH);}	/* CFA */
char		{KEYWORD_RETURN(CHAR);}
choose		{KEYWORD_RETURN(CHOOSE);}
_Complex	{KEYWORD_RETURN(COMPLEX);}	/* ANSI99 */
__complex	{KEYWORD_RETURN(COMPLEX);}	/* GCC */
__complex__	{KEYWORD_RETURN(COMPLEX);}	/* GCC */
const		{KEYWORD_RETURN(CONST);}
__const		{KEYWORD_RETURN(CONST);}	/* GCC */
__const__	{KEYWORD_RETURN(CONST);}	/* GCC */
context		{KEYWORD_RETURN(CONTEXT);}
continue	{KEYWORD_RETURN(CONTINUE);}
default		{KEYWORD_RETURN(DEFAULT);}
do		{KEYWORD_RETURN(DO);}
double		{KEYWORD_RETURN(DOUBLE);}
dtype		{KEYWORD_RETURN(DTYPE);}
else		{KEYWORD_RETURN(ELSE);}
enum		{KEYWORD_RETURN(ENUM);}
__extension__	{KEYWORD_RETURN(EXTENSION);}	/* GCC */
extern		{KEYWORD_RETURN(EXTERN);}
fallthru	{KEYWORD_RETURN(FALLTHRU);}
float		{KEYWORD_RETURN(FLOAT);}
for		{KEYWORD_RETURN(FOR);}
forall		{KEYWORD_RETURN(FORALL);}
fortran		{KEYWORD_RETURN(FORTRAN);}
ftype		{KEYWORD_RETURN(FTYPE);}
goto		{KEYWORD_RETURN(GOTO);}
if		{KEYWORD_RETURN(IF);}
_Imaginary	{KEYWORD_RETURN(IMAGINARY);}	/* ANSI99 */
__imag		{KEYWORD_RETURN(IMAGINARY);}	/* GCC */
__imag__	{KEYWORD_RETURN(IMAGINARY);}	/* GCC */
inline		{KEYWORD_RETURN(INLINE);}	/* ANSI99 */
__inline	{KEYWORD_RETURN(INLINE);}	/* GCC */
__inline__	{KEYWORD_RETURN(INLINE);}	/* GCC */
int		{KEYWORD_RETURN(INT);}
__label__	{KEYWORD_RETURN(LABEL);}	/* GCC */
long		{KEYWORD_RETURN(LONG);}
lvalue		{KEYWORD_RETURN(LVALUE);}
register	{KEYWORD_RETURN(REGISTER);}
restrict	{KEYWORD_RETURN(RESTRICT);}	/* ANSI99 */
__restrict	{KEYWORD_RETURN(RESTRICT);}	/* GCC */
__restrict__	{KEYWORD_RETURN(RESTRICT);}	/* GCC */
return		{KEYWORD_RETURN(RETURN);}
short		{KEYWORD_RETURN(SHORT);}
signed		{KEYWORD_RETURN(SIGNED);}
__signed	{KEYWORD_RETURN(SIGNED);}	/* GCC */
__signed__	{KEYWORD_RETURN(SIGNED);}	/* GCC */
sizeof		{KEYWORD_RETURN(SIZEOF);}
static		{KEYWORD_RETURN(STATIC);}
struct		{KEYWORD_RETURN(STRUCT);}
switch		{KEYWORD_RETURN(SWITCH);}
throw		{KEYWORD_RETURN(THROW);}	/* CFA */
try		{KEYWORD_RETURN(TRY);}		/* CFA */
type		{KEYWORD_RETURN(TYPE);}
typedef		{KEYWORD_RETURN(TYPEDEF);}
typeof		{KEYWORD_RETURN(TYPEOF);}	/* GCC */
__typeof	{KEYWORD_RETURN(TYPEOF);}	/* GCC */
__typeof__	{KEYWORD_RETURN(TYPEOF);}	/* GCC */
union		{KEYWORD_RETURN(UNION);}
unsigned	{KEYWORD_RETURN(UNSIGNED);}
void		{KEYWORD_RETURN(VOID);}
volatile	{KEYWORD_RETURN(VOLATILE);}
__volatile	{KEYWORD_RETURN(VOLATILE);}	/* GCC */
__volatile__	{KEYWORD_RETURN(VOLATILE);}	/* GCC */
while		{KEYWORD_RETURN(WHILE);}

		/* identifier: must follow the keyword rules, which win ties of equal length */
{identifier}	{IDENTIFIER_RETURN();}

		/* numeric constants: "0" and "1" get their own tokens and must precede the general
		   integer rules, which match the same text */
"0"		{NUMERIC_RETURN(ZERO);}		/* CFA */
"1"		{NUMERIC_RETURN(ONE);}		/* CFA */
{decimal_constant}	{NUMERIC_RETURN(INTEGERconstant);}
{octal_constant}	{NUMERIC_RETURN(INTEGERconstant);}
{hex_constant}		{NUMERIC_RETURN(INTEGERconstant);}
{floating_constant}	{NUMERIC_RETURN(FLOATINGconstant);}
{hex_floating_constant}	{NUMERIC_RETURN(FLOATINGconstant);}

		/* character constant, allows empty value */
"L"?[']([^'\\\n]|{escape_seq})*[']	{RETURN_VAL(CHARACTERconstant);}

		/* string constant */
"L"?["]([^"\\\n]|{escape_seq})*["]	{RETURN_VAL(STRINGliteral);}

		/* punctuation */
"["		{ASCIIOP_RETURN();}
"]"		{ASCIIOP_RETURN();}
"("		{ASCIIOP_RETURN();}
")"		{ASCIIOP_RETURN();}
"{"		{ASCIIOP_RETURN();}
"}"		{ASCIIOP_RETURN();}
","		{ASCIIOP_RETURN();}		/* also operator */
":"		{ASCIIOP_RETURN();}
";"		{ASCIIOP_RETURN();}
"."		{ASCIIOP_RETURN();}		/* also operator */
"..."		{NAMEDOP_RETURN(ELLIPSIS);}

		/* alternative ANSI99 brackets, returned as the token of the bracket they stand for.
		   Original note: "<:" & "<:<:" handled by preprocessor.
		   NOTE(review): presumably "%:" and "%:%:" were meant -- confirm. */
"<:"		{RETURN_VAL('[');}
":>"		{RETURN_VAL(']');}
"<%"		{RETURN_VAL('{');}
"%>"		{RETURN_VAL('}');}

		/* operators: single character ones are returned as their own character code */
"!"		{ASCIIOP_RETURN();}
"+"		{ASCIIOP_RETURN();}
"-"		{ASCIIOP_RETURN();}
"*"		{ASCIIOP_RETURN();}
"/"		{ASCIIOP_RETURN();}
"%"		{ASCIIOP_RETURN();}
"^"		{ASCIIOP_RETURN();}
"~"		{ASCIIOP_RETURN();}
"&"		{ASCIIOP_RETURN();}
"|"		{ASCIIOP_RETURN();}
"<"		{ASCIIOP_RETURN();}
">"		{ASCIIOP_RETURN();}
"="		{ASCIIOP_RETURN();}
"?"		{ASCIIOP_RETURN();}

"++"		{NAMEDOP_RETURN(ICR);}
"--"		{NAMEDOP_RETURN(DECR);}
"=="		{NAMEDOP_RETURN(EQ);}
"!="		{NAMEDOP_RETURN(NE);}
"<<"		{NAMEDOP_RETURN(LS);}
">>"		{NAMEDOP_RETURN(RS);}
"<="		{NAMEDOP_RETURN(LE);}
">="		{NAMEDOP_RETURN(GE);}
"&&"		{NAMEDOP_RETURN(ANDAND);}
"||"		{NAMEDOP_RETURN(OROR);}
"->"		{NAMEDOP_RETURN(ARROW);}
"+="		{NAMEDOP_RETURN(PLUSassign);}
"-="		{NAMEDOP_RETURN(MINUSassign);}
"*="		{NAMEDOP_RETURN(MULTassign);}
"/="		{NAMEDOP_RETURN(DIVassign);}
"%="		{NAMEDOP_RETURN(MODassign);}
"&="		{NAMEDOP_RETURN(ANDassign);}
"|="		{NAMEDOP_RETURN(ORassign);}
"^="		{NAMEDOP_RETURN(ERassign);}
"<<="		{NAMEDOP_RETURN(LSassign);}
">>="		{NAMEDOP_RETURN(RSassign);}

		/* CFA, operator identifier */
{op_unary}"?"	{IDENTIFIER_RETURN();}		/* unary */
"?"({op_unary_pre_post}|"()"|"[?]")	{IDENTIFIER_RETURN();}
"?"{op_binary_over}"?"	{IDENTIFIER_RETURN();}	/* binary */

		/* Disambiguation of juxtaposed operator identifiers.  In "int *?*?()" the string
		   "*?*?" can be split as "*"/"?*?" or as "*?"/"*?".  Because it is common practice to
		   write a unary operator directly against an identifier, e.g., "*i", users would be
		   annoyed if the same could not be done with operator identifiers, so the rule below
		   matches the whole ambiguous string and then gives back everything after the leading
		   unary operator.

		   Even with this special hack, 5 general cases cannot be handled.  The first involves
		   the function-call identifier "?()":

			int * ?()();	// declaration: space required after '*'
			* ?()();	// expression: space required after '*'

		   Without the space, "*?()" is ambiguous without N character look ahead; it requires
		   scanning ahead to determine if there is a '(', which is the start of an
		   argument/parameter list.

		   The 4 remaining cases occur in expressions:

			i++?i:0;	// space required before '?'
			i--?i:0;	// space required before '?'
			i?++i:0;	// space required after '?'
			i?--i:0;	// space required after '?'

		   In the first two, "i++?" can be lexed as "i"/"++?" or "i++"/"?"; it requires
		   scanning ahead to determine if there is a '(', which is the start of an argument
		   list.  In the second two, "?++x" can be lexed as "?++"/"x" or "?"/"++x"; it requires
		   scanning ahead to determine if there is a '(', which is the start of an argument
		   list. */
{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) {
			/* The leading unary operator is 1 character long when the '?' comes right
			   after it, otherwise it is the 2 character "++" or "--". */
			const int op_len = ( yytext[1] == '?' ) ? 1 : 2;
			yyless( op_len );		/* put back characters up to first '?' */
			if ( op_len == 1 ) {
				ASCIIOP_RETURN();
			} // if
			NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
		}

		/* unknown characters: report and keep scanning */
.		{printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno);}

%%

/* Local Variables: */
/* fill-column: 100 */
/* compile-command: "gmake" */
/* End: */