/* -*- Mode: C -*-
 *
 * CForall Lexer Version 1.0, Copyright (C) Peter A. Buhr 2001 -- Permission is granted to copy this
 *      grammar and to use it within software systems.  THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT
 *      ANY EXPRESS OR IMPLIED WARRANTIES.
 *
 * lex.l --
 *
 * Author           : Peter A. Buhr
 * Created On       : Sat Sep 22 08:58:10 2001
 * Last Modified By : Peter A. Buhr
 * Last Modified On : Tue Nov 11 08:10:05 2014
 * Update Count     : 215
 */

%option yylineno

%{
// This lexer assumes the program has been preprocessed by cpp. Hence, all user level preprocessor
// directives have been performed and removed from the source. The only exceptions are preprocessor
// directives passed to the compiler (e.g., line-number directives) and C/C++ style comments, which
// are ignored.

//**************************** Includes and Defines ****************************

#include <string>                       // std::string, allocated for every token by RETURN_VAL

#include "lex.h"
#include "ParseNode.h"
#include "cfa.tab.h"                    // YACC generated definitions based on C++ grammar

// Name of the source file currently being scanned. RETURN_VAL copies this pointer into the
// location of every token; the line-directive rule replaces it when a new file name is seen.
char *yyfilename;

// White space produces no token: the trailing comment is removed by the C preprocessor, so both
// macros expand to nothing.
#define WHITE_RETURN(x)                 // do nothing
#define NEWLINE_RETURN()                WHITE_RETURN('\n')

// Fill in the semantic value (token text and source location) and return token code x to the
// parser. This is a multi-statement macro, so it must only be expanded inside a braced action.
// NOTE(review): the std::string allocated here is never freed in this file -- presumably the parser
// takes ownership; confirm.
#define RETURN_VAL(x)                   yylval.tok.str = new std::string(yytext); \
                                        yylval.tok.loc.file = yyfilename; \
                                        yylval.tok.loc.line = yylineno; \
                                        return(x)

#define KEYWORD_RETURN(x)               RETURN_VAL(x) // keyword
// Classify a name through the typedef table: ordinary identifier, typedef name, or (when it is
// neither) type-generator name.
#define IDENTIFIER_RETURN()             RETURN_VAL((typedefTable.isIdentifier(yytext) ? IDENTIFIER : typedefTable.isTypedef(yytext) ? TYPEDEFname : TYPEGENname))
//#define ATTRIBUTE_RETURN()            RETURN_VAL((typedefTable.isIdentifier(yytext) ? ATTR_IDENTIFIER : typedefTable.isTypedef(yytext) ? ATTR_TYPEDEFname : ATTR_TYPEGENname))
#define ATTRIBUTE_RETURN()              RETURN_VAL(ATTR_IDENTIFIER)

#define ASCIIOP_RETURN()                RETURN_VAL((int)yytext[0]) // single character operator
#define NAMEDOP_RETURN(x)               RETURN_VAL(x) // multichar operator, with a name

#define NUMERIC_RETURN(x)               rm_underscore(); RETURN_VAL(x) // numeric constant

// Remove every '_' from the current token, in place: yytext is compacted towards its start and
// yyleng is reduced to match, so the text captured afterwards by RETURN_VAL holds no underscores.
void rm_underscore() {
    // remove underscores in constant or escape sequence
    int j = 0;                          // next write position; never ahead of the read position i
    for ( int i = 0; i < yyleng; i += 1 ) {
        if ( yytext[i] != '_' ) {
            yytext[j] = yytext[i];
            j += 1;
        } // if
    } // for
    yyleng = j;
    yytext[yyleng] = '\0';              // re-terminate the shortened token
}

%}

octal                   [0-7]
nonzero                 [1-9]
decimal                 [0-9]
hex                     [0-9a-fA-F]
universal_char          "\\"((u{hex_quad})|(U{hex_quad}{2}))

/* identifier, GCC: $ in identifier */
identifier              ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*

/* quoted identifier */
quoted_identifier       "`"{identifier}"`"

/* attribute identifier, GCC: $ in identifier */
attr_identifier         "@"{identifier}

/* numeric constants, CFA: '_' in constant */
hex_quad                {hex}{4}
integer_suffix          "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)

octal_digits            ({octal})|({octal}({octal}|"_")*{octal})
octal_prefix            "0""_"?
octal_constant          (("0")|({octal_prefix}{octal_digits})){integer_suffix}?

nonzero_digits          ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
decimal_constant        {nonzero_digits}{integer_suffix}?

hex_digits              ({hex})|({hex}({hex}|"_")*{hex})
hex_prefix              "0"[xX]"_"?
hex_constant            {hex_prefix}{hex_digits}{integer_suffix}?

decimal_digits          ({decimal})|({decimal}({decimal}|"_")*{decimal})
fractional_constant     ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
exponent                "_"?[eE]"_"?[+-]?{decimal_digits}
floating_suffix         "_"?[flFL]
floating_constant       (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
binary_exponent         "_"?[pP]"_"?[+-]?{decimal_digits}
hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
hex_floating_constant   {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?

/* character escape sequence, GCC: \e => esc character */
simple_escape           "\\"[abefnrtv'"?\\]
/* ' stop highlighting */
octal_escape            "\\"{octal}{1,3}
hex_escape              "\\""x"{hex}+
escape_seq              {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}

/* display/white-space characters */
h_tab                   [\011]
form_feed               [\014]
v_tab                   [\013]
c_return                [\015]
h_white                 [ ]|{h_tab}

/* operators */
op_unary_only           "~"|"!"
op_unary_binary         "+"|"-"|"*"
op_unary_pre_post       "++"|"--"
op_unary                {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}

op_binary_only          "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
op_binary_over          {op_unary_binary}|{op_binary_only}
op_binary_not_over      "?"|"->"|"&&"|"||"
operator                {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}

%x COMMENT
%x QUOTED

%%
    /* line directives: # <line number> "<file name>" ... */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["][^\n]*"\n" {
    /* " stop highlighting */
    char *end_num;
    /* The pattern permits white space before the hash character, so locate it instead of
       assuming it is the first character of yytext. The line number is always decimal, hence
       base 10 (base 0 would read a leading zero as octal). */
    long lineno = strtol( strchr( yytext, '#' ) + 1, &end_num, 10 );
    char *begin_string = strchr( end_num, '"' );
    if ( begin_string ) {
        char *end_string = strchr( begin_string + 1, '"' );
        if ( end_string ) {
            long length = end_string - begin_string - 1;
            /* The buffer is deliberately not freed here: RETURN_VAL stores the yyfilename
               pointer in token locations, so earlier tokens may still refer to the old name. */
            char *filename = new char[ length + 1 ];
            memcpy( filename, begin_string + 1, length );
            filename[ length ] = '\0';
            //std::cout << "file " << filename << " line " << lineno << std::endl;
            yylineno = lineno;
            yyfilename = filename;
        }
    }
}

    /* ignore preprocessor directives (for now) */
^{h_white}*"#"[^\n]*"\n" ;

    /* ignore C style comments; COMMENT is an exclusive start condition, so only the two
       COMMENT rules are active until the closing delimiter is seen */
"/*"                    {BEGIN COMMENT;}
<COMMENT>.|\n           ;
<COMMENT>"*/"           {BEGIN INITIAL;}

    /* ignore C++ style comments */
"//"[^\n]*"\n"          ;

    /* ignore whitespace */
{h_white}+              {WHITE_RETURN(' ');}
({v_tab}|{c_return}|{form_feed})+ {WHITE_RETURN(' ');}
({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" {NEWLINE_RETURN();}

    /* keywords */
__alignof               {KEYWORD_RETURN(ALIGNOF);}      /* GCC */
__alignof__             {KEYWORD_RETURN(ALIGNOF);}      /* GCC */
asm                     {KEYWORD_RETURN(ASM);}
__asm                   {KEYWORD_RETURN(ASM);}          /* GCC */
__asm__                 {KEYWORD_RETURN(ASM);}          /* GCC */
__attribute             {KEYWORD_RETURN(ATTRIBUTE);}    /* GCC */
__attribute__           {KEYWORD_RETURN(ATTRIBUTE);}    /* GCC */
auto                    {KEYWORD_RETURN(AUTO);}
_Bool                   {KEYWORD_RETURN(BOOL);}         /* ANSI99 */
break                   {KEYWORD_RETURN(BREAK);}
case                    {KEYWORD_RETURN(CASE);}
catch                   {KEYWORD_RETURN(CATCH);}        /* CFA */
char                    {KEYWORD_RETURN(CHAR);}
choose                  {KEYWORD_RETURN(CHOOSE);}       /* CFA */
_Complex                {KEYWORD_RETURN(COMPLEX);}      /* ANSI99 */
__complex               {KEYWORD_RETURN(COMPLEX);}      /* GCC */
__complex__             {KEYWORD_RETURN(COMPLEX);}      /* GCC */
const                   {KEYWORD_RETURN(CONST);}
__const                 {KEYWORD_RETURN(CONST);}        /* GCC */
__const__               {KEYWORD_RETURN(CONST);}        /* GCC */
context                 {KEYWORD_RETURN(CONTEXT);}      /* CFA */
continue                {KEYWORD_RETURN(CONTINUE);}
default                 {KEYWORD_RETURN(DEFAULT);}
do                      {KEYWORD_RETURN(DO);}
double                  {KEYWORD_RETURN(DOUBLE);}
dtype                   {KEYWORD_RETURN(DTYPE);}        /* CFA */
else                    {KEYWORD_RETURN(ELSE);}
enum                    {KEYWORD_RETURN(ENUM);}
__extension__           {KEYWORD_RETURN(EXTENSION);}    /* GCC */
extern                  {KEYWORD_RETURN(EXTERN);}
fallthru                {KEYWORD_RETURN(FALLTHRU);}     /* CFA */
finally                 {KEYWORD_RETURN(FINALLY);}      /* CFA */
float                   {KEYWORD_RETURN(FLOAT);}
for                     {KEYWORD_RETURN(FOR);}
forall                  {KEYWORD_RETURN(FORALL);}       /* CFA */
fortran                 {KEYWORD_RETURN(FORTRAN);}
ftype                   {KEYWORD_RETURN(FTYPE);}        /* CFA */
goto                    {KEYWORD_RETURN(GOTO);}
if                      {KEYWORD_RETURN(IF);}
_Imaginary              {KEYWORD_RETURN(IMAGINARY);}    /* ANSI99 */
__imag                  {KEYWORD_RETURN(IMAGINARY);}    /* GCC */
__imag__                {KEYWORD_RETURN(IMAGINARY);}    /* GCC */
inline                  {KEYWORD_RETURN(INLINE);}       /* ANSI99 */
__inline                {KEYWORD_RETURN(INLINE);}       /* GCC */
__inline__              {KEYWORD_RETURN(INLINE);}       /* GCC */
int                     {KEYWORD_RETURN(INT);}
__label__               {KEYWORD_RETURN(LABEL);}        /* GCC */
long                    {KEYWORD_RETURN(LONG);}
lvalue                  {KEYWORD_RETURN(LVALUE);}       /* CFA */
register                {KEYWORD_RETURN(REGISTER);}
restrict                {KEYWORD_RETURN(RESTRICT);}     /* ANSI99 */
__restrict              {KEYWORD_RETURN(RESTRICT);}     /* GCC */
__restrict__            {KEYWORD_RETURN(RESTRICT);}     /* GCC */
return                  {KEYWORD_RETURN(RETURN);}
short                   {KEYWORD_RETURN(SHORT);}
signed                  {KEYWORD_RETURN(SIGNED);}
__signed                {KEYWORD_RETURN(SIGNED);}       /* GCC */
__signed__              {KEYWORD_RETURN(SIGNED);}       /* GCC */
sizeof                  {KEYWORD_RETURN(SIZEOF);}
static                  {KEYWORD_RETURN(STATIC);}
struct                  {KEYWORD_RETURN(STRUCT);}
switch                  {KEYWORD_RETURN(SWITCH);}
throw                   {KEYWORD_RETURN(THROW);}        /* CFA */
try                     {KEYWORD_RETURN(TRY);}          /* CFA */
type                    {KEYWORD_RETURN(TYPE);}         /* CFA */
typedef                 {KEYWORD_RETURN(TYPEDEF);}
typeof                  {KEYWORD_RETURN(TYPEOF);}       /* GCC */
__typeof                {KEYWORD_RETURN(TYPEOF);}       /* GCC */
__typeof__              {KEYWORD_RETURN(TYPEOF);}       /* GCC */
union                   {KEYWORD_RETURN(UNION);}
unsigned                {KEYWORD_RETURN(UNSIGNED);}
void                    {KEYWORD_RETURN(VOID);}
volatile                {KEYWORD_RETURN(VOLATILE);}
__volatile              {KEYWORD_RETURN(VOLATILE);}     /* GCC */
__volatile__            {KEYWORD_RETURN(VOLATILE);}     /* GCC */
while                   {KEYWORD_RETURN(WHILE);}

    /* identifier; between back quotes (exclusive QUOTED condition) the keyword rules are
       inactive, so any identifier-shaped text, including a keyword, is returned as a name */
{identifier}            {IDENTIFIER_RETURN();}
{attr_identifier}       {ATTRIBUTE_RETURN();}
"`"                     {BEGIN QUOTED;}
<QUOTED>{identifier}    {IDENTIFIER_RETURN();}
<QUOTED>"`"             {BEGIN INITIAL;}

    /* numeric constants; "0" and "1" precede the general patterns so that, for an equally long
       match, they are returned as ZERO and ONE */
"0"                     {NUMERIC_RETURN(ZERO);}         /* CFA */
"1"                     {NUMERIC_RETURN(ONE);}          /* CFA */
{decimal_constant}      {NUMERIC_RETURN(INTEGERconstant);}
{octal_constant}        {NUMERIC_RETURN(INTEGERconstant);}
{hex_constant}          {NUMERIC_RETURN(INTEGERconstant);}
{floating_constant}     {NUMERIC_RETURN(FLOATINGconstant);}
{hex_floating_constant} {NUMERIC_RETURN(FLOATINGconstant);}

    /* character constant, allows empty value */
"L"?[']([^'\\\n]|{escape_seq})*['] {RETURN_VAL(CHARACTERconstant);}
    /* ' stop highlighting */

    /* string constant */
"L"?["]([^"\\\n]|{escape_seq})*["] {RETURN_VAL(STRINGliteral);}
    /* " stop highlighting */

    /* punctuation */
"["                     {ASCIIOP_RETURN();}
"]"                     {ASCIIOP_RETURN();}
"("                     {ASCIIOP_RETURN();}
")"                     {ASCIIOP_RETURN();}
"{"                     {ASCIIOP_RETURN();}
"}"                     {ASCIIOP_RETURN();}
","                     {ASCIIOP_RETURN();}             /* also operator */
":"                     {ASCIIOP_RETURN();}
";"                     {ASCIIOP_RETURN();}
"."                     {ASCIIOP_RETURN();}             /* also operator */
"..."                   {NAMEDOP_RETURN(ELLIPSIS);}

    /* alternative ANSI99 brackets, "%:" & "%:%:" handled by preprocessor */
"<:"                    {RETURN_VAL('[');}
":>"                    {RETURN_VAL(']');}
"<%"                    {RETURN_VAL('{');}
"%>"                    {RETURN_VAL('}');}

    /* operators */
"!"                     {ASCIIOP_RETURN();}
"+"                     {ASCIIOP_RETURN();}
"-"                     {ASCIIOP_RETURN();}
"*"                     {ASCIIOP_RETURN();}
"/"                     {ASCIIOP_RETURN();}
"%"                     {ASCIIOP_RETURN();}
"^"                     {ASCIIOP_RETURN();}
"~"                     {ASCIIOP_RETURN();}
"&"                     {ASCIIOP_RETURN();}
"|"                     {ASCIIOP_RETURN();}
"<"                     {ASCIIOP_RETURN();}
">"                     {ASCIIOP_RETURN();}
"="                     {ASCIIOP_RETURN();}
"?"                     {ASCIIOP_RETURN();}

"++"                    {NAMEDOP_RETURN(ICR);}
"--"                    {NAMEDOP_RETURN(DECR);}
"=="                    {NAMEDOP_RETURN(EQ);}
"!="                    {NAMEDOP_RETURN(NE);}
"<<"                    {NAMEDOP_RETURN(LS);}
">>"                    {NAMEDOP_RETURN(RS);}
"<="                    {NAMEDOP_RETURN(LE);}
">="                    {NAMEDOP_RETURN(GE);}
"&&"                    {NAMEDOP_RETURN(ANDAND);}
"||"                    {NAMEDOP_RETURN(OROR);}
"->"                    {NAMEDOP_RETURN(ARROW);}
"+="                    {NAMEDOP_RETURN(PLUSassign);}
"-="                    {NAMEDOP_RETURN(MINUSassign);}
"*="                    {NAMEDOP_RETURN(MULTassign);}
"/="                    {NAMEDOP_RETURN(DIVassign);}
"%="                    {NAMEDOP_RETURN(MODassign);}
"&="                    {NAMEDOP_RETURN(ANDassign);}
"|="                    {NAMEDOP_RETURN(ORassign);}
"^="                    {NAMEDOP_RETURN(ERassign);}
"<<="                   {NAMEDOP_RETURN(LSassign);}
">>="                   {NAMEDOP_RETURN(RSassign);}

    /* CFA, operator identifier */
{op_unary}"?"           {IDENTIFIER_RETURN();}          /* unary */
"?"({op_unary_pre_post}|"()"|"[?]") {IDENTIFIER_RETURN();}
"?"{op_binary_over}"?"  {IDENTIFIER_RETURN();}          /* binary */

    /*
      This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the
      string "*?*?" can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practise to put a
      unary operator juxtaposed to an identifier, e.g., "*i", users will be annoyed if they cannot
      do this with respect to operator identifiers. Even with this special hack, there are 5
      general cases that cannot be handled. The first case is for the function-call identifier
      "?()":

        int * ?()();    // declaration: space required after '*'
        * ?()();        // expression: space required after '*'

      Without the space, the string "*?()" is ambiguous without N character look ahead; it
      requires scanning ahead to determine if there is a '(', which is the start of an
      argument/parameter list.

      The 4 remaining cases occur in expressions:

        i++?i:0;        // space required before '?'
        i--?i:0;        // space required before '?'
        i?++i:0;        // space required after '?'
        i?--i:0;        // space required after '?'

      In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as
      "i"/"++?" or "i++"/"?"; it requires scanning ahead to determine if there is a '(', which is
      the start of an argument list. In the second two cases, the string "?++x" is ambiguous,
      where this string can be lexed as "?++"/"x" or "?"/"++x"; it requires scanning ahead to
      determine if there is a '(', which is the start of an argument list.
    */
{op_unary}"?"(({op_unary_pre_post}|"[?]")|({op_binary_over}"?")) {
    // 1 or 2 character unary operator ?
    int i = yytext[1] == '?' ? 1 : 2;
    yyless( i );        /* put back characters up to first '?' */
    if ( i > 1 ) {
        /* the only two-character unary operators are ++ and -- */
        NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
    } else {
        ASCIIOP_RETURN();
    } // if
}

    /* unknown characters: report and continue scanning, no token is returned */
.                       {printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno);}

%%

// Local Variables:
// fill-column: 100
// compile-command: "make"
// End: