// // Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo // // The contents of this file are covered under the licence agreement in the // file "LICENCE" distributed with Cforall. // // parser.yy -- // // Author : Peter A. Buhr // Created On : Sat Sep 1 20:22:55 2001 // Last Modified By : Peter A. Buhr // Last Modified On : Wed Jul 4 20:56:04 2018 // Update Count : 3616 // // This grammar is based on the ANSI99/11 C grammar, specifically parts of EXPRESSION and STATEMENTS, and on the C // grammar by James A. Roskind, specifically parts of DECLARATIONS and EXTERNAL DEFINITIONS. While parts have been // copied, important changes have been made in all sections; these changes are sufficient to constitute a new grammar. // In particular, this grammar attempts to be more syntactically precise, i.e., it parses less incorrect language syntax // that must be subsequently rejected by semantic checks. Nevertheless, there are still several semantic checks // required and many are noted in the grammar. Finally, the grammar is extended with GCC and CFA language extensions. // Acknowledgments to Richard Bilson, Glen Ditchfield, and Rodolfo Gabriel Esteves who all helped when I got stuck with // the grammar. // The root language for this grammar is ANSI99/11 C. All of ANSI99/11 is parsed, except for: // // 1. designation with '=' (use ':' instead) // // Most of the syntactic extensions from ANSI90 to ANSI11 C are marked with the comment "C99/C11". This grammar also has // two levels of extensions. The first extensions cover most of the GCC C extensions, except for: // // 1. designation with and without '=' (use ':' instead) // 2. attributes not allowed in parenthesis of declarator // // All of the syntactic extensions for GCC C are marked with the comment "GCC". The second extensions are for Cforall // (CFA), which fixes several of C's outstanding problems and extends C with many modern language concepts. 
// All of the syntactic extensions for CFA C are marked with the comment "CFA". As noted above, there is one
// irreconcilable parsing problem between C99 and CFA with respect to designators; this is discussed in detail before
// the "designation" grammar rule.

%{
#define YYDEBUG_LEXER_TEXT (yylval)						// lexer loads this up each time
#define YYDEBUG 1										// get the pretty debugging code to compile
#define YYERROR_VERBOSE									// more information in syntax errors

#undef __GNUC_MINOR__

// NOTE(review): the header names of the two system includes were missing in this copy of the file. <stack> and
// <string> are required by the code below (linkageStack, appendStr); <cstdio> is presumed -- confirm against the
// original grammar.
#include <cstdio>
#include <stack>
#include <string>
using namespace std;

#include "ParseNode.h"
#include "TypedefTable.h"
#include "TypeData.h"
#include "LinkageSpec.h"
#include "Common/SemanticError.h"						// error_str
#include "Common/utility.h"								// for maybeMoveBuild, maybeBuild, CodeLo...

extern DeclarationNode * parseTree;
extern LinkageSpec::Spec linkage;
extern TypedefTable typedefTable;

stack< LinkageSpec::Spec > linkageStack;

// Append string literal "from" to string literal "to" for adjacent string-literal concatenation.
//
//   to   : accumulated literal(s), updated in place; an encoding prefix may be added at its start
//   from : literal being appended, modified in place; its encoding prefix (if any) is removed
//
// Returns true on success. Returns false, after reporting through yyerror, when both literals carry an encoding
// prefix and the prefixes differ; in that case neither string has been modified.
bool appendStr( string & to, string & from ) {
	// 1. Multiple strings are concatenated into a single string but not combined internally. The reason is that
	//    "\x12" "3" is treated as 2 characters versus 1 because "escape sequences are converted into single members of
	//    the execution character set just prior to adjacent string literal concatenation" (C11, Section 6.4.5-8). It is
	//    easier to let the C compiler handle this case.
	//
	// 2. String encodings are transformed into canonical form (one encoding at start) so the encoding can be found
	//    without searching the string, e.g.: "abc" L"def" L"ghi" => L"abc" "def" "ghi". Multiple encodings must match,
	//    i.e., u"a" U"b" L"c" is disallowed.

	if ( from[0] != '"' ) {								// encoding ?
		if ( to[0] != '"' ) {							// encoding ?
			if ( to[0] != from[0] || to[1] != from[1] ) { // different encodings ?
				yyerror( "non-matching string encodings for string-literal concatenation" );
				return false;							// parse error, must call YYERROR in action
			} else if ( from[1] == '8' ) {
				from.erase( 0, 1 );						// two-character prefix (u8): drop 1st character here
			} // if
		} else {
			if ( from[1] == '8' ) {						// move encoding to start
				to = "u8" + to;
				from.erase( 0, 1 );						// two-character prefix (u8): drop 1st character here
			} else {
				to = from[0] + to;
			} // if
		} // if
		from.erase( 0, 1 );								// drop remaining prefix character
	} // if
	to += " " + from;									// concatenated into single string
	return true;
} // appendStr

// Distribute a declaration specifier across a list of declarations and return the (same) list head.
//
// A fresh DeclarationNode "cl" is created to hold "specifier"; cl->cloneBaseType() is invoked for every node after
// the head, and finally "cl" itself is added to the head with addType(). "cl" is not deleted here (the delete is
// commented out) -- NOTE(review): presumably addType() takes over its contents; confirm ownership.
// "declList" is dereferenced unconditionally, so it must not be null.
DeclarationNode * distAttr( DeclarationNode * specifier, DeclarationNode * declList ) {
	// distribute declaration_specifier across all declared variables, e.g., static, const, __attribute__.
	DeclarationNode * cur = declList, * cl = (new DeclarationNode)->addType( specifier );
	//cur->addType( specifier );
	// start at the 2nd declaration; the head receives "cl" directly after the loop
	for ( cur = dynamic_cast< DeclarationNode * >( cur->get_next() ); cur != nullptr; cur = dynamic_cast< DeclarationNode * >( cur->get_next() ) ) {
		cl->cloneBaseType( cur );
	} // for
	declList->addType( cl );
//	delete cl;
	return declList;
} // distAttr

// Mark every declaration in the list starting at "declaration" as an EXTENSION (set_extension( true )).
// A null list is accepted and results in no work.
void distExt( DeclarationNode * declaration ) {
	// distribute EXTENSION across all declarations
	for ( DeclarationNode *iter = declaration; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) {
		iter->set_extension( true );
	} // for
} // distExt

// Add a separate clone of "qualifiers" to every declaration in the list starting at "declaration".
// "qualifiers" itself is only cloned, never attached or deleted here. A null list results in no work.
void distQual( DeclarationNode * declaration, DeclarationNode * qualifiers ) {
	// distribute qualifiers across all declarations in a distribution statement
	for ( DeclarationNode * iter = declaration; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) {
		iter->addQualifiers( qualifiers->clone() );
	} // for
} // distQual

// There is an ambiguity for inline generic-routine return-types and generic routines.
//   forall( otype T ) struct S { int i; } bar( T ) {}
// Does the forall bind to the struct or the routine, and how would it be possible to explicitly specify the binding.
//   forall( otype T ) struct S { int T; } forall( otype W ) bar( W ) {}
// Currently, the forall is associated with the routine, and the generic type has to be separately defined:
//   forall( otype T ) struct S { int T; };
//   forall( otype W ) bar( W ) {}

// Move the forall parameters of an aggregate declaration-specifier onto a function declaration.
//
// When declSpec's type is an aggregate, its aggregate.params pointer is transferred to funcDecl->type->forall and
// cleared in the aggregate, so the parameters end up referenced from the function type only. Any previous value of
// funcDecl->type->forall is overwritten. Both declSpec->type and funcDecl->type are dereferenced without a check,
// so callers must supply nodes that have a type.
void rebindForall( DeclarationNode * declSpec, DeclarationNode * funcDecl ) {
	if ( declSpec->type->kind == TypeData::Aggregate ) { // ignore aggregate definition
		funcDecl->type->forall = declSpec->type->aggregate.params; // move forall from aggregate to function type
		declSpec->type->aggregate.params = nullptr;
	} // if
} // rebindForall

// Build a variable reference for a postfix-call name: the reference is to "?`" followed by *name.
// Takes ownership of "name" and deletes it; the string handed to build_varref is newly allocated.
NameExpr * build_postfix_name( const string * name ) {
	NameExpr * new_name = build_varref( new string( "?`" + *name ) );
	delete name;
	return new_name;
} // build_postfix_name

// NOTE(review): only "forall" is described by the original comment; the roles of xxx and yyy are not visible here.
bool forall = false, xxx = false, yyy = false;			// aggregate have one or more forall qualifiers ?

// https://www.gnu.org/software/bison/manual/bison.html#Location-Type
// Compute the location of a reduced rule, additionally propagating the "filename" field: for a non-empty rule the
// location spans RHS symbols 1..N and takes the filename of symbol 1; for an empty rule it collapses to the end of
// the symbol preceding the rule (index 0). Wrapped in do/while so the expansion is a single statement.
#define YYLLOC_DEFAULT(Cur, Rhs, N)												\
	do {																		\
		if ( N ) {																\
			(Cur).first_line   = YYRHSLOC( Rhs, 1 ).first_line;					\
			(Cur).first_column = YYRHSLOC( Rhs, 1 ).first_column;				\
			(Cur).last_line    = YYRHSLOC( Rhs, N ).last_line;					\
			(Cur).last_column  = YYRHSLOC( Rhs, N ).last_column;				\
			(Cur).filename     = YYRHSLOC( Rhs, 1 ).filename;					\
		} else {																\
			(Cur).first_line   = (Cur).last_line = YYRHSLOC( Rhs, 0 ).last_line;		\
			(Cur).first_column = (Cur).last_column = YYRHSLOC( Rhs, 0 ).last_column;	\
			(Cur).filename     = YYRHSLOC( Rhs, 0 ).filename;					\
		}																		\
	} while ( 0 )
%}

%define parse.error verbose

// Types declaration for productions
%union {
	Token tok;
	ParseNode * pn;
	ExpressionNode * en;
	DeclarationNode * decl;
	DeclarationNode::Aggregate aggKey;
	DeclarationNode::TypeClass tclass;
	StatementNode * sn;
	WaitForStmt * wfs;
	Expression * constant;
	IfCtl * ifctl;
	ForCtl * fctl;
	LabelNode * label;
	InitializerNode * in;
	OperKinds op;
	std::string * str;
	bool flag;
	CatchStmt::Kind catch_kind;
	GenericExpr * genexpr;
}

//************************* TERMINAL TOKENS ********************************

// NOTE(review): none of the %token declarations below carries a <tag> selecting a %union member; if the original
// grammar tagged some of them (e.g., the name and constant tokens), the tags were lost in this copy -- confirm.

// keywords
%token TYPEDEF
%token EXTERN STATIC AUTO REGISTER
%token THREADLOCAL										// C11
%token INLINE FORTRAN									// C99, extension ISO/IEC 9899:1999 Section J.5.9(1)
%token NORETURN											// C11
%token CONST VOLATILE
%token RESTRICT											// C99
%token ATOMIC											// C11
%token FORALL MUTEX VIRTUAL								// CFA
%token VOID CHAR SHORT INT LONG FLOAT DOUBLE SIGNED UNSIGNED
%token BOOL COMPLEX IMAGINARY							// C99
%token INT128 FLOAT80 FLOAT128							// GCC
%token ZERO_T ONE_T										// CFA
%token VALIST											// GCC
%token TYPEOF LABEL										// GCC
%token ENUM STRUCT UNION
%token EXCEPTION										// CFA
%token COROUTINE MONITOR THREAD							// CFA
%token OTYPE FTYPE DTYPE TTYPE TRAIT					// CFA
%token SIZEOF OFFSETOF
%token ATTRIBUTE EXTENSION								// GCC
%token IF ELSE SWITCH CASE DEFAULT DO WHILE FOR BREAK CONTINUE GOTO RETURN
%token CHOOSE DISABLE ENABLE FALLTHRU FALLTHROUGH TRY CATCH CATCHRESUME FINALLY THROW THROWRESUME AT WITH WHEN WAITFOR // CFA
%token ASM												// C99, extension ISO/IEC 9899:1999 Section J.5.10(1)
%token ALIGNAS ALIGNOF GENERIC STATICASSERT				// C11

// names and constants: lexer differentiates between identifier and typedef names
%token IDENTIFIER			QUOTED_IDENTIFIER		TYPEDEFname				TYPEGENname
%token TIMEOUT				WOR
%token ATTR_IDENTIFIER		ATTR_TYPEDEFname		ATTR_TYPEGENname
%token INTEGERconstant		CHARACTERconstant		STRINGliteral
%token DIRECTIVE
// Floating point constant is broken into three kinds of tokens because of the ambiguity with tuple indexing and
// overloading constants 0/1, e.g., x.1 is lexed as (x)(.1), where (.1) is a fractional constant, but is semantically
// converted into the tuple index (.)(1). e.g., 3.x
%token FLOATING_DECIMALconstant	FLOATING_FRACTIONconstant	FLOATINGconstant

// multi-character operators
%token ARROW											// ->
%token ICR DECR											// ++	--
%token LS RS											// <<	>>
%token LE GE EQ NE										// <=	>=	==	!=
%token ANDAND OROR										// &&	||
%token ELLIPSIS											// ...
// compound-assignment operator tokens
%token EXPassign	MULTassign	DIVassign	MODassign	// \=	*=	/=	%=
%token PLUSassign	MINUSassign							// +=	-=
%token LSassign		RSassign							// <<=	>>=
%token ANDassign	ERassign	ORassign				// &=	^=	|=

%token ATassign											// @=

// Nonterminal declarations.
// NOTE(review): none of the %type declarations below carries a <tag> naming a %union member; the tags appear to
// have been lost in this copy -- confirm against the original grammar before relying on the types of $n / $$.
%type identifier  no_attr_identifier
%type identifier_or_type_name  no_attr_identifier_or_type_name  attr_name
%type quasi_keyword
%type string_literal
%type string_literal_list

// expressions
%type constant
%type tuple							tuple_expression_list
%type ptrref_operator				unary_operator				assignment_operator
%type primary_expression			postfix_expression			unary_expression
%type cast_expression				exponential_expression		multiplicative_expression	additive_expression
%type shift_expression				relational_expression		equality_expression
%type AND_expression				exclusive_OR_expression		inclusive_OR_expression
%type logical_AND_expression		logical_OR_expression
%type conditional_expression		constant_expression			assignment_expression		assignment_expression_opt
%type comma_expression				comma_expression_opt
%type argument_expression_list		argument_expression			default_initialize_opt
%type if_control_expression
%type for_control_expression
%type subrange
%type asm_name_opt
%type asm_operands_opt asm_operands_list asm_operand
%type