//
// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// parser.yy --
//
// Author : Peter A. Buhr
// Created On : Sat Sep 1 20:22:55 2001
// Last Modified By : Peter A. Buhr
// Last Modified On : Sun Mar 28 18:53:34 2021
// Update Count : 4940
//

// This grammar is based on the ANSI99/11 C grammar, specifically parts of EXPRESSION and STATEMENTS, and on the C
// grammar by James A. Roskind, specifically parts of DECLARATIONS and EXTERNAL DEFINITIONS. While parts have been
// copied, important changes have been made in all sections; these changes are sufficient to constitute a new grammar.
// In particular, this grammar attempts to be more syntactically precise, i.e., it parses less incorrect language syntax
// that must be subsequently rejected by semantic checks. Nevertheless, there are still several semantic checks
// required and many are noted in the grammar. Finally, the grammar is extended with GCC and CFA language extensions.

// Acknowledgments to Richard Bilson, Glen Ditchfield, and Rodolfo Gabriel Esteves who all helped when I got stuck with
// the grammar.

// The root language for this grammar is ANSI99/11 C. All of ANSI99/11 is parsed, except for:
//
// 1. designation with '=' (use ':' instead)
//
// Most of the syntactic extensions from ANSI90 to ANSI11 C are marked with the comment "C99/C11". This grammar also has
// two levels of extensions. The first extensions cover most of the GCC C extensions, except for:
//
// 1. designation with and without '=' (use ':' instead)
//
// All of the syntactic extensions for GCC C are marked with the comment "GCC". The second extensions are for Cforall
// (CFA), which fixes several of C's outstanding problems and extends C with many modern language concepts. All of the
// syntactic extensions for CFA C are marked with the comment "CFA".
As noted above, there is one unreconcileable // parsing problem between C99 and CFA with respect to designators; this is discussed in detail before the "designation" // grammar rule. %{ #define YYDEBUG_LEXER_TEXT( yylval ) // lexer loads this up each time #define YYDEBUG 1 // get the pretty debugging code to compile #define YYERROR_VERBOSE // more information in syntax errors #undef __GNUC_MINOR__ #include #include using namespace std; #include "SynTree/Declaration.h" #include "ParseNode.h" #include "TypedefTable.h" #include "TypeData.h" #include "SynTree/LinkageSpec.h" #include "Common/SemanticError.h" // error_str #include "Common/utility.h" // for maybeMoveBuild, maybeBuild, CodeLo... extern DeclarationNode * parseTree; extern LinkageSpec::Spec linkage; extern TypedefTable typedefTable; stack linkageStack; bool appendStr( string & to, string & from ) { // 1. Multiple strings are concatenated into a single string but not combined internally. The reason is that // "\x12" "3" is treated as 2 characters versus 1 because "escape sequences are converted into single members of // the execution character set just prior to adjacent string literal concatenation" (C11, Section 6.4.5-8). It is // easier to let the C compiler handle this case. // // 2. String encodings are transformed into canonical form (one encoding at start) so the encoding can be found // without searching the string, e.g.: "abc" L"def" L"ghi" => L"abc" "def" "ghi". Multiple encodings must match, // i.e., u"a" U"b" L"c" is disallowed. if ( from[0] != '"' ) { // encoding ? if ( to[0] != '"' ) { // encoding ? if ( to[0] != from[0] || to[1] != from[1] ) { // different encodings ? 
yyerror( "non-matching string encodings for string-literal concatenation" ); return false; // parse error, must call YYERROR in action } else if ( from[1] == '8' ) { from.erase( 0, 1 ); // remove 2nd encoding } // if } else { if ( from[1] == '8' ) { // move encoding to start to = "u8" + to; from.erase( 0, 1 ); // remove 2nd encoding } else { to = from[0] + to; } // if } // if from.erase( 0, 1 ); // remove 2nd encoding } // if to += " " + from; // concatenated into single string return true; } // appendStr DeclarationNode * distAttr( DeclarationNode * specifier, DeclarationNode * declList ) { // distribute declaration_specifier across all declared variables, e.g., static, const, __attribute__. DeclarationNode * cur = declList, * cl = (new DeclarationNode)->addType( specifier ); for ( cur = dynamic_cast( cur->get_next() ); cur != nullptr; cur = dynamic_cast( cur->get_next() ) ) { cl->cloneBaseType( cur ); } // for declList->addType( cl ); return declList; } // distAttr void distExt( DeclarationNode * declaration ) { // distribute EXTENSION across all declarations for ( DeclarationNode *iter = declaration; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) { iter->set_extension( true ); } // for } // distExt void distInl( DeclarationNode * declaration ) { // distribute EXTENSION across all declarations for ( DeclarationNode *iter = declaration; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) { iter->set_inLine( true ); } // for } // distInl void distQual( DeclarationNode * declaration, DeclarationNode * qualifiers ) { // distribute qualifiers across all non-variable declarations in a distribution statemement for ( DeclarationNode * iter = declaration; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) { // SKULLDUGGERY: Distributions are parsed inside out, so qualifiers are added to declarations inside out. Since // addQualifiers appends to the back of the list, the forall clauses are in the wrong order (right to left). 
To // get the qualifiers in the correct order and still use addQualifiers (otherwise, 90% of addQualifiers has to // be copied to add to front), the appropriate forall pointers are interchanged before calling addQualifiers. DeclarationNode * clone = qualifiers->clone(); if ( qualifiers->type ) { // forall clause ? (handles SC) if ( iter->type->kind == TypeData::Aggregate ) { // struct/union ? swap( clone->type->forall, iter->type->aggregate.params ); iter->addQualifiers( clone ); } else if ( iter->type->kind == TypeData::AggregateInst && iter->type->aggInst.aggregate->aggregate.body ) { // struct/union ? // Create temporary node to hold aggregate, call addQualifiers as above, then put nodes back together. DeclarationNode newnode; swap( newnode.type, iter->type->aggInst.aggregate ); swap( clone->type->forall, newnode.type->aggregate.params ); newnode.addQualifiers( clone ); swap( newnode.type, iter->type->aggInst.aggregate ); } else if ( iter->type->kind == TypeData::Function ) { // routines ? swap( clone->type->forall, iter->type->forall ); iter->addQualifiers( clone ); } // if } else { // just SC qualifiers iter->addQualifiers( clone ); } // if } // for delete qualifiers; } // distQual // There is an ambiguity for inline generic-routine return-types and generic routines. // forall( otype T ) struct S { int i; } bar( T ) {} // Does the forall bind to the struct or the routine, and how would it be possible to explicitly specify the binding. 
// forall( otype T ) struct S { int T; } forall( otype W ) bar( W ) {} // Currently, the forall is associated with the routine, and the generic type has to be separately defined: // forall( otype T ) struct S { int T; }; // forall( otype W ) bar( W ) {} void rebindForall( DeclarationNode * declSpec, DeclarationNode * funcDecl ) { if ( declSpec->type->kind == TypeData::Aggregate ) { // ignore aggregate definition funcDecl->type->forall = declSpec->type->aggregate.params; // move forall from aggregate to function type declSpec->type->aggregate.params = nullptr; } // if } // rebindForall string * build_postfix_name( string * name ) { *name = string("__postfix_func_") + *name; return name; } // build_postfix_name DeclarationNode * fieldDecl( DeclarationNode * typeSpec, DeclarationNode * fieldList ) { if ( ! fieldList ) { // field declarator ? if ( ! ( typeSpec->type && (typeSpec->type->kind == TypeData::Aggregate || typeSpec->type->kind == TypeData::Enum) ) ) { stringstream ss; typeSpec->type->print( ss ); SemanticWarning( yylloc, Warning::SuperfluousDecl, ss.str().c_str() ); return nullptr; } // if fieldList = DeclarationNode::newName( nullptr ); } // if return distAttr( typeSpec, fieldList ); // mark all fields in list } // fieldDecl ForCtrl * forCtrl( ExpressionNode * type, string * index, ExpressionNode * start, enum OperKinds compop, ExpressionNode * comp, ExpressionNode * inc ) { ConstantExpr * constant = dynamic_cast(type->expr.get()); if ( constant && (constant->get_constant()->get_value() == "0" || constant->get_constant()->get_value() == "1") ) { type = new ExpressionNode( new CastExpr( maybeMoveBuild(type), new BasicType( Type::Qualifiers(), BasicType::SignedInt ) ) ); } // if return new ForCtrl( distAttr( DeclarationNode::newTypeof( type, true ), DeclarationNode::newName( index )->addInitializer( new InitializerNode( start ) ) ), // NULL comp/inc => leave blank comp ? 
new ExpressionNode( build_binary_val( compop, new ExpressionNode( build_varref( new string( *index ) ) ), comp ) ) : 0, inc ? new ExpressionNode( build_binary_val( compop == OperKinds::LThan || compop == OperKinds::LEThan ? // choose += or -= for upto/downto OperKinds::PlusAssn : OperKinds::MinusAssn, new ExpressionNode( build_varref( new string( *index ) ) ), inc ) ) : 0 ); } // forCtrl ForCtrl * forCtrl( ExpressionNode * type, ExpressionNode * index, ExpressionNode * start, enum OperKinds compop, ExpressionNode * comp, ExpressionNode * inc ) { if ( NameExpr * identifier = dynamic_cast(index->expr.get()) ) { return forCtrl( type, new string( identifier->name ), start, compop, comp, inc ); } else if ( CommaExpr * commaExpr = dynamic_cast(index->expr.get()) ) { if ( NameExpr * identifier = dynamic_cast(commaExpr->arg1 ) ) { return forCtrl( type, new string( identifier->name ), start, compop, comp, inc ); } else { SemanticError( yylloc, "Expression disallowed. Only loop-index name allowed." ); return nullptr; } // if } else { SemanticError( yylloc, "Expression disallowed. Only loop-index name allowed." ); return nullptr; } // if } // forCtrl bool forall = false, yyy = false; // aggregate have one or more forall qualifiers ? 
// https://www.gnu.org/software/bison/manual/bison.html#Location-Type
// Default location computation for a reduced rule. With a non-empty right-hand side (N != 0) the result spans from the
// start of the first RHS symbol to the end of the last one and takes the filename of the first. With an empty
// right-hand side the result is the empty range at the end position of RHS element 0, with that element's filename.
#define YYLLOC_DEFAULT(Cur, Rhs, N) \
if ( N ) { \
	(Cur).first_line = YYRHSLOC( Rhs, 1 ).first_line; \
	(Cur).first_column = YYRHSLOC( Rhs, 1 ).first_column; \
	(Cur).last_line = YYRHSLOC( Rhs, N ).last_line; \
	(Cur).last_column = YYRHSLOC( Rhs, N ).last_column; \
	(Cur).filename = YYRHSLOC( Rhs, 1 ).filename; \
} else { \
	(Cur).first_line = (Cur).last_line = YYRHSLOC( Rhs, 0 ).last_line; \
	(Cur).first_column = (Cur).last_column = YYRHSLOC( Rhs, 0 ).last_column; \
	(Cur).filename = YYRHSLOC( Rhs, 0 ).filename; \
}
%}

%define parse.error verbose

// Types declaration for productions: one member per kind of semantic value carried by a grammar symbol.
%union {
	Token tok;
	ParseNode * pn;
	ExpressionNode * en;
	DeclarationNode * decl;
	AggregateDecl::Aggregate aggKey;
	TypeDecl::Kind tclass;
	StatementNode * sn;
	WaitForStmt * wfs;
	Expression * constant;
	IfCtrl * ifctl;
	ForCtrl * fctl;
	enum OperKinds compop;
	LabelNode * label;
	InitializerNode * in;
	OperKinds op;
	std::string * str;
	bool flag;
	CatchStmt::Kind catch_kind;
	GenericExpr * genexpr;
}

//************************* TERMINAL TOKENS ********************************

// NOTE(review): none of the %token declarations below names a <tag> from the %union above -- presumably each token was
// meant to carry one; confirm against the lexer.

// keywords
%token TYPEDEF
%token EXTERN STATIC AUTO REGISTER
%token THREADLOCAL										// C11
%token INLINE FORTRAN									// C99, extension ISO/IEC 9899:1999 Section J.5.9(1)
%token NORETURN											// C11
%token CONST VOLATILE
%token RESTRICT											// C99
%token ATOMIC											// C11
%token FORALL MUTEX VIRTUAL COERCE						// CFA
%token VOID CHAR SHORT INT LONG FLOAT DOUBLE SIGNED UNSIGNED
%token BOOL COMPLEX IMAGINARY							// C99
%token INT128 UINT128 uuFLOAT80 uuFLOAT128				// GCC
%token uFLOAT16 uFLOAT32 uFLOAT32X uFLOAT64 uFLOAT64X uFLOAT128 // GCC
%token ZERO_T ONE_T										// CFA
%token VALIST											// GCC
%token AUTO_TYPE										// GCC
%token TYPEOF BASETYPEOF LABEL							// GCC
%token ENUM STRUCT UNION
%token EXCEPTION										// CFA
%token GENERATOR COROUTINE MONITOR THREAD				// CFA
%token OTYPE FTYPE DTYPE TTYPE TRAIT					// CFA
%token SIZEOF OFFSETOF
// %token RESUME // CFA
%token SUSPEND											// CFA
%token ATTRIBUTE EXTENSION								// GCC
%token IF ELSE SWITCH CASE DEFAULT DO WHILE FOR BREAK CONTINUE GOTO RETURN
%token CHOOSE DISABLE ENABLE FALLTHRU FALLTHROUGH TRY THROW THROWRESUME AT WITH WHEN WAITFOR // CFA
%token ASM												// C99, extension ISO/IEC 9899:1999 Section J.5.10(1)
%token ALIGNAS ALIGNOF GENERIC STATICASSERT				// C11

// names and constants: lexer differentiates between identifier and typedef names
%token IDENTIFIER QUOTED_IDENTIFIER TYPEDEFname TYPEGENname
%token TIMEOUT WOR CATCH RECOVER CATCHRESUME FIXUP FINALLY // CFA
%token INTEGERconstant CHARACTERconstant STRINGliteral
%token DIRECTIVE
// Floating point constant is broken into three kinds of tokens because of the ambiguity with tuple indexing and
// overloading constants 0/1, e.g., x.1 is lexed as (x)(.1), where (.1) is a fractional constant, but is semantically
// converted into the tuple index (.)(1). e.g., 3.x
%token FLOATING_DECIMALconstant FLOATING_FRACTIONconstant FLOATINGconstant

// multi-character operators
%token ARROW											// ->
%token ICR DECR											// ++ --
%token LS RS											// << >>
%token LE GE EQ NE										// <= >= == !=
%token ANDAND OROR										// && ||
%token ELLIPSIS											// ...
// assignment and range operators (the trailing comment on each line lists the operator spellings in declaration order)
%token EXPassign MULTassign DIVassign MODassign			// \= *= /= %=
%token PLUSassign MINUSassign							// += -=
%token LSassign RSassign								// <<= >>=
%token ANDassign ERassign ORassign						// &= ^= |=

%token ErangeUpEq ErangeDown ErangeDownEq				// ~= -~ -~=
%token ATassign											// @=

// NOTE(review): none of the %type declarations below names a <tag> from the %union, so no semantic type is declared
// for these nonterminals here -- presumably the tags are missing; confirm.

// names and string literals
%type identifier
%type identifier_or_type_name attr_name
%type quasi_keyword
%type string_literal
%type string_literal_list

// expressions
%type constant
%type tuple tuple_expression_list
%type ptrref_operator unary_operator assignment_operator simple_assignment_operator compound_assignment_operator
%type primary_expression postfix_expression unary_expression
%type cast_expression_list cast_expression exponential_expression multiplicative_expression additive_expression
%type shift_expression relational_expression equality_expression
%type AND_expression exclusive_OR_expression inclusive_OR_expression
%type logical_AND_expression logical_OR_expression
%type conditional_expression constant_expression assignment_expression assignment_expression_opt
%type comma_expression comma_expression_opt
%type argument_expression_list_opt argument_expression default_initializer_opt
%type if_control_expression
%type for_control_expression for_control_expression_list
%type inclexcl
%type subrange
%type asm_name_opt
%type asm_operands_opt asm_operands_list asm_operand
%type