/* -*- Mode: C -*-
 *
 * Pretty Printer, Copyright (C) Rodolfo G. Esteves and Peter A. Buhr 2001
 *      Permission is granted to copy this grammar and to use it within software systems.
 *      THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
 *
 * yacc.y --
 *
 * Author           : Rodolfo G. Esteves
 * Created On       : Sat Dec 15 13:44:21 2001
 * Last Modified By : Peter A. Buhr
 * Last Modified On : Mon Jun 27 21:51:06 2016
 * Update Count     : 1028
 */
---|
| 15 | |
---|
| 16 | %{ |
---|
| 17 | #include <stdio.h> |
---|
| 18 | #include "parse.h" |
---|
| 19 | #include "filter.h" |
---|
| 20 | |
---|
#define YYDEBUG 1						// get the pretty debugging code to compile

// Names supplied by the lexer and used by the semantic actions below.
extern list<string> ws_list;			// lex variable containing accumulated whitespace
void lexC();							// called just before a ccode_opt region is parsed
string lexYacc();						// called just after a ccode_opt region; result becomes a CODE token's text
---|
| 26 | |
---|
// Report a parse error on cerr, prefixed with the current value of yylineno.
//   s : message text to print
void yyerror( char *s ) {
	extern int yylineno;				// line counter defined outside this file

	cerr << "Error in line: " << yylineno << ": " << s << endl;
}
---|
| 33 | |
---|
// Heads of the sibling lists under construction. The list rules return the list TAIL through $$ (for
// chaining), so the head is remembered here until the enclosing node is built.
Token *declstart = NULL;				// head of the declarations list (set in "declarations")
Token *rulestart = NULL;				// first "rule" node (set in the empty "rhs" alternative)
Token *nameliststart = NULL;			// head of the current namelist/namenolist
---|
| 37 | %} |
---|
| 38 | |
---|
// The only semantic-value type used by this grammar: a pointer to a parse-tree Token.
%union { Token *tokenp; }
---|
| 42 | |
---|
// Single-character terminals; each carries a Token pointer.
%token<tokenp>	','  '<'  '>'  '{'  '}'  ':'  ';'  '|'

// Section and literal-block delimiters.
%token<tokenp>	MARK					// %%
%token<tokenp>	LCURL					// %{
%token<tokenp>	RCURL					// %}

// Constants, names, and embedded code.
%token<tokenp>	INTEGER					// integer constant
%token<tokenp>	CHARACTER				// character constant
%token<tokenp>	IDENTIFIER				// identifier
%token<tokenp>	CODE					// C code

// Declaration-section keywords.
%token<tokenp>	START					// %start
%token<tokenp>	UNION					// %union
%token<tokenp>	TOKEN					// %token
%token<tokenp>	LEFT					// %left
%token<tokenp>	RIGHT					// %right
%token<tokenp>	NONASSOC				// %nonassoc
%token<tokenp>	TYPE					// %type
%token<tokenp>	PURE_PARSER				// %pure_parser
%token<tokenp>	SEMANTIC_PARSER			// %semantic_parser
%token<tokenp>	EXPECT					// %expect
%token<tokenp>	THONG					// %thong

// Rule-section keyword.
%token<tokenp>	PREC					// %prec

%token END_TERMINALS					// ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS
---|
| 76 | |
---|
// Node kinds for interior parse-tree nodes. They are passed as the kind argument of "new Token( text, kind )"
// in the actions below and are declared after END_TERMINALS. Their relative order is significant (it fixes
// the generated token numbers), so keep it when adding entries.
%token _SECTIONS
%token _DEFSECTION_OPT
%token _LITERALBLOCK
%token _DECLARATION
%token _TAG_OPT
%token _NAMENOLIST
%token _NAMENO
%token _NAMELIST
%token _RULESECTION
%token _RULE
%token _LHS
%token _RHS
%token _PREC
%token _ACTION
%token _USERSECTION_OPT

// Every nonterminal below yields a Token pointer.
%type<tokenp> sections
%type<tokenp> mark
%type<tokenp> defsection_opt
%type<tokenp> declarations
%type<tokenp> literalblock
%type<tokenp> declaration
%type<tokenp> union
%type<tokenp> rword
%type<tokenp> tag_opt
%type<tokenp> namenolist
%type<tokenp> nameno
%type<tokenp> namelist
%type<tokenp> name
%type<tokenp> rulesection
%type<tokenp> rules
%type<tokenp> lhs
%type<tokenp> rhs
%type<tokenp> prod
%type<tokenp> prec
%type<tokenp> action
%type<tokenp> usersection_opt
%type<tokenp> ccode_opt
%type<tokenp> blocks

%start grammar
---|
| 117 | |
---|
| 118 | %% |
---|
// Start symbol: the finished parse tree is passed to filter() and then to freeTree().
grammar	: sections
		{
			Token *tree = $1;
			filter( tree );					// filter parse tree
			freeTree( tree );				// free parse-tree storage (optional: used with purify)
		}
	;
---|
| 125 | |
---|
// Whole input file: the four parts are chained through their "left" links and hung beneath a new
// "sections" node.
sections : defsection_opt mark rulesection usersection_opt
		{
			Token *root = new Token( "sections", _SECTIONS );
			root->down = $1;				// first child is the definition section
			$1->left = $2;
			$2->left = $3;
			$3->left = $4;
			$$ = root;
		}
	;
---|
| 135 | |
---|
// The %% separator after the definition section. A parse error at this point is fatal: a message is
// printed and the program exits.
mark	: MARK
		{ $$ = $1; }
	| error									/* missing %% */
		{
			cerr << "no input grammar, missing %% mark" << endl;
			exit( -1 );
		}
	;
---|
| 143 | |
---|
// Optional definition section. Always yields a _DEFSECTION_OPT node; when declarations are present the
// node's "down" link is the head of the declarations list (declstart), not $1, which is the list tail.
// NOTE(review): the node text is "declaration_opt" although the rule and kind are named defsection_opt;
// left unchanged here -- confirm whether the mismatch is intentional.
defsection_opt : /* empty */
		{
			$$ = new Token( "declaration_opt", _DEFSECTION_OPT );
		}
	| declarations
		{
			Token *section = new Token( "declaration_opt", _DEFSECTION_OPT );
			section->down = declstart;
			$$ = section;
		}
	;
---|
| 156 | |
---|
// List of literal blocks and declarations. The first element is recorded in declstart; $$ is always the
// most recently appended element so the next one can be linked onto its "left". Each "declaration" is
// wrapped in a new _DECLARATION node; a literalblock is linked in as is.
declarations : literalblock
		{
			declstart = $1;					// list head
			$$ = $1;
		}
	| declaration
		{
			Token *wrapper = new Token( "declaration", _DECLARATION );
			wrapper->down = $1;
			declstart = wrapper;			// list head
			$$ = wrapper;
		}
	| declarations literalblock
		{
			$1->left = $2;					// append to current tail
			$$ = $2;
		}
	| declarations declaration
		{
			Token *wrapper = new Token( "declaration", _DECLARATION );
			wrapper->down = $2;
			$1->left = wrapper;				// append to current tail
			$$ = wrapper;
		}
	;
---|
| 182 | |
---|
// %{ ... %} block. lexC() is called before the enclosed code is parsed and lexYacc() afterwards; the string
// lexYacc() returns becomes the text of a CODE token ($4). Result: "literalblock" node whose children are
// LCURL, CODE, RCURL.
literalblock : LCURL
		{ lexC(); }
	  ccode_opt
		{ $<tokenp>$ = new Token( lexYacc(), CODE ); }
	  RCURL
		{
			Token *code = $<tokenp>4;		// CODE token built by the mid-rule action above
			$1->left = code;
			code->left = $5;
			Token *block = new Token( "literalblock", _LITERALBLOCK );
			block->down = $1;
			$$ = block;
		}
	;
---|
| 196 | |
---|
// One declaration of the definition section. The result is the leading keyword token with the remaining
// parts chained on its "left" links. Name lists are wrapped in a new list node whose "down" link is the
// list head saved in nameliststart ($3 itself is the list tail).
declaration : union
		{ $$ = $1; }
	| START IDENTIFIER
		{
			$1->left = $2;
			$$ = $1;
		}
	| rword tag_opt namenolist
		{
			Token *names = new Token( "namenolist", _NAMENOLIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| TYPE tag_opt namelist
		{
			Token *names = new Token( "namelist", _NAMELIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| PURE_PARSER
		{ $$ = $1; }
	| SEMANTIC_PARSER
		{ $$ = $1; }
	| EXPECT INTEGER						/* bison */
		{
			$1->left = $2;
			$$ = $1;
		}
	| THONG									/* bison */
		{ $$ = $1; }
	;
---|
| 228 | |
---|
// %union { ... } declaration. The body text comes from lexYacc(); its last character is dropped before it
// becomes a CODE token ($5). Result: UNION token chained to '{', CODE, '}'.
union	: UNION
	  '{'
		{ lexC(); }
	  ccode_opt
		{
			// Remove the trailing '}' which is added in lex.
			string body( lexYacc() );
			$<tokenp>$ = new Token( body.substr( 0, body.length() - 1 ), CODE );
		}
	  '}'
		{
			Token *code = $<tokenp>5;		// CODE token built by the mid-rule action above
			$1->left = $2;
			$2->left = code;
			code->left = $6;
			$$ = $1;
		}
	;
---|
| 246 | |
---|
// Keyword that introduces a name/number list; the keyword token is passed through unchanged.
rword	: TOKEN
		{ $$ = $1; }
	| LEFT
		{ $$ = $1; }
	| RIGHT
		{ $$ = $1; }
	| NONASSOC
		{ $$ = $1; }
	;
---|
| 252 | |
---|
// Optional <tag>. Always yields a "tag_opt" node; it has no children when the tag is absent, otherwise
// its children are '<', IDENTIFIER, '>'.
tag_opt	: /* empty */
		{
			$$ = new Token( "tag_opt", _TAG_OPT );
		}
	| '<' IDENTIFIER '>'
		{
			$1->left = $2;
			$2->left = $3;
			Token *tag = new Token( "tag_opt", _TAG_OPT );
			tag->down = $1;
			$$ = tag;
		}
	;
---|
| 266 | |
---|
// List of nameno elements, optionally comma separated (the ',' token is kept in the chain). The head is
// saved in nameliststart; $$ is the current tail.
namenolist : nameno
		{
			nameliststart = $1;				// list head
			$$ = $1;
		}
	| namenolist nameno
		{
			$1->left = $2;
			$$ = $2;
		}
	| namenolist ',' nameno
		{
			$1->left = $2;
			$2->left = $3;
			$$ = $3;
		}
	;
---|
| 286 | |
---|
// A name with an optional integer after it, wrapped in a "nameno" node.
nameno	: name
		{
			Token *wrapper = new Token( "nameno", _NAMENO );
			wrapper->down = $1;
			$$ = wrapper;
		}
	| name INTEGER
		{
			Token *wrapper = new Token( "nameno", _NAMENO );
			wrapper->down = $1;
			$1->left = $2;					// integer follows the name
			$$ = wrapper;
		}
	;
---|
| 299 | |
---|
// List of names, optionally comma separated (the ',' token is kept in the chain). The head is saved in
// nameliststart; $$ is the current tail.
namelist : name
		{
			nameliststart = $1;				// list head
			$$ = $1;
		}
	| namelist name
		{
			$1->left = $2;
			$$ = $2;
		}
	| namelist ',' name
		{
			$1->left = $2;
			$2->left = $3;
			$$ = $3;
		}
	;
---|
| 319 | |
---|
// An identifier or a character constant; the token is passed through unchanged.
name	: IDENTIFIER
		{ $$ = $1; }
	| CHARACTER
		{ $$ = $1; }
	;
---|
| 323 | |
---|
// Rule section: the value of "rules" is hung beneath a "rulesection" node. A parse error at this point is
// fatal: a message is printed and the program exits.
rulesection : rules
		{
			Token *section = new Token( "rulesection", _RULESECTION );
			section->down = $1;
			$$ = section;
		}
	| error									/* no rules */
		{
			cerr << "no rules in the input grammar" << endl;
			exit( -1 );
		}
	;
---|
| 336 | |
---|
// These grammar rules are complex because the Yacc language is LR(2) due to the optional ';' at the end of
// rules. The following rules convert the LR(2) grammar into LR(1) by lengthening the rules to allow
// sufficient lookahead. Unfortunately, this change makes handling the semantic actions more complex because
// there are two lists (rules, rhs) being built but only one list tail can be returned through $$ for
// chaining.
---|
| 342 | |
---|
// All rules of the grammar. The tree is built inside "rhs"; here only an optional final ';' is attached
// to the last rhs node, and the head of the rule list (rulestart) is returned.
rules	: lhs rhs
		{
			$$ = rulestart;
		}
	| lhs rhs ';'
		{
			$2->addDownLeftTail( $3 );		// trailing ';' goes onto the last rhs
			$$ = rulestart;
		}
	;
---|
| 355 | |
---|
// Left-hand side of a rule: "lhs" node whose children are the identifier and the ':'.
lhs	: IDENTIFIER ':'
		{
			Token *head = new Token( "lhs", _LHS );
			head->down = $1;
			$1->left = $2;
			$$ = head;
		}
	;
---|
| 365 | |
---|
// Right-hand sides of ALL rules, flattened into one left-recursive list.
//   - empty   : starts the first rule; its lhs is the symbol just below on the parser stack ($0)
//   - lhs     : starts a new rule (the previous rule had no ';')
//   - ';' lhs : ends the previous rule with ';' and starts a new rule
//   - prod    : appends one symbol/action to the current rhs
//   - '|'     : ends the current alternative and starts a new rhs node chained after it
// $$ is always the rhs node currently being filled; new "rule" nodes are appended to the list headed by
// rulestart.
rhs	: /* empty */
		{
			Token *firstLhs = $<tokenp>0;	// initial lhs is already on the stack from "rules"
			rulestart = new Token( "rule", _RULE );
			rulestart->down = firstLhs;
			Token *alt = new Token( "rhs", _RHS );
			firstLhs->left = alt;
			$$ = alt;
		}
	| rhs lhs
		{
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $2;
			Token *alt = new Token( "rhs", _RHS );
			$2->left = alt;
			$$ = alt;
		}
	| rhs ';' lhs
		{
			$1->addDownLeftTail( $2 );		// ';' closes the previous rule's rhs
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $3;
			Token *alt = new Token( "rhs", _RHS );
			$3->left = alt;
			$$ = alt;
		}
	| rhs prod
		{
			$1->addDownLeftTail( $2 );
			$$ = $1;
		}
	| rhs '|'
		{
			$1->addDownLeftTail( $2 );		// '|' stays with the alternative it ends
			Token *alt = new Token( "rhs", _RHS );
			$1->left = alt;
			$$ = alt;
		}
	;
---|
| 411 | |
---|
// One element of a right-hand side; the element's value is passed through unchanged.
prod	: action
		{ $$ = $1; }
	| IDENTIFIER
		{ $$ = $1; }
	| CHARACTER
		{ $$ = $1; }
	| prec
		{ $$ = $1; }
	;
---|
| 417 | |
---|
// %prec name: "prec" node whose children are the PREC keyword and the name.
prec	: PREC name
		{
			Token *node = new Token( "prec", _PREC );
			node->down = $1;
			$1->left = $2;
			$$ = node;
		}
	;
---|
| 426 | |
---|
// Semantic action { ... } inside a rule. The body text comes from lexYacc(); its last character is dropped
// before it becomes a CODE token ($4). Result: "action" node whose children are '{', CODE, '}'.
action	: '{'
		{ lexC(); }
	  ccode_opt
		{
			// Remove the trailing '}' added in lex.
			string body( lexYacc() );
			$<tokenp>$ = new Token( body.substr( 0, body.length() - 1 ), CODE );
		}
	  '}'
		{
			Token *code = $<tokenp>4;		// CODE token built by the mid-rule action above
			$1->left = code;
			code->left = $5;
			Token *node = new Token( "action", _ACTION );
			node->down = $1;
			$$ = node;
		}
	;
---|
| 443 | |
---|
// Optional user-code section after the second %%. Always yields a "usersection_opt" node. When the
// section is absent, its child is an empty CODE token constructed with ws_list; otherwise its children are
// the MARK token followed by a CODE token holding the string returned by lexYacc().
usersection_opt : /* empty */
		{
			// attach remaining WS to fictitious code
			Token *filler = new Token( "", ws_list, CODE );
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = filler;
			$$ = section;
		}
	| MARK
		{ lexC(); }
	  ccode_opt
		{
			Token *code = new Token( lexYacc(), CODE );
			$1->left = code;
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = $1;
			$$ = section;
		}
	;
---|
| 463 | |
---|
// Optional run of brace-balanced C code. This rule only drives the parser through the code; no rule in
// this grammar reads its value. The value is set to NULL explicitly so it is never left unset (empty
// alternative) or copied from a Token that "blocks" has already deleted (implicit $$ = $1).
ccode_opt : /* empty */
		{ $$ = NULL; }
	| blocks
		{ $$ = NULL; }
	;
---|
| 467 | |
---|
// This rule matches internal braces "{}" in C code to the level of the braces of a union/action. These
// internal braces are returned as Tokens from the lexer but are unused because the braces are already
// concatenated into the code string built by the lexer. Therefore, the tokens for the braces are immediately
// deleted.
//
// The rule's value is set to NULL explicitly: without an assignment the implicit "$$ = $1" would leave the
// value pointing at a Token that has just been deleted.

blocks	: '{' { delete $1; } ccode_opt '}'
		{
			delete $4;
			$$ = NULL;
		}
	| blocks '{' { delete $2; } ccode_opt '}'
		{
			delete $5;
			$$ = NULL;
		}
	;
---|
| 476 | %% |
---|
| 477 | |
---|
| 478 | /* Local Variables: */ |
---|
| 479 | /* fill-column: 110 */ |
---|
| 480 | /* compile-command: "gmake" */ |
---|
| 481 | /* End: */ |
---|