1 | // |
---|
2 | // Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo |
---|
3 | // |
---|
4 | // The contents of this file are covered under the licence agreement in the |
---|
5 | // file "LICENCE" distributed with Cforall. |
---|
6 | // |
---|
7 | // parser.yy -- |
---|
8 | // |
---|
9 | // Author : Rodolfo G. Esteves |
---|
10 | // Created On : Sat Dec 15 13:44:21 2001 |
---|
11 | // Last Modified By : Peter A. Buhr |
---|
12 | // Last Modified On : Tue Jan 26 22:50:03 2021 |
---|
13 | // Update Count : 1053 |
---|
14 | // |
---|
15 | |
---|
16 | %{ |
---|
// Parser configuration macros; they are defined ahead of the #include lines in this prologue.
#define YYDEBUG_LEXER_TEXT( yylval )					// lexer loads this up each time (expands to nothing here)
#define YYDEBUG 1										// get the pretty debugging code to compile
#define YYERROR_VERBOSE									// more information in syntax errors
20 | |
---|
21 | #include <iostream> |
---|
22 | using namespace std; |
---|
23 | #include "ParserTypes.h" |
---|
24 | #include "filter.h" |
---|
25 | |
---|
// Declarations of names defined outside this file and used by the semantic actions below.
extern list<string> ws_list;							// lex variable containing accumulated whitespace
void lexC( void );										// called by the actions immediately before a ccode_opt is parsed
string lexYacc( void );									// called after a ccode_opt; the returned string becomes the text of a CODE token
29 | |
---|
// Error-reporting hook: writes the message s to standard error, prefixed by the current value of yylineno.
void yyerror( string s ) {
	extern int yylineno;								// line counter defined outside this file

	cerr << "Error in line: " << yylineno << ": " << s << endl;
} // yyerror
36 | |
---|
// List heads remembered by the actions. Each list-building rule passes only the list TAIL through its semantic value, so
// the first element is saved here for the rule that finally hangs the list below a parent node.
Token *declstart;										// set by "declarations", read by "defsection_opt"
Token *rulestart;										// set by the empty "rhs" alternative, extended by "rhs", read by "rules"
Token *nameliststart;									// set by "namenolist"/"namelist", read by "declaration"
40 | %} |
---|
41 | |
---|
// Semantic value shared by all typed grammar symbols: a pointer to a parse-tree Token.
%union {
	Token *tokenp;										// named by the <tokenp> tags on the %token/%type lines
}
45 | |
---|
// Terminal symbols. Except for the END_TERMINALS marker, every terminal carries a Token pointer as its semantic value.
// The declaration order below is deliberately unchanged.

// punctuation returned as single-character tokens
%token<tokenp> ',' '<' '>' '{' '}' ':' ';' '|'

// section and literal-block delimiters
%token<tokenp>
	MARK												// %%
	LCURL												// %{
	RCURL												// %}

// constants, names and raw code
%token<tokenp>
	INTEGER												// integer constant
	CHARACTER											// character constant
	IDENTIFIER											// identifier
	CODE												// C code

// directives of the definition section
%token<tokenp>
	DEFINE												// %define
	EXPECT												// %expect
	LEFT												// %left
	LOCATIONS											// %locations
	NONASSOC											// %nonassoc
	PRECEDENCE											// %precedence
	PURE_PARSER											// %pure_parser
	RIGHT												// %right
	SEMANTIC_PARSER										// %semantic_parser
	START												// %start
	THONG												// %thong
	TOKEN												// %token
	TYPE												// %type
	UNION												// %union

// directive of the rule section
%token<tokenp>
	PREC												// %prec

%token END_TERMINALS									// ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS
82 | |
---|
// Nonterminals, all of which yield a Token pointer. A "%token _NAME" line following a nonterminal declares the kind
// value that the semantic actions pass to "new Token( ..., _NAME )" when they create an interior parse-tree node; these
// kinds are never matched as input by any rule in this file. Declaration order is unchanged.

%type <tokenp>	sections
%token			_SECTIONS
%type <tokenp>	mark
%type <tokenp>	defsection_opt
%token			_DEFSECTION_OPT
%type <tokenp>	declarations
%type <tokenp>	literalblock
%token			_LITERALBLOCK
%type <tokenp>	declaration
%token			_DECLARATION
%type <tokenp>	union
%type <tokenp>	rword
%type <tokenp>	tag_opt
%token			_TAG_OPT
%type <tokenp>	namenolist
%token			_NAMENOLIST
%type <tokenp>	nameno
%token			_NAMENO
%type <tokenp>	namelist
%token			_NAMELIST
%type <tokenp>	name
%type <tokenp>	rulesection
%token			_RULESECTION
%type <tokenp>	rules
%token			_RULE
%type <tokenp>	lhs
%token			_LHS
%type <tokenp>	rhs
%token			_RHS
%type <tokenp>	prod
%type <tokenp>	prec
%token			_PREC
%type <tokenp>	action
%token			_ACTION
%type <tokenp>	usersection_opt
%token			_USERSECTION_OPT
%type <tokenp>	ccode_opt
%type <tokenp>	blocks

// the parse begins at "grammar"
%start grammar
123 | |
---|
124 | %% |
---|
// Start symbol. Once the complete specification has been reduced to "sections", the resulting parse tree is passed to
// filter() and its storage is then released with freeTree().
grammar :
	sections
		{
			Token *tree = $1;
			filter( tree );								// filter parse tree
			freeTree( tree );							// free parse-tree storage (optional: used with purify)
		}
	;
132 | |
---|
// A specification is: definitions, the %% mark, the rules, and an optional user-code section. The four parts are
// chained through their left pointers and the chain is hung below a new "sections" node.
sections :
	defsection_opt mark rulesection usersection_opt
		{
			Token *root = new Token( "sections", _SECTIONS );
			root->down = $1;							// first part heads the chain
			$1->left = $2;
			$2->left = $3;
			$3->left = $4;
			$$ = root;
		}
	;
143 | |
---|
// The %% separator between the definition and rule sections. A syntax error at this point is reported as a missing
// mark and terminates the program.
mark :
	MARK
		{ $$ = $1; }									// pass the MARK token through
	| error												// missing %%
		{
			cerr << "no input grammar, missing %% mark" << endl;
			exit( -1 );
		}
	;
152 | |
---|
// Optional definition section. A node is always produced; when declarations are present the list is hung below it.
defsection_opt :
	// empty
		{
			$$ = new Token( "declaration_opt", _DEFSECTION_OPT );
		}
	| declarations
		{
			Token *section = new Token( "declaration_opt", _DEFSECTION_OPT );
			// The value of "declarations" is the list tail, so the head saved in declstart is attached instead.
			section->down = declstart;
			$$ = section;
		}
	;
166 | |
---|
// List of literal blocks and declarations, chained through the left pointers. The first element is remembered in
// declstart; the semantic value of this rule is always the current list tail so the next element can be appended.
// A literalblock is already a node and is linked directly; a declaration is first wrapped in a "declaration" node.
declarations :
	literalblock
		{
			declstart = $1;								// list head
			$$ = $1;
		}
	| declaration
		{
			Token *node = new Token( "declaration", _DECLARATION );
			node->down = $1;
			declstart = node;							// list head
			$$ = node;
		}
	| declarations literalblock
		{
			$1->left = $2;								// append to the tail
			$$ = $2;
		}
	| declarations declaration
		{
			Token *node = new Token( "declaration", _DECLARATION );
			node->down = $2;
			$1->left = node;							// append to the tail
			$$ = node;
		}
	;
193 | |
---|
// Literal C block: %{ ... %}. lexC() is called before the code is parsed and lexYacc() afterwards; the string returned
// by lexYacc() becomes a CODE token that is chained between the two delimiters.
literalblock :
	LCURL
		{ lexC(); }
	ccode_opt
		{ $<tokenp>$ = new Token( lexYacc(), CODE ); }
	RCURL
		{
			Token *code = $<tokenp>4;					// CODE token built by the preceding mid-rule action
			Token *block = new Token( "literalblock", _LITERALBLOCK );
			block->down = $1;
			$1->left = code;
			code->left = $5;
			$$ = block;
		}
	;
208 | |
---|
// A single definition-section directive. The value is the directive's first token with any operands chained after it
// through the left pointers. Name lists are wrapped in a list node whose down pointer is the list head saved in
// nameliststart, because the value of namenolist/namelist is only the list tail.
declaration :
	union
	| START IDENTIFIER
		{
			$$ = $1;
			$1->left = $2;
		}
	| rword tag_opt namenolist
		{
			Token *names = new Token( "namenolist", _NAMENOLIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| TYPE tag_opt namelist
		{
			Token *names = new Token( "namelist", _NAMELIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| PURE_PARSER
	| SEMANTIC_PARSER
	| EXPECT INTEGER									// bison
		{
			$$ = $1;
			$1->left = $2;
		}
	| DEFINE											// bison
	| LOCATIONS
	| THONG												// bison
	;
243 | |
---|
// %union { ... }: the body is captured as one CODE token and the pieces are chained as
// UNION -> '{' -> CODE -> '}', with the UNION token as the value.
union :
	UNION '{'
		{ lexC(); }
	ccode_opt
		{
			// The lexer appends the closing '}' to the code string, so the final character is dropped here.
			string body = lexYacc();
			$<tokenp>$ = new Token( body.substr( 0, body.length() - 1 ), CODE );
		}
	'}'
		{
			Token *code = $<tokenp>5;					// CODE token built by the preceding mid-rule action
			code->left = $6;
			$2->left = code;
			$1->left = $2;
			$$ = $1;
		}
	;
261 | |
---|
// Directives that introduce a list of names with optional numbers; the directive token itself is the value.
rword :
	TOKEN | LEFT | RIGHT | NONASSOC | PRECEDENCE
	;
269 | |
---|
// Optional <tag>. A "tag_opt" node is always produced; when a tag is present, the chain '<' -> IDENTIFIER -> '>' is
// hung below it.
tag_opt :
	// empty
		{
			$$ = new Token( "tag_opt", _TAG_OPT );
		}
	| '<' IDENTIFIER '>'
		{
			Token *tag = new Token( "tag_opt", _TAG_OPT );
			tag->down = $1;
			$1->left = $2;
			$2->left = $3;
			$$ = tag;
		}
	;
284 | |
---|
// List of nameno nodes, optionally separated by commas (a comma token is kept in the chain). The head is saved in
// nameliststart and the value of the rule is the list tail.
namenolist :
	nameno
		{
			nameliststart = $1;							// list head
			$$ = $1;
		}
	| namenolist nameno
		{
			$$ = $2;
			$1->left = $2;								// append to the tail
		}
	| namenolist ',' nameno
		{
			$$ = $3;
			$2->left = $3;
			$1->left = $2;								// append the comma, then the nameno
		}
	;
305 | |
---|
// A name with an optional integer; the result is a "nameno" node with the name (and integer, if any) below it.
nameno :
	name
		{
			Token *node = new Token( "nameno", _NAMENO );
			node->down = $1;
			$$ = node;
		}
	| name INTEGER
		{
			Token *node = new Token( "nameno", _NAMENO );
			node->down = $1;
			$1->left = $2;
			$$ = node;
		}
	;
319 | |
---|
// List of names, optionally separated by commas (a comma token is kept in the chain). The head is saved in
// nameliststart and the value of the rule is the list tail.
namelist :
	name
		{
			nameliststart = $1;							// list head
			$$ = $1;
		}
	| namelist name
		{
			$$ = $2;
			$1->left = $2;								// append to the tail
		}
	| namelist ',' name
		{
			$$ = $3;
			$2->left = $3;
			$1->left = $2;								// append the comma, then the name
		}
	;
340 | |
---|
// A name is an identifier or a character constant; the token itself is the value.
name :
	IDENTIFIER | CHARACTER
	;
345 | |
---|
// The rule section: the rule list is hung below a "rulesection" node. A syntax error at this point is reported as an
// input grammar without rules and terminates the program.
rulesection :
	rules
		{
			Token *section = new Token( "rulesection", _RULESECTION );
			section->down = $1;
			$$ = section;
		}
	| error												// no rules
		{
			cerr << "no rules in the input grammar" << endl;
			exit( -1 );
		}
	;
359 | |
---|
360 | // These grammar rules are complex because the Yacc language is LR(2) due to the optional ';' at the end of rules. The |
---|
361 | // following rules convert the LR(2) grammar into LR(1) by lengthening the rules to allow sufficient look |
---|
362 | // ahead. Unfortunately, this change makes handling the semantic actions more complex because there are two lists |
---|
363 | // (rules, rhs) being built but only one list tail can be returned through $$ for chaining. |
---|
364 | |
---|
// Entire rule list. The work of building the list is done inside "rhs"; the value of "rhs" is only the tail of the
// last right-hand side, so the head of the rule list is taken from rulestart. A final ';' is attached to the last
// right-hand side with addDownLeftTail().
rules :
	lhs rhs
		{ $$ = rulestart; }
	| lhs rhs ';'
		{
			$2->addDownLeftTail( $3 );
			$$ = rulestart;
		}
	;
378 | |
---|
// Left-hand side of a rule: the chain IDENTIFIER -> ':' hung below an "lhs" node.
lhs :
	IDENTIFIER ':'
		{
			Token *node = new Token( "lhs", _LHS );
			node->down = $1;
			$1->left = $2;
			$$ = node;
		}
	;
389 | |
---|
// Right-hand sides, and (because of the LR(1) rewrite described above) the start of every rule after the first. Two
// lists are maintained: the list of "rule" nodes, whose head is rulestart and which is extended with addLeftTail(),
// and the contents of the current "rhs" node, which are extended with addDownLeftTail(). The value of this rule is the
// current "rhs" node.
rhs :
	// empty
		{
			// The initial lhs is already on the stack from "rules", directly below this empty rhs.
			Token *first = $<tokenp>0;
			rulestart = new Token( "rule", _RULE );		// head of the rule list
			rulestart->down = first;
			Token *alt = new Token( "rhs", _RHS );
			first->left = alt;
			$$ = alt;
		}
	| rhs lhs
		{
			// A new lhs starts the next rule: add a "rule" node to the rule list and open a fresh rhs after the lhs.
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $2;
			Token *alt = new Token( "rhs", _RHS );
			$2->left = alt;
			$$ = alt;
		}
	| rhs ';' lhs
		{
			// As above, but the ';' ending the previous rule is first attached to the previous rhs.
			$1->addDownLeftTail( $2 );
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $3;
			Token *alt = new Token( "rhs", _RHS );
			$3->left = alt;
			$$ = alt;
		}
	| rhs prod
		{
			// Another component of the current alternative.
			Token *current = $1;
			current->addDownLeftTail( $2 );
			$$ = current;
		}
	| rhs '|'
		{
			// The '|' is attached to the current rhs, and a new rhs node for the next alternative is linked after it.
			$1->addDownLeftTail( $2 );
			Token *alt = new Token( "rhs", _RHS );
			$1->left = alt;
			$$ = alt;
		}
	;
436 | |
---|
// One component of a right-hand side; the component's own token or node is the value.
prod :
	action | IDENTIFIER | CHARACTER | prec
	;
443 | |
---|
// %prec name: the chain PREC -> name hung below a "prec" node.
prec :
	PREC name
		{
			Token *node = new Token( "prec", _PREC );
			node->down = $1;
			$1->left = $2;
			$$ = node;
		}
	;
453 | |
---|
// Semantic action { ... }: the body is captured as one CODE token and the chain '{' -> CODE -> '}' is hung below an
// "action" node.
action :
	'{'
		{ lexC(); }
	ccode_opt
		{
			// The lexer appends the closing '}' to the code string, so the final character is dropped here.
			string body = lexYacc();
			$<tokenp>$ = new Token( body.substr( 0, body.length() - 1 ), CODE );
		}
	'}'
		{
			Token *code = $<tokenp>4;					// CODE token built by the preceding mid-rule action
			Token *node = new Token( "action", _ACTION );
			node->down = $1;
			$1->left = code;
			code->left = $5;
			$$ = node;
		}
	;
471 | |
---|
// Optional user-code section after a second %%. A "usersection_opt" node is always produced. Without the section, an
// empty CODE token constructed from ws_list is placed below the node; otherwise the chain MARK -> CODE is placed there.
usersection_opt :
	// empty
		{
			// attach remaining WS to fictitious code
			Token *filler = new Token( "", ws_list, CODE );
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = filler;
			$$ = section;
		}
	| MARK
		{ lexC(); }
	ccode_opt
		{
			Token *code = new Token( lexYacc(), CODE );
			$1->left = code;
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = $1;
			$$ = section;
		}
	;
492 | |
---|
// Optional C code. Only the nesting of braces is parsed here; none of the actions in this file read the value of
// this rule, because the code text is obtained separately from lexYacc().
ccode_opt :
	// empty
		{}
	| blocks
	;
498 | |
---|
499 | // This rule matches internal braces "{}" in C code to the level of the braces of a union/action. These internal braces |
---|
500 | // are returned as Tokens from the lexer but are unused because the braces are already concatenated into the code string |
---|
501 | // built by the lexer. Therefore, the tokens for the braces are immediately deleted. |
---|
502 | |
---|
// One or more brace-delimited blocks, each of which may contain nested blocks. Every brace token is deleted as soon as
// it has been shifted; no tree is built.
blocks :
	'{'
		{ delete $1; }									// opening brace
	ccode_opt '}'
		{ delete $4; }									// closing brace
	| blocks '{'
		{ delete $2; }									// opening brace
	ccode_opt '}'
		{ delete $5; }									// closing brace
	;
513 | %% |
---|
514 | |
---|
515 | // Local Variables: // |
---|
516 | // mode: c++ // |
---|
517 | // tab-width: 4 // |
---|
518 | // compile-command: "make install" // |
---|
519 | // End: // |
---|