1 | // |
---|
2 | // Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo |
---|
3 | // |
---|
4 | // The contents of this file are covered under the licence agreement in the |
---|
5 | // file "LICENCE" distributed with Cforall. |
---|
6 | // |
---|
7 | // parser.yy -- |
---|
8 | // |
---|
9 | // Author : Rodolfo G. Esteves |
---|
10 | // Created On : Sat Dec 15 13:44:21 2001 |
---|
11 | // Last Modified By : Peter A. Buhr |
---|
12 | // Last Modified On : Thu Jun 29 09:26:47 2017 |
---|
13 | // Update Count : 1045 |
---|
14 | // |
---|
15 | |
---|
16 | %{ |
---|
// Compile the parser's debugging (trace) code into the generated parser.
#define YYDEBUG 1
// Debugging hook taking the lexer value; it expands to nothing here.
#define YYDEBUG_LEXER_TEXT( yylval )
---|
19 | |
---|
20 | #include <iostream> |
---|
21 | using namespace std; |
---|
22 | #include "parser.hh" |
---|
23 | #include "filter.h" |
---|
24 | |
---|
// Interface to the lexer (all three are defined outside this file).
extern list<string> ws_list;							// lex variable containing accumulated whitespace
void lexC();											// called by the actions just before each ccode_opt region
string lexYacc();										// called after a ccode_opt region; result is given to a CODE Token
---|
28 | |
---|
// Error reporting routine: prints the message s on cerr, prefixed by the current value of yylineno.
void yyerror( string s ) {
	extern int yylineno;								// defined elsewhere (presumably by the lexer)
	cerr << "Error in line: " << yylineno << ": " << s << endl;
}
---|
35 | |
---|
// Heads of the lists built by the actions below. The list rules return the list *tail* through $$ for chaining, so
// the first node of each list is remembered in one of these variables.
Token *declstart,										// first node of the declarations list (set in "declarations")
	*rulestart,											// first "rule" node (set in the empty "rhs" alternative)
	*nameliststart;										// first node of the current namelist/namenolist
---|
39 | %} |
---|
40 | |
---|
// Every semantic value, for terminals and nonterminals alike, is a pointer to a Token.
%union { Token *tokenp; }

// Single-character terminals.
%token<tokenp>	','  '<'  '>'  '{'  '}'  ':'  ';'  '|'

// Multi-character delimiters.
%token<tokenp>	MARK									// %%
%token<tokenp>	LCURL									// %{
%token<tokenp>	RCURL									// %}

// Constants, names and code.
%token<tokenp>	INTEGER									// integer constant
%token<tokenp>	CHARACTER								// character constant
%token<tokenp>	IDENTIFIER								// identifier
%token<tokenp>	CODE									// C code

// Declaration keywords.
%token<tokenp>	START UNION TOKEN						// %start %union %token
%token<tokenp>	LEFT RIGHT NONASSOC						// %left %right %nonassoc
%token<tokenp>	TYPE									// %type
%token<tokenp>	PURE_PARSER SEMANTIC_PARSER				// %pure_parser %semantic_parser
%token<tokenp>	EXPECT THONG							// %expect %thong

%token<tokenp>	PREC									// %prec

%token END_TERMINALS									// ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS

// Kinds of the interior parse-tree nodes: each is passed as the second argument of "new Token" in the actions
// below. The relative order of the names is unchanged so the generated token numbers do not move.
%token	_SECTIONS _DEFSECTION_OPT _LITERALBLOCK _DECLARATION
%token	_TAG_OPT _NAMENOLIST _NAMENO _NAMELIST
%token	_RULESECTION _RULE _LHS _RHS _PREC _ACTION _USERSECTION_OPT

// Nonterminals whose semantic value is a Token pointer.
%type<tokenp>	sections mark defsection_opt declarations literalblock declaration
%type<tokenp>	union rword tag_opt namenolist nameno namelist name
%type<tokenp>	rulesection rules lhs rhs prod prec action
%type<tokenp>	usersection_opt ccode_opt blocks

%start grammar
---|
119 | |
---|
120 | %% |
---|
// Start symbol: once the whole input is parsed, hand the tree to filter() and then release it.
grammar :
	sections
		{
			Token *root = $1;
			filter( root );								// filter parse tree
			freeTree( root );							// free parse-tree storage (optional: used with purify)
		}
	;
---|
128 | |
---|
// The four parts of a Yacc file. Their nodes are chained through "left" in source order and the chain is placed
// under a new "sections" node through "down".
sections :
	defsection_opt mark rulesection usersection_opt
		{
			Token *defs = $1, *separator = $2, *rulepart = $3, *userpart = $4;
			$$ = new Token( "sections", _SECTIONS );
			defs->left = separator;
			separator->left = rulepart;
			rulepart->left = userpart;
			$$->down = defs;
		}
	;
---|
139 | |
---|
// The %% separating definitions from rules. A parse error at this point is fatal: a message is printed and the
// program exits.
mark :
	MARK
		{ $$ = $1; }
	| error												// missing %%
		{
			cerr << "no input grammar, missing %% mark" << endl;
			exit( -1 );
		}
	;
---|
148 | |
---|
// Optional definition section. A "declaration_opt" node is always produced; when declarations are present the
// head of their list (declstart, not the tail in $1) is placed under it.
defsection_opt :
	// empty
		{
			$$ = new Token( "declaration_opt", _DEFSECTION_OPT );
		}
	| declarations
		{
			Token *section = new Token( "declaration_opt", _DEFSECTION_OPT );
			section->down = declstart;
			$$ = section;
		}
	;
---|
162 | |
---|
// List of literal blocks and declarations, chained through "left". The first element is recorded in declstart and
// $$ is always the current tail of the list. A literalblock is linked in directly; a declaration is first wrapped in
// a new "declaration" node.
declarations :
	literalblock
		{
			declstart = $1;
			$$ = declstart;
		}
	| declaration
		{
			Token *wrapper = new Token( "declaration", _DECLARATION );
			declstart = wrapper;
			wrapper->down = $1;
			$$ = wrapper;
		}
	| declarations literalblock
		{
			Token *tail = $1, *block = $2;
			tail->left = block;
			$$ = block;
		}
	| declarations declaration
		{
			Token *wrapper = new Token( "declaration", _DECLARATION );
			$1->left = wrapper;
			wrapper->down = $2;
			$$ = wrapper;
		}
	;
---|
189 | |
---|
// A %{ ... %} block. lexC() is called right after %{ is seen; after the code has been matched, the result of
// lexYacc() becomes a CODE Token, which is the value of the second mid-rule action ($4).
literalblock :
	LCURL
		{ lexC(); }
	ccode_opt
		{ $<tokenp>$ = new Token( lexYacc(), CODE ); }
	RCURL
		{
			Token *lcurl = $1, *code = $<tokenp>4, *rcurl = $5;
			lcurl->left = code;							// chain: %{  code  %}
			code->left = rcurl;
			$$ = new Token( "literalblock", _LITERALBLOCK );
			$$->down = lcurl;
		}
	;
---|
204 | |
---|
// One declaration of the definition section. The value is always the leading keyword Token, with any further
// components chained after it through "left".
declaration :
	union
		{ $$ = $1; }
	| START IDENTIFIER
		{
			Token *keyword = $1;
			keyword->left = $2;
			$$ = keyword;
		}
	| rword tag_opt namenolist
		{
			// $3 is only the tail of the list, so the head saved in nameliststart is used instead.
			Token *names = new Token( "namenolist", _NAMENOLIST );
			names->down = nameliststart;
			$1->left = $2;
			$2->left = names;
			$$ = $1;
		}
	| TYPE tag_opt namelist
		{
			// $3 is only the tail of the list, so the head saved in nameliststart is used instead.
			Token *names = new Token( "namelist", _NAMELIST );
			names->down = nameliststart;
			$1->left = $2;
			$2->left = names;
			$$ = $1;
		}
	| PURE_PARSER
		{ $$ = $1; }
	| SEMANTIC_PARSER
		{ $$ = $1; }
	| EXPECT INTEGER									// bison
		{
			Token *keyword = $1;
			keyword->left = $2;
			$$ = keyword;
		}
	| THONG												// bison
		{ $$ = $1; }
	;
---|
237 | |
---|
// %union { ... }. The code between the braces becomes a CODE Token (value of the mid-rule action, $5) and the
// result is the chain  %union  {  code  }  headed by the %union Token.
union :
	UNION '{'
		{ lexC(); }
	ccode_opt
		{
			// Remove the trailing '}' which is added in lex.
			string code = lexYacc();
			$<tokenp>$ = new Token( code.substr( 0, code.length() - 1 ), CODE );
		}
	'}'
		{
			Token *keyword = $1, *lbrace = $2, *code = $<tokenp>5, *rbrace = $6;
			keyword->left = lbrace;
			lbrace->left = code;
			code->left = rbrace;
			$$ = keyword;
		}
	;
---|
255 | |
---|
// Keywords that introduce a list of names with optional numbers; the keyword Token is passed up unchanged.
rword :
	TOKEN
		{ $$ = $1; }
	| LEFT
		{ $$ = $1; }
	| RIGHT
		{ $$ = $1; }
	| NONASSOC
		{ $$ = $1; }
	;
---|
262 | |
---|
// Optional <tag>. A "tag_opt" node is always produced; when a tag is present the chain  <  IDENTIFIER  >  is
// placed under it.
tag_opt :
	// empty
		{
			$$ = new Token( "tag_opt", _TAG_OPT );
		}
	| '<' IDENTIFIER '>'
		{
			Token *langle = $1, *tag = $2, *rangle = $3;
			langle->left = tag;
			tag->left = rangle;
			$$ = new Token( "tag_opt", _TAG_OPT );
			$$->down = langle;
		}
	;
---|
277 | |
---|
// List of nameno nodes, optionally separated by commas (a comma Token is kept in the chain). The first element is
// recorded in nameliststart and $$ is always the current tail.
namenolist :
	nameno
		{
			nameliststart = $1;
			$$ = nameliststart;
		}
	| namenolist nameno
		{
			Token *tail = $1, *item = $2;
			tail->left = item;
			$$ = item;
		}
	| namenolist ',' nameno
		{
			Token *tail = $1, *comma = $2, *item = $3;
			tail->left = comma;
			comma->left = item;
			$$ = item;
		}
	;
---|
298 | |
---|
// A name, optionally followed by an integer, wrapped in a "nameno" node.
nameno :
	name
		{
			Token *wrapper = new Token( "nameno", _NAMENO );
			wrapper->down = $1;
			$$ = wrapper;
		}
	| name INTEGER
		{
			Token *wrapper = new Token( "nameno", _NAMENO );
			$1->left = $2;								// chain: name  INTEGER
			wrapper->down = $1;
			$$ = wrapper;
		}
	;
---|
312 | |
---|
// List of names, optionally separated by commas (a comma Token is kept in the chain). The first element is recorded
// in nameliststart and $$ is always the current tail.
namelist :
	name
		{
			nameliststart = $1;
			$$ = nameliststart;
		}
	| namelist name
		{
			Token *tail = $1, *item = $2;
			tail->left = item;
			$$ = item;
		}
	| namelist ',' name
		{
			Token *tail = $1, *comma = $2, *item = $3;
			tail->left = comma;
			comma->left = item;
			$$ = item;
		}
	;
---|
333 | |
---|
// A symbol name: an identifier or a character constant, passed up unchanged.
name :
	IDENTIFIER
		{ $$ = $1; }
	| CHARACTER
		{ $$ = $1; }
	;
---|
338 | |
---|
// The rules section: the value of "rules" is placed under a new "rulesection" node. A parse error here is fatal: a
// message is printed and the program exits.
rulesection :
	rules
		{
			Token *section = new Token( "rulesection", _RULESECTION );
			section->down = $1;
			$$ = section;
		}
	| error												// no rules
		{
			cerr << "no rules in the input grammar" << endl;
			exit( -1 );
		}
	;
---|
352 | |
---|
353 | // These grammar rules are complex because the Yacc language is LR(2) due to the optional ';' at the end of rules. The |
---|
354 | // following rules convert the LR(2) grammar into LR(1) by lengthening the rules to allow sufficient look |
---|
355 | // ahead. Unfortunately, this change makes handling the semantic actions more complex because there are two lists |
---|
356 | // (rules, rhs) being built but only one list tail can be returned through $$ for chaining. |
---|
357 | |
---|
// All the work of building the rule list is done in "rhs"; here the head of that list (rulestart) is returned. In
// the second alternative the final ';' is first added to the last rhs with addDownLeftTail.
rules :
	lhs rhs
		{
			$$ = rulestart;
		}
	| lhs rhs ';'
		{
			Token *lastrhs = $2;
			lastrhs->addDownLeftTail( $3 );
			$$ = rulestart;
		}
	;
---|
371 | |
---|
// Left-hand side of a rule: the chain  IDENTIFIER  :  placed under a new "lhs" node.
lhs :
	IDENTIFIER ':'
		{
			Token *ident = $1, *colon = $2;
			$$ = new Token( "lhs", _LHS );
			ident->left = colon;
			$$->down = ident;
		}
	;
---|
382 | |
---|
// Builds two lists at once: the list of "rule" nodes (head in rulestart, extended with addLeftTail) and, inside each
// rule, the chain  lhs  rhs  rhs ...  linked through "left". $$ is always the "rhs" node currently being filled.
rhs :
	// empty
		{
			// $0 is the value just below this rule on the parser stack: the initial lhs from "rules".
			Token *first = $<tokenp>0;
			rulestart = new Token( "rule", _RULE );
			rulestart->down = first;
			$$ = new Token( "rhs", _RHS );
			first->left = $$;
		}
	| rhs lhs
		{
			// A new lhs with no ';' before it: start the next rule.
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $2;
			$$ = new Token( "rhs", _RHS );
			$2->left = $$;
		}
	| rhs ';' lhs
		{
			// The ';' is added to the rhs just completed, then the next rule is started.
			$1->addDownLeftTail( $2 );
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $3;
			$$ = new Token( "rhs", _RHS );
			$3->left = $$;
		}
	| rhs prod
		{
			// Another element of the current alternative.
			Token *current = $1;
			current->addDownLeftTail( $2 );
			$$ = current;
		}
	| rhs '|'
		{
			// The '|' is added to the current rhs and a fresh rhs is linked after it.
			Token *finished = $1;
			finished->addDownLeftTail( $2 );
			$$ = new Token( "rhs", _RHS );
			finished->left = $$;
		}
	;
---|
429 | |
---|
// One element of a right-hand side; the component's value is passed up unchanged.
prod :
	action
		{ $$ = $1; }
	| IDENTIFIER
		{ $$ = $1; }
	| CHARACTER
		{ $$ = $1; }
	| prec
		{ $$ = $1; }
	;
---|
436 | |
---|
// %prec name: the chain  %prec  name  placed under a new "prec" node.
prec :
	PREC name
		{
			Token *keyword = $1;
			keyword->left = $2;
			$$ = new Token( "prec", _PREC );
			$$->down = keyword;
		}
	;
---|
446 | |
---|
// A semantic action { ... }. The code between the braces becomes a CODE Token (value of the mid-rule action, $4)
// and the chain  {  code  }  is placed under a new "action" node.
action :
	'{'
		{ lexC(); }
	ccode_opt
		{
			// Remove the trailing '}' added in lex.
			string code = lexYacc();
			$<tokenp>$ = new Token( code.substr( 0, code.length() - 1 ), CODE );
		}
	'}'
		{
			Token *lbrace = $1, *code = $<tokenp>4, *rbrace = $5;
			lbrace->left = code;
			code->left = rbrace;
			$$ = new Token( "action", _ACTION );
			$$->down = lbrace;
		}
	;
---|
464 | |
---|
// Optional user-code section after the second %%. A "usersection_opt" node is always produced.
usersection_opt :
	// empty
		{
			// attach remaining WS to fictitious code
			Token *filler = new Token( "", ws_list, CODE );
			$$ = new Token( "usersection_opt", _USERSECTION_OPT );
			$$->down = filler;
		}
	| MARK
		{ lexC(); }
	ccode_opt
		{
			// chain: %%  code
			Token *separator = $1;
			Token *code = new Token( lexYacc(), CODE );
			separator->left = code;
			$$ = new Token( "usersection_opt", _USERSECTION_OPT );
			$$->down = separator;
		}
	;
---|
485 | |
---|
// Optional C code containing nested braces. The value of this nonterminal is not used by any rule in this grammar:
// the code text itself is obtained from lexYacc().
ccode_opt :
	// empty
		{ $$ = 0; }										// give the value a defined state rather than leaving $$ unset
	| blocks
	;
---|
491 | |
---|
492 | // This rule matches internal braces "{}" in C code to the level of the braces of a union/action. These internal braces |
---|
493 | // are returned as Tokens from the lexer but are unused because the braces are already concatenated into the code string |
---|
494 | // built by the lexer. Therefore, the tokens for the braces are immediately deleted. |
---|
495 | |
---|
// Matches nested brace pairs inside C code and deletes the brace Tokens as soon as they are seen. No Token
// survives, so the value is set to 0 explicitly. Without the assignment the implicit "$$ = $1" would pass up the
// address of a Token that has already been deleted (first alternative) or the value of the preceding "blocks".
blocks :
	'{'
		{ delete $1; }
	ccode_opt '}'
		{
			delete $4;
			$$ = 0;										// $1 was deleted above: do not return a dangling pointer
		}
	| blocks '{'
		{ delete $2; }
	ccode_opt '}'
		{
			delete $5;
			$$ = 0;										// nothing left to return
		}
	;
---|
506 | %% |
---|
507 | |
---|
508 | // Local Variables: // |
---|
509 | // mode: c++ // |
---|
510 | // tab-width: 4 // |
---|
511 | // compile-command: "make install" // |
---|
512 | // End: // |
---|