1 | //
|
---|
2 | // Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
|
---|
3 | //
|
---|
4 | // The contents of this file are covered under the licence agreement in the
|
---|
5 | // file "LICENCE" distributed with Cforall.
|
---|
6 | //
|
---|
7 | // parser.yy --
|
---|
8 | //
|
---|
9 | // Author : Rodolfo G. Esteves
|
---|
10 | // Created On : Sat Dec 15 13:44:21 2001
|
---|
11 | // Last Modified By : Peter A. Buhr
|
---|
12 | // Last Modified On : Thu Jun 29 09:26:47 2017
|
---|
13 | // Update Count : 1045
|
---|
14 | //
|
---|
15 |
|
---|
16 | %{
|
---|
17 | #define YYDEBUG_LEXER_TEXT( yylval ) // lexer loads this up each time
|
---|
18 | #define YYDEBUG 1 // get the pretty debugging code to compile
|
---|
19 |
|
---|
20 | #include <iostream>
|
---|
21 | using namespace std;
|
---|
22 | #include "ParserTypes.h"
|
---|
23 | #include "filter.h"
|
---|
24 |
|
---|
25 | extern list<string> ws_list; // lex variable containing accumulated whitespace
|
---|
26 | void lexC( void );
|
---|
27 | string lexYacc( void );
|
---|
28 |
|
---|
// Error hook: writes the message, prefixed by the current value of yylineno, to standard error.
void yyerror( string s ) {
	extern int yylineno;								// defined outside this file

	cerr << "Error in line: " << yylineno << ": " << s << endl;
}
35 |
|
---|
// List heads shared between grammar actions. Each list rule returns the list TAIL through $$ so the
// next element can be chained on, so the HEAD has to be remembered separately.
Token *declstart,										// first declaration/literalblock; set in "declarations", read in "defsection_opt"
	  *rulestart,										// first "rule" node; set in "rhs", read in "rules"
	  *nameliststart;									// first name; set in "namenolist"/"namelist", read in "declaration"
39 | %}
|
---|
40 |
|
---|
// Every typed terminal and nonterminal carries a pointer to a Token.
%union {
	Token *tokenp;
}

// Single-character terminals.
%token<tokenp>	','  '<'  '>'  '{'  '}'  ':'  ';'  '|'

// Section and literal-block delimiters.
%token<tokenp>
	MARK												// %%
	LCURL												// %{
	RCURL												// %}

// Lexical values.
%token<tokenp>
	INTEGER												// integer constant
	CHARACTER											// character constant
	IDENTIFIER											// identifier
	CODE												// C code

// Directives of the definitions section.
%token<tokenp>
	START												// %start
	UNION												// %union
	TOKEN												// %token
	LEFT												// %left
	RIGHT												// %right
	NONASSOC											// %nonassoc
	TYPE												// %type
	PURE_PARSER											// %pure_parser
	SEMANTIC_PARSER										// %semantic_parser
	EXPECT												// %expect
	THONG												// %thong

// Directive usable inside a rule.
%token<tokenp>
	PREC												// %prec

%token END_TERMINALS									// ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS

// Untyped token codes passed as the kind argument when the actions build interior tree nodes. They
// are listed in their original relative order so the generated token numbers are unchanged.
%token
	_SECTIONS
	_DEFSECTION_OPT
	_LITERALBLOCK
	_DECLARATION
	_TAG_OPT
	_NAMENOLIST
	_NAMENO
	_NAMELIST
	_RULESECTION
	_RULE
	_LHS
	_RHS
	_PREC
	_ACTION
	_USERSECTION_OPT

// Nonterminals carrying a Token pointer.
%type<tokenp>
	sections  mark  defsection_opt  declarations  literalblock  declaration
	union  rword  tag_opt  namenolist  nameno  namelist  name
	rulesection  rules  lhs  rhs  prod  prec  action
	usersection_opt  ccode_opt  blocks

%start grammar
119 |
|
---|
120 | %%
|
---|
// Start symbol: once the whole input has been reduced to a "sections" tree, filter it and free it.
grammar :
	sections
		{
			Token *tree = $1;
			filter( tree );								// filter parse tree
			freeTree( tree );							// free parse-tree storage (optional: used with purify)
		}
	;
128 |
|
---|
// The four parts of the input become siblings (chained through "left") under one "sections" node.
sections :
	defsection_opt mark rulesection usersection_opt
		{
			// sibling chain: defsection_opt -> mark -> rulesection -> usersection_opt
			$3->left = $4;
			$2->left = $3;
			$1->left = $2;

			Token *root = new Token( "sections", _SECTIONS );
			root->down = $1;
			$$ = root;
		}
	;
139 |
|
---|
// The %% separator before the rules section; its absence is reported and terminates the program.
mark :
	MARK
		{ $$ = $1; }
	| error												// missing %%
		{
			cerr << "no input grammar, missing %% mark" << endl;
			exit( -1 );
		}
	;
148 |
|
---|
// Optional definitions section. A "declaration_opt" node is always produced; when declarations are
// present, the head of their list (declstart) hangs below it.
defsection_opt :
	// empty
		{ $$ = new Token( "declaration_opt", _DEFSECTION_OPT ); }
	| declarations
		{
			Token *section = new Token( "declaration_opt", _DEFSECTION_OPT );
			section->down = declstart;					// $1 is only the list tail, so use the saved head
			$$ = section;
		}
	;
162 |
|
---|
// Left-recursive list of literal blocks and declarations. Elements are chained through "left"; the
// first element is saved in declstart and $$ is always the current last element. A declaration is
// wrapped in a "declaration" node, whereas a literalblock is linked in directly.
declarations :
	literalblock
		{
			declstart = $1;
			$$ = $1;
		}
	| declaration
		{
			Token *wrapper = new Token( "declaration", _DECLARATION );
			wrapper->down = $1;
			declstart = wrapper;
			$$ = wrapper;
		}
	| declarations literalblock
		{
			$1->left = $2;								// append to previous tail
			$$ = $2;
		}
	| declarations declaration
		{
			Token *wrapper = new Token( "declaration", _DECLARATION );
			wrapper->down = $2;
			$1->left = wrapper;							// append to previous tail
			$$ = wrapper;
		}
	;
189 |
|
---|
// A %{ ... %} block. lexC() is called right after the opening delimiter and lexYacc() once the body
// has been parsed; the string lexYacc() returns becomes a CODE token ($4). The result is a
// "literalblock" node over the chain LCURL -> CODE -> RCURL.
literalblock :
	LCURL
		{ lexC(); }
	ccode_opt
		{ $<tokenp>$ = new Token( lexYacc(), CODE ); }
	RCURL
		{
			Token *code = $<tokenp>4;					// value of the second mid-rule action
			$1->left = code;
			code->left = $5;

			Token *block = new Token( "literalblock", _LITERALBLOCK );
			block->down = $1;
			$$ = block;
		}
	;
204 |
|
---|
// One declaration of the definitions section. The result is always the leading keyword token, with
// any operands chained after it through "left".
declaration :
	union
		{ $$ = $1; }
	| START IDENTIFIER
		{
			$$ = $1;
			$1->left = $2;
		}
	| rword tag_opt namenolist
		{
			// The names hang below a "namenolist" node; nameliststart was set to the first element
			// by the namenolist rule ($3 is only the tail).
			Token *names = new Token( "namenolist", _NAMENOLIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| TYPE tag_opt namelist
		{
			// Same shape as above, with a "namelist" node over the names.
			Token *names = new Token( "namelist", _NAMELIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| PURE_PARSER
		{ $$ = $1; }
	| SEMANTIC_PARSER
		{ $$ = $1; }
	| EXPECT INTEGER									// bison
		{
			$$ = $1;
			$1->left = $2;
		}
	| THONG												// bison
		{ $$ = $1; }
	;
237 |
|
---|
// %union { ... }. The body text comes from lexYacc(); its final character (the closing brace the
// lexer appends) is dropped because that brace is also delivered as the separate '}' token ($6).
// Result: the chain UNION -> '{' -> CODE -> '}'.
union :
	UNION '{'
		{ lexC(); }
	ccode_opt
		{
			string text( lexYacc() );
			string body( text, 0, text.length() - 1 );	// everything except the last character
			$<tokenp>$ = new Token( body, CODE );
		}
	'}'
		{
			Token *code = $<tokenp>5;					// value of the second mid-rule action
			code->left = $6;
			$2->left = code;
			$1->left = $2;
			$$ = $1;
		}
	;
255 |
|
---|
// Keywords that introduce a list of names with optional numbers; the keyword token is passed up.
rword :
	TOKEN
		{ $$ = $1; }
	| LEFT
		{ $$ = $1; }
	| RIGHT
		{ $$ = $1; }
	| NONASSOC
		{ $$ = $1; }
	;
262 |
|
---|
// Optional <type> tag. A "tag_opt" node is always produced; when a tag is present the chain
// '<' -> IDENTIFIER -> '>' hangs below it.
tag_opt :
	// empty
		{ $$ = new Token( "tag_opt", _TAG_OPT ); }
	| '<' IDENTIFIER '>'
		{
			$2->left = $3;
			$1->left = $2;

			Token *tag = new Token( "tag_opt", _TAG_OPT );
			tag->down = $1;
			$$ = tag;
		}
	;
277 |
|
---|
// List of nameno elements, optionally comma separated. The first element is saved in
// nameliststart; $$ is the current last element. A comma token is kept as a list element.
namenolist :
	nameno
		{
			nameliststart = $1;
			$$ = $1;
		}
	| namenolist nameno
		{
			$1->left = $2;								// append after previous tail
			$$ = $2;
		}
	| namenolist ',' nameno
		{
			$2->left = $3;
			$1->left = $2;								// previous tail -> ',' -> new element
			$$ = $3;
		}
	;
298 |
|
---|
// A name, optionally followed by an integer, wrapped in a "nameno" node.
nameno :
	name
		{
			Token *entry = new Token( "nameno", _NAMENO );
			entry->down = $1;
			$$ = entry;
		}
	| name INTEGER
		{
			Token *entry = new Token( "nameno", _NAMENO );
			$1->left = $2;								// number follows the name
			entry->down = $1;
			$$ = entry;
		}
	;
312 |
|
---|
// List of names, optionally comma separated. The first name is saved in nameliststart; $$ is the
// current last element. A comma token is kept as a list element.
namelist :
	name
		{
			nameliststart = $1;
			$$ = $1;
		}
	| namelist name
		{
			$1->left = $2;								// append after previous tail
			$$ = $2;
		}
	| namelist ',' name
		{
			$2->left = $3;
			$1->left = $2;								// previous tail -> ',' -> new name
			$$ = $3;
		}
	;
333 |
|
---|
// A symbol name: an identifier or a character constant; the token itself is passed up.
name :
	IDENTIFIER
		{ $$ = $1; }
	| CHARACTER
		{ $$ = $1; }
	;
338 |
|
---|
// The rules section: a "rulesection" node over the rule list. Its absence is reported and
// terminates the program.
rulesection :
	rules
		{
			Token *section = new Token( "rulesection", _RULESECTION );
			section->down = $1;
			$$ = section;
		}
	| error												// no rules
		{
			cerr << "no rules in the input grammar" << endl;
			exit( -1 );
		}
	;
352 |
|
---|
353 | // These grammar rules are complex because the Yacc language is LR(2) due to the optional ';' at the end of rules. The
|
---|
354 | // following rules convert the LR(2) grammar into LR(1) by lengthening the rules to allow sufficient look
|
---|
355 | // ahead. Unfortunately, this change makes handling the semantic actions more complex because there are two lists
|
---|
356 | // (rules, rhs) being built but only one list tail can be returned through $$ for chaining.
|
---|
357 |
|
---|
// All rules. The list itself is built inside "rhs"; this rule attaches an optional final ';' to the
// last rhs and returns the list head saved in rulestart.
rules :
	lhs rhs
		{ $$ = rulestart; }
	| lhs rhs ';'
		{
			Token *semicolon = $3;
			$2->addDownLeftTail( semicolon );
			$$ = rulestart;
		}
	;
371 |
|
---|
// Left-hand side of a rule: an "lhs" node over the chain IDENTIFIER -> ':'.
lhs :
	IDENTIFIER ':'
		{
			Token *node = new Token( "lhs", _LHS );
			$1->left = $2;
			node->down = $1;
			$$ = node;
		}
	;
382 |
|
---|
// Right-hand sides, and (because of the LR(1) rewrite described above) the start of every rule
// after the first. Two lists are built here: the "rule" list, headed by rulestart, and the contents
// of the current "rhs" node, which is what $$ returns.
rhs :
	// empty
		{
			// $0 is the lhs that precedes this rhs in "rules": initial lhs is already on the stack.
			Token *firstLhs = $<tokenp>0;
			rulestart = new Token( "rule", _RULE );
			rulestart->down = firstLhs;
			$$ = new Token( "rhs", _RHS );
			firstLhs->left = $$;
		}
	| rhs lhs
		{
			// A new lhs starts the next rule: add a "rule" node to the rule list and open a new rhs.
			Token *nextRule = new Token( "rule", _RULE );
			rulestart->addLeftTail( nextRule );
			nextRule->down = $2;
			$$ = new Token( "rhs", _RHS );
			$2->left = $$;
		}
	| rhs ';' lhs
		{
			// As above, but the ';' is first attached to the rhs it follows.
			$1->addDownLeftTail( $2 );
			Token *nextRule = new Token( "rule", _RULE );
			rulestart->addLeftTail( nextRule );
			nextRule->down = $3;
			$$ = new Token( "rhs", _RHS );
			$3->left = $$;
		}
	| rhs prod
		{
			// One more element for the current rhs.
			$1->addDownLeftTail( $2 );
			$$ = $1;
		}
	| rhs '|'
		{
			// The '|' is attached to the current rhs and a new sibling rhs is opened.
			$1->addDownLeftTail( $2 );
			$$ = new Token( "rhs", _RHS );
			$1->left = $$;
		}
	;
429 |
|
---|
// One element of a right-hand side; the element's token or subtree is passed up.
prod :
	action
		{ $$ = $1; }
	| IDENTIFIER
		{ $$ = $1; }
	| CHARACTER
		{ $$ = $1; }
	| prec
		{ $$ = $1; }
	;
436 |
|
---|
// %prec name: a "prec" node over the chain PREC -> name.
prec :
	PREC name
		{
			Token *node = new Token( "prec", _PREC );
			$1->left = $2;
			node->down = $1;
			$$ = node;
		}
	;
446 |
|
---|
// A semantic action { ... }. The body text comes from lexYacc(); its final character (the closing
// brace the lexer appends) is dropped because that brace is also delivered as the separate '}'
// token ($5). Result: an "action" node over the chain '{' -> CODE -> '}'.
action :
	'{'
		{ lexC(); }
	ccode_opt
		{
			string text( lexYacc() );
			string body( text, 0, text.length() - 1 );	// everything except the last character
			$<tokenp>$ = new Token( body, CODE );
		}
	'}'
		{
			Token *code = $<tokenp>4;					// value of the second mid-rule action
			$1->left = code;
			code->left = $5;

			Token *node = new Token( "action", _ACTION );
			node->down = $1;
			$$ = node;
		}
	;
464 |
|
---|
// Optional user-code section after the second %%. A "usersection_opt" node is always produced.
usersection_opt :
	// empty
		{
			// attach remaining WS to fictitious code
			Token *filler = new Token( "", ws_list, CODE );
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = filler;
			$$ = section;
		}
	| MARK
		{ lexC(); }
	ccode_opt
		{
			// The text returned by lexYacc() becomes a CODE token following the MARK.
			Token *code = new Token( lexYacc(), CODE );
			$1->left = code;
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = $1;
			$$ = section;
		}
	;
485 |
|
---|
// Optional nested brace blocks inside C code. Nothing in this grammar reads the value of ccode_opt.
// Both alternatives set it to a null pointer explicitly so it is never an indeterminate value or a
// pointer to a brace token that "blocks" has already deleted.
ccode_opt :
	// empty
		{ $$ = 0; }
	| blocks
		{ $$ = 0; }
	;
491 |
|
---|
492 | // This rule matches internal braces "{}" in C code to the level of the braces of a union/action. These internal braces
|
---|
493 | // are returned as Tokens from the lexer but are unused because the braces are already concatenated into the code string
|
---|
494 | // built by the lexer. Therefore, the tokens for the braces are immediately deleted.
|
---|
495 |
|
---|
// One or more nested { ... } groups inside C code. The brace tokens are deleted as soon as they have
// been matched. Without an explicit assignment the default "$$ = $1" would leave the value of this
// rule pointing at a token that has just been deleted, so it is set to a null pointer instead.
blocks :
	'{'
		{ delete $1; }
	ccode_opt '}'
		{
			delete $4;
			$$ = 0;										// no token survives; do not pass up a dangling pointer
		}
	| blocks '{'
		{ delete $2; }
	ccode_opt '}'
		{
			delete $5;
			$$ = 0;										// likewise for each further group
		}
	;
506 | %%
|
---|
507 |
|
---|
508 | // Local Variables: //
|
---|
509 | // mode: c++ //
|
---|
510 | // tab-width: 4 //
|
---|
511 | // compile-command: "make install" //
|
---|
512 | // End: //
|
---|