source: tools/prettyprinter/parser.yy@ 7a230fd

ADT arm-eh ast-experimental cleanup-dtors enum forall-pointer-decay jacob/cs343-translation jenkins-sandbox new-ast new-ast-unique-expr pthread-emulation qualifiedEnum
Last change on this file since 7a230fd was 81bb114, checked in by Peter A. Buhr <pabuhr@…>, 7 years ago

update to support more bison directives

  • Property mode set to 100644
File size: 11.9 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// parser.yy --
8//
9// Author : Rodolfo G. Esteves
10// Created On : Sat Dec 15 13:44:21 2001
11// Last Modified By : Peter A. Buhr
12// Last Modified On : Sun Apr 15 21:40:30 2018
13// Update Count : 1052
14//
15
16%{
17#define YYDEBUG_LEXER_TEXT( yylval ) // lexer loads this up each time
18#define YYDEBUG 1 // get the pretty debugging code to compile
19
20#include <iostream>
21using namespace std;
22#include "ParserTypes.h"
23#include "filter.h"
24
25extern list<string> ws_list; // lex variable containing accumulated whitespace
26void lexC( void );
27string lexYacc( void );
28
29void yyerror( string s ) {
30 extern int yylineno;
31
32 cerr << "Error in line: " << yylineno << ": " << s << endl;
33 return;
34}
35
36Token *declstart;
37Token *rulestart;
38Token *nameliststart;
39%}
40
41%union {
42 Token *tokenp;
43}
44
45%token<tokenp> ','
46%token<tokenp> '<'
47%token<tokenp> '>'
48%token<tokenp> '{'
49%token<tokenp> '}'
50%token<tokenp> ':'
51%token<tokenp> ';'
52%token<tokenp> '|'
53
54%token<tokenp> MARK // %%
55%token<tokenp> LCURL // %{
56%token<tokenp> RCURL // %}
57
58%token<tokenp> INTEGER // integer constant
59%token<tokenp> CHARACTER // character constant
60%token<tokenp> IDENTIFIER // identifier
61%token<tokenp> CODE // C code
62
63%token<tokenp> DEFINE // %define
64%token<tokenp> EXPECT // %expect
65%token<tokenp> LEFT // %left
66%token<tokenp> LOCATIONS // %locations
67%token<tokenp> NONASSOC // %nonassoc
68%token<tokenp> PRECEDENCE // %precedence
69%token<tokenp> PURE_PARSER // %pure_parser
70%token<tokenp> RIGHT // %right
71%token<tokenp> SEMANTIC_PARSER // %semantic_parser
72%token<tokenp> START // %start
73%token<tokenp> THONG // %thong
74%token<tokenp> TOKEN // %token
75%token<tokenp> TYPE // %type
76%token<tokenp> UNION // %union
77
78%token<tokenp> PREC // %prec
79
80%token END_TERMINALS // ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS
81
82%type<tokenp> sections
83%token _SECTIONS
84%type<tokenp> mark
85%type<tokenp> defsection_opt
86%token _DEFSECTION_OPT
87%type<tokenp> declarations
88%type<tokenp> literalblock
89%token _LITERALBLOCK
90%type<tokenp> declaration
91%token _DECLARATION
92%type<tokenp> union
93%type<tokenp> rword
94%type<tokenp> tag_opt
95%token _TAG_OPT
96%type<tokenp> namenolist
97%token _NAMENOLIST
98%type<tokenp> nameno
99%token _NAMENO
100%type<tokenp> namelist
101%token _NAMELIST
102%type<tokenp> name
103%type<tokenp> rulesection
104%token _RULESECTION
105%type<tokenp> rules
106%token _RULE
107%type<tokenp> lhs
108%token _LHS
109%type<tokenp> rhs
110%token _RHS
111%type<tokenp> prod
112%type<tokenp> prec
113%token _PREC
114%type<tokenp> action
115%token _ACTION
116%type<tokenp> usersection_opt
117%token _USERSECTION_OPT
118%type<tokenp> ccode_opt
119%type<tokenp> blocks
120
121%start grammar
122
123%%
124grammar :
125 sections
126 {
127 filter( $1 ); // filter parse tree
128 freeTree( $1 ); // free parse-tree storage (optional: used with purify)
129 }
130 ;
131
132sections :
133 defsection_opt mark rulesection usersection_opt
134 {
135 $$ = new Token( "sections", _SECTIONS );
136 $1->left = $2;
137 $2->left = $3;
138 $3->left = $4;
139 $$->down = $1;
140 }
141 ;
142
143mark :
144 MARK
145 | error // missing %%
146 {
147 cerr << "no input grammar, missing %% mark" << endl;
148 exit( -1 );
149 }
150 ;
151
152defsection_opt :
153 // empty
154 {
155 //cerr << "defsection_opt1: " << endl;
156 $$ = new Token( "declaration_opt", _DEFSECTION_OPT );
157 }
158 | declarations
159 {
160 //cerr << "defsection_opt2: " << $1->text << "(" << $1 << ")" << endl;
161 $$ = new Token( "declaration_opt", _DEFSECTION_OPT );
162 $$->down = declstart;
163 }
164 ;
165
166declarations :
167 literalblock
168 {
169 //cerr << "declarations1: " << $1->text << "(" << $1 << ")" << endl;
170 $$ = declstart = $1;
171 }
172 | declaration
173 {
174 //cerr << "declarations2: " << $1->text << "(" << $1 << ")" << endl;
175 $$ = declstart = new Token( "declaration", _DECLARATION );
176 $$->down = $1;
177 }
178 | declarations literalblock
179 {
180 //cerr << "declarations3: "<< $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
181 $1->left = $2;
182 $$ = $2;
183 }
184 | declarations declaration
185 {
186 //cerr << "declarations4: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
187 $$ = new Token( "declaration", _DECLARATION );
188 $1->left = $$;
189 $$->down = $2;
190 }
191 ;
192
193literalblock :
194 LCURL
195 { lexC(); }
196 ccode_opt
197 { $<tokenp>$ = new Token( lexYacc(), CODE ); }
198 RCURL
199 {
200 //cerr << "literalblock: " << $1->text << "(" << $1 << ") " << $<tokenp>4->text << " " << $5->text << "(" << $5 << ")" << endl;
201 $1->left = $<tokenp>4;
202 $<tokenp>4->left = $5;
203 $$ = new Token( "literalblock", _LITERALBLOCK );
204 $$->down = $1;
205 }
206 ;
207
208declaration :
209 union
210 | START IDENTIFIER
211 {
212 $1->left = $2;
213 $$ = $1;
214 }
215 | rword tag_opt namenolist
216 {
217 Token *n = new Token( "namenolist", _NAMENOLIST );
218 n->down = nameliststart;
219 $1->left = $2;
220 $2->left = n;
221 $$ = $1;
222 }
223 | TYPE tag_opt namelist
224 {
225 Token *n = new Token( "namelist", _NAMELIST );
226 n->down = nameliststart;
227 $1->left = $2;
228 $2->left = n;
229 $$ = $1;
230 }
231 | PURE_PARSER
232 | SEMANTIC_PARSER
233 | EXPECT INTEGER // bison
234 {
235 $1->left = $2;
236 $$ = $1;
237 }
238 | DEFINE // bison
239 | LOCATIONS
240 | THONG // bison
241 ;
242
243union :
244 UNION '{'
245 { lexC(); }
246 ccode_opt
247 {
248 // Remove the trailing '}' which is added in lex.
249 string temp( lexYacc() );
250 $<tokenp>$ = new Token( temp.substr( 0, temp.length() - 1 ), CODE );
251 }
252 '}'
253 {
254 $1->left = $2;
255 $2->left = $<tokenp>5;
256 $<tokenp>5->left = $6;
257 $$ = $1;
258 }
259 ;
260
261rword :
262 TOKEN
263 | LEFT
264 | RIGHT
265 | NONASSOC
266 | PRECEDENCE
267 ;
268
269tag_opt :
270 // empty
271 {
272 //cerr << "tag_opt" << endl;
273 $$ = new Token( "tag_opt", _TAG_OPT );
274 }
275 | '<' IDENTIFIER '>'
276 {
277 $1->left = $2;
278 $2->left = $3;
279 $$ = new Token( "tag_opt", _TAG_OPT );
280 $$->down = $1;
281 }
282 ;
283
284namenolist :
285 nameno
286 {
287 //cerr << "namenolist1: " << $1->text << "(" << $1 << ")" << endl;
288 $$ = nameliststart = $1;
289 }
290 | namenolist nameno
291 {
292 //cerr << "namenolist2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
293 $1->left = $2;
294 $$ = $2;
295 }
296 | namenolist ',' nameno
297 {
298 //cerr << "namenolist3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
299 $1->left = $2;
300 $2->left = $3;
301 $$ = $3;
302 }
303 ;
304
305nameno :
306 name
307 {
308 $$ = new Token( "nameno", _NAMENO );
309 $$->down = $1;
310 }
311 | name INTEGER
312 {
313 $$ = new Token( "nameno", _NAMENO );
314 $1->left = $2;
315 $$->down = $1;
316 }
317 ;
318
319namelist :
320 name
321 {
322 //cerr << "namelist1: " << $1->text << "(" << $1 << ")" << endl;
323 $$ = nameliststart = $1;
324 }
325 | namelist name
326 {
327 //cerr << "namelist2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
328 $1->left = $2;
329 $$ = $2;
330 }
331 | namelist ',' name
332 {
333 //cerr << "namelist3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
334 $1->left = $2;
335 $2->left = $3;
336 $$ = $3;
337 }
338 ;
339
340name :
341 IDENTIFIER
342 | CHARACTER
343 ;
344
345rulesection :
346 rules
347 {
348 //cerr << "rulesection1: " << $1->text << "(" << $1 << ")" << endl;
349 $$ = new Token( "rulesection", _RULESECTION );
350 $$->down = $1;
351 }
352 | error // no rules
353 {
354 cerr << "no rules in the input grammar" << endl;
355 exit( -1 );
356 }
357 ;
358
359// These grammar rules are complex because the Yacc language is LR(2) due to the optional ';' at the end of rules. The
360// following rules convert the LR(2) grammar into LR(1) by lengthening the rules to allow sufficient look
361// ahead. Unfortunately, this change makes handling the semantic actions more complex because there are two lists
362// (rules, rhs) being built but only one list tail can be returned through $$ for chaining.
363
364rules :
365 lhs rhs
366 {
367 //cerr << "rules1: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
368 $$ = rulestart;
369 }
370 | lhs rhs ';'
371 {
372 //cerr << "rules2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
373 $2->addDownLeftTail( $3 );
374 $$ = rulestart;
375 }
376 ;
377
378lhs :
379 IDENTIFIER ':'
380 {
381 //cerr << "lhs: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
382 $$ = new Token( "lhs", _LHS );
383 //cerr << " lhs: " << $$->text << "(" << $$ << ")" << endl;
384 $1->left = $2;
385 $$->down = $1;
386 }
387 ;
388
389rhs :
390 // empty
391 {
392 //cerr << "rhs1: " << $<tokenp>0->text << "(" << $<tokenp>0 << ")" << endl;
393 rulestart = new Token( "rule", _RULE );
394 rulestart->down = $<tokenp>0; // initial lhs is already on the stack from "rules"
395 $$ = new Token( "rhs", _RHS );
396 //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
397 $<tokenp>0->left = $$;
398 }
399 | rhs lhs
400 {
401 //cerr << "rhs2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
402 Token *temp = new Token( "rule", _RULE );
403 rulestart->addLeftTail( temp );
404 temp->down = $2;
405 $$ = new Token( "rhs", _RHS );
406 //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
407 $2->left = $$;
408 }
409 | rhs ';' lhs
410 {
411 //cerr << "rhs3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
412 $1->addDownLeftTail( $2 );
413 Token *temp = new Token( "rule", _RULE );
414 rulestart->addLeftTail( temp );
415 temp->down = $3;
416 $$ = new Token( "rhs", _RHS );
417 //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
418 $3->left = $$;
419 }
420 | rhs prod
421 {
422 //cerr << "rhs4: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
423 $1->addDownLeftTail( $2 );
424 $$ = $1;
425 }
426 | rhs '|'
427 {
428 //cerr << "rhs5: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
429 $1->addDownLeftTail( $2 );
430 $$ = new Token( "rhs", _RHS );
431 $1->left = $$;
432 //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
433 }
434 ;
435
436prod :
437 action
438 | IDENTIFIER
439 | CHARACTER
440 | prec
441 ;
442
443prec :
444 PREC name
445 {
446 //cerr << "prec: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
447 $1->left = $2;
448 $$ = new Token( "prec", _PREC );
449 $$->down = $1;
450 }
451 ;
452
453action :
454 '{'
455 { lexC(); }
456 ccode_opt
457 {
458 // Remove the trailing '}' added in lex.
459 string temp( lexYacc() );
460 $<tokenp>$ = new Token( temp.substr( 0, temp.length() - 1 ), CODE );
461 }
462 '}'
463 {
464 $1->left = $<tokenp>4;
465 $<tokenp>4->left = $5;
466 $$ = new Token( "action", _ACTION );
467 $$->down = $1;
468 }
469 ;
470
471usersection_opt :
472 // empty
473 {
474 //cerr << "usersection_opt" << endl;
475 // attach remaining WS to fictitious code
476 Token *temp = new Token( "", ws_list, CODE );
477 $$ = new Token( "usersection_opt", _USERSECTION_OPT );
478 $$->down = temp;
479 }
480 | MARK
481 { lexC(); }
482 ccode_opt
483 {
484 Token *temp = new Token( lexYacc(), CODE );
485 //cerr << "usersection_opt: " << $1->text << " " << temp->text << endl;
486 $1->left = temp;
487 $$ = new Token( "usersection_opt", _USERSECTION_OPT );
488 $$->down = $1;
489 }
490 ;
491
492ccode_opt :
493 // empty
494 {}
495 | blocks
496 ;
497
498// This rule matches internal braces "{}" in C code to the level of the braces of a union/action. These internal braces
499// are returned as Tokens from the lexer but are unused because the braces are already concatenated into the code string
500// built by the lexer. Therefore, the tokens for the braces are immediately deleted.
501
502blocks :
503 '{'
504 { delete $1; }
505 ccode_opt '}'
506 { delete $4; }
507 | blocks '{'
508 { delete $2; }
509 ccode_opt '}'
510 { delete $5; }
511 ;
512%%
513
514// Local Variables: //
515// mode: c++ //
516// tab-width: 4 //
517// compile-command: "make install" //
518// End: //
Note: See TracBrowser for help on using the repository browser.