source: tools/prettyprinter/parser.yy @ ac10576

Last change on this file since ac10576 was 8c97ee7, checked in by Thierry Delisle <tdelisle@…>, 8 years ago

Fixed the pretty printer

//
// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// parser.yy --
//
// Author           : Rodolfo G. Esteves
// Created On       : Sat Dec 15 13:44:21 2001
// Last Modified By : Peter A. Buhr
// Last Modified On : Thu Jun 29 09:26:47 2017
// Update Count     : 1045
//

%{
#define YYDEBUG_LEXER_TEXT( yylval )            // lexer loads this up each time
#define YYDEBUG 1                               // get the pretty debugging code to compile

#include <iostream>
using namespace std;
#include "ParserTypes.h"
#include "filter.h"

extern list<string> ws_list;                    // lex variable containing accumulated whitespace
void lexC( void );
string lexYacc( void );

void yyerror( string s ) {
    extern int yylineno;

    cerr << "Error in line: " << yylineno << ": " << s << endl;
    return;
}

// Heads of the sibling chains built by the actions below: the declaration list,
// the rule list, and the current token/type name list, respectively.
Token *declstart;
Token *rulestart;
Token *nameliststart;
%}

%union {
    Token *tokenp;
}

%token<tokenp> ','
%token<tokenp> '<'
%token<tokenp> '>'
%token<tokenp> '{'
%token<tokenp> '}'
%token<tokenp> ':'
%token<tokenp> ';'
%token<tokenp> '|'

%token<tokenp> MARK                             // %%
%token<tokenp> LCURL                            // %{
%token<tokenp> RCURL                            // %}

%token<tokenp> INTEGER                          // integer constant
%token<tokenp> CHARACTER                        // character constant
%token<tokenp> IDENTIFIER                       // identifier
%token<tokenp> CODE                             // C code

%token<tokenp> START                            // %start
%token<tokenp> UNION                            // %union
%token<tokenp> TOKEN                            // %token
%token<tokenp> LEFT                             // %left
%token<tokenp> RIGHT                            // %right
%token<tokenp> NONASSOC                         // %nonassoc
%token<tokenp> TYPE                             // %type
%token<tokenp> PURE_PARSER                      // %pure_parser
%token<tokenp> SEMANTIC_PARSER                  // %semantic_parser
%token<tokenp> EXPECT                           // %expect
%token<tokenp> THONG                            // %thong

%token<tokenp> PREC                             // %prec

%token END_TERMINALS                            // ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS

%type<tokenp> sections
%token _SECTIONS
%type<tokenp> mark
%type<tokenp> defsection_opt
%token _DEFSECTION_OPT
%type<tokenp> declarations
%type<tokenp> literalblock
%token _LITERALBLOCK
%type<tokenp> declaration
%token _DECLARATION
%type<tokenp> union
%type<tokenp> rword
%type<tokenp> tag_opt
%token _TAG_OPT
%type<tokenp> namenolist
%token _NAMENOLIST
%type<tokenp> nameno
%token _NAMENO
%type<tokenp> namelist
%token _NAMELIST
%type<tokenp> name
%type<tokenp> rulesection
%token _RULESECTION
%type<tokenp> rules
%token _RULE
%type<tokenp> lhs
%token _LHS
%type<tokenp> rhs
%token _RHS
%type<tokenp> prod
%type<tokenp> prec
%token _PREC
%type<tokenp> action
%token _ACTION
%type<tokenp> usersection_opt
%token _USERSECTION_OPT
%type<tokenp> ccode_opt
%type<tokenp> blocks

%start grammar

%%
grammar :
    sections
    {
        filter( $1 );                           // filter parse tree
        freeTree( $1 );                         // free parse-tree storage (optional: used with purify)
    }
    ;

sections :
    defsection_opt mark rulesection usersection_opt
    {
        $$ = new Token( "sections", _SECTIONS );
        $1->left = $2;
        $2->left = $3;
        $3->left = $4;
        $$->down = $1;
    }
    ;

mark :
    MARK
    | error                                     // missing %%
    {
        cerr << "no input grammar, missing %% mark" << endl;
        exit( -1 );
    }
    ;

defsection_opt :
    // empty
    {
        //cerr << "defsection_opt1: " << endl;
        $$ = new Token( "declaration_opt", _DEFSECTION_OPT );
    }
    | declarations
    {
        //cerr << "defsection_opt2: " << $1->text << "(" << $1 << ")" << endl;
        $$ = new Token( "declaration_opt", _DEFSECTION_OPT );
        $$->down = declstart;
    }
    ;

declarations :
    literalblock
    {
        //cerr << "declarations1: " << $1->text << "(" << $1 << ")" << endl;
        $$ = declstart = $1;
    }
    | declaration
    {
        //cerr << "declarations2: " << $1->text << "(" << $1 << ")" << endl;
        $$ = declstart = new Token( "declaration", _DECLARATION );
        $$->down = $1;
    }
    | declarations literalblock
    {
        //cerr << "declarations3: "<< $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        $1->left = $2;
        $$ = $2;
    }
    | declarations declaration
    {
        //cerr << "declarations4: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        $$ = new Token( "declaration", _DECLARATION );
        $1->left = $$;
        $$->down = $2;
    }
    ;

literalblock :
    LCURL
    { lexC(); }
    ccode_opt
    { $<tokenp>$ = new Token( lexYacc(), CODE ); }
    RCURL
    {
        //cerr << "literalblock: " << $1->text << "(" << $1 << ") " << $<tokenp>4->text << " " << $5->text << "(" << $5 << ")" << endl;
        $1->left = $<tokenp>4;
        $<tokenp>4->left = $5;
        $$ = new Token( "literalblock", _LITERALBLOCK );
        $$->down = $1;
    }
    ;

declaration :
    union
    | START IDENTIFIER
    {
        $1->left = $2;
        $$ = $1;
    }
    | rword tag_opt namenolist
    {
        Token *n = new Token( "namenolist", _NAMENOLIST );
        n->down = nameliststart;
        $1->left = $2;
        $2->left = n;
        $$ = $1;
    }
    | TYPE tag_opt namelist
    {
        Token *n = new Token( "namelist", _NAMELIST );
        n->down = nameliststart;
        $1->left = $2;
        $2->left = n;
        $$ = $1;
    }
    | PURE_PARSER
    | SEMANTIC_PARSER
    | EXPECT INTEGER                            // bison
    {
        $1->left = $2;
        $$ = $1;
    }
    | THONG                                     // bison
    ;

union :
    UNION '{'
    { lexC(); }
    ccode_opt
    {
        // Remove the trailing '}' which is added in lex.
        string temp( lexYacc() );
        $<tokenp>$ = new Token( temp.substr( 0, temp.length() - 1 ), CODE );
    }
    '}'
    {
        $1->left = $2;
        $2->left = $<tokenp>5;
        $<tokenp>5->left = $6;
        $$ = $1;
    }
    ;

rword :
    TOKEN
    | LEFT
    | RIGHT
    | NONASSOC
    ;

tag_opt :
    // empty
    {
        //cerr << "tag_opt" << endl;
        $$ = new Token( "tag_opt", _TAG_OPT );
    }
    | '<' IDENTIFIER '>'
    {
        $1->left = $2;
        $2->left = $3;
        $$ = new Token( "tag_opt", _TAG_OPT );
        $$->down = $1;
    }
    ;

namenolist :
    nameno
    {
        //cerr << "namenolist1: " << $1->text << "(" << $1 << ")" << endl;
        $$ = nameliststart = $1;
    }
    | namenolist nameno
    {
        //cerr << "namenolist2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        $1->left = $2;
        $$ = $2;
    }
    | namenolist ',' nameno
    {
        //cerr << "namenolist3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
        $1->left = $2;
        $2->left = $3;
        $$ = $3;
    }
    ;

nameno :
    name
    {
        $$ = new Token( "nameno", _NAMENO );
        $$->down = $1;
    }
    | name INTEGER
    {
        $$ = new Token( "nameno", _NAMENO );
        $1->left = $2;
        $$->down = $1;
    }
    ;

namelist :
    name
    {
        //cerr << "namelist1: " << $1->text << "(" << $1 << ")" << endl;
        $$ = nameliststart = $1;
    }
    | namelist name
    {
        //cerr << "namelist2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        $1->left = $2;
        $$ = $2;
    }
    | namelist ',' name
    {
        //cerr << "namelist3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
        $1->left = $2;
        $2->left = $3;
        $$ = $3;
    }
    ;

name :
    IDENTIFIER
    | CHARACTER
    ;

rulesection :
    rules
    {
        //cerr << "rulesection1: " << $1->text << "(" << $1 << ")" << endl;
        $$ = new Token( "rulesection", _RULESECTION );
        $$->down = $1;
    }
    | error                                     // no rules
    {
        cerr << "no rules in the input grammar" << endl;
        exit( -1 );
    }
    ;

// These grammar rules are complex because the Yacc language is LR(2) due to the optional ';' at the end of rules. The
// following rules convert the LR(2) grammar into LR(1) by lengthening the rules to allow sufficient lookahead.
// Unfortunately, this change makes handling the semantic actions more complex because there are two lists (rules, rhs)
// being built but only one list tail can be returned through $$ for chaining.
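//
// As an illustration (a hypothetical input grammar, not part of this file), consider two
// adjacent rules written without the optional ';':
//
//     expr : expr '+' term
//     term : NUMBER
//
// When the parser reaches the IDENTIFIER "term", it cannot tell whether that name extends
// the first right-hand side or begins a new rule until it also sees the following ':',
// i.e., two tokens of lookahead, hence the lengthened rules below.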

rules :
    lhs rhs
    {
        //cerr << "rules1: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        $$ = rulestart;
    }
    | lhs rhs ';'
    {
        //cerr << "rules2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
        $2->addDownLeftTail( $3 );
        $$ = rulestart;
    }
    ;

lhs :
    IDENTIFIER ':'
    {
        //cerr << "lhs: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        $$ = new Token( "lhs", _LHS );
        //cerr << " lhs: " << $$->text << "(" << $$ << ")" << endl;
        $1->left = $2;
        $$->down = $1;
    }
    ;

rhs :
    // empty
    {
        //cerr << "rhs1: " << $<tokenp>0->text << "(" << $<tokenp>0 << ")" << endl;
        rulestart = new Token( "rule", _RULE );
        rulestart->down = $<tokenp>0;           // initial lhs is already on the stack from "rules"
        $$ = new Token( "rhs", _RHS );
        //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
        $<tokenp>0->left = $$;
    }
    | rhs lhs
    {
        //cerr << "rhs2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        Token *temp = new Token( "rule", _RULE );
        rulestart->addLeftTail( temp );
        temp->down = $2;
        $$ = new Token( "rhs", _RHS );
        //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
        $2->left = $$;
    }
    | rhs ';' lhs
    {
        //cerr << "rhs3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
        $1->addDownLeftTail( $2 );
        Token *temp = new Token( "rule", _RULE );
        rulestart->addLeftTail( temp );
        temp->down = $3;
        $$ = new Token( "rhs", _RHS );
        //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
        $3->left = $$;
    }
    | rhs prod
    {
        //cerr << "rhs4: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        $1->addDownLeftTail( $2 );
        $$ = $1;
    }
    | rhs '|'
    {
        //cerr << "rhs5: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        $1->addDownLeftTail( $2 );
        $$ = new Token( "rhs", _RHS );
        $1->left = $$;
        //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
    }
    ;

prod :
    action
    | IDENTIFIER
    | CHARACTER
    | prec
    ;

prec :
    PREC name
    {
        //cerr << "prec: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
        $1->left = $2;
        $$ = new Token( "prec", _PREC );
        $$->down = $1;
    }
    ;

action :
    '{'
    { lexC(); }
    ccode_opt
    {
        // Remove the trailing '}' added in lex.
        string temp( lexYacc() );
        $<tokenp>$ = new Token( temp.substr( 0, temp.length() - 1 ), CODE );
    }
    '}'
    {
        $1->left = $<tokenp>4;
        $<tokenp>4->left = $5;
        $$ = new Token( "action", _ACTION );
        $$->down = $1;
    }
    ;

usersection_opt :
    // empty
    {
        //cerr << "usersection_opt" << endl;
        // attach remaining WS to fictitious code
        Token *temp = new Token( "", ws_list, CODE );
        $$ = new Token( "usersection_opt", _USERSECTION_OPT );
        $$->down = temp;
    }
    | MARK
    { lexC(); }
    ccode_opt
    {
        Token *temp = new Token( lexYacc(), CODE );
        //cerr << "usersection_opt: " << $1->text << " " << temp->text << endl;
        $1->left = temp;
        $$ = new Token( "usersection_opt", _USERSECTION_OPT );
        $$->down = $1;
    }
    ;

ccode_opt :
    // empty
    {}
    | blocks
    ;

// This rule matches internal braces "{}" in C code to the level of the braces of a union/action. These internal braces
// are returned as Tokens from the lexer but are unused because the braces are already concatenated into the code string
// built by the lexer. Therefore, the tokens for the braces are immediately deleted.
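//
// For example (a hypothetical action, not taken from this grammar), in
//
//     { if ( x ) { f(); } }
//
// the inner '{' and '}' are still delivered to the parser as tokens, but their characters
// have already been copied into the code string assembled by the lexer (returned by
// lexYacc()), so the redundant Token objects carry no information and are deleted on the spot.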

blocks :
    '{'
    { delete $1; }
    ccode_opt '}'
    { delete $4; }
    | blocks '{'
    { delete $2; }
    ccode_opt '}'
    { delete $5; }
    ;
%%

// Local Variables: //
// mode: c++ //
// tab-width: 4 //
// compile-command: "make install" //
// End: //