source: tools/prettyprinter/parser.yy@ 02c816fc

Last change on this file since 02c816fc was fc1ef62, checked in by Peter A. Buhr <pabuhr@…>, 8 years ago

add %precedence operator precedence

//
// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// parser.yy --
//
// Author           : Rodolfo G. Esteves
// Created On       : Sat Dec 15 13:44:21 2001
// Last Modified By : Peter A. Buhr
// Last Modified On : Tue Aug 29 16:34:10 2017
// Update Count     : 1047
//

%{
#define YYDEBUG_LEXER_TEXT( yylval )    // lexer loads this up each time
#define YYDEBUG 1                       // get the pretty debugging code to compile

#include <iostream>
using namespace std;
#include "ParserTypes.h"
#include "filter.h"

extern list<string> ws_list;            // lex variable containing accumulated whitespace
void lexC( void );
string lexYacc( void );

void yyerror( string s ) {
    extern int yylineno;

    cerr << "Error in line: " << yylineno << ": " << s << endl;
    return;
}

Token *declstart;
Token *rulestart;
Token *nameliststart;
%}

%union {
    Token *tokenp;
}

%token<tokenp> ','
%token<tokenp> '<'
%token<tokenp> '>'
%token<tokenp> '{'
%token<tokenp> '}'
%token<tokenp> ':'
%token<tokenp> ';'
%token<tokenp> '|'

%token<tokenp> MARK                     // %%
%token<tokenp> LCURL                    // %{
%token<tokenp> RCURL                    // %}

%token<tokenp> INTEGER                  // integer constant
%token<tokenp> CHARACTER                // character constant
%token<tokenp> IDENTIFIER               // identifier
%token<tokenp> CODE                     // C code

%token<tokenp> START                    // %start
%token<tokenp> UNION                    // %union
%token<tokenp> TOKEN                    // %token
%token<tokenp> LEFT                     // %left
%token<tokenp> RIGHT                    // %right
%token<tokenp> NONASSOC                 // %nonassoc
%token<tokenp> PRECEDENCE               // %precedence
%token<tokenp> TYPE                     // %type
%token<tokenp> PURE_PARSER              // %pure_parser
%token<tokenp> SEMANTIC_PARSER          // %semantic_parser
%token<tokenp> EXPECT                   // %expect
%token<tokenp> THONG                    // %thong

%token<tokenp> PREC                     // %prec

%token END_TERMINALS                    // ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS

%type<tokenp> sections
%token _SECTIONS
%type<tokenp> mark
%type<tokenp> defsection_opt
%token _DEFSECTION_OPT
%type<tokenp> declarations
%type<tokenp> literalblock
%token _LITERALBLOCK
%type<tokenp> declaration
%token _DECLARATION
%type<tokenp> union
%type<tokenp> rword
%type<tokenp> tag_opt
%token _TAG_OPT
%type<tokenp> namenolist
%token _NAMENOLIST
%type<tokenp> nameno
%token _NAMENO
%type<tokenp> namelist
%token _NAMELIST
%type<tokenp> name
%type<tokenp> rulesection
%token _RULESECTION
%type<tokenp> rules
%token _RULE
%type<tokenp> lhs
%token _LHS
%type<tokenp> rhs
%token _RHS
%type<tokenp> prod
%type<tokenp> prec
%token _PREC
%type<tokenp> action
%token _ACTION
%type<tokenp> usersection_opt
%token _USERSECTION_OPT
%type<tokenp> ccode_opt
%type<tokenp> blocks

%start grammar

%%
grammar :
    sections
        {
            filter( $1 );               // filter parse tree
            freeTree( $1 );             // free parse-tree storage (optional: used with purify)
        }
    ;

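// A yacc input file has the overall layout parsed by "sections" below (an illustrative
// sketch, not input belonging to this grammar):
//
//     declarations                     (optional definition section)
//     %%
//     grammar rules                    (rule section)
//     %%                               (optional)
//     user C code                      (optional user section)
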
sections :
    defsection_opt mark rulesection usersection_opt
        {
            $$ = new Token( "sections", _SECTIONS );
            $1->left = $2;
            $2->left = $3;
            $3->left = $4;
            $$->down = $1;
        }
    ;

mark :
    MARK
    | error                             // missing %%
        {
            cerr << "no input grammar, missing %% mark" << endl;
            exit( -1 );
        }
    ;

defsection_opt :
    // empty
        {
            //cerr << "defsection_opt1: " << endl;
            $$ = new Token( "declaration_opt", _DEFSECTION_OPT );
        }
    | declarations
        {
            //cerr << "defsection_opt2: " << $1->text << "(" << $1 << ")" << endl;
            $$ = new Token( "declaration_opt", _DEFSECTION_OPT );
            $$->down = declstart;
        }
    ;

declarations :
    literalblock
        {
            //cerr << "declarations1: " << $1->text << "(" << $1 << ")" << endl;
            $$ = declstart = $1;
        }
    | declaration
        {
            //cerr << "declarations2: " << $1->text << "(" << $1 << ")" << endl;
            $$ = declstart = new Token( "declaration", _DECLARATION );
            $$->down = $1;
        }
    | declarations literalblock
        {
            //cerr << "declarations3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            $1->left = $2;
            $$ = $2;
        }
    | declarations declaration
        {
            //cerr << "declarations4: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            $$ = new Token( "declaration", _DECLARATION );
            $1->left = $$;
            $$->down = $2;
        }
    ;

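// The lexer provides two mode switches, declared in the prologue above: lexC() appears to
// put the scanner into C-code mode, accumulating the raw text of the enclosed C code, and
// lexYacc() appears to switch back to yacc mode and return the accumulated text. The rules
// below call lexC() in a mid-rule action before ccode_opt and wrap the string returned by
// lexYacc() in a CODE token.
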
literalblock :
    LCURL
        { lexC(); }
    ccode_opt
        { $<tokenp>$ = new Token( lexYacc(), CODE ); }
    RCURL
        {
            //cerr << "literalblock: " << $1->text << "(" << $1 << ") " << $<tokenp>4->text << " " << $5->text << "(" << $5 << ")" << endl;
            $1->left = $<tokenp>4;
            $<tokenp>4->left = $5;
            $$ = new Token( "literalblock", _LITERALBLOCK );
            $$->down = $1;
        }
    ;

declaration :
    union
    | START IDENTIFIER
        {
            $1->left = $2;
            $$ = $1;
        }
    | rword tag_opt namenolist
        {
            Token *n = new Token( "namenolist", _NAMENOLIST );
            n->down = nameliststart;
            $1->left = $2;
            $2->left = n;
            $$ = $1;
        }
    | TYPE tag_opt namelist
        {
            Token *n = new Token( "namelist", _NAMELIST );
            n->down = nameliststart;
            $1->left = $2;
            $2->left = n;
            $$ = $1;
        }
    | PURE_PARSER
    | SEMANTIC_PARSER
    | EXPECT INTEGER                    // bison
        {
            $1->left = $2;
            $$ = $1;
        }
    | THONG                             // bison
    ;

union :
    UNION '{'
        { lexC(); }
    ccode_opt
        {
            // Remove the trailing '}' which is added in lex.
            string temp( lexYacc() );
            $<tokenp>$ = new Token( temp.substr( 0, temp.length() - 1 ), CODE );
        }
    '}'
        {
            $1->left = $2;
            $2->left = $<tokenp>5;
            $<tokenp>5->left = $6;
            $$ = $1;
        }
    ;

rword :
    TOKEN
    | LEFT
    | RIGHT
    | NONASSOC
    | PRECEDENCE
    ;

tag_opt :
    // empty
        {
            //cerr << "tag_opt" << endl;
            $$ = new Token( "tag_opt", _TAG_OPT );
        }
    | '<' IDENTIFIER '>'
        {
            $1->left = $2;
            $2->left = $3;
            $$ = new Token( "tag_opt", _TAG_OPT );
            $$->down = $1;
        }
    ;

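// The list forms below correspond to the two kinds of declarations above (an illustrative
// sketch, not input belonging to this grammar): token-class declarations may attach an
// explicit token number to each name, e.g.
//
//     %token<tokenp> IDENTIFIER 257
//
// which is handled by namenolist/nameno, whereas %type takes a plain list of names, e.g.
//
//     %type<tokenp> sections mark
//
// which is handled by namelist.
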
namenolist :
    nameno
        {
            //cerr << "namenolist1: " << $1->text << "(" << $1 << ")" << endl;
            $$ = nameliststart = $1;
        }
    | namenolist nameno
        {
            //cerr << "namenolist2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            $1->left = $2;
            $$ = $2;
        }
    | namenolist ',' nameno
        {
            //cerr << "namenolist3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
            $1->left = $2;
            $2->left = $3;
            $$ = $3;
        }
    ;

nameno :
    name
        {
            $$ = new Token( "nameno", _NAMENO );
            $$->down = $1;
        }
    | name INTEGER
        {
            $$ = new Token( "nameno", _NAMENO );
            $1->left = $2;
            $$->down = $1;
        }
    ;

namelist :
    name
        {
            //cerr << "namelist1: " << $1->text << "(" << $1 << ")" << endl;
            $$ = nameliststart = $1;
        }
    | namelist name
        {
            //cerr << "namelist2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            $1->left = $2;
            $$ = $2;
        }
    | namelist ',' name
        {
            //cerr << "namelist3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
            $1->left = $2;
            $2->left = $3;
            $$ = $3;
        }
    ;

name :
    IDENTIFIER
    | CHARACTER
    ;

rulesection :
    rules
        {
            //cerr << "rulesection1: " << $1->text << "(" << $1 << ")" << endl;
            $$ = new Token( "rulesection", _RULESECTION );
            $$->down = $1;
        }
    | error                             // no rules
        {
            cerr << "no rules in the input grammar" << endl;
            exit( -1 );
        }
    ;

// These grammar rules are complex because the Yacc language is LR(2) due to the optional ';' at the end of rules. The
// following rules convert the LR(2) grammar into LR(1) by lengthening the rules to allow sufficient look ahead.
// Unfortunately, this change makes handling the semantic actions more complex because there are two lists
// (rules, rhs) being built but only one list tail can be returned through $$ for chaining.

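// For example, given the input fragment (an illustrative sketch, not input belonging to
// this grammar):
//
//     stmt : IDENTIFIER
//     expr : IDENTIFIER ;
//
// after the first IDENTIFIER the parser must see both the name "expr" and the ':' that
// follows it before it can tell that "expr" starts a new rule rather than continuing the
// right-hand side of "stmt", because the ';' terminating a rule may be omitted; hence two
// tokens of look ahead are needed.
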
rules :
    lhs rhs
        {
            //cerr << "rules1: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            $$ = rulestart;
        }
    | lhs rhs ';'
        {
            //cerr << "rules2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
            $2->addDownLeftTail( $3 );
            $$ = rulestart;
        }
    ;

lhs :
    IDENTIFIER ':'
        {
            //cerr << "lhs: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            $$ = new Token( "lhs", _LHS );
            //cerr << " lhs: " << $$->text << "(" << $$ << ")" << endl;
            $1->left = $2;
            $$->down = $1;
        }
    ;

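// Note: in the empty "rhs" alternative below, $<tokenp>0 refers to the semantic value of
// the symbol to the left of "rhs" on the parse stack, i.e. the "lhs" token already shifted
// by the "rules" rule; it is used to link that first left-hand side into the rule list
// rooted at rulestart.
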
rhs :
    // empty
        {
            //cerr << "rhs1: " << $<tokenp>0->text << "(" << $<tokenp>0 << ")" << endl;
            rulestart = new Token( "rule", _RULE );
            rulestart->down = $<tokenp>0;       // initial lhs is already on the stack from "rules"
            $$ = new Token( "rhs", _RHS );
            //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
            $<tokenp>0->left = $$;
        }
    | rhs lhs
        {
            //cerr << "rhs2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            Token *temp = new Token( "rule", _RULE );
            rulestart->addLeftTail( temp );
            temp->down = $2;
            $$ = new Token( "rhs", _RHS );
            //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
            $2->left = $$;
        }
    | rhs ';' lhs
        {
            //cerr << "rhs3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
            $1->addDownLeftTail( $2 );
            Token *temp = new Token( "rule", _RULE );
            rulestart->addLeftTail( temp );
            temp->down = $3;
            $$ = new Token( "rhs", _RHS );
            //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
            $3->left = $$;
        }
    | rhs prod
        {
            //cerr << "rhs4: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            $1->addDownLeftTail( $2 );
            $$ = $1;
        }
    | rhs '|'
        {
            //cerr << "rhs5: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            $1->addDownLeftTail( $2 );
            $$ = new Token( "rhs", _RHS );
            $1->left = $$;
            //cerr << " rhs: " << $$->text << "(" << $$ << ")" << endl;
        }
    ;

prod :
    action
    | IDENTIFIER
    | CHARACTER
    | prec
    ;

prec :
    PREC name
        {
            //cerr << "prec: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
            $1->left = $2;
            $$ = new Token( "prec", _PREC );
            $$->down = $1;
        }
    ;

action :
    '{'
        { lexC(); }
    ccode_opt
        {
            // Remove the trailing '}' added in lex.
            string temp( lexYacc() );
            $<tokenp>$ = new Token( temp.substr( 0, temp.length() - 1 ), CODE );
        }
    '}'
        {
            $1->left = $<tokenp>4;
            $<tokenp>4->left = $5;
            $$ = new Token( "action", _ACTION );
            $$->down = $1;
        }
    ;

usersection_opt :
    // empty
        {
            //cerr << "usersection_opt" << endl;
            // attach remaining WS to fictitious code
            Token *temp = new Token( "", ws_list, CODE );
            $$ = new Token( "usersection_opt", _USERSECTION_OPT );
            $$->down = temp;
        }
    | MARK
        { lexC(); }
    ccode_opt
        {
            Token *temp = new Token( lexYacc(), CODE );
            //cerr << "usersection_opt: " << $1->text << " " << temp->text << endl;
            $1->left = temp;
            $$ = new Token( "usersection_opt", _USERSECTION_OPT );
            $$->down = $1;
        }
    ;

ccode_opt :
    // empty
        {}
    | blocks
    ;

// This rule matches internal braces "{}" in C code to the level of the braces of a union/action. These internal braces
// are returned as Tokens from the lexer but are unused because the braces are already concatenated into the code string
// built by the lexer. Therefore, the tokens for the braces are immediately deleted.

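// For example, in an action such as (an illustrative sketch, not input belonging to this
// grammar):
//
//     { if ( cond ) { $$ = $1; } }
//
// the text of the inner "{ ... }" has already been copied into the CODE string by the
// lexer, so the '{' and '}' tokens returned for the inner braces carry no extra
// information and are deleted as soon as they are matched.
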
blocks :
    '{'
        { delete $1; }
    ccode_opt '}'
        { delete $4; }
    | blocks '{'
        { delete $2; }
    ccode_opt '}'
        { delete $5; }
    ;
%%

// Local Variables: //
// mode: c++ //
// tab-width: 4 //
// compile-command: "make install" //
// End: //