source: tools/prettyprinter/yacc.yy@ a67b60e

ADT aaron-thesis arm-eh ast-experimental cleanup-dtors deferred_resn demangler enum forall-pointer-decay jacob/cs343-translation jenkins-sandbox new-ast new-ast-unique-expr new-env no_list persistent-indexer pthread-emulation qualifiedEnum resolv-new with_gc
Last change on this file since a67b60e was a67b60e, checked in by Peter A. Buhr <pabuhr@…>, 8 years ago

rename files and adjust includes

  • Property mode set to 100644
File size: 12.0 KB
Line 
1/* -*- Mode: C -*-
2 *
3 * Pretty Printer, Copyright (C) Rodolfo G. Esteves and Peter A. Buhr 2001
4 * Permission is granted to copy this grammar and to use it within software systems.
5 * THIS GRAMMAR IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
6 *
7 * yacc.y --
8 *
9 * Author : Rodolfo G. Esteves
10 * Created On : Sat Dec 15 13:44:21 2001
11 * Last Modified By : Peter A. Buhr
12 * Last Modified On : Mon Jun 27 21:51:06 2016
13 * Update Count : 1028
14 */
15
16%{
17#include <stdio.h>
18#include "parse.h"
19#include "filter.h"
20
21#define YYDEBUG 1 // get the pretty debugging code to compile
22
23extern list<string> ws_list; // lex variable containing accumulated whitespace
24void lexC( void );
25string lexYacc( void );
26
// Parser error hook: print the message on cerr, prefixed with the scanner's current line number.
void yyerror( char *s ) {
	extern int yylineno;								// line counter defined outside this file
	cerr << "Error in line: " << yylineno << ": " << s << endl;
}
33
// Heads of the lists built by the left-recursive rules below. Those rules return the list TAIL through $$
// so the next element can be chained, hence the head is remembered here for the enclosing rule to use.
Token *declstart = NULL;								// set by "declarations", read by "defsection_opt"
Token *rulestart = NULL;								// set by the empty "rhs", read by "rhs" and "rules"
Token *nameliststart = NULL;							// set by "namenolist"/"namelist", read by "declaration"
37%}
38
// Semantic value of every typed symbol: a pointer to its parse-tree node.
%union { Token *tokenp; }
42
// Terminal tokens. Declaration order is significant and must not be changed.

// single-character punctuation
%token<tokenp>	','  '<'  '>'  '{'  '}'  ':'  ';'  '|'

// delimiters, in order:  %%  %{  %}
%token<tokenp>	MARK  LCURL  RCURL

// in order: integer constant, character constant, identifier, C code
%token<tokenp>	INTEGER  CHARACTER  IDENTIFIER  CODE

// in order: %start %union %token %left %right %nonassoc %type
%token<tokenp>	START  UNION  TOKEN  LEFT  RIGHT  NONASSOC  TYPE

// in order: %pure_parser %semantic_parser %expect %thong
%token<tokenp>	PURE_PARSER  SEMANTIC_PARSER  EXPECT  THONG

// %prec
%token<tokenp>	PREC

%token END_TERMINALS									// ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS
76
// Nonterminals all carry a Token pointer. A nonterminal whose action creates its own interior tree node
// has a companion token _NAME, declared after END_TERMINALS, that is passed to the Token constructor as
// the node's kind. The relative order of the declarations below is unchanged from the original.
%type<tokenp> sections
%token _SECTIONS
%type<tokenp> mark defsection_opt
%token _DEFSECTION_OPT
%type<tokenp> declarations literalblock
%token _LITERALBLOCK
%type<tokenp> declaration
%token _DECLARATION
%type<tokenp> union rword tag_opt
%token _TAG_OPT
%type<tokenp> namenolist
%token _NAMENOLIST
%type<tokenp> nameno
%token _NAMENO
%type<tokenp> namelist
%token _NAMELIST
%type<tokenp> name rulesection
%token _RULESECTION
%type<tokenp> rules
%token _RULE
%type<tokenp> lhs
%token _LHS
%type<tokenp> rhs
%token _RHS
%type<tokenp> prod prec
%token _PREC
%type<tokenp> action
%token _ACTION
%type<tokenp> usersection_opt
%token _USERSECTION_OPT
%type<tokenp> ccode_opt blocks

%start grammar
117
118%%
// Start symbol: "sections" has produced the root of the parse tree for the whole input.
grammar	: sections
		{
			Token *root = $1;
			filter( root );								/* filter parse tree */
			freeTree( root );							/* free parse-tree storage (optional: used with purify) */
		}
	;
125
// Link the four parts of the input in source order through their "left" fields and place the chain
// under a new "sections" node.
sections : defsection_opt mark rulesection usersection_opt
		{
			Token *root = new Token( "sections", _SECTIONS );
			root->down = $1;
			$1->left = $2;
			$2->left = $3;
			$3->left = $4;
			$$ = root;
		}
	;
135
// The "%%" separating the definition section from the rules; its absence is fatal.
mark	: MARK
		{ $$ = $1; }
	| error											/* missing %% */
		{
			cerr << "no input grammar, missing %% mark" << endl;
			exit( -1 );
		}
	;
143
// Optional definition section. A node is always created; with no declarations it has nothing below it.
defsection_opt : /* empty */
		{ $$ = new Token( "declaration_opt", _DEFSECTION_OPT ); }
	| declarations
		{
			// $1 is the last list entry; the first entry was saved in declstart.
			Token *section = new Token( "declaration_opt", _DEFSECTION_OPT );
			section->down = declstart;
			$$ = section;
		}
	;
156
// List of literal blocks and declarations, chained through "left". Each alternative returns the newest
// (last) entry so the next one can be appended to it; the first entry is recorded in declstart. A
// literalblock is already a node of its own, whereas a declaration is wrapped in a "declaration" node.
declarations : literalblock
		{
			declstart = $1;
			$$ = $1;
		}
	| declaration
		{
			Token *node = new Token( "declaration", _DECLARATION );
			node->down = $1;
			declstart = node;
			$$ = node;
		}
	| declarations literalblock
		{
			$1->left = $2;
			$$ = $2;
		}
	| declarations declaration
		{
			Token *node = new Token( "declaration", _DECLARATION );
			$1->left = node;
			node->down = $2;
			$$ = node;
		}
	;
182
// "%{ ... %}" block. lexC() is called once the opening "%{" has been seen (presumably it puts the scanner
// into C-code mode -- confirm in the lexer); the string returned by lexYacc() becomes the text of a CODE
// token. The result is a "literalblock" node over the chain  %{ -> code -> %}.
literalblock : LCURL
		{ lexC(); }
	  ccode_opt
		{ $<tokenp>$ = new Token( lexYacc(), CODE ); }
	  RCURL
		{
			Token *code = $<tokenp>4;					// value of the second mid-rule action
			$1->left = code;
			code->left = $5;
			Token *block = new Token( "literalblock", _LITERALBLOCK );
			block->down = $1;
			$$ = block;
		}
	;
196
// One definition-section directive. The keyword token is returned with its operands chained after it
// through "left"; name lists are first gathered under a "namenolist"/"namelist" node whose first entry
// was saved in nameliststart.
declaration : union
	| START IDENTIFIER
		{
			$1->left = $2;
			$$ = $1;
		}
	| rword tag_opt namenolist
		{
			Token *names = new Token( "namenolist", _NAMENOLIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| TYPE tag_opt namelist
		{
			Token *names = new Token( "namelist", _NAMELIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| PURE_PARSER
	| SEMANTIC_PARSER
	| EXPECT INTEGER									/* bison */
		{
			$1->left = $2;
			$$ = $1;
		}
	| THONG												/* bison */
	;
228
// "%union { ... }". The body is collected as a string by the lexer and turned into a CODE token; the
// result is the chain  %union -> '{' -> code -> '}'  returned through the %union token.
union	: UNION
	  '{'
		{ lexC(); }
	  ccode_opt
		{
			// The lexer appends the closing '}' to the code string; drop that last character.
			string body( lexYacc() );
			$<tokenp>$ = new Token( body.substr( 0, body.length() - 1 ), CODE );
		}
	  '}'
		{
			Token *code = $<tokenp>5;					// value of the second mid-rule action
			code->left = $6;
			$2->left = code;
			$1->left = $2;
			$$ = $1;
		}
	;
246
// Keywords that introduce a list of names with optional numbers; the keyword token is passed through.
rword	: TOKEN | LEFT | RIGHT | NONASSOC
	;
252
// Optional "<tag>". A "tag_opt" node is always returned; when a tag is present the chain
// '<' -> IDENTIFIER -> '>'  is placed below it.
tag_opt	: /* empty */
		{ $$ = new Token( "tag_opt", _TAG_OPT ); }
	| '<' IDENTIFIER '>'
		{
			$2->left = $3;
			$1->left = $2;
			Token *tag = new Token( "tag_opt", _TAG_OPT );
			tag->down = $1;
			$$ = tag;
		}
	;
266
// List of "nameno" entries, optionally separated by commas (comma tokens stay in the chain). The first
// entry is saved in nameliststart; $$ is always the last entry so that the next one can be appended.
namenolist : nameno
		{
			nameliststart = $1;
			$$ = $1;
		}
	| namenolist nameno
		{
			$1->left = $2;
			$$ = $2;
		}
	| namenolist ',' nameno
		{
			$2->left = $3;
			$1->left = $2;
			$$ = $3;
		}
	;
286
// A name with an optional integer following it, wrapped in a "nameno" node.
nameno	: name
		{
			Token *entry = new Token( "nameno", _NAMENO );
			entry->down = $1;
			$$ = entry;
		}
	| name INTEGER
		{
			Token *entry = new Token( "nameno", _NAMENO );
			entry->down = $1;
			$1->left = $2;
			$$ = entry;
		}
	;
299
// List of names, optionally separated by commas (comma tokens stay in the chain). The first name is
// saved in nameliststart; $$ is always the last name so that the next one can be appended.
namelist : name
		{
			nameliststart = $1;
			$$ = $1;
		}
	| namelist name
		{
			$1->left = $2;
			$$ = $2;
		}
	| namelist ',' name
		{
			$2->left = $3;
			$1->left = $2;
			$$ = $3;
		}
	;
319
// A symbol name: an identifier or a character constant; the token is passed through unchanged.
name	: IDENTIFIER | CHARACTER
	;
323
// The rule section: a "rulesection" node over the list of rules. A grammar without rules is fatal.
rulesection : rules
		{
			Token *section = new Token( "rulesection", _RULESECTION );
			section->down = $1;
			$$ = section;
		}
	| error											/* no rules */
		{
			cerr << "no rules in the input grammar" << endl;
			exit( -1 );
		}
	;
336
337// These grammar rules are complex because the Yacc language is LR(2) due to the optional ';' at the end of
338// rules. The following rules convert the LR(2) grammar into LR(1) by lengthening the rules to allow
339// sufficient look ahead. Unfortunately, this change makes handling the semantic actions more complex because
340// there are two lists (rules, rhs) being built but only one list tail can be returned through $$ for
341// chaining.
342
// All rules of the grammar. The list itself is assembled inside "rhs"; this rule only attaches an
// optional final ';' to the last right-hand side and returns the first "rule" node, kept in rulestart.
rules	: lhs rhs
		{ $$ = rulestart; }
	| lhs rhs ';'
		{
			$2->addDownLeftTail( $3 );					// final ';' goes with the last rhs
			$$ = rulestart;
		}
	;
355
// Left-hand side of a rule: an "lhs" node over the chain  IDENTIFIER -> ':'.
lhs	: IDENTIFIER ':'
		{
			Token *head = new Token( "lhs", _LHS );
			head->down = $1;
			$1->left = $2;
			$$ = head;
		}
	;
365
// Right-hand sides, and (because of the LR(2) workaround described above) the start of every rule after
// the first. $$ is always the "rhs" node currently being filled. New "rule" nodes are appended to the
// list that begins at rulestart via addLeftTail(); symbols are appended to the current rhs via
// addDownLeftTail(). Both helpers are defined outside this file; judging by their names they append at
// the end of the "left" chain, respectively of the "down" node's "left" chain -- confirm in Token.
rhs	: /* empty */
		{
			// Very first rule: its lhs is the stack entry just below this one (pushed by "rules").
			Token *first = $<tokenp>0;
			rulestart = new Token( "rule", _RULE );
			rulestart->down = first;
			Token *body = new Token( "rhs", _RHS );
			first->left = body;
			$$ = body;
		}
	| rhs lhs
		{
			// The previous rule ended without ';' and a new rule begins.
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $2;
			Token *body = new Token( "rhs", _RHS );
			$2->left = body;
			$$ = body;
		}
	| rhs ';' lhs
		{
			// The previous rule ended with ';', which is kept with that rule's rhs; a new rule begins.
			$1->addDownLeftTail( $2 );
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $3;
			Token *body = new Token( "rhs", _RHS );
			$3->left = body;
			$$ = body;
		}
	| rhs prod
		{
			// Another symbol, action, or %prec in the current alternative.
			$1->addDownLeftTail( $2 );
			$$ = $1;
		}
	| rhs '|'
		{
			// '|' closes the current alternative; a fresh rhs is linked after it for the next one.
			$1->addDownLeftTail( $2 );
			Token *next = new Token( "rhs", _RHS );
			$1->left = next;
			$$ = next;
		}
	;
411
// One element of a right-hand side; the element's node is passed through unchanged.
prod	: action | IDENTIFIER | CHARACTER | prec
	;
417
// "%prec name": a "prec" node over the chain  %prec -> name.
prec	: PREC name
		{
			Token *node = new Token( "prec", _PREC );
			node->down = $1;
			$1->left = $2;
			$$ = node;
		}
	;
426
// A semantic action "{ ... }" inside a rule. The body is collected as a string by the lexer and turned
// into a CODE token; the result is an "action" node over the chain  '{' -> code -> '}'.
action	: '{'
		{ lexC(); }
	  ccode_opt
		{
			// The lexer appends the closing '}' to the code string; drop that last character.
			string body( lexYacc() );
			$<tokenp>$ = new Token( body.substr( 0, body.length() - 1 ), CODE );
		}
	  '}'
		{
			Token *code = $<tokenp>4;					// value of the second mid-rule action
			$1->left = code;
			code->left = $5;
			Token *node = new Token( "action", _ACTION );
			node->down = $1;
			$$ = node;
		}
	;
443
// Optional user-code section after a second "%%". A "usersection_opt" node is always returned.
usersection_opt : /* empty */
		{
			// No user section: attach the remaining whitespace (ws_list) to a fictitious, empty CODE token.
			Token *filler = new Token( "", ws_list, CODE );
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = filler;
			$$ = section;
		}
	| MARK
		{ lexC(); }
	  ccode_opt
		{
			// Everything after the mark becomes a single CODE token chained after the "%%".
			Token *code = new Token( lexYacc(), CODE );
			$1->left = code;
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = $1;
			$$ = section;
		}
	;
463
// Optional nested-brace structure inside C code. This rule exists only to consume tokens: the code text
// itself is obtained by the enclosing rules from lexYacc(), and no rule in this grammar reads the value
// of ccode_opt. The value is therefore set to NULL explicitly. Otherwise the empty alternative would
// leave $$ indeterminate, and the "blocks" alternative would inherit (through the default $$ = $1)
// whatever "blocks" left behind, which is a pointer to an already deleted brace token.
ccode_opt : /* empty */
		{ $$ = NULL; }
	| blocks
		{ $$ = NULL; }
	;
467
// This rule matches internal braces "{}" in C code to the level of the braces of a union/action. These
// internal braces are returned as Tokens from the lexer but are unused because the braces are already
// concatenated into the code string built by the lexer. Therefore, the tokens for the braces are deleted
// immediately.
//
// Because the brace tokens are deleted, $$ is set to NULL explicitly: the parser's default $$ = $1 would
// otherwise leave the value of "blocks" pointing at a token that has already been freed.

blocks	: '{' { delete $1; } ccode_opt '}'
		{
			delete $4;
			$$ = NULL;
		}
	| blocks '{' { delete $2; } ccode_opt '}'
		{
			delete $5;
			$$ = NULL;
		}
	;
476%%
477
478/* Local Variables: */
479/* fill-column: 110 */
480/* compile-command: "gmake" */
481/* End: */
Note: See TracBrowser for help on using the repository browser.