source: tools/prettyprinter/parser.yy@ be5f0a5

ADT ast-experimental
Last change on this file since be5f0a5 was d9e4d83, checked in by Peter A. Buhr <pabuhr@…>, 5 years ago

formatting, add extern C for yylex declaration because of C++ compile, add -Wno-yacc to AM_YFLAGS

  • Property mode set to 100644
File size: 11.9 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// parser.yy --
8//
9// Author : Rodolfo G. Esteves
10// Created On : Sat Dec 15 13:44:21 2001
11// Last Modified By : Peter A. Buhr
12// Last Modified On : Tue Jan 26 22:50:03 2021
13// Update Count : 1053
14//
15
16%{
// Parser configuration macros. YYDEBUG_LEXER_TEXT expands to nothing in this file.
17#define YYDEBUG_LEXER_TEXT( yylval ) // lexer loads this up each time
18#define YYDEBUG 1 // get the pretty debugging code to compile
19#define YYERROR_VERBOSE // more information in syntax errors
20
21#include <iostream>
22using namespace std;
23#include "ParserTypes.h"
24#include "filter.h"
25
// Interface to the lexer, used by the grammar actions in this file.
26extern list<string> ws_list; // lex variable containing accumulated whitespace
// Called by mid-rule actions immediately after the token that opens embedded C code (LCURL, '{' or the second MARK).
// NOTE(review): presumably switches the lexer into C-code scanning -- confirm against the lexer source.
27void lexC( void );
// Called after the embedded C code has been parsed; its string result becomes the text of a CODE token.
28string lexYacc( void );
29
// Print a parse-error message on cerr, tagged with the current input line number.
void yyerror( string s ) {
	extern int yylineno;								// line counter defined outside this file

	cerr << "Error in line: " << yylineno;
	cerr << ": " << s << endl;
}
36
37Token *declstart; // head of the declaration list: set in "declarations", read in "defsection_opt"
38Token *rulestart; // head of the rule list: set in the empty "rhs" alternative, read in "rules" and "rhs"
39Token *nameliststart; // head of the current name list: set in "namenolist"/"namelist", read in "declaration"
40%}
41
// Semantic value type: every typed terminal and non-terminal in this grammar is declared <tokenp>, a pointer to a
// parse-tree node.
42%union {
43 Token *tokenp;
44}
45
// Single-character punctuation terminals.
46%token<tokenp> ','
47%token<tokenp> '<'
48%token<tokenp> '>'
49%token<tokenp> '{'
50%token<tokenp> '}'
51%token<tokenp> ':'
52%token<tokenp> ';'
53%token<tokenp> '|'
54
// Section and literal-block delimiters.
55%token<tokenp> MARK // %%
56%token<tokenp> LCURL // %{
57%token<tokenp> RCURL // %}
58
// Constants, names, and embedded C code (CODE tokens are created by the grammar actions from lexYacc()).
59%token<tokenp> INTEGER // integer constant
60%token<tokenp> CHARACTER // character constant
61%token<tokenp> IDENTIFIER // identifier
62%token<tokenp> CODE // C code
63
// Directives that may appear in the definition section.
64%token<tokenp> DEFINE // %define
65%token<tokenp> EXPECT // %expect
66%token<tokenp> LEFT // %left
67%token<tokenp> LOCATIONS // %locations
68%token<tokenp> NONASSOC // %nonassoc
69%token<tokenp> PRECEDENCE // %precedence
70%token<tokenp> PURE_PARSER // %pure_parser
71%token<tokenp> RIGHT // %right
72%token<tokenp> SEMANTIC_PARSER // %semantic_parser
73%token<tokenp> START // %start
74%token<tokenp> THONG // %thong
75%token<tokenp> TOKEN // %token
76%token<tokenp> TYPE // %type
77%token<tokenp> UNION // %union
78
// Directive that appears inside a rule's right-hand side.
79%token<tokenp> PREC // %prec
80
// Sentinel separating real terminals from the kind-only tokens declared after it.
// NOTE(review): presumably compared against Token kinds elsewhere to tell terminals from non-terminals -- confirm.
81%token END_TERMINALS // ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS
82
// Non-terminal declarations. A "%token _NAME" line following a "%type" line declares the kind value passed to
// "new Token( ..., _NAME )" when the grammar actions build the interior parse-tree node for that construct
// (e.g. _SECTIONS for "sections", _RULE for the "rule" nodes created in "rhs"). These tokens are never returned
// as input symbols by any rule in this file.
83%type<tokenp> sections
84%token _SECTIONS
85%type<tokenp> mark
86%type<tokenp> defsection_opt
87%token _DEFSECTION_OPT
88%type<tokenp> declarations
89%type<tokenp> literalblock
90%token _LITERALBLOCK
91%type<tokenp> declaration
92%token _DECLARATION
93%type<tokenp> union
94%type<tokenp> rword
95%type<tokenp> tag_opt
96%token _TAG_OPT
97%type<tokenp> namenolist
98%token _NAMENOLIST
99%type<tokenp> nameno
100%token _NAMENO
101%type<tokenp> namelist
102%token _NAMELIST
103%type<tokenp> name
104%type<tokenp> rulesection
105%token _RULESECTION
106%type<tokenp> rules
107%token _RULE
108%type<tokenp> lhs
109%token _LHS
110%type<tokenp> rhs
111%token _RHS
112%type<tokenp> prod
113%type<tokenp> prec
114%token _PREC
115%type<tokenp> action
116%token _ACTION
117%type<tokenp> usersection_opt
118%token _USERSECTION_OPT
119%type<tokenp> ccode_opt
120%type<tokenp> blocks
121
// Parsing begins at "grammar", which matches a complete input file.
122%start grammar
123
124%%
// Start symbol: a complete input file. Once the whole tree has been built it is passed to filter() and then released.
grammar :
	sections
		{
			Token *root = $1;
			filter( root );								// filter parse tree
			freeTree( root );							// free parse-tree storage (optional: used with purify)
		}
	;
132
// The four parts of the input file. They are chained as siblings through "left" and the chain is hung under a new
// "sections" node through "down".
sections :
	defsection_opt mark rulesection usersection_opt
		{
			Token *root = new Token( "sections", _SECTIONS );
			$3->left = $4;								// build the sibling chain back to front
			$2->left = $3;
			$1->left = $2;
			root->down = $1;
			$$ = root;
		}
	;
143
// The %% separating the definition section from the rules. A missing mark is fatal.
mark :
	MARK
		{ $$ = $1; }									// pass the MARK token through
	| error												// missing %%
		{
			cerr << "no input grammar, missing %% mark" << endl;
			exit( -1 );
		}
	;
152
// Optional definition section. A node is always created; when declarations are present the list is attached from
// its head (declstart), because "declarations" itself returns the list tail.
// NOTE(review): the node text is "declaration_opt" although the rule and kind are named defsection_opt -- confirm
// whether the text is relied on anywhere before renaming.
defsection_opt :
	// empty
		{
			//cerr << "defsection_opt1: " << endl;
			Token *section = new Token( "declaration_opt", _DEFSECTION_OPT );
			$$ = section;
		}
	| declarations
		{
			//cerr << "defsection_opt2: " << $1->text << "(" << $1 << ")" << endl;
			Token *section = new Token( "declaration_opt", _DEFSECTION_OPT );
			section->down = declstart;
			$$ = section;
		}
	;
166
// List of literal blocks and declarations, chained through "left". The first element is remembered in declstart;
// the value returned is always the most recently appended element so the next one can be linked after it.
// Each "declaration" is wrapped in its own "declaration" node; literal blocks are linked in directly.
declarations :
	literalblock
		{
			//cerr << "declarations1: " << $1->text << "(" << $1 << ")" << endl;
			declstart = $1;
			$$ = $1;
		}
	| declaration
		{
			//cerr << "declarations2: " << $1->text << "(" << $1 << ")" << endl;
			Token *first = new Token( "declaration", _DECLARATION );
			declstart = first;
			first->down = $1;
			$$ = first;
		}
	| declarations literalblock
		{
			//cerr << "declarations3: "<< $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			$$ = $2;
			$1->left = $$;
		}
	| declarations declaration
		{
			//cerr << "declarations4: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			Token *next = new Token( "declaration", _DECLARATION );
			next->down = $2;
			$1->left = next;
			$$ = next;
		}
	;
193
// A %{ ... %} block. The second mid-rule action (position 4) wraps the string returned by lexYacc() in a CODE
// token; the result is the chain LCURL -> CODE -> RCURL under a new "literalblock" node.
literalblock :
	LCURL
		{ lexC(); }
	ccode_opt
		{ $<tokenp>$ = new Token( lexYacc(), CODE ); }
	RCURL
		{
			//cerr << "literalblock: " << $1->text << "(" << $1 << ") " << $<tokenp>4->text << " " << $5->text << "(" << $5 << ")" << endl;
			Token *code = $<tokenp>4;
			code->left = $5;
			$1->left = code;
			Token *block = new Token( "literalblock", _LITERALBLOCK );
			block->down = $1;
			$$ = block;
		}
	;
208
// One definition-section directive. The directive keyword token is returned with its operands chained after it
// through "left". Name lists are attached from their head (nameliststart) under a new list node, because the list
// rules return the list tail.
declaration :
	union
	| START IDENTIFIER
		{
			$$ = $1;
			$$->left = $2;
		}
	| rword tag_opt namenolist
		{
			Token *names = new Token( "namenolist", _NAMENOLIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| TYPE tag_opt namelist
		{
			Token *names = new Token( "namelist", _NAMELIST );
			names->down = nameliststart;
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| PURE_PARSER
	| SEMANTIC_PARSER
	| EXPECT INTEGER									// bison
		{
			$$ = $1;
			$$->left = $2;
		}
	| DEFINE											// bison
	| LOCATIONS
	| THONG												// bison
	;
243
// A %union { ... } declaration. The second mid-rule action (position 5) builds the CODE token for the body; the
// result is the chain UNION -> '{' -> CODE -> '}'.
union :
	UNION '{'
		{ lexC(); }
	ccode_opt
		{
			// The lexer appends the closing '}' to the code string; drop it because it is a separate token here.
			string body( lexYacc() );
			if ( ! body.empty() ) body.erase( body.length() - 1 );
			$<tokenp>$ = new Token( body, CODE );
		}
	'}'
		{
			Token *code = $<tokenp>5;
			code->left = $6;
			$2->left = code;
			$1->left = $2;
			$$ = $1;
		}
	;
261
// Directive keywords that are followed by an optional tag and a list of names with optional numbers. The keyword
// token is passed through unchanged.
rword :
	TOKEN | LEFT | RIGHT | NONASSOC | PRECEDENCE
	;
269
// Optional <tag>. A "tag_opt" node is always created; when a tag is present the chain '<' -> IDENTIFIER -> '>' is
// hung under it.
tag_opt :
	// empty
		{
			//cerr << "tag_opt" << endl;
			Token *tag = new Token( "tag_opt", _TAG_OPT );
			$$ = tag;
		}
	| '<' IDENTIFIER '>'
		{
			$2->left = $3;
			$1->left = $2;
			Token *tag = new Token( "tag_opt", _TAG_OPT );
			tag->down = $1;
			$$ = tag;
		}
	;
284
// List of "nameno" nodes, optionally separated by commas (the comma tokens stay in the chain). The first element is
// remembered in nameliststart; the value returned is the last element appended.
namenolist :
	nameno
		{
			//cerr << "namenolist1: " << $1->text << "(" << $1 << ")" << endl;
			nameliststart = $1;
			$$ = $1;
		}
	| namenolist nameno
		{
			//cerr << "namenolist2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			$$ = $2;
			$1->left = $$;
		}
	| namenolist ',' nameno
		{
			//cerr << "namenolist3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
			$$ = $3;
			$2->left = $$;
			$1->left = $2;
		}
	;
305
// A name with an optional integer. Both forms produce a "nameno" node with the name as its child; the integer,
// when present, is chained after the name.
nameno :
	name
		{
			Token *node = new Token( "nameno", _NAMENO );
			node->down = $1;
			$$ = node;
		}
	| name INTEGER
		{
			Token *node = new Token( "nameno", _NAMENO );
			node->down = $1;
			$1->left = $2;
			$$ = node;
		}
	;
319
// List of names, optionally separated by commas (the comma tokens stay in the chain). The first element is
// remembered in nameliststart; the value returned is the last element appended.
namelist :
	name
		{
			//cerr << "namelist1: " << $1->text << "(" << $1 << ")" << endl;
			nameliststart = $1;
			$$ = $1;
		}
	| namelist name
		{
			//cerr << "namelist2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			$$ = $2;
			$1->left = $$;
		}
	| namelist ',' name
		{
			//cerr << "namelist3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
			$$ = $3;
			$2->left = $$;
			$1->left = $2;
		}
	;
340
// A symbol name: an identifier or a character constant. The token is passed through unchanged.
name :
	IDENTIFIER
		{ $$ = $1; }
	| CHARACTER
		{ $$ = $1; }
	;
345
// The rules section: the rule list hung under a "rulesection" node. A section with no rules is fatal.
rulesection :
	rules
		{
			//cerr << "rulesection1: " << $1->text << "(" << $1 << ")" << endl;
			Token *section = new Token( "rulesection", _RULESECTION );
			section->down = $1;
			$$ = section;
		}
	| error												// no rules
		{
			cerr << "no rules in the input grammar" << endl;
			exit( -1 );
		}
	;
359
// The Yacc language is LR(2) because the ';' ending a rule is optional, which is why the rules below are complex.
// They turn the LR(2) grammar into LR(1) by lengthening the rules until there is enough lookahead. The cost is more
// complicated semantic actions: two lists (rules, rhs) are built at once, but only one list tail can be returned
// through $$ for chaining, so the head of the rule list is kept in the global rulestart.

rules :
	lhs rhs
		{
			//cerr << "rules1: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			$$ = rulestart;
		}
	| lhs rhs ';'
		{
			//cerr << "rules2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
			Token *semicolon = $3;
			$2->addDownLeftTail( semicolon );			// final ';' is added to the last rhs
			$$ = rulestart;
		}
	;
378
// Left-hand side of a rule: the chain IDENTIFIER -> ':' hung under a new "lhs" node.
lhs :
	IDENTIFIER ':'
		{
			//cerr << "lhs: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			Token *head = new Token( "lhs", _LHS );
			//cerr << " lhs: " << head->text << "(" << head << ")" << endl;
			$1->left = $2;
			head->down = $1;
			$$ = head;
		}
	;
389
// Right-hand sides, and, because of the LR(1) rewrite, the start of every rule after the first. The value returned
// is always the "rhs" node currently being filled. Each new rule gets a "rule" node, appended to the list headed by
// rulestart, whose child is the lhs; a fresh "rhs" node is linked after that lhs. A '|' closes the current rhs and
// links a fresh one after it.
rhs :
	// empty
		{
			//cerr << "rhs1: " << $<tokenp>0->text << "(" << $<tokenp>0 << ")" << endl;
			Token *firstLhs = $<tokenp>0;				// initial lhs is already on the stack from "rules"
			rulestart = new Token( "rule", _RULE );
			rulestart->down = firstLhs;
			Token *body = new Token( "rhs", _RHS );
			//cerr << "  rhs: " << body->text << "(" << body << ")" << endl;
			firstLhs->left = body;
			$$ = body;
		}
	| rhs lhs
		{
			//cerr << "rhs2: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $2;
			Token *body = new Token( "rhs", _RHS );
			//cerr << "  rhs: " << body->text << "(" << body << ")" << endl;
			$2->left = body;
			$$ = body;
		}
	| rhs ';' lhs
		{
			//cerr << "rhs3: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ") " << $3->text << "(" << $3 << ")" << endl;
			$1->addDownLeftTail( $2 );					// ';' is added to the rhs it terminates
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $3;
			Token *body = new Token( "rhs", _RHS );
			//cerr << "  rhs: " << body->text << "(" << body << ")" << endl;
			$3->left = body;
			$$ = body;
		}
	| rhs prod
		{
			//cerr << "rhs4: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			$$ = $1;
			$$->addDownLeftTail( $2 );
		}
	| rhs '|'
		{
			//cerr << "rhs5: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			$1->addDownLeftTail( $2 );					// '|' is added to the rhs it terminates
			Token *body = new Token( "rhs", _RHS );
			$1->left = body;
			//cerr << "  rhs: " << body->text << "(" << body << ")" << endl;
			$$ = body;
		}
	;
436
// One element of a right-hand side. The value of the matched alternative is passed through unchanged.
prod :
	action | IDENTIFIER | CHARACTER | prec
	;
443
// A %prec modifier: the chain PREC -> name hung under a new "prec" node.
prec :
	PREC name
		{
			//cerr << "prec: " << $1->text << "(" << $1 << ") " << $2->text << "(" << $2 << ")" << endl;
			Token *node = new Token( "prec", _PREC );
			$1->left = $2;
			node->down = $1;
			$$ = node;
		}
	;
453
// A { ... } semantic action. The second mid-rule action (position 4) builds the CODE token for the body; the result
// is the chain '{' -> CODE -> '}' under a new "action" node.
action :
	'{'
		{ lexC(); }
	ccode_opt
		{
			// The lexer appends the closing '}' to the code string; drop it because it is a separate token here.
			string body( lexYacc() );
			if ( ! body.empty() ) body.erase( body.length() - 1 );
			$<tokenp>$ = new Token( body, CODE );
		}
	'}'
		{
			Token *code = $<tokenp>4;
			code->left = $5;
			$1->left = code;
			Token *node = new Token( "action", _ACTION );
			node->down = $1;
			$$ = node;
		}
	;
471
// Optional user-code section after a second %%. A "usersection_opt" node is always created. Without the mark, its
// child is an empty CODE token built with ws_list; with the mark, its child is the chain MARK -> CODE.
usersection_opt :
	// empty
		{
			//cerr << "usersection_opt" << endl;
			// attach remaining WS to fictitious code
			Token *code = new Token( "", ws_list, CODE );
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = code;
			$$ = section;
		}
	| MARK
		{ lexC(); }
	ccode_opt
		{
			Token *code = new Token( lexYacc(), CODE );
			//cerr << "usersection_opt: " << $1->text << " " << code->text << endl;
			$1->left = code;
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = $1;
			$$ = section;
		}
	;
492
// Optional nested brace groups inside embedded C code. Neither alternative builds a parse-tree node.
ccode_opt :
	// empty
		{}
	| blocks
	;
498
// Matches nested braces "{}" inside C code, down to the brace level of the enclosing union/action. The lexer returns
// these inner braces as Tokens, but they are not needed: the braces have already been concatenated into the code
// string the lexer builds. Each brace token is therefore deleted as soon as it has been matched.

blocks :
	'{'
		{ delete $1; }									// opening brace of the first group
	ccode_opt '}'
		{ delete $4; }									// its closing brace
	| blocks '{'
		{ delete $2; }									// opening brace of a following group
	ccode_opt '}'
		{ delete $5; }									// its closing brace
	;
513%%
514
515// Local Variables: //
516// mode: c++ //
517// tab-width: 4 //
518// compile-command: "make install" //
519// End: //
Note: See TracBrowser for help on using the repository browser.