Context Navigation

parser.yy @ 7f2e87a

Last change on this file since 7f2e87a was d9e4d83, checked in by Peter A. Buhr <pabuhr@…>, 4 years ago
formatting, add extern C for yylex declaration because of C++ compile, add -Wno-yacc to AM_YFLAGS
Property mode set to `100644`
File size: 11.9 KB

Line
1	//
2	// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3	//
4	// The contents of this file are covered under the licence agreement in the
5	// file "LICENCE" distributed with Cforall.
6	//
7	// parser.yy --
8	//
9	// Author : Rodolfo G. Esteves
10	// Created On : Sat Dec 15 13:44:21 2001
11	// Last Modified By : Peter A. Buhr
12	// Last Modified On : Tue Jan 26 22:50:03 2021
13	// Update Count : 1053
14	//
15
16	%{
17	#define YYDEBUG_LEXER_TEXT( yylval ) // lexer loads this up each time
18	#define YYDEBUG 1 // get the pretty debugging code to compile
19	#define YYERROR_VERBOSE // more information in syntax errors
20
21	#include <iostream>
22	using namespace std;
23	#include "ParserTypes.h"
24	#include "filter.h"
25
// Entities defined outside this file (NOTE(review): presumably in the lexer -- confirm).
extern list<string> ws_list;	// lex variable containing accumulated whitespace
void lexC();					// called by the actions immediately before a ccode_opt is parsed
string lexYacc();				// called after a ccode_opt; its result becomes the text of a CODE token
29
// Print a parser error message on standard error, prefixed with the current value of yylineno.
void yyerror( std::string s ) {
	extern int yylineno;		// defined outside this file
	std::cerr << "Error in line: " << yylineno << ": " << s << std::endl;
}
36
// List heads shared between semantic actions, needed because an action can return only the list tail through $$.
Token
	*declstart,			// first node of the declarations list; set in "declarations", read in "defsection_opt"
	*rulestart,			// first "rule" node; set in the empty "rhs" alternative, read in "rhs" and "rules"
	*nameliststart;		// first node of the current name list; set in "namenolist"/"namelist", read in "declaration"
40	%}
41
// Every symbol with a semantic value carries a pointer to a parse-tree node.
%union {
	Token *tokenp;
}

// Single-character terminals.
%token<tokenp> ',' '<' '>' '{' '}' ':' ';' '|'

// Section and literal-block markers.
%token<tokenp> MARK					// %%
%token<tokenp> LCURL				// %{
%token<tokenp> RCURL				// %}

// Lexical values.
%token<tokenp> INTEGER				// integer constant
%token<tokenp> CHARACTER			// character constant
%token<tokenp> IDENTIFIER			// identifier
%token<tokenp> CODE					// C code

// Directives of the definition section.
%token<tokenp> DEFINE				// %define
%token<tokenp> EXPECT				// %expect
%token<tokenp> LEFT					// %left
%token<tokenp> LOCATIONS			// %locations
%token<tokenp> NONASSOC				// %nonassoc
%token<tokenp> PRECEDENCE			// %precedence
%token<tokenp> PURE_PARSER			// %pure_parser
%token<tokenp> RIGHT				// %right
%token<tokenp> SEMANTIC_PARSER		// %semantic_parser
%token<tokenp> START				// %start
%token<tokenp> THONG				// %thong
%token<tokenp> TOKEN				// %token
%token<tokenp> TYPE					// %type
%token<tokenp> UNION				// %union

// Directive that appears inside a rule.
%token<tokenp> PREC					// %prec

%token END_TERMINALS				// ALL TERMINAL TOKEN NAMES MUST APPEAR BEFORE THIS
82
// All nonterminals below have a parse-tree node pointer as their semantic value.
%type<tokenp> sections mark
%type<tokenp> defsection_opt declarations literalblock declaration union
%type<tokenp> rword tag_opt namenolist nameno namelist name
%type<tokenp> rulesection rules lhs rhs prod prec action
%type<tokenp> usersection_opt ccode_opt blocks

// Kind codes passed to the Token constructor for the interior nodes built by the actions. They are declared as
// tokens after END_TERMINALS and never appear on the right-hand side of a rule. Their relative order is kept.
%token _SECTIONS
%token _DEFSECTION_OPT
%token _LITERALBLOCK
%token _DECLARATION
%token _TAG_OPT
%token _NAMENOLIST
%token _NAMENO
%token _NAMELIST
%token _RULESECTION
%token _RULE
%token _LHS
%token _RHS
%token _PREC
%token _ACTION
%token _USERSECTION_OPT

%start grammar
123
124	%%
// Start symbol: once the whole input is reduced, hand the parse tree to the filter and then release it.
grammar :
	sections
		{
			Token *tree = $1;
			filter( tree );								// filter parse tree
			freeTree( tree );							// free parse-tree storage (optional: used with purify)
		}
	;
132
// Root of the parse tree: a "sections" node whose children are the four parts of the input, chained through "left".
sections :
	defsection_opt mark rulesection usersection_opt
		{
			Token *root = new Token( "sections", _SECTIONS );
			root->down = $1;
			$3->left = $4;
			$2->left = $3;
			$1->left = $2;
			$$ = root;
		}
	;
143
// The %% separating the definition section from the rules; its absence is fatal.
mark :
	MARK
		{ $$ = $1; }
	| error												// missing %%
		{
			cerr << "no input grammar, missing %% mark" << endl;
			exit( -1 );
		}
	;
152
// Optional definition section. A "declaration_opt" node is always produced; it has children only when declarations
// were present, in which case the children start at the list head recorded in declstart.
defsection_opt :
	// empty
		{ $$ = new Token( "declaration_opt", _DEFSECTION_OPT ); }
	| declarations
		{
			Token *section = new Token( "declaration_opt", _DEFSECTION_OPT );
			section->down = declstart;
			$$ = section;
		}
	;
166
// List of literal blocks and declarations. $$ is always the list TAIL so the next element can be linked after it;
// the list head is remembered in declstart. Each declaration is wrapped in its own "declaration" node.
declarations :
	literalblock
		{
			declstart = $1;
			$$ = $1;
		}
	| declaration
		{
			Token *wrapper = new Token( "declaration", _DECLARATION );
			wrapper->down = $1;
			declstart = wrapper;
			$$ = wrapper;
		}
	| declarations literalblock
		{
			$$ = $2;
			$1->left = $2;
		}
	| declarations declaration
		{
			Token *wrapper = new Token( "declaration", _DECLARATION );
			wrapper->down = $2;
			$1->left = wrapper;
			$$ = wrapper;
		}
	;
193
// %{ ... %} block. lexC() is called before the enclosed code is parsed and lexYacc() afterwards; the string
// returned by lexYacc() becomes a CODE token placed between the two delimiters.
literalblock :
	LCURL
		{ lexC(); }
	ccode_opt
		{ $<tokenp>$ = new Token( lexYacc(), CODE ); }
	RCURL
		{
			Token *code = $<tokenp>4;					// value of the second mid-rule action
			Token *block = new Token( "literalblock", _LITERALBLOCK );
			block->down = $1;
			$1->left = code;
			code->left = $5;
			$$ = block;
		}
	;
208
// One directive of the definition section. The directive keyword token is the result; any operands are chained
// after it through "left". Alternatives without an action return their single token unchanged.
declaration :
	union
	| START IDENTIFIER
		{
			$$ = $1;
			$$->left = $2;
		}
	| rword tag_opt namenolist
		{
			Token *names = new Token( "namenolist", _NAMENOLIST );
			names->down = nameliststart;				// head of the list just parsed
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| TYPE tag_opt namelist
		{
			Token *names = new Token( "namelist", _NAMELIST );
			names->down = nameliststart;				// head of the list just parsed
			$2->left = names;
			$1->left = $2;
			$$ = $1;
		}
	| PURE_PARSER
	| SEMANTIC_PARSER
	| EXPECT INTEGER									// bison
		{
			$$ = $1;
			$$->left = $2;
		}
	| DEFINE											// bison
	| LOCATIONS
	| THONG												// bison
	;
243
// %union { ... }: the result is the UNION token followed by '{', a CODE token holding the body, and '}'.
union :
	UNION '{'
		{ lexC(); }
	ccode_opt
		{
			// The lexer appends the closing '}' to the code string, so drop the last character.
			const string text( lexYacc() );
			$<tokenp>$ = new Token( string( text, 0, text.length() - 1 ), CODE );
		}
	'}'
		{
			Token *code = $<tokenp>5;					// value of the second mid-rule action
			code->left = $6;
			$2->left = code;
			$1->left = $2;
			$$ = $1;
		}
	;
261
// Directive keywords that are followed by an optional tag and a list of names; the keyword token is returned as is.
rword :
	TOKEN | LEFT | RIGHT | NONASSOC | PRECEDENCE
	;
269
// Optional <tag>. A "tag_opt" node is always produced; when a tag is present its three tokens become the children.
tag_opt :
	// empty
		{ $$ = new Token( "tag_opt", _TAG_OPT ); }
	| '<' IDENTIFIER '>'
		{
			Token *tag = new Token( "tag_opt", _TAG_OPT );
			tag->down = $1;
			$1->left = $2;
			$2->left = $3;
			$$ = tag;
		}
	;
284
// List of names with optional numbers, optionally separated by commas. $$ is the list TAIL; the head is remembered
// in nameliststart. A separating comma is kept in the list.
namenolist :
	nameno
		{
			nameliststart = $1;
			$$ = $1;
		}
	| namenolist nameno
		{
			$$ = $2;
			$1->left = $2;
		}
	| namenolist ',' nameno
		{
			$$ = $3;
			$2->left = $3;
			$1->left = $2;
		}
	;
305
// A name, optionally followed by an integer, wrapped in a "nameno" node.
nameno :
	name
		{
			Token *entry = new Token( "nameno", _NAMENO );
			entry->down = $1;
			$$ = entry;
		}
	| name INTEGER
		{
			Token *entry = new Token( "nameno", _NAMENO );
			entry->down = $1;
			$1->left = $2;
			$$ = entry;
		}
	;
319
// List of names, optionally separated by commas. $$ is the list TAIL; the head is remembered in nameliststart.
// A separating comma is kept in the list.
namelist :
	name
		{
			nameliststart = $1;
			$$ = $1;
		}
	| namelist name
		{
			$$ = $2;
			$1->left = $2;
		}
	| namelist ',' name
		{
			$$ = $3;
			$2->left = $3;
			$1->left = $2;
		}
	;
340
// A grammar symbol: an identifier or a character constant; the token is returned unchanged.
name :
	IDENTIFIER | CHARACTER
	;
345
// The rules section, wrapped in a "rulesection" node; a grammar without rules is fatal.
rulesection :
	rules
		{
			Token *section = new Token( "rulesection", _RULESECTION );
			section->down = $1;
			$$ = section;
		}
	| error												// no rules
		{
			cerr << "no rules in the input grammar" << endl;
			exit( -1 );
		}
	;
359
360	// These grammar rules are complex because the Yacc language is LR(2) due to the optional ';' at the end of rules. The
361	// following rules convert the LR(2) grammar into LR(1) by lengthening the rules to allow sufficient look
362	// ahead. Unfortunately, this change makes handling the semantic actions more complex because there are two lists
363	// (rules, rhs) being built but only one list tail can be returned through $$ for chaining.
364
// All rules of the grammar. The list itself is built inside "rhs"; the result here is its head, rulestart. A final
// ';' is passed to addDownLeftTail() on the last rhs node.
rules :
	lhs rhs
		{ $$ = rulestart; }
	| lhs rhs ';'
		{
			$$ = rulestart;
			$2->addDownLeftTail( $3 );
		}
	;
378
// Left-hand side of a rule: an "lhs" node whose children are the rule name and the ':'.
lhs :
	IDENTIFIER ':'
		{
			Token *head = new Token( "lhs", _LHS );
			head->down = $1;
			$1->left = $2;
			$$ = head;
		}
	;
389
// Right-hand sides, and (to keep the grammar LR(1)) the left-hand sides of all rules after the first. $$ is always
// the "rhs" node currently being filled. Each new rule gets a "rule" node that is passed to addLeftTail() on
// rulestart, with its lhs as first child and a fresh "rhs" node linked after the lhs.
rhs :
	// empty
		{
			Token *firstLhs = $<tokenp>0;				// initial lhs is already on the stack from "rules"
			rulestart = new Token( "rule", _RULE );
			rulestart->down = firstLhs;
			Token *body = new Token( "rhs", _RHS );
			firstLhs->left = body;
			$$ = body;
		}
	| rhs lhs											// next rule, previous one not terminated by ';'
		{
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $2;
			Token *body = new Token( "rhs", _RHS );
			$2->left = body;
			$$ = body;
		}
	| rhs ';' lhs										// next rule, previous one terminated by ';'
		{
			$1->addDownLeftTail( $2 );					// the ';' is added to the rhs it terminates
			Token *rule = new Token( "rule", _RULE );
			rulestart->addLeftTail( rule );
			rule->down = $3;
			Token *body = new Token( "rhs", _RHS );
			$3->left = body;
			$$ = body;
		}
	| rhs prod											// one more symbol/action in the current rhs
		{
			$1->addDownLeftTail( $2 );
			$$ = $1;
		}
	| rhs '|'											// start of the next alternative of the same rule
		{
			$1->addDownLeftTail( $2 );					// the '|' is added to the alternative it ends
			Token *body = new Token( "rhs", _RHS );
			$1->left = body;
			$$ = body;
		}
	;
436
// One element of a right-hand side; the element's node is returned unchanged.
prod :
	action | IDENTIFIER | CHARACTER | prec
	;
443
// %prec followed by a name, wrapped in a "prec" node.
prec :
	PREC name
		{
			Token *node = new Token( "prec", _PREC );
			node->down = $1;
			$1->left = $2;
			$$ = node;
		}
	;
453
// Semantic action { ... }: an "action" node whose children are '{', a CODE token holding the body, and '}'.
action :
	'{'
		{ lexC(); }
	ccode_opt
		{
			// The lexer appends the closing '}' to the code string, so drop the last character.
			const string text( lexYacc() );
			$<tokenp>$ = new Token( string( text, 0, text.length() - 1 ), CODE );
		}
	'}'
		{
			Token *code = $<tokenp>4;					// value of the second mid-rule action
			$1->left = code;
			code->left = $5;
			Token *node = new Token( "action", _ACTION );
			node->down = $1;
			$$ = node;
		}
	;
471
// Optional user-code section after a second %%. A "usersection_opt" node is always produced.
usersection_opt :
	// empty
		{
			// attach remaining WS to fictitious code
			Token *filler = new Token( "", ws_list, CODE );
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = filler;
			$$ = section;
		}
	| MARK
		{ lexC(); }
	ccode_opt
		{
			Token *code = new Token( lexYacc(), CODE );
			$1->left = code;							// user code follows the %% token
			Token *section = new Token( "usersection_opt", _USERSECTION_OPT );
			section->down = $1;
			$$ = section;
		}
	;
492
// Optional nested brace blocks inside C code. No rule in this grammar reads the value of ccode_opt, so it is set
// to null in both alternatives: an empty action would leave $$ indeterminate, and the default action for "blocks"
// would forward the value of "blocks", which refers to a brace token that has already been deleted.
ccode_opt :
	// empty
		{ $$ = nullptr; }
	| blocks
		{ $$ = nullptr; }
	;
498
499	// This rule matches internal braces "{}" in C code to the level of the braces of a union/action. These internal braces
500	// are returned as Tokens from the lexer but are unused because the braces are already concatenated into the code string
501	// built by the lexer. Therefore, the tokens for the braces are immediately deleted.
502
// Balanced "{ ... }" groups nested inside C code. The brace tokens are deleted as soon as they are reduced. The
// rule's value is set to null explicitly: with only "delete" in the final action, the default $$ = $1 would leave
// the value pointing at the deleted '{' token (first alternative) or at a previous dangling value (second one).
blocks :
	'{'
		{ delete $1; }
	ccode_opt '}'
		{
			delete $4;
			$$ = nullptr;
		}
	| blocks '{'
		{ delete $2; }
	ccode_opt '}'
		{
			delete $5;
			$$ = nullptr;
		}
	;
513	%%
514
515	// Local Variables: //
516	// mode: c++ //
517	// tab-width: 4 //
518	// compile-command: "make install" //
519	// End: //

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format