Context Navigation

source: doc/generic_types/generic_types.tex @ 41c25b8

ADTaaron-thesisarm-ehast-experimentalcleanup-dtorsdeferred_resndemanglerenumforall-pointer-decayjacob/cs343-translationjenkins-sandboxnew-astnew-ast-unique-exprnew-envno_listpersistent-indexerpthread-emulationqualifiedEnumresolv-newwith_gc

Last change on this file since 41c25b8 was 41c25b8, checked in by Aaron Moss <bruceiv@…>, 7 years ago
Added tag structs discussion to generics paper.
Property mode set to `100644`
File size: 16.9 KB

Line
1	% take of review (for line numbers) and anonymous (for anonymization) on submission
2	\documentclass[format=acmlarge, anonymous, review]{acmart}
3
4	\usepackage{listings} % For code listings
5
6	% Useful macros
7	\newcommand{\CFA}{C$\mathbf\forall$} % Cforall symbolic name
8	\newcommand{\CC}{\rm C\kern-.1em\hbox{+\kern-.25em+}} % C++ symbolic name
9	\newcommand{\CCeleven}{\rm C\kern-.1em\hbox{+\kern-.25em+}11} % C++11 symbolic name
10	\newcommand{\CCfourteen}{\rm C\kern-.1em\hbox{+\kern-.25em+}14} % C++14 symbolic name
11	\newcommand{\CCseventeen}{\rm C\kern-.1em\hbox{+\kern-.25em+}17} % C++17 symbolic name
12
13	\newcommand{\TODO}{\textbf{TODO}}
14	\newcommand{\eg}{\textit{e}.\textit{g}.}
15	\newcommand{\ie}{\textit{i}.\textit{e}.}
16	\newcommand{\etc}{\textit{etc}.}
17
18	% CFA programming language, based on ANSI C (with some gcc additions)
19	\lstdefinelanguage{CFA}[ANSI]{C}{
20	morekeywords={_Alignas,_Alignof,__alignof,__alignof__,asm,__asm,__asm__,_At,_Atomic,__attribute,__attribute__,auto,
21	_Bool,catch,catchResume,choose,_Complex,__complex,__complex__,__const,__const__,disable,dtype,enable,__extension__,
22	fallthrough,fallthru,finally,forall,ftype,_Generic,_Imaginary,inline,__label__,lvalue,_Noreturn,one_t,otype,restrict,_Static_assert,
23	_Thread_local,throw,throwResume,trait,try,typeof,__typeof,__typeof__,zero_t},
24	}%
25
26	\lstset{
27	language=CFA,
28	columns=fullflexible,
29	basicstyle=\linespread{0.9}\sf, % reduce line spacing and use sanserif font
30	stringstyle=\tt, % use typewriter font
31	tabsize=4, % 4 space tabbing
32	xleftmargin=\parindent, % indent code to paragraph indentation
33	% extendedchars=true, % allow ASCII characters in the range 128-255
34	% escapechar=§, % LaTeX escape in CFA code §...§ (section symbol), emacs: C-q M-'
35	mathescape=true, % LaTeX math escape in CFA code $...$
36	keepspaces=true, %
37	showstringspaces=false, % do not show spaces with cup
38	showlines=true, % show blank lines at end of code
39	aboveskip=4pt, % spacing above/below code block
40	belowskip=3pt,
41	% replace/adjust listing characters that look bad in sanserif
42	literate={-}{\raisebox{-0.15ex}{\texttt{-}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1
43	{~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 {_}{\makebox[1.2ex][c]{\rule{1ex}{0.1ex}}}1 {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
44	{<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{$\rightarrow$}2,
45	% moredelim=**[is][\color{red}]{®}{®}, % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
46	% moredelim=**[is][\color{blue}]{ß}{ß}, % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
47	% moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
48	% moredelim=[is][\lstset{keywords={}}]{¶}{¶}, % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
49	}% lstset
50
51	% inline code @...@
52	\lstMakeShortInline@
53
54	% ACM Information
55	\citestyle{acmauthoryear}
56
57	\acmJournal{PACMPL}
58
59	\title{Generic Types with Efficient Dynamic Layout in \CFA{}}
60
61	\author{Aaron Moss}
62	\affiliation{%
63	\institution{University of Waterloo}
64	\department{David R. Cheriton School of Computer Science}
65	\streetaddress{Davis Centre, University of Waterloo}
66	\city{Waterloo}
67	\state{ON}
68	\postcode{N2L 3G1}
69	\country{Canada}
70	}
71	\email{a3moss@uwaterloo.ca}
72
73	\author{Robert Schluntz}
74	\affiliation{%
75	\institution{University of Waterloo}
76	\department{David R. Cheriton School of Computer Science}
77	\streetaddress{Davis Centre, University of Waterloo}
78	\city{Waterloo}
79	\state{ON}
80	\postcode{N2L 3G1}
81	\country{Canada}
82	}
83	\email{rschlunt@uwaterloo.ca}
84
85	\author{Peter Buhr}
86	\affiliation{%
87	\institution{University of Waterloo}
88	\department{David R. Cheriton School of Computer Science}
89	\streetaddress{Davis Centre, University of Waterloo}
90	\city{Waterloo}
91	\state{ON}
92	\postcode{N2L 3G1}
93	\country{Canada}
94	}
95	\email{pabuhr@uwaterloo.ca}
96
97	\terms{generic, types}
98	\keywords{generic types, polymorphic functions, Cforall}
99
100	\begin{CCSXML}
101	<ccs2012>
102	<concept>
103	<concept_id>10011007.10011006.10011008.10011024.10011025</concept_id>
104	<concept_desc>Software and its engineering~Polymorphism</concept_desc>
105	<concept_significance>500</concept_significance>
106	</concept>
107	<concept>
108	<concept_id>10011007.10011006.10011008.10011024.10011028</concept_id>
109	<concept_desc>Software and its engineering~Data types and structures</concept_desc>
110	<concept_significance>500</concept_significance>
111	</concept>
112	<concept>
113	<concept_id>10011007.10011006.10011041.10011047</concept_id>
114	<concept_desc>Software and its engineering~Source code generation</concept_desc>
115	<concept_significance>300</concept_significance>
116	</concept>
117	</ccs2012>
118	\end{CCSXML}
119
120	\ccsdesc[500]{Software and its engineering~Polymorphism}
121	\ccsdesc[500]{Software and its engineering~Data types and structures}
122	\ccsdesc[300]{Software and its engineering~Source code generation}
123
124	\begin{abstract}
125	\TODO{} Write abstract.
126	\end{abstract}
127
128	\begin{document}
129
130	\maketitle
131
132	\section{Introduction \& Background}
133
134	\CFA{}\footnote{Pronounced ``C-for-all'', and written \CFA{} or Cforall.} is an evolutionary extension of the C programming language which aims to add modern language features to C while maintaining both source compatibility with C and a familiar mental model for programmers. This paper describes how generic types are designed and implemented in \CFA{}, and how they interact with \CFA{}'s polymorphic functions.
135
136	\subsection{Polymorphic Functions}
137
138	\CFA{}'s polymorphism was originally formalized by \citet{Ditchfield92}, and first implemented by \citet{Bilson03}. The signature feature of \CFA{} is parametric-polymorphic functions; such functions are written using a @forall@ clause (which gives the language its name):
139	\begin{lstlisting}
140	forall(otype T)
141	T identity(T x) {
142	return x;
143	}
144
145	int forty_two = identity(42); // T is bound to int, forty_two == 42
146	\end{lstlisting}
147	The @identity@ function above can be applied to any complete object type (or ``@otype@''). The type variable @T@ is transformed into a set of additional implicit parameters to @identity@, which encode sufficient information about @T@ to create and return a variable of that type. The \CFA{} implementation passes the size and alignment of the type represented by an @otype@ parameter, as well as an assignment operator, constructor, copy constructor and destructor. If this extra information is not needed, the type parameter can be declared as @dtype T@, where @dtype@ is short for ``data type''.
148
149	Here, the runtime cost of polymorphism is spread over each polymorphic call, due to passing more arguments to polymorphic functions; preliminary experiments have shown this overhead to be similar to \CC{} virtual function calls. An advantage of this design is that, unlike \CC{} template functions, \CFA{} @forall@ functions are compatible with separate compilation.
150
151	Since bare polymorphic types do not provide a great range of available operations, \CFA{} provides a \emph{type assertion} mechanism to provide further information about a type:
152	\begin{lstlisting}
153	forall(otype T \| { T twice(T); })
154	T four_times(T x) {
155	return twice( twice(x) );
156	}
157
158	double twice(double d) { return d * 2.0; } // (1)
159
160	double magic = four_times(10.5); // T is bound to double, uses (1) to satisfy type assertion
161	\end{lstlisting}
162	These type assertions may be either variable or function declarations that depend on a polymorphic type variable. @four_times@ can only be called with an argument for which there exists a function named @twice@ that can take that argument and return another value of the same type; a pointer to the appropriate @twice@ function is passed as an additional implicit parameter to the call of @four_times@.
163
164	Monomorphic specializations of polymorphic functions can themselves be used to satisfy type assertions. For instance, @twice@ could have been defined using the \CFA{} syntax for operator overloading as:
165	\begin{lstlisting}
166	forall(otype S \| { S ?+?(S, S); })
167	S twice(S x) { return x + x; } // (2)
168	\end{lstlisting}
169	This version of @twice@ works for any type @S@ that has an addition operator defined for it, and it could have been used to satisfy the type assertion on @four_times@.
170	The compiler accomplishes this by creating a wrapper function calling @twice // (2)@ with @S@ bound to @double@, then providing this wrapper function to @four_times@\footnote{\lstinline@twice // (2)@ could also have had a type parameter named \lstinline@T@; \CFA{} specifies renaming of the type parameters, which would avoid the name conflict with the type variable \lstinline@T@ of \lstinline@four_times@.}.
171
172	\subsection{Traits}
173
174	\CFA{} provides \emph{traits} as a means to name a group of type assertions, as in the example below:
175	\begin{lstlisting}
176	trait has_magnitude(otype T) {
177	bool ?<?(T, T); // comparison operator for T
178	T -?(T); // negation operator for T
179	void ?{}(T*, zero_t); // constructor from 0 literal
180	};
181
182	forall(otype M \| has_magnitude(M))
183	M abs( M m ) {
184	M zero = { 0 }; // uses zero_t constructor from trait
185	return m < zero ? -m : m;
186	}
187
188	forall(otype M \| has_magnitude(M))
189	M max_magnitude( M a, M b ) {
190	return abs(a) < abs(b) ? b : a;
191	}
192	\end{lstlisting}
193
194	@otype@ is essentially syntactic sugar for the following trait:
195	\begin{lstlisting}
196	trait otype(dtype T \| sized(T)) {
197	// sized is a compiler-provided pseudo-trait for types with known size & alignment
198	void ?{}(T*); // default constructor
199	void ?{}(T*, T); // copy constructor
200	T ?=?(T*, T); // assignment operator
201	void ^?{}(T*); // destructor
202	};
203	\end{lstlisting}
204
205	Semantically, traits are simply a named lists of type assertions, but they may be used for many of the same purposes that interfaces in Java or abstract base classes in \CC{} are used for. Unlike Java interfaces or \CC{} base classes, \CFA{} types do not explicitly state any inheritance relationship to traits they satisfy; this can be considered a form of structural inheritance, similar to implementation of an interface in Go, as opposed to the nominal inheritance model of Java and \CC{}. Nominal inheritance can be simulated with traits using marker variables or functions:
206	\begin{lstlisting}
207	trait nominal(otype T) {
208	T is_nominal;
209	};
210
211	int is_nominal; // int now satisfies the nominal trait
212	{
213	char is_nominal; // char satisfies the nominal trait
214	}
215	// char no longer satisfies the nominal trait here
216	\end{lstlisting}
217
218	Traits, however, are significantly more powerful than nominal-inheritance interfaces; firstly, due to the scoping rules of the declarations that satisfy a trait's type assertions, a type may not satisfy a trait everywhere that the type is declared, as with @char@ and the @nominal@ trait above. Secondly, traits may be used to declare a relationship among multiple types, a property that may be difficult or impossible to represent in nominal-inheritance type systems:
219	\begin{lstlisting}
220	trait pointer_like(otype Ptr, otype El) {
221	lvalue El *?(Ptr); // Ptr can be dereferenced into a modifiable value of type El
222	}
223
224	struct list {
225	int value;
226	list *next; // may omit "struct" on type names
227	};
228
229	typedef list *list_iterator;
230
231	lvalue int *?( list_iterator it ) {
232	return it->value;
233	}
234	\end{lstlisting}
235
236	In the example above, @(list_iterator, int)@ satisfies @pointer_like@ by the user-defined dereference function, and @(list_iterator, list)@ also satisfies @pointer_like@ by the built-in dereference operator for pointers. Given a declaration @list_iterator it@, @it@ can be either an @int@ or a @list@, with the meaning disambiguated by context (\eg, @int x = it;@ interprets @it@ as an @int@, while @(it).value = 42;@ interprets @*it@ as a @list@).
237	While a nominal-inheritance system with associated types could model one of those two relationships by making @El@ an associated type of @Ptr@ in the @pointer_like@ implementation, few such systems could model both relationships simultaneously.
238
239	\section{Generic Types}
240
241	The generic types design for \CFA{} must integrate efficiently and naturally with the existing polymorphic functions in \CFA{}, while retaining backwards compatibility with C; maintaining separate compilation is a particularly important constraint on the design. However, where the concrete parameters of the generic type are known, there should not be extra overhead for the use of a generic type.
242
243	A generic type can be declared by placing a @forall@ specifier on a @struct@ or @union@ declaration, and instantiated using a parenthesized list of types after the type name:
244	\begin{lstlisting}
245	forall(otype R, otype S) struct pair {
246	R first;
247	S second;
248	};
249
250	forall(otype T)
251	T value( pair(const char, T) p ) { return p->second; }
252
253	forall(dtype F, otype T)
254	T value_p( pair(F, T) p ) { return *p.second; }
255
256	pair(const char*, int) p = { "magic", 42 };
257	int magic = value( &p );
258
259	pair(void, int) q = { 0, &p.second };
260	magic = value_p( q );
261	double d = 1.0;
262	pair(double, double) r = { &d, &d };
263	d = value_p( r );
264	\end{lstlisting}
265
266	\CFA{} classifies generic types as either \emph{concrete} or \emph{dynamic}. Dynamic generic types vary in their in-memory layout depending on their type parameters, while concrete generic types have a fixed memory layout regardless of type parameters. A type may have polymorphic parameters but still be concrete; \CFA{} refers to such types as \emph{dtype-static}. Polymorphic pointers are an example of dtype-static types -- @forall(dtype T) T@ is a polymorphic type, but for any @T@ chosen, @T@ will have exactly the same in-memory representation as a @void@, and can therefore be represented by a @void@ in code generation.
267
268	The \CFA{} compiler instantiates concrete generic types by template-expanding them to fresh struct types; concrete generic types can therefore be used with zero runtime overhead. To enable interoperation between equivalent instantiations of a generic type, the compiler saves the set of instantiations currently in scope and re-uses the generated struct declarations where appropriate. As an example, the concrete instantiation for @pair(const char*, int)@ would look something like this:
269	\begin{lstlisting}
270	struct _pair_conc1 {
271	const char* first;
272	int second;
273	};
274	\end{lstlisting}
275
276	A concrete generic type with dtype-static parameters is also expanded to a struct type, but this struct type is used for all matching instantiations. In the example above, the @pair(F, T)@ parameter to @value_p@ is such a type; its expansion would look something like this, and be used as the type of the variables @q@ and @r@ as well, with casts for member access where appropriate:
277	\begin{lstlisting}
278	struct _pair_conc0 {
279	void* first;
280	void* second;
281	};
282	\end{lstlisting}
283
284	\TODO{} Maybe move this after the rest of the discussion.
285	This re-use of dtype-static struct instantiations enables some useful programming patterns at zero runtime cost. The most important such pattern is using @forall(dtype T) T@ as a type-checked replacement for @void@, as in this example, which takes a @qsort@ or @bsearch@-compatible comparison routine and creates a similar lexicographic comparison for pairs of pointers:
286	\begin{lstlisting}
287	forall(dtype T)
288	int lexcmp( pair(T, T)* a, pair(T, T)* b, int (cmp)(T, T*) ) {
289	int c = cmp(a->first, b->first);
290	if ( c == 0 ) c = cmp(a->second, b->second);
291	return c;
292	}
293	\end{lstlisting}
294	Since @pair(T, T)@ is a concrete type, there are no added implicit parameters to @lexcmp@, so the code generated by \CFA{} will be effectively identical to a version of this written in standard C using @void*@, yet the \CFA{} version will be type-checked to ensure that the fields of both pairs and the arguments to the comparison function match in type.
295
296	Another useful pattern enabled by re-used dtype-static type instantiations is zero-cost ``tag'' structs. Sometimes a particular bit of information is only useful for type-checking, and can be omitted at runtime. Tag structs can be used to provide this information to the compiler without further runtime overhead, as in the following example:
297	\begin{lstlisting}
298	forall(dtype Unit) struct scalar { unsigned long value; };
299
300	struct metres {};
301	struct litres {};
302
303	forall(dtype U)
304	scalar(U) ?+?(scalar(U) a, scalar(U) b) {
305	return (scalar(U)){ a.value + b.value };
306	}
307
308	scalar(metres) half_marathon = { 21093 };
309	scalar(litres) swimming_pool = { 2500000 };
310
311	scalar(metres) marathon = half_marathon + half_marathon;
312	scalar(litres) two_pools = swimming_pool + swimming_pool;
313	marathon + swimming_pool; // ERRORv -- caught by compiler
314	\end{lstlisting}
315	@scalar@ is a dtype-static type, so all uses of it will use a single struct definition, containing only a single @unsigned long@, and can share the same implementations of common routines like @?+?@ -- these implementations may even be separately compiled, unlike \CC{} template functions. However, the \CFA{} type-checker will ensure that matching types are used by all calls to @?+?@, preventing nonsensical computations like adding the length of a marathon to the volume of an olympic pool.
316
317	\section{Tuples}
318
319	\TODO{} Integrate Rob's work
320
321	\TODO{} Check if we actually can use ttype parameters on generic types (if they set the complete flag, it should work, or nearly so).
322
323	\section{Related Work}
324
325	\TODO{} Talk about \CC{}, Cyclone, \etc{}
326
327	\section{Conclusion}
328
329	\TODO{}
330
331	\bibliographystyle{ACM-Reference-Format}
332	\bibliography{generic_types}
333
334	\end{document}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: