source: doc/generic_types/generic_types.tex @ fe1b6a4

ADTaaron-thesisarm-ehast-experimentalcleanup-dtorsdeferred_resndemanglerenumforall-pointer-decayjacob/cs343-translationjenkins-sandboxnew-astnew-ast-unique-exprnew-envno_listpersistent-indexerpthread-emulationqualifiedEnumresolv-newwith_gc
Last change on this file since fe1b6a4 was 1ca52db, checked in by Aaron Moss <a3moss@…>, 8 years ago

Add more background and generics material to paper

  • Property mode set to 100644
File size: 21.6 KB
Line 
1% take of review (for line numbers) and anonymous (for anonymization) on submission
2\documentclass[format=acmlarge, anonymous, review]{acmart}
3
4\usepackage{listings}   % For code listings
5
6% Useful macros
7\newcommand{\CFA}{C$\mathbf\forall$} % Cforall symbolic name
8\newcommand{\CC}{\rm C\kern-.1em\hbox{+\kern-.25em+}} % C++ symbolic name
9\newcommand{\CCeleven}{\rm C\kern-.1em\hbox{+\kern-.25em+}11} % C++11 symbolic name
10\newcommand{\CCfourteen}{\rm C\kern-.1em\hbox{+\kern-.25em+}14} % C++14 symbolic name
11\newcommand{\CCseventeen}{\rm C\kern-.1em\hbox{+\kern-.25em+}17} % C++17 symbolic name
12
13\newcommand{\TODO}{\textbf{TODO}}
14\newcommand{\eg}{\textit{e}.\textit{g}.}
15\newcommand{\ie}{\textit{i}.\textit{e}.}
16\newcommand{\etc}{\textit{etc}.}
17
18% CFA programming language, based on ANSI C (with some gcc additions)
19\lstdefinelanguage{CFA}[ANSI]{C}{
20        morekeywords={_Alignas,_Alignof,__alignof,__alignof__,asm,__asm,__asm__,_At,_Atomic,__attribute,__attribute__,auto,
21                _Bool,catch,catchResume,choose,_Complex,__complex,__complex__,__const,__const__,disable,dtype,enable,__extension__,
22                fallthrough,fallthru,finally,forall,ftype,_Generic,_Imaginary,inline,__label__,lvalue,_Noreturn,one_t,otype,restrict,sized,_Static_assert,
23                _Thread_local,throw,throwResume,trait,try,typeof,__typeof,__typeof__,zero_t},
24}%
25
26\lstset{
27language=CFA,
28columns=fullflexible,
29basicstyle=\linespread{0.9}\sf,                                                 % reduce line spacing and use sanserif font
30stringstyle=\tt,                                                                                % use typewriter font
31tabsize=4,                                                                                              % 4 space tabbing
32xleftmargin=\parindent,                                                                 % indent code to paragraph indentation
33% extendedchars=true,                                                                   % allow ASCII characters in the range 128-255
34% escapechar=§,                                                                                 % LaTeX escape in CFA code §...§ (section symbol), emacs: C-q M-'
35mathescape=true,                                                                                % LaTeX math escape in CFA code $...$
36keepspaces=true,                                                                                %
37showstringspaces=false,                                                                 % do not show spaces with cup
38showlines=true,                                                                                 % show blank lines at end of code
39aboveskip=4pt,                                                                                  % spacing above/below code block
40belowskip=3pt,
41% replace/adjust listing characters that look bad in sanserif
42literate={-}{\raisebox{-0.15ex}{\texttt{-}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1
43        {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 {_}{\makebox[1.2ex][c]{\rule{1ex}{0.1ex}}}1 {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
44        {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{$\rightarrow$}2,
45% moredelim=**[is][\color{red}]{®}{®},                                  % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
46% moredelim=**[is][\color{blue}]{ß}{ß},                                 % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
47% moredelim=**[is][\color{OliveGreen}]{¢}{¢},                   % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
48% moredelim=[is][\lstset{keywords={}}]{¶}{¶},                   % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
49}% lstset
50
51% inline code @...@
52\lstMakeShortInline@
53
54% ACM Information
55\citestyle{acmauthoryear}
56
57\acmJournal{PACMPL}
58
59\title{Generic and Tuple Types with Efficient Dynamic Layout in \CFA{}}
60
61\author{Aaron Moss}
62\affiliation{%
63        \institution{University of Waterloo}
64        \department{David R. Cheriton School of Computer Science}
65        \streetaddress{Davis Centre, University of Waterloo}
66        \city{Waterloo}
67        \state{ON}
68        \postcode{N2L 3G1}
69        \country{Canada}
70}
71\email{a3moss@uwaterloo.ca}
72
73\author{Robert Schluntz}
74\affiliation{%
75        \institution{University of Waterloo}
76        \department{David R. Cheriton School of Computer Science}
77        \streetaddress{Davis Centre, University of Waterloo}
78        \city{Waterloo}
79        \state{ON}
80        \postcode{N2L 3G1}
81        \country{Canada}
82}
83\email{rschlunt@uwaterloo.ca}
84
85\author{Peter Buhr}
86\affiliation{%
87        \institution{University of Waterloo}
88        \department{David R. Cheriton School of Computer Science}
89        \streetaddress{Davis Centre, University of Waterloo}
90        \city{Waterloo}
91        \state{ON}
92        \postcode{N2L 3G1}
93        \country{Canada}
94}
95\email{pabuhr@uwaterloo.ca}
96
97\terms{generic, tuple, types}
98\keywords{generic types, tuple types, polymorphic functions, C, Cforall}
99
100\begin{CCSXML}
101<ccs2012>
102<concept>
103<concept_id>10011007.10011006.10011008.10011024.10011025</concept_id>
104<concept_desc>Software and its engineering~Polymorphism</concept_desc>
105<concept_significance>500</concept_significance>
106</concept>
107<concept>
108<concept_id>10011007.10011006.10011008.10011024.10011028</concept_id>
109<concept_desc>Software and its engineering~Data types and structures</concept_desc>
110<concept_significance>500</concept_significance>
111</concept>
112<concept>
113<concept_id>10011007.10011006.10011041.10011047</concept_id>
114<concept_desc>Software and its engineering~Source code generation</concept_desc>
115<concept_significance>300</concept_significance>
116</concept>
117</ccs2012>
118\end{CCSXML}
119
120\ccsdesc[500]{Software and its engineering~Polymorphism}
121\ccsdesc[500]{Software and its engineering~Data types and structures}
122\ccsdesc[300]{Software and its engineering~Source code generation}
123
124\begin{abstract}
125\TODO{} Write abstract.
126\end{abstract}
127
128\begin{document}
129
130\maketitle
131
132\section{Introduction \& Background}
133
134\CFA{}\footnote{Pronounced ``C-for-all'', and written \CFA{} or Cforall.} is an evolutionary extension of the C programming language which aims to add modern language features to C while maintaining both source compatibility with C and a familiar mental model for programmers. Four key design goals were set out in the original design of \CFA{} \citep{Bilson03}:
135\begin{enumerate}
136\item The behaviour of standard C code must remain the same when translated by a \CFA{} compiler as when translated by a C compiler.
137\item Standard C code must be as fast and as small when translated by a \CFA{} compiler as when translated by a C compiler.
138\item \CFA{} code must be at least as portable as standard C code.
139\item Extensions introduced by \CFA{} must be translated in the most efficient way possible.
140\end{enumerate}
141The purpose of these goals is to ensure that existing C codebases can be converted to \CFA{} incrementally and with minimal effort, and that programmers who already know C can productively produce \CFA{} code without extensive training beyond the extension features they wish to employ.
142
143\CFA{} has been previously extended with polymorphic functions and name overloading (including operator overloading) \citep{Bilson03}, and deterministically-executed constructors and destructors \citep{Schluntz17}. This paper describes how generic and tuple types are designed and implemented in \CFA{} in accordance with both the backward compatibility goals and existing features described above.
144
145\subsection{Polymorphic Functions}
146\label{sec:poly-fns}
147
148\CFA{}'s polymorphism was originally formalized by \citet{Ditchfield92}, and first implemented by \citet{Bilson03}. The signature feature of \CFA{} is parametric-polymorphic functions; such functions are written using a @forall@ clause (which gives the language its name):
149\begin{lstlisting}
150forall(otype T)
151T identity(T x) {
152    return x;
153}
154
155int forty_two = identity(42); // T is bound to int, forty_two == 42
156\end{lstlisting}
157The @identity@ function above can be applied to any complete object type (or ``@otype@''). The type variable @T@ is transformed into a set of additional implicit parameters to @identity@, which encode sufficient information about @T@ to create and return a variable of that type. The \CFA{} implementation passes the size and alignment of the type represented by an @otype@ parameter, as well as an assignment operator, constructor, copy constructor and destructor. If this extra information is not needed, the type parameter can be declared as @dtype T@, where @dtype@ is short for ``data type''.
158
159Here, the runtime cost of polymorphism is spread over each polymorphic call, due to passing more arguments to polymorphic functions; preliminary experiments have shown this overhead to be similar to \CC{} virtual function calls. An advantage of this design is that, unlike \CC{} template functions, \CFA{} @forall@ functions are compatible with separate compilation.
160
161Since bare polymorphic types do not provide a great range of available operations, \CFA{} provides a \emph{type assertion} mechanism to provide further information about a type:
162\begin{lstlisting}
163forall(otype T | { T twice(T); })
164T four_times(T x) {
165    return twice( twice(x) );
166}
167
168double twice(double d) { return d * 2.0; } // (1)
169
170double magic = four_times(10.5); // T is bound to double, uses (1) to satisfy type assertion
171\end{lstlisting}
172These type assertions may be either variable or function declarations that depend on a polymorphic type variable. @four_times@ can only be called with an argument for which there exists a function named @twice@ that can take that argument and return another value of the same type; a pointer to the appropriate @twice@ function is passed as an additional implicit parameter to the call of @four_times@.
173
174Monomorphic specializations of polymorphic functions can themselves be used to satisfy type assertions. For instance, @twice@ could have been defined using the \CFA{} syntax for operator overloading as:
175\begin{lstlisting}
176forall(otype S | { S ?+?(S, S); })
177S twice(S x) { return x + x; }  // (2)
178\end{lstlisting} 
179This version of @twice@ works for any type @S@ that has an addition operator defined for it, and it could have been used to satisfy the type assertion on @four_times@.
180The compiler accomplishes this by creating a wrapper function calling @twice // (2)@ with @S@ bound to @double@, then providing this wrapper function to @four_times@\footnote{\lstinline@twice // (2)@ could also have had a type parameter named \lstinline@T@; \CFA{} specifies renaming of the type parameters, which would avoid the name conflict with the type variable \lstinline@T@ of \lstinline@four_times@.}.
181
182\subsection{Traits}
183
184\CFA{} provides \emph{traits} as a means to name a group of type assertions, as in the example below:
185\begin{lstlisting}
186trait has_magnitude(otype T) {
187    bool ?<?(T, T);  // comparison operator for T
188    T -?(T);  // negation operator for T
189    void ?{}(T*, zero_t);  // constructor from 0 literal
190};
191
192forall(otype M | has_magnitude(M))
193M abs( M m ) {
194    M zero = { 0 };  // uses zero_t constructor from trait
195    return m < zero ? -m : m;
196}
197
198forall(otype M | has_magnitude(M))
199M max_magnitude( M a, M b ) {
200    return abs(a) < abs(b) ? b : a;
201}
202\end{lstlisting}
203This capability allows specifying the same set of assertions in multiple locations, without the repetition and likelihood of mistakes that come with manually writing them out for each function declaration.
204
205@otype@ is essentially syntactic sugar for the following trait:
206\begin{lstlisting}
207trait otype(dtype T | sized(T)) {
208        // sized is a compiler-provided pseudo-trait for types with known size & alignment
209        void ?{}(T*);  // default constructor
210        void ?{}(T*, T);  // copy constructor
211        void ?=?(T*, T);  // assignment operator
212        void ^?{}(T*);  // destructor
213};
214\end{lstlisting}
215Given this information, variables of polymorphic type can be treated as if they were a complete struct type -- they can be stack-allocated using the @alloca@ compiler builtin, default or copy-initialized, assigned, and deleted. As an example, the @abs@ function above would produce generated code something like the following (simplified for clarity and brevity):
216\begin{lstlisting}
217void abs( size_t _sizeof_M, size_t _alignof_M,
218                void (*_ctor_M)(void*), void (*_copy_M)(void*, void*),
219                void (*_assign_M)(void*, void*), void (*_dtor_M)(void*),
220                bool (*_lt_M)(void*, void*), void (*_neg_M)(void*, void*),
221        void (*_ctor_M_zero)(void*, int),
222                void* m, void* _rtn ) {  // polymorphic parameter and return passed as void*
223        // M zero = { 0 };
224        void* zero = alloca(_sizeof_M);  // stack allocate 0 temporary
225        _ctor_M_zero(zero, 0);  // initialize using zero_t constructor
226        // return m < zero ? -m : m;
227        void *_tmp = alloca(_sizeof_M)
228        _copy_M( _rtn,  // copy-initialize return value
229                _lt_M( m, zero ) ?  // check condition
230                 (_neg_M(m, _tmp), _tmp) :  // negate m
231                 m);
232        _dtor_M(_tmp); _dtor_M(zero);  // destroy temporaries
233}
234\end{lstlisting}
235
236Semantically, traits are simply a named lists of type assertions, but they may be used for many of the same purposes that interfaces in Java or abstract base classes in \CC{} are used for. Unlike Java interfaces or \CC{} base classes, \CFA{} types do not explicitly state any inheritance relationship to traits they satisfy; this can be considered a form of structural inheritance, similar to implementation of an interface in Go, as opposed to the nominal inheritance model of Java and \CC{}. Nominal inheritance can be simulated with traits using marker variables or functions:
237\begin{lstlisting}
238trait nominal(otype T) {
239    T is_nominal;
240};
241
242int is_nominal;  // int now satisfies the nominal trait
243{
244    char is_nominal; // char satisfies the nominal trait
245}
246// char no longer satisfies the nominal trait here 
247\end{lstlisting}
248
249Traits, however, are significantly more powerful than nominal-inheritance interfaces; firstly, due to the scoping rules of the declarations that satisfy a trait's type assertions, a type may not satisfy a trait everywhere that the type is declared, as with @char@ and the @nominal@ trait above. Secondly, traits may be used to declare a relationship among multiple types, a property that may be difficult or impossible to represent in nominal-inheritance type systems:
250\begin{lstlisting}
251trait pointer_like(otype Ptr, otype El) {
252    lvalue El *?(Ptr); // Ptr can be dereferenced into a modifiable value of type El
253}
254
255struct list {
256    int value;
257    list *next;  // may omit "struct" on type names
258};
259
260typedef list *list_iterator;
261
262lvalue int *?( list_iterator it ) {
263    return it->value;
264}
265\end{lstlisting}
266
267In the example above, @(list_iterator, int)@ satisfies @pointer_like@ by the user-defined dereference function, and @(list_iterator, list)@ also satisfies @pointer_like@ by the built-in dereference operator for pointers. Given a declaration @list_iterator it@, @*it@ can be either an @int@ or a @list@, with the meaning disambiguated by context (\eg, @int x = *it;@ interprets @*it@ as an @int@, while @(*it).value = 42;@ interprets @*it@ as a @list@).
268While a nominal-inheritance system with associated types could model one of those two relationships by making @El@ an associated type of @Ptr@ in the @pointer_like@ implementation, few such systems could model both relationships simultaneously.
269
270\section{Generic Types}
271
272The generic types design for \CFA{} must integrate efficiently and naturally with the existing polymorphic functions in \CFA{}, while retaining backwards compatibility with C; maintaining separate compilation is a particularly important constraint on the design. However, where the concrete parameters of the generic type are known, there should not be extra overhead for the use of a generic type.
273
274A generic type can be declared by placing a @forall@ specifier on a @struct@ or @union@ declaration, and instantiated using a parenthesized list of types after the type name:
275\begin{lstlisting}
276forall(otype R, otype S) struct pair {
277    R first;
278    S second;
279};
280
281forall(otype T)
282T value( pair(const char*, T) *p ) { return p->second; }
283
284forall(dtype F, otype T)
285T value_p( pair(F*, T*) p ) { return *p.second; }
286
287pair(const char*, int) p = { "magic", 42 };
288int magic = value( &p );
289
290pair(void*, int*) q = { 0, &p.second };
291magic = value_p( q );
292double d = 1.0;
293pair(double*, double*) r = { &d, &d };
294d = value_p( r );
295\end{lstlisting}
296
297\CFA{} classifies generic types as either \emph{concrete} or \emph{dynamic}. Dynamic generic types vary in their in-memory layout depending on their type parameters, while concrete generic types have a fixed memory layout regardless of type parameters. A type may have polymorphic parameters but still be concrete; \CFA{} refers to such types as \emph{dtype-static}. Polymorphic pointers are an example of dtype-static types -- @forall(dtype T) T*@ is a polymorphic type, but for any @T@ chosen, @T*@ will have exactly the same in-memory representation as a @void*@, and can therefore be represented by a @void*@ in code generation.
298
299The \CFA{} compiler instantiates concrete generic types by template-expanding them to fresh struct types; concrete generic types can therefore be used with zero runtime overhead. To enable interoperation between equivalent instantiations of a generic type, the compiler saves the set of instantiations currently in scope and re-uses the generated struct declarations where appropriate. As an example, the concrete instantiation for @pair(const char*, int)@ would look something like this:
300\begin{lstlisting}
301struct _pair_conc1 {
302        const char* first;
303        int second;
304};
305\end{lstlisting}
306
307A concrete generic type with dtype-static parameters is also expanded to a struct type, but this struct type is used for all matching instantiations. In the example above, the @pair(F*, T*)@ parameter to @value_p@ is such a type; its expansion would look something like this, and be used as the type of the variables @q@ and @r@ as well, with casts for member access where appropriate:
308\begin{lstlisting}
309struct _pair_conc0 {
310        void* first;
311        void* second;
312};
313\end{lstlisting}
314
315Though \CFA{} implements concrete generic types efficiently, it also has a fully-general system for computing with dynamic generic types. As mentioned in Section~\ref{sec:poly-fns}, @otype@ function parameters (in fact all @sized@ polymorphic parameters) come with implicit size and alignment parameters provided by the caller. Dynamic generic structs also come with an \emph{offset array} which contains the offsets of each member of the struct\footnote{Dynamic generic unions need no such offset array, as all members are at offset 0.}. Access to members\footnote{The \lstinline@offsetof@ macro is implmented similarly.} of a dynamic generic struct is provided by adding the corresponding member of the offset array to the struct pointer at runtime, essentially moving a compile-time offset calculation to runtime where neccessary.
316
317\TODO{} Discuss caller-provided versus callee-provided offset arrays, layout functions.
318
319Whether a type is concrete, dtype-static, or dynamic is decided based solely on the type parameters and @forall@ clause on the struct declaration. This allows opaque forward declarations of generic types like @forall(otype T) struct Box;@ -- like in C, all uses of @Box(T)@ can be in a separately compiled translation unit, and callers from other translation units will know the proper calling conventions to use. If the definition of a struct type was included in the determination of dynamic or concrete, some further types may be recognized as dtype-static (\eg, @forall(otype T) struct unique_ptr { T* p };@ does not depend on @T@ for its layout, but the existence of an @otype@ parameter means that it \emph{could}.), but preserving separate compilation (and the associated C compatibility) in this way is judged to be an appropriate trade-off.
320
321The re-use of dtype-static struct instantiations enables some useful programming patterns at zero runtime cost. The most important such pattern is using @forall(dtype T) T*@ as a type-checked replacement for @void*@, as in this example, which takes a @qsort@ or @bsearch@-compatible comparison routine and creates a similar lexicographic comparison for pairs of pointers:
322\begin{lstlisting}
323forall(dtype T)
324int lexcmp( pair(T*, T*)* a, pair(T*, T*)* b, int (*cmp)(T*, T*) ) {
325        int c = cmp(a->first, b->first);
326        if ( c == 0 ) c = cmp(a->second, b->second);
327        return c;
328}
329\end{lstlisting}
330Since @pair(T*, T*)@ is a concrete type, there are no added implicit parameters to @lexcmp@, so the code generated by \CFA{} will be effectively identical to a version of this written in standard C using @void*@, yet the \CFA{} version will be type-checked to ensure that the fields of both pairs and the arguments to the comparison function match in type.
331
332Another useful pattern enabled by re-used dtype-static type instantiations is zero-cost ``tag'' structs. Sometimes a particular bit of information is only useful for type-checking, and can be omitted at runtime. Tag structs can be used to provide this information to the compiler without further runtime overhead, as in the following example:
333\begin{lstlisting}
334forall(dtype Unit) struct scalar { unsigned long value; };
335
336struct metres {};
337struct litres {};
338
339forall(dtype U)
340scalar(U) ?+?(scalar(U) a, scalar(U) b) {
341        return (scalar(U)){ a.value + b.value };
342}
343
344scalar(metres) half_marathon = { 21093 };
345scalar(litres) swimming_pool = { 2500000 };
346
347scalar(metres) marathon = half_marathon + half_marathon;
348scalar(litres) two_pools = swimming_pool + swimming_pool;
349marathon + swimming_pool; // ERROR -- caught by compiler
350\end{lstlisting}
351@scalar@ is a dtype-static type, so all uses of it will use a single struct definition, containing only a single @unsigned long@, and can share the same implementations of common routines like @?+?@ -- these implementations may even be separately compiled, unlike \CC{} template functions. However, the \CFA{} type-checker will ensure that matching types are used by all calls to @?+?@, preventing nonsensical computations like adding the length of a marathon to the volume of an olympic pool.
352
353\section{Tuples}
354
355\TODO{} Integrate Rob's work
356
357\TODO{} Check if we actually can use ttype parameters on generic types (if they set the complete flag, it should work, or nearly so).
358
359\section{Related Work}
360
361\TODO{} Talk about \CC{}, Cyclone, maybe D, Rust, \etc{}
362
363A major difference between the approaches of \CC{} and \CFA{} to polymorphism is that the set of assumed properties for a type is \emph{explicit} in \CFA{}. One of the major limiting factors of \CC{}'s approach is that templates cannot be separately compiled. In contrast, the explicit nature of assertions allows \CFA{}'s polymorphic functions to be separately compiled.
364
365\section{Conclusion \& Future Work}
366
367\TODO{} Among future work, talk about ideas for selectively template-expanding code (on decls for specific types, or on calls for accessible decls).
368
369\bibliographystyle{ACM-Reference-Format}
370\bibliography{generic_types}
371
372\end{document}
Note: See TracBrowser for help on using the repository browser.