1 | % take of review (for line numbers) and anonymous (for anonymization) on submission |
---|
2 | \documentclass[format=acmlarge, anonymous, review]{acmart} |
---|
3 | |
---|
4 | \usepackage{listings} % For code listings |
---|
5 | |
---|
6 | % Useful macros |
---|
7 | \newcommand{\CFA}{C$\mathbf\forall$} % Cforall symbolic name |
---|
8 | \newcommand{\CC}{\rm C\kern-.1em\hbox{+\kern-.25em+}} % C++ symbolic name |
---|
9 | \newcommand{\CCeleven}{\rm C\kern-.1em\hbox{+\kern-.25em+}11} % C++11 symbolic name |
---|
10 | \newcommand{\CCfourteen}{\rm C\kern-.1em\hbox{+\kern-.25em+}14} % C++14 symbolic name |
---|
11 | \newcommand{\CCseventeen}{\rm C\kern-.1em\hbox{+\kern-.25em+}17} % C++17 symbolic name |
---|
12 | |
---|
13 | \newcommand{\TODO}{\textbf{TODO}} |
---|
14 | \newcommand{\eg}{\textit{e}.\textit{g}.} |
---|
15 | \newcommand{\ie}{\textit{i}.\textit{e}.} |
---|
16 | \newcommand{\etc}{\textit{etc}.} |
---|
17 | |
---|
18 | % CFA programming language, based on ANSI C (with some gcc additions) |
---|
19 | \lstdefinelanguage{CFA}[ANSI]{C}{ |
---|
20 | morekeywords={_Alignas,_Alignof,__alignof,__alignof__,asm,__asm,__asm__,_At,_Atomic,__attribute,__attribute__,auto, |
---|
21 | _Bool,catch,catchResume,choose,_Complex,__complex,__complex__,__const,__const__,disable,dtype,enable,__extension__, |
---|
22 | fallthrough,fallthru,finally,forall,ftype,_Generic,_Imaginary,inline,__label__,lvalue,_Noreturn,one_t,otype,restrict,_Static_assert, |
---|
23 | _Thread_local,throw,throwResume,trait,try,typeof,__typeof,__typeof__,zero_t}, |
---|
24 | }% |
---|
25 | |
---|
26 | \lstset{ |
---|
27 | language=CFA, |
---|
28 | columns=fullflexible, |
---|
29 | basicstyle=\linespread{0.9}\sf, % reduce line spacing and use sanserif font |
---|
30 | stringstyle=\tt, % use typewriter font |
---|
31 | tabsize=4, % 4 space tabbing |
---|
32 | xleftmargin=\parindent, % indent code to paragraph indentation |
---|
33 | % extendedchars=true, % allow ASCII characters in the range 128-255 |
---|
34 | % escapechar=§, % LaTeX escape in CFA code §...§ (section symbol), emacs: C-q M-' |
---|
35 | mathescape=true, % LaTeX math escape in CFA code $...$ |
---|
36 | keepspaces=true, % |
---|
37 | showstringspaces=false, % do not show spaces with cup |
---|
38 | showlines=true, % show blank lines at end of code |
---|
39 | aboveskip=4pt, % spacing above/below code block |
---|
40 | belowskip=3pt, |
---|
41 | % replace/adjust listing characters that look bad in sanserif |
---|
42 | literate={-}{\raisebox{-0.15ex}{\texttt{-}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1 |
---|
43 | {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 {_}{\makebox[1.2ex][c]{\rule{1ex}{0.1ex}}}1 {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1 |
---|
44 | {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{$\rightarrow$}2, |
---|
45 | % moredelim=**[is][\color{red}]{®}{®}, % red highlighting ®...® (registered trademark symbol) emacs: C-q M-. |
---|
46 | % moredelim=**[is][\color{blue}]{ß}{ß}, % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_ |
---|
47 | % moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-" |
---|
48 | % moredelim=[is][\lstset{keywords={}}]{¶}{¶}, % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^ |
---|
49 | }% lstset |
---|
50 | |
---|
51 | % inline code @...@ |
---|
52 | \lstMakeShortInline@ |
---|
53 | |
---|
54 | % ACM Information |
---|
55 | \citestyle{acmauthoryear} |
---|
56 | |
---|
57 | \acmJournal{PACMPL} |
---|
58 | |
---|
59 | \title{Generic Types with Efficient Dynamic Layout in \CFA{}} |
---|
60 | |
---|
61 | \author{Aaron Moss} |
---|
62 | \affiliation{% |
---|
63 | \institution{University of Waterloo} |
---|
64 | \department{David R. Cheriton School of Computer Science} |
---|
65 | \streetaddress{Davis Centre, University of Waterloo} |
---|
66 | \city{Waterloo} |
---|
67 | \state{ON} |
---|
68 | \postcode{N2L 3G1} |
---|
69 | \country{Canada} |
---|
70 | } |
---|
71 | \email{a3moss@uwaterloo.ca} |
---|
72 | |
---|
73 | \author{Robert Schluntz} |
---|
74 | \affiliation{% |
---|
75 | \institution{University of Waterloo} |
---|
76 | \department{David R. Cheriton School of Computer Science} |
---|
77 | \streetaddress{Davis Centre, University of Waterloo} |
---|
78 | \city{Waterloo} |
---|
79 | \state{ON} |
---|
80 | \postcode{N2L 3G1} |
---|
81 | \country{Canada} |
---|
82 | } |
---|
83 | \email{rschlunt@uwaterloo.ca} |
---|
84 | |
---|
85 | \author{Peter Buhr} |
---|
86 | \affiliation{% |
---|
87 | \institution{University of Waterloo} |
---|
88 | \department{David R. Cheriton School of Computer Science} |
---|
89 | \streetaddress{Davis Centre, University of Waterloo} |
---|
90 | \city{Waterloo} |
---|
91 | \state{ON} |
---|
92 | \postcode{N2L 3G1} |
---|
93 | \country{Canada} |
---|
94 | } |
---|
95 | \email{pabuhr@uwaterloo.ca} |
---|
96 | |
---|
97 | \terms{generic, types} |
---|
98 | \keywords{generic types, polymorphic functions, Cforall} |
---|
99 | |
---|
100 | \begin{CCSXML} |
---|
101 | <ccs2012> |
---|
102 | <concept> |
---|
103 | <concept_id>10011007.10011006.10011008.10011024.10011025</concept_id> |
---|
104 | <concept_desc>Software and its engineering~Polymorphism</concept_desc> |
---|
105 | <concept_significance>500</concept_significance> |
---|
106 | </concept> |
---|
107 | <concept> |
---|
108 | <concept_id>10011007.10011006.10011008.10011024.10011028</concept_id> |
---|
109 | <concept_desc>Software and its engineering~Data types and structures</concept_desc> |
---|
110 | <concept_significance>500</concept_significance> |
---|
111 | </concept> |
---|
112 | <concept> |
---|
113 | <concept_id>10011007.10011006.10011041.10011047</concept_id> |
---|
114 | <concept_desc>Software and its engineering~Source code generation</concept_desc> |
---|
115 | <concept_significance>300</concept_significance> |
---|
116 | </concept> |
---|
117 | </ccs2012> |
---|
118 | \end{CCSXML} |
---|
119 | |
---|
120 | \ccsdesc[500]{Software and its engineering~Polymorphism} |
---|
121 | \ccsdesc[500]{Software and its engineering~Data types and structures} |
---|
122 | \ccsdesc[300]{Software and its engineering~Source code generation} |
---|
123 | |
---|
124 | \begin{abstract} |
---|
125 | \TODO{} Write abstract. |
---|
126 | \end{abstract} |
---|
127 | |
---|
128 | \begin{document} |
---|
129 | |
---|
130 | \maketitle |
---|
131 | |
---|
132 | \section{Introduction \& Background} |
---|
133 | |
---|
134 | \CFA{}\footnote{Pronounced ``C-for-all'', and written \CFA{} or Cforall.} is an evolutionary extension of the C programming language which aims to add modern language features to C while maintaining both source compatibility with C and a familiar mental model for programmers. This paper describes how generic types are designed and implemented in \CFA{}, and how they interact with \CFA{}'s polymorphic functions. |
---|
135 | |
---|
136 | \subsection{Polymorphic Functions} |
---|
137 | |
---|
138 | \CFA{}'s polymorphism was originally formalized by \citet{Ditchfield92}, and first implemented by \citet{Bilson03}. The signature feature of \CFA{} is parametric-polymorphic functions; such functions are written using a @forall@ clause (which gives the language its name): |
---|
139 | \begin{lstlisting} |
---|
140 | forall(otype T) |
---|
141 | T identity(T x) {is_ |
---|
142 | return x; |
---|
143 | } |
---|
144 | |
---|
145 | int forty_two = identity(42); // T is bound to int, forty_two == 42 |
---|
146 | \end{lstlisting} |
---|
147 | The @identity@ function above can be applied to any complete object type (or ``@otype@''). The type variable @T@ is transformed into a set of additional implicit parameters to @identity@, which encode sufficient information about @T@ to create and return a variable of that type. The \CFA{} implementation passes the size and alignment of the type represented by an @otype@ parameter, as well as an assignment operator, constructor, copy constructor and destructor. If this extra information is not needed, the type parameter can be declared as @dtype T@, where @dtype@ is short for ``data type''. |
---|
148 | |
---|
149 | Here, the runtime cost of polymorphism is spread over each polymorphic call, due to passing more arguments to polymorphic functions; preliminary experiments have shown this overhead to be similar to \CC{} virtual function calls. An advantage of this design is that, unlike \CC{} template functions, \CFA{} @forall@ functions are compatible with separate compilation. |
---|
150 | |
---|
151 | Since bare polymorphic types do not provide a great range of available operations, \CFA{} provides a \emph{type assertion} mechanism to provide further information about a type: |
---|
152 | \begin{lstlisting} |
---|
153 | forall(otype T | { T twice(T); }) |
---|
154 | T four_times(T x) { |
---|
155 | return twice( twice(x) ); |
---|
156 | } |
---|
157 | |
---|
158 | double twice(double d) { return d * 2.0; } // (1) |
---|
159 | |
---|
160 | double magic = four_times(10.5); // T is bound to double, uses (1) to satisfy type assertion |
---|
161 | \end{lstlisting} |
---|
162 | These type assertions may be either variable or function declarations that depend on a polymorphic type variable. @four_times@ can only be called with an argument for which there exists a function named @twice@ that can take that argument and return another value of the same type; a pointer to the appropriate @twice@ function is passed as an additional implicit parameter to the call of @four_times@. |
---|
163 | |
---|
164 | Monomorphic specializations of polymorphic functions can themselves be used to satisfy type assertions. For instance, @twice@ could have been defined using the \CFA{} syntax for operator overloading as: |
---|
165 | \begin{lstlisting} |
---|
166 | forall(otype S | { S ?+?(S, S); }) |
---|
167 | S twice(S x) { return x + x; } // (2) |
---|
168 | \end{lstlisting} |
---|
169 | This version of @twice@ works for any type @S@ that has an addition operator defined for it, and it could have been used to satisfy the type assertion on @four_times@. |
---|
170 | The compiler accomplishes this by creating a wrapper function calling @twice // (2)@ with @S@ bound to @double@, then providing this wrapper function to @four_times@\footnote{\lstinline@twice // (2)@ could also have had a type parameter named \lstinline@T@; \CFA{} specifies renaming of the type parameters, which would avoid the name conflict with the type variable \lstinline@T@ of \lstinline@four_times@.}. |
---|
171 | |
---|
172 | \subsection{Traits} |
---|
173 | |
---|
174 | \CFA{} provides \emph{traits} as a means to name a group of type assertions, as in the example below: |
---|
175 | \begin{lstlisting} |
---|
176 | trait has_magnitude(otype T) { |
---|
177 | bool ?<?(T, T); // comparison operator for T |
---|
178 | T -?(T); // negation operator for T |
---|
179 | void ?{}(T*, zero_t); // constructor from 0 literal |
---|
180 | }; |
---|
181 | |
---|
182 | forall(otype M | has_magnitude(M)) |
---|
183 | M abs( M m ) { |
---|
184 | M zero = { 0 }; // uses zero_t constructor from trait |
---|
185 | return m < zero ? -m : m; |
---|
186 | } |
---|
187 | |
---|
188 | forall(otype M | has_magnitude(M)) |
---|
189 | M max_magnitude( M a, M b ) { |
---|
190 | return abs(a) < abs(b) ? b : a; |
---|
191 | } |
---|
192 | \end{lstlisting} |
---|
193 | |
---|
194 | @otype@ is essentially syntactic sugar for the following trait: |
---|
195 | \begin{lstlisting} |
---|
196 | trait otype(dtype T | sized(T)) { |
---|
197 | // sized is a compiler-provided pseudo-trait for types with known size & alignment |
---|
198 | void ?{}(T*); // default constructor |
---|
199 | void ?{}(T*, T); // copy constructor |
---|
200 | T ?=?(T*, T); // assignment operator |
---|
201 | void ^?{}(T*); // destructor |
---|
202 | }; |
---|
203 | \end{lstlisting} |
---|
204 | |
---|
205 | Semantically, traits are simply a named lists of type assertions, but they may be used for many of the same purposes that interfaces in Java or abstract base classes in \CC{} are used for. Unlike Java interfaces or \CC{} base classes, \CFA{} types do not explicitly state any inheritance relationship to traits they satisfy; this can be considered a form of structural inheritance, similar to implementation of an interface in Go, as opposed to the nominal inheritance model of Java and \CC{}. Nominal inheritance can be simulated with traits using marker variables or functions: |
---|
206 | \begin{lstlisting} |
---|
207 | trait nominal(otype T) { |
---|
208 | T is_nominal; |
---|
209 | }; |
---|
210 | |
---|
211 | int is_nominal; // int now satisfies the nominal trait |
---|
212 | { |
---|
213 | char is_nominal; // char satisfies the nominal trait |
---|
214 | } |
---|
215 | // char no longer satisfies the nominal trait here |
---|
216 | \end{lstlisting} |
---|
217 | |
---|
218 | Traits, however, are significantly more powerful than nominal-inheritance interfaces; firstly, due to the scoping rules of the declarations that satisfy a trait's type assertions, a type may not satisfy a trait everywhere that the type is declared, as with @char@ and the @nominal@ trait above. Secondly, traits may be used to declare a relationship among multiple types, a property that may be difficult or impossible to represent in nominal-inheritance type systems: |
---|
219 | \begin{lstlisting} |
---|
220 | trait pointer_like(otype Ptr, otype El) { |
---|
221 | lvalue El *?(Ptr); // Ptr can be dereferenced into a modifiable value of type El |
---|
222 | } |
---|
223 | |
---|
224 | struct list { |
---|
225 | int value; |
---|
226 | list *next; // may omit "struct" on type names |
---|
227 | }; |
---|
228 | |
---|
229 | typedef list *list_iterator; |
---|
230 | |
---|
231 | lvalue int *?( list_iterator it ) { |
---|
232 | return it->value; |
---|
233 | } |
---|
234 | \end{lstlisting} |
---|
235 | |
---|
236 | In the example above, @(list_iterator, int)@ satisfies @pointer_like@ by the user-defined dereference function, and @(list_iterator, list)@ also satisfies @pointer_like@ by the built-in dereference operator for pointers. Given a declaration @list_iterator it@, @*it@ can be either an @int@ or a @list@, with the meaning disambiguated by context (\eg, @int x = *it;@ interprets @*it@ as an @int@, while @(*it).value = 42;@ interprets @*it@ as a @list@). |
---|
237 | While a nominal-inheritance system with associated types could model one of those two relationships by making @El@ an associated type of @Ptr@ in the @pointer_like@ implementation, few such systems could model both relationships simultaneously. |
---|
238 | |
---|
239 | \section{Generic Types} |
---|
240 | |
---|
241 | The generic types design for \CFA{} must integrate efficiently and naturally with the existing polymorphic functions in \CFA{}, while retaining backwards compatibility with C; maintaining separate compilation is a particularly important constraint on the design. However, where the concrete parameters of the generic type are known, there should not be extra overhead for the use of a generic type. |
---|
242 | |
---|
243 | A generic type can be declared by placing a @forall@ specifier on a @struct@ or @union@ declaration, and instantiated using a parenthesized list of types after the type name: |
---|
244 | \begin{lstlisting} |
---|
245 | forall(otype R, otype S) struct pair { |
---|
246 | R first; |
---|
247 | S second; |
---|
248 | }; |
---|
249 | |
---|
250 | forall(otype T) |
---|
251 | T value( pair(const char*, T) *p ) { return p->second; } |
---|
252 | |
---|
253 | forall(dtype F, otype T) |
---|
254 | T value_p( pair(F*, T*) p ) { return *p.second; } |
---|
255 | |
---|
256 | pair(const char*, int) p = { "magic", 42 }; |
---|
257 | int magic = value( &p ); |
---|
258 | |
---|
259 | pair(void*, int*) q = { 0, &p.second }; |
---|
260 | magic = value_p( q ); |
---|
261 | double d = 1.0; |
---|
262 | pair(double*, double*) r = { &d, &d }; |
---|
263 | d = value_p( r ); |
---|
264 | \end{lstlisting} |
---|
265 | |
---|
266 | \CFA{} classifies generic types as either \emph{concrete} or \emph{dynamic}. Dynamic generic types vary in their in-memory layout depending on their type parameters, while concrete generic types have a fixed memory layout regardless of type parameters. A type may have polymorphic parameters but still be concrete; \CFA{} refers to such types as \emph{dtype-static}. Polymorphic pointers are an example of dtype-static types -- @forall(dtype T) T*@ is a polymorphic type, but for any @T@ chosen, @T*@ will have exactly the same in-memory representation as a @void*@, and can therefore be represented by a @void*@ in code generation. |
---|
267 | |
---|
268 | The \CFA{} compiler instantiates concrete generic types by template-expanding them to fresh struct types; concrete generic types can therefore be used with zero runtime overhead. To enable interoperation between equivalent instantiations of a generic type, the compiler saves the set of instantiations currently in scope and re-uses the generated struct declarations where appropriate. As an example, the concrete instantiation for @pair(const char*, int)@ would look something like this: |
---|
269 | \begin{lstlisting} |
---|
270 | struct _pair_conc1 { |
---|
271 | const char* first; |
---|
272 | int second; |
---|
273 | }; |
---|
274 | \end{lstlisting} |
---|
275 | |
---|
276 | A concrete generic type with dtype-static parameters is also expanded to a struct type, but this struct type is used for all matching instantiations. In the example above, the @pair(F*, T*)@ parameter to @value_p@ is such a type; its expansion would look something like this, and be used as the type of the variables @q@ and @r@ as well, with casts for member access where appropriate: |
---|
277 | \begin{lstlisting} |
---|
278 | struct _pair_conc0 { |
---|
279 | void* first; |
---|
280 | void* second; |
---|
281 | }; |
---|
282 | \end{lstlisting} |
---|
283 | |
---|
284 | \TODO{} Maybe move this after the rest of the discussion. |
---|
285 | This re-use of dtype-static struct instantiations enables some useful programming patterns at zero runtime cost. The most important such pattern is using @forall(dtype T) T*@ as a type-checked replacement for @void*@, as in this example, which takes a @qsort@ or @bsearch@-compatible comparison routine and creates a similar lexicographic comparison for pairs of pointers: |
---|
286 | \begin{lstlisting} |
---|
287 | forall(dtype T) |
---|
288 | int lexcmp( pair(T*, T*)* a, pair(T*, T*)* b, int (*cmp)(T*, T*) ) { |
---|
289 | int c = cmp(a->first, b->first); |
---|
290 | if ( c == 0 ) c = cmp(a->second, b->second); |
---|
291 | return c; |
---|
292 | } |
---|
293 | \end{lstlisting} |
---|
294 | Since @pair(T*, T*)@ is a concrete type, there are no added implicit parameters to @lexcmp@, so the code generated by \CFA{} will be effectively identical to a version of this written in standard C using @void*@, yet the \CFA{} version will be type-checked to ensure that the fields of both pairs and the arguments to the comparison function match in type. |
---|
295 | |
---|
296 | \TODO{} The second is zero-cost ``tag'' structs. |
---|
297 | |
---|
298 | \section{Tuples} |
---|
299 | |
---|
300 | \TODO{} Integrate Rob's work |
---|
301 | |
---|
302 | \TODO{} Check if we actually can use ttype parameters on generic types (if they set the complete flag, it should work, or nearly so). |
---|
303 | |
---|
304 | \section{Related Work} |
---|
305 | |
---|
306 | \TODO{} Talk about \CC{}, Cyclone, \etc{} |
---|
307 | |
---|
308 | \section{Conclusion} |
---|
309 | |
---|
310 | \TODO{} |
---|
311 | |
---|
312 | \bibliographystyle{ACM-Reference-Format} |
---|
313 | \bibliography{generic_types} |
---|
314 | |
---|
315 | \end{document} |
---|