Ignore:
Timestamp:
Mar 22, 2017, 5:22:06 PM (7 years ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
ADT, aaron-thesis, arm-eh, ast-experimental, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, pthread-emulation, qualifiedEnum, resolv-new, with_gc
Children:
9fcdfa3
Parents:
d9c8a59 (diff), c4187df (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

File:
1 edited

Legend:

Unmodified
Added
Removed
  • doc/generic_types/generic_types.tex

    rd9c8a59 rccd8bc3  
     1% take of review (for line numbers) and anonymous (for anonymization) on submission
    12\documentclass[format=acmlarge, anonymous, review]{acmart}
    23
     4\usepackage{listings}   % For code listings
     5
     6% Useful macros
     7\newcommand{\CFA}{C$\mathbf\forall$} % Cforall symbolic name
     8\newcommand{\CC}{\rm C\kern-.1em\hbox{+\kern-.25em+}} % C++ symbolic name
     9\newcommand{\CCeleven}{\rm C\kern-.1em\hbox{+\kern-.25em+}11} % C++11 symbolic name
     10\newcommand{\CCfourteen}{\rm C\kern-.1em\hbox{+\kern-.25em+}14} % C++14 symbolic name
     11\newcommand{\CCseventeen}{\rm C\kern-.1em\hbox{+\kern-.25em+}17} % C++17 symbolic name
     12
     13\newcommand{\TODO}{\textbf{TODO}}
     14\newcommand{\eg}{\textit{e}.\textit{g}.}
     15\newcommand{\ie}{\textit{i}.\textit{e}.}
     16\newcommand{\etc}{\textit{etc}.}
     17
     18% CFA programming language, based on ANSI C (with some gcc additions)
     19\lstdefinelanguage{CFA}[ANSI]{C}{
     20        morekeywords={_Alignas,_Alignof,__alignof,__alignof__,asm,__asm,__asm__,_At,_Atomic,__attribute,__attribute__,auto,
     21                _Bool,catch,catchResume,choose,_Complex,__complex,__complex__,__const,__const__,disable,dtype,enable,__extension__,
     22                fallthrough,fallthru,finally,forall,ftype,_Generic,_Imaginary,inline,__label__,lvalue,_Noreturn,one_t,otype,restrict,_Static_assert,
     23                _Thread_local,throw,throwResume,trait,try,typeof,__typeof,__typeof__,zero_t},
     24}%
     25
     26\lstset{
     27language=CFA,
     28columns=fullflexible,
     29basicstyle=\linespread{0.9}\sf,                                                 % reduce line spacing and use sanserif font
     30stringstyle=\tt,                                                                                % use typewriter font
     31tabsize=4,                                                                                              % 4 space tabbing
     32xleftmargin=\parindent,                                                                 % indent code to paragraph indentation
     33% extendedchars=true,                                                                   % allow ASCII characters in the range 128-255
     34% escapechar=§,                                                                                 % LaTeX escape in CFA code §...§ (section symbol), emacs: C-q M-'
     35mathescape=true,                                                                                % LaTeX math escape in CFA code $...$
     36keepspaces=true,                                                                                %
     37showstringspaces=false,                                                                 % do not show spaces with cup
     38showlines=true,                                                                                 % show blank lines at end of code
     39aboveskip=4pt,                                                                                  % spacing above/below code block
     40belowskip=3pt,
     41% replace/adjust listing characters that look bad in sanserif
     42literate={-}{\raisebox{-0.15ex}{\texttt{-}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1
     43        {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 {_}{\makebox[1.2ex][c]{\rule{1ex}{0.1ex}}}1 {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
     44        {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{$\rightarrow$}2,
     45% moredelim=**[is][\color{red}]{®}{®},                                  % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
     46% moredelim=**[is][\color{blue}]{ß}{ß},                                 % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
     47% moredelim=**[is][\color{OliveGreen}]{¢}{¢},                   % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
     48% moredelim=[is][\lstset{keywords={}}]{¶}{¶},                   % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
     49}% lstset
     50
     51% inline code @...@
     52\lstMakeShortInline@
     53
     54% ACM Information
    355\citestyle{acmauthoryear}
    4 
    5 \newcommand{\CFA}{C$\mathbf\forall$}
    6 \newcommand{\TODO}{\textbf{TODO}}
    756
    857\acmJournal{PACMPL}
     
    2170}
    2271\email{a3moss@uwaterloo.ca}
     72
     73\author{Robert Schluntz}
     74\affiliation{%
     75        \institution{University of Waterloo}
     76        \department{David R. Cheriton School of Computer Science}
     77        \streetaddress{Davis Centre, University of Waterloo}
     78        \city{Waterloo}
     79        \state{ON}
     80        \postcode{N2L 3G1}
     81        \country{Canada}
     82}
     83\email{rschlunt@uwaterloo.ca}
     84
     85\author{Peter Buhr}
     86\affiliation{%
     87        \institution{University of Waterloo}
     88        \department{David R. Cheriton School of Computer Science}
     89        \streetaddress{Davis Centre, University of Waterloo}
     90        \city{Waterloo}
     91        \state{ON}
     92        \postcode{N2L 3G1}
     93        \country{Canada}
     94}
     95\email{pabuhr@uwaterloo.ca}
    2396
    2497\terms{generic, types}
     
    49122\ccsdesc[300]{Software and its engineering~Source code generation}
    50123
    51 % \abstract{Abstract goes here.}
    52124\begin{abstract}
    53125\TODO{} Write abstract.
     
    59131
    60132\section{Introduction \& Background}
    61 \CFA{}\footnote{Pronounced ``C-for-all'', and written \CFA{} or Cforall.} is an evolutionary modernization of the C programming language which aims to add modern language features to C while maintaining both source compatibility with C and a familiar mental model for programmers. This paper describes how generic types are designed and implemented in \CFA{}, and how they interact with \CFA{}'s polymorphic functions.
    62 
    63 \CFA{}'s polymorphism was originally formalized by \citet{Ditchfield92}, and first implemented by \citet{Bilson03}.
     133
     134\CFA{}\footnote{Pronounced ``C-for-all'', and written \CFA{} or Cforall.} is an evolutionary extension of the C programming language which aims to add modern language features to C while maintaining both source compatibility with C and a familiar mental model for programmers. This paper describes how generic types are designed and implemented in \CFA{}, and how they interact with \CFA{}'s polymorphic functions.
     135
     136\subsection{Polymorphic Functions}
     137
     138\CFA{}'s polymorphism was originally formalized by \citet{Ditchfield92}, and first implemented by \citet{Bilson03}. The signature feature of \CFA{} is parametric-polymorphic functions; such functions are written using a @forall@ clause (which gives the language its name):
     139\begin{lstlisting}
     140forall(otype T)
     141T identity(T x) {is_
     142    return x;
     143}
     144
     145int forty_two = identity(42); // T is bound to int, forty_two == 42
     146\end{lstlisting}
     147The @identity@ function above can be applied to any complete object type (or ``@otype@''). The type variable @T@ is transformed into a set of additional implicit parameters to @identity@, which encode sufficient information about @T@ to create and return a variable of that type. The \CFA{} implementation passes the size and alignment of the type represented by an @otype@ parameter, as well as an assignment operator, constructor, copy constructor and destructor. If this extra information is not needed, the type parameter can be declared as @dtype T@, where @dtype@ is short for ``data type''.
     148
     149Here, the runtime cost of polymorphism is spread over each polymorphic call, due to passing more arguments to polymorphic functions; preliminary experiments have shown this overhead to be similar to \CC{} virtual function calls. An advantage of this design is that, unlike \CC{} template functions, \CFA{} @forall@ functions are compatible with separate compilation.
     150
     151Since bare polymorphic types do not provide a great range of available operations, \CFA{} provides a \emph{type assertion} mechanism to provide further information about a type:
     152\begin{lstlisting}
     153forall(otype T | { T twice(T); })
     154T four_times(T x) {
     155    return twice( twice(x) );
     156}
     157
     158double twice(double d) { return d * 2.0; } // (1)
     159
     160double magic = four_times(10.5); // T is bound to double, uses (1) to satisfy type assertion
     161\end{lstlisting}
     162These type assertions may be either variable or function declarations that depend on a polymorphic type variable. @four_times@ can only be called with an argument for which there exists a function named @twice@ that can take that argument and return another value of the same type; a pointer to the appropriate @twice@ function is passed as an additional implicit parameter to the call of @four_times@.
     163
     164Monomorphic specializations of polymorphic functions can themselves be used to satisfy type assertions. For instance, @twice@ could have been defined using the \CFA{} syntax for operator overloading as:
     165\begin{lstlisting}
     166forall(otype S | { S ?+?(S, S); })
     167S twice(S x) { return x + x; }  // (2)
     168\end{lstlisting}
     169This version of @twice@ works for any type @S@ that has an addition operator defined for it, and it could have been used to satisfy the type assertion on @four_times@.
     170The compiler accomplishes this by creating a wrapper function calling @twice // (2)@ with @S@ bound to @double@, then providing this wrapper function to @four_times@\footnote{\lstinline@twice // (2)@ could also have had a type parameter named \lstinline@T@; \CFA{} specifies renaming of the type parameters, which would avoid the name conflict with the type variable \lstinline@T@ of \lstinline@four_times@.}.
     171
     172\subsection{Traits}
     173
     174\CFA{} provides \emph{traits} as a means to name a group of type assertions, as in the example below:
     175\begin{lstlisting}
     176trait has_magnitude(otype T) {
     177    bool ?<?(T, T);  // comparison operator for T
     178    T -?(T);  // negation operator for T
     179    void ?{}(T*, zero_t);  // constructor from 0 literal
     180};
     181
     182forall(otype M | has_magnitude(M))
     183M abs( M m ) {
     184    M zero = { 0 };  // uses zero_t constructor from trait
     185    return m < zero ? -m : m;
     186}
     187
     188forall(otype M | has_magnitude(M))
     189M max_magnitude( M a, M b ) {
     190    return abs(a) < abs(b) ? b : a;
     191}
     192\end{lstlisting}
     193
     194@otype@ is essentially syntactic sugar for the following trait:
     195\begin{lstlisting}
     196trait otype(dtype T | sized(T)) {
     197        // sized is a compiler-provided pseudo-trait for types with known size & alignment
     198        void ?{}(T*);  // default constructor
     199        void ?{}(T*, T);  // copy constructor
     200        T ?=?(T*, T);  // assignment operator
     201        void ^?{}(T*);  // destructor
     202};
     203\end{lstlisting}
     204
     205Semantically, traits are simply a named lists of type assertions, but they may be used for many of the same purposes that interfaces in Java or abstract base classes in \CC{} are used for. Unlike Java interfaces or \CC{} base classes, \CFA{} types do not explicitly state any inheritance relationship to traits they satisfy; this can be considered a form of structural inheritance, similar to implementation of an interface in Go, as opposed to the nominal inheritance model of Java and \CC{}. Nominal inheritance can be simulated with traits using marker variables or functions:
     206\begin{lstlisting}
     207trait nominal(otype T) {
     208    T is_nominal;
     209};
     210
     211int is_nominal;  // int now satisfies the nominal trait
     212{
     213    char is_nominal; // char satisfies the nominal trait
     214}
     215// char no longer satisfies the nominal trait here 
     216\end{lstlisting}
     217
     218Traits, however, are significantly more powerful than nominal-inheritance interfaces; firstly, due to the scoping rules of the declarations that satisfy a trait's type assertions, a type may not satisfy a trait everywhere that the type is declared, as with @char@ and the @nominal@ trait above. Secondly, traits may be used to declare a relationship among multiple types, a property that may be difficult or impossible to represent in nominal-inheritance type systems:
     219\begin{lstlisting}
     220trait pointer_like(otype Ptr, otype El) {
     221    lvalue El *?(Ptr); // Ptr can be dereferenced into a modifiable value of type El
     222}
     223
     224struct list {
     225    int value;
     226    list *next;  // may omit "struct" on type names
     227};
     228
     229typedef list *list_iterator;
     230
     231lvalue int *?( list_iterator it ) {
     232    return it->value;
     233}
     234\end{lstlisting}
     235
     236In the example above, @(list_iterator, int)@ satisfies @pointer_like@ by the user-defined dereference function, and @(list_iterator, list)@ also satisfies @pointer_like@ by the built-in dereference operator for pointers. Given a declaration @list_iterator it@, @*it@ can be either an @int@ or a @list@, with the meaning disambiguated by context (\eg, @int x = *it;@ interprets @*it@ as an @int@, while @(*it).value = 42;@ interprets @*it@ as a @list@).
     237While a nominal-inheritance system with associated types could model one of those two relationships by making @El@ an associated type of @Ptr@ in the @pointer_like@ implementation, few such systems could model both relationships simultaneously.
     238
     239\section{Generic Types}
     240
     241The generic types design for \CFA{} must integrate efficiently and naturally with the existing polymorphic functions in \CFA{}, while retaining backwards compatibility with C; maintaining separate compilation is a particularly important constraint on the design. However, where the concrete parameters of the generic type are known, there should not be extra overhead for the use of a generic type.
     242
     243A generic type can be declared by placing a @forall@ specifier on a @struct@ or @union@ declaration, and instantiated using a parenthesized list of types after the type name:
     244\begin{lstlisting}
     245forall(otype R, otype S) struct pair {
     246    R first;
     247    S second;
     248};
     249
     250forall(otype T)
     251T value( pair(const char*, T) *p ) { return p->second; }
     252
     253forall(dtype F, otype T)
     254T value_p( pair(F*, T*) p ) { return *p.second; }
     255
     256pair(const char*, int) p = { "magic", 42 };
     257int magic = value( &p );
     258
     259pair(void*, int*) q = { 0, &p.second };
     260magic = value_p( q );
     261double d = 1.0;
     262pair(double*, double*) r = { &d, &d };
     263d = value_p( r );
     264\end{lstlisting}
     265
     266\CFA{} classifies generic types as either \emph{concrete} or \emph{dynamic}. Dynamic generic types vary in their in-memory layout depending on their type parameters, while concrete generic types have a fixed memory layout regardless of type parameters. A type may have polymorphic parameters but still be concrete; \CFA{} refers to such types as \emph{dtype-static}. Polymorphic pointers are an example of dtype-static types -- @forall(dtype T) T*@ is a polymorphic type, but for any @T@ chosen, @T*@ will have exactly the same in-memory representation as a @void*@, and can therefore be represented by a @void*@ in code generation.
     267
     268The \CFA{} compiler instantiates concrete generic types by template-expanding them to fresh struct types; concrete generic types can therefore be used with zero runtime overhead. To enable interoperation between equivalent instantiations of a generic type, the compiler saves the set of instantiations currently in scope and re-uses the generated struct declarations where appropriate. As an example, the concrete instantiation for @pair(const char*, int)@ would look something like this:
     269\begin{lstlisting}
     270struct _pair_conc1 {
     271        const char* first;
     272        int second;
     273};
     274\end{lstlisting}
     275
     276A concrete generic type with dtype-static parameters is also expanded to a struct type, but this struct type is used for all matching instantiations. In the example above, the @pair(F*, T*)@ parameter to @value_p@ is such a type; its expansion would look something like this, and be used as the type of the variables @q@ and @r@ as well, with casts for member access where appropriate:
     277\begin{lstlisting}
     278struct _pair_conc0 {
     279        void* first;
     280        void* second;
     281};
     282\end{lstlisting}
     283
     284\TODO{} Maybe move this after the rest of the discussion.
     285This re-use of dtype-static struct instantiations enables some useful programming patterns at zero runtime cost. The most important such pattern is using @forall(dtype T) T*@ as a type-checked replacement for @void*@, as in this example, which takes a @qsort@ or @bsearch@-compatible comparison routine and creates a similar lexicographic comparison for pairs of pointers:
     286\begin{lstlisting}
     287forall(dtype T)
     288int lexcmp( pair(T*, T*)* a, pair(T*, T*)* b, int (*cmp)(T*, T*) ) {
     289        int c = cmp(a->first, b->first);
     290        if ( c == 0 ) c = cmp(a->second, b->second);
     291        return c;
     292}
     293\end{lstlisting}
     294Since @pair(T*, T*)@ is a concrete type, there are no added implicit parameters to @lexcmp@, so the code generated by \CFA{} will be effectively identical to a version of this written in standard C using @void*@, yet the \CFA{} version will be type-checked to ensure that the fields of both pairs and the arguments to the comparison function match in type.
     295
     296\TODO{} The second is zero-cost ``tag'' structs.
     297
     298\section{Tuples}
     299
     300\TODO{} Integrate Rob's work
     301
     302\TODO{} Check if we actually can use ttype parameters on generic types (if they set the complete flag, it should work, or nearly so).
     303
     304\section{Related Work}
     305
     306\TODO{} Talk about \CC{}, Cyclone, \etc{}
     307
     308\section{Conclusion}
     309
     310\TODO{}
    64311
    65312\bibliographystyle{ACM-Reference-Format}
Note: See TracChangeset for help on using the changeset viewer.